author    | dh155122 <none@none> | 2007-01-19 16:59:38 -0800
committer | dh155122 <none@none> | 2007-01-19 16:59:38 -0800
commit    | f4b3ec61df05330d25f55a36b975b4d7519fdeb1 (patch)
tree      | 395c234b901886c84a82603a767e031fca136e09 /usr/src
parent    | 2e59fc6dac28cd69376c21d6b90a5624160ba94c (diff)
download  | illumos-gate-f4b3ec61df05330d25f55a36b975b4d7519fdeb1.tar.gz
PSARC 2006/366 IP Instances
6289221 RFE: Need virtualized ip-stack for each local zone
6512601 panic in ipsec_in_tag - allocation failure
6514637 error message from dhcpagent: add_pkt_opt: option type 60 is missing required value
6364643 RFE: allow persistent setting of interface flags per zone
6307539 RFE: Invalid network address causes zone boot failure
5041214 Allow IPMP configuration with zones
5005887 RFE: zoneadmd should support plumbing an interface via DHCP
4991139 RFE: zones should provide a mechanism to configure a defaultrouter for a zone
6218378 zoneadmd doesn't set the netmask for non-loopback addresses hosted on lo0
4963280 zones: need to virtualize the IPv6 default address selection mechanism
4963285 zones: need support of stateless address autoconfiguration for IPv6
5048068 zones don't boot if one of its interfaces has failed
5057154 RFE: ability to change interface status from within a zone
4963287 zones should support the plumbing of the first (and only) logical interface
4978517 TCP privileged port space should be partitioned per zone
5023347 zones don't work well with network routes other than default
4963372 investigate whether global zone can act as a router for local zones
6378364 RFE: Allow each zone to have its own virtual IPFilter
Diffstat (limited to 'usr/src')
325 files changed, 32423 insertions, 15248 deletions
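For context before the diff itself: the changeset plumbs a per-zone IP stack ("IP instance") through the kernel and userland (dhcpagent, ifconfig, dladm, ipfilter, SMF routing methods). A minimal usage sketch, not part of this diff, of how an exclusive-IP zone is configured once IP Instances is available; the zone name "testzone" and the datalink "bge1" are illustrative assumptions, not names taken from this changeset:

    # Illustrative sketch only -- "testzone" and "bge1" are placeholders.
    # "ip-type=exclusive" gives the zone its own IP instance (its own
    # interfaces, routes, IPMP groups and IP Filter rules); the named
    # datalink is then dedicated to that zone.
    global# zonecfg -z testzone
    zonecfg:testzone> set ip-type=exclusive
    zonecfg:testzone> add net
    zonecfg:testzone:net> set physical=bge1
    zonecfg:testzone:net> end
    zonecfg:testzone> commit
    zonecfg:testzone> exit

With that configuration, the userland changes below (for example, ifconfig's find_all_zone_interfaces() walking the zone's assigned datalinks, and the SMF routing scripts gating on smf_configure_ip rather than smf_is_globalzone) let the non-global zone manage its own interfaces and routing daemons.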
diff --git a/usr/src/Makefile b/usr/src/Makefile index 0e3d6f5059..b69335d869 100644 --- a/usr/src/Makefile +++ b/usr/src/Makefile @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # ident "%Z%%M% %I% %E% SMI" @@ -134,13 +134,12 @@ closedbins: FRC $(ROOTDIRS) $(SUBDIRS) head ucbhead pkgdefs: FRC @cd $@; pwd; $(MAKE) $(TARGET) -.PARALLEL: sysheaders userheaders libheaders ucbheaders cmdheaders \ - commonheaders +.PARALLEL: sysheaders userheaders libheaders ucbheaders cmdheaders # librpcsvc has a dependency on headers installed by # userheaders, hence the .WAIT before libheaders. sgs: rootdirs .WAIT sysheaders userheaders .WAIT \ - libheaders ucbheaders cmdheaders commonheaders + libheaders ucbheaders cmdheaders # # top-level setup target to setup the development environment @@ -182,9 +181,6 @@ cmdheaders: FRC @cd cmd/fm; pwd; $(MAKE) install_h @cd cmd/mdb; pwd; $(MAKE) install_h -commonheaders: FRC - @cd uts/common/inet/ipf/netinet; pwd; $(MAKE) install_h - # each xmod target depends on a corresponding MACH-specific pseudotarget # before doing common xmod work # diff --git a/usr/src/Makefile.master b/usr/src/Makefile.master index a433e01aa7..b5af375164 100644 --- a/usr/src/Makefile.master +++ b/usr/src/Makefile.master @@ -144,6 +144,9 @@ CHOWN= $(TRUE) CHGRP= $(TRUE) MV= /usr/bin/mv -f RM= /usr/bin/rm -f +CUT= /usr/bin/cut +NM= /usr/ccs/bin/nm +DIFF= /usr/bin/diff GREP= /usr/bin/grep EGREP= /usr/bin/egrep SED= /usr/bin/sed diff --git a/usr/src/cmd/cmd-inet/sbin/dhcpagent/inform.c b/usr/src/cmd/cmd-inet/sbin/dhcpagent/inform.c index f4ba297828..e742e91649 100644 --- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/inform.c +++ b/usr/src/cmd/cmd-inet/sbin/dhcpagent/inform.c @@ -87,7 +87,10 @@ dhcp_inform(dhcp_smach_t *dsmp) (void) add_pkt_opt16(dpkt, CD_MAX_DHCP_SIZE, htons(dsmp->dsm_lif->lif_pif->pif_max - sizeof (struct udpiphdr))); - (void) add_pkt_opt(dpkt, CD_CLASS_ID, class_id, class_id_len); + if (class_id_len != 0) { + (void) add_pkt_opt(dpkt, CD_CLASS_ID, class_id, + class_id_len); + } (void) add_pkt_prl(dpkt, dsmp); (void) add_pkt_opt(dpkt, CD_END, NULL, 0); diff --git a/usr/src/cmd/cmd-inet/sbin/dhcpagent/init_reboot.c b/usr/src/cmd/cmd-inet/sbin/dhcpagent/init_reboot.c index 16f7dbb5f4..497a3e9d18 100644 --- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/init_reboot.c +++ b/usr/src/cmd/cmd-inet/sbin/dhcpagent/init_reboot.c @@ -78,7 +78,8 @@ dhcp_init_reboot_v4(dhcp_smach_t *dsmp) (void) add_pkt_opt16(dpkt, CD_MAX_DHCP_SIZE, htons(dsmp->dsm_lif->lif_pif->pif_max - sizeof (struct udpiphdr))); - (void) add_pkt_opt(dpkt, CD_CLASS_ID, class_id, class_id_len); + if (class_id_len != 0) + (void) add_pkt_opt(dpkt, CD_CLASS_ID, class_id, class_id_len); (void) add_pkt_prl(dpkt, dsmp); /* diff --git a/usr/src/cmd/cmd-inet/sbin/dhcpagent/renew.c b/usr/src/cmd/cmd-inet/sbin/dhcpagent/renew.c index d6a81ad904..dd65950201 100644 --- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/renew.c +++ b/usr/src/cmd/cmd-inet/sbin/dhcpagent/renew.c @@ -480,7 +480,10 @@ dhcp_extending(dhcp_smach_t *dsmp) htons(lif->lif_max - sizeof (struct udpiphdr))); (void) add_pkt_opt32(dpkt, CD_LEASE_TIME, htonl(DHCP_PERM)); - (void) add_pkt_opt(dpkt, CD_CLASS_ID, class_id, class_id_len); + if (class_id_len != 0) { + (void) add_pkt_opt(dpkt, CD_CLASS_ID, class_id, + class_id_len); + } (void) add_pkt_prl(dpkt, dsmp); /* * dsm_reqhost was set for this state machine in diff --git 
a/usr/src/cmd/cmd-inet/sbin/dhcpagent/request.c b/usr/src/cmd/cmd-inet/sbin/dhcpagent/request.c index f6e418f210..936965e703 100644 --- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/request.c +++ b/usr/src/cmd/cmd-inet/sbin/dhcpagent/request.c @@ -304,7 +304,10 @@ dhcp_requesting(iu_tq_t *tqp, void *arg) offer->opts[CD_SERVER_ID]->value, offer->opts[CD_SERVER_ID]->len); - (void) add_pkt_opt(dpkt, CD_CLASS_ID, class_id, class_id_len); + if (class_id_len != 0) { + (void) add_pkt_opt(dpkt, CD_CLASS_ID, class_id, + class_id_len); + } (void) add_pkt_prl(dpkt, dsmp); /* diff --git a/usr/src/cmd/cmd-inet/sbin/dhcpagent/select.c b/usr/src/cmd/cmd-inet/sbin/dhcpagent/select.c index 41cc76f7dc..9dfead3df0 100644 --- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/select.c +++ b/usr/src/cmd/cmd-inet/sbin/dhcpagent/select.c @@ -169,7 +169,10 @@ dhcp_selecting(dhcp_smach_t *dsmp) htons(dsmp->dsm_lif->lif_max - sizeof (struct udpiphdr))); (void) add_pkt_opt32(dpkt, CD_LEASE_TIME, htonl(DHCP_PERM)); - (void) add_pkt_opt(dpkt, CD_CLASS_ID, class_id, class_id_len); + if (class_id_len != 0) { + (void) add_pkt_opt(dpkt, CD_CLASS_ID, class_id, + class_id_len); + } (void) add_pkt_prl(dpkt, dsmp); if (df_get_bool(dsmp->dsm_name, dsmp->dsm_isv6, @@ -280,7 +283,7 @@ dhcp_collect_dlpi(iu_eh_t *eh, int fd, short events, iu_event_id_t id, return; } - if (pkt_v4_match(recv_type, DHCP_PACK | DHCP_PNAK)) { + if (pkt_v4_match(recv_type, DHCP_PACK)) { if (!dhcp_bound(dsmp, plp)) { dhcpmsg(MSG_WARNING, "dhcp_collect_dlpi: dhcp_bound " "failed for %s", dsmp->dsm_name); @@ -289,6 +292,9 @@ dhcp_collect_dlpi(iu_eh_t *eh, int fd, short events, iu_event_id_t id, } dhcpmsg(MSG_VERBOSE, "dhcp_collect_dlpi: %s on %s", pname, dsmp->dsm_name); + } else if (pkt_v4_match(recv_type, DHCP_PNAK)) { + free_pkt_entry(plp); + dhcp_restart(dsmp); } else { pkt_smach_enqueue(dsmp, plp); } diff --git a/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/svc-ndp b/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/svc-ndp index 0937c377a3..fe52065927 100644 --- a/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/svc-ndp +++ b/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/svc-ndp @@ -20,7 +20,7 @@ # CDDL HEADER END # # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # ident "%Z%%M% %I% %E% SMI" @@ -28,7 +28,7 @@ . /lib/svc/share/smf_include.sh . /lib/svc/share/routing_include.sh -smf_is_globalzone || exit $SMF_EXIT_OK +smf_configure_ip || exit $SMF_EXIT_OK daemon_args=`get_daemon_args $SMF_FMRI` options="adtf:" diff --git a/usr/src/cmd/cmd-inet/usr.lib/in.ripngd/ripng.xml b/usr/src/cmd/cmd-inet/usr.lib/in.ripngd/ripng.xml index 05391c8dfe..5b65c9417a 100644 --- a/usr/src/cmd/cmd-inet/usr.lib/in.ripngd/ripng.xml +++ b/usr/src/cmd/cmd-inet/usr.lib/in.ripngd/ripng.xml @@ -1,7 +1,7 @@ <?xml version="1.0"?> <!DOCTYPE service_bundle SYSTEM "/usr/share/lib/xml/dtd/service_bundle.dtd.1"> <!-- - Copyright 2006 Sun Microsystems, Inc. All rights reserved. + Copyright 2007 Sun Microsystems, Inc. All rights reserved. Use is subject to license terms. 
CDDL HEADER START @@ -75,7 +75,7 @@ timeout_seconds='60'> <method_context> <method_credential user='root' group='root' -privileges='basic,proc_owner,proc_fork,proc_exec,proc_info,proc_session,file_chown,sys_net_config,net_privaddr,net_icmpaccess,net_rawaccess'/> +privileges='basic,proc_owner,proc_fork,proc_exec,proc_info,proc_session,file_chown,sys_ip_config,net_privaddr,net_icmpaccess,net_rawaccess'/> </method_context> </exec_method> diff --git a/usr/src/cmd/cmd-inet/usr.lib/in.ripngd/svc-ripng b/usr/src/cmd/cmd-inet/usr.lib/in.ripngd/svc-ripng index 6e0bea069c..f0fb43cd0c 100644 --- a/usr/src/cmd/cmd-inet/usr.lib/in.ripngd/svc-ripng +++ b/usr/src/cmd/cmd-inet/usr.lib/in.ripngd/svc-ripng @@ -20,7 +20,7 @@ # CDDL HEADER END # # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # ident "%Z%%M% %I% %E% SMI" @@ -28,7 +28,7 @@ . /lib/svc/share/smf_include.sh . /lib/svc/share/routing_include.sh -smf_is_globalzone || exit $SMF_EXIT_OK +smf_configure_ip || exit $SMF_EXIT_OK daemon_args=`get_daemon_args $SMF_FMRI` options="sqp:Ptv" diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.c b/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.c index c1f32fd6aa..c297c2c75a 100644 --- a/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.c +++ b/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.c @@ -193,6 +193,10 @@ static int get_lun(char *); static void selectifs(int argc, char *argv[], int af, struct lifreq *lifrp); static int updownifs(iface_t *ifs, int up); +static int find_all_global_interfaces(struct lifconf *lifcp, char **buf, + int64_t lifc_flags); +static int find_all_zone_interfaces(struct lifconf *lifcp, char **buf, + int64_t lifc_flags); #define max(a, b) ((a) < (b) ? (b) : (a)) @@ -484,59 +488,30 @@ foreachinterface(void (*func)(), int argc, char *argv[], int af, struct lifreq lifrl; /* Local lifreq struct */ int numifs; unsigned bufsize; - ni_t *nip; int plumball = 0; int save_af = af; + buf = NULL; /* * Special case: * ifconfig -a plumb should find all network interfaces - * in the machine by traversing the devinfo tree. + * in the machine by traversing the devinfo tree for global zone. + * For non-global zones, only find the assigned interfaces. * Also, there is no need to SIOCGLIF* ioctls, since * those interfaces have already been plumbed */ if (argc > 0 && (strcmp(*argv, "plumb") == 0)) { - /* - * Look through the kernel's devinfo tree for - * network devices - */ - di_node_t root; - - /* - * DINFOCACHE is equivalent to DINFOSUBTREE | DINFOMINOR | - * DINFOPROP | DINFOFORCE. 
- */ - if ((root = di_init("/", DINFOCACHE)) == DI_NODE_NIL) { - (void) fprintf(stderr, "ifconfig: di_init failed;" - " check the devinfo driver.\n"); - exit(1); - } - - (void) di_walk_minor(root, DDI_NT_NET, DI_CHECK_ALIAS, NULL, - devfs_entry); - di_fini(root); - - /* - * Now, translate the linked list into - * a struct lifreq buffer - */ - bufsize = num_ni * sizeof (struct lifreq); - if ((buf = malloc(bufsize)) == NULL) - Perror0_exit("foreachinterface: malloc failed"); - - lifc.lifc_family = AF_UNSPEC; - lifc.lifc_flags = lifc_flags; - lifc.lifc_len = bufsize; - lifc.lifc_buf = buf; - - for (n = 0, lifrp = lifc.lifc_req; n < num_ni; n++, lifrp++) { - nip = ni_list; - (void) strncpy(lifrp->lifr_name, nip->ni_name, - sizeof (lifr.lifr_name)); - ni_list = nip->ni_next; - free(nip); + if (getzoneid() == GLOBAL_ZONEID) { + if (find_all_global_interfaces(&lifc, &buf, + lifc_flags) != 0) + return; + } else { + if (find_all_zone_interfaces(&lifc, &buf, + lifc_flags) != 0) + return; } - + if (lifc.lifc_len == 0) + return; plumball = 1; } else { lifn.lifn_family = AF_UNSPEC; @@ -664,7 +639,8 @@ foreachinterface(void (*func)(), int argc, char *argv[], int af, /* the func could have overwritten origname, so restore */ (void) strncpy(name, origname, sizeof (name)); } - free(buf); + if (buf != NULL) + free(buf); } static void @@ -1781,6 +1757,161 @@ updownifs(iface_t *ifs, int up) } /* + * static int find_all_global_interfaces(struct lifconf *lifcp, char **buf, + * int64_t lifc_flags) + * + * It finds all interfaces for the global zone, that is all + * the physical interfaces, using the kernel's devinfo tree. + * + * It takes in input a pointer to struct lifconf to receive interfaces + * informations, a **char to hold allocated buffer, and a lifc_flags. + * + * Return values: + * 0 = everything OK + * -1 = problem + */ +static int +find_all_global_interfaces(struct lifconf *lifcp, char **buf, + int64_t lifc_flags) +{ + unsigned bufsize; + int n; + di_node_t root; + ni_t *nip; + struct lifreq *lifrp; + + /* + * DINFOCACHE is equivalent to DINFOSUBTREE | DINFOMINOR | + * DINFOPROP | DINFOFORCE. + */ + if ((root = di_init("/", DINFOCACHE)) == DI_NODE_NIL) { + (void) fprintf(stderr, "ifconfig: di_init " + "failed; check the devinfo driver.\n"); + exit(1); + } + + (void) di_walk_minor(root, DDI_NT_NET, DI_CHECK_ALIAS, + NULL, devfs_entry); + di_fini(root); + + /* + * Now, translate the linked list into + * a struct lifreq buffer + */ + if (num_ni == 0) { + lifcp->lifc_family = AF_UNSPEC; + lifcp->lifc_flags = lifc_flags; + lifcp->lifc_len = 0; + lifcp->lifc_buf = NULL; + return (0); + } + + bufsize = num_ni * sizeof (struct lifreq); + if ((*buf = malloc(bufsize)) == NULL) + Perror0_exit("find_all_interfaces: malloc failed"); + + lifcp->lifc_family = AF_UNSPEC; + lifcp->lifc_flags = lifc_flags; + lifcp->lifc_len = bufsize; + lifcp->lifc_buf = *buf; + + for (n = 0, lifrp = lifcp->lifc_req; n < num_ni; n++, lifrp++) { + nip = ni_list; + (void) strncpy(lifrp->lifr_name, nip->ni_name, + sizeof (lifr.lifr_name)); + ni_list = nip->ni_next; + free(nip); + } + return (0); +} + +/* + * static int find_all_zone_interfaces(struct lifconf *lifcp, char **buf, + * int64_t lifc_flags) + * + * It finds all interfaces for an exclusive-IP zone, that is all the interfaces + * assigned to it. + * + * It takes in input a pointer to struct lifconf to receive interfaces + * informations, a **char to hold allocated buffer, and a lifc_flags. 
+ * + * Return values: + * 0 = everything OK + * -1 = problem + */ +static int +find_all_zone_interfaces(struct lifconf *lifcp, char **buf, int64_t lifc_flags) +{ + zoneid_t zoneid; + unsigned bufsize; + char *dlnames, *ptr; + struct lifreq *lifrp; + int num_ni_saved, i; + + zoneid = getzoneid(); + + num_ni = 0; + if (zone_list_datalink(zoneid, &num_ni, NULL) != 0) + Perror0_exit("find_all_interfaces: list interfaces failed"); +again: + /* this zone doesn't have any data-links */ + if (num_ni == 0) { + lifcp->lifc_family = AF_UNSPEC; + lifcp->lifc_flags = lifc_flags; + lifcp->lifc_len = 0; + lifcp->lifc_buf = NULL; + return (0); + } + + dlnames = malloc(num_ni * LIFNAMSIZ); + if (dlnames == NULL) + Perror0_exit("find_all_interfaces: out of memory"); + num_ni_saved = num_ni; + + if (zone_list_datalink(zoneid, &num_ni, dlnames) != 0) + Perror0_exit("find_all_interfaces: list interfaces failed"); + + if (num_ni_saved < num_ni) { + /* list increased, try again */ + free(dlnames); + goto again; + } + + /* this zone doesn't have any data-links now */ + if (num_ni == 0) { + free(dlnames); + lifcp->lifc_family = AF_UNSPEC; + lifcp->lifc_flags = lifc_flags; + lifcp->lifc_len = 0; + lifcp->lifc_buf = NULL; + return (0); + } + + bufsize = num_ni * sizeof (struct lifreq); + if ((*buf = malloc(bufsize)) == NULL) { + free(dlnames); + Perror0_exit("find_all_interfaces: malloc failed"); + } + + lifrp = (struct lifreq *)*buf; + ptr = dlnames; + for (i = 0; i < num_ni; i++) { + if (strlcpy(lifrp->lifr_name, ptr, LIFNAMSIZ) >= + LIFNAMSIZ) + Perror0_exit("find_all_interfaces: overflow"); + ptr += LIFNAMSIZ; + lifrp++; + } + + free(dlnames); + lifcp->lifc_family = AF_UNSPEC; + lifcp->lifc_flags = lifc_flags; + lifcp->lifc_len = bufsize; + lifcp->lifc_buf = *buf; + return (0); +} + +/* * Create the next unused logical interface using the original name * and assign the address (and mask if '/<n>' is part of the address). * Use the new logical interface for subsequent subcommands by updating @@ -2760,8 +2891,13 @@ ifstatus(const char *ifname) if (!v4compat) { if (ioctl(s, SIOCGLIFINDEX, (caddr_t)&lifr) >= 0) (void) printf(" index %d", lifr.lifr_index); + /* + * Stack instances use GLOBAL_ZONEID for IP data structures + * even in the non-global zone. + */ if (ioctl(s, SIOCGLIFZONE, (caddr_t)&lifr) >= 0 && - lifr.lifr_zoneid != getzoneid()) { + lifr.lifr_zoneid != getzoneid() && + lifr.lifr_zoneid != GLOBAL_ZONEID) { char zone_name[ZONENAME_MAX]; if (lifr.lifr_zoneid == ALL_ZONES) { @@ -3928,6 +4064,7 @@ inetplumb(char *arg, int64_t param) int dev_fd; dlpi_if_attr_t dia; boolean_t islo; + zoneid_t zoneid; strptr = strchr(name, ':'); islo = (strcmp(name, LOOPBACK_IF) == 0); @@ -3962,6 +4099,27 @@ inetplumb(char *arg, int64_t param) return (0); } + /* + * For global zone, check if the interface is used by a non-global + * zone, note that the non-global zones doesn't need this check, + * because zoneadm has taken care of this when the zone boots. 
+ */ + zoneid = getzoneid(); + if (zoneid == GLOBAL_ZONEID) { + int ret; + + zoneid = ALL_ZONES; + ret = zone_check_datalink(&zoneid, name); + if (ret == 0) { + char zonename[ZONENAME_MAX]; + + (void) getzonenamebyid(zoneid, zonename, ZONENAME_MAX); + (void) fprintf(stderr, "%s is used by non-global" + "zone: %s\n", name, zonename); + return (1); + } + } + if (debug) (void) printf("inetplumb: %s af %d\n", name, afp->af_af); diff --git a/usr/src/cmd/cmd-inet/usr.sbin/in.rdisc/rdisc.xml b/usr/src/cmd/cmd-inet/usr.sbin/in.rdisc/rdisc.xml index 7ae84f18fd..005b49158e 100644 --- a/usr/src/cmd/cmd-inet/usr.sbin/in.rdisc/rdisc.xml +++ b/usr/src/cmd/cmd-inet/usr.sbin/in.rdisc/rdisc.xml @@ -1,7 +1,7 @@ <?xml version="1.0"?> <!DOCTYPE service_bundle SYSTEM "/usr/share/lib/xml/dtd/service_bundle.dtd.1"> <!-- - Copyright 2006 Sun Microsystems, Inc. All rights reserved. + Copyright 2007 Sun Microsystems, Inc. All rights reserved. Use is subject to license terms. CDDL HEADER START @@ -58,7 +58,7 @@ timeout_seconds='60'> <method_context> <method_credential user='root' group='root' -privileges='basic,proc_owner,proc_fork,proc_exec,proc_info,proc_session,file_chown,sys_net_config,net_icmpaccess,net_rawaccess'/> +privileges='basic,proc_owner,proc_fork,proc_exec,proc_info,proc_session,file_chown,sys_ip_config,net_icmpaccess,net_rawaccess'/> </method_context> </exec_method> diff --git a/usr/src/cmd/cmd-inet/usr.sbin/in.rdisc/svc-rdisc b/usr/src/cmd/cmd-inet/usr.sbin/in.rdisc/svc-rdisc index 76cdda3604..1bdb4dc11b 100644 --- a/usr/src/cmd/cmd-inet/usr.sbin/in.rdisc/svc-rdisc +++ b/usr/src/cmd/cmd-inet/usr.sbin/in.rdisc/svc-rdisc @@ -20,7 +20,7 @@ # CDDL HEADER END # # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # ident "%Z%%M% %I% %E% SMI" @@ -28,7 +28,7 @@ . /lib/svc/share/smf_include.sh . /lib/svc/share/routing_include.sh -smf_is_globalzone || exit $SMF_EXIT_OK +smf_configure_ip || exit $SMF_EXIT_OK daemon_args=`get_daemon_args $SMF_FMRI` options="afsp:T:r" diff --git a/usr/src/cmd/cmd-inet/usr.sbin/in.routed/route.xml b/usr/src/cmd/cmd-inet/usr.sbin/in.routed/route.xml index c7867bbba8..8982aba010 100644 --- a/usr/src/cmd/cmd-inet/usr.sbin/in.routed/route.xml +++ b/usr/src/cmd/cmd-inet/usr.sbin/in.routed/route.xml @@ -1,7 +1,7 @@ <?xml version="1.0"?> <!DOCTYPE service_bundle SYSTEM "/usr/share/lib/xml/dtd/service_bundle.dtd.1"> <!-- - Copyright 2006 Sun Microsystems, Inc. All rights reserved. + Copyright 2007 Sun Microsystems, Inc. All rights reserved. Use is subject to license terms. CDDL HEADER START @@ -58,7 +58,7 @@ timeout_seconds='60'> <method_context> <method_credential user='root' group='root' -privileges='basic,proc_owner,proc_fork,proc_exec,proc_info,proc_session,file_chown,sys_net_config,net_privaddr,net_icmpaccess,net_rawaccess'/> +privileges='basic,proc_owner,proc_fork,proc_exec,proc_info,proc_session,file_chown,sys_ip_config,net_privaddr,net_icmpaccess,net_rawaccess'/> </method_context> </exec_method> diff --git a/usr/src/cmd/cmd-inet/usr.sbin/in.routed/svc-route b/usr/src/cmd/cmd-inet/usr.sbin/in.routed/svc-route index aa674562eb..a0a82c8a0b 100644 --- a/usr/src/cmd/cmd-inet/usr.sbin/in.routed/svc-route +++ b/usr/src/cmd/cmd-inet/usr.sbin/in.routed/svc-route @@ -20,7 +20,7 @@ # CDDL HEADER END # # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. 
# # ident "%Z%%M% %I% %E% SMI" @@ -28,7 +28,7 @@ . /lib/svc/share/smf_include.sh . /lib/svc/share/routing_include.sh -smf_is_globalzone || exit $SMF_EXIT_OK +smf_configure_ip || exit $SMF_EXIT_OK daemon_args=`get_daemon_args $SMF_FMRI` options="AdghmnqsStvVzT:F:P:" diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ping/ping.c b/usr/src/cmd/cmd-inet/usr.sbin/ping/ping.c index a0ea8b8052..17891ffc78 100644 --- a/usr/src/cmd/cmd-inet/usr.sbin/ping/ping.c +++ b/usr/src/cmd/cmd-inet/usr.sbin/ping/ping.c @@ -18,7 +18,7 @@ * * CDDL HEADER END * - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -246,25 +246,25 @@ main(int argc, char *argv[]) char tmp_buf[INET6_ADDRSTRLEN]; int c; int i; - boolean_t has_sys_net_config; + boolean_t has_sys_ip_config; progname = argv[0]; /* * This program needs the net_icmpaccess privilege for creating - * raw ICMP sockets. It needs sys_net_config for using the + * raw ICMP sockets. It needs sys_ip_config for using the * IP_NEXTHOP socket option (IPv4 only). We'll fail * on the socket call and report the error there when we have * insufficient privileges. * - * Non-global zones don't have the sys_net_config privilege, so + * Shared-IP zones don't have the sys_ip_config privilege, so * we need to check for it in our limit set before trying * to set it. */ - has_sys_net_config = priv_ineffect(PRIV_SYS_NET_CONFIG); + has_sys_ip_config = priv_ineffect(PRIV_SYS_IP_CONFIG); (void) __init_suid_priv(PU_CLEARLIMITSET, PRIV_NET_ICMPACCESS, - has_sys_net_config ? PRIV_SYS_NET_CONFIG : (char *)NULL, + has_sys_ip_config ? PRIV_SYS_IP_CONFIG : (char *)NULL, (char *)NULL); setbuf(stdout, (char *)0); @@ -1228,7 +1228,7 @@ set_nexthop(int family, struct addrinfo *ai_nexthop, int sock) nh = ((struct sockaddr_in *)ai_nexthop-> ai_addr)->sin_addr.s_addr; - /* now we need the sys_net_config privilege */ + /* now we need the sys_ip_config privilege */ (void) __priv_bracket(PRIV_ON); if (setsockopt(sock, IPPROTO_IP, IP_NEXTHOP, &nh, sizeof (ipaddr_t)) < 0) { diff --git a/usr/src/cmd/cmd-inet/usr.sbin/routeadm/svc-forwarding b/usr/src/cmd/cmd-inet/usr.sbin/routeadm/svc-forwarding index fc9c5e3719..db3ead4bf7 100644 --- a/usr/src/cmd/cmd-inet/usr.sbin/routeadm/svc-forwarding +++ b/usr/src/cmd/cmd-inet/usr.sbin/routeadm/svc-forwarding @@ -20,7 +20,7 @@ # CDDL HEADER END # # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # ident "%Z%%M% %I% %E% SMI" @@ -61,7 +61,7 @@ fi case "$1" in 'start' | 'refresh' ) - smf_is_globalzone || exit $SMF_EXIT_OK + smf_configure_ip || exit $SMF_EXIT_OK # # Start ip forwarding. # diff --git a/usr/src/cmd/cmd-inet/usr.sbin/routeadm/svc-legacy-routing b/usr/src/cmd/cmd-inet/usr.sbin/routeadm/svc-legacy-routing index dcf8b39f40..d06380148e 100644 --- a/usr/src/cmd/cmd-inet/usr.sbin/routeadm/svc-legacy-routing +++ b/usr/src/cmd/cmd-inet/usr.sbin/routeadm/svc-legacy-routing @@ -20,7 +20,7 @@ # CDDL HEADER END # # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # ident "%Z%%M% %I% %E% SMI" @@ -55,7 +55,7 @@ case "$method" in echo "${proto}-routing-stop-cmd not specified by routeadm." 
exit $SMF_EXIT_ERR_CONFIG fi - smf_is_globalzone || exit $SMF_EXIT_OK + smf_configure_ip || exit $SMF_EXIT_OK # Run daemon - fail if it does not successfully daemonize. eval "$daemon_prog $daemon_args" @@ -65,10 +65,16 @@ case "$method" in fi # Create pidfile. daemon_name=`/usr/bin/basename $daemon_prog` - /usr/bin/pgrep -P 1 -f $daemon_prog > /var/tmp/${daemon_name}.pid + if smf_is_globalzone; then + /usr/bin/pgrep -P 1 -z `smf_zonename` -f $daemon_prog > \ + /var/tmp/${daemon_name}.pid + else + /usr/bin/pgrep -z `smf_zonename` -f $daemon_prog > \ + /var/tmp/${daemon_name}.pid + fi ;; 'stop' ) - smf_is_globalzone || exit $SMF_EXIT_OK + smf_configure_ip || exit $SMF_EXIT_OK # Stop daemon - ignore result. if [ -n "$daemon_stop" ]; then diff --git a/usr/src/cmd/diff/Makefile b/usr/src/cmd/diff/Makefile index 1c44145731..d5443bb3d3 100644 --- a/usr/src/cmd/diff/Makefile +++ b/usr/src/cmd/diff/Makefile @@ -2,9 +2,8 @@ # CDDL HEADER START # # The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. # # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or http://www.opensolaris.org/os/licensing. @@ -22,14 +21,14 @@ # #ident "%Z%%M% %I% %E% SMI" # -# Copyright 2004 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # PROG= diff diffh SRCS= $(PROG:%=%.c) -DIFF= diff -DIFFH= diffh +BINDIFF= diff +LIBDIFFH= diffh include ../Makefile.cmd # @@ -39,8 +38,8 @@ POFILE= diff.po DCFILE= diff.dc #XGETFLAGS += -a -x diff.xcl -ROOTBINDIFF = $(DIFF:%=$(ROOTBIN)/%) -ROOTLIBDIFFH = $(DIFFH:%=$(ROOTLIB)/%) +ROOTBINDIFF = $(BINDIFF:%=$(ROOTBIN)/%) +ROOTLIBDIFFH = $(LIBDIFFH:%=$(ROOTLIB)/%) .KEEP_STATE: diff --git a/usr/src/cmd/dladm/dladm.c b/usr/src/cmd/dladm/dladm.c index cd07c7cb45..c088f44b68 100644 --- a/usr/src/cmd/dladm/dladm.c +++ b/usr/src/cmd/dladm/dladm.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -116,6 +116,8 @@ static cmdfunc_t do_show_linkprop, do_set_linkprop, do_reset_linkprop; static cmdfunc_t do_create_secobj, do_delete_secobj, do_show_secobj; static cmdfunc_t do_init_linkprop, do_init_secobj; +static void show_linkprop_onelink(void *, const char *); + static void link_stats(const char *, uint_t); static void aggr_stats(uint32_t, uint_t); static void dev_stats(const char *dev, uint32_t); @@ -2340,6 +2342,7 @@ typedef struct show_linkprop_state { const char *ls_link; char *ls_line; char **ls_propvals; + prop_list_t *ls_proplist; boolean_t ls_parseable; boolean_t ls_persist; boolean_t ls_header; @@ -2420,8 +2423,8 @@ fail: static void print_linkprop_head(void) { - (void) printf("%-15s %-14s %-14s %-30s \n", - "PROPERTY", "VALUE", "DEFAULT", "POSSIBLE"); + (void) printf("%-12s %-15s %-14s %-14s %-20s \n", + "LINK", "PROPERTY", "VALUE", "DEFAULT", "POSSIBLE"); } static void @@ -2481,6 +2484,15 @@ show_linkprop(void *arg, const char *propname) char *ptr = statep->ls_line; char *lim = ptr + MAX_PROP_LINE; + if (statep->ls_persist && dladm_is_prop_temponly(propname, NULL)) + return (B_TRUE); + + if (statep->ls_parseable) + ptr += snprintf(ptr, lim - ptr, "LINK=\"%s\" ", + statep->ls_link); + else + ptr += snprintf(ptr, lim - ptr, "%-12s ", statep->ls_link); + if (statep->ls_parseable) ptr += snprintf(ptr, lim - ptr, "PROPERTY=\"%s\" ", propname); else @@ -2492,7 +2504,7 @@ show_linkprop(void *arg, const char *propname) print_linkprop(statep, propname, DLADM_PROP_VAL_DEFAULT, "DEFAULT", "%-14s ", &ptr); print_linkprop(statep, propname, DLADM_PROP_VAL_MODIFIABLE, - "POSSIBLE", "%-30s ", &ptr); + "POSSIBLE", "%-20s ", &ptr); if (statep->ls_header) { statep->ls_header = B_FALSE; @@ -2506,11 +2518,8 @@ show_linkprop(void *arg, const char *propname) static void do_show_linkprop(int argc, char **argv) { - int i, option, fd; - char linkname[MAXPATHLEN]; + int option; prop_list_t *proplist = NULL; - char *buf; - dladm_status_t status; show_linkprop_state_t state; opterr = 0; @@ -2544,8 +2553,31 @@ do_show_linkprop(int argc, char **argv) else if (optind != argc) usage(); - if (state.ls_link == NULL) - die("link name must be specified"); + state.ls_proplist = proplist; + + if (state.ls_link == NULL) { + (void) dladm_walk(show_linkprop_onelink, &state); + } else { + show_linkprop_onelink(&state, state.ls_link); + } + free_props(proplist); +} + +static void +show_linkprop_onelink(void *arg, const char *link) +{ + int i, fd; + char linkname[MAXPATHLEN]; + char *buf; + dladm_status_t status; + prop_list_t *proplist = NULL; + show_linkprop_state_t *statep; + const char *savep; + + statep = (show_linkprop_state_t *)arg; + savep = statep->ls_link; + statep->ls_link = link; + proplist = statep->ls_proplist; /* * When some WiFi links are opened for the first time, their hardware @@ -2553,37 +2585,37 @@ do_show_linkprop(int argc, char **argv) * if there are no open links, the retrieval of link properties * (below) will proceed slowly unless we hold the link open. 
*/ - (void) snprintf(linkname, MAXPATHLEN, "/dev/%s", state.ls_link); + (void) snprintf(linkname, MAXPATHLEN, "/dev/%s", link); if ((fd = open(linkname, O_RDWR)) < 0) - die("cannot open %s: %s", state.ls_link, strerror(errno)); + die("cannot open %s: %s", link, strerror(errno)); buf = malloc((sizeof (char *) + DLADM_PROP_VAL_MAX) * MAX_PROP_VALS + MAX_PROP_LINE); if (buf == NULL) die("insufficient memory"); - state.ls_propvals = (char **)(void *)buf; + statep->ls_propvals = (char **)(void *)buf; for (i = 0; i < MAX_PROP_VALS; i++) { - state.ls_propvals[i] = buf + sizeof (char *) * MAX_PROP_VALS + + statep->ls_propvals[i] = buf + sizeof (char *) * MAX_PROP_VALS + i * DLADM_PROP_VAL_MAX; } - state.ls_line = buf + + statep->ls_line = buf + (sizeof (char *) + DLADM_PROP_VAL_MAX) * MAX_PROP_VALS; if (proplist != NULL) { for (i = 0; i < proplist->pl_count; i++) { - if (!show_linkprop(&state, + if (!show_linkprop(statep, proplist->pl_info[i].pi_name)) break; } } else { - status = dladm_walk_prop(state.ls_link, &state, show_linkprop); + status = dladm_walk_prop(link, statep, show_linkprop); if (status != DLADM_STATUS_OK) die_dlerr(status, "show-linkprop"); } (void) close(fd); free(buf); - free_props(proplist); + statep->ls_link = savep; } static dladm_status_t @@ -2591,17 +2623,18 @@ set_linkprop_persist(const char *link, const char *prop_name, char **prop_val, uint_t val_cnt, boolean_t reset) { dladm_status_t status; + char *errprop; status = dladm_set_prop(link, prop_name, prop_val, val_cnt, - DLADM_OPT_PERSIST); + DLADM_OPT_PERSIST, &errprop); if (status != DLADM_STATUS_OK) { if (reset) { warn_dlerr(status, "cannot persistently reset link " - "property '%s' on '%s'", prop_name, link); + "property '%s' on '%s'", errprop, link); } else { warn_dlerr(status, "cannot persistently set link " - "property '%s' on '%s'", prop_name, link); + "property '%s' on '%s'", errprop, link); } } return (status); @@ -2650,13 +2683,16 @@ set_linkprop(int argc, char **argv, boolean_t reset) die("link name must be specified"); if (proplist == NULL) { + char *errprop; + if (!reset) die("link property must be specified"); - status = dladm_set_prop(link, NULL, NULL, 0, DLADM_OPT_TEMP); + status = dladm_set_prop(link, NULL, NULL, 0, DLADM_OPT_TEMP, + &errprop); if (status != DLADM_STATUS_OK) { - warn_dlerr(status, "cannot reset link properties " - "on '%s'", link); + warn_dlerr(status, "cannot reset link property '%s' " + "on '%s'", errprop, link); } if (!temp) { dladm_status_t s; @@ -2688,7 +2724,7 @@ set_linkprop(int argc, char **argv, boolean_t reset) } } s = dladm_set_prop(link, pip->pi_name, val, count, - DLADM_OPT_TEMP); + DLADM_OPT_TEMP, NULL); if (s == DLADM_STATUS_OK) { if (!temp) { s = set_linkprop_persist(link, @@ -2734,10 +2770,12 @@ set_linkprop(int argc, char **argv, boolean_t reset) if (ptr >= lim) break; } - if (ptr > errmsg) + if (ptr > errmsg) { *(ptr - 1) = '\0'; - warn("link property '%s' must be one of: %s", - pip->pi_name, errmsg); + warn("link property '%s' must be one of: %s", + pip->pi_name, errmsg); + } else + warn("invalid link property '%s'", *val); free(propvals); break; } diff --git a/usr/src/cmd/ipf/lib/Makefile.com b/usr/src/cmd/ipf/lib/Makefile.com index e6bf79f936..8019bb8e9c 100644 --- a/usr/src/cmd/ipf/lib/Makefile.com +++ b/usr/src/cmd/ipf/lib/Makefile.com @@ -1,5 +1,25 @@ # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). 
+# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # #ident "%Z%%M% %I% %E% SMI" @@ -35,7 +55,9 @@ OBJECTS= addicmp.o addipopt.o bcopywrap.o \ remove_pool.o remove_poolnode.o remove_hash.o \ remove_hashnode.o resetlexer.o rwlock_emul.o \ tcpflags.o var.o verbose.o \ - v6ionames.o v6optvalue.o + v6ionames.o v6optvalue.o printpool_live.o \ + printpooldata.o printhash_live.o printhashdata.o \ + printactivenat.o include $(SRC)/lib/Makefile.lib include ../../Makefile.ipf diff --git a/usr/src/cmd/ipf/lib/common/getifname.c b/usr/src/cmd/ipf/lib/common/getifname.c index ec3ae926b2..ef25075dfe 100644 --- a/usr/src/cmd/ipf/lib/common/getifname.c +++ b/usr/src/cmd/ipf/lib/common/getifname.c @@ -3,7 +3,7 @@ * * See the IPFILTER.LICENCE file for details on licencing. * - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -38,7 +38,7 @@ struct ifnet *ptr; int len; # endif struct ifnet netif; - +#define SOLARIS_PFHOOKS 1 # ifdef SOLARIS_PFHOOKS if ((opts & OPT_DONOTHING) == 0) return "@"; diff --git a/usr/src/cmd/ipf/lib/common/getnattype.c b/usr/src/cmd/ipf/lib/common/getnattype.c index 312a862c3e..2c89b2bede 100644 --- a/usr/src/cmd/ipf/lib/common/getnattype.c +++ b/usr/src/cmd/ipf/lib/common/getnattype.c @@ -4,7 +4,13 @@ * See the IPFILTER.LICENCE file for details on licencing. * * Added redirect stuff and a variety of bug fixes. (mcn@EnGarde.com) + * + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. */ + +#pragma ident "%Z%%M% %I% %E% SMI" + #include "ipf.h" #include "kmem.h" @@ -16,19 +22,27 @@ static const char rcsid[] = "@(#)$Id: getnattype.c,v 1.3 2004/01/17 17:26:07 dar /* * Get a nat filter type given its kernel address. 
*/ -char *getnattype(ipnat) -ipnat_t *ipnat; +char *getnattype(nat, alive) +nat_t *nat; +int alive; { static char unknownbuf[20]; - ipnat_t ipnatbuff; + ipnat_t *ipn, ipnatbuff; char *which; + int type; - if (!ipnat) + if (!nat) return "???"; - if (kmemcpy((char *)&ipnatbuff, (long)ipnat, sizeof(ipnatbuff))) - return "!!!"; + if (alive) { + type = nat->nat_redir; + } else { + ipn = nat->nat_ptr; + if (kmemcpy((char *)&ipnatbuff, (long)ipn, sizeof(ipnatbuff))) + return "!!!"; + type = ipnatbuff.in_redir; + } - switch (ipnatbuff.in_redir) + switch (type) { case NAT_MAP : which = "MAP"; @@ -43,8 +57,7 @@ ipnat_t *ipnat; which = "BIMAP"; break; default : - sprintf(unknownbuf, "unknown(%04x)", - ipnatbuff.in_redir & 0xffffffff); + sprintf(unknownbuf, "unknown(%04x)", type & 0xffffffff); which = unknownbuf; break; } diff --git a/usr/src/cmd/ipf/lib/common/load_pool.c b/usr/src/cmd/ipf/lib/common/load_pool.c index d27b6c381b..b8146c060c 100644 --- a/usr/src/cmd/ipf/lib/common/load_pool.c +++ b/usr/src/cmd/ipf/lib/common/load_pool.c @@ -4,8 +4,13 @@ * See the IPFILTER.LICENCE file for details on licencing. * * $Id: load_pool.c,v 1.14.2.2 2005/02/01 02:44:06 darrenr Exp $ + * + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. */ +#pragma ident "%Z%%M% %I% %E% SMI" + #include <fcntl.h> #include <sys/ioctl.h> #include "ipf.h" diff --git a/usr/src/cmd/ipf/lib/common/printactivenat.c b/usr/src/cmd/ipf/lib/common/printactivenat.c index 389818b67b..ace6b6cfe3 100644 --- a/usr/src/cmd/ipf/lib/common/printactivenat.c +++ b/usr/src/cmd/ipf/lib/common/printactivenat.c @@ -4,8 +4,13 @@ * See the IPFILTER.LICENCE file for details on licencing. * * Added redirect stuff and a variety of bug fixes. (mcn@EnGarde.com) + * + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. */ +#pragma ident "%Z%%M% %I% %E% SMI" + #include "ipf.h" @@ -14,12 +19,12 @@ static const char rcsid[] = "@(#)$Id: printactivenat.c,v 1.3.2.4 2004/05/11 16:0 #endif -void printactivenat(nat, opts) +void printactivenat(nat, opts, alive) nat_t *nat; -int opts; +int opts, alive; { - printf("%s", getnattype(nat->nat_ptr)); + printf("%s", getnattype(nat, alive)); if (nat->nat_flags & SI_CLONE) printf(" CLONE"); diff --git a/usr/src/cmd/ipf/lib/common/printhash_live.c b/usr/src/cmd/ipf/lib/common/printhash_live.c new file mode 100755 index 0000000000..082ee74bb4 --- /dev/null +++ b/usr/src/cmd/ipf/lib/common/printhash_live.c @@ -0,0 +1,81 @@ +/* + * Copyright (C) 2002 by Darren Reed. + * + * See the IPFILTER.LICENCE file for details on licencing. + * + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/ioctl.h> +#include "ipf.h" +#include "netinet/ipl.h" + +#define PRINTF (void)printf +#define FPRINTF (void)fprintf + + +iphtable_t *printhash_live(hp, fd, name, opts) +iphtable_t *hp; +int fd; +char *name; +int opts; +{ + iphtent_t entry, *top, *node; + ipflookupiter_t iter; + int i, printed, last; + ipfobj_t obj; + + if ((name != NULL) && strncmp(name, hp->iph_name, FR_GROUPLEN)) + return hp->iph_next; + + printhashdata(hp, opts); + + if ((opts & OPT_DEBUG) == 0) + PRINTF("\t{"); + + obj.ipfo_rev = IPFILTER_VERSION; + obj.ipfo_type = IPFOBJ_LOOKUPITER; + obj.ipfo_ptr = &iter; + obj.ipfo_size = sizeof(iter); + + iter.ili_data = &entry; + iter.ili_type = IPLT_HASH; + iter.ili_otype = IPFLOOKUPITER_NODE; + iter.ili_ival = IPFGENITER_LOOKUP; + iter.ili_unit = hp->iph_unit; + strncpy(iter.ili_name, hp->iph_name, FR_GROUPLEN); + + last = 0; + top = NULL; + + while (!last && (ioctl(fd, SIOCLOOKUPITER, &obj) == 0)) { + if (entry.ipe_snext == NULL) + last = 1; + entry.ipe_snext = top; + top = malloc(sizeof(*top)); + if (top == NULL) + break; + bcopy(&entry, top, sizeof(entry)); + } + + while (top != NULL) { + node = top; + (void) printhashnode(hp, node, bcopywrap, opts); + top = node->ipe_snext; + free(node); + printed++; + + if ((opts & OPT_DEBUG) == 0) + putchar(';'); + } + + if (printed == 0) + putchar(';'); + + if ((opts & OPT_DEBUG) == 0) + PRINTF(" };\n"); + return hp->iph_next; +} diff --git a/usr/src/cmd/ipf/lib/common/printhashdata.c b/usr/src/cmd/ipf/lib/common/printhashdata.c new file mode 100755 index 0000000000..cef1afb35c --- /dev/null +++ b/usr/src/cmd/ipf/lib/common/printhashdata.c @@ -0,0 +1,112 @@ +/* + * Copyright (C) 2002 by Darren Reed. + * + * See the IPFILTER.LICENCE file for details on licencing. + * + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include "ipf.h" + +#define PRINTF (void)printf +#define FPRINTF (void)fprintf + + +void printhashdata(hp, opts) +iphtable_t *hp; +int opts; +{ + + if ((opts & OPT_DEBUG) == 0) { + if ((hp->iph_type & IPHASH_ANON) == IPHASH_ANON) + PRINTF("# 'anonymous' table\n"); + switch (hp->iph_type & ~IPHASH_ANON) + { + case IPHASH_LOOKUP : + PRINTF("table"); + break; + case IPHASH_GROUPMAP : + PRINTF("group-map"); + if (hp->iph_flags & FR_INQUE) + PRINTF(" in"); + else if (hp->iph_flags & FR_OUTQUE) + PRINTF(" out"); + else + PRINTF(" ???"); + break; + default : + PRINTF("%#x", hp->iph_type); + break; + } + PRINTF(" role = "); + } else { + PRINTF("Hash Table Number: %s", hp->iph_name); + if ((hp->iph_type & IPHASH_ANON) == IPHASH_ANON) + PRINTF("(anon)"); + putchar(' '); + PRINTF("Role: "); + } + + switch (hp->iph_unit) + { + case IPL_LOGNAT : + PRINTF("nat"); + break; + case IPL_LOGIPF : + PRINTF("ipf"); + break; + case IPL_LOGAUTH : + PRINTF("auth"); + break; + case IPL_LOGCOUNT : + PRINTF("count"); + break; + default : + PRINTF("#%d", hp->iph_unit); + break; + } + + if ((opts & OPT_DEBUG) == 0) { + if ((hp->iph_type & ~IPHASH_ANON) == IPHASH_LOOKUP) + PRINTF(" type = hash"); + PRINTF(" number = %s size = %lu", + hp->iph_name, (u_long)hp->iph_size); + if (hp->iph_seed != 0) + PRINTF(" seed = %lu", hp->iph_seed); + putchar('\n'); + } else { + PRINTF(" Type: "); + switch (hp->iph_type & ~IPHASH_ANON) + { + case IPHASH_LOOKUP : + PRINTF("lookup"); + break; + case IPHASH_GROUPMAP : + PRINTF("groupmap Group. 
%s", hp->iph_name); + break; + default : + break; + } + + putchar('\n'); + PRINTF("\t\tSize: %lu\tSeed: %lu", + (u_long)hp->iph_size, hp->iph_seed); + PRINTF("\tRef. Count: %d\tMasks: %#x\n", hp->iph_ref, + hp->iph_masks[0]); + } + + if ((opts & OPT_DEBUG) != 0) { + struct in_addr m; + int i; + + for (i = 0; i < 32; i++) { + if ((1 << i) & hp->iph_masks[0]) { + ntomask(4, i, &m.s_addr); + PRINTF("\t\tMask: %s\n", inet_ntoa(m)); + } + } + } +} diff --git a/usr/src/cmd/ipf/lib/common/printpool_live.c b/usr/src/cmd/ipf/lib/common/printpool_live.c new file mode 100644 index 0000000000..77f95d84de --- /dev/null +++ b/usr/src/cmd/ipf/lib/common/printpool_live.c @@ -0,0 +1,81 @@ +/* + * Copyright (C) 2002 by Darren Reed. + * + * See the IPFILTER.LICENCE file for details on licencing. + * + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/ioctl.h> +#include "ipf.h" +#include "netinet/ipl.h" + +#define PRINTF (void)printf +#define FPRINTF (void)fprintf + + +ip_pool_t *printpool_live(pool, fd, name, opts) +ip_pool_t *pool; +int fd; +char *name; +int opts; +{ + ip_pool_node_t entry, *top, *node; + ipflookupiter_t iter; + int i, printed, last; + ipfobj_t obj; + + if ((name != NULL) && strncmp(name, pool->ipo_name, FR_GROUPLEN)) + return pool->ipo_next; + + printpooldata(pool, opts); + + if ((opts & OPT_DEBUG) == 0) + PRINTF("\t{"); + + obj.ipfo_rev = IPFILTER_VERSION; + obj.ipfo_type = IPFOBJ_LOOKUPITER; + obj.ipfo_ptr = &iter; + obj.ipfo_size = sizeof(iter); + + iter.ili_data = &entry; + iter.ili_type = IPLT_POOL; + iter.ili_otype = IPFLOOKUPITER_NODE; + iter.ili_ival = IPFGENITER_LOOKUP; + iter.ili_unit = pool->ipo_unit; + strncpy(iter.ili_name, pool->ipo_name, FR_GROUPLEN); + + last = 0; + top = NULL; + + while (!last && (ioctl(fd, SIOCLOOKUPITER, &obj) == 0)) { + if (entry.ipn_next == NULL) + last = 1; + entry.ipn_next = top; + top = malloc(sizeof(*top)); + if (top == NULL) + break; + bcopy(&entry, top, sizeof(entry)); + } + + while (top != NULL) { + node = top; + (void) printpoolnode(node, opts); + top = node->ipn_next; + free(node); + printed++; + + if ((opts & OPT_DEBUG) == 0) + putchar(';'); + } + + if (printed == 0) + putchar(';'); + + if ((opts & OPT_DEBUG) == 0) + PRINTF(" };\n"); + return pool->ipo_next; +} diff --git a/usr/src/cmd/ipf/lib/common/printpooldata.c b/usr/src/cmd/ipf/lib/common/printpooldata.c new file mode 100644 index 0000000000..a699af67df --- /dev/null +++ b/usr/src/cmd/ipf/lib/common/printpooldata.c @@ -0,0 +1,73 @@ +/* + * Copyright (C) 2002 by Darren Reed. + * + * See the IPFILTER.LICENCE file for details on licencing. + * + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include "ipf.h" + +#define PRINTF (void)printf +#define FPRINTF (void)fprintf + +void printpooldata(pool, opts) +ip_pool_t *pool; +int opts; +{ + + if ((opts & OPT_DEBUG) == 0) { + if ((pool->ipo_flags & IPOOL_ANON) != 0) + PRINTF("# 'anonymous' tree %s\n", pool->ipo_name); + PRINTF("table role = "); + } else { + PRINTF("Name: %s", pool->ipo_name); + if ((pool->ipo_flags & IPOOL_ANON) == IPOOL_ANON) + PRINTF("(anon)"); + putchar(' '); + PRINTF("Role: "); + } + + switch (pool->ipo_unit) + { + case IPL_LOGIPF : + PRINTF("ipf"); + break; + case IPL_LOGNAT : + PRINTF("nat"); + break; + case IPL_LOGSTATE : + PRINTF("state"); + break; + case IPL_LOGAUTH : + PRINTF("auth"); + break; + case IPL_LOGSYNC : + PRINTF("sync"); + break; + case IPL_LOGSCAN : + PRINTF("scan"); + break; + case IPL_LOGLOOKUP : + PRINTF("lookup"); + break; + case IPL_LOGCOUNT : + PRINTF("count"); + break; + default : + PRINTF("unknown(%d)", pool->ipo_unit); + } + + if ((opts & OPT_DEBUG) == 0) { + PRINTF(" type = tree number = %s\n", pool->ipo_name); + } else { + putchar(' '); + + PRINTF("\tReferences: %d\tHits: %lu\n", pool->ipo_ref, + pool->ipo_hits); + PRINTF("\tNodes Starting at %p\n", pool->ipo_list); + } +} diff --git a/usr/src/cmd/ipf/svc/ipfilter b/usr/src/cmd/ipf/svc/ipfilter index 43d25c4f68..07090184e4 100644 --- a/usr/src/cmd/ipf/svc/ipfilter +++ b/usr/src/cmd/ipf/svc/ipfilter @@ -22,7 +22,7 @@ # # ident "%Z%%M% %I% %E% SMI" # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # @@ -36,11 +36,12 @@ IPNATCONF=/etc/ipf/ipnat.conf IPPOOLCONF=/etc/ipf/ippool.conf PFILCHECKED=no +zone=`smf_zonename` ipfid=`/usr/sbin/modinfo 2>&1 | awk '/ipf/ { print $1 } ' - 2>/dev/null` if [ -f $PIDFILE ] ; then pid=`cat $PIDFILE 2>/dev/null` else - pid=`pgrep ipmon` + pid=`pgrep -z $zone ipmon` fi logmsg() diff --git a/usr/src/cmd/ipf/tools/Makefile.tools b/usr/src/cmd/ipf/tools/Makefile.tools index e3467560e5..2cb3585169 100644 --- a/usr/src/cmd/ipf/tools/Makefile.tools +++ b/usr/src/cmd/ipf/tools/Makefile.tools @@ -1,5 +1,25 @@ # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. 
# # @@ -48,7 +68,7 @@ ipftest := LDLIBS += -lsocket -lnsl -lmd ipfstat := LDLIBS += -lsocket -lnsl -lkvm -lelf ipmon := LDLIBS += -lsocket -lnsl ipnat := LDLIBS += -lsocket -lnsl -lkvm -lelf -ippool := LDLIBS += -lsocket -lnsl -lkvm +ippool := LDLIBS += -lsocket -lnsl -lkvm -lelf CLEANFILES += $(OBJS) diff --git a/usr/src/cmd/ipf/tools/ip_fil.c b/usr/src/cmd/ipf/tools/ip_fil.c index 4aaa9935be..daf170f41d 100644 --- a/usr/src/cmd/ipf/tools/ip_fil.c +++ b/usr/src/cmd/ipf/tools/ip_fil.c @@ -3,7 +3,7 @@ * * See the IPFILTER.LICENCE file for details on licencing. * - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -68,6 +68,7 @@ struct file; #include <stdlib.h> #include <ctype.h> #include <fcntl.h> +#include <sys/zone.h> #include <arpa/inet.h> #ifdef __hpux @@ -133,6 +134,7 @@ struct file; #ifdef IPFILTER_COMPILED # include "netinet/ip_rules.h" #endif +#include "netinet/ipf_stack.h" #if defined(__FreeBSD_version) && (__FreeBSD_version >= 300000) # include <sys/malloc.h> #endif @@ -150,7 +152,7 @@ extern struct protosw inetsw[]; static struct ifnet **ifneta = NULL; static int nifs = 0; -static int frzerostats __P((caddr_t)); +static int frzerostats __P((caddr_t, ipf_stack_t *ifs)); static void fr_setifpaddr __P((struct ifnet *, char *)); void init_ifp __P((void)); #if defined(__sgi) && (IRIX < 60500) @@ -173,32 +175,36 @@ static int write_output __P((struct ifnet *, struct mbuf *, #endif -int iplattach() +int iplattach(ifs, ns) +ipf_stack_t *ifs; +netstack_t *ns; { - fr_running = 1; + ifs->ifs_fr_running = 1; return 0; } -int ipldetach() +int ipldetach(ifs) +ipf_stack_t *ifs; { - fr_running = -1; + ifs->ifs_fr_running = -1; return 0; } -static int frzerostats(data) +static int frzerostats(data, ifs) caddr_t data; +ipf_stack_t *ifs; { friostat_t fio; int error; - fr_getstat(&fio); + fr_getstat(&fio, ifs); error = copyoutptr(&fio, data, sizeof(fio)); if (error) return EFAULT; - bzero((char *)frstats, sizeof(*frstats) * 2); + bzero((char *)ifs->ifs_frstats, sizeof(*ifs->ifs_frstats) * 2); return 0; } @@ -213,41 +219,46 @@ ioctlcmd_t cmd; caddr_t data; int mode; { - int error = 0, unit = 0, tmp; + int error = 0, unit = 0, tmp, uid; friostat_t fio; + ipf_stack_t *ifs; + extern ipf_stack_t *get_ifs(); unit = dev; + uid = getuid(); + + ifs = get_ifs(); SPL_NET(s); if (unit == IPL_LOGNAT) { - if (fr_running > 0) - error = fr_nat_ioctl(data, cmd, mode); + if (ifs->ifs_fr_running > 0) + error = fr_nat_ioctl(data, cmd, mode, uid, NULL, ifs); else error = EIO; SPL_X(s); return error; } if (unit == IPL_LOGSTATE) { - if (fr_running > 0) - error = fr_state_ioctl(data, cmd, mode); + if (ifs->ifs_fr_running > 0) + error = fr_state_ioctl(data, cmd, mode, uid, NULL, ifs); else error = EIO; SPL_X(s); return error; } if (unit == IPL_LOGAUTH) { - if (fr_running > 0) { + if (ifs->ifs_fr_running > 0) { if ((cmd == (ioctlcmd_t)SIOCADAFR) || (cmd == (ioctlcmd_t)SIOCRMAFR)) { if (!(mode & FWRITE)) { error = EPERM; } else { error = frrequest(unit, cmd, data, - fr_active, 1); + ifs->ifs_fr_active, 1, ifs); } } else { - error = fr_auth_ioctl(data, mode, cmd); + error = fr_auth_ioctl(data, mode, cmd, uid, NULL, ifs); } } else error = EIO; @@ -256,7 +267,7 @@ int mode; } if (unit == IPL_LOGSYNC) { #ifdef IPFILTER_SYNC - if (fr_running > 0) + if (ifs->ifs_fr_running > 0) error = fr_sync_ioctl(data, cmd, mode); else #endif @@ -266,7 +277,7 @@ int mode; } if (unit == IPL_LOGSCAN) { #ifdef IPFILTER_SCAN - if 
(fr_running > 0) + if (ifs->ifs_fr_running > 0) error = fr_scan_ioctl(data, cmd, mode); else #endif @@ -275,8 +286,9 @@ int mode; return error; } if (unit == IPL_LOGLOOKUP) { - if (fr_running > 0) - error = ip_lookup_ioctl(data, cmd, mode); + if (ifs->ifs_fr_running > 0) + error = ip_lookup_ioctl(data, cmd, mode, uid, + NULL, ifs); else error = EIO; SPL_X(s); @@ -287,8 +299,8 @@ int mode; { case FIONREAD : #ifdef IPFILTER_LOG - error = COPYOUT(&iplused[IPL_LOGIPF], (caddr_t)data, - sizeof(iplused[IPL_LOGIPF])); + error = COPYOUT(&ifs->ifs_iplused[IPL_LOGIPF], (caddr_t)data, + sizeof(ifs->ifs_iplused[IPL_LOGIPF])); #endif break; case SIOCFRENB : @@ -299,9 +311,9 @@ int mode; if (error) break; if (tmp) - error = iplattach(); + error = iplattach(ifs, NULL); else - error = ipldetach(); + error = ipldetach(ifs); } break; case SIOCIPFSET : @@ -311,16 +323,18 @@ int mode; } case SIOCIPFGETNEXT : case SIOCIPFGET : - error = fr_ipftune(cmd, (void *)data); + error = fr_ipftune(cmd, (void *)data, ifs); break; case SIOCSETFF : if (!(mode & FWRITE)) error = EPERM; else - error = COPYIN(data, &fr_flags, sizeof(fr_flags)); + error = COPYIN(data, &ifs->ifs_fr_flags, + sizeof(ifs->ifs_fr_flags)); break; case SIOCGETFF : - error = COPYOUT(&fr_flags, data, sizeof(fr_flags)); + error = COPYOUT(&ifs->ifs_fr_flags, data, + sizeof(ifs->ifs_fr_flags)); break; case SIOCFUNCL : error = fr_resolvefunc(data); @@ -332,7 +346,8 @@ int mode; if (!(mode & FWRITE)) error = EPERM; else - error = frrequest(unit, cmd, data, fr_active, 1); + error = frrequest(unit, cmd, data, + ifs->ifs_fr_active, 1, ifs); break; case SIOCINIFR : case SIOCRMIFR : @@ -340,26 +355,28 @@ int mode; if (!(mode & FWRITE)) error = EPERM; else - error = frrequest(unit, cmd, data, 1 - fr_active, 1); + error = frrequest(unit, cmd, data, + 1 - ifs->ifs_fr_active, 1, ifs); break; case SIOCSWAPA : if (!(mode & FWRITE)) error = EPERM; else { - bzero((char *)frcache, sizeof(frcache[0]) * 2); - *(u_int *)data = fr_active; - fr_active = 1 - fr_active; + bzero((char *)ifs->ifs_frcache, + sizeof(ifs->ifs_frcache[0]) * 2); + *(u_int *)data = ifs->ifs_fr_active; + ifs->ifs_fr_active = 1 - ifs->ifs_fr_active; } break; case SIOCGETFS : - fr_getstat(&fio); + fr_getstat(&fio, ifs); error = fr_outobj(data, &fio, IPFOBJ_IPFSTAT); break; case SIOCFRZST : if (!(mode & FWRITE)) error = EPERM; else - error = frzerostats(data); + error = frzerostats(data, ifs); break; case SIOCIPFFL : if (!(mode & FWRITE)) @@ -367,7 +384,7 @@ int mode; else { error = COPYIN(data, &tmp, sizeof(tmp)); if (!error) { - tmp = frflush(unit, 4, tmp); + tmp = frflush(unit, 4, tmp, ifs); error = COPYOUT(&tmp, data, sizeof(tmp)); } } @@ -379,7 +396,7 @@ int mode; else { error = COPYIN(data, &tmp, sizeof(tmp)); if (!error) { - tmp = frflush(unit, 6, tmp); + tmp = frflush(unit, 6, tmp, ifs); error = COPYOUT(&tmp, data, sizeof(tmp)); } } @@ -388,10 +405,10 @@ int mode; case SIOCSTLCK : error = COPYIN(data, &tmp, sizeof(tmp)); if (error == 0) { - fr_state_lock = tmp; - fr_nat_lock = tmp; - fr_frag_lock = tmp; - fr_auth_lock = tmp; + ifs->ifs_fr_state_lock = tmp; + ifs->ifs_fr_nat_lock = tmp; + ifs->ifs_fr_frag_lock = tmp; + ifs->ifs_fr_auth_lock = tmp; } else error = EFAULT; break; @@ -400,17 +417,17 @@ int mode; if (!(mode & FWRITE)) error = EPERM; else - *(int *)data = ipflog_clear(unit); + *(int *)data = ipflog_clear(unit, ifs); break; #endif /* IPFILTER_LOG */ case SIOCGFRST : - error = fr_outobj(data, fr_fragstats(), IPFOBJ_FRAGSTAT); + error = fr_outobj(data, fr_fragstats(ifs), IPFOBJ_FRAGSTAT); 
break; case SIOCFRSYN : if (!(mode & FWRITE)) error = EPERM; else { - frsync(IPFSYNC_RESYNC, IPFSYNC_RESYNC, NULL, NULL); + frsync(IPFSYNC_RESYNC, IPFSYNC_RESYNC, NULL, NULL, ifs); } break; default : @@ -422,51 +439,61 @@ int mode; } -void fr_forgetifp(ifp) +void fr_forgetifp(ifp, ifs) void *ifp; +ipf_stack_t *ifs; { register frentry_t *f; - WRITE_ENTER(&ipf_mutex); - for (f = ipacct[0][fr_active]; (f != NULL); f = f->fr_next) + WRITE_ENTER(&ifs->ifs_ipf_mutex); + for (f = ifs->ifs_ipacct[0][ifs->ifs_fr_active]; (f != NULL); + f = f->fr_next) if (f->fr_ifa == ifp) f->fr_ifa = (void *)-1; - for (f = ipacct[1][fr_active]; (f != NULL); f = f->fr_next) + for (f = ifs->ifs_ipacct[1][ifs->ifs_fr_active]; (f != NULL); + f = f->fr_next) if (f->fr_ifa == ifp) f->fr_ifa = (void *)-1; - for (f = ipfilter[0][fr_active]; (f != NULL); f = f->fr_next) + for (f = ifs->ifs_ipfilter[0][ifs->ifs_fr_active]; (f != NULL); + f = f->fr_next) if (f->fr_ifa == ifp) f->fr_ifa = (void *)-1; - for (f = ipfilter[1][fr_active]; (f != NULL); f = f->fr_next) + for (f = ifs->ifs_ipfilter[1][ifs->ifs_fr_active]; (f != NULL); + f = f->fr_next) if (f->fr_ifa == ifp) f->fr_ifa = (void *)-1; #ifdef USE_INET6 - for (f = ipacct6[0][fr_active]; (f != NULL); f = f->fr_next) + for (f = ifs->ifs_ipacct6[0][ifs->ifs_fr_active]; (f != NULL); + f = f->fr_next) if (f->fr_ifa == ifp) f->fr_ifa = (void *)-1; - for (f = ipacct6[1][fr_active]; (f != NULL); f = f->fr_next) + for (f = ifs->ifs_ipacct6[1][ifs->ifs_fr_active]; (f != NULL); + f = f->fr_next) if (f->fr_ifa == ifp) f->fr_ifa = (void *)-1; - for (f = ipfilter6[0][fr_active]; (f != NULL); f = f->fr_next) + for (f = ifs->ifs_ipfilter6[0][ifs->ifs_fr_active]; (f != NULL); + f = f->fr_next) if (f->fr_ifa == ifp) f->fr_ifa = (void *)-1; - for (f = ipfilter6[1][fr_active]; (f != NULL); f = f->fr_next) + for (f = ifs->ifs_ipfilter6[1][ifs->ifs_fr_active]; (f != NULL); + f = f->fr_next) if (f->fr_ifa == ifp) f->fr_ifa = (void *)-1; #endif - RWLOCK_EXIT(&ipf_mutex); - fr_natifpsync(IPFSYNC_OLDIFP, ifp, NULL); + RWLOCK_EXIT(&ifs->ifs_ipf_mutex); + fr_natifpsync(IPFSYNC_OLDIFP, ifp, NULL, ifs); } -void fr_resolvedest(fdp, v) +void fr_resolvedest(fdp, v, ifs) frdest_t *fdp; int v; +ipf_stack_t *ifs; { fdp->fd_ifp = NULL; if (*fdp->fd_ifname) { - fdp->fd_ifp = GETIFP(fdp->fd_ifname, v); + fdp->fd_ifp = GETIFP(fdp->fd_ifname, v, ifs); if (!fdp->fd_ifp) fdp->fd_ifp = (struct ifnet *)-1; } @@ -579,9 +606,11 @@ char *addr; } } -struct ifnet *get_unit(name, v) +/*ARGSUSED*/ +struct ifnet *get_unit(name, v, ifs) char *name; int v; +ipf_stack_t *ifs; { struct ifnet *ifp, **ifpp, **old_ifneta; char *addr; @@ -781,10 +810,11 @@ int dst; } -void frsync(command, version, nic, data) +void frsync(command, version, nic, data, ifs) int command, version; void *nic; char *data; +ipf_stack_t *ifs; { return; } @@ -897,10 +927,11 @@ fr_info_t *fin; { static u_short ipid = 0; u_short id; + ipf_stack_t *ifs = fin->fin_ifs; - MUTEX_ENTER(&ipf_rw); + MUTEX_ENTER(&ifs->ifs_ipf_rw); id = ipid++; - MUTEX_EXIT(&ipf_rw); + MUTEX_EXIT(&ifs->ifs_ipf_rw); return id; } @@ -957,10 +988,11 @@ size_t size; /* * return the first IP Address associated with an interface */ -int fr_ifpaddr(v, atype, ifptr, inp, inpmask) +int fr_ifpaddr(v, atype, ifptr, inp, inpmask, ifs) int v, atype; void *ifptr; struct in_addr *inp, *inpmask; +ipf_stack_t *ifs; { struct ifnet *ifp = ifptr; #ifdef __sgi diff --git a/usr/src/cmd/ipf/tools/ipfstat.c b/usr/src/cmd/ipf/tools/ipfstat.c index b2f3195a99..de7d9c9b12 100644 --- 
a/usr/src/cmd/ipf/tools/ipfstat.c +++ b/usr/src/cmd/ipf/tools/ipfstat.c @@ -3,7 +3,7 @@ * * See the IPFILTER.LICENCE file for details on licencing. * - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -143,6 +143,8 @@ static void showipstates __P((ips_stat_t *)); static void showauthstates __P((fr_authstat_t *)); static void showgroups __P((friostat_t *)); static void usage __P((char *)); +static void printlivelist __P((int, int, frentry_t *, char *, char *)); +static void printdeadlist __P((int, int, frentry_t *, char *, char *)); static void printlist __P((frentry_t *, char *)); static void parse_ipportstr __P((const char *, i6addr_t *, int *)); static void ipfstate_live __P((char *, friostat_t **, ips_stat_t **, @@ -261,13 +263,12 @@ char *argv[]; if (kern != NULL || memf != NULL) { (void)setgid(getgid()); (void)setreuid(getuid(), getuid()); + if (openkmem(kern, memf) == -1) + exit(-1); } if (live_kernel == 1) (void) checkrev(device); - if (openkmem(kern, memf) == -1) - exit(-1); - (void)setgid(getgid()); (void)setreuid(getuid(), getuid()); @@ -778,21 +779,52 @@ u_32_t frf; /* * Print out a list of rules from the kernel, starting at the one passed. */ -static void printlist(fp, comment) +static void printlivelist(out, set, fp, group, comment) +int out, set; frentry_t *fp; -char *comment; +char *group, *comment; { + frgroup_t *grtop, *grtail, *g; struct frentry fb, *fg; - char *data; - u_32_t type; int n; - - for (n = 1; fp; n++) { - if (kmemcpy((char *)&fb, (u_long)fp, sizeof(fb)) == -1) { - perror("kmemcpy"); + ipfruleiter_t rule; + ipfobj_t obj; + + fb.fr_next = fp; + n = 0; + + grtop = NULL; + grtail = NULL; + rule.iri_ver = use_inet6? 
AF_INET6 : AF_INET; + rule.iri_inout = out; + rule.iri_active = set; + rule.iri_rule = &fb; + if (group != NULL) + strncpy(rule.iri_group, group, FR_GROUPLEN); + else + rule.iri_group[0] = '\0'; + + bzero((char *)&obj, sizeof(obj)); + obj.ipfo_rev = IPFILTER_VERSION; + obj.ipfo_type = IPFOBJ_IPFITER; + obj.ipfo_size = sizeof(rule); + obj.ipfo_ptr = &rule; + + do { + u_long array[1000]; + + memset(array, 0xff, sizeof(array)); + fp = (frentry_t *)array; + rule.iri_rule = fp; + if (ioctl(ipf_fd, SIOCIPFITER, &obj) == -1) { + perror("ioctl(SIOCIPFITER)"); return; } - fp = &fb; + if (fp->fr_data != NULL) + fp->fr_data = (char *)fp + sizeof(*fp); + + n++; + if (opts & (OPT_HITS|OPT_VERBOSE)) #ifdef USE_QUAD_T PRINTF("%qu ", (unsigned long long) fp->fr_hits); @@ -807,45 +839,132 @@ char *comment; #endif if (opts & OPT_SHOWLINENO) PRINTF("@%d ", n); + + printfr(fp, ioctl); + if (opts & OPT_DEBUG) { + binprint(fp, sizeof(*fp)); + if (fp->fr_data != NULL && fp->fr_dsize > 0) + binprint(fp->fr_data, fp->fr_dsize); + } + + if (fp->fr_grhead[0] != '\0') { + g = calloc(1, sizeof(*g)); + + if (g != NULL) { + strncpy(g->fg_name, fp->fr_grhead, + FR_GROUPLEN); + if (grtop == NULL) { + grtop = g; + grtail = g; + } else { + grtail->fg_next = g; + grtail = g; + } + } + } + } while (fp->fr_next != NULL); + + while ((g = grtop) != NULL) { + printlivelist(out, set, NULL, g->fg_name, comment); + grtop = g->fg_next; + free(g); + } +} + + +static void printdeadlist(out, set, fp, group, comment) +int out, set; +frentry_t *fp; +char *group, *comment; +{ + frgroup_t *grtop, *grtail, *g; + struct frentry fb, *fg; + char *data; + u_32_t type; + int n; + + fb.fr_next = fp; + n = 0; + grtop = NULL; + grtail = NULL; + + do { + fp = fb.fr_next; + if (kmemcpy((char *)&fb, (u_long)fb.fr_next, + sizeof(fb)) == -1) { + perror("kmemcpy"); + return; + } + data = NULL; - type = fp->fr_type & ~FR_T_BUILTIN; + type = fb.fr_type & ~FR_T_BUILTIN; if (type == FR_T_IPF || type == FR_T_BPFOPC) { - if (fp->fr_dsize) { - data = malloc(fp->fr_dsize); - if (data == NULL) { - perror("malloc"); - exit(1); - } + if (fb.fr_dsize) { + data = malloc(fb.fr_dsize); - if (kmemcpy(data, (u_long)fp->fr_data, - fp->fr_dsize) == -1) { + if (kmemcpy(data, (u_long)fb.fr_data, + fb.fr_dsize) == -1) { perror("kmemcpy"); return; } - fp->fr_data = data; + fb.fr_data = data; } } + n++; + + if (opts & (OPT_HITS|OPT_VERBOSE)) +#ifdef USE_QUAD_T + PRINTF("%qu ", (unsigned long long) fb.fr_hits); +#else + PRINTF("%lu ", fb.fr_hits); +#endif + if (opts & (OPT_ACCNT|OPT_VERBOSE)) +#ifdef USE_QUAD_T + PRINTF("%qu ", (unsigned long long) fb.fr_bytes); +#else + PRINTF("%lu ", fb.fr_bytes); +#endif + if (opts & OPT_SHOWLINENO) + PRINTF("@%d ", n); + printfr(fp, ioctl); if (opts & OPT_DEBUG) { binprint(fp, sizeof(*fp)); - if (fp->fr_data != NULL && fp->fr_dsize > 0) - binprint(fp->fr_data, fp->fr_dsize); + if (fb.fr_data != NULL && fb.fr_dsize > 0) + binprint(fb.fr_data, fb.fr_dsize); } if (data != NULL) free(data); - if (fp->fr_grp != NULL) { - if (!kmemcpy((char *)&fg, (u_long)fp->fr_grp, - sizeof(fg))) - printlist(fg, comment); + if (fb.fr_grhead[0] != '\0') { + g = calloc(1, sizeof(*g)); + + if (g != NULL) { + strncpy(g->fg_name, fb.fr_grhead, + FR_GROUPLEN); + if (grtop == NULL) { + grtop = g; + grtail = g; + } else { + grtail->fg_next = g; + grtail = g; + } + } } if (type == FR_T_CALLFUNC) { - printlist(fp->fr_data, "# callfunc: "); + printdeadlist(out, set, fb.fr_data, group, + "# callfunc: "); } - fp = fp->fr_next; + } while (fb.fr_next != NULL); + + while ((g = 
grtop) != NULL) { + printdeadlist(out, set, NULL, g->fg_name, comment); + grtop = g->fg_next; + free(g); } } + /* * print out all of the asked for rule sets, using the stats struct as * the base from which to get the pointers. @@ -908,7 +1027,10 @@ struct friostat *fiop; (opts & OPT_INACTIVE) ? "inactive " : "", filters[i]); return; } - printlist(fp, NULL); + if (live_kernel == 1) + printlivelist(i, set, fp, NULL, NULL); + else + printdeadlist(i, set, fp, NULL, NULL); } diff --git a/usr/src/cmd/ipf/tools/ipftest.c b/usr/src/cmd/ipf/tools/ipftest.c index e5233860e0..37b47b6dbe 100644 --- a/usr/src/cmd/ipf/tools/ipftest.c +++ b/usr/src/cmd/ipf/tools/ipftest.c @@ -3,7 +3,7 @@ * * See the IPFILTER.LICENCE file for details on licencing. * - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -22,14 +22,9 @@ static const char rcsid[] = "@(#)$Id: ipftest.c,v 1.44.2.4 2005/07/16 06:05:28 d extern char *optarg; extern struct frentry *ipfilter[2][2]; extern struct ipread snoop, etherf, tcpd, pcap, iptext, iphex; -extern struct ifnet *get_unit __P((char *, int)); +extern struct ifnet *get_unit __P((char *, int, ipf_stack_t *)); extern void init_ifp __P((void)); -extern int fr_running; -ipfmutex_t ipl_mutex, ipf_authmx, ipf_rw, ipf_stinsert; -ipfmutex_t ipf_nat_new, ipf_natio, ipf_timeoutlock; -ipfrwlock_t ipf_mutex, ipf_global, ipf_ipidfrag, ip_poolrw, ipf_frcache; -ipfrwlock_t ipf_frag, ipf_state, ipf_nat, ipf_natfrag, ipf_auth; int opts = OPT_DONOTHING; int use_inet6 = 0; int pfil_delayed_copy = 0; @@ -37,12 +32,16 @@ int main __P((int, char *[])); int loadrules __P((char *, int)); int kmemcpy __P((char *, long, int)); int kstrncpy __P((char *, long, int n)); -void dumpnat __P((void)); -void dumpstate __P((void)); -void dumplookups __P((void)); -void dumpgroups __P((void)); -void drain_log __P((char *)); +void dumpnat __P((ipf_stack_t *ifs)); +void dumpstate __P((ipf_stack_t *ifs)); +void dumplookups __P((ipf_stack_t *ifs)); +void dumpgroups __P((ipf_stack_t *ifs)); +void drain_log __P((char *, ipf_stack_t *ifs)); void fixv4sums __P((mb_t *, ip_t *)); +ipf_stack_t *get_ifs __P((void)); +ipf_stack_t *create_ifs __P((void)); +netstack_t *create_ns __P((void)); + #if defined(__NetBSD__) || defined(__OpenBSD__) || SOLARIS || \ (_BSDI_VERSION >= 199701) || (__FreeBSD_version >= 300000) || \ @@ -84,6 +83,8 @@ char *argv[]; struct ipread *r; mb_t mb, *m; ip_t *ip; + ipf_stack_t *ifs; + netstack_t *ns; m = &mb; dir = 0; @@ -96,17 +97,34 @@ char *argv[]; ifname = "anon0"; datain = NULL; - MUTEX_INIT(&ipf_rw, "ipf rw mutex"); - MUTEX_INIT(&ipf_timeoutlock, "ipf timeout lock"); - RWLOCK_INIT(&ipf_global, "ipf filter load/unload mutex"); - RWLOCK_INIT(&ipf_mutex, "ipf filter rwlock"); - RWLOCK_INIT(&ipf_frcache, "ipf cache rwlock"); - RWLOCK_INIT(&ipf_ipidfrag, "ipf IP NAT-Frag rwlock"); - initparse(); - if (fr_initialise() == -1) - abort(); - fr_running = 1; + ifs = create_ifs(); + ns = create_ns(); + ifs->ifs_netstack = ns; + +#if defined(IPFILTER_DEFAULT_BLOCK) + ifs->ifs_fr_pass = FR_BLOCK|FR_NOMATCH; +#else + ifs->ifs_fr_pass = (IPF_DEFAULT_PASS)|FR_NOMATCH; +#endif + ipftuneable_alloc(ifs); + + bzero((char *)ifs->ifs_frcache, sizeof(ifs->ifs_frcache)); + MUTEX_INIT(&ifs->ifs_ipf_rw, "ipf rw mutex"); + MUTEX_INIT(&ifs->ifs_ipf_timeoutlock, "ipf timeout lock"); + RWLOCK_INIT(&ifs->ifs_ipf_global, "ipf filter load/unload mutex"); + RWLOCK_INIT(&ifs->ifs_ipf_mutex, "ipf filter rwlock"); + 
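	/*
	 * These locks now live in the private ipf_stack_t that ipftest
	 * allocates for itself via create_ifs(), replacing the file-scope
	 * ipfmutex_t/ipfrwlock_t definitions removed above; the subsystem
	 * initialisers that follow take the same 'ifs' handle.
	 */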
RWLOCK_INIT(&ifs->ifs_ipf_ipidfrag, "ipf IP NAT-Frag rwlock"); + RWLOCK_INIT(&ifs->ifs_ipf_frcache, "ipf cache rwlock"); + + fr_loginit(ifs); + fr_authinit(ifs); + fr_fraginit(ifs); + fr_stateinit(ifs); + fr_natinit(ifs); + appr_init(ifs); + ip_lookup_init(ifs); + ifs->ifs_fr_running = 1; while ((c = getopt(argc, argv, "6bdDF:i:I:l:N:P:or:RT:vxX")) != -1) switch (c) @@ -207,7 +225,7 @@ char *argv[]; &iface, &dir)) > 0) { if (iface == NULL || *iface == '\0') iface = ifname; - ifp = get_unit(iface, IP_V(ip)); + ifp = get_unit(iface, IP_V(ip), ifs); if (ifp == NULL) { fprintf(stderr, "out of memory\n"); exit(1); @@ -226,7 +244,7 @@ char *argv[]; /* ipfr_slowtimer(); */ m = &mb; m->mb_len = i; - i = fr_check(ip, hlen, ifp, dir, &m); + i = fr_check(ip, hlen, ifp, dir, &m, ifs); if ((opts & OPT_NAT) == 0) switch (i) { @@ -294,17 +312,17 @@ char *argv[]; (*r->r_close)(); if (logout != NULL) { - drain_log(logout); + drain_log(logout, ifs); } if (dump == 1) { - dumpnat(); - dumpstate(); - dumplookups(); - dumpgroups(); + dumpnat(ifs); + dumpstate(ifs); + dumplookups(ifs); + dumpgroups(ifs); } - fr_deinitialise(); + fr_deinitialise(ifs); return 0; } @@ -621,17 +639,18 @@ int n; /* * Display the built up NAT table rules and mapping entries. */ -void dumpnat() +void dumpnat(ifs) + ipf_stack_t *ifs; { ipnat_t *ipn; nat_t *nat; printf("List of active MAP/Redirect filters:\n"); - for (ipn = nat_list; ipn != NULL; ipn = ipn->in_next) + for (ipn = ifs->ifs_nat_list; ipn != NULL; ipn = ipn->in_next) printnat(ipn, opts & (OPT_DEBUG|OPT_VERBOSE)); printf("\nList of active sessions:\n"); - for (nat = nat_instances; nat; nat = nat->nat_next) { - printactivenat(nat, opts); + for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) { + printactivenat(nat, opts, 0); if (nat->nat_aps) printaps(nat->nat_aps, opts); } @@ -641,18 +660,20 @@ void dumpnat() /* * Display the built up state table rules and mapping entries. */ -void dumpstate() +void dumpstate(ifs) + ipf_stack_t *ifs; { ipstate_t *ips; printf("List of active state sessions:\n"); - for (ips = ips_list; ips != NULL; ) + for (ips = ifs->ifs_ips_list; ips != NULL; ) ips = printstate(ips, opts & (OPT_DEBUG|OPT_VERBOSE), - fr_ticks); + ifs->ifs_fr_ticks); } -void dumplookups() +void dumplookups(ifs) + ipf_stack_t *ifs; { iphtable_t *iph; ip_pool_t *ipl; @@ -660,17 +681,20 @@ void dumplookups() printf("List of configured pools\n"); for (i = 0; i < IPL_LOGSIZE; i++) - for (ipl = ip_pool_list[i]; ipl != NULL; ipl = ipl->ipo_next) + for (ipl = ifs->ifs_ip_pool_list[i]; ipl != NULL; + ipl = ipl->ipo_next) printpool(ipl, bcopywrap, NULL, opts); printf("List of configured hash tables\n"); for (i = 0; i < IPL_LOGSIZE; i++) - for (iph = ipf_htables[i]; iph != NULL; iph = iph->iph_next) + for (iph = ifs->ifs_ipf_htables[i]; iph != NULL; + iph = iph->iph_next) printhash(iph, bcopywrap, NULL, opts); } -void dumpgroups() +void dumpgroups(ifs) + ipf_stack_t *ifs; { frgroup_t *fg; frentry_t *fr; @@ -678,7 +702,8 @@ void dumpgroups() printf("List of groups configured (set 0)\n"); for (i = 0; i < IPL_LOGSIZE; i++) - for (fg = ipfgroups[i][0]; fg != NULL; fg = fg->fg_next) { + for (fg = ifs->ifs_ipfgroups[i][0]; fg != NULL; + fg = fg->fg_next) { printf("Dev.%d. 
Group %s Ref %d Flags %#x\n", i, fg->fg_name, fg->fg_ref, fg->fg_flags); for (fr = fg->fg_start; fr != NULL; fr = fr->fr_next) { @@ -693,7 +718,8 @@ void dumpgroups() printf("List of groups configured (set 1)\n"); for (i = 0; i < IPL_LOGSIZE; i++) - for (fg = ipfgroups[i][1]; fg != NULL; fg = fg->fg_next) { + for (fg = ifs->ifs_ipfgroups[i][1]; fg != NULL; + fg = fg->fg_next) { printf("Dev.%d. Group %s Ref %d Flags %#x\n", i, fg->fg_name, fg->fg_ref, fg->fg_flags); for (fr = fg->fg_start; fr != NULL; fr = fr->fr_next) { @@ -708,8 +734,9 @@ void dumpgroups() } -void drain_log(filename) +void drain_log(filename, ifs) char *filename; +ipf_stack_t *ifs; { char buffer[DEFAULT_IPFLOGSIZE]; struct iovec iov; @@ -735,7 +762,7 @@ char *filename; uio.uio_resid = iov.iov_len; resid = uio.uio_resid; - if (ipflog_read(i, &uio) == 0) { + if (ipflog_read(i, &uio, ifs) == 0) { /* * If nothing was read then break out. */ @@ -782,3 +809,35 @@ ip_t *ip; *(u_short *)csump = fr_cksum(m, ip, ip->ip_p, hdr); } } + +ipf_stack_t *gifs; + +/* + * Allocate and keep pointer for get_ifs() + */ +ipf_stack_t * +create_ifs() +{ + ipf_stack_t *ifs; + + KMALLOCS(ifs, ipf_stack_t *, sizeof (*ifs)); + bzero(ifs, sizeof (*ifs)); + gifs = ifs; + return (ifs); +} + +ipf_stack_t * +get_ifs() +{ + return (gifs); +} + +netstack_t * +create_ns() +{ + netstack_t *ns; + + KMALLOCS(ns, netstack_t *, sizeof (*ns)); + bzero(ns, sizeof (*ns)); + return (ns); +} diff --git a/usr/src/cmd/ipf/tools/ipnat.c b/usr/src/cmd/ipf/tools/ipnat.c index df64e0d92d..421472e6bc 100644 --- a/usr/src/cmd/ipf/tools/ipnat.c +++ b/usr/src/cmd/ipf/tools/ipnat.c @@ -5,7 +5,7 @@ * * Added redirect stuff and a variety of bug fixes. (mcn@EnGarde.com) * - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -83,11 +83,14 @@ char thishost[MAXHOSTNAMELEN]; extern char *optarg; -void dostats __P((natstat_t *, int)), flushtable __P((int, int)); +void dostats __P((int, natstat_t *, int, int)); +void flushtable __P((int, int)); void usage __P((char *)); int main __P((int, char*[])); void showhostmap __P((natstat_t *nsp)); void natstat_dead __P((natstat_t *, char *)); +void dostats_live __P((int, natstat_t *, int)); +void showhostmap_live __P((int, natstat_t *)); int opts; @@ -183,9 +186,10 @@ char *argv[]; if (!(opts & OPT_DONOTHING) && (kernel == NULL) && (core == NULL)) { +#ifdef notdef if (openkmem(kernel, core) == -1) exit(1); - +#endif if (((fd = open(IPNAT_NAME, mode)) == -1) && ((fd = open(IPNAT_NAME, O_RDONLY)) == -1)) { (void) fprintf(stderr, "%s: open: %s\n", IPNAT_NAME, @@ -210,7 +214,7 @@ char *argv[]; natstat_dead(nsp, kernel); if (opts & (OPT_LIST|OPT_STAT)) - dostats(nsp, opts); + dostats(fd, nsp, opts, 0); exit(0); } @@ -220,7 +224,7 @@ char *argv[]; ipnat_parsefile(fd, ipnat_addrule, ioctl, file); } if (opts & (OPT_LIST|OPT_STAT)) - dostats(nsp, opts); + dostats(fd, nsp, opts, 1); return 0; } @@ -283,9 +287,9 @@ char *kernel; /* * Display NAT statistics. */ -void dostats(nsp, opts) +void dostats(fd, nsp, opts, alive) natstat_t *nsp; -int opts; +int fd, opts, alive; { nat_t *np, nat; ipnat_t ipn; @@ -312,6 +316,10 @@ int opts; * Show list of NAT rules and NAT sessions ? 
*/ if (opts & OPT_LIST) { + if (alive) { + dostats_live(fd, nsp, opts); + return; + } printf("List of active MAP/Redirect filters:\n"); while (nsp->ns_list) { if (kmemcpy((char *)&ipn, (long)nsp->ns_list, @@ -330,7 +338,7 @@ int opts; for (np = nsp->ns_instances; np; np = nat.nat_next) { if (kmemcpy((char *)&nat, (long)np, sizeof(nat))) break; - printactivenat(&nat, opts); + printactivenat(&nat, opts, 0); if (nat.nat_aps) printaps(nat.nat_aps, opts); } @@ -406,3 +414,87 @@ int fd, opts; printf("%d entries flushed from NAT list\n", n); } } + +/* + * Display NAT statistics. + */ +void dostats_live(fd, nsp, opts) +natstat_t *nsp; +int fd, opts; +{ + ipfgeniter_t iter; + ipfobj_t obj; + ipnat_t ipn; + nat_t nat; + + bzero((char *)&obj, sizeof(obj)); + obj.ipfo_rev = IPFILTER_VERSION; + obj.ipfo_type = IPFOBJ_GENITER; + obj.ipfo_size = sizeof(iter); + obj.ipfo_ptr = &iter; + + iter.igi_type = IPFGENITER_IPNAT; + iter.igi_data = &ipn; + + /* + * Show list of NAT rules and NAT sessions ? + */ + printf("List of active MAP/Redirect filters:\n"); + while (nsp->ns_list) { + if (ioctl(fd, SIOCGENITER, &obj) == -1) + break; + if (opts & OPT_HITS) + printf("%lu ", ipn.in_hits); + printnat(&ipn, opts & (OPT_DEBUG|OPT_VERBOSE)); + nsp->ns_list = ipn.in_next; + } + + printf("\nList of active sessions:\n"); + + iter.igi_type = IPFGENITER_NAT; + iter.igi_data = &nat; + + while (nsp->ns_instances != NULL) { + if (ioctl(fd, SIOCGENITER, &obj) == -1) + break; + printactivenat(&nat, opts, 1); + if (nat.nat_aps) + printaps(nat.nat_aps, opts); + nsp->ns_instances = nat.nat_next; + } + + if (opts & OPT_VERBOSE) + showhostmap_live(fd, nsp); +} + +/* + * Display the active host mapping table. + */ +void showhostmap_live(fd, nsp) +int fd; +natstat_t *nsp; +{ + hostmap_t hm, *hmp; + ipfgeniter_t iter; + ipfobj_t obj; + + bzero((char *)&obj, sizeof(obj)); + obj.ipfo_rev = IPFILTER_VERSION; + obj.ipfo_type = IPFOBJ_GENITER; + obj.ipfo_size = sizeof(iter); + obj.ipfo_ptr = &iter; + + iter.igi_type = IPFGENITER_HOSTMAP; + iter.igi_data = &hm; + + printf("\nList of active host mappings:\n"); + + while (nsp->ns_maplist != NULL) { + if (ioctl(fd, SIOCGENITER, &obj) == -1) + break; + printhostmap(&hm, 0); + nsp->ns_maplist = hm.hm_next; + } +} + + diff --git a/usr/src/cmd/ipf/tools/ippool.c b/usr/src/cmd/ipf/tools/ippool.c index edf6793898..5f18379420 100644 --- a/usr/src/cmd/ipf/tools/ippool.c +++ b/usr/src/cmd/ipf/tools/ippool.c @@ -3,7 +3,7 @@ * * See the IPFILTER.LICENCE file for details on licencing. * - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -33,14 +33,15 @@ #include <netdb.h> #include <ctype.h> #include <unistd.h> +#include <nlist.h> #include "ipf.h" +#include "netinet/ipl.h" #include "netinet/ip_lookup.h" #include "netinet/ip_pool.h" #include "netinet/ip_htable.h" #include "kmem.h" - extern int ippool_yyparse __P((void)); extern int ippool_yydebug; extern FILE *ippool_yyin; @@ -58,6 +59,9 @@ int poolflush __P((int, char *[])); int poolstats __P((int, char *[])); int gettype __P((char *, u_int *)); int getrole __P((char *)); +void poollist_dead __P((int, char *, int, char *, char *)); +void showpools_live(int, int, ip_pool_stat_t *, char *, int); +void showhashs_live(int, int, iphtstat_t *, char *, int); int opts = 0; int fd = -1; @@ -418,34 +422,42 @@ char *argv[]; } op.iplo_unit = role; - if (openkmem(kernel, core) == -1) - exit(-1); + if (live_kernel == 0) { + poollist_dead(role, poolname, type, kernel, core); + return (0); + } if (type == IPLT_ALL || type == IPLT_POOL) { plstp = &plstat; op.iplo_type = IPLT_POOL; op.iplo_size = sizeof(plstat); op.iplo_struct = &plstat; - c = ioctl(fd, SIOCLOOKUPSTAT, &op); - if (c == -1) { - perror("ioctl(SIOCLOOKUPSTAT)"); - return -1; - } + op.iplo_name[0] = '\0'; + op.iplo_arg = 0; if (role != IPL_LOGALL) { - ptr = plstp->ipls_list[role]; - while (ptr != NULL) { - ptr = printpool(ptr, kmemcpywrap, poolname, - opts); + op.iplo_unit = role; + + c = ioctl(fd, SIOCLOOKUPSTAT, &op); + if (c == -1) { + perror("ioctl(SIOCLOOKUPSTAT)"); + return -1; } + + showpools_live(fd, role, &plstat, poolname, opts); } else { for (role = 0; role <= IPL_LOGMAX; role++) { - ptr = plstp->ipls_list[role]; - while (ptr != NULL) { - ptr = printpool(ptr, kmemcpywrap, - poolname, opts); + op.iplo_unit = role; + + c = ioctl(fd, SIOCLOOKUPSTAT, &op); + if (c == -1) { + perror("ioctl(SIOCLOOKUPSTAT)"); + return -1; } + + showpools_live(fd, role, &plstat, poolname, opts); } + role = IPL_LOGALL; } } @@ -454,25 +466,20 @@ char *argv[]; op.iplo_type = IPLT_HASH; op.iplo_size = sizeof(htstat); op.iplo_struct = &htstat; - c = ioctl(fd, SIOCLOOKUPSTAT, &op); - if (c == -1) { - perror("ioctl(SIOCLOOKUPSTAT)"); - return -1; - } + op.iplo_name[0] = '\0'; + op.iplo_arg = 0; if (role != IPL_LOGALL) { - hptr = htstp->iphs_tables; - while (hptr != NULL) { - hptr = printhash(hptr, kmemcpywrap, - poolname, opts); + op.iplo_unit = role; + + c = ioctl(fd, SIOCLOOKUPSTAT, &op); + if (c == -1) { + perror("ioctl(SIOCLOOKUPSTAT)"); + return -1; } + showhashs_live(fd, role, &htstat, poolname, opts); } else { for (role = 0; role <= IPL_LOGMAX; role++) { - hptr = htstp->iphs_tables; - while (hptr != NULL) { - hptr = printhash(hptr, kmemcpywrap, - poolname, opts); - } op.iplo_unit = role; c = ioctl(fd, SIOCLOOKUPSTAT, &op); @@ -480,12 +487,115 @@ char *argv[]; perror("ioctl(SIOCLOOKUPSTAT)"); return -1; } + + showhashs_live(fd, role, &htstat, poolname, opts); } } } return 0; } +void poollist_dead(role, poolname, type, kernel, core) +int role, type; +char *poolname, *kernel, *core; +{ + iphtable_t *hptr; + ip_pool_t *ptr; + + if (openkmem(kernel, core) == -1) + exit(-1); + + if (type == IPLT_ALL || type == IPLT_POOL) { + ip_pool_t *pools[IPL_LOGSIZE]; + struct nlist names[2] = { { "ip_pool_list" } , { "" } }; + + if (nlist(kernel, names) != 1) + return; + + bzero(&pools, sizeof(pools)); + if (kmemcpy((char *)&pools, names[0].n_value, sizeof(pools))) + return; + + if (role != IPL_LOGALL) { + ptr = pools[role]; + while (ptr != NULL) { + ptr = printpool(ptr, kmemcpywrap, + poolname, opts); + } + } else { + for (role = 0; role <= 
IPL_LOGMAX; role++) { + ptr = pools[role]; + while (ptr != NULL) { + ptr = printpool(ptr, kmemcpywrap, + poolname, opts); + } + } + role = IPL_LOGALL; + } + } + if (type == IPLT_ALL || type == IPLT_HASH) { + iphtable_t *tables[IPL_LOGSIZE]; + struct nlist names[2] = { { "ipf_htables" } , { "" } }; + + if (nlist(kernel, names) != 1) + return; + + bzero(&tables, sizeof(tables)); + if (kmemcpy((char *)&tables, names[0].n_value, sizeof(tables))) + return; + + if (role != IPL_LOGALL) { + hptr = tables[role]; + while (hptr != NULL) { + hptr = printhash(hptr, kmemcpywrap, + poolname, opts); + } + } else { + for (role = 0; role <= IPL_LOGMAX; role++) { + hptr = tables[role]; + while (hptr != NULL) { + hptr = printhash(hptr, kmemcpywrap, + poolname, opts); + } + } + } + } +} + + +void +showpools_live(fd, role, plstp, poolname, opts) +int fd, role; +ip_pool_stat_t *plstp; +char *poolname; +int opts; +{ + ipflookupiter_t iter; + ip_pool_t pool; + ipfobj_t obj; + + obj.ipfo_rev = IPFILTER_VERSION; + obj.ipfo_type = IPFOBJ_LOOKUPITER; + obj.ipfo_size = sizeof(iter); + obj.ipfo_ptr = &iter; + + iter.ili_type = IPLT_POOL; + iter.ili_otype = IPFLOOKUPITER_LIST; + iter.ili_ival = IPFGENITER_LOOKUP; + iter.ili_data = &pool; + iter.ili_unit = role; + *iter.ili_name = '\0'; + + while (plstp->ipls_list[role] != NULL) { + if (ioctl(fd, SIOCLOOKUPITER, &obj)) { + perror("ioctl(SIOCLOOKUPITER)"); + break; + } + (void) printpool_live(&pool, fd, poolname, opts); + + plstp->ipls_list[role] = pool.ipo_next; + } +} int poolstats(argc, argv) int argc; @@ -702,3 +812,37 @@ u_int *minor; } return type; } + +void showhashs_live(fd, role, htstp, poolname, opts) +int fd, role; +iphtstat_t *htstp; +char *poolname; +int opts; +{ + ipflookupiter_t iter; + iphtable_t table; + ipfobj_t obj; + + obj.ipfo_rev = IPFILTER_VERSION; + obj.ipfo_type = IPFOBJ_LOOKUPITER; + obj.ipfo_size = sizeof(iter); + obj.ipfo_ptr = &iter; + + iter.ili_type = IPLT_HASH; + iter.ili_otype = IPFLOOKUPITER_LIST; + iter.ili_ival = IPFGENITER_LOOKUP; + iter.ili_data = &table; + iter.ili_unit = role; + *iter.ili_name = '\0'; + + while (htstp->iphs_tables != NULL) { + if (ioctl(fd, SIOCLOOKUPITER, &obj)) { + perror("ioctl(SIOCLOOKUPITER)"); + break; + } + + printhash_live(&table, fd, poolname, opts); + + htstp->iphs_tables = table.iph_next; + } +} diff --git a/usr/src/cmd/mdb/common/modules/genunix/Makefile.files b/usr/src/cmd/mdb/common/modules/genunix/Makefile.files index b125e5f21b..c4fa4477b6 100644 --- a/usr/src/cmd/mdb/common/modules/genunix/Makefile.files +++ b/usr/src/cmd/mdb/common/modules/genunix/Makefile.files @@ -55,6 +55,7 @@ GENUNIX_SRCS = \ modhash.c \ ndievents.c \ net.c \ + netstack.c \ nvpair.c \ pg.c \ rctl.c \ diff --git a/usr/src/cmd/mdb/common/modules/genunix/genunix.c b/usr/src/cmd/mdb/common/modules/genunix/genunix.c index 66acba369f..decf655500 100644 --- a/usr/src/cmd/mdb/common/modules/genunix/genunix.c +++ b/usr/src/cmd/mdb/common/modules/genunix/genunix.c @@ -87,6 +87,7 @@ #include "ndievents.h" #include "mmd.h" #include "net.h" +#include "netstack.h" #include "nvpair.h" #include "ctxop.h" #include "tsd.h" @@ -3418,6 +3419,9 @@ static const mdb_dcmd_t dcmds[] = { "[-t stream | dgram | raw | #] [-p #]", "filter and display sonode", sonode }, + /* from netstack.c */ + { "netstack", "", "show stack instances", netstack }, + /* from nvpair.c */ { NVPAIR_DCMD_NAME, NVPAIR_DCMD_USAGE, NVPAIR_DCMD_DESCR, nvpair_print }, @@ -3755,20 +3759,28 @@ static const mdb_walker_t walkers[] = { NULL, modchain_walk_step, NULL }, /* from net.c */ - { 
"ar", "walk ar_t structures using MI", - mi_payload_walk_init, mi_payload_walk_step, - mi_payload_walk_fini, &mi_ar_arg }, - { "icmp", "walk ICMP control structures using MI", - mi_payload_walk_init, mi_payload_walk_step, - mi_payload_walk_fini, &mi_icmp_arg }, - { "ill", "walk ill_t structures using MI", - mi_payload_walk_init, mi_payload_walk_step, - mi_payload_walk_fini, &mi_ill_arg }, + { "ar", "walk ar_t structures using MI for all stacks", + mi_payload_walk_init, mi_payload_walk_step, NULL, &mi_ar_arg }, + { "icmp", "walk ICMP control structures using MI for all stacks", + mi_payload_walk_init, mi_payload_walk_step, NULL, + &mi_icmp_arg }, + { "ill", "walk ill_t structures using MI for all stacks", + mi_payload_walk_init, mi_payload_walk_step, NULL, &mi_ill_arg }, + { "mi", "given a MI_O, walk the MI", mi_walk_init, mi_walk_step, mi_walk_fini, NULL }, { "sonode", "given a sonode, walk its children", sonode_walk_init, sonode_walk_step, sonode_walk_fini, NULL }, + { "ar_stacks", "walk all the ar_stack_t", + ar_stacks_walk_init, ar_stacks_walk_step, NULL }, + { "icmp_stacks", "walk all the icmp_stack_t", + icmp_stacks_walk_init, icmp_stacks_walk_step, NULL }, + { "tcp_stacks", "walk all the tcp_stack_t", + tcp_stacks_walk_init, tcp_stacks_walk_step, NULL }, + { "udp_stacks", "walk all the udp_stack_t", + udp_stacks_walk_init, udp_stacks_walk_step, NULL }, + /* from nvpair.c */ { NVPAIR_WALKER_NAME, NVPAIR_WALKER_DESCR, nvpair_walk_init, nvpair_walk_step, NULL }, @@ -3860,6 +3872,10 @@ static const mdb_walker_t walkers[] = { mdi_phci_ph_next_walk_step, mdi_phci_ph_next_walk_fini }, + /* from netstack.c */ + { "netstack", "walk a list of kernel netstacks", + netstack_walk_init, netstack_walk_step, NULL }, + { NULL } }; diff --git a/usr/src/cmd/mdb/common/modules/genunix/net.c b/usr/src/cmd/mdb/common/modules/genunix/net.c index 0195489ed9..00f295cb5d 100644 --- a/usr/src/cmd/mdb/common/modules/genunix/net.c +++ b/usr/src/cmd/mdb/common/modules/genunix/net.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -51,8 +51,6 @@ #include <inet/rawip_impl.h> #include <inet/mi.h> -#define MIH2MIO(mihp) (&(mihp)->mh_o) - #define ADDR_V6_WIDTH 23 #define ADDR_V4_WIDTH 15 @@ -65,6 +63,104 @@ #define NETSTAT_FIRST 0x80000000u + +/* Walkers for various *_stack_t */ +int +ar_stacks_walk_init(mdb_walk_state_t *wsp) +{ + if (mdb_layered_walk("netstack", wsp) == -1) { + mdb_warn("can't walk 'netstack'"); + return (WALK_ERR); + } + return (WALK_NEXT); +} + +int +ar_stacks_walk_step(mdb_walk_state_t *wsp) +{ + uintptr_t kaddr; + netstack_t nss; + + if (mdb_vread(&nss, sizeof (nss), wsp->walk_addr) == -1) { + mdb_warn("can't read netstack at %p", wsp->walk_addr); + return (WALK_ERR); + } + kaddr = (uintptr_t)nss.netstack_modules[NS_ARP]; + return (wsp->walk_callback(kaddr, wsp->walk_layer, wsp->walk_cbdata)); +} + +int +icmp_stacks_walk_init(mdb_walk_state_t *wsp) +{ + if (mdb_layered_walk("netstack", wsp) == -1) { + mdb_warn("can't walk 'netstack'"); + return (WALK_ERR); + } + return (WALK_NEXT); +} + +int +icmp_stacks_walk_step(mdb_walk_state_t *wsp) +{ + uintptr_t kaddr; + netstack_t nss; + + if (mdb_vread(&nss, sizeof (nss), wsp->walk_addr) == -1) { + mdb_warn("can't read netstack at %p", wsp->walk_addr); + return (WALK_ERR); + } + kaddr = (uintptr_t)nss.netstack_modules[NS_ICMP]; + return (wsp->walk_callback(kaddr, wsp->walk_layer, wsp->walk_cbdata)); +} + +int +tcp_stacks_walk_init(mdb_walk_state_t *wsp) +{ + if (mdb_layered_walk("netstack", wsp) == -1) { + mdb_warn("can't walk 'netstack'"); + return (WALK_ERR); + } + return (WALK_NEXT); +} + +int +tcp_stacks_walk_step(mdb_walk_state_t *wsp) +{ + uintptr_t kaddr; + netstack_t nss; + + if (mdb_vread(&nss, sizeof (nss), wsp->walk_addr) == -1) { + mdb_warn("can't read netstack at %p", wsp->walk_addr); + return (WALK_ERR); + } + kaddr = (uintptr_t)nss.netstack_modules[NS_TCP]; + return (wsp->walk_callback(kaddr, wsp->walk_layer, wsp->walk_cbdata)); +} + +int +udp_stacks_walk_init(mdb_walk_state_t *wsp) +{ + if (mdb_layered_walk("netstack", wsp) == -1) { + mdb_warn("can't walk 'netstack'"); + return (WALK_ERR); + } + return (WALK_NEXT); +} + +int +udp_stacks_walk_step(mdb_walk_state_t *wsp) +{ + uintptr_t kaddr; + netstack_t nss; + + if (mdb_vread(&nss, sizeof (nss), wsp->walk_addr) == -1) { + mdb_warn("can't read netstack at %p", wsp->walk_addr); + return (WALK_ERR); + } + kaddr = (uintptr_t)nss.netstack_modules[NS_UDP]; + return (wsp->walk_callback(kaddr, wsp->walk_layer, wsp->walk_cbdata)); +} + /* * Print an IPv4 address and port number in a compact and easy to read format * The arguments are in network byte order @@ -228,11 +324,14 @@ mi_walk_step(mdb_walk_state_t *wsp) return (WALK_ERR); } - status = wsp->walk_callback(wsp->walk_addr, miop, wsp->walk_cbdata); - /* Only true in the first iteration */ - if (wdp->mi_wd_miofirst == NULL) + if (wdp->mi_wd_miofirst == NULL) { wdp->mi_wd_miofirst = wsp->walk_addr; + status = WALK_NEXT; + } else { + status = wsp->walk_callback(wsp->walk_addr + sizeof (MI_O), + &miop[1], wsp->walk_cbdata); + } wsp->walk_addr = (uintptr_t)miop->mi_o_next; return (status); @@ -244,21 +343,9 @@ mi_walk_fini(mdb_walk_state_t *wsp) mdb_free(wsp->walk_data, sizeof (struct mi_walk_data)); } -typedef struct mi_payload_walk_data_s { - uintptr_t mi_pwd_first; - void *mi_pwd_data; -} mi_payload_walk_data_t; - -static void -delete_mi_payload_walk_data(mi_payload_walk_data_t *pwdp, size_t payload_size) -{ - mdb_free(pwdp->mi_pwd_data, payload_size); - mdb_free(pwdp, sizeof (mi_payload_walk_data_t)); -} - typedef struct 
mi_payload_walk_arg_s { - const char *mi_pwa_obj; /* load object of mi_o_head_t * */ - const char *mi_pwa_sym; /* symbol name of mi_o_head_t * */ + const char *mi_pwa_walker; /* Underlying walker */ + const off_t mi_pwa_head_off; /* Offset for mi_o_head_t * in stack */ const size_t mi_pwa_size; /* size of mi payload */ const uint_t mi_pwa_flags; /* device and/or module */ } mi_payload_walk_arg_t; @@ -270,45 +357,11 @@ int mi_payload_walk_init(mdb_walk_state_t *wsp) { const mi_payload_walk_arg_t *arg = wsp->walk_arg; - mi_payload_walk_data_t *pwdp; - GElf_Sym sym; - mi_head_t *mihp; - - /* Determine the address to start or end the walk with */ - if (mdb_lookup_by_obj(arg->mi_pwa_obj, arg->mi_pwa_sym, &sym) == -1) { - mdb_warn("failed to lookup %s`%s", - arg->mi_pwa_obj, arg->mi_pwa_sym); - return (WALK_ERR); - } - if (mdb_vread(&mihp, sizeof (mihp), (uintptr_t)sym.st_value) == -1) { - mdb_warn("failed to read address of global MI Head " - "mi_o_head_t at %p", (uintptr_t)sym.st_value); + if (mdb_layered_walk(arg->mi_pwa_walker, wsp) == -1) { + mdb_warn("can't walk '%s'", arg->mi_pwa_walker); return (WALK_ERR); } - - pwdp = mdb_alloc(sizeof (mi_payload_walk_data_t), UM_SLEEP); - pwdp->mi_pwd_data = mdb_alloc(arg->mi_pwa_size, UM_SLEEP); - wsp->walk_data = pwdp; - - if (wsp->walk_addr == NULL) { - /* Do not immediately return WALK_DONE below */ - pwdp->mi_pwd_first = NULL; - /* We determined where to begin */ - wsp->walk_addr = (uintptr_t)MIH2MIO(mihp); - } else { - /* Do not cycle through all of the MI_O objects */ - pwdp->mi_pwd_first = (uintptr_t)MIH2MIO(mihp); - /* We were given where to begin */ - wsp->walk_addr = (uintptr_t)((MI_OP)wsp->walk_addr - 1); - } - - if (mdb_layered_walk("genunix`mi", wsp) == -1) { - mdb_warn("failed to walk genunix`mi"); - delete_mi_payload_walk_data(pwdp, arg->mi_pwa_size); - return (WALK_ERR); - } - return (WALK_NEXT); } @@ -316,63 +369,43 @@ int mi_payload_walk_step(mdb_walk_state_t *wsp) { const mi_payload_walk_arg_t *arg = wsp->walk_arg; - mi_payload_walk_data_t *pwdp = wsp->walk_data; - void *payload = pwdp->mi_pwd_data; - uintptr_t payload_kaddr = (uintptr_t)((MI_OP)wsp->walk_addr + 1); - const MI_O *mio = wsp->walk_layer; + uintptr_t kaddr; - /* If this is a local walk, prevent cycling */ - if (wsp->walk_addr == pwdp->mi_pwd_first) - return (WALK_DONE); + kaddr = wsp->walk_addr + arg->mi_pwa_head_off; - /* - * This was a global walk, prevent reading this payload as the - * initial MI_O is the head of the list and is not the header - * to a valid payload - */ - if (pwdp->mi_pwd_first == NULL) { - pwdp->mi_pwd_first = wsp->walk_addr; - return (WALK_NEXT); + if (mdb_vread(&kaddr, sizeof (kaddr), kaddr) == -1) { + mdb_warn("can't read address of mi head at %p for %s", + kaddr, arg->mi_pwa_walker); + return (WALK_ERR); } - if (mio->mi_o_isdev == B_FALSE) { - /* mio is a module */ - if (!(arg->mi_pwa_flags & MI_PAYLOAD_MODULE)) - return (WALK_NEXT); - } else { - /* mio is a device */ - if (!(arg->mi_pwa_flags & MI_PAYLOAD_DEVICE)) - return (WALK_NEXT); + if (kaddr == 0) { + /* Empty list */ + return (WALK_DONE); } - if (mdb_vread(payload, arg->mi_pwa_size, payload_kaddr) == -1) { - mdb_warn("failed to read payload at %p", payload_kaddr); + if (mdb_pwalk("genunix`mi", wsp->walk_callback, + wsp->walk_cbdata, kaddr) == -1) { + mdb_warn("failed to walk genunix`mi"); return (WALK_ERR); } - - return (wsp->walk_callback(payload_kaddr, payload, wsp->walk_cbdata)); -} - -void -mi_payload_walk_fini(mdb_walk_state_t *wsp) -{ - const mi_payload_walk_arg_t *arg = 
wsp->walk_arg; - - delete_mi_payload_walk_data(wsp->walk_data, arg->mi_pwa_size); + return (WALK_NEXT); } const mi_payload_walk_arg_t mi_ar_arg = { - "arp", "ar_g_head", sizeof (ar_t), + "ar_stacks", OFFSETOF(arp_stack_t, as_head), sizeof (ar_t), MI_PAYLOAD_DEVICE | MI_PAYLOAD_MODULE }; const mi_payload_walk_arg_t mi_icmp_arg = { - "icmp", "icmp_g_head", sizeof (icmp_t), + "icmp_stacks", OFFSETOF(icmp_stack_t, is_head), sizeof (icmp_t), MI_PAYLOAD_DEVICE | MI_PAYLOAD_MODULE }; -const mi_payload_walk_arg_t mi_ill_arg = - { "ip", "ip_g_head", sizeof (ill_t), MI_PAYLOAD_MODULE }; +const mi_payload_walk_arg_t mi_ill_arg = { + "ip_stacks", OFFSETOF(ip_stack_t, ips_ip_g_head), sizeof (ill_t), + MI_PAYLOAD_MODULE +}; int sonode(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) @@ -557,6 +590,20 @@ mi(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) return (DCMD_OK); } +static int +ns_to_stackid(uintptr_t kaddr) +{ + netstack_t nss; + + if (mdb_vread(&nss, sizeof (nss), kaddr) == -1) { + mdb_warn("failed to read netstack_t %p", kaddr); + return (0); + } + return (nss.netstack_stackid); +} + + + static void netstat_tcp_verbose_pr(const tcp_t *tcp) { @@ -620,6 +667,8 @@ netstat_tcp_cb(uintptr_t kaddr, const void *walk_data, void *cb_data, int af) mdb_printf(" "); net_ipv6addrport_pr(&tcp->tcp_remote_v6, tcp->tcp_fport); } + mdb_printf(" %4i", ns_to_stackid((uintptr_t)connp->conn_netstack)); + mdb_printf(" %4i\n", connp->conn_zoneid); if (opts & NETSTAT_VERBOSE) @@ -676,6 +725,8 @@ netstat_udp_cb(uintptr_t kaddr, const void *walk_data, void *cb_data, int af) mdb_printf(" "); net_ipv6addrport_pr(&udp.udp_v6dst, udp.udp_dstport); } + mdb_printf(" %4i", ns_to_stackid((uintptr_t)connp.conn_netstack)); + mdb_printf(" %4i\n", connp.conn_zoneid); return (WALK_NEXT); @@ -1158,10 +1209,10 @@ netstat(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) if ((optP == NULL) || (strcmp("tcp", optP) == 0)) { if ((optf == NULL) || (strcmp("inet", optf) == 0)) { /* Print TCPv4 connection */ - mdb_printf( - "%<u>%-?s St %*s %*s %s%</u>\n", + mdb_printf("%<u>%-?s St %*s %*s " + "%s% %s%</u>\n", "TCPv4", ADDR_V4_WIDTH, "Local Address", - ADDR_V4_WIDTH, "Remote Address", "Zone"); + ADDR_V4_WIDTH, "Remote Address", "Stack", "Zone"); if (opts & NETSTAT_VERBOSE) netstat_tcp_verbose_header_pr(); @@ -1175,10 +1226,10 @@ netstat(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) if ((optf == NULL) || (strcmp("inet6", optf) == 0)) { /* Print TCPv6 connection */ - mdb_printf( - "%<u>%-?s St %*s %*s %s\n%</u>", + mdb_printf("%<u>%-?s St %*s %*s " + "%s %s%\n%</u>", "TCPv6", ADDR_V6_WIDTH, "Local Address", - ADDR_V6_WIDTH, "Remote Address", "Zone"); + ADDR_V6_WIDTH, "Remote Address", "Stack", "Zone"); if (opts & NETSTAT_VERBOSE) netstat_tcp_verbose_header_pr(); @@ -1194,10 +1245,10 @@ netstat(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) if ((optP == NULL) || (strcmp("udp", optP) == 0)) { if ((optf == NULL) || (strcmp("inet", optf) == 0)) { /* Print UDPv4 connection */ - mdb_printf( - "%<u>%-?s St %*s %*s %s\n%</u>", + mdb_printf("%<u>%-?s St %*s %*s " + "%s %s%\n%</u>", "UDPv4", ADDR_V4_WIDTH, "Local Address", - ADDR_V4_WIDTH, "Remote Address", "Zone"); + ADDR_V4_WIDTH, "Remote Address", "Stack", "Zone"); if (mdb_walk("udp_cache", netstat_udpv4_cb, (void *)(uintptr_t)opts) == -1) { @@ -1209,10 +1260,10 @@ netstat(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) if ((optf == NULL) || (strcmp("inet6", optf) == 0)) { /* Print UDPv6 connection */ - 
mdb_printf( - "%<u>%-?s St %*s %*s %s\n%</u>", + mdb_printf("%<u>%-?s St %*s %*s " + "%s %s%\n%</u>", "UDPv6", ADDR_V6_WIDTH, "Local Address", - ADDR_V6_WIDTH, "Remote Address", "Zone"); + ADDR_V6_WIDTH, "Remote Address", "Stack", "Zone"); if (mdb_walk("udp_cache", netstat_udpv6_cb, (void *)(uintptr_t)opts) == -1) { diff --git a/usr/src/cmd/mdb/common/modules/genunix/net.h b/usr/src/cmd/mdb/common/modules/genunix/net.h index 45e03a5352..59df026bd4 100644 --- a/usr/src/cmd/mdb/common/modules/genunix/net.h +++ b/usr/src/cmd/mdb/common/modules/genunix/net.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -45,7 +44,14 @@ extern int mi_walk_step(mdb_walk_state_t *); extern void mi_walk_fini(mdb_walk_state_t *); extern int mi_payload_walk_init(mdb_walk_state_t *); extern int mi_payload_walk_step(mdb_walk_state_t *); -extern void mi_payload_walk_fini(mdb_walk_state_t *); +extern int ar_stacks_walk_init(mdb_walk_state_t *); +extern int ar_stacks_walk_step(mdb_walk_state_t *); +extern int icmp_stacks_walk_init(mdb_walk_state_t *); +extern int icmp_stacks_walk_step(mdb_walk_state_t *); +extern int tcp_stacks_walk_init(mdb_walk_state_t *); +extern int tcp_stacks_walk_step(mdb_walk_state_t *); +extern int udp_stacks_walk_init(mdb_walk_state_t *); +extern int udp_stacks_walk_step(mdb_walk_state_t *); extern int sonode(uintptr_t, uint_t, int, const mdb_arg_t *); extern int mi(uintptr_t, uint_t, int, const mdb_arg_t *); diff --git a/usr/src/cmd/mdb/common/modules/genunix/netstack.c b/usr/src/cmd/mdb/common/modules/genunix/netstack.c new file mode 100644 index 0000000000..588bd6dbf3 --- /dev/null +++ b/usr/src/cmd/mdb/common/modules/genunix/netstack.c @@ -0,0 +1,123 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <mdb/mdb_modapi.h> +#include <mdb/mdb_ks.h> +#include <mdb/mdb_ctf.h> +#include <sys/types.h> +#include <sys/netstack.h> + +int +netstack_walk_init(mdb_walk_state_t *wsp) +{ + GElf_Sym sym; + uintptr_t addr; + + if (mdb_lookup_by_name("netstack_head", &sym) == -1) { + mdb_warn("couldn't find netstack_head"); + return (WALK_ERR); + } + addr = (uintptr_t)sym.st_value; + + if (mdb_vread(&wsp->walk_addr, sizeof (wsp->walk_addr), addr) == -1) { + mdb_warn("failed to read address of initial netstack " + "at %p", addr); + return (WALK_ERR); + } + return (WALK_NEXT); +} + +int +netstack_walk_step(mdb_walk_state_t *wsp) +{ + int status; + netstack_t nss; + + if (wsp->walk_addr == NULL) + return (WALK_DONE); + + if (mdb_vread(&nss, sizeof (netstack_t), wsp->walk_addr) == -1) { + mdb_warn("failed to read netstack at %p", wsp->walk_addr); + return (WALK_ERR); + } + + status = wsp->walk_callback(wsp->walk_addr, &nss, + wsp->walk_cbdata); + + if (status != WALK_NEXT) + return (status); + + wsp->walk_addr = (uintptr_t)nss.netstack_next; + return (status); +} + +/*ARGSUSED*/ +int +netstack(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) +{ + netstack_t nss; + uint_t quiet = FALSE; + uint_t verbose = FALSE; + + if (!(flags & DCMD_ADDRSPEC)) { + if (mdb_walk_dcmd("genunix`netstack", "genunix`netstack", + argc, argv) == -1) { + mdb_warn("failed to walk netstack"); + return (DCMD_ERR); + } + return (DCMD_OK); + } + if (mdb_getopts(argc, argv, + 'v', MDB_OPT_SETBITS, TRUE, &verbose, + 'q', MDB_OPT_SETBITS, TRUE, &quiet, + NULL) != argc) + return (DCMD_USAGE); + + if (DCMD_HDRSPEC(flags) && !quiet) { + mdb_printf("%?s %-7s %6s\n", + "ADDR", "STACKID", "FLAGS"); + } + + if (mdb_vread(&nss, sizeof (nss), addr) == -1) { + mdb_warn("couldn't read netstack at %p", addr); + return (DCMD_ERR); + } + + /* + * Options are specified for filtering, so If any option is specified on + * the command line, just print address and exit. + */ + if (quiet) { + mdb_printf("%0?p\n", addr); + return (DCMD_OK); + } + + mdb_printf("%0?p %6d %06x\n", + addr, nss.netstack_stackid, nss.netstack_flags); + + return (DCMD_OK); +} diff --git a/usr/src/cmd/mdb/common/modules/genunix/netstack.h b/usr/src/cmd/mdb/common/modules/genunix/netstack.h new file mode 100644 index 0000000000..392565caca --- /dev/null +++ b/usr/src/cmd/mdb/common/modules/genunix/netstack.h @@ -0,0 +1,46 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _NETSTACK_H +#define _NETSTACK_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <mdb/mdb_modapi.h> + +#ifdef __cplusplus +extern "C" { +#endif + +int netstack_walk_init(mdb_walk_state_t *); +int netstack_walk_step(mdb_walk_state_t *); + +int netstack(uintptr_t, uint_t, int, const mdb_arg_t *); + +#ifdef __cplusplus +} +#endif + +#endif /* _NETSTACK_H */ diff --git a/usr/src/cmd/mdb/common/modules/hook/hook.c b/usr/src/cmd/mdb/common/modules/hook/hook.c index d9ab29eb24..ec9679b2f9 100644 --- a/usr/src/cmd/mdb/common/modules/hook/hook.c +++ b/usr/src/cmd/mdb/common/modules/hook/hook.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -29,6 +29,7 @@ #include <sys/rwlock.h> #include <mdb/mdb_modapi.h> #include <sys/queue.h> +#include <inet/ip.h> #include <sys/hook.h> #include <sys/hook_impl.h> @@ -153,6 +154,7 @@ hookeventlist(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) int hookrootlist(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) { + struct hook_stack *hks; hook_family_int_head_t hfh; hook_family_int_t hf, *hfp; char hrrstr[MAX_LENGTH]; @@ -160,8 +162,15 @@ hookrootlist(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) if (argc) return (DCMD_USAGE); - if (mdb_readvar(&hfh, "familylist") == -1) { - mdb_warn("couldn't read symbol 'familylist'"); + if (mdb_vread((void *)&hks, sizeof (hks), + (uintptr_t)(addr + OFFSETOF(netstack_t, netstack_hook))) == -1) { + mdb_warn("couldn't read netstack_hook"); + return (DCMD_ERR); + } + + if (mdb_vread((void *)&hfh, sizeof (hfh), (uintptr_t)((uintptr_t)hks + + OFFSETOF(hook_stack_t, hks_familylist))) == -1) { + mdb_warn("couldn't read hook family head"); return (DCMD_ERR); } @@ -192,7 +201,7 @@ hookrootlist(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) static int -hookevent_walk_init(mdb_walk_state_t *wsp) +hookevent_stack_walk_init(mdb_walk_state_t *wsp) { hook_family_int_t hf; @@ -212,7 +221,7 @@ hookevent_walk_init(mdb_walk_state_t *wsp) } static int -hookevent_walk_step(mdb_walk_state_t *wsp) +hookevent_stack_walk_step(mdb_walk_state_t *wsp) { hook_event_int_t hr; @@ -228,7 +237,6 @@ hookevent_walk_step(mdb_walk_state_t *wsp) wsp->walk_cbdata)); } - static const mdb_dcmd_t dcmds[] = { { "hookrootlist", "", "display hook family information", hookrootlist }, { "hookeventlist", "", "display hook event information", @@ -238,8 +246,8 @@ static const mdb_dcmd_t dcmds[] = { }; static const mdb_walker_t walkers[] = { - { "hookevent", "walk a list of hooks", - hookevent_walk_init, hookevent_walk_step, NULL }, + { "hookevent_stack", "walk list of hooks", + hookevent_stack_walk_init, hookevent_stack_walk_step, NULL }, { NULL } }; diff --git a/usr/src/cmd/mdb/common/modules/ip/ip.c b/usr/src/cmd/mdb/common/modules/ip/ip.c index 8320fee2a5..8a24af3b8d 100644 --- a/usr/src/cmd/mdb/common/modules/ip/ip.c +++ b/usr/src/cmd/mdb/common/modules/ip/ip.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -76,34 +76,92 @@ typedef struct illif_walk_data { static int iphdr(uintptr_t, uint_t, int, const mdb_arg_t *); static int ip6hdr(uintptr_t, uint_t, int, const mdb_arg_t *); +static int ire_format(uintptr_t addr, const ire_t *irep, uint_t *verbose); + +/* + * Given the kernel address of an ip_stack_t, return the stackid + */ +static int +ips_to_stackid(uintptr_t kaddr) +{ + ip_stack_t ipss; + netstack_t nss; + + if (mdb_vread(&ipss, sizeof (ipss), kaddr) == -1) { + mdb_warn("failed to read ip_stack_t %p", kaddr); + return (0); + } + kaddr = (uintptr_t)ipss.ips_netstack; + if (mdb_vread(&nss, sizeof (nss), kaddr) == -1) { + mdb_warn("failed to read netstack_t %p", kaddr); + return (0); + } + return (nss.netstack_stackid); +} + int -illif_walk_init(mdb_walk_state_t *wsp) +ip_stacks_walk_init(mdb_walk_state_t *wsp) +{ + if (mdb_layered_walk("netstack", wsp) == -1) { + mdb_warn("can't walk 'netstack'"); + return (WALK_ERR); + } + return (WALK_NEXT); +} + +int +ip_stacks_walk_step(mdb_walk_state_t *wsp) +{ + uintptr_t kaddr; + netstack_t nss; + +#ifdef DEBUG + mdb_printf("DEBUG: ip_stacks_walk_step: addr %p\n", wsp->walk_addr); +#endif + if (mdb_vread(&nss, sizeof (nss), wsp->walk_addr) == -1) { + mdb_warn("can't read netstack at %p", wsp->walk_addr); + return (WALK_ERR); + } + kaddr = (uintptr_t)nss.netstack_modules[NS_IP]; + +#ifdef DEBUG + mdb_printf("DEBUG: ip_stacks_walk_step: ip_stack_t at %p\n", kaddr); +#endif + return (wsp->walk_callback(kaddr, wsp->walk_layer, wsp->walk_cbdata)); +} + +/* + * Called with walk_addr being the address of ips_ill_g_heads + */ +int +illif_stack_walk_init(mdb_walk_state_t *wsp) { illif_walk_data_t *iw; - if (wsp->walk_addr != NULL) { - mdb_warn("illif supports only global walks\n"); + if (wsp->walk_addr == NULL) { + mdb_warn("illif_stack supports only local walks\n"); return (WALK_ERR); } iw = mdb_alloc(sizeof (illif_walk_data_t), UM_SLEEP); - if (mdb_readsym(iw->ill_g_heads, MAX_G_HEADS * sizeof (ill_g_head_t), - "ill_g_heads") == -1) { - mdb_warn("failed to read 'ill_g_heads'"); + if (mdb_vread(iw->ill_g_heads, MAX_G_HEADS * sizeof (ill_g_head_t), + wsp->walk_addr) == -1) { + mdb_warn("failed to read 'ips_ill_g_heads' at %p", + wsp->walk_addr); mdb_free(iw, sizeof (illif_walk_data_t)); return (WALK_ERR); } iw->ill_list = 0; - wsp->walk_addr = (uintptr_t)iw->IP_VX_ILL_G_LIST(0); + wsp->walk_addr = (uintptr_t)iw->ill_g_heads[0].ill_g_list_head; wsp->walk_data = iw; return (WALK_NEXT); } int -illif_walk_step(mdb_walk_state_t *wsp) +illif_stack_walk_step(mdb_walk_state_t *wsp) { uintptr_t addr = wsp->walk_addr; illif_walk_data_t *iw = wsp->walk_data; @@ -116,13 +174,15 @@ illif_walk_step(mdb_walk_state_t *wsp) wsp->walk_addr = (uintptr_t)iw->ill_if.illif_next; - if (wsp->walk_addr == (uintptr_t)iw->IP_VX_ILL_G_LIST(list)) { + if (wsp->walk_addr == + (uintptr_t)iw->ill_g_heads[list].ill_g_list_head) { if (++list >= MAX_G_HEADS) return (WALK_DONE); iw->ill_list = list; - wsp->walk_addr = (uintptr_t)iw->IP_VX_ILL_G_LIST(list); + wsp->walk_addr = + (uintptr_t)iw->ill_g_heads[list].ill_g_list_head; return (WALK_NEXT); } @@ -130,7 +190,7 @@ illif_walk_step(mdb_walk_state_t *wsp) } void -illif_walk_fini(mdb_walk_state_t *wsp) +illif_stack_walk_fini(mdb_walk_state_t *wsp) { mdb_free(wsp->walk_data, sizeof (illif_walk_data_t)); } @@ -176,6 +236,45 @@ illif_cb(uintptr_t addr, const illif_walk_data_t *iw, illif_cbdata_t *id) } int +illif_walk_init(mdb_walk_state_t *wsp) +{ + if (mdb_layered_walk("ip_stacks", wsp) == -1) { + mdb_warn("can't walk 'ip_stacks'"); + 
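		/*
		 * 'ip_stacks' is itself layered on the 'netstack' walker
		 * (see ip_stacks_walk_init above), so failing to set it up
		 * means no per-instance ip_stack_t can be visited at all.
		 */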
return (WALK_ERR); + } + + return (WALK_NEXT); +} + +int +illif_walk_step(mdb_walk_state_t *wsp) +{ + uintptr_t kaddr; + +#ifdef DEBUG + mdb_printf("DEBUG: illif_walk_step: addr %p\n", wsp->walk_addr); +#endif + + kaddr = wsp->walk_addr + OFFSETOF(ip_stack_t, ips_ill_g_heads); + + if (mdb_vread(&kaddr, sizeof (kaddr), kaddr) == -1) { + mdb_warn("can't read ips_ip_cache_table at %p", kaddr); + return (WALK_ERR); + } +#ifdef DEBUG + mdb_printf("DEBUG: illif_walk_step: ips_ill_g_heads %p\n", kaddr); +#endif + + if (mdb_pwalk("illif_stack", wsp->walk_callback, + wsp->walk_cbdata, kaddr) == -1) { + mdb_warn("couldn't walk 'illif_stack' for ips_ill_g_heads %p", + kaddr); + return (WALK_ERR); + } + return (WALK_NEXT); +} + +int illif(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) { illif_cbdata_t id; @@ -265,6 +364,112 @@ ire_walk_step(mdb_walk_state_t *wsp) return (wsp->walk_callback(wsp->walk_addr, &ire, wsp->walk_cbdata)); } +int +ire_ctable_walk_init(mdb_walk_state_t *wsp) +{ + if (mdb_layered_walk("ip_stacks", wsp) == -1) { + mdb_warn("can't walk 'ip_stacks'"); + return (WALK_ERR); + } + + return (WALK_NEXT); +} + +int +ire_ctable_walk_step(mdb_walk_state_t *wsp) +{ + uintptr_t kaddr; + irb_t *irb; + int verbose = 0; + uint32_t cache_table_size; + int i; + +#ifdef DEBUG + mdb_printf("DEBUG: ire_ctable_walk_step: addr %p\n", wsp->walk_addr); +#endif + + kaddr = wsp->walk_addr + OFFSETOF(ip_stack_t, ips_ip_cache_table_size); + + if (mdb_vread(&cache_table_size, sizeof (uint32_t), kaddr) == -1) { + mdb_warn("can't read ips_ip_cache_table at %p", kaddr); + return (WALK_ERR); + } +#ifdef DEBUG + mdb_printf("DEBUG: ire_ctable_walk_step: ips_ip_cache_table_size %u\n", + cache_table_size); +#endif + + kaddr = wsp->walk_addr + OFFSETOF(ip_stack_t, ips_ip_cache_table); + if (mdb_vread(&kaddr, sizeof (kaddr), kaddr) == -1) { + mdb_warn("can't read ips_ip_cache_table at %p", kaddr); + return (WALK_ERR); + } +#ifdef DEBUG + mdb_printf("DEBUG: ire_ctable_walk_step: ips_ip_cache_table %p\n", + kaddr); +#endif + + irb = mdb_alloc(sizeof (irb_t) * cache_table_size, UM_SLEEP|UM_GC); + if (mdb_vread(irb, sizeof (irb_t) * cache_table_size, kaddr) == -1) { + mdb_warn("can't read irb at %p", kaddr); + return (WALK_ERR); + } + for (i = 0; i < cache_table_size; i++) { + kaddr = (uintptr_t)irb[i].irb_ire; +#ifdef DEBUG + mdb_printf("DEBUG: ire_ctable_walk_step: %d ire %p\n", + i, kaddr); +#endif + + if (mdb_pwalk("ire_next", (mdb_walk_cb_t)ire_format, &verbose, + kaddr) == -1) { + mdb_warn("can't walk 'ire_next' for ire %p", kaddr); + return (WALK_ERR); + } + } + return (WALK_NEXT); +} + +/* ARGSUSED */ +int +ire_next_walk_init(mdb_walk_state_t *wsp) +{ +#ifdef DEBUG + mdb_printf("DEBUG: ire_next_walk_init: addr %p\n", wsp->walk_addr); +#endif + return (WALK_NEXT); +} + +int +ire_next_walk_step(mdb_walk_state_t *wsp) +{ + ire_t ire; + int status; + +#ifdef DEBUG + mdb_printf("DEBUG: ire_next_walk_step: addr %p\n", wsp->walk_addr); +#endif + + if (wsp->walk_addr == NULL) + return (WALK_DONE); + + if (mdb_vread(&ire, sizeof (ire), wsp->walk_addr) == -1) { + mdb_warn("can't read ire at %p", wsp->walk_addr); + return (WALK_ERR); + } + status = wsp->walk_callback(wsp->walk_addr, &ire, + wsp->walk_cbdata); + + if (status != WALK_NEXT) + return (status); + + wsp->walk_addr = (uintptr_t)ire.ire_next; +#ifdef DEBUG + mdb_printf("DEBUG: ire_ctable_walk_step: next %p\n", wsp->walk_addr); +#endif + return (status); +} + static int ire_format(uintptr_t addr, const ire_t *irep, uint_t *verbose) { @@ -318,15 
+523,20 @@ ire_format(uintptr_t addr, const ire_t *irep, uint_t *verbose) mdb_printf("%<b>%?p%</b> %40N <%hb>\n" "%?s %40N <%hb>\n" - "%?s %40d <%hb>\n", + "%?s %40d %4d <%hb>\n", addr, &irep->ire_src_addr_v6, irep->ire_type, tmasks, "", &irep->ire_addr_v6, (ushort_t)irep->ire_marks, mmasks, - "", irep->ire_zoneid, irep->ire_flags, fmasks); + "", ips_to_stackid((uintptr_t)irep->ire_ipst), + irep->ire_zoneid, + irep->ire_flags, fmasks); } else if (irep->ire_ipversion == 6) { - mdb_printf("%?p %30N %30N %4d\n", addr, &irep->ire_src_addr_v6, - &irep->ire_addr_v6, irep->ire_zoneid); + mdb_printf("%?p %30N %30N %5d %4d\n", + addr, &irep->ire_src_addr_v6, + &irep->ire_addr_v6, + ips_to_stackid((uintptr_t)irep->ire_ipst), + irep->ire_zoneid); } else if (*verbose) { @@ -335,12 +545,14 @@ ire_format(uintptr_t addr, const ire_t *irep, uint_t *verbose) "%?s %40d <%hb>\n", addr, irep->ire_src_addr, irep->ire_type, tmasks, "", irep->ire_addr, (ushort_t)irep->ire_marks, mmasks, - "", irep->ire_zoneid, irep->ire_flags, fmasks); + "", ips_to_stackid((uintptr_t)irep->ire_ipst), + irep->ire_zoneid, irep->ire_flags, fmasks); } else { - mdb_printf("%?p %30I %30I %4d\n", addr, irep->ire_src_addr, - irep->ire_addr, irep->ire_zoneid); + mdb_printf("%?p %30I %30I %5d %4d\n", addr, irep->ire_src_addr, + irep->ire_addr, ips_to_stackid((uintptr_t)irep->ire_ipst), + irep->ire_zoneid); } return (WALK_NEXT); @@ -676,13 +888,13 @@ ire(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) if (verbose) { mdb_printf("%?s %40s %-20s%\n" "%?s %40s %-20s%\n" - "%<u>%?s %40s %-20s%</u>\n", + "%<u>%?s %40s %4s %-20s%</u>\n", "ADDR", "SRC", "TYPE", "", "DST", "MARKS", - "", "ZONE", "FLAGS"); + "", "STACK", "ZONE", "FLAGS"); } else { - mdb_printf("%<u>%?s %30s %30s %4s%</u>\n", - "ADDR", "SRC", "DST", "ZONE"); + mdb_printf("%<u>%?s %30s %30s %5s %4s%</u>\n", + "ADDR", "SRC", "DST", "STACK", "ZONE"); } } @@ -853,10 +1065,19 @@ static const mdb_dcmd_t dcmds[] = { }; static const mdb_walker_t walkers[] = { - { "illif", "walk list of ill interface types", - illif_walk_init, illif_walk_step, illif_walk_fini }, + { "illif", "walk list of ill interface types for all stacks", + illif_walk_init, illif_walk_step, NULL }, + { "illif_stack", "walk list of ill interface types", + illif_stack_walk_init, illif_stack_walk_step, + illif_stack_walk_fini }, { "ire", "walk active ire_t structures", ire_walk_init, ire_walk_step, NULL }, + { "ire_ctable", "walk ire_t structures in the ctable", + ire_ctable_walk_init, ire_ctable_walk_step, NULL }, + { "ire_next", "walk ire_t structures in the ctable", + ire_next_walk_init, ire_next_walk_step, NULL }, + { "ip_stacks", "walk all the ip_stack_t", + ip_stacks_walk_init, ip_stacks_walk_step, NULL }, { NULL } }; diff --git a/usr/src/cmd/mdb/common/modules/neti/neti.c b/usr/src/cmd/mdb/common/modules/neti/neti.c index 788099b048..e58fa42f0b 100644 --- a/usr/src/cmd/mdb/common/modules/neti/neti.c +++ b/usr/src/cmd/mdb/common/modules/neti/neti.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -38,8 +38,6 @@ */ #define PROT_LENGTH 32 -LIST_HEAD(netd_listhead, net_data); - /* * List pfhooks netinfo information. 
*/ @@ -47,6 +45,7 @@ LIST_HEAD(netd_listhead, net_data); int netinfolist(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) { + struct neti_stack *nts; struct netd_listhead nlh; struct net_data nd, *p; char str[PROT_LENGTH]; @@ -54,8 +53,15 @@ netinfolist(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) if (argc) return (DCMD_USAGE); - if (mdb_readvar(&nlh, "netd_head") == -1) { - mdb_warn("couldn't read symbol 'netd_head'"); + if (mdb_vread((void *)&nts, sizeof (nts), + (uintptr_t)(addr + OFFSETOF(netstack_t, netstack_neti))) == -1) { + mdb_warn("couldn't read netstack_neti"); + return (DCMD_ERR); + } + + if (mdb_vread((void *)&nlh, sizeof (nlh), (uintptr_t)((uintptr_t)nts + + OFFSETOF(neti_stack_t, nts_netd_head))) == -1) { + mdb_warn("couldn't read netd list head"); return (DCMD_ERR); } mdb_printf("%<u>%?s %?s %10s%</u>\n", diff --git a/usr/src/cmd/mdb/common/modules/sctp/sctp.c b/usr/src/cmd/mdb/common/modules/sctp/sctp.c index c40d3249e1..aa088b45e6 100644 --- a/usr/src/cmd/mdb/common/modules/sctp/sctp.c +++ b/usr/src/cmd/mdb/common/modules/sctp/sctp.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -65,23 +65,48 @@ #define MDB_SCTP_SHOW_ALL 0xffffffff -uint_t sctp_conn_hash_size; -static GElf_Sym sctp_list_sym; -static list_t sctp_list; - -/* - * Both the ill and ipif global arrays are small, so we just read - * in the whole arrays. - */ -static sctp_ill_hash_t local_g_ills[SCTP_ILL_HASH]; -static sctp_ipif_hash_t local_g_ipifs[SCTP_IPIF_HASH]; - /* * Copy from usr/src/uts/common/os/list.c. Should we have a generic * mdb list walker? */ #define list_object(a, node) ((void *)(((char *)node) - (a)->list_offset)) +static int +ns_to_stackid(uintptr_t kaddr) +{ + netstack_t nss; + + if (mdb_vread(&nss, sizeof (nss), kaddr) == -1) { + mdb_warn("failed to read netdstack info %p", kaddr); + return (0); + } + return (nss.netstack_stackid); +} + +int +sctp_stacks_walk_init(mdb_walk_state_t *wsp) +{ + if (mdb_layered_walk("netstack", wsp) == -1) { + mdb_warn("can't walk 'netstack'"); + return (WALK_ERR); + } + return (WALK_NEXT); +} + +int +sctp_stacks_walk_step(mdb_walk_state_t *wsp) +{ + uintptr_t kaddr; + netstack_t nss; + + if (mdb_vread(&nss, sizeof (nss), wsp->walk_addr) == -1) { + mdb_warn("can't read netstack at %p", wsp->walk_addr); + return (WALK_ERR); + } + kaddr = (uintptr_t)nss.netstack_modules[NS_SCTP]; + return (wsp->walk_callback(kaddr, wsp->walk_layer, wsp->walk_cbdata)); +} + static char * sctp_faddr_state(int state) { @@ -713,8 +738,9 @@ sctp(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) mdb_nhconvert(&lport, &sctp.sctp_lport, sizeof (lport)); mdb_nhconvert(&fport, &sctp.sctp_fport, sizeof (fport)); - mdb_printf("%<u>%p% %22s S=%-6hu D=%-6hu% ZONE=%d%</u>", addr, - state2str(&sctp), lport, fport, connp.conn_zoneid); + mdb_printf("%<u>%p% %22s S=%-6hu D=%-6hu% STACK=%d ZONE=%d%</u>", addr, + state2str(&sctp), lport, fport, + ns_to_stackid((uintptr_t)connp.conn_netstack), connp.conn_zoneid); if (sctp.sctp_faddrs) { sctp_faddr_t faddr; @@ -888,8 +914,6 @@ sctp(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) mdb_printf("%<b>Hash Tables%</b>\n"); mdb_printf("conn_hash_next\t%?p\t", sctp.sctp_conn_hash_next); mdb_printf("conn_hash_prev\t%?p\n", sctp.sctp_conn_hash_prev); - mdb_printf("[ conn_hash bucket\t%?d ]\n", - SCTP_CONN_HASH(sctp.sctp_ports)); mdb_printf("listen_hash_next%?p\t", 
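
sctp_stacks_walk_init()/_step() above are the sctp instance of a reusable idea: layer over the "netstack" walker and pass the callback the per-stack module pointer from netstack_modules[]. A parameterized sketch, with the module index left as an argument (the real walkers hard-code NS_SCTP and friends, so this helper is illustrative only):

static int
module_stack_walk_step(mdb_walk_state_t *wsp, int moduleid)
{
	netstack_t nss;

	if (mdb_vread(&nss, sizeof (nss), wsp->walk_addr) == -1) {
		mdb_warn("can't read netstack at %p", wsp->walk_addr);
		return (WALK_ERR);
	}
	return (wsp->walk_callback(
	    (uintptr_t)nss.netstack_modules[moduleid],
	    wsp->walk_layer, wsp->walk_cbdata));
}
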
sctp.sctp_listen_hash_next); @@ -956,8 +980,9 @@ typedef struct fanout_walk_data { } fanout_walk_data_t; typedef struct fanout_init { - const char *symname; - int (*getsize)(); + const char *nested_walker_name; + size_t offset; /* for what used to be a symbol */ + int (*getsize)(sctp_stack_t *); uintptr_t (*getnext)(sctp_t *); } fanout_init_t; @@ -967,8 +992,9 @@ listen_next(sctp_t *sctp) return ((uintptr_t)sctp->sctp_listen_hash_next); } +/* ARGSUSED */ static int -listen_size(void) +listen_size(sctp_stack_t *sctps) { return (SCTP_LISTEN_FANOUT_SIZE); } @@ -980,17 +1006,15 @@ conn_next(sctp_t *sctp) } static int -conn_size(void) +conn_size(sctp_stack_t *sctps) { - GElf_Sym sym; int size; + uintptr_t kaddr; - if (mdb_lookup_by_name("sctp_conn_hash_size", &sym) == -1) { - mdb_warn("can't read 'sctp_conn_hash_size'"); - return (1); - } - if (mdb_vread(&size, sizeof (size), sym.st_value) == -1) { - mdb_warn("can't dereference 'sctp_conn_hash_size'"); + kaddr = (uintptr_t)&sctps->sctps_conn_hash_size; + + if (mdb_vread(&size, sizeof (size), kaddr) == -1) { + mdb_warn("can't read 'sctps_conn_hash_size' at %p", kaddr); return (1); } return (size); @@ -1002,8 +1026,9 @@ bind_next(sctp_t *sctp) return ((uintptr_t)sctp->sctp_bind_hash); } +/* ARGSUSED */ static int -bind_size(void) +bind_size(sctp_stack_t *sctps) { return (SCTP_BIND_FANOUT_SIZE); } @@ -1048,22 +1073,25 @@ find_next_hash_item(fanout_walk_data_t *fw) } static int -fanout_walk_init(mdb_walk_state_t *wsp) +fanout_stack_walk_init(mdb_walk_state_t *wsp) { - GElf_Sym sym; fanout_walk_data_t *lw; fanout_init_t *fi = wsp->walk_arg; + sctp_stack_t *sctps = (sctp_stack_t *)wsp->walk_addr; + uintptr_t kaddr; - if (mdb_lookup_by_name(fi->symname, &sym) == -1) { - mdb_warn("failed to read '%s'", fi->symname); + if (mdb_vread(&kaddr, sizeof (kaddr), + wsp->walk_addr + fi->offset) == -1) { + mdb_warn("can't read sctp fanout at %p", + wsp->walk_addr + fi->offset); return (WALK_ERR); } lw = mdb_alloc(sizeof (*lw), UM_SLEEP); lw->index = 0; - lw->size = fi->getsize(); + lw->size = fi->getsize(sctps); lw->sctp = NULL; - lw->fanout = (sctp_tf_t *)(uintptr_t)sym.st_value; + lw->fanout = (sctp_tf_t *)kaddr; lw->getnext = fi->getnext; if ((wsp->walk_addr = find_next_hash_item(lw)) == NULL) { @@ -1074,7 +1102,7 @@ fanout_walk_init(mdb_walk_state_t *wsp) } static int -fanout_walk_step(mdb_walk_state_t *wsp) +fanout_stack_walk_step(mdb_walk_state_t *wsp) { fanout_walk_data_t *fw = wsp->walk_data; uintptr_t addr = wsp->walk_addr; @@ -1097,43 +1125,62 @@ fanout_walk_step(mdb_walk_state_t *wsp) } static void -fanout_walk_fini(mdb_walk_state_t *wsp) +fanout_stack_walk_fini(mdb_walk_state_t *wsp) { fanout_walk_data_t *fw = wsp->walk_data; mdb_free(fw, sizeof (*fw)); } -static int -sctp_walk_init(mdb_walk_state_t *wsp) +int +fanout_walk_init(mdb_walk_state_t *wsp) { - wsp->walk_addr = (uintptr_t)list_object(&sctp_list, - sctp_list.list_head.list_next); + if (mdb_layered_walk("sctp_stacks", wsp) == -1) { + mdb_warn("can't walk 'sctp_stacks'"); + return (WALK_ERR); + } + return (WALK_NEXT); } -static int -sctp_walk_step(mdb_walk_state_t *wsp) +int +fanout_walk_step(mdb_walk_state_t *wsp) { - uintptr_t psctp = wsp->walk_addr; - sctp_t sctp; - int status; + fanout_init_t *fi = wsp->walk_arg; - if (mdb_vread(&sctp, sizeof (sctp), psctp) == -1) { - mdb_warn("failed to read sctp at %p", psctp); + if (mdb_pwalk(fi->nested_walker_name, wsp->walk_callback, + wsp->walk_cbdata, wsp->walk_addr) == -1) { + mdb_warn("couldn't walk '%s'for address %p", + fi->nested_walker_name, 
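
A subtlety worth noting in conn_size(): the sctp_stack_t pointer it receives is a kernel address that was merely cast in fanout_stack_walk_init(), so &sctps->sctps_conn_hash_size only computes the field's kernel address and the value still has to be fetched with mdb_vread(). The idiom in isolation, as a sketch with an invented helper name:

static uint32_t
stack_field32(sctp_stack_t *sctps, size_t offset)
{
	uint32_t val;
	uintptr_t kaddr = (uintptr_t)sctps + offset;

	/* sctps is a kernel address; never dereference it directly */
	if (mdb_vread(&val, sizeof (val), kaddr) == -1) {
		mdb_warn("can't read per-stack field at %p", kaddr);
		return (0);
	}
	return (val);
}
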
wsp->walk_addr); return (WALK_ERR); } - status = wsp->walk_callback(psctp, &sctp, wsp->walk_cbdata); - if (status != WALK_NEXT) - return (status); + return (WALK_NEXT); +} - if ((psctp = (uintptr_t)sctp.sctp_list.list_next) == - sctp_list_sym.st_value + OFFSETOF(list_t, list_head)) { - return (WALK_DONE); - } else { - wsp->walk_addr = (uintptr_t)list_object(&sctp_list, psctp); - return (WALK_NEXT); +int +sctps_walk_init(mdb_walk_state_t *wsp) +{ + + if (mdb_layered_walk("sctp_stacks", wsp) == -1) { + mdb_warn("can't walk 'sctp_stacks'"); + return (WALK_ERR); + } + + return (WALK_NEXT); +} + +int +sctps_walk_step(mdb_walk_state_t *wsp) +{ + uintptr_t kaddr; + + kaddr = wsp->walk_addr + OFFSETOF(sctp_stack_t, sctps_g_list); + if (mdb_pwalk("list", wsp->walk_callback, + wsp->walk_cbdata, kaddr) == -1) { + mdb_warn("couldn't walk 'list' for address %p", kaddr); + return (WALK_ERR); } + return (WALK_NEXT); } static int @@ -1281,66 +1328,78 @@ sctp_walk_saddr_fini(mdb_walk_state_t *wsp) mdb_free(swalker, sizeof (saddr_walk_t)); } -static int -sctp_walk_ill_init(mdb_walk_state_t *wsp) + +typedef struct ill_walk_data { + sctp_ill_hash_t ills[SCTP_ILL_HASH]; + uint32_t count; +} ill_walk_data_t; + +typedef struct ipuf_walk_data { + sctp_ipif_hash_t ipifs[SCTP_IPIF_HASH]; + uint32_t count; +} ipif_walk_data_t; + + +int +sctp_ill_walk_init(mdb_walk_state_t *wsp) { - intptr_t i; + if (mdb_layered_walk("sctp_stacks", wsp) == -1) { + mdb_warn("can't walk 'sctp_stacks'"); + return (WALK_ERR); + } - /* Find the first ill. */ - for (i = 0; i < SCTP_ILL_HASH; i++) { - if (local_g_ills[i].ill_count > 0) { - wsp->walk_addr = (uintptr_t)list_object( - &local_g_ills[i].sctp_ill_list, - local_g_ills[i].sctp_ill_list.list_head.list_next); - wsp->walk_data = (void *)i; - wsp->walk_arg = (void *)1; - return (WALK_NEXT); - } + return (WALK_NEXT); +} + +int +sctp_ill_walk_step(mdb_walk_state_t *wsp) +{ + if (mdb_pwalk("sctp_stack_walk_ill", wsp->walk_callback, + wsp->walk_cbdata, wsp->walk_addr) == -1) { + mdb_warn("couldn't walk 'sctp_stack_walk_ill' for addr %p", + wsp->walk_addr); + return (WALK_ERR); } - return (WALK_DONE); + return (WALK_NEXT); } +/* + * wsp->walk_addr is the address of sctps_ill_list + */ static int -sctp_walk_ill_step(mdb_walk_state_t *wsp) +sctp_stack_ill_walk_init(mdb_walk_state_t *wsp) { - uintptr_t ill_ptr = wsp->walk_addr; - sctp_ill_t ill; - int status; - intptr_t i, j; + ill_walk_data_t iw; + intptr_t i; + uintptr_t kaddr, uaddr; + size_t offset; - if (mdb_vread(&ill, sizeof (sctp_ill_t), ill_ptr) == -1) { - mdb_warn("failed to read sctp_ill_t at %p", ill_ptr); + kaddr = wsp->walk_addr + OFFSETOF(sctp_stack_t, sctps_ills_count); + if (mdb_vread(&iw.count, sizeof (iw.count), kaddr) == -1) { + mdb_warn("can't read sctps_ills_count at %p", kaddr); return (WALK_ERR); } - status = wsp->walk_callback(ill_ptr, &ill, wsp->walk_cbdata); - if (status != WALK_NEXT) - return (status); - - i = (intptr_t)wsp->walk_data; - j = (intptr_t)wsp->walk_arg; + kaddr = wsp->walk_addr + OFFSETOF(sctp_stack_t, sctps_g_ills); - /* - * If there is still an ill in the current list, return it. - * Otherwise, go to the next list and find another one. 
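
sctps_walk_step() leans on the generic "list" walker, whose starting address is the kernel address of a list_t itself. That is why sctps_g_list needs only walk_addr + OFFSETOF(...) with no intervening pointer read, unlike the fanout arrays above, which are reached through a pointer stored in the stack structure. The shape of such a step, sketched with the embedded-list offset as a parameter:

static int
embedded_list_pwalk(mdb_walk_state_t *wsp, size_t list_offset)
{
	/* kaddr is the address of the list_t embedded in the stack */
	uintptr_t kaddr = wsp->walk_addr + list_offset;

	if (mdb_pwalk("list", wsp->walk_callback, wsp->walk_cbdata,
	    kaddr) == -1) {
		mdb_warn("couldn't walk 'list' at %p", kaddr);
		return (WALK_ERR);
	}
	return (WALK_NEXT);
}
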
- */ - if (j++ < local_g_ills[i].ill_count) { - wsp->walk_addr = (uintptr_t)ill.sctp_ills.list_next; - wsp->walk_data = (void *)i; - wsp->walk_arg = (void *)j; - return (WALK_NEXT); - } else { - list_t *ill_list; - - for (i = i + 1; i < SCTP_ILL_HASH; i++) { - if (local_g_ills[i].ill_count > 0) { - ill_list = &local_g_ills[i].sctp_ill_list; - wsp->walk_addr = (uintptr_t)list_object( - ill_list, ill_list->list_head.list_next); + if (mdb_vread(&kaddr, sizeof (kaddr), kaddr) == -1) { + mdb_warn("can't read scpts_g_ills %p", kaddr); + return (WALK_ERR); + } + if (mdb_vread(&iw.ills, sizeof (iw.ills), kaddr) == -1) { + mdb_warn("failed to read 'sctps_g_ills'"); + return (NULL); + } - /* Record the current position. */ - wsp->walk_data = (void *)i; - wsp->walk_arg = (void *)1; - return (WALK_NEXT); + /* Find the first ill. */ + for (i = 0; i < SCTP_ILL_HASH; i++) { + if (iw.ills[i].ill_count > 0) { + uaddr = (uintptr_t)&iw.ills[i].sctp_ill_list; + offset = uaddr - (uintptr_t)&iw.ills; + if (mdb_pwalk("list", wsp->walk_callback, + wsp->walk_cbdata, kaddr+offset) == -1) { + mdb_warn("couldn't walk 'list' for address %p", + kaddr); + return (WALK_ERR); } } } @@ -1348,72 +1407,84 @@ sctp_walk_ill_step(mdb_walk_state_t *wsp) } static int -sctp_walk_ipif_init(mdb_walk_state_t *wsp) +sctp_stack_ill_walk_step(mdb_walk_state_t *wsp) { - intptr_t i; - list_t *ipif_list; + return (wsp->walk_callback(wsp->walk_addr, wsp->walk_layer, + wsp->walk_cbdata)); +} - for (i = 0; i < SCTP_IPIF_HASH; i++) { - if (local_g_ipifs[i].ipif_count > 0) { - ipif_list = &local_g_ipifs[i].sctp_ipif_list; +int +sctp_ipif_walk_init(mdb_walk_state_t *wsp) +{ + if (mdb_layered_walk("sctp_stacks", wsp) == -1) { + mdb_warn("can't walk 'sctp_stacks'"); + return (WALK_ERR); + } + return (WALK_NEXT); +} - wsp->walk_addr = (uintptr_t)list_object(ipif_list, - ipif_list->list_head.list_next); - wsp->walk_data = (void *)i; - wsp->walk_arg = (void *)1; - return (WALK_NEXT); - } +int +sctp_ipif_walk_step(mdb_walk_state_t *wsp) +{ + if (mdb_pwalk("sctp_stack_walk_ipif", wsp->walk_callback, + wsp->walk_cbdata, wsp->walk_addr) == -1) { + mdb_warn("couldn't walk 'sctp_stack_walk_ipif' for addr %p", + wsp->walk_addr); + return (WALK_ERR); } - return (WALK_DONE); + return (WALK_NEXT); } +/* + * wsp->walk_addr is the address of sctps_ipif_list + */ static int -sctp_walk_ipif_step(mdb_walk_state_t *wsp) +sctp_stack_ipif_walk_init(mdb_walk_state_t *wsp) { - uintptr_t ipif_ptr = wsp->walk_addr; - sctp_ipif_t ipif; - int status; - intptr_t i, j; + ipif_walk_data_t iw; + intptr_t i; + uintptr_t kaddr, uaddr; + size_t offset; - if (mdb_vread(&ipif, sizeof (sctp_ipif_t), ipif_ptr) == -1) { - mdb_warn("failed to read sctp_ipif_t at %p", ipif_ptr); + kaddr = wsp->walk_addr + OFFSETOF(sctp_stack_t, sctps_g_ipifs_count); + if (mdb_vread(&iw.count, sizeof (iw.count), kaddr) == -1) { + mdb_warn("can't read sctps_g_ipifs_count at %p", kaddr); return (WALK_ERR); } - status = wsp->walk_callback(ipif_ptr, &ipif, wsp->walk_cbdata); - if (status != WALK_NEXT) - return (status); + kaddr = wsp->walk_addr + OFFSETOF(sctp_stack_t, sctps_g_ipifs); - i = (intptr_t)wsp->walk_data; - j = (intptr_t)wsp->walk_arg; - - /* - * If there is still an ipif in the current list, return it. - * Otherwise, go to the next list and find another one. 
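
The bucket loop in sctp_stack_ill_walk_init() reads the entire sctps_g_ills[] array into a local copy, then recovers each non-empty bucket's kernel address by taking the offset of its embedded list_t within that copy and adding it to the kernel base of the array. That offset arithmetic, pulled out into a hypothetical helper:

static uintptr_t
bucket_list_kaddr(const sctp_ill_hash_t *local, int i, uintptr_t kbase)
{
	/* offset of bucket i's list_t within the local copy ... */
	size_t offset = (uintptr_t)&local[i].sctp_ill_list -
	    (uintptr_t)local;

	/* ... is the same offset within the in-kernel array */
	return (kbase + offset);
}

The ipif variant below applies the same trick to sctps_g_ipifs and sctp_ipif_list.
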
- */ - if (j++ < local_g_ipifs[i].ipif_count) { - wsp->walk_addr = (uintptr_t)ipif.sctp_ipifs.list_next; - wsp->walk_data = (void *)i; - wsp->walk_arg = (void *)j; - return (WALK_NEXT); - } else { - list_t *ipif_list; - - for (i = i + 1; i < SCTP_IPIF_HASH; i++) { - if (local_g_ipifs[i].ipif_count > 0) { - ipif_list = &local_g_ipifs[i].sctp_ipif_list; - wsp->walk_addr = (uintptr_t)list_object( - ipif_list, ipif_list->list_head.list_next); + if (mdb_vread(&kaddr, sizeof (kaddr), kaddr) == -1) { + mdb_warn("can't read scpts_g_ipifs %p", kaddr); + return (WALK_ERR); + } + if (mdb_vread(&iw.ipifs, sizeof (iw.ipifs), kaddr) == -1) { + mdb_warn("failed to read 'sctps_g_ipifs'"); + return (NULL); + } - /* Record the current position */ - wsp->walk_data = (void *)i; - wsp->walk_arg = (void *)1; - return (WALK_NEXT); + /* Find the first ipif. */ + for (i = 0; i < SCTP_IPIF_HASH; i++) { + if (iw.ipifs[i].ipif_count > 0) { + uaddr = (uintptr_t)&iw.ipifs[i].sctp_ipif_list; + offset = uaddr - (uintptr_t)&iw.ipifs; + if (mdb_pwalk("list", wsp->walk_callback, + wsp->walk_cbdata, kaddr+offset) == -1) { + mdb_warn("couldn't walk 'list' for address %p", + kaddr); + return (WALK_ERR); } } } return (WALK_DONE); } +static int +sctp_stack_ipif_walk_step(mdb_walk_state_t *wsp) +{ + return (wsp->walk_callback(wsp->walk_addr, wsp->walk_layer, + wsp->walk_cbdata)); +} + static void sctp_help(void) { @@ -1455,37 +1526,59 @@ static const mdb_dcmd_t dcmds[] = { }; static const fanout_init_t listen_fanout_init = { - "sctp_listen_fanout", listen_size, listen_next + "sctp_stack_listen_fanout", OFFSETOF(sctp_stack_t, sctps_listen_fanout), + listen_size, listen_next }; static const fanout_init_t conn_fanout_init = { - "sctp_conn_fanout", conn_size, conn_next + "sctp_stack_conn_fanout", OFFSETOF(sctp_stack_t, sctps_conn_fanout), + conn_size, conn_next }; static const fanout_init_t bind_fanout_init = { - "sctp_bind_fanout", bind_size, bind_next + "sctp_stack_bind_fanout", OFFSETOF(sctp_stack_t, sctps_bind_fanout), + bind_size, bind_next }; static const mdb_walker_t walkers[] = { - { "sctps", "walk the full chain of sctps", - sctp_walk_init, sctp_walk_step, NULL }, - { "sctp_listen_fanout", "walk the sctp listen fanout", - fanout_walk_init, fanout_walk_step, fanout_walk_fini, + { "sctps", "walk the full chain of sctps for all stacks", + sctps_walk_init, sctps_walk_step, NULL }, + { "sctp_listen_fanout", "walk the sctp listen fanout for all stacks", + fanout_walk_init, fanout_walk_step, NULL, (void *)&listen_fanout_init }, - { "sctp_conn_fanout", "walk the sctp conn fanout", - fanout_walk_init, fanout_walk_step, fanout_walk_fini, + { "sctp_conn_fanout", "walk the sctp conn fanout for all stacks", + fanout_walk_init, fanout_walk_step, NULL, (void *)&conn_fanout_init }, - { "sctp_bind_fanout", "walk the sctp bind fanout", - fanout_walk_init, fanout_walk_step, fanout_walk_fini, + { "sctp_bind_fanout", "walk the sctp bind fanout for all stacks", + fanout_walk_init, fanout_walk_step, NULL, + (void *)&bind_fanout_init }, + { "sctp_stack_listen_fanout", + "walk the sctp listen fanout for one stack", + fanout_stack_walk_init, fanout_stack_walk_step, + fanout_stack_walk_fini, + (void *)&listen_fanout_init }, + { "sctp_stack_conn_fanout", "walk the sctp conn fanout for one stack", + fanout_stack_walk_init, fanout_stack_walk_step, + fanout_stack_walk_fini, + (void *)&conn_fanout_init }, + { "sctp_stack_bind_fanout", "walk the sctp bind fanoutfor one stack", + fanout_stack_walk_init, fanout_stack_walk_step, + fanout_stack_walk_fini, (void 
*)&bind_fanout_init }, { "sctp_walk_faddr", "walk the peer address list of a given sctp_t", sctp_walk_faddr_init, sctp_walk_faddr_step, NULL }, { "sctp_walk_saddr", "walk the local address list of a given sctp_t", sctp_walk_saddr_init, sctp_walk_saddr_step, sctp_walk_saddr_fini }, - { "sctp_walk_ill", "walk the sctp_g_ills list", - sctp_walk_ill_init, sctp_walk_ill_step, NULL }, - { "sctp_walk_ipif", "walk the sctp_g_ipif list", - sctp_walk_ipif_init, sctp_walk_ipif_step, NULL }, + { "sctp_walk_ill", "walk the sctp_g_ills list for all stacks", + sctp_ill_walk_init, sctp_ill_walk_step, NULL }, + { "sctp_walk_ipif", "walk the sctp_g_ipif list for all stacks", + sctp_ipif_walk_init, sctp_ipif_walk_step, NULL }, + { "sctp_stack_walk_ill", "walk the sctp_g_ills list for one stack", + sctp_stack_ill_walk_init, sctp_stack_ill_walk_step, NULL }, + { "sctp_stack_walk_ipif", "walk the sctp_g_ipif list for one stack", + sctp_stack_ipif_walk_init, sctp_stack_ipif_walk_step, NULL }, + { "sctp_stacks", "walk all the sctp_stack_t", + sctp_stacks_walk_init, sctp_stacks_walk_step, NULL }, { NULL } }; @@ -1494,44 +1587,5 @@ static const mdb_modinfo_t modinfo = { MDB_API_VERSION, dcmds, walkers }; const mdb_modinfo_t * _mdb_init(void) { - GElf_Sym sym; - GElf_Sym ills_sym; - GElf_Sym ipifs_sym; - - if (mdb_lookup_by_name("sctp_g_list", &sctp_list_sym) == -1) { - mdb_warn("failed to find 'sctp_g_list'"); - return (NULL); - } - if (mdb_vread(&sctp_list, sizeof (list_t), - (uintptr_t)sctp_list_sym.st_value) == -1) { - mdb_warn("failed to read 'sctp_g_list'"); - return (NULL); - } - if (mdb_lookup_by_name("sctp_conn_hash_size", &sym) != -1) { - if (mdb_vread(&sctp_conn_hash_size, - sizeof (sctp_conn_hash_size), sym.st_value) == -1) { - mdb_warn("failed to read 'sctp_conn_hash_size'"); - return (NULL); - } - } - if (mdb_lookup_by_name("sctp_g_ills", &ills_sym) == -1) { - mdb_warn("failed to find 'sctp_g_ills'"); - return (NULL); - } - if (mdb_vread(&local_g_ills, sizeof (local_g_ills), - (uintptr_t)ills_sym.st_value) == -1) { - mdb_warn("failed to read 'sctp_g_ills'"); - return (NULL); - } - if (mdb_lookup_by_name("sctp_g_ipifs", &ipifs_sym) == -1) { - mdb_warn("failed to find 'sctp_g_ipifs'"); - return (NULL); - } - if (mdb_vread(&local_g_ipifs, sizeof (local_g_ipifs), - (uintptr_t)ipifs_sym.st_value) == -1) { - mdb_warn("failed to read 'sctp_g_ipifs'"); - return (NULL); - } - return (&modinfo); } diff --git a/usr/src/cmd/svc/milestone/net-init b/usr/src/cmd/svc/milestone/net-init index 5862c4edf6..89c0abdb9a 100644 --- a/usr/src/cmd/svc/milestone/net-init +++ b/usr/src/cmd/svc/milestone/net-init @@ -20,7 +20,7 @@ # CDDL HEADER END # # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # ident "%Z%%M% %I% %E% SMI" @@ -39,11 +39,12 @@ . /lib/svc/share/smf_include.sh # -# In a zone we need this service to be up, but all of the work -# it tries to do is irrelevant (and will actually lead to the service -# failing if we try to do it), so just bail out. +# In a shared-IP zone we need this service to be up, but all of the work +# it tries to do is irrelevant (and will actually lead to the service +# failing if we try to do it), so just bail out. +# In the global zone and exclusive-IP zones we proceed. # -smf_is_globalzone || exit $SMF_EXIT_OK +smf_configure_ip || exit $SMF_EXIT_OK # Configure IPv6 Default Address Selection. if [ -f /etc/inet/ipaddrsel.conf ]; then @@ -56,7 +57,8 @@ fi # automatically exit. 
Note that it may already be running if we're not # executing as part of system boot. # -/usr/bin/pgrep -x -u 0 in.mpathd >/dev/null 2>&1 || /usr/lib/inet/in.mpathd -a +/usr/bin/pgrep -x -u 0 -z `smf_zonename` in.mpathd >/dev/null 2>&1 || \ + /usr/lib/inet/in.mpathd -a # # Pass to the kernel the list of supported IPsec protocols and algorithms. diff --git a/usr/src/cmd/svc/milestone/net-loopback b/usr/src/cmd/svc/milestone/net-loopback index 9a23eb27b6..3688a206f5 100644 --- a/usr/src/cmd/svc/milestone/net-loopback +++ b/usr/src/cmd/svc/milestone/net-loopback @@ -20,7 +20,7 @@ # CDDL HEADER END # # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # #ident "%Z%%M% %I% %E% SMI" @@ -28,11 +28,12 @@ . /lib/svc/share/smf_include.sh # -# In a zone we need this service to be up, but all of the work -# it tries to do is irrelevant (and will actually lead to the service -# failing if we try to do it), so just bail out. +# In a shared-IP zone we need this service to be up, but all of the work +# it tries to do is irrelevant (and will actually lead to the service +# failing if we try to do it), so just bail out. +# In the global zone and exclusive-IP zones we proceed. # -smf_is_globalzone || exit $SMF_EXIT_OK +smf_configure_ip || exit $SMF_EXIT_OK # # Cause ifconfig to not automatically start in.mpathd when IPMP groups are diff --git a/usr/src/cmd/svc/milestone/net-physical b/usr/src/cmd/svc/milestone/net-physical index 60c4ee8f98..cc260062ae 100644 --- a/usr/src/cmd/svc/milestone/net-physical +++ b/usr/src/cmd/svc/milestone/net-physical @@ -20,7 +20,7 @@ # CDDL HEADER END # # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T. @@ -33,11 +33,12 @@ . /lib/svc/share/net_include.sh # -# In a zone we need this service to be up, but all of the work -# it tries to do is irrelevant (and will actually lead to the service -# failing if we try to do it), so just bail out. +# In a shared-IP zone we need this service to be up, but all of the work +# it tries to do is irrelevant (and will actually lead to the service +# failing if we try to do it), so just bail out. +# In the global zone and exclusive-IP zones we proceed. # -smf_is_globalzone || exit $SMF_EXIT_OK +smf_configure_ip || exit $SMF_EXIT_OK # Print warnings to console warn_failed_ifs() { @@ -57,14 +58,16 @@ SUNW_NO_MPATHD=; export SUNW_NO_MPATHD smf_netstrategy -# -# Bring up link aggregations and initialize security objects. -# Note that link property initialization is deferred until after -# IP interfaces are plumbed to ensure that the links will not -# be unloaded (and the property settings lost). -# -/sbin/dladm up-aggr -/sbin/dladm init-secobj +if smf_is_globalzone; then + # + # Bring up link aggregations and initialize security objects. + # Note that link property initialization is deferred until after + # IP interfaces are plumbed to ensure that the links will not + # be unloaded (and the property settings lost). 
+ # + /sbin/dladm up-aggr + /sbin/dladm init-secobj +fi # # If the system was net booted by DHCP, hand DHCP management off to the @@ -197,13 +200,15 @@ if [ -n "$inet6_list" ]; then [ -n "$inet6_failed" ] && warn_failed_ifs "plumb IPv6" $inet6_failed fi -# -# Unfortunately, if a driver unloads and then is subsequently reloaded, no -# mechanism currently exists to restore the properties of its associated -# links. Hence, we wait until after interfaces have been plumbed (above) -# to initialize link properties. -# -/sbin/dladm init-linkprop +if smf_is_globalzone; then + # + # Unfortunately, if a driver unloads and then is subsequently reloaded, + # no mechanism currently exists to restore the properties of its + # associated links. Hence, we wait until after interfaces have been + # plumbed (above) to initialize link properties. + # + /sbin/dladm init-linkprop +fi # # Process the /etc/hostname.* files of plumbed IPv4 interfaces. If an diff --git a/usr/src/cmd/svc/milestone/net-routing-setup b/usr/src/cmd/svc/milestone/net-routing-setup index 09f5f4eabe..dbc879e5e1 100644 --- a/usr/src/cmd/svc/milestone/net-routing-setup +++ b/usr/src/cmd/svc/milestone/net-routing-setup @@ -20,7 +20,7 @@ # CDDL HEADER END # # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # ident "%Z%%M% %I% %E% SMI" @@ -30,11 +30,12 @@ . /lib/svc/share/smf_include.sh # -# In a zone we need this service to be up, but all of the work +# In a shared-IP zone we need this service to be up, but all of the work # it tries to do is irrelevant (and will actually lead to the service # failing if we try to do it), so just bail out. +# In the global zone and exclusive-IP zones we proceed. # -smf_is_globalzone || exit $SMF_EXIT_OK +smf_configure_ip || exit $SMF_EXIT_OK # # If routing.conf file is in place, and has not already been read in diff --git a/usr/src/cmd/svc/milestone/net-svc b/usr/src/cmd/svc/milestone/net-svc index e71e34c0d4..a91d3d09fa 100644 --- a/usr/src/cmd/svc/milestone/net-svc +++ b/usr/src/cmd/svc/milestone/net-svc @@ -20,7 +20,7 @@ # CDDL HEADER END # # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # #ident "%Z%%M% %I% %E% SMI" @@ -36,11 +36,12 @@ case "$1" in 'start') # - # In a zone we need this service to be up, but all of the work - # it tries to do is irrelevant (and will actually lead to the service - # failing if we try to do it), so just bail out. + # In a shared-IP zone we need this service to be up, but all of the + # work it tries to do is irrelevant (and will actually lead to the + # service failing if we try to do it), so just bail out. + # In the global zone and exclusive-IP zones we proceed. # - smf_is_globalzone || exit 0 + smf_configure_ip || exit 0 ;; # Fall through -- rest of script is the initialization code 'stop') diff --git a/usr/src/cmd/svc/shell/smf_include.sh b/usr/src/cmd/svc/shell/smf_include.sh index 066993fe01..9357c1bfbc 100644 --- a/usr/src/cmd/svc/shell/smf_include.sh +++ b/usr/src/cmd/svc/shell/smf_include.sh @@ -20,7 +20,7 @@ # CDDL HEADER END # # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. 
# #ident "%Z%%M% %I% %E% SMI" @@ -74,6 +74,27 @@ smf_is_nonglobalzone() { return 1 } +# smf_configure_ip +# +# Returns zero (success) if this zone needs IP to be configured i.e. +# the global zone or has an exclusive stack. 1 otherwise. +# +smf_configure_ip() { + [ "${SMF_ZONENAME:=`/sbin/zonename`}" = "global" -o \ + `/sbin/zonename -t` = exclusive ] && return 0 + return 1 +} + +# smf_dont_configure_ip +# +# Inverse of smf_configure_ip +# +smf_dont_configure_ip() { + [ "${SMF_ZONENAME:=`/sbin/zonename`}" != "global" -a \ + `/sbin/zonename -t` = shared ] && return 0 + return 1 +} + # smf_is_system_labeled # # Returns zero (success) if system is labeled (aka Trusted Extensions). diff --git a/usr/src/cmd/truss/print.c b/usr/src/cmd/truss/print.c index 81718694e9..09dec70e82 100644 --- a/usr/src/cmd/truss/print.c +++ b/usr/src/cmd/truss/print.c @@ -2326,6 +2326,7 @@ prt_zga(private_t *pri, int raw, long val) case ZONE_ATTR_INITNAME: s = "ZONE_ATTR_INITNAME"; break; case ZONE_ATTR_BOOTARGS: s = "ZONE_ATTR_BOOTARGS"; break; case ZONE_ATTR_BRAND: s = "ZONE_ATTR_BRAND"; break; + case ZONE_ATTR_FLAGS: s = "ZONE_ATTR_FLAGS"; break; case ZONE_ATTR_PHYS_MCAP: s = "ZONE_ATTR_PHYS_MCAP"; break; } } diff --git a/usr/src/cmd/truss/systable.c b/usr/src/cmd/truss/systable.c index a26004d0c0..e9b8c6a6e5 100644 --- a/usr/src/cmd/truss/systable.c +++ b/usr/src/cmd/truss/systable.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -797,6 +797,10 @@ static const struct systable zonetable[] = { {"zone_boot", 2, DEC, NOV, HID, DEC}, /* 7 */ {"zone_version", 2, HEX, NOV, HID, DEC}, /* 8 */ {"zone_setattr", 5, DEC, NOV, HID, DEC, ZGA, HEX, DEC}, /* 9 */ +{"zone_add_datalink", 3, DEC, NOV, HID, DEC, STG}, /* 10 */ +{"zone_remove_datalink", 3, DEC, NOV, HID, DEC, STG}, /* 11 */ +{"zone_check_datalink", 3, DEC, NOV, HID, HEX, STG}, /* 12 */ +{"zone_list_datalink", 4, DEC, NOV, HID, DEC, HEX, HEX}, /* 13 */ }; #define NZONECODE (sizeof (zonetable) / sizeof (struct systable)) @@ -963,6 +967,10 @@ const struct sysalias sysalias[] = { { "getzoneid", SYS_zone }, { "zone_list", SYS_zone }, { "zone_shutdown", SYS_zone }, + { "zone_add_datalink", SYS_zone }, + { "zone_remove_datalink", SYS_zone }, + { "zone_check_datalink", SYS_zone }, + { "zone_list_datalink", SYS_zone }, { "is_system_labeled", SYS_labelsys }, { "tnrh", SYS_labelsys }, { "tnrhtp", SYS_labelsys }, diff --git a/usr/src/cmd/zoneadm/Makefile b/usr/src/cmd/zoneadm/Makefile index e11609c6dd..27ced72cee 100644 --- a/usr/src/cmd/zoneadm/Makefile +++ b/usr/src/cmd/zoneadm/Makefile @@ -20,7 +20,7 @@ # # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # ident "%Z%%M% %I% %E% SMI" @@ -34,7 +34,7 @@ include ../Makefile.cmd ROOTMANIFESTDIR= $(ROOTSVCSYSTEM) -OBJS= zoneadm.o sw_cmp.o zfs.o +OBJS= zoneadm.o sw_cmp.o zfs.o dlprims.o SRCS = $(OBJS:.o=.c) POFILE=zoneadm_all.po POFILES= $(OBJS:%.o=%.po) diff --git a/usr/src/cmd/zoneadm/dlprims.c b/usr/src/cmd/zoneadm/dlprims.c new file mode 100644 index 0000000000..083e5a0743 --- /dev/null +++ b/usr/src/cmd/zoneadm/dlprims.c @@ -0,0 +1,273 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* FIXME: from snoop. Use common library when it comes into existence */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> +#include <sys/stropts.h> +#include <sys/signal.h> +#include <sys/dlpi.h> +#include <sys/socket.h> +#include <sys/sockio.h> +#include <net/if.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <stdio.h> +#include <strings.h> +#include <string.h> +#include <stdarg.h> +#include <unistd.h> +#include <stropts.h> +#include <stdlib.h> +#include <ctype.h> +#include <values.h> + +#define DLMAXWAIT (10) /* max wait in seconds for response */ +#define DLMAXBUF (80) + +typedef union dlbuf { + union DL_primitives dl; + char *buf[DLMAXBUF]; +} dlbuf_t; + +static int timed_getmsg(int, struct strbuf *, struct strbuf *, int *, int); +static boolean_t expecting(ulong_t, union DL_primitives *); + +/* + * Issue DL_INFO_REQ and wait for DL_INFO_ACK. + */ +static int +dlinforeq(int fd, dl_info_ack_t *infoackp) +{ + dlbuf_t buf; + struct strbuf ctl; + int flags; + + buf.dl.info_req.dl_primitive = DL_INFO_REQ; + + ctl.maxlen = sizeof (buf); + ctl.len = DL_INFO_REQ_SIZE; + ctl.buf = (char *)&buf.dl; + + flags = RS_HIPRI; + + if (putmsg(fd, &ctl, NULL, flags) < 0) + return (-1); + if (timed_getmsg(fd, &ctl, NULL, &flags, DLMAXWAIT) != 0) + return (-1); + + if (!expecting(DL_INFO_ACK, &buf.dl)) + return (-1); + + if (ctl.len < DL_INFO_ACK_SIZE) + return (-1); + if (flags != RS_HIPRI) + return (-1); + if (infoackp != NULL) + *infoackp = buf.dl.info_ack; + return (0); +} + +/* + * Issue DL_ATTACH_REQ. + * Return zero on success, nonzero on error. + */ +static int +dlattachreq(int fd, ulong_t ppa) +{ + dlbuf_t buf; + struct strbuf ctl; + int flags; + + buf.dl.attach_req.dl_primitive = DL_ATTACH_REQ; + buf.dl.attach_req.dl_ppa = ppa; + + ctl.maxlen = sizeof (buf.dl); + ctl.len = DL_ATTACH_REQ_SIZE; + ctl.buf = (char *)&buf.dl; + + flags = 0; + + if (putmsg(fd, &ctl, NULL, flags) < 0) + return (-1); + if (timed_getmsg(fd, &ctl, NULL, &flags, DLMAXWAIT) != 0) + return (-1); + + if (!expecting(DL_OK_ACK, &buf.dl)) + return (-1); + return (0); +} + +static int +timed_getmsg(int fd, struct strbuf *ctlp, struct strbuf *datap, int *flagsp, + int timeout) +{ + struct pollfd pfd; + int rc; + + pfd.fd = fd; + + pfd.events = POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI; + if ((rc = poll(&pfd, 1, timeout * 1000)) == 0) + return (0); + else if (rc == -1) + return (0); + + /* poll returned > 0 for this fd so getmsg should not block */ + *flagsp = 0; + + if ((rc = getmsg(fd, ctlp, datap, flagsp)) < 0) + return (0); + + /* + * Check for MOREDATA and/or MORECTL. + */ + if ((rc & (MORECTL | MOREDATA)) == (MORECTL | MOREDATA)) + return (-1); + if (rc & MORECTL) + return (-1); + if (rc & MOREDATA) + return (-1); + /* + * Check for at least sizeof (long) control data portion. 
+ */ + if (ctlp->len < sizeof (long)) + return (-1); + return (0); +} + +static boolean_t +expecting(ulong_t prim, union DL_primitives *dlp) +{ + if (dlp->dl_primitive == DL_ERROR_ACK || dlp->dl_primitive != prim) + return (B_FALSE); + + return (B_TRUE); +} + +/* + * Convert a device id to a ppa value + * e.g. "le0" -> 0 + */ +static int +device_ppa(char *device) +{ + char *p; + char *tp; + + p = strpbrk(device, "0123456789"); + if (p == NULL) + return (0); + /* ignore numbers within device names */ + for (tp = p; *tp != '\0'; tp++) + if (!isdigit(*tp)) + return (device_ppa(tp)); + return (atoi(p)); +} + +/* + * Convert a device id to a pathname. + * DLPI style 1 devices: "le0" -> "/dev/le0". + * DLPI style 2 devices: "le0" -> "/dev/le". + */ +static char * +device_path(char *device) +{ + static char buff[IF_NAMESIZE + 1]; + struct stat st; + char *p; + + (void) strcpy(buff, "/dev/"); + (void) strlcat(buff, device, IF_NAMESIZE); + + if (stat(buff, &st) == 0) + return (buff); + + for (p = buff + (strlen(buff) - 1); p > buff; p--) { + if (isdigit(*p)) + *p = '\0'; + else + break; + } + return (buff); +} + +/* + * Open up the device, and attach if needed. + */ +int +ifname_open(char *device) +{ + char *devname; + ulong_t ppa; + int netfd; + dl_info_ack_t netdl; + + /* + * Determine which network device + * to use if none given. + * Should get back a value like "/dev/le0". + */ + + devname = device_path(device); + if ((netfd = open(devname, O_RDWR)) < 0) + return (-1); + + ppa = device_ppa(device); + + /* + * Check for DLPI Version 2. + */ + if (dlinforeq(netfd, &netdl) != 0) { + (void) close(netfd); + return (-1); + } + + if (netdl.dl_version != DL_VERSION_2) { + (void) close(netfd); + return (-1); + } + + /* + * Attach for DLPI Style 2. + */ + if (netdl.dl_provider_style == DL_STYLE2) { + if (dlattachreq(netfd, ppa) != 0) { + (void) close(netfd); + return (-1); + } + + /* Reread more specific information */ + if (dlinforeq(netfd, &netdl) != 0) { + (void) close(netfd); + return (-1); + } + } + + return (netfd); +} diff --git a/usr/src/cmd/zoneadm/zoneadm.c b/usr/src/cmd/zoneadm/zoneadm.c index 9da3182f85..f302cdc4c7 100644 --- a/usr/src/cmd/zoneadm/zoneadm.c +++ b/usr/src/cmd/zoneadm/zoneadm.c @@ -95,6 +95,7 @@ typedef struct zone_entry { char zbrand[MAXNAMELEN]; char zroot[MAXPATHLEN]; char zuuid[UUID_PRINTABLE_STRING_LENGTH]; + zone_iptype_t ziptype; } zone_entry_t; static zone_entry_t *zents; @@ -142,6 +143,8 @@ struct cmd { static int cleanup_zonepath(char *, boolean_t); +extern int ifname_open(char *); + static int help_func(int argc, char *argv[]); static int ready_func(int argc, char *argv[]); static int boot_func(int argc, char *argv[]); @@ -415,12 +418,19 @@ static void zone_print(zone_entry_t *zent, boolean_t verbose, boolean_t parsable) { static boolean_t firsttime = B_TRUE; + char *ip_type_str; + + if (zent->ziptype == ZS_EXCLUSIVE) + ip_type_str = "excl"; + else + ip_type_str = "shared"; assert(!(verbose && parsable)); if (firsttime && verbose) { firsttime = B_FALSE; - (void) printf("%*s %-16s %-14s %-30s %-10s\n", ZONEID_WIDTH, - "ID", "NAME", "STATUS", "PATH", "BRAND"); + (void) printf("%*s %-16s %-10s %-30s %-8s %-6s\n", + ZONEID_WIDTH, "ID", "NAME", "STATUS", "PATH", "BRAND", + "IP"); } if (!verbose) { char *cp, *clim; @@ -439,7 +449,8 @@ zone_print(zone_entry_t *zent, boolean_t verbose, boolean_t parsable) (void) printf("%.*s\\:", clim - cp, cp); cp = clim + 1; } - (void) printf("%s:%s:%s\n", cp, zent->zuuid, zent->zbrand); + (void) printf("%s:%s:%s:%s\n", cp, zent->zuuid, 
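
ifname_open() above is used by zoneadm only as a capability probe for exclusive-IP configurations: if the interface can be opened (and, for a DLPI style-2 provider, attached), the descriptor is closed again straight away. A hedged usage sketch, with the wrapper name invented here:

static boolean_t
ifname_usable(char *ifname)
{
	int fd;

	/* e.g. ifname_usable("bge0") -- the interface name is an example */
	if ((fd = ifname_open(ifname)) == -1)
		return (B_FALSE);	/* can't open, or not DLPI version 2 */
	(void) close(fd);
	return (B_TRUE);
}
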
zent->zbrand, + ip_type_str); return; } if (zent->zstate_str != NULL) { @@ -447,8 +458,8 @@ zone_print(zone_entry_t *zent, boolean_t verbose, boolean_t parsable) (void) printf("%*s", ZONEID_WIDTH, "-"); else (void) printf("%*lu", ZONEID_WIDTH, zent->zid); - (void) printf(" %-16s %-14s %-30s %-10s\n", zent->zname, - zent->zstate_str, zent->zroot, zent->zbrand); + (void) printf(" %-16s %-10s %-30s %-8s %-6s\n", zent->zname, + zent->zstate_str, zent->zroot, zent->zbrand, ip_type_str); } } @@ -524,6 +535,54 @@ lookup_zone_info(const char *zone_name, zoneid_t zid, zone_entry_t *zent) return (Z_ERR); } + /* + * Get ip type of the zone. + * Note for global zone, ZS_SHARED is set always. + */ + if (zid == GLOBAL_ZONEID) { + zent->ziptype = ZS_SHARED; + } else { + + if (zent->zstate_num == ZONE_STATE_RUNNING) { + ushort_t flags; + + if (zone_getattr(zid, ZONE_ATTR_FLAGS, &flags, + sizeof (flags)) < 0) { + zperror2(zent->zname, + gettext("could not get zone flags")); + return (Z_ERR); + } + if (flags & ZF_NET_EXCL) + zent->ziptype = ZS_EXCLUSIVE; + else + zent->ziptype = ZS_SHARED; + } else { + zone_dochandle_t handle; + + if ((handle = zonecfg_init_handle()) == NULL) { + zperror2(zent->zname, + gettext("could not init handle")); + return (Z_ERR); + } + if ((err = zonecfg_get_handle(zent->zname, handle)) + != Z_OK) { + zperror2(zent->zname, + gettext("could not get handle")); + zonecfg_fini_handle(handle); + return (Z_ERR); + } + + if ((err = zonecfg_get_iptype(handle, &zent->ziptype)) + != Z_OK) { + zperror2(zent->zname, + gettext("could not get ip-type")); + zonecfg_fini_handle(handle); + return (Z_ERR); + } + zonecfg_fini_handle(handle); + } + } + return (Z_OK); } @@ -1542,6 +1601,7 @@ fake_up_local_zone(zoneid_t zid, zone_entry_t *zeptr) ssize_t result; uuid_t uuid; FILE *fp; + ushort_t flags; (void) memset(zeptr, 0, sizeof (*zeptr)); @@ -1575,6 +1635,15 @@ fake_up_local_zone(zoneid_t zid, zone_entry_t *zeptr) if (zonecfg_get_uuid(zeptr->zname, uuid) == Z_OK && !uuid_is_null(uuid)) uuid_unparse(uuid, zeptr->zuuid); + + if (zone_getattr(zid, ZONE_ATTR_FLAGS, &flags, sizeof (flags)) < 0) { + zperror2(zeptr->zname, gettext("could not get zone flags")); + exit(Z_ERR); + } + if (flags & ZF_NET_EXCL) + zeptr->ziptype = ZS_EXCLUSIVE; + else + zeptr->ziptype = ZS_SHARED; } static int @@ -2739,8 +2808,8 @@ print_net_err(char *phys, char *addr, int af, char *msg) if (!found_af && af != AF_UNSPEC) { (void) fprintf(stderr, gettext("could not verify %s %s=%s %s=%s\n\tthe %s address " - "family is not configured on this interface in the\n\t" - "global zone\n"), + "family is not configured on this network interface in " + "the\n\tglobal zone\n"), "net", "address", addr, "physical", phys, af2str(af)); return; } @@ -2757,11 +2826,19 @@ verify_handle(int cmd_num, zone_dochandle_t handle, char *argv[]) int return_code = Z_OK; int err; boolean_t in_alt_root; + zone_iptype_t iptype; + int fd; in_alt_root = zonecfg_in_alt_root(); if (in_alt_root) goto no_net; + if ((err = zonecfg_get_iptype(handle, &iptype)) != Z_OK) { + errno = err; + zperror(cmd_to_str(cmd_num), B_TRUE); + zonecfg_fini_handle(handle); + return (Z_ERR); + } if ((err = zonecfg_setnwifent(handle)) != Z_OK) { errno = err; zperror(cmd_to_str(cmd_num), B_TRUE); @@ -2771,47 +2848,114 @@ verify_handle(int cmd_num, zone_dochandle_t handle, char *argv[]) while (zonecfg_getnwifent(handle, &nwiftab) == Z_OK) { struct lifreq lifr; sa_family_t af = AF_UNSPEC; - int so, res; + char dl_owner_zname[ZONENAME_MAX]; + zoneid_t dl_owner_zid; + zoneid_t target_zid; + 
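
The running-zone branch above decides the IP type from the kernel by testing ZF_NET_EXCL in the ZONE_ATTR_FLAGS attribute; the same test recurs in fake_up_local_zone() and later in zoneadmd's teardown path. As a standalone sketch (the helper name is invented here, not part of the change):

static int
zone_is_exclusive_ip(zoneid_t zid, boolean_t *exclp)
{
	ushort_t flags;

	if (zone_getattr(zid, ZONE_ATTR_FLAGS, &flags, sizeof (flags)) < 0)
		return (-1);	/* caller falls back to zonecfg_get_iptype() */
	*exclp = ((flags & ZF_NET_EXCL) != 0);
	return (0);
}
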
int res; /* skip any loopback interfaces */ if (strcmp(nwiftab.zone_nwif_physical, "lo0") == 0) continue; - if ((res = zonecfg_valid_net_address(nwiftab.zone_nwif_address, - &lifr)) != Z_OK) { - print_net_err(nwiftab.zone_nwif_physical, - nwiftab.zone_nwif_address, af, - zonecfg_strerror(res)); - return_code = Z_ERR; - continue; - } - af = lifr.lifr_addr.ss_family; - (void) memset(&lifr, 0, sizeof (lifr)); - (void) strlcpy(lifr.lifr_name, nwiftab.zone_nwif_physical, - sizeof (lifr.lifr_name)); - lifr.lifr_addr.ss_family = af; - if ((so = socket(af, SOCK_DGRAM, 0)) < 0) { - (void) fprintf(stderr, gettext("could not verify %s " - "%s=%s %s=%s: could not get socket: %s\n"), "net", - "address", nwiftab.zone_nwif_address, "physical", - nwiftab.zone_nwif_physical, strerror(errno)); - return_code = Z_ERR; - continue; - } - if (ioctl(so, SIOCGLIFFLAGS, &lifr) < 0) { + switch (iptype) { + case ZS_SHARED: + if ((res = zonecfg_valid_net_address( + nwiftab.zone_nwif_address, &lifr)) != Z_OK) { + print_net_err(nwiftab.zone_nwif_physical, + nwiftab.zone_nwif_address, af, + zonecfg_strerror(res)); + return_code = Z_ERR; + continue; + } + af = lifr.lifr_addr.ss_family; + if (!zonecfg_ifname_exists(af, + nwiftab.zone_nwif_physical)) { + /* + * The interface failed to come up. We continue + * on anyway for the sake of consistency: a + * zone is not shut down if the interface fails + * any time after boot, nor does the global zone + * fail to boot if an interface fails. + */ + (void) fprintf(stderr, + gettext("WARNING: skipping network " + "interface '%s' which may not be " + "present/plumbed in the global " + "zone.\n"), + nwiftab.zone_nwif_physical); + } + break; + case ZS_EXCLUSIVE: + /* Warning if it exists for either IPv4 or IPv6 */ + + if (zonecfg_ifname_exists(AF_INET, + nwiftab.zone_nwif_physical) || + zonecfg_ifname_exists(AF_INET6, + nwiftab.zone_nwif_physical)) { + (void) fprintf(stderr, + gettext("WARNING: skipping network " + "interface '%s' which is used in the " + "global zone.\n"), + nwiftab.zone_nwif_physical); + break; + } /* - * The interface failed to come up. We continue on - * anyway for the sake of consistency: a zone is not - * shut down if the interface fails any time after - * boot, nor does the global zone fail to boot if an - * interface fails. + * Verify that the physical interface can + * be opened */ - (void) fprintf(stderr, - gettext("WARNING: skipping interface '%s' which " - "may not be present/plumbed in the global zone.\n"), - nwiftab.zone_nwif_physical); + fd = ifname_open(nwiftab.zone_nwif_physical); + if (fd == -1) { + (void) fprintf(stderr, + gettext("WARNING: skipping network " + "interface '%s' which cannot be opened.\n"), + nwiftab.zone_nwif_physical); + break; + } else { + (void) close(fd); + } + /* + * Verify whether the physical interface is already + * used by a zone. 
+ */ + dl_owner_zid = ALL_ZONES; + if (zone_check_datalink(&dl_owner_zid, + nwiftab.zone_nwif_physical) != 0) + break; + /* + * If the zone being verified is + * running and owns the interface + */ + target_zid = getzoneidbyname(target_zone); + if (target_zid == dl_owner_zid) + break; + + /* Zone id match failed, use name to check */ + if (getzonenamebyid(dl_owner_zid, dl_owner_zname, + ZONENAME_MAX) < 0) { + /* No name, show ID instead */ + (void) snprintf(dl_owner_zname, ZONENAME_MAX, + "<%d>", dl_owner_zid); + } else if (strcmp(dl_owner_zname, target_zone) == 0) + break; + + /* + * Note here we only report a warning that + * the interface is already in use by another + * running zone, and the verify process just + * goes on, if the interface is still in use + * when this zone really boots up, zoneadmd + * will find it. If the name of the zone which + * owns this interface cannot be determined, + * then it is not possible to determine if there + * is a conflict so just report it as a warning. + */ + (void) fprintf(stderr, + gettext("WARNING: skipping network interface " + "'%s' which is used by the non-global zone " + "'%s'.\n"), nwiftab.zone_nwif_physical, + dl_owner_zname); + break; } - (void) close(so); } (void) zonecfg_endnwifent(handle); no_net: @@ -3440,6 +3584,10 @@ warn_ip_match(zone_dochandle_t s_handle, char *source_zone, != NULL) *p = '\0'; + /* For exclusive-IP zones, address is not specified. */ + if (strlen(s_nwiftab.zone_nwif_address) == 0) + continue; + if (strcmp(t_nwiftab.zone_nwif_address, s_nwiftab.zone_nwif_address) == 0) { (void) fprintf(stderr, diff --git a/usr/src/cmd/zoneadmd/Makefile b/usr/src/cmd/zoneadmd/Makefile index 34914694a8..2c3077f678 100644 --- a/usr/src/cmd/zoneadmd/Makefile +++ b/usr/src/cmd/zoneadmd/Makefile @@ -22,7 +22,7 @@ # # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. 
# # ident "%Z%%M% %I% %E% SMI" @@ -43,7 +43,7 @@ CFLAGS += $(CCVERBOSE) LAZYLIBS = $(ZLAZYLOAD) -ltsnet -ltsol $(ZNOLAZYLOAD) lint := LAZYLIBS = -ltsnet -ltsol LDLIBS += -lsocket -lzonecfg -lnsl -ldevinfo -ldevice -lnvpair \ - -lgen -lbsm -lcontract -lzfs -luuid -lbrand $(LAZYLIBS) + -lgen -lbsm -lcontract -lzfs -luuid -lbrand -ldladm $(LAZYLIBS) XGETFLAGS += -a -x zoneadmd.xcl .KEEP_STATE: diff --git a/usr/src/cmd/zoneadmd/vplat.c b/usr/src/cmd/zoneadmd/vplat.c index d12c3ac25f..68a8592969 100644 --- a/usr/src/cmd/zoneadmd/vplat.c +++ b/usr/src/cmd/zoneadmd/vplat.c @@ -74,6 +74,10 @@ #include <sys/stropts.h> #include <sys/conf.h> +#include <sys/dlpi.h> +#include <libdlpi.h> +#include <libdladm.h> + #include <inet/tcp.h> #include <arpa/inet.h> #include <netinet/in.h> @@ -974,6 +978,29 @@ mount_one_dev_symlink_cb(void *arg, const char *source, const char *target) return (di_prof_add_symlink(prof, source, target)); } +static int +get_iptype(zlog_t *zlogp, zone_iptype_t *iptypep) +{ + zone_dochandle_t handle; + + if ((handle = zonecfg_init_handle()) == NULL) { + zerror(zlogp, B_TRUE, "getting zone configuration handle"); + return (-1); + } + if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) { + zerror(zlogp, B_FALSE, "invalid configuration"); + zonecfg_fini_handle(handle); + return (-1); + } + if (zonecfg_get_iptype(handle, iptypep) != Z_OK) { + zerror(zlogp, B_FALSE, "invalid ip-type configuration"); + zonecfg_fini_handle(handle); + return (-1); + } + zonecfg_fini_handle(handle); + return (0); +} + /* * Apply the standard lists of devices/symlinks/mappings and the user-specified * list of devices (via zonecfg) to the /dev filesystem. The filesystem will @@ -989,6 +1016,8 @@ mount_one_dev(zlog_t *zlogp, char *devpath) di_prof_t prof = NULL; int err; int retval = -1; + zone_iptype_t iptype; + const char *curr_iptype; if (di_prof_init(devpath, &prof)) { zerror(zlogp, B_TRUE, "failed to initialize profile"); @@ -1002,8 +1031,21 @@ mount_one_dev(zlog_t *zlogp, char *devpath) goto cleanup; } + if (get_iptype(zlogp, &iptype) < 0) { + zerror(zlogp, B_TRUE, "unable to determine ip-type"); + goto cleanup; + } + switch (iptype) { + case ZS_SHARED: + curr_iptype = "shared"; + break; + case ZS_EXCLUSIVE: + curr_iptype = "exclusive"; + break; + } + if (brand_platform_iter_devices(bh, zone_name, - mount_one_dev_device_cb, prof) != 0) { + mount_one_dev_device_cb, prof, curr_iptype) != 0) { zerror(zlogp, B_TRUE, "failed to add standard device"); goto cleanup; } @@ -1715,7 +1757,7 @@ addr2netmask(char *prefixstr, int maxprefixlen, uchar_t *maskstr) * If anything goes wrong, log an error message and return an error. 
*/ static int -unconfigure_network_interfaces(zlog_t *zlogp, zoneid_t zone_id) +unconfigure_shared_network_interfaces(zlog_t *zlogp, zoneid_t zone_id) { struct lifnum lifn; struct lifconf lifc; @@ -1734,7 +1776,7 @@ unconfigure_network_interfaces(zlog_t *zlogp, zoneid_t zone_id) lifn.lifn_flags = (int)lifc_flags; if (ioctl(s, SIOCGLIFNUM, (char *)&lifn) < 0) { zerror(zlogp, B_TRUE, - "could not determine number of interfaces"); + "could not determine number of network interfaces"); ret_code = -1; goto bad; } @@ -1750,7 +1792,8 @@ unconfigure_network_interfaces(zlog_t *zlogp, zoneid_t zone_id) lifc.lifc_len = bufsize; lifc.lifc_buf = buf; if (ioctl(s, SIOCGLIFCONF, (char *)&lifc) < 0) { - zerror(zlogp, B_TRUE, "could not get configured interfaces"); + zerror(zlogp, B_TRUE, "could not get configured network " + "interfaces"); ret_code = -1; goto bad; } @@ -1776,14 +1819,14 @@ unconfigure_network_interfaces(zlog_t *zlogp, zoneid_t zone_id) continue; zerror(zlogp, B_TRUE, "%s: could not determine the zone to which this " - "interface is bound", lifrl.lifr_name); + "network interface is bound", lifrl.lifr_name); ret_code = -1; continue; } if (lifrl.lifr_zoneid == zone_id) { if (ioctl(s, SIOCLIFREMOVEIF, (caddr_t)&lifrl) < 0) { zerror(zlogp, B_TRUE, - "%s: could not remove interface", + "%s: could not remove network interface", lifrl.lifr_name); ret_code = -1; continue; @@ -1927,7 +1970,7 @@ who_is_using(zlog_t *zlogp, struct lifreq *lifr) return (NULL); } if ((rtmsg.hdr.rtm_addrs & RTA_IFP) == 0) { - zerror(zlogp, B_FALSE, "interface not found"); + zerror(zlogp, B_FALSE, "network interface not found"); return (NULL); } cp = ((char *)(&rtmsg.hdr + 1)); @@ -1945,7 +1988,8 @@ who_is_using(zlog_t *zlogp, struct lifreq *lifr) break; } if (ifp == NULL) { - zerror(zlogp, B_FALSE, "interface could not be determined"); + zerror(zlogp, B_FALSE, "network interface could not be " + "determined"); return (NULL); } @@ -1964,8 +2008,8 @@ who_is_using(zlog_t *zlogp, struct lifreq *lifr) (void) strlcpy(lifr->lifr_name, save_if_name, sizeof (save_if_name)); if (i < 0) { zerror(zlogp, B_TRUE, - "%s: could not determine the zone interface belongs to", - lifr->lifr_name); + "%s: could not determine the zone network interface " + "belongs to", lifr->lifr_name); return (NULL); } if (getzonenamebyid(lifr->lifr_zoneid, answer, sizeof (answer)) < 0) @@ -2061,7 +2105,7 @@ configure_one_interface(zlog_t *zlogp, zoneid_t zone_id, * the console by zoneadm(1M) so instead we log the * message to syslog and continue. 
*/ - zerror(&logsys, B_TRUE, "WARNING: skipping interface " + zerror(&logsys, B_TRUE, "WARNING: skipping network interface " "'%s' which may not be present/plumbed in the " "global zone.", lifr.lifr_name); (void) close(s); @@ -2081,8 +2125,8 @@ configure_one_interface(zlog_t *zlogp, zoneid_t zone_id, lifr.lifr_zoneid = zone_id; if (ioctl(s, SIOCSLIFZONE, (caddr_t)&lifr) < 0) { - zerror(zlogp, B_TRUE, "%s: could not place interface into zone", - lifr.lifr_name); + zerror(zlogp, B_TRUE, "%s: could not place network interface " + "into zone", lifr.lifr_name); goto bad; } @@ -2180,7 +2224,8 @@ configure_one_interface(zlog_t *zlogp, zoneid_t zone_id, */ if (errno != EADDRNOTAVAIL) { zerror(zlogp, B_TRUE, - "%s: could not bring interface up", lifr.lifr_name); + "%s: could not bring network interface up", + lifr.lifr_name); goto bad; } if (ioctl(s, SIOCGLIFADDR, (caddr_t)&lifr) < 0) { @@ -2192,11 +2237,12 @@ configure_one_interface(zlog_t *zlogp, zoneid_t zone_id, errno = save_errno; if (zone_using == NULL) zerror(zlogp, B_TRUE, - "%s: could not bring interface up", lifr.lifr_name); + "%s: could not bring network interface up", + lifr.lifr_name); else - zerror(zlogp, B_TRUE, "%s: could not bring interface " - "up: address in use by zone '%s'", lifr.lifr_name, - zone_using); + zerror(zlogp, B_TRUE, "%s: could not bring network " + "interface up: address in use by zone '%s'", + lifr.lifr_name, zone_using); goto bad; } if ((lifr.lifr_flags & IFF_MULTICAST) && ((af == AF_INET && @@ -2249,13 +2295,13 @@ configure_one_interface(zlog_t *zlogp, zoneid_t zone_id, */ if (rlen < mcast_rtmsg.m_rtm.rtm_msglen) { if (rlen < 0) { - zerror(zlogp, B_TRUE, "WARNING: interface " - "'%s' not available as default for " - "multicast.", lifr.lifr_name); + zerror(zlogp, B_TRUE, "WARNING: network " + "interface '%s' not available as default " + "for multicast.", lifr.lifr_name); } else { - zerror(zlogp, B_FALSE, "WARNING: interface " - "'%s' not available as default for " - "multicast; routing socket returned " + zerror(zlogp, B_FALSE, "WARNING: network " + "interface '%s' not available as default " + "for multicast; routing socket returned " "unexpected %d bytes.", lifr.lifr_name, rlen); } @@ -2321,7 +2367,7 @@ bad: * whatever we set up, and return an error. */ static int -configure_network_interfaces(zlog_t *zlogp) +configure_shared_network_interfaces(zlog_t *zlogp) { zone_dochandle_t handle; struct zone_nwiftab nwiftab, loopback_iftab; @@ -2383,6 +2429,279 @@ configure_network_interfaces(zlog_t *zlogp) return (0); } +static void +show_owner(zlog_t *zlogp, char *dlname) +{ + zoneid_t dl_owner_zid; + char dl_owner_zname[ZONENAME_MAX]; + + dl_owner_zid = ALL_ZONES; + if (zone_check_datalink(&dl_owner_zid, dlname) != 0) + (void) snprintf(dl_owner_zname, ZONENAME_MAX, "<unknown>"); + else if (getzonenamebyid(dl_owner_zid, dl_owner_zname, ZONENAME_MAX) + < 0) + (void) snprintf(dl_owner_zname, ZONENAME_MAX, "<%d>", + dl_owner_zid); + + errno = EPERM; + zerror(zlogp, B_TRUE, "WARNING: skipping network interface '%s' " + "which is used by the non-global zone '%s'.\n", + dlname, dl_owner_zname); +} + +static int +add_datalink(zlog_t *zlogp, zoneid_t zoneid, char *dlname) +{ + /* First check if it's in use by global zone. 
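
show_owner() below resolves a datalink to the name of the zone that currently holds it, and zoneadm's exclusive-IP verify path performs the same zone_check_datalink() plus getzonenamebyid() lookup. Folded into one hypothetical helper, the lookup is roughly:

static void
datalink_owner(char *dlname, char *zname, size_t znamelen)
{
	zoneid_t zid = ALL_ZONES;

	if (zone_check_datalink(&zid, dlname) != 0) {
		/* not currently assigned to any zone */
		(void) snprintf(zname, znamelen, "<unknown>");
	} else if (getzonenamebyid(zid, zname, znamelen) < 0) {
		/* owner exists but its name can't be resolved; show the id */
		(void) snprintf(zname, znamelen, "<%d>", zid);
	}
}
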
*/ + if (zonecfg_ifname_exists(AF_INET, dlname) || + zonecfg_ifname_exists(AF_INET6, dlname)) { + errno = EPERM; + zerror(zlogp, B_TRUE, "WARNING: skipping network interface " + "'%s' which is used in the global zone.", dlname); + return (-1); + } + + /* Add access control information */ + if (zone_add_datalink(zoneid, dlname) != 0) { + /* If someone got this link before us, show its name */ + if (errno == EPERM) + show_owner(zlogp, dlname); + else + zerror(zlogp, B_TRUE, "WARNING: unable to add network " + "interface '%s'.", dlname); + return (-1); + } + + /* Hold the link for this zone */ + if (dladm_hold_link(dlname, zoneid, B_FALSE) < 0) { + zerror(zlogp, B_TRUE, "WARNING: unable to hold network " + "interface '%s'.", dlname); + (void) zone_remove_datalink(zoneid, dlname); + return (-1); + } + + return (0); +} + +static int +remove_datalink(zlog_t *zlogp, zoneid_t zoneid, char *dlname) +{ + /* + * Remove access control information. + * If the errno is ENXIO, the interface is not added yet, + * nothing to report then. + */ + if (zone_remove_datalink(zoneid, dlname) != 0) { + if (errno == ENXIO) + return (0); + zerror(zlogp, B_TRUE, "unable to remove network interface '%s'", + dlname); + return (-1); + } + + if (dladm_rele_link(dlname, 0, B_FALSE) < 0) { + zerror(zlogp, B_TRUE, "unable to release network " + "interface '%s'", dlname); + return (-1); + } + return (0); +} + +/* + * Add the kernel access control information for the interface names. + * If anything goes wrong, we log a general error message, attempt to tear down + * whatever we set up, and return an error. + */ +static int +configure_exclusive_network_interfaces(zlog_t *zlogp) +{ + zone_dochandle_t handle; + struct zone_nwiftab nwiftab; + zoneid_t zoneid; + char rootpath[MAXPATHLEN]; + char path[MAXPATHLEN]; + di_prof_t prof = NULL; + boolean_t added = B_FALSE; + + if ((zoneid = getzoneidbyname(zone_name)) == -1) { + zerror(zlogp, B_TRUE, "unable to get zoneid"); + return (-1); + } + + if ((handle = zonecfg_init_handle()) == NULL) { + zerror(zlogp, B_TRUE, "getting zone configuration handle"); + return (-1); + } + if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) { + zerror(zlogp, B_FALSE, "invalid configuration"); + zonecfg_fini_handle(handle); + return (-1); + } + + if (zonecfg_setnwifent(handle) != Z_OK) { + zonecfg_fini_handle(handle); + return (0); + } + + for (;;) { + if (zonecfg_getnwifent(handle, &nwiftab) != Z_OK) + break; + + if (prof == NULL) { + if (zone_get_devroot(zone_name, rootpath, + sizeof (rootpath)) != Z_OK) { + (void) zonecfg_endnwifent(handle); + zonecfg_fini_handle(handle); + zerror(zlogp, B_TRUE, + "unable to determine dev root"); + return (-1); + } + (void) snprintf(path, sizeof (path), "%s%s", rootpath, + "/dev"); + if (di_prof_init(path, &prof) != 0) { + (void) zonecfg_endnwifent(handle); + zonecfg_fini_handle(handle); + zerror(zlogp, B_TRUE, + "failed to initialize profile"); + return (-1); + } + } + + /* + * Only create the /dev entry if it's not in use. + * Note here the zone still boots when the interfaces + * assigned is inaccessible, used by others, etc. 
+ */ + if (add_datalink(zlogp, zoneid, nwiftab.zone_nwif_physical) + == 0) { + if (di_prof_add_dev(prof, nwiftab.zone_nwif_physical) + != 0) { + (void) zonecfg_endnwifent(handle); + zonecfg_fini_handle(handle); + zerror(zlogp, B_TRUE, + "failed to add network device"); + return (-1); + } + added = B_TRUE; + } + } + (void) zonecfg_endnwifent(handle); + zonecfg_fini_handle(handle); + + if (prof != NULL && added) { + if (di_prof_commit(prof) != 0) { + zerror(zlogp, B_TRUE, "failed to commit profile"); + return (-1); + } + } + if (prof != NULL) + di_prof_fini(prof); + + return (0); +} + +/* + * Get the list of the data-links from kernel, and try to remove it + */ +static int +unconfigure_exclusive_network_interfaces_run(zlog_t *zlogp, zoneid_t zoneid) +{ + char *dlnames, *ptr; + int dlnum, dlnum_saved, i; + + dlnum = 0; + if (zone_list_datalink(zoneid, &dlnum, NULL) != 0) { + zerror(zlogp, B_TRUE, "unable to list network interfaces"); + return (-1); + } +again: + /* this zone doesn't have any data-links */ + if (dlnum == 0) + return (0); + + dlnames = malloc(dlnum * LIFNAMSIZ); + if (dlnames == NULL) { + zerror(zlogp, B_TRUE, "memory allocation failed"); + return (-1); + } + dlnum_saved = dlnum; + + if (zone_list_datalink(zoneid, &dlnum, dlnames) != 0) { + zerror(zlogp, B_TRUE, "unable to list network interfaces"); + free(dlnames); + return (-1); + } + if (dlnum_saved < dlnum) { + /* list increased, try again */ + free(dlnames); + goto again; + } + ptr = dlnames; + for (i = 0; i < dlnum; i++) { + /* Remove access control information */ + if (remove_datalink(zlogp, zoneid, ptr) != 0) { + free(dlnames); + return (-1); + } + ptr += LIFNAMSIZ; + } + free(dlnames); + return (0); +} + +/* + * Get the list of the data-links from configuration, and try to remove it + */ +static int +unconfigure_exclusive_network_interfaces_static(zlog_t *zlogp, zoneid_t zoneid) +{ + zone_dochandle_t handle; + struct zone_nwiftab nwiftab; + + if ((handle = zonecfg_init_handle()) == NULL) { + zerror(zlogp, B_TRUE, "getting zone configuration handle"); + return (-1); + } + if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) { + zerror(zlogp, B_FALSE, "invalid configuration"); + zonecfg_fini_handle(handle); + return (-1); + } + if (zonecfg_setnwifent(handle) != Z_OK) { + zonecfg_fini_handle(handle); + return (0); + } + for (;;) { + if (zonecfg_getnwifent(handle, &nwiftab) != Z_OK) + break; + /* Remove access control information */ + if (remove_datalink(zlogp, zoneid, nwiftab.zone_nwif_physical) + != 0) { + (void) zonecfg_endnwifent(handle); + zonecfg_fini_handle(handle); + return (-1); + } + } + (void) zonecfg_endnwifent(handle); + zonecfg_fini_handle(handle); + return (0); +} + +/* + * Remove the access control information from the kernel for the exclusive + * network interfaces. 
+ */ +static int +unconfigure_exclusive_network_interfaces(zlog_t *zlogp, zoneid_t zoneid) +{ + if (unconfigure_exclusive_network_interfaces_run(zlogp, zoneid) != 0) { + return (unconfigure_exclusive_network_interfaces_static(zlogp, + zoneid)); + } + + return (0); +} + static int tcp_abort_conn(zlog_t *zlogp, zoneid_t zoneid, const struct sockaddr_storage *local, const struct sockaddr_storage *remote) @@ -3562,6 +3881,8 @@ vplat_create(zlog_t *zlogp, boolean_t mount_cmd) tsol_zcent_t *zcent = NULL; int match = 0; int doi = 0; + int flags; + zone_iptype_t iptype; if (zone_get_rootpath(zone_name, rootpath, sizeof (rootpath)) != Z_OK) { zerror(zlogp, B_TRUE, "unable to determine zone root"); @@ -3570,11 +3891,35 @@ vplat_create(zlog_t *zlogp, boolean_t mount_cmd) if (zonecfg_in_alt_root()) resolve_lofs(zlogp, rootpath, sizeof (rootpath)); + if (get_iptype(zlogp, &iptype) < 0) { + zerror(zlogp, B_TRUE, "unable to determine ip-type"); + return (-1); + } + switch (iptype) { + case ZS_SHARED: + flags = 0; + break; + case ZS_EXCLUSIVE: + flags = ZCF_NET_EXCL; + break; + } + if ((privs = priv_allocset()) == NULL) { zerror(zlogp, B_TRUE, "%s failed", "priv_allocset"); return (-1); } priv_emptyset(privs); + if (iptype == ZS_EXCLUSIVE) { + /* + * add PRIV_NET_RAWACCESS and PRIV_SYS_IP_CONFIG + */ + if (priv_addset(privs, PRIV_NET_RAWACCESS) != 0 || + priv_addset(privs, PRIV_SYS_IP_CONFIG) != 0) { + zerror(zlogp, B_TRUE, + "Failed to add networking privileges"); + goto error; + } + } if (get_privset(zlogp, privs, mount_cmd) != 0) goto error; @@ -3669,7 +4014,8 @@ vplat_create(zlog_t *zlogp, boolean_t mount_cmd) xerr = 0; if ((zoneid = zone_create(kzone, rootpath, privs, rctlbuf, - rctlbufsz, zfsbuf, zfsbufsz, &xerr, match, doi, zlabel)) == -1) { + rctlbufsz, zfsbuf, zfsbufsz, &xerr, match, doi, zlabel, + flags)) == -1) { if (xerr == ZE_AREMOUNTS) { if (zonecfg_find_mounts(rootpath, NULL, NULL) < 1) { zerror(zlogp, B_FALSE, @@ -3863,9 +4209,31 @@ vplat_bringup(zlog_t *zlogp, boolean_t mount_cmd, zoneid_t zoneid) return (-1); } - if (!mount_cmd && configure_network_interfaces(zlogp) != 0) { - lofs_discard_mnttab(); - return (-1); + if (!mount_cmd) { + zone_iptype_t iptype; + + if (get_iptype(zlogp, &iptype) < 0) { + zerror(zlogp, B_TRUE, "unable to determine ip-type"); + lofs_discard_mnttab(); + return (-1); + } + + switch (iptype) { + case ZS_SHARED: + /* Always do this to make lo0 get configured */ + if (configure_shared_network_interfaces(zlogp) != 0) { + lofs_discard_mnttab(); + return (-1); + } + break; + case ZS_EXCLUSIVE: + if (configure_exclusive_network_interfaces(zlogp) != + 0) { + lofs_discard_mnttab(); + return (-1); + } + break; + } } write_index_file(zoneid); @@ -3952,6 +4320,7 @@ vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting) char cmdbuf[MAXPATHLEN]; char brand[MAXNAMELEN]; brand_handle_t bh = NULL; + ushort_t flags; kzone = zone_name; if (zonecfg_in_alt_root()) { @@ -4016,11 +4385,41 @@ vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting) goto error; } - if (!unmount_cmd && - unconfigure_network_interfaces(zlogp, zoneid) != 0) { - zerror(zlogp, B_FALSE, - "unable to unconfigure network interfaces in zone"); - goto error; + if (!unmount_cmd) { + zone_iptype_t iptype; + + if (zone_getattr(zoneid, ZONE_ATTR_FLAGS, &flags, + sizeof (flags)) < 0) { + if (get_iptype(zlogp, &iptype) < 0) { + zerror(zlogp, B_TRUE, "unable to determine " + "ip-type"); + goto error; + } + } else { + if (flags & ZF_NET_EXCL) + iptype = ZS_EXCLUSIVE; + else + iptype = 
ZS_SHARED; + } + + switch (iptype) { + case ZS_SHARED: + if (unconfigure_shared_network_interfaces(zlogp, + zoneid) != 0) { + zerror(zlogp, B_FALSE, "unable to unconfigure " + "network interfaces in zone"); + goto error; + } + break; + case ZS_EXCLUSIVE: + if (unconfigure_exclusive_network_interfaces(zlogp, + zoneid) != 0) { + zerror(zlogp, B_FALSE, "unable to unconfigure " + "network interfaces in zone"); + goto error; + } + break; + } } if (!unmount_cmd && tcp_abort_connections(zlogp, zoneid) != 0) { diff --git a/usr/src/cmd/zonecfg/zonecfg.c b/usr/src/cmd/zonecfg/zonecfg.c index 34d6b99480..443c0895b1 100644 --- a/usr/src/cmd/zonecfg/zonecfg.c +++ b/usr/src/cmd/zonecfg/zonecfg.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -178,6 +178,7 @@ static char *res_types[] = { ALIAS_MAXSEMIDS, ALIAS_SHARES, "scheduling-class", + "ip-type", NULL }; @@ -217,6 +218,7 @@ static char *prop_types[] = { ALIAS_MAXLOCKEDMEM, ALIAS_MAXSWAP, "scheduling-class", + "ip-type", NULL }; @@ -273,6 +275,7 @@ static const char *clear_cmds[] = { "clear limitpriv", "clear bootargs", "clear scheduling-class", + "clear ip-type", "clear " ALIAS_MAXLWPS, "clear " ALIAS_MAXSHMMEM, "clear " ALIAS_MAXSHMIDS, @@ -317,6 +320,7 @@ static const char *set_cmds[] = { "set limitpriv=", "set bootargs=", "set scheduling-class=", + "set ip-type=", "set " ALIAS_MAXLWPS "=", "set " ALIAS_MAXSHMMEM "=", "set " ALIAS_MAXSHMIDS "=", @@ -344,6 +348,7 @@ static const char *info_cmds[] = { "info bootargs", "info brand", "info scheduling-class", + "info ip-type", "info max-lwps", "info max-shm-memory", "info max-shm-ids", @@ -914,6 +919,11 @@ usage(bool verbose, uint_t flags) pt_to_str(PT_PHYSICAL), gettext("<interface>")); (void) fprintf(fp, gettext("See ifconfig(1M) for " "details of the <interface> string.\n")); + (void) fprintf(fp, gettext("%s %s is valid if the %s " + "property is set to %s, otherwise it must not be " + "set.\n"), + cmd_to_str(CMD_SET), pt_to_str(PT_ADDRESS), + pt_to_str(PT_IPTYPE), "shared"); break; case RT_DEVICE: (void) fprintf(fp, gettext("The '%s' resource scope is " @@ -1095,6 +1105,8 @@ usage(bool verbose, uint_t flags) (void) fprintf(fp, "\t%s\t%s\n", gettext("(global)"), pt_to_str(PT_SCHED)); (void) fprintf(fp, "\t%s\t%s\n", gettext("(global)"), + pt_to_str(PT_IPTYPE)); + (void) fprintf(fp, "\t%s\t%s\n", gettext("(global)"), pt_to_str(PT_MAXLWPS)); (void) fprintf(fp, "\t%s\t%s\n", gettext("(global)"), pt_to_str(PT_MAXSHMMEM)); @@ -1571,6 +1583,7 @@ export_func(cmd_t *cmd) char *limitpriv; FILE *of; boolean_t autoboot; + zone_iptype_t iptype; bool need_to_close = FALSE; assert(cmd != NULL); @@ -1651,6 +1664,19 @@ export_func(cmd_t *cmd) (void) fprintf(of, "%s %s=%s\n", cmd_to_str(CMD_SET), pt_to_str(PT_SCHED), sched); + if (zonecfg_get_iptype(handle, &iptype) == Z_OK) { + switch (iptype) { + case ZS_SHARED: + (void) fprintf(of, "%s %s=%s\n", cmd_to_str(CMD_SET), + pt_to_str(PT_IPTYPE), "shared"); + break; + case ZS_EXCLUSIVE: + (void) fprintf(of, "%s %s=%s\n", cmd_to_str(CMD_SET), + pt_to_str(PT_IPTYPE), "exclusive"); + break; + } + } + if ((err = zonecfg_setipdent(handle)) != Z_OK) { zone_perror(zone, err, FALSE); goto done; @@ -2157,7 +2183,8 @@ gz_invalid_rt_property(int type) { return (global_zone && (type == RT_ZONENAME || type == RT_ZONEPATH || type == RT_AUTOBOOT || type == RT_LIMITPRIV || - type == RT_BOOTARGS || type == RT_BRAND || type == RT_SCHED)); + type 
== RT_BOOTARGS || type == RT_BRAND || type == RT_SCHED || + type == RT_IPTYPE)); } static boolean_t @@ -2165,7 +2192,8 @@ gz_invalid_property(int type) { return (global_zone && (type == PT_ZONENAME || type == PT_ZONEPATH || type == PT_AUTOBOOT || type == PT_LIMITPRIV || - type == PT_BOOTARGS || type == PT_BRAND || type == PT_SCHED)); + type == PT_BOOTARGS || type == PT_BRAND || type == PT_SCHED || + type == PT_IPTYPE)); } void @@ -3268,6 +3296,13 @@ clear_global(cmd_t *cmd) else need_to_commit = TRUE; return; + case PT_IPTYPE: + /* shared is default; we'll treat as equivalent to clearing */ + if ((err = zonecfg_set_iptype(handle, ZS_SHARED)) != Z_OK) + z_cmd_rt_perror(CMD_CLEAR, RT_IPTYPE, err, TRUE); + else + need_to_commit = TRUE; + return; case PT_MAXLWPS: remove_aliased_rctl(PT_MAXLWPS, ALIAS_MAXLWPS); return; @@ -3555,6 +3590,30 @@ valid_fs_type(const char *type) return (B_TRUE); } +static boolean_t +allow_exclusive() +{ + brand_handle_t bh; + char brand[MAXNAMELEN]; + boolean_t ret; + + if (zonecfg_get_brand(handle, brand, sizeof (brand)) != Z_OK) { + zerr("%s: %s\n", zone, gettext("could not get zone brand")); + return (B_FALSE); + } + if ((bh = brand_open(brand)) == NULL) { + zerr("%s: %s\n", zone, gettext("unknown brand.")); + return (B_FALSE); + } + ret = brand_allow_exclusive_ip(bh); + brand_close(bh); + if (!ret) + zerr(gettext("%s cannot be '%s' when %s is '%s'."), + pt_to_str(PT_IPTYPE), "exclusive", + pt_to_str(PT_BRAND), brand); + return (ret); +} + static void set_aliased_rctl(char *alias, int prop_type, char *s) { @@ -3605,6 +3664,7 @@ set_func(cmd_t *cmd) int arg, err, res_type, prop_type; property_value_ptr_t pp; boolean_t autoboot; + zone_iptype_t iptype; boolean_t force_set = FALSE; size_t physmem_size = sizeof (in_progress_mcaptab.zone_physmem_cap); uint64_t mem_cap, mem_limit; @@ -3655,6 +3715,8 @@ set_func(cmd_t *cmd) res_type = RT_BOOTARGS; } else if (prop_type == PT_SCHED) { res_type = RT_SCHED; + } else if (prop_type == PT_IPTYPE) { + res_type = RT_IPTYPE; } else if (prop_type == PT_MAXLWPS) { res_type = RT_MAXLWPS; } else if (prop_type == PT_MAXSHMMEM) { @@ -3831,6 +3893,26 @@ set_func(cmd_t *cmd) else need_to_commit = TRUE; return; + case RT_IPTYPE: + if (strcmp(prop_id, "shared") == 0) { + iptype = ZS_SHARED; + } else if (strcmp(prop_id, "exclusive") == 0) { + iptype = ZS_EXCLUSIVE; + } else { + zerr(gettext("%s value must be '%s' or '%s'."), + pt_to_str(PT_IPTYPE), "shared", "exclusive"); + saw_error = TRUE; + return; + } + if (iptype == ZS_EXCLUSIVE && !allow_exclusive()) { + saw_error = TRUE; + return; + } + if ((err = zonecfg_set_iptype(handle, iptype)) != Z_OK) + zone_perror(zone, err, TRUE); + else + need_to_commit = TRUE; + return; case RT_MAXLWPS: set_aliased_rctl(ALIAS_MAXLWPS, prop_type, prop_id); return; @@ -4304,6 +4386,28 @@ info_sched(zone_dochandle_t handle, FILE *fp) } static void +info_iptype(zone_dochandle_t handle, FILE *fp) +{ + zone_iptype_t iptype; + int err; + + if ((err = zonecfg_get_iptype(handle, &iptype)) == Z_OK) { + switch (iptype) { + case ZS_SHARED: + (void) fprintf(fp, "%s: %s\n", pt_to_str(PT_IPTYPE), + "shared"); + break; + case ZS_EXCLUSIVE: + (void) fprintf(fp, "%s: %s\n", pt_to_str(PT_IPTYPE), + "exclusive"); + break; + } + } else { + zone_perror(zone, err, TRUE); + } +} + +static void output_fs(FILE *fp, struct zone_fstab *fstab) { zone_fsopt_t *this; @@ -4430,6 +4534,7 @@ info_net(zone_dochandle_t handle, FILE *fp, cmd_t *cmd) strcmp(user.zone_nwif_physical, lookup.zone_nwif_physical) != 0) continue; /* no match */ + /* 
If present make sure it matches */ if (strlen(user.zone_nwif_address) > 0 && !zonecfg_same_net_address(user.zone_nwif_address, lookup.zone_nwif_address)) @@ -4822,6 +4927,7 @@ info_func(cmd_t *cmd) if (!global_zone) { info_limitpriv(handle, fp); info_sched(handle, fp); + info_iptype(handle, fp); } info_aliased_rctl(handle, fp, ALIAS_MAXLWPS); info_aliased_rctl(handle, fp, ALIAS_MAXSHMMEM); @@ -4867,6 +4973,9 @@ info_func(cmd_t *cmd) case RT_SCHED: info_sched(handle, fp); break; + case RT_IPTYPE: + info_iptype(handle, fp); + break; case RT_MAXLWPS: info_aliased_rctl(handle, fp, ALIAS_MAXLWPS); break; @@ -5051,6 +5160,7 @@ verify_func(cmd_t *cmd) char brand[MAXNAMELEN]; int err, ret_val = Z_OK, arg; bool save = FALSE; + zone_iptype_t iptype; boolean_t has_cpu_shares = B_FALSE; optind = 0; @@ -5102,6 +5212,11 @@ verify_func(cmd_t *cmd) } } + if (zonecfg_get_iptype(handle, &iptype) != Z_OK) { + zerr("%s %s", gettext("cannot get"), pt_to_str(PT_IPTYPE)); + ret_val = Z_REQD_RESOURCE_MISSING; + saw_error = TRUE; + } if ((err = zonecfg_setipdent(handle)) != Z_OK) { zone_perror(zone, err, TRUE); return; @@ -5130,10 +5245,30 @@ verify_func(cmd_t *cmd) return; } while (zonecfg_getnwifent(handle, &nwiftab) == Z_OK) { - check_reqd_prop(nwiftab.zone_nwif_address, RT_NET, - PT_ADDRESS, &ret_val); + /* + * physical is required in all cases. + * A shared IP requires an address, while + * an exclusive IP must not have an address. + */ check_reqd_prop(nwiftab.zone_nwif_physical, RT_NET, PT_PHYSICAL, &ret_val); + + switch (iptype) { + case ZS_SHARED: + check_reqd_prop(nwiftab.zone_nwif_address, RT_NET, + PT_ADDRESS, &ret_val); + break; + case ZS_EXCLUSIVE: + if (strlen(nwiftab.zone_nwif_address) > 0) { + zerr(gettext("%s: %s cannot be specified " + "for an exclusive IP type"), + rt_to_str(RT_NET), pt_to_str(PT_ADDRESS)); + saw_error = TRUE; + if (ret_val == Z_OK) + ret_val = Z_INVAL; + } + break; + } } (void) zonecfg_endnwifent(handle); @@ -5492,27 +5627,35 @@ end_func(cmd_t *cmd) } break; case RT_NET: - /* First make sure everything was filled in. */ + /* + * First make sure everything was filled in. + * Since we don't know whether IP will be shared + * or exclusive here, some checks are deferred until + * the verify command. + */ (void) end_check_reqd(in_progress_nwiftab.zone_nwif_physical, PT_PHYSICAL, &validation_failed); - (void) end_check_reqd(in_progress_nwiftab.zone_nwif_address, - PT_ADDRESS, &validation_failed); if (validation_failed) { saw_error = TRUE; return; } - if (end_op == CMD_ADD) { /* Make sure there isn't already one like this. 
*/ bzero(&tmp_nwiftab, sizeof (tmp_nwiftab)); + (void) strlcpy(tmp_nwiftab.zone_nwif_physical, + in_progress_nwiftab.zone_nwif_physical, + sizeof (tmp_nwiftab.zone_nwif_physical)); (void) strlcpy(tmp_nwiftab.zone_nwif_address, in_progress_nwiftab.zone_nwif_address, sizeof (tmp_nwiftab.zone_nwif_address)); if (zonecfg_lookup_nwif(handle, &tmp_nwiftab) == Z_OK) { - zerr(gettext("A %s resource " - "with the %s '%s' already exists."), - rt_to_str(RT_NET), pt_to_str(PT_ADDRESS), + zerr(gettext("A %s resource with the %s '%s', " + "and %s '%s' already exists."), + rt_to_str(RT_NET), + pt_to_str(PT_PHYSICAL), + in_progress_nwiftab.zone_nwif_physical, + pt_to_str(PT_ADDRESS), in_progress_nwiftab.zone_nwif_address); saw_error = TRUE; return; diff --git a/usr/src/cmd/zonecfg/zonecfg.h b/usr/src/cmd/zonecfg/zonecfg.h index 64808e9623..4f960b56d1 100644 --- a/usr/src/cmd/zonecfg/zonecfg.h +++ b/usr/src/cmd/zonecfg/zonecfg.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -93,9 +93,10 @@ typedef int bool; #define RT_MAXSEMIDS 21 /* really a rctl alias property, but for info */ #define RT_SHARES 22 /* really a rctl alias property, but for info */ #define RT_SCHED 23 /* really a property, but for info ... */ +#define RT_IPTYPE 24 /* really a property, but for info ... */ #define RT_MIN RT_UNKNOWN -#define RT_MAX RT_SCHED +#define RT_MAX RT_IPTYPE /* property types: increment PT_MAX when expanding this list */ #define PT_UNKNOWN 0 @@ -132,9 +133,10 @@ typedef int bool; #define PT_MAXLOCKEDMEM 31 #define PT_MAXSWAP 32 #define PT_SCHED 33 +#define PT_IPTYPE 34 #define PT_MIN PT_UNKNOWN -#define PT_MAX PT_SCHED +#define PT_MAX PT_IPTYPE #define MAX_EQ_PROP_PAIRS 3 diff --git a/usr/src/cmd/zonecfg/zonecfg_grammar.y b/usr/src/cmd/zonecfg/zonecfg_grammar.y index 5c0dc2263e..7ee9b4d612 100644 --- a/usr/src/cmd/zonecfg/zonecfg_grammar.y +++ b/usr/src/cmd/zonecfg/zonecfg_grammar.y @@ -21,7 +21,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -59,6 +59,7 @@ extern void yyerror(char *s); %token HELP CREATE EXPORT ADD DELETE REMOVE SELECT SET INFO CANCEL END VERIFY %token COMMIT REVERT EXIT SEMICOLON TOKEN ZONENAME ZONEPATH AUTOBOOT POOL NET %token FS IPD ATTR DEVICE RCTL SPECIAL RAW DIR OPTIONS TYPE ADDRESS PHYSICAL +%token IPTYPE %token NAME MATCH PRIV LIMIT ACTION VALUE EQUAL OPEN_SQ_BRACKET CLOSE_SQ_BRACKET %token OPEN_PAREN CLOSE_PAREN COMMA DATASET LIMITPRIV BOOTARGS BRAND PSET %token MCAP NCPUS IMPORTANCE SHARES MAXLWPS MAXSHMMEM MAXSHMIDS MAXMSGIDS @@ -70,7 +71,7 @@ extern void yyerror(char *s); %type <ival> resource_type NET FS IPD DEVICE RCTL ATTR DATASET PSET MCAP %type <ival> property_name SPECIAL RAW DIR OPTIONS TYPE ADDRESS PHYSICAL NAME MATCH ZONENAME ZONEPATH AUTOBOOT POOL LIMITPRIV BOOTARGS VALUE PRIV LIMIT - ACTION BRAND SCHED + ACTION BRAND SCHED IPTYPE %type <cmd> command %type <cmd> add_command ADD %type <cmd> cancel_command CANCEL @@ -442,6 +443,15 @@ info_command: INFO $$->cmd_res_type = RT_AUTOBOOT; $$->cmd_prop_nv_pairs = 0; } + | INFO IPTYPE + { + if (($$ = alloc_cmd()) == NULL) + YYERROR; + cmd = $$; + $$->cmd_handler = &info_func; + $$->cmd_res_type = RT_IPTYPE; + $$->cmd_prop_nv_pairs = 0; + } | INFO POOL { if (($$ = alloc_cmd()) == NULL) @@ -840,6 +850,7 @@ property_name: SPECIAL { $$ = PT_SPECIAL; } | ZONENAME { $$ = PT_ZONENAME; } | ZONEPATH { $$ = PT_ZONEPATH; } | AUTOBOOT { $$ = PT_AUTOBOOT; } + | IPTYPE { $$ = PT_IPTYPE; } | POOL { $$ = PT_POOL; } | LIMITPRIV { $$ = PT_LIMITPRIV; } | BOOTARGS { $$ = PT_BOOTARGS; } diff --git a/usr/src/cmd/zonecfg/zonecfg_lex.l b/usr/src/cmd/zonecfg/zonecfg_lex.l index 53f726ca2e..81a0594c22 100644 --- a/usr/src/cmd/zonecfg/zonecfg_lex.l +++ b/usr/src/cmd/zonecfg/zonecfg_lex.l @@ -21,7 +21,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -184,6 +184,9 @@ char *safe_strdup(char *s); <TSTATE>autoboot { return AUTOBOOT; } <CSTATE>autoboot { return AUTOBOOT; } +<TSTATE>ip-type { return IPTYPE; } +<CSTATE>ip-type { return IPTYPE; } + <TSTATE>pool { return POOL; } <CSTATE>pool { return POOL; } diff --git a/usr/src/cmd/zonename/zonename.c b/usr/src/cmd/zonename/zonename.c index 3a3a5df27d..2ab2f1f520 100644 --- a/usr/src/cmd/zonename/zonename.c +++ b/usr/src/cmd/zonename/zonename.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -33,21 +32,54 @@ #include <zone.h> #include <libzonecfg.h> #include <dlfcn.h> +#include <sys/zone.h> #if !defined(TEXT_DOMAIN) /* should be defined by cc -D */ #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */ #endif +/* + * -t prints "shared" vs. 
"exclusive" + */ int -main(void) +main(int argc, char *argv[]) { + zoneid_t zoneid; char zonename[ZONENAME_MAX]; FILE *fp; + int arg; + boolean_t stacktype = B_FALSE; (void) setlocale(LC_ALL, ""); (void) textdomain(TEXT_DOMAIN); - if (getzonenamebyid(getzoneid(), zonename, sizeof (zonename)) < 0) { + opterr = 0; + while ((arg = getopt(argc, argv, "t")) != EOF) { + switch (arg) { + case 't': + stacktype = B_TRUE; + break; + } + } + + zoneid = getzoneid(); + + if (stacktype) { + ushort_t flags; + + if (zone_getattr(zoneid, ZONE_ATTR_FLAGS, &flags, + sizeof (flags)) < 0) { + perror("could not determine zone IP type"); + exit(1); + } + if (flags & ZF_NET_EXCL) + (void) puts("exclusive"); + else + (void) puts("shared"); + return (0); + } + + if (getzonenamebyid(zoneid, zonename, sizeof (zonename)) < 0) { (void) fputs(gettext("could not determine zone name\n"), stderr); return (1); diff --git a/usr/src/common/net/patricia/radix.c b/usr/src/common/net/patricia/radix.c index ec8bdf570e..a61cbaa619 100644 --- a/usr/src/common/net/patricia/radix.c +++ b/usr/src/common/net/patricia/radix.c @@ -1,5 +1,5 @@ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. * * Copyright (c) 1988, 1989, 1993 @@ -1221,6 +1221,7 @@ rn_freehead(rnh) rnh->rnh_walktree = NULL; #ifdef _KERNEL + RADIX_NODE_HEAD_DESTROY(rnh); FreeHead(rnh, sizeof (*rnh)); #else Free(rnh, NULL); diff --git a/usr/src/head/libzonecfg.h b/usr/src/head/libzonecfg.h index 10ee4a2bb4..8272817020 100644 --- a/usr/src/head/libzonecfg.h +++ b/usr/src/head/libzonecfg.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -238,6 +238,11 @@ struct zone_devpermtab { char *zone_devperm_acl; }; +typedef enum zone_iptype { + ZS_SHARED, + ZS_EXCLUSIVE +} zone_iptype_t; + /* * Basic configuration management routines. */ @@ -277,6 +282,8 @@ extern int zonecfg_get_zonepath(zone_dochandle_t, char *, size_t); extern int zonecfg_set_zonepath(zone_dochandle_t, char *); extern int zonecfg_get_autoboot(zone_dochandle_t, boolean_t *); extern int zonecfg_set_autoboot(zone_dochandle_t, boolean_t); +extern int zonecfg_get_iptype(zone_dochandle_t, zone_iptype_t *); +extern int zonecfg_set_iptype(zone_dochandle_t, zone_iptype_t); extern int zonecfg_get_pool(zone_dochandle_t, char *, size_t); extern int zonecfg_set_pool(zone_dochandle_t, char *); extern int zonecfg_get_bootargs(zone_dochandle_t, char *, size_t); @@ -500,6 +507,7 @@ extern boolean_t zonecfg_valid_fs_type(const char *); */ extern boolean_t zonecfg_same_net_address(char *, char *); extern int zonecfg_valid_net_address(char *, struct lifreq *); +extern boolean_t zonecfg_ifname_exists(sa_family_t, char *); /* * Rctl-related common functions. diff --git a/usr/src/head/zone.h b/usr/src/head/zone.h index 5250aaba3f..c4077c03de 100644 --- a/usr/src/head/zone.h +++ b/usr/src/head/zone.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -58,7 +58,7 @@ extern int zone_get_id(const char *, zoneid_t *); /* System call API */ extern zoneid_t zone_create(const char *, const char *, const struct priv_set *, const char *, size_t, const char *, size_t, int *, - int, int, const bslabel_t *); + int, int, const bslabel_t *, int); extern int zone_boot(zoneid_t); extern int zone_destroy(zoneid_t); extern ssize_t zone_getattr(zoneid_t, int, void *, size_t); @@ -67,6 +67,10 @@ extern int zone_enter(zoneid_t); extern int zone_list(zoneid_t *, uint_t *); extern int zone_shutdown(zoneid_t); extern int zone_version(int *); +extern int zone_add_datalink(zoneid_t, char *); +extern int zone_remove_datalink(zoneid_t, char *); +extern int zone_check_datalink(zoneid_t *, char *); +extern int zone_list_datalink(zoneid_t, int *, char *); #ifdef __cplusplus } diff --git a/usr/src/lib/brand/lx/lx_support/lx_support.c b/usr/src/lib/brand/lx/lx_support/lx_support.c index ab2fcdb491..714fc38e50 100644 --- a/usr/src/lib/brand/lx/lx_support/lx_support.c +++ b/usr/src/lib/brand/lx/lx_support/lx_support.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -420,6 +420,7 @@ lxs_verify(char *xmlfile) struct zone_devtab devtab; boolean_t audio, restart; char *idev, *odev; + zone_iptype_t iptype; if ((handle = zonecfg_init_handle()) == NULL) lxs_err(gettext("internal libzonecfg.so.1 error"), 0); @@ -469,6 +470,20 @@ lxs_verify(char *xmlfile) lxs_err(gettext("lx zones do not support added devices")); } + /* + * Check to see whether the zone has ip-type configured as exclusive + */ + if (zonecfg_get_iptype(handle, &iptype) != Z_OK) { + zonecfg_fini_handle(handle); + lxs_err(gettext("zonecfg provided an invalid XML file")); + } + + if (iptype == ZS_EXCLUSIVE) { + zonecfg_fini_handle(handle); + lxs_err(gettext("lx zones do not support an 'exclusive' " + "ip-type")); + } + /* Extract any relevant attributes from the config file. */ lxs_getattrs(handle, &restart, &audio, &idev, &odev); zonecfg_fini_handle(handle); diff --git a/usr/src/lib/brand/lx/zone/platform.xml b/usr/src/lib/brand/lx/zone/platform.xml index 85e763fa71..a53f0ee509 100644 --- a/usr/src/lib/brand/lx/zone/platform.xml +++ b/usr/src/lib/brand/lx/zone/platform.xml @@ -20,7 +20,7 @@ CDDL HEADER END - Copyright 2006 Sun Microsystems, Inc. All rights reserved. + Copyright 2007 Sun Microsystems, Inc. All rights reserved. Use is subject to license terms. ident "%Z%%M% %I% %E% SMI" @@ -31,7 +31,7 @@ <!DOCTYPE platform PUBLIC "-//Sun Microsystems Inc//Zones Platform//EN" "file:///usr/share/lib/xml/dtd/zone_platform.dtd.1"> -<platform name="lx"> +<platform name="lx" allow-exclusive-ip="false"> <!-- Global filesystems to mount when booting the zone --> <global_mount special="/dev" directory="/native/dev" type="dev" opt="attrdir=%R/dev" /> diff --git a/usr/src/lib/brand/native/zone/platform.xml b/usr/src/lib/brand/native/zone/platform.xml index d2bbc839cf..0919348bc4 100644 --- a/usr/src/lib/brand/native/zone/platform.xml +++ b/usr/src/lib/brand/native/zone/platform.xml @@ -20,7 +20,7 @@ CDDL HEADER END - Copyright 2006 Sun Microsystems, Inc. All rights reserved. + Copyright 2007 Sun Microsystems, Inc. All rights reserved. Use is subject to license terms. 
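Both the zonename -t hunk above and the ZONE_ATTR_FLAGS checks used elsewhere in this change (vplat_teardown(), do_check_zone()) reduce to the same test. A minimal stand-alone sketch of that test, using only interfaces visible in this diff; current_zone_iptype() is a hypothetical helper name, not part of the commit:

#include <sys/types.h>
#include <sys/zone.h>		/* ZONE_ATTR_FLAGS, ZF_NET_EXCL */
#include <zone.h>		/* getzoneid(), zone_getattr() */

/* Return "exclusive" or "shared" for the calling zone, NULL on error. */
static const char *
current_zone_iptype(void)
{
	ushort_t flags;

	if (zone_getattr(getzoneid(), ZONE_ATTR_FLAGS, &flags,
	    sizeof (flags)) < 0)
		return (NULL);
	return ((flags & ZF_NET_EXCL) ? "exclusive" : "shared");
}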
ident "%Z%%M% %I% %E% SMI" @@ -31,7 +31,7 @@ <!DOCTYPE platform PUBLIC "-//Sun Microsystems Inc//Zones Platform//EN" "file:///usr/share/lib/xml/dtd/zone_platform.dtd.1"> -<platform name="native"> +<platform name="native" allow-exclusive-ip="true"> <!-- Global filesystems to mount when booting the zone --> <global_mount special="/dev" directory="/dev" type="dev" @@ -86,6 +86,29 @@ <device match="zero" /> <device match="zfs" /> + <!-- Devices to create in exclusive IP zone only --> + <device match="icmp" ip-type="exclusive" /> + <device match="icmp6" ip-type="exclusive" /> + <device match="ip" ip-type="exclusive" /> + <device match="ip6" ip-type="exclusive" /> + <device match="ipauth" ip-type="exclusive" /> + <device match="ipf" ip-type="exclusive" /> + <device match="ipl" ip-type="exclusive" /> + <device match="iplookup" ip-type="exclusive" /> + <device match="ipnat" ip-type="exclusive" /> + <device match="ipscan" ip-type="exclusive" /> + <device match="ipsecah" ip-type="exclusive" /> + <device match="ipsecesp" ip-type="exclusive" /> + <device match="ipstate" ip-type="exclusive" /> + <device match="ipsync" ip-type="exclusive" /> + <device match="keysock" ip-type="exclusive" /> + <device match="rawip" ip-type="exclusive" /> + <device match="rawip6" ip-type="exclusive" /> + <device match="rts" ip-type="exclusive" /> + <device match="sctp" ip-type="exclusive" /> + <device match="sctp6" ip-type="exclusive" /> + <device match="spdsock" ip-type="exclusive" /> + <!-- Renamed devices to create under /dev --> <device match="zcons/%z/zoneconsole" name="zconsole" /> diff --git a/usr/src/lib/brand/sn1/zone/platform.xml b/usr/src/lib/brand/sn1/zone/platform.xml index 01b03485ea..326f2e2ab5 100644 --- a/usr/src/lib/brand/sn1/zone/platform.xml +++ b/usr/src/lib/brand/sn1/zone/platform.xml @@ -20,7 +20,7 @@ CDDL HEADER END - Copyright 2006 Sun Microsystems, Inc. All rights reserved. + Copyright 2007 Sun Microsystems, Inc. All rights reserved. Use is subject to license terms. 
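The zone_create() prototype above gains a final flags argument, and vplat_create() earlier in this diff passes ZCF_NET_EXCL for exclusive-IP zones. A hedged sketch of that mapping in isolation, assuming ZCF_NET_EXCL comes from <sys/zone.h> as vplat.c uses it; iptype_to_zcf_flags() is an illustrative helper, not part of the commit:

#include <sys/zone.h>		/* ZCF_NET_EXCL, assumed as used by vplat.c */
#include <libzonecfg.h>		/* zone_iptype_t, zonecfg_get_iptype() */

/* Translate the configured ip-type into zone_create() creation flags. */
static int
iptype_to_zcf_flags(zone_dochandle_t handle, int *flagsp)
{
	zone_iptype_t iptype;

	if (zonecfg_get_iptype(handle, &iptype) != Z_OK)
		return (-1);
	*flagsp = (iptype == ZS_EXCLUSIVE) ? ZCF_NET_EXCL : 0;
	return (0);
}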
ident "%Z%%M% %I% %E% SMI" @@ -31,7 +31,7 @@ <!DOCTYPE platform PUBLIC "-//Sun Microsystems Inc//Zones Platform//EN" "file:///usr/share/lib/xml/dtd/zone_platform.dtd.1"> -<platform name="native"> +<platform name="native" allow-exclusive-ip="true"> <!-- Global filesystems to mount when booting the zone --> <global_mount special="/dev" directory="/dev" type="dev" @@ -86,6 +86,29 @@ <device match="zero" /> <device match="zfs" /> + <!-- Devices to create in exclusive IP zone only --> + <device match="icmp" ip-type="exclusive" /> + <device match="icmp6" ip-type="exclusive" /> + <device match="ip" ip-type="exclusive" /> + <device match="ip6" ip-type="exclusive" /> + <device match="ipauth" ip-type="exclusive" /> + <device match="ipf" ip-type="exclusive" /> + <device match="ipl" ip-type="exclusive" /> + <device match="iplookup" ip-type="exclusive" /> + <device match="ipnat" ip-type="exclusive" /> + <device match="ipscan" ip-type="exclusive" /> + <device match="ipsecah" ip-type="exclusive" /> + <device match="ipsecesp" ip-type="exclusive" /> + <device match="ipstate" ip-type="exclusive" /> + <device match="ipsync" ip-type="exclusive" /> + <device match="keysock" ip-type="exclusive" /> + <device match="rawip" ip-type="exclusive" /> + <device match="rawip6" ip-type="exclusive" /> + <device match="rts" ip-type="exclusive" /> + <device match="sctp" ip-type="exclusive" /> + <device match="sctp6" ip-type="exclusive" /> + <device match="spdsock" ip-type="exclusive" /> + <!-- Renamed devices to create under /dev --> <device match="zcons/%z/zoneconsole" name="zconsole" /> diff --git a/usr/src/lib/libbrand/common/libbrand.c b/usr/src/lib/libbrand/common/libbrand.c index 0ce5a93365..068d720247 100644 --- a/usr/src/lib/libbrand/common/libbrand.c +++ b/usr/src/lib/libbrand/common/libbrand.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -65,8 +65,10 @@ #define DTD_ELEM_VERIFY_CFG ((const xmlChar *) "verify_cfg") #define DTD_ELEM_VERIFY_ADM ((const xmlChar *) "verify_adm") +#define DTD_ATTR_ALLOWEXCL ((const xmlChar *) "allow-exclusive-ip") #define DTD_ATTR_ARCH ((const xmlChar *) "arch") #define DTD_ATTR_DIRECTORY ((const xmlChar *) "directory") +#define DTD_ATTR_IPTYPE ((const xmlChar *) "ip-type") #define DTD_ATTR_MATCH ((const xmlChar *) "match") #define DTD_ATTR_MODE ((const xmlChar *) "mode") #define DTD_ATTR_NAME ((const xmlChar *) "name") @@ -78,6 +80,8 @@ #define DTD_ATTR_TARGET ((const xmlChar *) "target") #define DTD_ATTR_TYPE ((const xmlChar *) "type") +#define DTD_ENTITY_TRUE "true" + static volatile boolean_t libbrand_initialized = B_FALSE; static char i_curr_arch[MAXNAMELEN]; static char i_curr_zone[ZONENAME_MAX]; @@ -538,6 +542,34 @@ brand_is_native(brand_handle_t bh) return ((strcmp(bhp->bh_name, NATIVE_BRAND_NAME) == 0) ? 
1 : 0); } +boolean_t +brand_allow_exclusive_ip(brand_handle_t bh) +{ + struct brand_handle *bhp = (struct brand_handle *)bh; + xmlNodePtr node; + xmlChar *allow_excl; + boolean_t ret; + + assert(bhp != NULL); + + if ((node = xmlDocGetRootElement(bhp->bh_platform)) == NULL) + return (B_FALSE); + + allow_excl = xmlGetProp(node, DTD_ATTR_ALLOWEXCL); + if (allow_excl == NULL) + return (B_FALSE); + + /* Note: only return B_TRUE if it's "true" */ + if (strcmp((char *)allow_excl, DTD_ENTITY_TRUE) == 0) + ret = B_TRUE; + else + ret = B_FALSE; + + xmlFree(allow_excl); + + return (ret); +} + /* * Iterate over brand privileges * @@ -738,12 +770,13 @@ brand_platform_iter_link(brand_handle_t bh, */ int brand_platform_iter_devices(brand_handle_t bh, const char *zonename, - int (*func)(void *, const char *, const char *), void *data) + int (*func)(void *, const char *, const char *), void *data, + const char *curr_iptype) { struct brand_handle *bhp = (struct brand_handle *)bh; const char *curr_arch = get_curr_arch(); xmlNodePtr node; - xmlChar *match, *name, *arch; + xmlChar *match, *name, *arch, *iptype; char match_exp[MAXPATHLEN]; boolean_t err = B_FALSE; int ret = 0; @@ -752,6 +785,7 @@ brand_platform_iter_devices(brand_handle_t bh, const char *zonename, assert(bhp != NULL); assert(zonename != NULL); assert(func != NULL); + assert(curr_iptype != NULL); if ((node = xmlDocGetRootElement(bhp->bh_platform)) == NULL) return (-1); @@ -764,7 +798,9 @@ brand_platform_iter_devices(brand_handle_t bh, const char *zonename, match = xmlGetProp(node, DTD_ATTR_MATCH); name = xmlGetProp(node, DTD_ATTR_NAME); arch = xmlGetProp(node, DTD_ATTR_ARCH); - if ((match == NULL) || (name == NULL) || (arch == NULL)) { + iptype = xmlGetProp(node, DTD_ATTR_IPTYPE); + if ((match == NULL) || (name == NULL) || (arch == NULL) || + (iptype == NULL)) { err = B_TRUE; goto next; } @@ -774,6 +810,11 @@ brand_platform_iter_devices(brand_handle_t bh, const char *zonename, (strcmp((char *)arch, curr_arch) != 0)) goto next; + /* check if the iptype matches */ + if ((strcmp((char *)iptype, "all") != 0) && + (strcmp((char *)iptype, curr_iptype) != 0)) + goto next; + /* Substitute token values as needed. */ if ((ret = i_substitute_tokens((char *)match, match_exp, sizeof (match_exp), @@ -798,6 +839,8 @@ next: xmlFree(name); if (arch != NULL) xmlFree(arch); + if (iptype != NULL) + xmlFree(iptype); if (err) return (-1); if (ret != 0) diff --git a/usr/src/lib/libbrand/common/libbrand.h b/usr/src/lib/libbrand/common/libbrand.h index bb31cee86e..0254a9fa3e 100644 --- a/usr/src/lib/libbrand/common/libbrand.h +++ b/usr/src/lib/libbrand/common/libbrand.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -42,6 +42,8 @@ extern void brand_close(brand_handle_t); extern int brand_is_native(brand_handle_t); +extern boolean_t brand_allow_exclusive_ip(brand_handle_t); + extern int brand_get_boot(brand_handle_t, const char *, const char *, char *, size_t, int, char **); extern int brand_get_brandname(brand_handle_t, char *, size_t); @@ -63,7 +65,7 @@ extern int brand_config_iter_privilege(brand_handle_t, int (*func)(void *, const char *, const char *), void *); extern int brand_platform_iter_devices(brand_handle_t, const char *, - int (*)(void *, const char *, const char *), void *); + int (*)(void *, const char *, const char *), void *, const char *); extern int brand_platform_iter_gmounts(brand_handle_t, const char *, int (*)(void *, const char *, const char *, const char *, const char *), void *); diff --git a/usr/src/lib/libbrand/common/mapfile-vers b/usr/src/lib/libbrand/common/mapfile-vers index a9daa750ad..f42011400e 100644 --- a/usr/src/lib/libbrand/common/mapfile-vers +++ b/usr/src/lib/libbrand/common/mapfile-vers @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # ident "%Z%%M% %I% %E% SMI" @@ -27,6 +27,7 @@ SUNWprivate { global: + brand_allow_exclusive_ip; brand_close; brand_config_iter_privilege; brand_get_boot; diff --git a/usr/src/lib/libbrand/dtd/zone_platform.dtd.1 b/usr/src/lib/libbrand/dtd/zone_platform.dtd.1 index a9e8c07bdd..28ac8d42c3 100644 --- a/usr/src/lib/libbrand/dtd/zone_platform.dtd.1 +++ b/usr/src/lib/libbrand/dtd/zone_platform.dtd.1 @@ -20,7 +20,7 @@ CDDL HEADER END - Copyright 2006 Sun Microsystems, Inc. All rights reserved. + Copyright 2007 Sun Microsystems, Inc. All rights reserved. Use is subject to license terms. ident "%Z%%M% %I% %E% SMI" @@ -45,11 +45,16 @@ arch Identifies devices only available for certain architectures. Can be "sparc" or "i386". + ip-type Optional, identifies devices only available for certain IP + types. Can be "shared" or "exclusive". If it's not specified, + the default value "all" will be used, which means it's + available regardless the IP type. + For example, the following entry: <device match="brand/windows/foo" name="bar" arch="sparc" /> would result in mapping the following global zone device: /dev/brand/windows/foo - into the zone as: + into the zone (disregarding its IP type) as: /dev/bar but the mapping would only exist on sparc machines. @@ -58,7 +63,8 @@ <!ATTLIST device match CDATA #REQUIRED name CDATA "" - arch ( sparc | i386 ) "all" > + arch ( sparc | i386 ) "all" + ip-type ( shared | exclusive ) "all" > <!-- symlink @@ -138,7 +144,10 @@ name The name of the brand. This must match the name of the directory in which this file is stored, as well as the name of the brand that refers to it. + allow-exclusive-ip Whether the zones of this brand can have their + own exclusive IP stack. It is a boolean value. --> <!ELEMENT platform (device | global_mount | mount | symlink)* > -<!ATTLIST platform name CDATA #REQUIRED> +<!ATTLIST platform name CDATA #REQUIRED + allow-exclusive-ip (true | false) #REQUIRED> diff --git a/usr/src/lib/libc/port/mapfile-vers b/usr/src/lib/libc/port/mapfile-vers index 8e1b399567..560ac9d878 100644 --- a/usr/src/lib/libc/port/mapfile-vers +++ b/usr/src/lib/libc/port/mapfile-vers @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. 
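brand_allow_exclusive_ip() and the allow-exclusive-ip attribute documented in the DTD above are consumed by zonecfg's allow_exclusive(); any other front end could apply the same gate. A trimmed sketch of that pattern, assuming <libbrand.h> is the installed header and brand_permits_exclusive_ip() is a hypothetical wrapper:

#include <libbrand.h>

/* B_TRUE only when the brand's platform.xml sets allow-exclusive-ip="true". */
static boolean_t
brand_permits_exclusive_ip(const char *brand_name)
{
	brand_handle_t bh;
	boolean_t ret;

	if ((bh = brand_open(brand_name)) == NULL)
		return (B_FALSE);
	ret = brand_allow_exclusive_ip(bh);
	brand_close(bh);
	return (ret);
}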
# Use is subject to license terms. # # ident "%Z%%M% %I% %E% SMI" @@ -2219,14 +2219,18 @@ SUNWprivate_1.1 { _xgetwidth; __xpg6 = NODIRECT; _yield; + zone_add_datalink; zone_boot; + zone_check_datalink; zone_create; zone_destroy; zone_enter; zone_getattr; zone_get_id; zone_list; + zone_list_datalink; zonept; + zone_remove_datalink; zone_setattr; zone_shutdown; zone_version; diff --git a/usr/src/lib/libc/port/sys/zone.c b/usr/src/lib/libc/port/sys/zone.c index 609cf381d4..7c747d9c97 100644 --- a/usr/src/lib/libc/port/sys/zone.c +++ b/usr/src/lib/libc/port/sys/zone.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -44,7 +44,7 @@ zoneid_t zone_create(const char *name, const char *root, const struct priv_set *privs, const char *rctls, size_t rctlsz, const char *zfs, size_t zfssz, - int *extended_error, int match, int doi, const bslabel_t *label) + int *extended_error, int match, int doi, const bslabel_t *label, int flags) { zone_def zd; priv_data_t *d; @@ -63,6 +63,7 @@ zone_create(const char *name, const char *root, const struct priv_set *privs, zd.match = match; zd.doi = doi; zd.label = label; + zd.flags = flags; return ((zoneid_t)syscall(SYS_zone, ZONE_CREATE, &zd)); } @@ -221,3 +222,28 @@ zone_version(int *version) { return (syscall(SYS_zone, ZONE_VERSION, version)); } + + +int +zone_add_datalink(zoneid_t zoneid, char *dlname) +{ + return (syscall(SYS_zone, ZONE_ADD_DATALINK, zoneid, dlname)); +} + +int +zone_remove_datalink(zoneid_t zoneid, char *dlname) +{ + return (syscall(SYS_zone, ZONE_DEL_DATALINK, zoneid, dlname)); +} + +int +zone_check_datalink(zoneid_t *zoneidp, char *dlname) +{ + return (syscall(SYS_zone, ZONE_CHECK_DATALINK, zoneidp, dlname)); +} + +int +zone_list_datalink(zoneid_t zoneid, int *dlnump, char *buf) +{ + return (syscall(SYS_zone, ZONE_LIST_DATALINK, zoneid, dlnump, buf)); +} diff --git a/usr/src/lib/libdladm/common/libdladm.c b/usr/src/lib/libdladm/common/libdladm.c index 154fc08675..2a62bb8232 100644 --- a/usr/src/lib/libdladm/common/libdladm.c +++ b/usr/src/lib/libdladm/common/libdladm.c @@ -19,12 +19,13 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #pragma ident "%Z%%M% %I% %E% SMI" +#include <ctype.h> #include <unistd.h> #include <stropts.h> #include <errno.h> @@ -38,6 +39,7 @@ #include <libdevinfo.h> #include <libdladm_impl.h> #include <libintl.h> +#include <sys/vlan.h> typedef struct dladm_dev { char dd_name[IFNAMSIZ]; @@ -162,6 +164,74 @@ i_dladm_nt_net_walk(di_node_t node, di_minor_t minor, void *arg) } /* + * Hold a data-link. + */ +static int +i_dladm_hold_link(const char *name, zoneid_t zoneid, boolean_t docheck) +{ + int fd; + dld_hold_vlan_t dhv; + + if (strlen(name) >= IFNAMSIZ) { + errno = EINVAL; + return (-1); + } + + if ((fd = open(DLD_CONTROL_DEV, O_RDWR)) < 0) + return (-1); + + bzero(&dhv, sizeof (dld_hold_vlan_t)); + (void) strlcpy(dhv.dhv_name, name, IFNAMSIZ); + dhv.dhv_zid = zoneid; + dhv.dhv_docheck = docheck; + + if (i_dladm_ioctl(fd, DLDIOCHOLDVLAN, &dhv, sizeof (dhv)) < 0) { + int olderrno = errno; + + (void) close(fd); + errno = olderrno; + return (-1); + } + + (void) close(fd); + return (0); +} + +/* + * Release a data-link. 
+ */ +static int +i_dladm_rele_link(const char *name, zoneid_t zoneid, boolean_t docheck) +{ + int fd; + dld_hold_vlan_t dhv; + + if (strlen(name) >= IFNAMSIZ) { + errno = EINVAL; + return (-1); + } + + if ((fd = open(DLD_CONTROL_DEV, O_RDWR)) < 0) + return (-1); + + bzero(&dhv, sizeof (dld_hold_vlan_t)); + (void) strlcpy(dhv.dhv_name, name, IFNAMSIZ); + dhv.dhv_zid = zoneid; + dhv.dhv_docheck = docheck; + + if (i_dladm_ioctl(fd, DLDIOCRELEVLAN, &dhv, sizeof (dhv)) < 0) { + int olderrno = errno; + + (void) close(fd); + errno = olderrno; + return (-1); + } + + (void) close(fd); + return (0); +} + +/* * Invoke the specified callback function for each active DDI_NT_NET * node. */ @@ -186,7 +256,6 @@ dladm_walk(void (*fn)(void *, const char *), void *arg) ddp = dw.dw_dev_list; while (ddp) { fn(arg, ddp->dd_name); - (void) dladm_walk_vlan(fn, arg, ddp->dd_name); last_ddp = ddp; ddp = ddp->dd_next; free(last_ddp); @@ -304,6 +373,9 @@ dladm_status2str(dladm_status_t status, char *buf) case DLADM_STATUS_IOERR: s = "I/O error"; break; + case DLADM_STATUS_TEMPONLY: + s = "change cannot be persistent, specify -t please"; + break; default: s = "<unknown error>"; break; @@ -506,3 +578,21 @@ dladm_set_rootdir(const char *rootdir) (void) closedir(dp); return (DLADM_STATUS_OK); } + +/* + * Do a "hold" operation to a link. + */ +int +dladm_hold_link(const char *name, zoneid_t zoneid, boolean_t docheck) +{ + return (i_dladm_hold_link(name, zoneid, docheck)); +} + +/* + * Do a "release" operation to a link. + */ +int +dladm_rele_link(const char *name, zoneid_t zoneid, boolean_t docheck) +{ + return (i_dladm_rele_link(name, zoneid, docheck)); +} diff --git a/usr/src/lib/libdladm/common/libdladm.h b/usr/src/lib/libdladm/common/libdladm.h index 1cf3700fc6..7421ab1445 100644 --- a/usr/src/lib/libdladm/common/libdladm.h +++ b/usr/src/lib/libdladm/common/libdladm.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -64,7 +64,8 @@ typedef enum { DLADM_STATUS_BADVALCNT, DLADM_STATUS_DBNOTFOUND, DLADM_STATUS_DENIED, - DLADM_STATUS_IOERR + DLADM_STATUS_IOERR, + DLADM_STATUS_TEMPONLY } dladm_status_t; typedef enum { @@ -82,13 +83,16 @@ typedef void (dladm_walkcb_t)(void *, const char *); extern int dladm_walk(dladm_walkcb_t *, void *); extern int dladm_walk_vlan(dladm_walkcb_t *, void *, const char *); extern int dladm_info(const char *, dladm_attr_t *); +extern int dladm_hold_link(const char *, zoneid_t, boolean_t); +extern int dladm_rele_link(const char *, zoneid_t, boolean_t); extern dladm_status_t dladm_set_prop(const char *, const char *, - char **, uint_t, uint_t); + char **, uint_t, uint_t, char **); extern dladm_status_t dladm_get_prop(const char *, dladm_prop_type_t, const char *, char **, uint_t *); extern dladm_status_t dladm_walk_prop(const char *, void *, boolean_t (*)(void *, const char *)); +extern boolean_t dladm_is_prop_temponly(const char *, char **); extern dladm_status_t dladm_set_secobj(const char *, dladm_secobj_class_t, uint8_t *, uint_t, uint_t); diff --git a/usr/src/lib/libdladm/common/linkprop.c b/usr/src/lib/libdladm/common/linkprop.c index 8e1ef849e9..e3b9ed386f 100644 --- a/usr/src/lib/libdladm/common/linkprop.c +++ b/usr/src/lib/libdladm/common/linkprop.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 
* Use is subject to license terms. */ @@ -29,14 +29,66 @@ #include <strings.h> #include <errno.h> #include <ctype.h> +#include <sys/types.h> #include <sys/stat.h> +#include <sys/dld.h> +#include <sys/zone.h> +#include <fcntl.h> +#include <unistd.h> +#include <libdevinfo.h> +#include <zone.h> #include <libwladm.h> #include <libdladm_impl.h> +#include <dlfcn.h> +#include <link.h> + static dladm_status_t i_dladm_set_prop_db(const char *, const char *, char **, uint_t); static dladm_status_t i_dladm_get_prop_db(const char *, const char *, char **, uint_t *); +static dladm_status_t i_dladm_get_prop_temp(const char *, dladm_prop_type_t, + const char *, char **, uint_t *); +static dladm_status_t i_dladm_set_prop_temp(const char *, const char *, + char **, uint_t, uint_t, char **); +static boolean_t i_dladm_is_prop_temponly(const char *prop_name, + char **); + +typedef struct val_desc { + char *vd_name; + void *vd_val; +} val_desc_t; + +struct prop_desc; + +typedef dladm_status_t pd_getf_t(const char *, char **, uint_t *); +typedef dladm_status_t pd_setf_t(const char *, val_desc_t *, uint_t); +typedef dladm_status_t pd_checkf_t(struct prop_desc *, char **, + uint_t, val_desc_t **); + +static pd_getf_t do_get_zone; +static pd_setf_t do_set_zone; +static pd_checkf_t do_check_zone; + +typedef struct prop_desc { + char *pd_name; + val_desc_t pd_defval; + val_desc_t *pd_modval; + uint_t pd_nmodval; + boolean_t pd_temponly; + pd_setf_t *pd_set; + pd_getf_t *pd_getmod; + pd_getf_t *pd_get; + pd_checkf_t *pd_check; +} prop_desc_t; + +static prop_desc_t prop_table[] = { + { "zone", { "", NULL }, NULL, 0, B_TRUE, + do_set_zone, NULL, + do_get_zone, do_check_zone} +}; + +#define MAX_PROPS (sizeof (prop_table) / sizeof (prop_desc_t)) /* * Convert a wladm_status_t to a dladm_status_t. 
This is used by wrappers @@ -79,7 +131,7 @@ dladm_wladmstatus2status(wladm_status_t wstatus) dladm_status_t dladm_set_prop(const char *link, const char *prop_name, char **prop_val, - uint_t val_cnt, uint_t flags) + uint_t val_cnt, uint_t flags, char **errprop) { dladm_status_t status = DLADM_STATUS_BADARG; @@ -88,15 +140,27 @@ dladm_set_prop(const char *link, const char *prop_name, char **prop_val, return (DLADM_STATUS_BADARG); if ((flags & DLADM_OPT_TEMP) != 0) { - if (wladm_is_valid(link)) { - status = dladm_wladmstatus2status( - wladm_set_prop(link, prop_name, - prop_val, val_cnt)); + status = i_dladm_set_prop_temp(link, prop_name, prop_val, + val_cnt, flags, errprop); + if (status == DLADM_STATUS_TEMPONLY && + (flags & DLADM_OPT_PERSIST) != 0) + return (DLADM_STATUS_TEMPONLY); + + if (status == DLADM_STATUS_NOTFOUND) { + status = DLADM_STATUS_BADARG; + if (wladm_is_valid(link)) { + status = dladm_wladmstatus2status( + wladm_set_prop(link, prop_name, + prop_val, val_cnt, errprop)); + } } if (status != DLADM_STATUS_OK) return (status); } if ((flags & DLADM_OPT_PERSIST) != 0) { + if (i_dladm_is_prop_temponly(prop_name, errprop)) + return (DLADM_STATUS_TEMPONLY); + status = i_dladm_set_prop_db(link, prop_name, prop_val, val_cnt); } @@ -107,20 +171,35 @@ dladm_status_t dladm_walk_prop(const char *link, void *arg, boolean_t (*func)(void *, const char *)) { + int i; + if (link == NULL || func == NULL) return (DLADM_STATUS_BADARG); + /* For wifi links, show wifi properties first */ if (wladm_is_valid(link)) { - return (dladm_wladmstatus2status( - wladm_walk_prop(link, arg, func))); + dladm_status_t status; + + status = dladm_wladmstatus2status( + wladm_walk_prop(link, arg, func)); + if (status != DLADM_STATUS_OK) + return (status); } - return (DLADM_STATUS_BADARG); + + /* Then show data-link properties if there are any */ + for (i = 0; i < MAX_PROPS; i++) { + if (!func(arg, prop_table[i].pd_name)) + break; + } + return (DLADM_STATUS_OK); } dladm_status_t dladm_get_prop(const char *link, dladm_prop_type_t type, const char *prop_name, char **prop_val, uint_t *val_cntp) { + dladm_status_t status; + if (link == NULL || prop_name == NULL || prop_val == NULL || val_cntp == NULL || *val_cntp == 0) return (DLADM_STATUS_BADARG); @@ -130,6 +209,11 @@ dladm_get_prop(const char *link, dladm_prop_type_t type, prop_val, val_cntp)); } + status = i_dladm_get_prop_temp(link, type, prop_name, + prop_val, val_cntp); + if (status != DLADM_STATUS_NOTFOUND) + return (status); + if (wladm_is_valid(link)) { wladm_prop_type_t wtype; @@ -421,7 +505,7 @@ process_linkprop_init(linkprop_db_state_t *lsp, char *buf, propval[i] = (char *)lvp->lv_name; status = dladm_set_prop(lsp->ls_link, lip->li_name, - propval, valcnt, DLADM_OPT_TEMP); + propval, valcnt, DLADM_OPT_TEMP, NULL); /* * We continue with initializing other properties even @@ -698,3 +782,422 @@ dladm_init_linkprop(void) return (LINKPROP_RW_DB(&state, B_FALSE)); } + +static dladm_status_t +i_dladm_get_zoneid(const char *link, zoneid_t *zidp) +{ + int fd; + dld_hold_vlan_t dhv; + + if ((fd = open(DLD_CONTROL_DEV, O_RDWR)) < 0) + return (dladm_errno2status(errno)); + + bzero(&dhv, sizeof (dld_hold_vlan_t)); + (void) strlcpy(dhv.dhv_name, link, IFNAMSIZ); + dhv.dhv_zid = -1; + + if (i_dladm_ioctl(fd, DLDIOCZIDGET, &dhv, sizeof (dhv)) < 0 && + errno != ENOENT) { + dladm_status_t status = dladm_errno2status(errno); + + (void) close(fd); + return (status); + } + + if (errno == ENOENT) + *zidp = GLOBAL_ZONEID; + else + *zidp = dhv.dhv_zid; + + (void) close(fd); + return 
(DLADM_STATUS_OK); +} + +typedef int (*zone_get_devroot_t)(char *, char *, size_t); + +static int +i_dladm_get_zone_dev(char *zone_name, char *dev, size_t devlen) +{ + char root[MAXPATHLEN]; + zone_get_devroot_t real_zone_get_devroot; + void *dlhandle; + void *sym; + int ret; + + if ((dlhandle = dlopen("libzonecfg.so.1", RTLD_LAZY)) == NULL) + return (-1); + + if ((sym = dlsym(dlhandle, "zone_get_devroot")) == NULL) { + (void) dlclose(dlhandle); + return (-1); + } + + real_zone_get_devroot = (zone_get_devroot_t)sym; + + if ((ret = real_zone_get_devroot(zone_name, root, sizeof (root))) == 0) + (void) snprintf(dev, devlen, "%s%s", root, "/dev"); + (void) dlclose(dlhandle); + return (ret); +} + +static dladm_status_t +i_dladm_add_deventry(zoneid_t zid, const char *link) +{ + char path[MAXPATHLEN]; + di_prof_t prof = NULL; + char zone_name[ZONENAME_MAX]; + dladm_status_t status; + + if (getzonenamebyid(zid, zone_name, sizeof (zone_name)) < 0) + return (dladm_errno2status(errno)); + if (i_dladm_get_zone_dev(zone_name, path, sizeof (path)) != 0) + return (dladm_errno2status(errno)); + if (di_prof_init(path, &prof) != 0) + return (dladm_errno2status(errno)); + + status = DLADM_STATUS_OK; + if (di_prof_add_dev(prof, link) != 0) { + status = dladm_errno2status(errno); + goto cleanup; + } + if (di_prof_commit(prof) != 0) + status = dladm_errno2status(errno); +cleanup: + if (prof) + di_prof_fini(prof); + + return (status); +} + +static dladm_status_t +i_dladm_remove_deventry(zoneid_t zid, const char *link) +{ + char path[MAXPATHLEN]; + di_prof_t prof = NULL; + char zone_name[ZONENAME_MAX]; + dladm_status_t status; + + if (getzonenamebyid(zid, zone_name, sizeof (zone_name)) < 0) + return (dladm_errno2status(errno)); + if (i_dladm_get_zone_dev(zone_name, path, sizeof (path)) != 0) + return (dladm_errno2status(errno)); + if (di_prof_init(path, &prof) != 0) + return (dladm_errno2status(errno)); + + status = DLADM_STATUS_OK; + if (di_prof_add_exclude(prof, link) != 0) { + status = dladm_errno2status(errno); + goto cleanup; + } + if (di_prof_commit(prof) != 0) + status = dladm_errno2status(errno); +cleanup: + if (prof) + di_prof_fini(prof); + + return (status); +} + +static dladm_status_t +do_get_zone(const char *link, char **prop_val, uint_t *val_cnt) +{ + char zone_name[ZONENAME_MAX]; + zoneid_t zid; + dladm_status_t status; + + status = i_dladm_get_zoneid(link, &zid); + if (status != DLADM_STATUS_OK) + return (status); + + *val_cnt = 1; + if (zid != GLOBAL_ZONEID) { + if (getzonenamebyid(zid, zone_name, sizeof (zone_name)) < 0) + return (dladm_errno2status(errno)); + + (void) strncpy(*prop_val, zone_name, DLADM_PROP_VAL_MAX); + } else { + *prop_val[0] = '\0'; + } + + return (DLADM_STATUS_OK); +} + +static dladm_status_t +do_set_zone(const char *link, val_desc_t *vdp, uint_t val_cnt) +{ + dladm_status_t status; + zoneid_t zid_old, zid_new; + char buff[IF_NAMESIZE + 1]; + struct stat st; + + if (val_cnt != 1) + return (DLADM_STATUS_BADVALCNT); + + status = i_dladm_get_zoneid(link, &zid_old); + if (status != DLADM_STATUS_OK) + return (status); + + /* Do nothing if setting to current value */ + zid_new = (zoneid_t)vdp->vd_val; + if (zid_new == zid_old) + return (DLADM_STATUS_OK); + + /* Do a stat to get the vlan created by MAC, if it's not there */ + (void) strcpy(buff, "/dev/"); + (void) strlcat(buff, link, IF_NAMESIZE); + (void) stat(buff, &st); + + if (zid_old != GLOBAL_ZONEID) { + if (dladm_rele_link(link, GLOBAL_ZONEID, B_TRUE) < 0) + return (dladm_errno2status(errno)); + + if 
(zone_remove_datalink(zid_old, (char *)link) != 0 && + errno != ENXIO) { + status = dladm_errno2status(errno); + goto rollback1; + } + + status = i_dladm_remove_deventry(zid_old, link); + if (status != DLADM_STATUS_OK) + goto rollback2; + } + + if (zid_new != GLOBAL_ZONEID) { + if (zone_add_datalink(zid_new, (char *)link) != 0) { + status = dladm_errno2status(errno); + goto rollback3; + } + + if (dladm_hold_link(link, zid_new, B_TRUE) < 0) { + (void) zone_remove_datalink(zid_new, (char *)link); + status = dladm_errno2status(errno); + goto rollback3; + } + + status = i_dladm_add_deventry(zid_new, link); + if (status != DLADM_STATUS_OK) { + (void) dladm_rele_link(link, GLOBAL_ZONEID, B_FALSE); + (void) zone_remove_datalink(zid_new, (char *)link); + goto rollback3; + } + } + return (DLADM_STATUS_OK); + +rollback3: + if (zid_old != GLOBAL_ZONEID) + (void) i_dladm_add_deventry(zid_old, link); +rollback2: + if (zid_old != GLOBAL_ZONEID) + (void) zone_add_datalink(zid_old, (char *)link); +rollback1: + (void) dladm_hold_link(link, zid_old, B_FALSE); +cleanexit: + return (status); +} + +/* ARGSUSED */ +static dladm_status_t +do_check_zone(prop_desc_t *pdp, char **prop_val, uint_t val_cnt, + val_desc_t **vdpp) +{ + zoneid_t zid; + val_desc_t *vdp = NULL; + + if (val_cnt != 1) + return (DLADM_STATUS_BADVALCNT); + + if ((zid = getzoneidbyname(*prop_val)) == -1) + return (DLADM_STATUS_BADVAL); + + if (zid != GLOBAL_ZONEID) { + ushort_t flags; + + if (zone_getattr(zid, ZONE_ATTR_FLAGS, &flags, + sizeof (flags)) < 0) { + return (dladm_errno2status(errno)); + } + + if (!(flags & ZF_NET_EXCL)) { + return (DLADM_STATUS_BADVAL); + } + } + + vdp = malloc(sizeof (val_desc_t)); + if (vdp == NULL) + return (DLADM_STATUS_NOMEM); + + vdp->vd_val = (void *)zid; + *vdpp = vdp; + return (DLADM_STATUS_OK); +} + +static dladm_status_t +i_dladm_get_prop_temp(const char *link, dladm_prop_type_t type, + const char *prop_name, char **prop_val, uint_t *val_cntp) +{ + int i; + dladm_status_t status; + uint_t cnt; + prop_desc_t *pdp; + + if (link == NULL || prop_name == NULL || prop_val == NULL || + val_cntp == NULL || *val_cntp == 0) + return (DLADM_STATUS_BADARG); + + for (i = 0; i < MAX_PROPS; i++) + if (strcasecmp(prop_name, prop_table[i].pd_name) == 0) + break; + + if (i == MAX_PROPS) + return (DLADM_STATUS_NOTFOUND); + + pdp = &prop_table[i]; + status = DLADM_STATUS_OK; + + switch (type) { + case DLADM_PROP_VAL_CURRENT: + status = pdp->pd_get(link, prop_val, val_cntp); + break; + case DLADM_PROP_VAL_DEFAULT: + if (pdp->pd_defval.vd_name == NULL) { + status = DLADM_STATUS_NOTSUP; + break; + } + (void) strcpy(*prop_val, pdp->pd_defval.vd_name); + *val_cntp = 1; + break; + + case DLADM_PROP_VAL_MODIFIABLE: + if (pdp->pd_getmod != NULL) { + status = pdp->pd_getmod(link, prop_val, val_cntp); + break; + } + cnt = pdp->pd_nmodval; + if (cnt == 0) { + status = DLADM_STATUS_NOTSUP; + } else if (cnt > *val_cntp) { + status = DLADM_STATUS_TOOSMALL; + } else { + for (i = 0; i < cnt; i++) { + (void) strcpy(prop_val[i], + pdp->pd_modval[i].vd_name); + } + *val_cntp = cnt; + } + break; + default: + status = DLADM_STATUS_BADARG; + break; + } + + return (status); +} + +static dladm_status_t +i_dladm_set_one_prop_temp(const char *link, prop_desc_t *pdp, char **prop_val, + uint_t val_cnt, uint_t flags) +{ + dladm_status_t status; + val_desc_t *vdp = NULL; + uint_t cnt; + + if (pdp->pd_temponly && (flags & DLADM_OPT_PERSIST) != 0) + return (DLADM_STATUS_TEMPONLY); + + if (pdp->pd_set == NULL) + return (DLADM_STATUS_PROPRDONLY); + + if 
(prop_val != NULL) { + if (pdp->pd_check != NULL) + status = pdp->pd_check(pdp, prop_val, val_cnt, &vdp); + else + status = DLADM_STATUS_BADARG; + + if (status != DLADM_STATUS_OK) + return (status); + + cnt = val_cnt; + } else { + if (pdp->pd_defval.vd_name == NULL) + return (DLADM_STATUS_NOTSUP); + + if ((vdp = malloc(sizeof (val_desc_t))) == NULL) + return (DLADM_STATUS_NOMEM); + + (void) memcpy(vdp, &pdp->pd_defval, sizeof (val_desc_t)); + cnt = 1; + } + + status = pdp->pd_set(link, vdp, cnt); + + free(vdp); + return (status); +} + +static dladm_status_t +i_dladm_set_prop_temp(const char *link, const char *prop_name, char **prop_val, + uint_t val_cnt, uint_t flags, char **errprop) +{ + int i; + dladm_status_t status = DLADM_STATUS_OK; + boolean_t found = B_FALSE; + + for (i = 0; i < MAX_PROPS; i++) { + prop_desc_t *pdp = &prop_table[i]; + dladm_status_t s; + + if (prop_name != NULL && + (strcasecmp(prop_name, pdp->pd_name) != 0)) + continue; + + found = B_TRUE; + s = i_dladm_set_one_prop_temp(link, pdp, prop_val, val_cnt, + flags); + + if (prop_name != NULL) { + status = s; + break; + } else { + if (s != DLADM_STATUS_OK && + s != DLADM_STATUS_NOTSUP) { + if (errprop != NULL) + *errprop = pdp->pd_name; + status = s; + break; + } + } + } + + if (!found) + status = DLADM_STATUS_NOTFOUND; + + return (status); +} + +static boolean_t +i_dladm_is_prop_temponly(const char *prop_name, char **errprop) +{ + int i; + + for (i = 0; i < MAX_PROPS; i++) { + prop_desc_t *pdp = &prop_table[i]; + + if (prop_name != NULL && + (strcasecmp(prop_name, pdp->pd_name) != 0)) + continue; + + if (errprop != NULL) + *errprop = pdp->pd_name; + + if (pdp->pd_temponly) + return (B_TRUE); + } + + return (B_FALSE); +} + +boolean_t +dladm_is_prop_temponly(const char *prop_name, char **errprop) +{ + return (i_dladm_is_prop_temponly(prop_name, errprop)); +} diff --git a/usr/src/lib/libdladm/common/mapfile-vers b/usr/src/lib/libdladm/common/mapfile-vers index 2af8201536..34d1e2c6b2 100644 --- a/usr/src/lib/libdladm/common/mapfile-vers +++ b/usr/src/lib/libdladm/common/mapfile-vers @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. 
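The temporary link-property support added above is table-driven: each property is a prop_desc_t naming the property and its get/set/check callbacks, and i_dladm_set_prop_temp() simply walks prop_table, matching the name case-insensitively (or applying every settable property when prop_name is NULL). A minimal, self-contained sketch of that dispatch shape — the my_prop_desc_t type and the sample callback are illustrative stand-ins, not the libdladm definitions:

#include <stdio.h>
#include <strings.h>

typedef enum { ST_OK, ST_NOTFOUND } my_status_t;

/* Illustrative stand-in for libdladm's prop_desc_t: a name plus callbacks. */
typedef struct {
        const char      *pd_name;
        my_status_t     (*pd_set)(const char *, const char *);
        int             pd_temponly;    /* cannot be made persistent */
} my_prop_desc_t;

static my_status_t
zone_set(const char *link, const char *val)
{
        (void) printf("assign link %s to zone %s\n", link, val);
        return (ST_OK);
}

static my_prop_desc_t prop_table[] = {
        { "zone", zone_set, 1 },
};
#define MAX_PROPS       (sizeof (prop_table) / sizeof (prop_table[0]))

/* Case-insensitive lookup, then dispatch through the table, as above. */
static my_status_t
set_prop_temp(const char *link, const char *name, const char *val)
{
        size_t i;

        for (i = 0; i < MAX_PROPS; i++) {
                if (strcasecmp(name, prop_table[i].pd_name) == 0)
                        return (prop_table[i].pd_set(link, val));
        }
        return (ST_NOTFOUND);
}

int
main(void)
{
        return (set_prop_temp("bge0", "zone", "myzone") == ST_OK ? 0 : 1);
}

The same table also carries per-property metadata such as pd_temponly, which is how dladm_is_prop_temponly() can answer whether a property may be made persistent without ever touching the datalink itself.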
# # ident "%Z%%M% %I% %E% SMI" @@ -33,6 +33,7 @@ SUNWprivate_1.1 { dladm_get_prop; dladm_set_prop; dladm_walk_prop; + dladm_is_prop_temponly; dladm_get_secobj; dladm_set_secobj; dladm_unset_secobj; @@ -44,6 +45,8 @@ SUNWprivate_1.1 { dladm_init_secobj; dladm_set_rootdir; + dladm_hold_link; + dladm_rele_link; local: *; }; diff --git a/usr/src/lib/libsecdb/exec_attr.txt b/usr/src/lib/libsecdb/exec_attr.txt index 40de76e3d4..e568985fcb 100644 --- a/usr/src/lib/libsecdb/exec_attr.txt +++ b/usr/src/lib/libsecdb/exec_attr.txt @@ -104,12 +104,12 @@ File System Management:suser:cmd:::/usr/sbin/umount:uid=0 File System Management:suser:cmd:::/usr/sbin/umountall:uid=0 File System Management:suser:cmd:::/usr/sbin/unshare:uid=0;gid=root File System Management:suser:cmd:::/usr/sbin/unshareall:uid=0;gid=root -IP Filter Management:solaris:cmd:::/usr/sbin/ipf:privs=sys_net_config -IP Filter Management:solaris:cmd:::/usr/sbin/ipfs:privs=sys_net_config -IP Filter Management:solaris:cmd:::/usr/sbin/ipmon:privs=sys_net_config -IP Filter Management:solaris:cmd:::/usr/sbin/ipfstat:privs=sys_net_config;gid=sys -IP Filter Management:solaris:cmd:::/usr/sbin/ipnat:privs=sys_net_config;gid=sys -IP Filter Management:solaris:cmd:::/usr/sbin/ippool:privs=sys_net_config;gid=sys +IP Filter Management:solaris:cmd:::/usr/sbin/ipf:privs=sys_ip_config +IP Filter Management:solaris:cmd:::/usr/sbin/ipfs:privs=sys_ip_config +IP Filter Management:solaris:cmd:::/usr/sbin/ipmon:privs=sys_ip_config +IP Filter Management:solaris:cmd:::/usr/sbin/ipfstat:privs=sys_ip_config;gid=sys +IP Filter Management:solaris:cmd:::/usr/sbin/ipnat:privs=sys_ip_config;gid=sys +IP Filter Management:solaris:cmd:::/usr/sbin/ippool:privs=sys_ip_config;gid=sys Kerberos Server Management:solaris:cmd:::/usr/lib/krb5/krb5kdc:uid=0 Kerberos Server Management:solaris:cmd:::/usr/lib/krb5/kadmind:uid=0 Kerberos Server Management:solaris:cmd:::/usr/lib/krb5/kprop:euid=0;privs=none @@ -175,9 +175,9 @@ Name Service Security:suser:cmd:::/usr/sbin/nisinit:euid=0 Name Service Security:suser:cmd:::/usr/sbin/nislog:euid=0 Name Service Security:suser:cmd:::/usr/sbin/rpc.nisd:uid=0;gid=0 Network Management:solaris:cmd:::/sbin/ifconfig:uid=0 -Network Management:solaris:cmd:::/sbin/route:privs=sys_net_config +Network Management:solaris:cmd:::/sbin/route:privs=sys_ip_config Network Management:solaris:cmd:::/sbin/routeadm:euid=0;\ - privs=proc_chroot,proc_owner,sys_net_config + privs=proc_chroot,proc_owner,sys_ip_config Network Management:solaris:cmd:::/sbin/dladm:euid=dladm;egid=sys;\ privs=sys_net_config,net_rawaccess,proc_audit Network Management:suser:cmd:::/usr/bin/netstat:uid=0 @@ -194,15 +194,15 @@ Network Management:suser:cmd:::/usr/sbin/snoop:uid=0 Network Management:suser:cmd:::/usr/sbin/spray:euid=0 Network Link Security:solaris:cmd:::/sbin/dladm:euid=dladm;egid=sys;\ privs=sys_net_config,net_rawaccess,proc_audit -Network Security:solaris:cmd:::/usr/lib/inet/certdb:privs=sys_net_config -Network Security:solaris:cmd:::/usr/lib/inet/certlocal:privs=sys_net_config -Network Security:solaris:cmd:::/usr/lib/inet/certrldb:privs=sys_net_config -Network Security:solaris:cmd:::/usr/lib/inet/in.iked:privs=sys_net_config,net_privaddr -Network Security:solaris:cmd:::/usr/sbin/ikeadm:privs=sys_net_config -Network Security:solaris:cmd:::/usr/sbin/ikecert:privs=sys_net_config -Network Security:solaris:cmd:::/usr/sbin/ipsecconf:privs=sys_net_config -Network Security:solaris:cmd:::/usr/sbin/ipseckey:privs=sys_net_config -Network 
Security:solaris:cmd:::/usr/sbin/ipsecalgs:privs=sys_net_config +Network Security:solaris:cmd:::/usr/lib/inet/certdb:privs=sys_ip_config +Network Security:solaris:cmd:::/usr/lib/inet/certlocal:privs=sys_ip_config +Network Security:solaris:cmd:::/usr/lib/inet/certrldb:privs=sys_ip_config +Network Security:solaris:cmd:::/usr/lib/inet/in.iked:privs=sys_ip_config,net_privaddr +Network Security:solaris:cmd:::/usr/sbin/ikeadm:privs=sys_ip_config +Network Security:solaris:cmd:::/usr/sbin/ikecert:privs=sys_ip_config +Network Security:solaris:cmd:::/usr/sbin/ipsecconf:privs=sys_ip_config +Network Security:solaris:cmd:::/usr/sbin/ipseckey:privs=sys_ip_config +Network Security:solaris:cmd:::/usr/sbin/ipsecalgs:privs=sys_ip_config Network Security:solaris:cmd:::/usr/sbin/ksslcfg:euid=0 Network Security:suser:cmd:::/usr/bin/ssh-keygen:uid=0;gid=sys Network Security:suser:cmd:::/usr/lib/inet/certdb:euid=0 diff --git a/usr/src/lib/libwladm/common/libwladm.c b/usr/src/lib/libwladm/common/libwladm.c index 567be6148c..7724008ee5 100644 --- a/usr/src/lib/libwladm/common/libwladm.c +++ b/usr/src/lib/libwladm/common/libwladm.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -1054,7 +1054,7 @@ do_set_prop(int fd, wldp_t *gbuf, prop_desc_t *pdp, wladm_status_t wladm_set_prop(const char *link, const char *prop_name, - char **prop_val, uint_t val_cnt) + char **prop_val, uint_t val_cnt, char **errprop) { int fd, i; wldp_t *gbuf = NULL; @@ -1089,8 +1089,12 @@ wladm_set_prop(const char *link, const char *prop_name, break; } else { if (s != WLADM_STATUS_OK && - s != WLADM_STATUS_NOTSUP) + s != WLADM_STATUS_NOTSUP) { + if (errprop != NULL) + *errprop = pdp->pd_name; status = s; + break; + } } } if (!found) diff --git a/usr/src/lib/libwladm/common/libwladm.h b/usr/src/lib/libwladm/common/libwladm.h index 0a5d24df9e..45122cf312 100644 --- a/usr/src/lib/libwladm/common/libwladm.h +++ b/usr/src/lib/libwladm/common/libwladm.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -174,7 +174,7 @@ extern wladm_status_t wladm_get_link_attr(const char *, wladm_link_attr_t *); extern wladm_status_t wladm_walk(void *, boolean_t (*)(void *, const char *)); extern boolean_t wladm_is_valid(const char *); extern wladm_status_t wladm_set_prop(const char *, const char *, char **, - uint_t); + uint_t, char **); extern wladm_status_t wladm_walk_prop(const char *, void *, boolean_t (*)(void *, const char *)); extern wladm_status_t wladm_get_prop(const char *, wladm_prop_type_t, diff --git a/usr/src/lib/libzonecfg/common/libzonecfg.c b/usr/src/lib/libzonecfg/common/libzonecfg.c index 1a3fb37c8c..cce47ce753 100644 --- a/usr/src/lib/libzonecfg/common/libzonecfg.c +++ b/usr/src/lib/libzonecfg/common/libzonecfg.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
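The extra char ** argument grown by wladm_set_prop() above (mirroring the libdladm property code earlier in this change) lets a caller that applies every property at once — prop_name passed as NULL — learn which individual property the failure was attributed to. A rough sketch of the calling pattern; it assumes, as in the libdladm counterpart, that a NULL prop_name/prop_val pair means "reset all properties to their defaults", and libwladm.h is a private header, so treat the prototypes as illustrative:

#include <stdio.h>
#include <libwladm.h>   /* private interface; shown for illustration only */

/* Reset every wifi link property to its default, naming any failure. */
static int
reset_wifi_props(const char *link)
{
        char            *errprop = NULL;
        wladm_status_t  status;

        /* NULL prop_name: walk all properties; NULL prop_val: use defaults */
        status = wladm_set_prop(link, NULL, NULL, 0, &errprop);
        if (status != WLADM_STATUS_OK) {
                (void) fprintf(stderr, "cannot reset properties of %s%s%s\n",
                    link, errprop != NULL ? ": failed on " : "",
                    errprop != NULL ? errprop : "");
                return (-1);
        }
        return (0);
}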
*/ @@ -45,6 +45,7 @@ #include <sys/mnttab.h> #include <sys/nvpair.h> #include <sys/types.h> +#include <sys/sockio.h> #include <ftw.h> #include <pool.h> #include <libscf.h> @@ -65,7 +66,6 @@ #include <libzonecfg.h> #include "zonecfg_impl.h" - #define _PATH_TMPFILE "/zonecfg.XXXXXX" #define ZONE_CB_RETRY_COUNT 10 #define ZONE_EVENT_PING_SUBCLASS "ping" @@ -95,6 +95,7 @@ #define DTD_ATTR_ACTION (const xmlChar *) "action" #define DTD_ATTR_ADDRESS (const xmlChar *) "address" #define DTD_ATTR_AUTOBOOT (const xmlChar *) "autoboot" +#define DTD_ATTR_IPTYPE (const xmlChar *) "ip-type" #define DTD_ATTR_DIR (const xmlChar *) "directory" #define DTD_ATTR_LIMIT (const xmlChar *) "limit" #define DTD_ATTR_LIMITPRIV (const xmlChar *) "limitpriv" @@ -1497,6 +1498,69 @@ out: return (error); } +int +zonecfg_get_iptype(zone_dochandle_t handle, zone_iptype_t *iptypep) +{ + char property[10]; /* 10 is big enough for "shared"/"exclusive" */ + int err; + + err = getrootattr(handle, DTD_ATTR_IPTYPE, property, sizeof (property)); + if (err == Z_BAD_PROPERTY) { + /* Return default value */ + *iptypep = ZS_SHARED; + return (Z_OK); + } else if (err != Z_OK) { + return (err); + } + + if (strlen(property) == 0 || + strcmp(property, "shared") == 0) + *iptypep = ZS_SHARED; + else if (strcmp(property, "exclusive") == 0) + *iptypep = ZS_EXCLUSIVE; + else + return (Z_INVAL); + + return (Z_OK); +} + +int +zonecfg_set_iptype(zone_dochandle_t handle, zone_iptype_t iptype) +{ + xmlNodePtr cur; + + if (handle == NULL) + return (Z_INVAL); + + cur = xmlDocGetRootElement(handle->zone_dh_doc); + if (cur == NULL) { + return (Z_EMPTY_DOCUMENT); + } + + if (xmlStrcmp(cur->name, DTD_ELEM_ZONE) != 0) { + return (Z_WRONG_DOC_TYPE); + } + switch (iptype) { + case ZS_SHARED: + /* + * Since "shared" is the default, we don't write it to the + * configuration file, so that it's easier to migrate those + * zones elsewhere, eg., to systems which are not IP-Instances + * aware. + * xmlUnsetProp only fails when the attribute doesn't exist, + * which we don't care. + */ + (void) xmlUnsetProp(cur, DTD_ATTR_IPTYPE); + break; + case ZS_EXCLUSIVE: + if (xmlSetProp(cur, DTD_ATTR_IPTYPE, + (const xmlChar *) "exclusive") == NULL) + return (Z_INVAL); + break; + } + return (Z_OK); +} + static int newprop(xmlNodePtr node, const xmlChar *attrname, char *src) { @@ -2038,6 +2102,30 @@ zonecfg_valid_net_address(char *address, struct lifreq *lifr) return (Z_OK); } +boolean_t +zonecfg_ifname_exists(sa_family_t af, char *ifname) +{ + struct lifreq lifr; + int so; + int save_errno; + + (void) memset(&lifr, 0, sizeof (lifr)); + (void) strlcpy(lifr.lifr_name, ifname, sizeof (lifr.lifr_name)); + lifr.lifr_addr.ss_family = af; + if ((so = socket(af, SOCK_DGRAM, 0)) < 0) { + /* Odd - can't tell if the ifname exists */ + return (B_FALSE); + } + if (ioctl(so, SIOCGLIFFLAGS, (caddr_t)&lifr) < 0) { + save_errno = errno; + (void) close(so); + errno = save_errno; + return (B_FALSE); + } + (void) close(so); + return (B_TRUE); +} + int zonecfg_lookup_nwif(zone_dochandle_t handle, struct zone_nwiftab *tabptr) { diff --git a/usr/src/lib/libzonecfg/common/mapfile-vers b/usr/src/lib/libzonecfg/common/mapfile-vers index e2bb782688..384641b1a7 100644 --- a/usr/src/lib/libzonecfg/common/mapfile-vers +++ b/usr/src/lib/libzonecfg/common/mapfile-vers @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. 
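zonecfg_ifname_exists() above decides whether an interface name is plumbed for a given address family by issuing SIOCGLIFFLAGS on a throwaway datagram socket: if the kernel has no logical interface of that name, the ioctl fails (typically with ENXIO) and the function reports B_FALSE. The same idiom works in a standalone program; a small sketch with minimal error handling:

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <net/if.h>
#include <string.h>
#include <unistd.h>
#include <stdio.h>

/* Return 1 if an interface of the given name is plumbed for family af. */
static int
ifname_exists(sa_family_t af, const char *ifname)
{
        struct lifreq   lifr;
        int             s;
        int             ret;

        (void) memset(&lifr, 0, sizeof (lifr));
        (void) strlcpy(lifr.lifr_name, ifname, sizeof (lifr.lifr_name));
        if ((s = socket(af, SOCK_DGRAM, 0)) < 0)
                return (0);
        /* SIOCGLIFFLAGS fails when no such logical interface is plumbed */
        ret = ioctl(s, SIOCGLIFFLAGS, (caddr_t)&lifr) >= 0;
        (void) close(s);
        return (ret);
}

int
main(int argc, char **argv)
{
        if (argc != 2) {
                (void) fprintf(stderr, "usage: %s ifname\n", argv[0]);
                return (2);
        }
        (void) printf("%s: %s\n", argv[1],
            ifname_exists(AF_INET, argv[1]) ? "exists" : "not plumbed");
        return (0);
}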
# # ident "%Z%%M% %I% %E% SMI" @@ -106,6 +106,7 @@ SUNWprivate_1.1 { zonecfg_getfsent; zonecfg_get_handle; zonecfg_getipdent; + zonecfg_get_iptype; zonecfg_get_limitpriv; zonecfg_getmcapent; zonecfg_get_name; @@ -125,6 +126,7 @@ SUNWprivate_1.1 { zonecfg_get_uuid; zonecfg_get_xml_handle; zonecfg_get_zonepath; + zonecfg_ifname_exists; zonecfg_in_alt_root; zonecfg_init_handle; zonecfg_is_rctl; @@ -172,6 +174,7 @@ SUNWprivate_1.1 { zonecfg_setdsent; zonecfg_setfsent; zonecfg_setipdent; + zonecfg_set_iptype; zonecfg_set_limitpriv; zonecfg_set_name; zonecfg_setnwifent; diff --git a/usr/src/lib/libzonecfg/dtd/zonecfg.dtd.1 b/usr/src/lib/libzonecfg/dtd/zonecfg.dtd.1 index c51e89add3..5de8176c42 100644 --- a/usr/src/lib/libzonecfg/dtd/zonecfg.dtd.1 +++ b/usr/src/lib/libzonecfg/dtd/zonecfg.dtd.1 @@ -20,7 +20,7 @@ CDDL HEADER END - Copyright 2006 Sun Microsystems, Inc. All rights reserved. + Copyright 2007 Sun Microsystems, Inc. All rights reserved. Use is subject to license terms. ident "%Z%%M% %I% %E% SMI" @@ -44,7 +44,7 @@ <!ELEMENT network EMPTY> -<!ATTLIST network address CDATA #REQUIRED +<!ATTLIST network address CDATA "" physical CDATA #REQUIRED> <!ELEMENT device EMPTY> @@ -136,6 +136,7 @@ <!ATTLIST zone name CDATA #REQUIRED zonepath CDATA #REQUIRED autoboot (true | false) #REQUIRED + ip-type CDATA "" pool CDATA "" limitpriv CDATA "" bootargs CDATA "" diff --git a/usr/src/pkgdefs/SUNWcnetr/pkginfo.tmpl b/usr/src/pkgdefs/SUNWcnetr/pkginfo.tmpl index 5a9803a32b..ddf799cca1 100644 --- a/usr/src/pkgdefs/SUNWcnetr/pkginfo.tmpl +++ b/usr/src/pkgdefs/SUNWcnetr/pkginfo.tmpl @@ -48,7 +48,7 @@ CLASSES="none ipsecalgsbase dhcpagent preserve sock2path" BASEDIR=/ SUNW_PKGVERS="1.0" SUNW_PKG_ALLZONES="true" -SUNW_PKG_HOLLOW="true" +SUNW_PKG_HOLLOW="false" SUNW_PKG_THISZONE="false" #VSTOCK="<reserved by Release Engineering for package part #>" #ISTATES="<developer defined>" diff --git a/usr/src/pkgdefs/SUNWhea/prototype_com b/usr/src/pkgdefs/SUNWhea/prototype_com index fc2a8c7d1e..3a53f965b1 100644 --- a/usr/src/pkgdefs/SUNWhea/prototype_com +++ b/usr/src/pkgdefs/SUNWhea/prototype_com @@ -141,10 +141,13 @@ f none usr/include/inet/ip_ftable.h 644 root bin f none usr/include/inet/ip_multi.h 644 root bin f none usr/include/inet/ip_netinfo.h 644 root bin f none usr/include/inet/ip_rts.h 644 root bin +f none usr/include/inet/ip_stack.h 644 root bin f none usr/include/inet/ip6.h 644 root bin f none usr/include/inet/ip6_asp.h 644 root bin f none usr/include/inet/ipclassifier.h 644 root bin f none usr/include/inet/ipp_common.h 644 root bin +d none usr/include/inet/kssl 755 root bin +f none usr/include/inet/kssl/ksslapi.h 644 root bin f none usr/include/inet/led.h 644 root bin f none usr/include/inet/mi.h 644 root bin f none usr/include/inet/mib2.h 644 root bin @@ -154,9 +157,8 @@ f none usr/include/inet/sctp_itf.h 644 root bin f none usr/include/inet/snmpcom.h 644 root bin f none usr/include/inet/tcp.h 644 root bin f none usr/include/inet/tcp_sack.h 644 root bin +f none usr/include/inet/tcp_stack.h 644 root bin f none usr/include/inet/wifi_ioctl.h 644 root bin -d none usr/include/inet/kssl 755 root bin -f none usr/include/inet/kssl/ksslapi.h 644 root bin f none usr/include/inttypes.h 644 root bin f none usr/include/ipmp.h 644 root bin f none usr/include/ipmp_mpathd.h 644 root bin @@ -904,6 +906,7 @@ f none usr/include/sys/nexusdefs.h 644 root bin f none usr/include/sys/ndifm.h 644 root bin f none usr/include/sys/ndi_impldefs.h 644 root bin f none usr/include/sys/neti.h 644 root bin +f none 
usr/include/sys/netstack.h 644 root bin f none usr/include/sys/note.h 644 root bin f none usr/include/sys/nvpair.h 644 root bin f none usr/include/sys/nvpair_impl.h 644 root bin diff --git a/usr/src/pkgdefs/SUNWipfh/prototype_com b/usr/src/pkgdefs/SUNWipfh/prototype_com index 4e9f9ce951..466c9715f8 100644 --- a/usr/src/pkgdefs/SUNWipfh/prototype_com +++ b/usr/src/pkgdefs/SUNWipfh/prototype_com @@ -1,5 +1,5 @@ # -# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # #ident "%Z%%M% %I% %E% SMI" @@ -26,9 +26,15 @@ i depend d none usr 755 root sys d none usr/include 755 root bin d none usr/include/netinet 755 root bin +f none usr/include/netinet/ipf_stack.h 644 root bin f none usr/include/netinet/ip_compat.h 644 root bin f none usr/include/netinet/ip_fil.h 644 root bin f none usr/include/netinet/ip_nat.h 644 root bin f none usr/include/netinet/ip_state.h 644 root bin f none usr/include/netinet/ip_proxy.h 644 root bin f none usr/include/netinet/ipl.h 644 root bin +f none usr/include/netinet/ip_frag.h 644 root bin +f none usr/include/netinet/ip_auth.h 644 root bin +f none usr/include/netinet/ip_pool.h 644 root bin +f none usr/include/netinet/ip_htable.h 644 root bin +f none usr/include/netinet/ip_lookup.h 644 root bin diff --git a/usr/src/pkgdefs/SUNWipfr/pkginfo.tmpl b/usr/src/pkgdefs/SUNWipfr/pkginfo.tmpl index ce2abc21fd..8133ef204a 100644 --- a/usr/src/pkgdefs/SUNWipfr/pkginfo.tmpl +++ b/usr/src/pkgdefs/SUNWipfr/pkginfo.tmpl @@ -1,5 +1,5 @@ # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # ident "%Z%%M% %I% %E% SMI" @@ -27,7 +27,7 @@ CLASSES="none preserve manifest" BASEDIR=/ SUNW_PKGVERS="1.0" SUNW_PKG_ALLZONES="true" -SUNW_PKG_HOLLOW="true" +SUNW_PKG_HOLLOW="false" SUNW_PKG_THISZONE="false" #VSTOCK="<reserved by Release Engineering for package part #>" #ISTATES="<developer defined>" diff --git a/usr/src/pkgdefs/SUNWipfr/prototype_com b/usr/src/pkgdefs/SUNWipfr/prototype_com index 39bf9a08aa..261071a845 100644 --- a/usr/src/pkgdefs/SUNWipfr/prototype_com +++ b/usr/src/pkgdefs/SUNWipfr/prototype_com @@ -54,9 +54,6 @@ d none lib 755 root bin d none lib/svc 0755 root bin d none lib/svc/method 0755 root bin f none lib/svc/method/ipfilter 0555 root bin -d none kernel 755 root sys -d none kernel/drv 755 root sys -d none kernel/strmod 755 root sys d none var 755 root sys d none var/db 755 root sys d none var/db/ipf 755 root sys diff --git a/usr/src/pkgdefs/common_files/i.devpolicy b/usr/src/pkgdefs/common_files/i.devpolicy index 811a2db18b..16df7b244c 100644 --- a/usr/src/pkgdefs/common_files/i.devpolicy +++ b/usr/src/pkgdefs/common_files/i.devpolicy @@ -22,7 +22,7 @@ # # ident "%Z%%M% %I% %E% SMI" # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. 
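Together with the new ip-type attribute in the zone DTD and the zonecfg_get_iptype()/zonecfg_set_iptype() exports above, a configuration consumer can ask whether a zone runs on the shared IP stack or on its own exclusive IP Instance. A rough sketch of the read side, using the usual libzonecfg handle calls — this is a private interface and the exact prototypes may differ, so take it as illustrative:

#include <stdio.h>
#include <libzonecfg.h> /* private libzonecfg interface; illustrative use */

/* Print whether the named zone uses a shared or an exclusive IP instance. */
static int
print_iptype(const char *zonename)
{
        zone_dochandle_t        handle;
        zone_iptype_t           iptype;
        int                     err;

        if ((handle = zonecfg_init_handle()) == NULL)
                return (-1);
        if ((err = zonecfg_get_handle(zonename, handle)) != Z_OK) {
                zonecfg_fini_handle(handle);
                return (err);
        }
        /* Zones with no ip-type property default to shared, per above. */
        err = zonecfg_get_iptype(handle, &iptype);
        if (err == Z_OK) {
                (void) printf("%s: ip-type=%s\n", zonename,
                    iptype == ZS_EXCLUSIVE ? "exclusive" : "shared");
        }
        zonecfg_fini_handle(handle);
        return (err);
}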
# # NOTE: When a change is made to the source file for @@ -41,12 +41,17 @@ do sed < $dest.$$ > $dest \ -e '/md:admin/s/read_priv_set=sys_config/ /' \ -e '/^icmp[ ]*read_priv_set=net_rawaccess[ ]*write_priv_set=net_rawaccess$/d' \ - -e '/^icmp6[ ]*read_priv_set=net_rawaccess[ ]*write_priv_set=net_rawaccess$/d' + -e '/^icmp6[ ]*read_priv_set=net_rawaccess[ ]*write_priv_set=net_rawaccess$/d' \ + -e '/^keysock[ ]*read_priv_set=sys_net_config[ ]*write_priv_set=sys_net_config$/d' \ + -e '/^ipsecah[ ]*read_priv_set=sys_net_config[ ]*write_priv_set=sys_net_config$/d' \ + -e '/^ipsecesp[ ]*read_priv_set=sys_net_config[ ]*write_priv_set=sys_net_config$/d' \ + -e '/^spdsock[ ]*read_priv_set=sys_net_config[ ]*write_priv_set=sys_net_config$/d' \ + -e '/^ipf[ ]*read_priv_set=sys_net_config[ ]*write_priv_set=sys_net_config$/d' rm -f $dest.$$ # potential additions - additions="aggr aggr:ctl bge dld:ctl dnet ibd icmp icmp6 openeepr random vni ipf pfil scsi_vhci" + additions="aggr aggr:ctl bge dld:ctl dnet keysock ibd icmp icmp6 ipsecah ipsecesp openeepr random spdsock vni ipf pfil scsi_vhci" for dev in $additions do diff --git a/usr/src/tools/scripts/bfu.sh b/usr/src/tools/scripts/bfu.sh index 08ba1a27b5..694bc9b51b 100644 --- a/usr/src/tools/scripts/bfu.sh +++ b/usr/src/tools/scripts/bfu.sh @@ -99,6 +99,8 @@ all_zones_files=" etc/inet/* etc/init.d/* etc/inittab + etc/ipf/ipf.conf + etc/iu.ap etc/krb5/kadm5.acl etc/krb5/kdc.conf etc/krb5/kpropd.acl @@ -185,9 +187,6 @@ global_zone_only_files=" etc/devlink.tab etc/driver_aliases etc/driver_classes - etc/ipf/ipf.conf - etc/ipf/pfil.ap - etc/iu.ap etc/lvm/devpath etc/lvm/lock etc/lvm/md.cf @@ -240,7 +239,6 @@ superfluous_local_zone_files=" dev/pts dev/rdsk dev/rmt - dev/sad dev/stderr dev/stdin dev/stdout @@ -249,66 +247,45 @@ superfluous_local_zone_files=" devices etc/dacf.conf etc/dat - etc/default/dhcpagent - etc/default/inetinit - etc/default/ipsec etc/default/metassist.xml - etc/default/mpathd etc/default/power etc/flash/postdeployment/svm.cleanup etc/flash/predeployment/svm.save - etc/inet/datemsk.ndpd - etc/inet/ike etc/inet/ipqosconf.1.sample etc/inet/ipqosconf.2.sample etc/inet/ipqosconf.3.sample - etc/inet/ipsecalgs - etc/inet/ipsecinit.sample - etc/inet/mipagent.conf-sample - etc/inet/mipagent.conf.fa-sample - etc/inet/mipagent.conf.ha-sample - etc/inet/secret etc/inet/sock2path etc/init.d/devlinks etc/init.d/dodatadm.udaplt etc/init.d/drvconfig etc/init.d/llc2 - etc/init.d/mipagent etc/init.d/ncakmod etc/init.d/ncalogd etc/init.d/pcmcia etc/init.d/pppd etc/init.d/wrsmcfg - etc/ipf etc/llc2 etc/lvm etc/nca etc/openwin etc/ppp - etc/rc0.d/K06mipagent etc/rc0.d/K34ncalogd etc/rc0.d/K50pppd etc/rc0.d/K52llc2 - etc/rc1.d/K06mipagent etc/rc1.d/K34ncalogd etc/rc1.d/K50pppd etc/rc1.d/K52llc2 - etc/rc2.d/K06mipagent etc/rc2.d/S40llc2 etc/rc2.d/S42ncakmod etc/rc2.d/S47pppd etc/rc2.d/S81dodatadm.udaplt etc/rc2.d/S94ncalogd - etc/rc3.d/S80mipagent - etc/rcS.d/K06mipagent etc/rcS.d/K34ncalogd etc/rcS.d/K44wrsmcfg etc/rcS.d/K50pppd etc/rcS.d/K52llc2 etc/rcS.d/S29wrsmcfg etc/rcm - etc/snmp/conf/mipagent.acl - etc/snmp/conf/mipagent.reg etc/sock2path etc/usb etc/wrsm @@ -316,7 +293,6 @@ superfluous_local_zone_files=" kernel lib/libmeta.so lib/libmeta.so.1 - lib/svc/method/ipfilter lib/svc/method/sf880dr lib/svc/method/svc-cvcd lib/svc/method/svc-dcs @@ -354,18 +330,11 @@ superfluous_local_zone_files=" platform/sun4u/wanboot platform/sun4v/ufsboot platform/sun4v/wanboot - sbin/dladm sbin/metadb sbin/metadevadm sbin/metainit sbin/metarecover sbin/metastat - 
usr/include/netinet/ip_compat.h - usr/include/netinet/ip_fil.h - usr/include/netinet/ip_nat.h - usr/include/netinet/ip_proxy.h - usr/include/netinet/ip_state.h - usr/include/netinet/ipl.h usr/include/sys/dcam usr/lib/devfsadm/linkmod/SUNW_dcam1394_link.so usr/lib/ldoms @@ -375,9 +344,7 @@ superfluous_local_zone_files=" usr/platform/SUNW,SPARC-Enterprise/lib/llib-ldscp.ln usr/platform/SUNW,SPARC-Enterprise/sbin/prtdscp var/adm/pool - var/db/ipf var/log/pool - var/svc/manifest/network/ipfilter.xml var/svc/manifest/network/rpc/mdcomm.xml var/svc/manifest/network/rpc/meta.xml var/svc/manifest/network/rpc/metamed.xml diff --git a/usr/src/uts/Makefile b/usr/src/uts/Makefile index a3a6e1a383..7ed323931c 100644 --- a/usr/src/uts/Makefile +++ b/usr/src/uts/Makefile @@ -2,9 +2,8 @@ # CDDL HEADER START # # The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. # # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # ident "%Z%%M% %I% %E% SMI" @@ -48,6 +47,7 @@ clobber := TARGET= clobber lint := TARGET= lint clean.lint := TARGET= clean.lint check := TARGET= check +sis_check := TARGET= sis_check modlist := TARGET= modlist modlist := NO_STATE= -K $$MODSTATE$$$$ @@ -95,7 +95,19 @@ $(PMTMO_FILE) pmtmo_file: $(PATCH_MAKEUP_TABLE) COMMON_HDRDIRS= common/des common/fs common/gssapi common/inet common/net \ common/netinet common/nfs common/rpc common/sys common/vm \ common/c2 common/pcmcia/sys common/rpcsvc common/inet/kssl \ - common/inet/nca common/ipp + common/inet/nca common/inet/ipf/netinet common/ipp + +# +# Kernel modules which support the sis_check target for symbol checking +# +i386_SIS_MODULES= intel/arp intel/hook intel/icmp intel/ip intel/ipf \ + intel/ipsecah intel/ipsecesp intel/keysock intel/neti \ + intel/rts intel/spdsock intel/tun +sparc_SIS_MODULES= sparc/arp sparc/hook sparc/icmp sparc/ip sparc/ipf \ + sparc/ipsecah sparc/ipsecesp sparc/keysock sparc/neti \ + sparc/rts sparc/spdsock sparc/tun + +SIS_MODULES=$($(MACH)_SIS_MODULES) # These aren't the only headers in closed. But the other directories # are simple enough that they can be driven from the src tree. 
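Everything that follows on the kernel side — netstack.o joining the generic kernel objects, the new ip_stack.h/tcp_stack.h headers, and arp.c trading its file-scope globals for an arp_stack_t — hangs off the netstack framework: a module registers per-stack init and fini callbacks and is then called once for every IP Instance that comes and goes. A compressed sketch of that pattern, modeled on the arp_stack_init()/arp_stack_fini() prototypes visible below; the netstack_register() call and the NS_ARP module id follow the framework as this putback introduces it, but the rest is an approximation, not the actual arp code:

#include <sys/netstack.h>
#include <sys/kmem.h>

/* Formerly-global module state, now one instance per IP stack (sketch). */
typedef struct my_stack {
        netstack_t      *ms_netstack;
        /* ... per-stack tables, ND parameters, hooks ... */
} my_stack_t;

/* Called once for each netstack (e.g. each exclusive-IP zone) at creation. */
static void *
my_stack_init(netstackid_t stackid, netstack_t *ns)
{
        my_stack_t *ms = kmem_zalloc(sizeof (*ms), KM_SLEEP);

        ms->ms_netstack = ns;
        return (ms);
}

/* Called when the netstack goes away; undo everything init set up. */
/* ARGSUSED */
static void
my_stack_fini(netstackid_t stackid, void *arg)
{
        kmem_free(arg, sizeof (my_stack_t));
}

void
my_module_init(void)
{
        /*
         * Register so init/fini run for all current and future stacks;
         * NS_ARP is the slot arp itself claims.  No shutdown callback here.
         */
        netstack_register(NS_ARP, my_stack_init, NULL, my_stack_fini);
}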
@@ -119,6 +131,12 @@ all_h: FRC @cd common/rpcsvc; pwd; $(MAKE) $@ @cd common/gssapi; pwd; $(MAKE) $@ +# run stack instances global symbol checking to make sure +# you do intend to add a global variable +sis_check: $(SIS_MODULES) +$(SIS_MODULES): FRC + cd $@; pwd; $(MAKE) $(TARGET) + ONC_FILES= common/io/timod.c \ common/os/sig.c \ common/os/flock.c \ diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files index 927ded787f..a78f4e927d 100644 --- a/usr/src/uts/common/Makefile.files +++ b/usr/src/uts/common/Makefile.files @@ -198,6 +198,7 @@ GENUNIX_OBJS += \ nbmlock.o \ ndifm.o \ nice.o \ + netstack.o \ ntptime.o \ nvpair.o \ nvpair_alloc_system.o \ diff --git a/usr/src/uts/common/inet/Makefile b/usr/src/uts/common/inet/Makefile index 8f6c111896..4a2141e142 100644 --- a/usr/src/uts/common/inet/Makefile +++ b/usr/src/uts/common/inet/Makefile @@ -21,7 +21,7 @@ # # ident "%Z%%M% %I% %E% SMI" # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # uts/common/inet/Makefile @@ -33,8 +33,8 @@ HDRS= arp.h arp_impl.h common.h ipclassifier.h ip.h ip6.h ipdrop.h ipsecah.h \ ipsecesp.h ipsec_info.h ip6_asp.h ip_if.h ip_ire.h ip_multi.h \ ip_netinfo.h ip_ndp.h ip_rts.h ipsec_impl.h keysock.h led.h mi.h \ mib2.h nd.h optcom.h sadb.h sctp_itf.h snmpcom.h tcp.h tcp_sack.h \ - tun.h udp_impl.h rawip_impl.h ipp_common.h ip_ftable.h ip_impl.h \ - tcp_impl.h wifi_ioctl.h + tcp_stack.h tun.h udp_impl.h rawip_impl.h ipp_common.h ip_ftable.h \ + ip_impl.h tcp_impl.h wifi_ioctl.h ip_stack.h ROOTDIRS= $(ROOT)/usr/include/inet diff --git a/usr/src/uts/common/inet/arp/arp.c b/usr/src/uts/common/inet/arp/arp.c index 92f98a38eb..9677c55a78 100644 --- a/usr/src/uts/common/inet/arp/arp.c +++ b/usr/src/uts/common/inet/arp/arp.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* Copyright (c) 1990 Mentat Inc. 
*/ @@ -45,6 +45,7 @@ #include <sys/vtrace.h> #include <sys/strsun.h> #include <sys/policy.h> +#include <sys/zone.h> #include <sys/ethernet.h> #include <sys/zone.h> #include <sys/random.h> @@ -132,10 +133,6 @@ typedef struct { (mp->b_prev != AR_DRAINING && (arl->arl_queue != NULL || \ arl->arl_dlpi_pending != DL_PRIM_INVAL)) -#define ACE_EXTERNAL_FLAGS_MASK \ - (ACE_F_PERMANENT | ACE_F_PUBLISH | ACE_F_MAPPING | ACE_F_MYADDR | \ - ACE_F_AUTHORITY) - #define ARH_FIXED_LEN 8 /* @@ -149,21 +146,11 @@ typedef struct ar_m_s { uint32_t ar_mac_hw_addr_length; } ar_m_t; -/* Named Dispatch Parameter Management Structure */ -typedef struct arpparam_s { - uint32_t arp_param_min; - uint32_t arp_param_max; - uint32_t arp_param_value; - char *arp_param_name; -} arpparam_t; - typedef struct msg2_args { mblk_t *m2a_mpdata; mblk_t *m2a_mptail; } msg2_args_t; -extern ire_stats_t ire_stats_v4; - static mblk_t *ar_alloc(uint32_t cmd, int); static int ar_ce_create(arl_t *arl, uint32_t proto, uchar_t *hw_addr, uint32_t hw_addr_len, uchar_t *proto_addr, @@ -172,23 +159,24 @@ static int ar_ce_create(arl_t *arl, uint32_t proto, uchar_t *hw_addr, uint32_t flags); static void ar_ce_delete(ace_t *ace); static void ar_ce_delete_per_arl(ace_t *ace, void *arg); -static ace_t **ar_ce_hash(uint32_t proto, const uchar_t *proto_addr, - uint32_t proto_addr_length); +static ace_t **ar_ce_hash(arp_stack_t *as, uint32_t proto, + const uchar_t *proto_addr, uint32_t proto_addr_length); static ace_t *ar_ce_lookup(arl_t *arl, uint32_t proto, const uchar_t *proto_addr, uint32_t proto_addr_length); static ace_t *ar_ce_lookup_entry(arl_t *arl, uint32_t proto, const uchar_t *proto_addr, uint32_t proto_addr_length); -static ace_t *ar_ce_lookup_from_area(mblk_t *mp, ace_t *matchfn()); +static ace_t *ar_ce_lookup_from_area(arp_stack_t *as, mblk_t *mp, + ace_t *matchfn()); static ace_t *ar_ce_lookup_mapping(arl_t *arl, uint32_t proto, const uchar_t *proto_addr, uint32_t proto_addr_length); static boolean_t ar_ce_resolve(ace_t *ace, const uchar_t *hw_addr, uint32_t hw_addr_length); -static void ar_ce_walk(void (*pfi)(ace_t *, void *), void *arg1); +static void ar_ce_walk(arp_stack_t *as, void (*pfi)(ace_t *, void *), + void *arg1); -static void ar_cleanup(void); static void ar_client_notify(const arl_t *arl, mblk_t *mp, int code); static int ar_close(queue_t *q); -static int ar_cmd_dispatch(queue_t *q, mblk_t *mp); +static int ar_cmd_dispatch(queue_t *q, mblk_t *mp, boolean_t from_wput); static void ar_cmd_done(arl_t *arl); static mblk_t *ar_dlpi_comm(t_uscalar_t prim, size_t size); static void ar_dlpi_send(arl_t *, mblk_t *); @@ -203,9 +191,9 @@ static int ar_interface_on(queue_t *q, mblk_t *mp); static int ar_interface_off(queue_t *q, mblk_t *mp); static void ar_ll_cleanup_arl_queue(queue_t *q); static void ar_ll_down(arl_t *arl); -static arl_t *ar_ll_lookup_by_name(const char *name); -static arl_t *ar_ll_lookup_from_mp(mblk_t *mp); -static void ar_ll_init(ar_t *, mblk_t *mp); +static arl_t *ar_ll_lookup_by_name(arp_stack_t *as, const char *name); +static arl_t *ar_ll_lookup_from_mp(arp_stack_t *as, mblk_t *mp); +static void ar_ll_init(arp_stack_t *, ar_t *, mblk_t *mp); static void ar_ll_set_defaults(arl_t *, mblk_t *mp); static void ar_ll_clear_defaults(arl_t *); static int ar_ll_up(arl_t *arl); @@ -216,13 +204,13 @@ static int ar_nd_ioctl(queue_t *q, mblk_t *mp); static int ar_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp); static int ar_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); -static boolean_t 
ar_param_register(arpparam_t *arppa, int cnt); +static boolean_t ar_param_register(IDP *ndp, arpparam_t *arppa, int cnt); static int ar_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr); static void ar_query_delete(ace_t *ace, void *ar); static void ar_query_reply(ace_t *ace, int ret_val, uchar_t *proto_addr, uint32_t proto_addr_len); -static clock_t ar_query_xmit(ace_t *ace, ace_t *src_ace); +static clock_t ar_query_xmit(arp_stack_t *as, ace_t *ace, ace_t *src_ace); static void ar_rput(queue_t *q, mblk_t *mp_orig); static void ar_rput_dlpi(queue_t *q, mblk_t *mp); static void ar_set_address(ace_t *ace, uchar_t *addrpos, @@ -235,11 +223,14 @@ static void ar_wput(queue_t *q, mblk_t *mp); static void ar_wsrv(queue_t *q); static void ar_xmit(arl_t *arl, uint32_t operation, uint32_t proto, uint32_t plen, const uchar_t *haddr1, const uchar_t *paddr1, - const uchar_t *haddr2, const uchar_t *paddr2, const uchar_t *dstaddr); + const uchar_t *haddr2, const uchar_t *paddr2, const uchar_t *dstaddr, + arp_stack_t *as); static void ar_cmd_enqueue(arl_t *arl, mblk_t *mp, queue_t *q, ushort_t cmd, boolean_t); static mblk_t *ar_cmd_dequeue(arl_t *arl); +static void *arp_stack_init(netstackid_t stackid, netstack_t *ns); +static void arp_stack_fini(netstackid_t stackid, void *arg); /* * All of these are alterable, within the min/max values given, * at run time. arp_publish_interval and arp_publish_count are @@ -264,20 +255,19 @@ static arpparam_t arp_param_arr[] = { { 0, 3600000, 15000, "arp_broadcast_interval"}, { 5, 86400, 3600, "arp_defend_period"} }; - -#define arp_cleanup_interval arp_param_arr[0].arp_param_value -#define arp_publish_interval arp_param_arr[1].arp_param_value -#define arp_publish_count arp_param_arr[2].arp_param_value -#define arp_probe_delay arp_param_arr[3].arp_param_value -#define arp_probe_interval arp_param_arr[4].arp_param_value -#define arp_probe_count arp_param_arr[5].arp_param_value -#define arp_fastprobe_delay arp_param_arr[6].arp_param_value -#define arp_fastprobe_interval arp_param_arr[7].arp_param_value -#define arp_fastprobe_count arp_param_arr[8].arp_param_value -#define arp_defend_interval arp_param_arr[9].arp_param_value -#define arp_defend_rate arp_param_arr[10].arp_param_value -#define arp_broadcast_interval arp_param_arr[11].arp_param_value -#define arp_defend_period arp_param_arr[12].arp_param_value +#define as_cleanup_interval as_param_arr[0].arp_param_value +#define as_publish_interval as_param_arr[1].arp_param_value +#define as_publish_count as_param_arr[2].arp_param_value +#define as_probe_delay as_param_arr[3].arp_param_value +#define as_probe_interval as_param_arr[4].arp_param_value +#define as_probe_count as_param_arr[5].arp_param_value +#define as_fastprobe_delay as_param_arr[6].arp_param_value +#define as_fastprobe_interval as_param_arr[7].arp_param_value +#define as_fastprobe_count as_param_arr[8].arp_param_value +#define as_defend_interval as_param_arr[9].arp_param_value +#define as_defend_rate as_param_arr[10].arp_param_value +#define as_broadcast_interval as_param_arr[11].arp_param_value +#define as_defend_period as_param_arr[12].arp_param_value static struct module_info info = { 0, "arp", 0, INFPSZ, 512, 128 @@ -295,18 +285,6 @@ struct streamtab arpinfo = { &rinit, &winit }; -static void *ar_g_head; /* AR Instance Data List Head */ -static caddr_t ar_g_nd; /* AR Named Dispatch Head */ - -/* - * With the introduction of netinfo (neti kernel module), it is now possible - * to access data structures in the ARP module without the code 
being - * executed in the context of the IP module, thus there is no locking being - * enforced through the use of STREAMS. - */ -krwlock_t arl_g_lock; -arl_t *arl_g_head; /* ARL List Head */ - /* * TODO: we need a better mechanism to set the ARP hardware type since * the DLPI mac type does not include enough predefined values. @@ -322,14 +300,6 @@ static ar_m_t ar_m_tbl[] = { { DL_OTHER, ARPHRD_ETHER, -2, 6}, /* unknown */ }; -/* ARP Cache Entry Hash Table */ -static ace_t *ar_ce_hash_tbl[ARP_HASH_SIZE]; - -static ace_t *ar_ce_mask_entries; /* proto_mask not all ones */ - -static uint32_t arp_index_counter = 1; -static uint32_t arp_counter_wrapped = 0; - /* * Note that all routines which need to queue the message for later * processing have to be ioctl_aware to be able to queue the complete message. @@ -337,6 +307,7 @@ static uint32_t arp_counter_wrapped = 0; */ #define ARF_IOCTL_AWARE 0x1 /* Arp command can come down as M_IOCTL */ #define ARF_ONLY_CMD 0x2 /* Command is exclusive to ARP */ +#define ARF_WPUT_OK 0x4 /* Command is allowed from ar_wput */ /* ARP Cmd Table entry */ typedef struct arct_s { @@ -348,15 +319,22 @@ typedef struct arct_s { const char *arct_txt; } arct_t; +/* + * AR_ENTRY_ADD, QUERY and SQUERY are used by sdp, hence they need to + * have ARF_WPUT_OK set. + */ static arct_t ar_cmd_tbl[] = { { ar_entry_add, AR_ENTRY_ADD, sizeof (area_t), - ARF_IOCTL_AWARE | ARF_ONLY_CMD, OP_CONFIG, "AR_ENTRY_ADD" }, + ARF_IOCTL_AWARE | ARF_ONLY_CMD | ARF_WPUT_OK, OP_CONFIG, + "AR_ENTRY_ADD" }, { ar_entry_delete, AR_ENTRY_DELETE, sizeof (ared_t), ARF_IOCTL_AWARE | ARF_ONLY_CMD, OP_CONFIG, "AR_ENTRY_DELETE" }, { ar_entry_query, AR_ENTRY_QUERY, sizeof (areq_t), - ARF_IOCTL_AWARE | ARF_ONLY_CMD, OP_NP, "AR_ENTRY_QUERY" }, + ARF_IOCTL_AWARE | ARF_ONLY_CMD | ARF_WPUT_OK, OP_NP, + "AR_ENTRY_QUERY" }, { ar_entry_squery, AR_ENTRY_SQUERY, sizeof (area_t), - ARF_IOCTL_AWARE | ARF_ONLY_CMD, OP_NP, "AR_ENTRY_SQUERY" }, + ARF_IOCTL_AWARE | ARF_ONLY_CMD | ARF_WPUT_OK, OP_NP, + "AR_ENTRY_SQUERY" }, { ar_mapping_add, AR_MAPPING_ADD, sizeof (arma_t), ARF_IOCTL_AWARE | ARF_ONLY_CMD, OP_CONFIG, "AR_MAPPING_ADD" }, { ar_interface_up, AR_INTERFACE_UP, sizeof (arc_t), @@ -368,15 +346,16 @@ static arct_t ar_cmd_tbl[] = { { ar_interface_off, AR_INTERFACE_OFF, sizeof (arc_t), ARF_ONLY_CMD, OP_CONFIG, "AR_INTERFACE_OFF" }, { ar_set_ppa, (uint32_t)IF_UNITSEL, sizeof (int), - ARF_IOCTL_AWARE, OP_CONFIG, "IF_UNITSEL" }, + ARF_IOCTL_AWARE | ARF_WPUT_OK, OP_CONFIG, "IF_UNITSEL" }, { ar_nd_ioctl, ND_GET, 1, - ARF_IOCTL_AWARE, OP_NP, "ND_GET" }, + ARF_IOCTL_AWARE | ARF_WPUT_OK, OP_NP, "ND_GET" }, { ar_nd_ioctl, ND_SET, 1, - ARF_IOCTL_AWARE, OP_CONFIG, "ND_SET" }, + ARF_IOCTL_AWARE | ARF_WPUT_OK, OP_CONFIG, "ND_SET" }, { ar_snmp_msg, AR_SNMP_MSG, sizeof (struct T_optmgmt_ack), - ARF_IOCTL_AWARE | ARF_ONLY_CMD, OP_NP, "AR_SNMP_MSG" }, + ARF_IOCTL_AWARE | ARF_WPUT_OK | ARF_ONLY_CMD, OP_NP, + "AR_SNMP_MSG" }, { ar_slifname, (uint32_t)SIOCSLIFNAME, sizeof (struct lifreq), - ARF_IOCTL_AWARE, OP_CONFIG, "SIOCSLIFNAME" } + ARF_IOCTL_AWARE | ARF_WPUT_OK, OP_CONFIG, "SIOCSLIFNAME" } }; /* @@ -394,6 +373,7 @@ ar_ce_create(arl_t *arl, uint_t proto, uchar_t *hw_addr, uint_t hw_addr_len, ace_t **acep; uchar_t *dst; mblk_t *mp; + arp_stack_t *as = ARL_TO_ARPSTACK(arl); arlphy_t *ap; if ((flags & ~ACE_EXTERNAL_FLAGS_MASK) || arl == NULL) @@ -491,13 +471,12 @@ ar_ce_create(arl_t *arl, uint_t proto, uchar_t *hw_addr, uint_t hw_addr_len, } ace->ace_flags = flags; - if (ar_mask_all_ones(ace->ace_proto_mask, ace->ace_proto_addr_length)) { 
- acep = ar_ce_hash(ace->ace_proto, ace->ace_proto_addr, + acep = ar_ce_hash(as, ace->ace_proto, ace->ace_proto_addr, ace->ace_proto_addr_length); } else { - acep = &ar_ce_mask_entries; + acep = &as->as_ce_mask_entries; } if ((ace->ace_next = *acep) != NULL) ace->ace_next->ace_ptpn = &ace->ace_next; @@ -540,7 +519,7 @@ ar_ce_delete_per_arl(ace_t *ace, void *arl) /* Cache entry hash routine, based on protocol and protocol address. */ static ace_t ** -ar_ce_hash(uint32_t proto, const uchar_t *proto_addr, +ar_ce_hash(arp_stack_t *as, uint32_t proto, const uchar_t *proto_addr, uint32_t proto_addr_length) { const uchar_t *up = proto_addr; @@ -549,7 +528,7 @@ ar_ce_hash(uint32_t proto, const uchar_t *proto_addr, while (--len >= 0) hval ^= *up++; - return (&ar_ce_hash_tbl[hval % A_CNT(ar_ce_hash_tbl)]); + return (&as->as_ce_hash_tbl[hval % ARP_HASH_SIZE]); } /* Cache entry lookup. Try to find an ace matching the parameters passed. */ @@ -575,10 +554,11 @@ ar_ce_lookup_entry(arl_t *arl, uint32_t proto, const uchar_t *proto_addr, uint32_t proto_addr_length) { ace_t *ace; + arp_stack_t *as = ARL_TO_ARPSTACK(arl); if (!proto_addr) return (NULL); - ace = *ar_ce_hash(proto, proto_addr, proto_addr_length); + ace = *ar_ce_hash(as, proto, proto_addr, proto_addr_length); for (; ace; ace = ace->ace_next) { if (ace->ace_arl == arl && ace->ace_proto_addr_length == proto_addr_length && @@ -604,7 +584,7 @@ ar_ce_lookup_entry(arl_t *arl, uint32_t proto, const uchar_t *proto_addr, * call the supplied match function. */ static ace_t * -ar_ce_lookup_from_area(mblk_t *mp, ace_t *matchfn()) +ar_ce_lookup_from_area(arp_stack_t *as, mblk_t *mp, ace_t *matchfn()) { uchar_t *proto_addr; area_t *area = (area_t *)mp->b_rptr; @@ -613,7 +593,7 @@ ar_ce_lookup_from_area(mblk_t *mp, ace_t *matchfn()) area->area_proto_addr_length); if (!proto_addr) return (NULL); - return ((*matchfn)(ar_ll_lookup_from_mp(mp), area->area_proto, + return ((*matchfn)(ar_ll_lookup_from_mp(as, mp), area->area_proto, proto_addr, area->area_proto_addr_length)); } @@ -626,10 +606,11 @@ ar_ce_lookup_mapping(arl_t *arl, uint32_t proto, const uchar_t *proto_addr, uint32_t proto_addr_length) { ace_t *ace; + arp_stack_t *as = ARL_TO_ARPSTACK(arl); if (!proto_addr) return (NULL); - ace = ar_ce_mask_entries; + ace = as->as_ce_mask_entries; for (; ace; ace = ace->ace_next) { if (ace->ace_arl == arl && ace->ace_proto_addr_length == proto_addr_length && @@ -661,12 +642,12 @@ ar_ce_lookup_mapping(arl_t *arl, uint32_t proto, const uchar_t *proto_addr, * mapping to avoid arp interpreting it as a duplicate. */ static ace_t * -ar_ce_lookup_permanent(uint32_t proto, uchar_t *proto_addr, +ar_ce_lookup_permanent(arp_stack_t *as, uint32_t proto, uchar_t *proto_addr, uint32_t proto_addr_length) { ace_t *ace; - ace = *ar_ce_hash(proto, proto_addr, proto_addr_length); + ace = *ar_ce_hash(as, proto, proto_addr, proto_addr_length); for (; ace != NULL; ace = ace->ace_next) { if (!(ace->ace_flags & ACE_F_PERMANENT)) continue; @@ -771,8 +752,9 @@ ar_ce_resolve_all(arl_t *arl, uint32_t proto, const uchar_t *src_haddr, uchar_t *ace_addr; uchar_t *mask; int retv = AR_NOTFOUND; + arp_stack_t *as = ARL_TO_ARPSTACK(arl); - ace = *ar_ce_hash(proto, src_paddr, plen); + ace = *ar_ce_hash(as, proto, src_paddr, plen); for (; ace != NULL; ace = ace_next) { /* ar_ce_resolve may delete the ace; fetch next pointer now */ @@ -853,36 +835,28 @@ ar_ce_resolve_all(arl_t *arl, uint32_t proto, const uchar_t *src_haddr, /* Pass arg1 to the pfi supplied, along with each ace in existence. 
*/ static void -ar_ce_walk(void (*pfi)(ace_t *, void *), void *arg1) +ar_ce_walk(arp_stack_t *as, void (*pfi)(ace_t *, void *), void *arg1) { ace_t *ace; ace_t *ace1; - ace_t **acep; + int i; - for (acep = ar_ce_hash_tbl; acep < A_END(ar_ce_hash_tbl); acep++) { + for (i = 0; i < ARP_HASH_SIZE; i++) { /* * We walk the hash chain in a way that allows the current * ace to get blown off by the called routine. */ - for (ace = *acep; ace; ace = ace1) { + for (ace = as->as_ce_hash_tbl[i]; ace; ace = ace1) { ace1 = ace->ace_next; (*pfi)(ace, arg1); } } - for (ace = ar_ce_mask_entries; ace; ace = ace1) { + for (ace = as->as_ce_mask_entries; ace; ace = ace1) { ace1 = ace->ace_next; (*pfi)(ace, arg1); } } -/* Free the ND tables if the last ar has gone away. */ -static void -ar_cleanup(void) -{ - if (!ar_g_head) - nd_free(&ar_g_nd); -} - /* * Send a copy of interesting packets to the corresponding IP instance. * The corresponding IP instance is the ARP-IP-DEV instance for this @@ -969,6 +943,7 @@ ar_close(queue_t *q) arc_t *arc; mblk_t *mp1; int index; + arp_stack_t *as = ar->ar_as; TRACE_1(TR_FAC_ARP, TR_ARP_CLOSE, "arp_close: q %p", q); @@ -997,7 +972,7 @@ ar_close(queue_t *q) } } /* Delete all our pending queries, 'arl' is not dereferenced */ - ar_ce_walk(ar_query_delete, ar); + ar_ce_walk(as, ar_query_delete, ar); /* * The request could be pending on some arl_queue also. This * happens if the arl is not yet bound, and bind is pending. @@ -1021,11 +996,12 @@ ar_close(queue_t *q) * If this is the control stream for an arl, delete anything * hanging off our arl. */ - ar_ce_walk(ar_ce_delete_per_arl, arl); + ar_ce_walk(as, ar_ce_delete_per_arl, arl); /* Free any messages waiting for a bind_ack */ /* Get the arl out of the chain. */ - rw_enter(&arl_g_lock, RW_WRITER); - for (arlp = &arl_g_head; *arlp; arlp = &(*arlp)->arl_next) { + rw_enter(&as->as_arl_g_lock, RW_WRITER); + for (arlp = &as->as_arl_head; *arlp; + arlp = &(*arlp)->arl_next) { if (*arlp == arl) { *arlp = arl->arl_next; break; @@ -1034,7 +1010,7 @@ ar_close(queue_t *q) ASSERT(arl->arl_dlpi_deferred == NULL); ar->ar_arl = NULL; - rw_exit(&arl_g_lock); + rw_exit(&as->as_arl_g_lock); mi_free((char *)arl); } @@ -1047,8 +1023,7 @@ ar_close(queue_t *q) } cr = ar->ar_credp; /* mi_close_comm frees the instance data. */ - (void) mi_close_comm(&ar_g_head, q); - ar_cleanup(); + (void) mi_close_comm(&as->as_head, q); qprocsoff(q); crfree(cr); @@ -1060,8 +1035,10 @@ ar_close(queue_t *q) info.hne_event = NE_UNPLUMB; info.hne_data = name; info.hne_datalen = strlen(name); - (void) hook_run(arpnicevents, (hook_data_t)&info); + (void) hook_run(as->as_arpnicevents, (hook_data_t)&info, + as->as_netstack); } + netstack_rele(as->as_netstack); return (0); } @@ -1071,7 +1048,7 @@ ar_close(queue_t *q) */ /* TODO: error reporting for M_PROTO case */ static int -ar_cmd_dispatch(queue_t *q, mblk_t *mp_orig) +ar_cmd_dispatch(queue_t *q, mblk_t *mp_orig, boolean_t from_wput) { arct_t *arct; uint32_t cmd; @@ -1117,10 +1094,15 @@ ar_cmd_dispatch(queue_t *q, mblk_t *mp_orig) if (cr == NULL) cr = DB_CREDDEF(mp_orig, ((ar_t *)q->q_ptr)->ar_credp); - if ((error = secpolicy_net(cr, arct->arct_priv_req, + if ((error = secpolicy_ip(cr, arct->arct_priv_req, B_FALSE)) != 0) return (error); } + /* Disallow many commands except if from rput i.e. 
from IP */ + if (from_wput && !(arct->arct_flags & ARF_WPUT_OK)) { + return (EINVAL); + } + if (arct->arct_flags & ARF_IOCTL_AWARE) mp = mp_orig; @@ -1436,11 +1418,12 @@ ar_entry_add(queue_t *q, mblk_t *mp_orig) int err; uint_t aflags; boolean_t unverified; + arp_stack_t *as = ((ar_t *)q->q_ptr)->ar_as; /* We handle both M_IOCTL and M_PROTO messages. */ if (DB_TYPE(mp) == M_IOCTL) mp = mp->b_cont; - arl = ar_ll_lookup_from_mp(mp); + arl = ar_ll_lookup_from_mp(as, mp); if (arl == NULL) return (EINVAL); /* @@ -1462,7 +1445,8 @@ ar_entry_add(queue_t *q, mblk_t *mp_orig) * duplicate address detection state. If it's a new entry, then we're * obligated to do duplicate address detection now. */ - if ((ace = ar_ce_lookup_from_area(mp, ar_ce_lookup_entry)) != NULL) { + ace = ar_ce_lookup_from_area(as, mp, ar_ce_lookup_entry); + if (ace != NULL) { unverified = (ace->ace_flags & ACE_F_UNVERIFIED) != 0; ar_ce_delete(ace); } else { @@ -1525,11 +1509,11 @@ ar_entry_add(queue_t *q, mblk_t *mp_orig) ASSERT(ace != NULL); if (ace->ace_flags & ACE_F_FAST) { - ace->ace_xmit_count = arp_fastprobe_count; - ace->ace_xmit_interval = arp_fastprobe_delay; + ace->ace_xmit_count = as->as_fastprobe_count; + ace->ace_xmit_interval = as->as_fastprobe_delay; } else { - ace->ace_xmit_count = arp_probe_count; - ace->ace_xmit_interval = arp_probe_delay; + ace->ace_xmit_count = as->as_probe_count; + ace->ace_xmit_interval = as->as_probe_delay; } /* @@ -1558,12 +1542,12 @@ ar_entry_add(queue_t *q, mblk_t *mp_orig) area_t *, area); ar_xmit(arl, ARP_REQUEST, area->area_proto, proto_addr_len, hw_addr, NULL, NULL, - proto_addr, NULL); + proto_addr, NULL, as); ace->ace_xmit_count--; ace->ace_xmit_interval = (ace->ace_flags & ACE_F_FAST) ? - arp_fastprobe_interval : - arp_probe_interval; + as->as_fastprobe_interval : + as->as_probe_interval; ace_set_timer(ace, B_FALSE); } else { DTRACE_PROBE2(eadd_delay, ace_t *, ace, @@ -1576,7 +1560,7 @@ ar_entry_add(queue_t *q, mblk_t *mp_orig) area_t *, area); ar_xmit(arl, ARP_REQUEST, area->area_proto, proto_addr_len, hw_addr, proto_addr, - ap->ap_arp_addr, proto_addr, NULL); + ap->ap_arp_addr, proto_addr, NULL, as); ace->ace_last_bcast = ddi_get_lbolt(); /* @@ -1590,10 +1574,11 @@ ar_entry_add(queue_t *q, mblk_t *mp_orig) */ if ((aflags & ACE_F_AUTHORITY) && !(aflags & ACE_F_DEFEND) && - arp_publish_count > 0) { + as->as_publish_count > 0) { /* Account for the xmit we just did */ - ace->ace_xmit_count = arp_publish_count - 1; - ace->ace_xmit_interval = arp_publish_interval; + ace->ace_xmit_count = as->as_publish_count - 1; + ace->ace_xmit_interval = + as->as_publish_interval; if (ace->ace_xmit_count > 0) ace_set_timer(ace, B_FALSE); } @@ -1609,11 +1594,12 @@ ar_entry_delete(queue_t *q, mblk_t *mp_orig) ace_t *ace; arl_t *arl; mblk_t *mp = mp_orig; + arp_stack_t *as = ((ar_t *)q->q_ptr)->ar_as; /* We handle both M_IOCTL and M_PROTO messages. */ if (DB_TYPE(mp) == M_IOCTL) mp = mp->b_cont; - arl = ar_ll_lookup_from_mp(mp); + arl = ar_ll_lookup_from_mp(as, mp); if (arl == NULL) return (EINVAL); /* @@ -1631,7 +1617,7 @@ ar_entry_delete(queue_t *q, mblk_t *mp_orig) * Need to know if it is a mapping or an exact match. Check exact * match first. 
*/ - ace = ar_ce_lookup_from_area(mp, ar_ce_lookup); + ace = ar_ce_lookup_from_area(as, mp, ar_ce_lookup); if (ace != NULL) { /* * If it's a permanent entry, then the client is the one who @@ -1667,13 +1653,14 @@ ar_entry_query(queue_t *q, mblk_t *mp_orig) uint32_t proto_addr_len; clock_t ms; boolean_t is_mproto = B_TRUE; + arp_stack_t *as = ((ar_t *)q->q_ptr)->ar_as; /* We handle both M_IOCTL and M_PROTO messages. */ if (DB_TYPE(mp) == M_IOCTL) { is_mproto = B_FALSE; mp = mp->b_cont; } - arl = ar_ll_lookup_from_mp(mp); + arl = ar_ll_lookup_from_mp(as, mp); if (arl == NULL) { DTRACE_PROBE2(query_no_arl, queue_t *, q, mblk_t *, mp); err = EINVAL; @@ -1830,8 +1817,8 @@ ar_entry_query(queue_t *q, mblk_t *mp_orig) * search the other arl for a resolved ACE. If we find one, * we resolve it rather than sending out a ARP request. */ - src_ace = ar_ce_lookup_permanent(areq->areq_proto, sender_addr, - areq->areq_sender_addr_length); + src_ace = ar_ce_lookup_permanent(as, areq->areq_proto, + sender_addr, areq->areq_sender_addr_length); if (src_ace == NULL) { DTRACE_PROBE3(query_source_missing, arl_t *, arl, areq_t *, areq, ace_t *, ace); @@ -1861,7 +1848,7 @@ ar_entry_query(queue_t *q, mblk_t *mp_orig) } } } - ms = ar_query_xmit(ace, src_ace); + ms = ar_query_xmit(as, ace, src_ace); if (ms == 0) { /* Immediate reply requested. */ ar_query_reply(ace, ENXIO, NULL, (uint32_t)0); @@ -1870,8 +1857,11 @@ ar_entry_query(queue_t *q, mblk_t *mp_orig) } return (EINPROGRESS); err_ret: - if (is_mproto) - BUMP_IRE_STATS(ire_stats_v4, ire_stats_freed); + if (is_mproto) { + ip_stack_t *ipst = as->as_netstack->netstack_ip; + + BUMP_IRE_STATS(ipst->ips_ire_stats_v4, ire_stats_freed); + } return (err); } @@ -1887,10 +1877,11 @@ ar_entry_squery(queue_t *q, mblk_t *mp_orig) mblk_t *mp = mp_orig; uchar_t *proto_addr; int proto_addr_len; + arp_stack_t *as = ((ar_t *)q->q_ptr)->ar_as; if (DB_TYPE(mp) == M_IOCTL) mp = mp->b_cont; - arl = ar_ll_lookup_from_mp(mp); + arl = ar_ll_lookup_from_mp(as, mp); if (arl == NULL) return (EINVAL); /* @@ -1952,8 +1943,9 @@ static int ar_interface_down(queue_t *q, mblk_t *mp) { arl_t *arl; + arp_stack_t *as = ((ar_t *)q->q_ptr)->ar_as; - arl = ar_ll_lookup_from_mp(mp); + arl = ar_ll_lookup_from_mp(as, mp); if (arl == NULL || arl->arl_closing) { DTRACE_PROBE2(down_no_arl, queue_t *, q, mblk_t *, mp); return (EINVAL); @@ -1987,7 +1979,7 @@ ar_interface_down(queue_t *q, mblk_t *mp) ASSERT(arl->arl_state == ARL_S_UP); /* Free all arp entries for this interface */ - ar_ce_walk(ar_ce_delete_per_arl, arl); + ar_ce_walk(as, ar_ce_delete_per_arl, arl); ar_ll_down(arl); /* Return EINPROGRESS so that ar_rput does not free the 'mp' */ @@ -2003,8 +1995,9 @@ ar_interface_up(queue_t *q, mblk_t *mp) arl_t *arl; int err; mblk_t *mp1; + arp_stack_t *as = ((ar_t *)q->q_ptr)->ar_as; - arl = ar_ll_lookup_from_mp(mp); + arl = ar_ll_lookup_from_mp(as, mp); if (arl == NULL || arl->arl_closing) { DTRACE_PROBE2(up_no_arl, queue_t *, q, mblk_t *, mp); err = EINVAL; @@ -2063,8 +2056,9 @@ static int ar_interface_on(queue_t *q, mblk_t *mp) { arl_t *arl; + arp_stack_t *as = ((ar_t *)q->q_ptr)->ar_as; - arl = ar_ll_lookup_from_mp(mp); + arl = ar_ll_lookup_from_mp(as, mp); if (arl == NULL) { DTRACE_PROBE2(on_no_arl, queue_t *, q, mblk_t *, mp); return (EINVAL); @@ -2084,8 +2078,9 @@ static int ar_interface_off(queue_t *q, mblk_t *mp) { arl_t *arl; + arp_stack_t *as = ((ar_t *)q->q_ptr)->ar_as; - arl = ar_ll_lookup_from_mp(mp); + arl = ar_ll_lookup_from_mp(as, mp); if (arl == NULL) { DTRACE_PROBE2(off_no_arl, queue_t *, q, 
mblk_t *, mp); return (EINVAL); @@ -2108,8 +2103,10 @@ ar_ll_cleanup_arl_queue(queue_t *q) mblk_t *mp; mblk_t *mpnext; mblk_t *prev; + arp_stack_t *as = ((ar_t *)q->q_ptr)->ar_as; + ip_stack_t *ipst = as->as_netstack->netstack_ip; - for (arl = arl_g_head; arl != NULL; arl = arl->arl_next) { + for (arl = as->as_arl_head; arl != NULL; arl = arl->arl_next) { for (prev = NULL, mp = arl->arl_queue; mp != NULL; mp = mpnext) { mpnext = mp->b_next; @@ -2123,7 +2120,7 @@ ar_ll_cleanup_arl_queue(queue_t *q) arl->arl_queue_tail = prev; if (DB_TYPE(mp) == M_PROTO && *(uint32_t *)mp->b_rptr == AR_ENTRY_QUERY) { - BUMP_IRE_STATS(ire_stats_v4, + BUMP_IRE_STATS(ipst->ips_ire_stats_v4, ire_stats_freed); } inet_freemsg(mp); @@ -2138,11 +2135,11 @@ ar_ll_cleanup_arl_queue(queue_t *q) * Look up a lower level tap by name. */ static arl_t * -ar_ll_lookup_by_name(const char *name) +ar_ll_lookup_by_name(arp_stack_t *as, const char *name) { arl_t *arl; - for (arl = arl_g_head; arl; arl = arl->arl_next) { + for (arl = as->as_arl_head; arl; arl = arl->arl_next) { if (strcmp(arl->arl_name, name) == 0) { return (arl); } @@ -2155,7 +2152,7 @@ ar_ll_lookup_by_name(const char *name) * portion of the ARP command. */ static arl_t * -ar_ll_lookup_from_mp(mblk_t *mp) +ar_ll_lookup_from_mp(arp_stack_t *as, mblk_t *mp) { arc_t *arc = (arc_t *)mp->b_rptr; uint8_t *name; @@ -2164,11 +2161,11 @@ ar_ll_lookup_from_mp(mblk_t *mp) name = mi_offset_param(mp, arc->arc_name_offset, namelen); if (name == NULL || name[namelen - 1] != '\0') return (NULL); - return (ar_ll_lookup_by_name((char *)name)); + return (ar_ll_lookup_by_name(as, (char *)name)); } static void -ar_ll_init(ar_t *ar, mblk_t *mp) +ar_ll_init(arp_stack_t *as, ar_t *ar, mblk_t *mp) { arl_t *arl; dl_info_ack_t *dlia = (dl_info_ack_t *)mp->b_rptr; @@ -2200,27 +2197,29 @@ ar_ll_init(ar_t *ar, mblk_t *mp) * second of every day (non-leap year) for it to wrap around and the * for() loop below to kick in as a performance concern. */ - if (arp_counter_wrapped) { - arl_t *as; + if (as->as_arp_counter_wrapped) { + arl_t *arl1; do { - for (as = arl_g_head; as != NULL; as = as->arl_next) - if (as->arl_index == arp_index_counter) { - arp_index_counter++; - if (arp_index_counter == 0) { - arp_counter_wrapped++; - arp_index_counter = 1; + for (arl1 = as->as_arl_g_head; arl1 != NULL; + arl1 = arl1->arl_next) + if (arl1->arl_index == + as->as_arp_index_counter) { + as->as_arp_index_counter++; + if (as->as_arp_index_counter == 0) { + as->as_arp_counter_wrapped++; + as->as_arp_index_counter = 1; } break; } - } while (as != NULL); + } while (arl1 != NULL); } else { - arl->arl_index = arp_index_counter; + arl->arl_index = as->as_arp_index_counter; } - arp_index_counter++; - if (arp_index_counter == 0) { - arp_counter_wrapped++; - arp_index_counter = 1; + as->as_arp_index_counter++; + if (as->as_arp_index_counter == 0) { + as->as_arp_counter_wrapped++; + as->as_arp_index_counter = 1; } } @@ -2454,11 +2453,12 @@ ar_mapping_add(queue_t *q, mblk_t *mp_orig) uchar_t *proto_extract_mask; uint32_t hw_extract_start; arl_t *arl; + arp_stack_t *as = ((ar_t *)q->q_ptr)->ar_as; /* We handle both M_IOCTL and M_PROTO messages. 
*/ if (DB_TYPE(mp) == M_IOCTL) mp = mp->b_cont; - arl = ar_ll_lookup_from_mp(mp); + arl = ar_ll_lookup_from_mp(as, mp); if (arl == NULL) return (EINVAL); /* @@ -2473,7 +2473,8 @@ ar_mapping_add(queue_t *q, mblk_t *mp_orig) mp_orig->b_prev = NULL; arma = (arma_t *)mp->b_rptr; - if ((ace = ar_ce_lookup_from_area(mp, ar_ce_lookup_mapping)) != NULL) + ace = ar_ce_lookup_from_area(as, mp, ar_ce_lookup_mapping); + if (ace != NULL) ar_ce_delete(ace); hw_addr_len = arma->arma_hw_addr_length; hw_addr = mi_offset_paramc(mp, arma->arma_hw_addr_offset, hw_addr_len); @@ -2533,7 +2534,10 @@ ar_m_lookup(t_uscalar_t mac_type) static int ar_nd_ioctl(queue_t *q, mblk_t *mp) { - if (DB_TYPE(mp) == M_IOCTL && nd_getset(q, ar_g_nd, mp)) + ar_t *ar = (ar_t *)q->q_ptr; + arp_stack_t *as = ar->ar_as; + + if (DB_TYPE(mp) == M_IOCTL && nd_getset(q, as->as_nd, mp)) return (0); return (ENOENT); } @@ -2546,6 +2550,8 @@ ar_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) int err; queue_t *tmp_q; mblk_t *mp; + netstack_t *ns; + arp_stack_t *as; TRACE_1(TR_FAC_ARP, TR_ARP_OPEN, "arp_open: q %p", q); @@ -2553,17 +2559,17 @@ ar_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) if (q->q_ptr != NULL) { return (0); } - /* Load up the Named Dispatch tables, if not already done. */ - if (ar_g_nd == NULL && - !ar_param_register(arp_param_arr, A_CNT(arp_param_arr))) { - ar_cleanup(); - return (ENOMEM); - } + + ns = netstack_find_by_cred(credp); + ASSERT(ns != NULL); + as = ns->netstack_arp; + ASSERT(as != NULL); + /* mi_open_comm allocates the instance data structure, etc. */ - err = mi_open_comm(&ar_g_head, sizeof (ar_t), q, devp, flag, sflag, + err = mi_open_comm(&as->as_head, sizeof (ar_t), q, devp, flag, sflag, credp); if (err) { - ar_cleanup(); + netstack_rele(as->as_netstack); return (err); } @@ -2579,6 +2585,7 @@ ar_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) ar->ar_wq = q; crhold(credp); ar->ar_credp = credp; + ar->ar_as = as; /* * Probe for the DLPI info if we are not pushed on IP. Wait for @@ -2668,14 +2675,14 @@ ar_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) * named dispatch handler. */ static boolean_t -ar_param_register(arpparam_t *arppa, int cnt) +ar_param_register(IDP *ndp, arpparam_t *arppa, int cnt) { for (; cnt-- > 0; arppa++) { if (arppa->arp_param_name && arppa->arp_param_name[0]) { - if (!nd_load(&ar_g_nd, arppa->arp_param_name, + if (!nd_load(ndp, arppa->arp_param_name, ar_param_get, ar_param_set, (caddr_t)arppa)) { - nd_free(&ar_g_nd); + nd_free(ndp); return (B_FALSE); } } @@ -2715,7 +2722,8 @@ ar_plink_send(queue_t *q, mblk_t *mp) char *name; mblk_t *muxmp; mblk_t *mp1; - ar_t *ar; + ar_t *ar = (ar_t *)q->q_ptr; + arp_stack_t *as = ar->ar_as; struct linkblk *li; struct ipmx_s *ipmxp; queue_t *arpwq; @@ -2764,8 +2772,8 @@ ar_plink_send(queue_t *q, mblk_t *mp) * for use by IP. IP will send the M_IOCACK. 
*/ if (arpwq != NULL) { - for (ar = (ar_t *)mi_first_ptr(&ar_g_head); ar != NULL; - ar = (ar_t *)mi_next_ptr(&ar_g_head, (void *)ar)) { + for (ar = (ar_t *)mi_first_ptr(&as->as_head); ar != NULL; + ar = (ar_t *)mi_next_ptr(&as->as_head, (void *)ar)) { if ((ar->ar_wq == arpwq) && (ar->ar_arl != NULL)) { ipmxp->ipmx_arpdev_stream = 1; (void) strcpy((char *)ipmxp->ipmx_name, @@ -2789,6 +2797,8 @@ ar_query_delete(ace_t *ace, void *arg) ar_t *ar = arg; mblk_t **mpp = &ace->ace_query_mp; mblk_t *mp; + arp_stack_t *as = ar->ar_as; + ip_stack_t *ipst = as->as_netstack->netstack_ip; while ((mp = *mpp) != NULL) { /* The response queue was stored in the query b_prev. */ @@ -2797,7 +2807,8 @@ ar_query_delete(ace_t *ace, void *arg) *mpp = mp->b_next; if (DB_TYPE(mp) == M_PROTO && *(uint32_t *)mp->b_rptr == AR_ENTRY_QUERY) { - BUMP_IRE_STATS(ire_stats_v4, ire_stats_freed); + BUMP_IRE_STATS(ipst->ips_ire_stats_v4, + ire_stats_freed); } inet_freemsg(mp); } else { @@ -2822,6 +2833,8 @@ ar_query_reply(ace_t *ace, int ret_val, uchar_t *proto_addr, arl_t *arl = ace->ace_arl; mblk_t *mp; mblk_t *xmit_mp; + arp_stack_t *as = ARL_TO_ARPSTACK(arl); + ip_stack_t *ipst = as->as_netstack->netstack_ip; arlphy_t *ap = arl->arl_phy; /* Cancel any outstanding timer. */ @@ -2870,7 +2883,8 @@ ar_query_reply(ace_t *ace, int ret_val, uchar_t *proto_addr, if (ret_val != 0) { /* TODO: find some way to let the guy know? */ inet_freemsg(mp); - BUMP_IRE_STATS(ire_stats_v4, ire_stats_freed); + BUMP_IRE_STATS(ipst->ips_ire_stats_v4, + ire_stats_freed); continue; } /* @@ -2928,7 +2942,7 @@ ar_query_reply(ace_t *ace, int ret_val, uchar_t *proto_addr, ar_ce_delete(ace); } else { mi_timer(arl->arl_wq, ace->ace_mp, - arp_cleanup_interval); + as->as_cleanup_interval); } } } @@ -2939,7 +2953,7 @@ ar_query_reply(ace_t *ace, int ret_val, uchar_t *proto_addr, * to the source address in the areq sent by IP. */ static clock_t -ar_query_xmit(ace_t *ace, ace_t *src_ace) +ar_query_xmit(arp_stack_t *as, ace_t *ace, ace_t *src_ace) { areq_t *areq; mblk_t *mp; @@ -2967,8 +2981,8 @@ ar_query_xmit(ace_t *ace, ace_t *src_ace) * interface. */ if (src_ace == NULL) { - src_ace = ar_ce_lookup_permanent(areq->areq_proto, sender_addr, - areq->areq_sender_addr_length); + src_ace = ar_ce_lookup_permanent(as, areq->areq_proto, + sender_addr, areq->areq_sender_addr_length); if (src_ace == NULL) { DTRACE_PROBE3(xmit_no_source, ace_t *, ace, areq_t *, areq, uchar_t *, sender_addr); @@ -3001,7 +3015,7 @@ ar_query_xmit(ace_t *ace, ace_t *src_ace) areq_t *, areq); ar_xmit(src_arl, ARP_REQUEST, areq->areq_proto, areq->areq_sender_addr_length, src_arl->arl_phy->ap_hw_addr, - sender_addr, src_arl->arl_phy->ap_arp_addr, proto_addr, NULL); + sender_addr, src_arl->arl_phy->ap_arp_addr, proto_addr, NULL, as); src_ace->ace_last_bcast = ddi_get_lbolt(); return (areq->areq_xmit_interval); } @@ -3025,6 +3039,7 @@ ar_rput(queue_t *q, mblk_t *mp) uchar_t *src_paddr; boolean_t is_probe; int i; + arp_stack_t *as = ((ar_t *)q->q_ptr)->ar_as; TRACE_1(TR_FAC_ARP, TR_ARP_RPUT_START, "arp_rput_start: q %p", q); @@ -3036,7 +3051,7 @@ ar_rput(queue_t *q, mblk_t *mp) */ switch (DB_TYPE(mp)) { case M_IOCTL: - err = ar_cmd_dispatch(q, mp); + err = ar_cmd_dispatch(q, mp, B_FALSE); switch (err) { case ENOENT: DB_TYPE(mp) = M_IOCNAK; @@ -3092,7 +3107,7 @@ ar_rput(queue_t *q, mblk_t *mp) "arp_rput_end: q %p (%S)", q, "default"); return; } - err = ar_cmd_dispatch(q, mp); + err = ar_cmd_dispatch(q, mp, B_FALSE); switch (err) { case ENOENT: /* Miscellaneous DLPI messages get shuffled off. 
*/ @@ -3179,8 +3194,8 @@ ar_rput(queue_t *q, mblk_t *mp) DTRACE_PROBE3(arp__physical__in__start, arl_t *, arl, arh_t *, arh, mblk_t *, mp); - ARP_HOOK_IN(arp_physical_in_event, arp_physical_in, - arl->arl_index, arh, mp, mp1); + ARP_HOOK_IN(as->as_arp_physical_in_event, as->as_arp_physical_in, + arl->arl_index, arh, mp, mp1, as); DTRACE_PROBE1(arp__physical__in__end, mblk_t *, mp); @@ -3318,7 +3333,7 @@ ar_rput(queue_t *q, mblk_t *mp) */ now = ddi_get_lbolt(); if ((now - dst_ace->ace_last_bcast) > - MSEC_TO_TICK(arp_broadcast_interval)) { + MSEC_TO_TICK(as->as_broadcast_interval)) { DTRACE_PROBE3(rput_bcast_reply, arl_t *, arl, arh_t *, arh, ace_t *, dst_ace); dst_ace->ace_last_bcast = now; @@ -3334,7 +3349,7 @@ ar_rput(queue_t *q, mblk_t *mp) ar_xmit(arl, ARP_RESPONSE, dst_ace->ace_proto, plen, dst_ace->ace_hw_addr, dst_ace->ace_proto_addr, - src_haddr, src_paddr, dstaddr); + src_haddr, src_paddr, dstaddr, as); if (!is_probe && err == AR_NOTFOUND && ar_ce_create(arl, proto, src_haddr, hlen, src_paddr, plen, NULL, NULL, 0, 0) == 0) { @@ -3343,7 +3358,7 @@ ar_rput(queue_t *q, mblk_t *mp) ace = ar_ce_lookup(arl, proto, src_paddr, plen); ASSERT(ace != NULL); mi_timer(arl->arl_wq, ace->ace_mp, - arp_cleanup_interval); + as->as_cleanup_interval); } } if (err == AR_CHANGED) { @@ -3359,8 +3374,11 @@ ar_rput(queue_t *q, mblk_t *mp) } static void -ar_ce_restart_dad(ace_t *ace, void *arl) +ar_ce_restart_dad(ace_t *ace, void *arl_arg) { + arl_t *arl = arl_arg; + arp_stack_t *as = ARL_TO_ARPSTACK(arl); + if ((ace->ace_arl == arl) && (ace->ace_flags & (ACE_F_UNVERIFIED|ACE_F_DAD_ABORTED)) == (ACE_F_UNVERIFIED|ACE_F_DAD_ABORTED)) { @@ -3369,11 +3387,11 @@ ar_ce_restart_dad(ace_t *ace, void *arl) * in this obscure case. */ if (ace->ace_flags & ACE_F_FAST) { - ace->ace_xmit_count = arp_fastprobe_count; - ace->ace_xmit_interval = arp_fastprobe_interval; + ace->ace_xmit_count = as->as_fastprobe_count; + ace->ace_xmit_interval = as->as_fastprobe_interval; } else { - ace->ace_xmit_count = arp_probe_count; - ace->ace_xmit_interval = arp_probe_interval; + ace->ace_xmit_count = as->as_probe_count; + ace->ace_xmit_interval = as->as_probe_interval; } ace->ace_flags &= ~ACE_F_DAD_ABORTED; ace_set_timer(ace, B_FALSE); @@ -3389,6 +3407,7 @@ ar_rput_dlpi(queue_t *q, mblk_t *mp) arlphy_t *ap = NULL; union DL_primitives *dlp; const char *err_str; + arp_stack_t *as = ar->ar_as; if (arl != NULL) ap = arl->arl_phy; @@ -3451,7 +3470,7 @@ ar_rput_dlpi(queue_t *q, mblk_t *mp) ar_ll_set_defaults(arl, mp); ar_dlpi_done(arl, DL_INFO_REQ); } else if (arl == NULL) { - ar_ll_init(ar, mp); + ar_ll_init(as, ar, mp); } /* Kick off any awaiting messages */ qenable(WR(q)); @@ -3509,7 +3528,7 @@ ar_rput_dlpi(queue_t *q, mblk_t *mp) switch (dlp->notify_ind.dl_notification) { case DL_NOTE_LINK_UP: ap->ap_link_down = B_FALSE; - ar_ce_walk(ar_ce_restart_dad, arl); + ar_ce_walk(as, ar_ce_restart_dad, arl); break; case DL_NOTE_LINK_DOWN: ap->ap_link_down = B_TRUE; @@ -3570,6 +3589,7 @@ ar_slifname(queue_t *q, mblk_t *mp_orig) mblk_t *ioccpy; struct iocblk *iocp; hook_nic_event_t info; + arp_stack_t *as = ar->ar_as; if (ar->ar_on_ill_stream) { /* @@ -3613,7 +3633,7 @@ ar_slifname(queue_t *q, mblk_t *mp_orig) /* Check whether the name is already in use. 
*/ - old_arl = ar_ll_lookup_by_name(lifr->lifr_name); + old_arl = ar_ll_lookup_by_name(as, lifr->lifr_name); if (old_arl != NULL) { DTRACE_PROBE2(slifname_exists, arl_t *, arl, arl_t *, old_arl); return (EEXIST); @@ -3644,12 +3664,13 @@ ar_slifname(queue_t *q, mblk_t *mp_orig) info.hne_event = NE_PLUMB; info.hne_data = arl->arl_name; info.hne_datalen = strlen(arl->arl_name); - (void) hook_run(arpnicevents, (hook_data_t)&info); + (void) hook_run(as->as_arpnicevents, (hook_data_t)&info, + as->as_netstack); /* Chain in the new arl. */ - rw_enter(&arl_g_lock, RW_WRITER); - arl->arl_next = arl_g_head; - arl_g_head = arl; + rw_enter(&as->as_arl_g_lock, RW_WRITER); + arl->arl_next = as->as_arl_head; + as->as_arl_head = arl; DTRACE_PROBE1(slifname_set, arl_t *, arl); /* @@ -3663,7 +3684,7 @@ ar_slifname(queue_t *q, mblk_t *mp_orig) iocp->ioc_count = msgsize(ioccpy->b_cont); ioccpy->b_wptr = (uchar_t *)(iocp + 1); putnext(arl->arl_wq, ioccpy); - rw_exit(&arl_g_lock); + rw_exit(&as->as_arl_g_lock); return (0); } @@ -3677,6 +3698,7 @@ ar_set_ppa(queue_t *q, mblk_t *mp_orig) char *cp; mblk_t *mp = mp_orig; arl_t *old_arl; + arp_stack_t *as = ar->ar_as; if (ar->ar_on_ill_stream) { /* @@ -3714,7 +3736,7 @@ ar_set_ppa(queue_t *q, mblk_t *mp_orig) ppa = *(int *)(mp->b_rptr); (void) snprintf(arl->arl_name, sizeof (arl->arl_name), "%s%d", cp, ppa); - old_arl = ar_ll_lookup_by_name(arl->arl_name); + old_arl = ar_ll_lookup_by_name(as, arl->arl_name); if (old_arl != NULL) { DTRACE_PROBE2(setppa_exists, arl_t *, arl, arl_t *, old_arl); /* Make it a null string again */ @@ -3725,10 +3747,10 @@ ar_set_ppa(queue_t *q, mblk_t *mp_orig) arl->arl_ppa = ppa; DTRACE_PROBE1(setppa_done, arl_t *, arl); /* Chain in the new arl. */ - rw_enter(&arl_g_lock, RW_WRITER); - arl->arl_next = arl_g_head; - arl_g_head = arl; - rw_exit(&arl_g_lock); + rw_enter(&as->as_arl_g_lock, RW_WRITER); + arl->arl_next = as->as_arl_head; + as->as_arl_head = arl; + rw_exit(&as->as_arl_g_lock); return (0); } @@ -3739,6 +3761,7 @@ ar_snmp_msg(queue_t *q, mblk_t *mp_orig) mblk_t *mpdata, *mp = mp_orig; struct opthdr *optp; msg2_args_t args; + arp_stack_t *as = ((ar_t *)q->q_ptr)->ar_as; if (mp == NULL) return (0); @@ -3761,7 +3784,7 @@ ar_snmp_msg(queue_t *q, mblk_t *mp_orig) args.m2a_mpdata = mpdata; args.m2a_mptail = NULL; - ar_ce_walk(ar_snmp_msg2, &args); + ar_ce_walk(as, ar_snmp_msg2, &args); optp->len = msgdsize(mpdata); } putnext(q, mp_orig); @@ -3828,7 +3851,7 @@ ar_wput(queue_t *q, mblk_t *mp) */ switch (DB_TYPE(mp)) { case M_IOCTL: - switch (err = ar_cmd_dispatch(q, mp)) { + switch (err = ar_cmd_dispatch(q, mp, B_TRUE)) { case ENOENT: /* * If it is an I_PLINK, process it. Otherwise @@ -3913,7 +3936,7 @@ ar_wput(queue_t *q, mblk_t *mp) * Commands in the form of PROTO messages are handled very * much the same as IOCTLs, but no response is returned. 
*/ - switch (err = ar_cmd_dispatch(q, mp)) { + switch (err = ar_cmd_dispatch(q, mp, B_TRUE)) { case ENOENT: if (q->q_next) { putnext(q, mp); @@ -4041,23 +4064,24 @@ arl_reschedule(arl_t *arl) ace_resched_t art; int i; ace_t *ace; + arp_stack_t *as = ARL_TO_ARPSTACK(arl); i = ap->ap_defend_count; ap->ap_defend_count = 0; /* If none could be sitting around, then don't reschedule */ - if (i < arp_defend_rate) { + if (i < as->as_defend_rate) { DTRACE_PROBE1(reschedule_none, arl_t *, arl); return; } art.art_arl = arl; - while (ap->ap_defend_count < arp_defend_rate) { + while (ap->ap_defend_count < as->as_defend_rate) { art.art_naces = 0; - ar_ce_walk(ace_reschedule, &art); + ar_ce_walk(as, ace_reschedule, &art); for (i = 0; i < art.art_naces; i++) { ace = art.art_aces[i]; ace->ace_flags |= ACE_F_DELAYED; ace_set_timer(ace, B_FALSE); - if (++ap->ap_defend_count >= arp_defend_rate) + if (++ap->ap_defend_count >= as->as_defend_rate) break; } if (art.art_naces < ACE_RESCHED_LIST_LEN) @@ -4079,6 +4103,7 @@ ar_wsrv(queue_t *q) arlphy_t *ap; mblk_t *mp; clock_t ms; + arp_stack_t *as = ((ar_t *)q->q_ptr)->ar_as; TRACE_1(TR_FAC_ARP, TR_ARP_WSRV_START, "arp_wsrv_start: q %p", q); @@ -4115,15 +4140,16 @@ ar_wsrv(queue_t *q) ace->ace_proto, ace->ace_proto_addr_length, ace->ace_hw_addr, NULL, NULL, - ace->ace_proto_addr, NULL); + ace->ace_proto_addr, NULL, as); ace_set_timer(ace, B_FALSE); continue; } if (!arp_say_ready(ace)) continue; DTRACE_PROBE1(timer_ready, ace_t *, ace); - ace->ace_xmit_interval = arp_publish_interval; - ace->ace_xmit_count = arp_publish_count; + ace->ace_xmit_interval = + as->as_publish_interval; + ace->ace_xmit_count = as->as_publish_count; if (ace->ace_xmit_count == 0) ace->ace_xmit_count++; ace->ace_flags &= ~ACE_F_UNVERIFIED; @@ -4137,9 +4163,9 @@ ar_wsrv(queue_t *q) * them. */ now = ddi_get_lbolt(); - if (arp_defend_rate > 0 && + if (as->as_defend_rate > 0 && now - ap->ap_defend_start > - SEC_TO_TICK(arp_defend_period)) { + SEC_TO_TICK(as->as_defend_period)) { ap->ap_defend_start = now; arl_reschedule(arl); } @@ -4164,9 +4190,11 @@ ar_wsrv(queue_t *q) DTRACE_PROBE1(timer_send_delayed, ace_t *, ace); ace->ace_flags &= ~ACE_F_DELAYED; - } else if (arp_defend_rate > 0 && - (ap->ap_defend_count >= arp_defend_rate || - ++ap->ap_defend_count >= arp_defend_rate)) { + } else if (as->as_defend_rate > 0 && + (ap->ap_defend_count >= + as->as_defend_rate || + ++ap->ap_defend_count >= + as->as_defend_rate)) { /* * If we're no longer allowed to send * unbidden defense messages, then just @@ -4186,11 +4214,11 @@ ar_wsrv(queue_t *q) ace->ace_hw_addr, ace->ace_proto_addr, ap->ap_arp_addr, - ace->ace_proto_addr, NULL); + ace->ace_proto_addr, NULL, as); ace->ace_last_bcast = now; if (ace->ace_xmit_count == 0) ace->ace_xmit_interval = - arp_defend_interval; + as->as_defend_interval; if (ace->ace_xmit_interval != 0) ace_set_timer(ace, B_FALSE); continue; @@ -4206,10 +4234,10 @@ ar_wsrv(queue_t *q) if (ACE_NONPERM(ace)) { if (ace->ace_proto == IP_ARP_PROTO_TYPE && ndp_lookup_ipaddr(*(ipaddr_t *) - ace->ace_proto_addr)) { + ace->ace_proto_addr, as->as_netstack)) { ace->ace_flags |= ACE_F_OLD; mi_timer(arl->arl_wq, ace->ace_mp, - arp_cleanup_interval); + as->as_cleanup_interval); } else { ar_delete_notify(ace); ar_ce_delete(ace); @@ -4226,7 +4254,7 @@ ar_wsrv(queue_t *q) * Otherwise, we restart the timer. 
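In the ar_wsrv() and arl_reschedule() hunks above, every former arp_* global (arp_defend_rate, arp_defend_period, arp_publish_count, arp_cleanup_interval, and so on) becomes an as_* field of the arp_stack_t, reached in one of two ways. A rough sketch of both idioms (the xx_* names are placeholders):

	/* (1) From a queue: each ARP stream's ar_t carries the ar_as back-pointer. */
	static void
	xx_wsrv(queue_t *q)
	{
		arp_stack_t *as = ((ar_t *)q->q_ptr)->ar_as;

		if (as->as_defend_rate > 0) {
			/* rate-limited defense transmissions, per stack */
		}
	}

	/*
	 * (2) From an arl_t, which has no stack field of its own: the
	 * ARL_TO_ARPSTACK() macro (added to arp_impl.h further down in this
	 * patch) reaches the stack through the arl's read queue.
	 */
	static void
	xx_reschedule(arl_t *arl)
	{
		arp_stack_t *as = ARL_TO_ARPSTACK(arl);

		if (as->as_defend_rate == 0)
			return;		/* unsolicited defenses disabled in this stack */
	}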
*/ ASSERT(ace->ace_query_mp != NULL); - ms = ar_query_xmit(ace, NULL); + ms = ar_query_xmit(as, ace, NULL); if (ms == 0) ar_query_reply(ace, ENXIO, NULL, (uint32_t)0); else @@ -4245,7 +4273,7 @@ ar_wsrv(queue_t *q) static void ar_xmit(arl_t *arl, uint32_t operation, uint32_t proto, uint32_t plen, const uchar_t *haddr1, const uchar_t *paddr1, const uchar_t *haddr2, - const uchar_t *paddr2, const uchar_t *dstaddr) + const uchar_t *paddr2, const uchar_t *dstaddr, arp_stack_t *as) { arh_t *arh; uint8_t *cp; @@ -4320,8 +4348,8 @@ ar_xmit(arl_t *arl, uint32_t operation, uint32_t proto, uint32_t plen, DTRACE_PROBE3(arp__physical__out__start, arl_t *, arl, arh_t *, arh, mblk_t *, mp); - ARP_HOOK_OUT(arp_physical_out_event, arp_physical_out, - arl->arl_index, arh, mp, mp->b_cont); + ARP_HOOK_OUT(as->as_arp_physical_out_event, as->as_arp_physical_out, + arl->arl_index, arh, mp, mp->b_cont, as); DTRACE_PROBE1(arp__physical__out__end, mblk_t *, mp); @@ -4370,3 +4398,69 @@ ar_alloc(uint32_t cmd, int err) linkb(mp, mp1); return (mp); } + +void +arp_ddi_init(void) +{ + /* + * We want to be informed each time a stack is created or + * destroyed in the kernel, so we can maintain the + * set of arp_stack_t's. + */ + netstack_register(NS_ARP, arp_stack_init, NULL, arp_stack_fini); +} + +void +arp_ddi_destroy(void) +{ + netstack_unregister(NS_ARP); +} + +/* + * Initialize the ARP stack instance. + */ +/* ARGSUSED */ +static void * +arp_stack_init(netstackid_t stackid, netstack_t *ns) +{ + arp_stack_t *as; + arpparam_t *pa; + + as = (arp_stack_t *)kmem_zalloc(sizeof (*as), KM_SLEEP); + as->as_netstack = ns; + + pa = (arpparam_t *)kmem_alloc(sizeof (arp_param_arr), KM_SLEEP); + as->as_param_arr = pa; + bcopy(arp_param_arr, as->as_param_arr, sizeof (arp_param_arr)); + + (void) ar_param_register(&as->as_nd, + as->as_param_arr, A_CNT(arp_param_arr)); + + as->as_arp_index_counter = 1; + as->as_arp_counter_wrapped = 0; + + rw_init(&as->as_arl_g_lock, "ARP ARl lock", RW_DRIVER, NULL); + arp_net_init(as, ns); + arp_hook_init(as); + + return (as); +} + +/* + * Free the ARP stack instance. + */ +/* ARGSUSED */ +static void +arp_stack_fini(netstackid_t stackid, void *arg) +{ + arp_stack_t *as = (arp_stack_t *)arg; + + arp_hook_destroy(as); + arp_net_destroy(as); + rw_destroy(&as->as_arl_g_lock); + + nd_free(&as->as_nd); + kmem_free(as->as_param_arr, sizeof (arp_param_arr)); + as->as_param_arr = NULL; + kmem_free(as, sizeof (*as)); +} diff --git a/usr/src/uts/common/inet/arp/arp_netinfo.c b/usr/src/uts/common/inet/arp/arp_netinfo.c index 0d2f55e239..4869053f62 100644 --- a/usr/src/uts/common/inet/arp/arp_netinfo.c +++ b/usr/src/uts/common/inet/arp/arp_netinfo.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -33,6 +33,7 @@ #include <sys/sunddi.h> #include <sys/hook.h> #include <sys/hook_impl.h> +#include <sys/netstack.h> #include <net/if.h> #include <sys/neti.h> @@ -42,16 +43,16 @@ /* * ARP netinfo entry point declarations. 
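The entry points declared just below all gain a trailing netstack_t * argument, and each body follows the same shape: derive the arp_stack_t from the netstack, then operate only on that stack's ARL list under that stack's own lock. A sketch of the common shape, modelled on the arp_getifname() hunk later in this file (xx_getifname is a placeholder name):

	/* Sketch: a netinfo callback resolving and walking its own ARP instance. */
	static int
	xx_getifname(phy_if_t phy_ifdata, char *buffer, const size_t buflen,
	    netstack_t *ns)
	{
		arp_stack_t *as = ns->netstack_arp;
		arl_t *arl;

		rw_enter(&as->as_arl_g_lock, RW_READER);
		for (arl = as->as_arl_g_head; arl != NULL; arl = arl->arl_next) {
			if (arl->arl_index == phy_ifdata) {
				(void) strlcpy(buffer, arl->arl_name, buflen);
				rw_exit(&as->as_arl_g_lock);
				return (0);
			}
		}
		rw_exit(&as->as_arl_g_lock);
		return (1);	/* no such interface in this stack */
	}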
*/ -static int arp_getifname(phy_if_t, char *, const size_t); -static int arp_getmtu(phy_if_t, lif_if_t); -static int arp_getpmtuenabled(void); +static int arp_getifname(phy_if_t, char *, const size_t, netstack_t *); +static int arp_getmtu(phy_if_t, lif_if_t, netstack_t *); +static int arp_getpmtuenabled(netstack_t *); static int arp_getlifaddr(phy_if_t, lif_if_t, size_t, - net_ifaddr_t [], void *); -static phy_if_t arp_phygetnext(phy_if_t); -static phy_if_t arp_phylookup(const char *); -static lif_if_t arp_lifgetnext(phy_if_t, lif_if_t); -static int arp_inject(inject_t, net_inject_t *); -static phy_if_t arp_routeto(struct sockaddr *); + net_ifaddr_t [], void *, netstack_t *); +static phy_if_t arp_phygetnext(phy_if_t, netstack_t *); +static phy_if_t arp_phylookup(const char *, netstack_t *); +static lif_if_t arp_lifgetnext(phy_if_t, lif_if_t, netstack_t *); +static int arp_inject(inject_t, net_inject_t *, netstack_t *); +static phy_if_t arp_routeto(struct sockaddr *, netstack_t *); static int arp_ispartialchecksum(mblk_t *); static int arp_isvalidchecksum(mblk_t *); @@ -71,115 +72,109 @@ static net_info_t arp_netinfo = { arp_isvalidchecksum }; -static hook_family_t arproot; - -/* - * Hooks for ARP - */ - -hook_event_t arp_physical_in_event; -hook_event_t arp_physical_out_event; -hook_event_t arp_nic_events; - -hook_event_token_t arp_physical_in; -hook_event_token_t arp_physical_out; -hook_event_token_t arpnicevents; - -net_data_t arp = NULL; - /* * Register ARP netinfo functions. */ void -arp_net_init() +arp_net_init(arp_stack_t *as, netstack_t *ns) { - arp = net_register(&arp_netinfo); - ASSERT(arp != NULL); + as->as_net_data = net_register_impl(&arp_netinfo, ns); + ASSERT(as->as_net_data != NULL); } /* * Unregister ARP netinfo functions. */ void -arp_net_destroy() +arp_net_destroy(arp_stack_t *as) { - (void) net_unregister(arp); + (void) net_unregister(as->as_net_data); } /* * Initialize ARP hook family and events */ void -arp_hook_init() +arp_hook_init(arp_stack_t *as) { - HOOK_FAMILY_INIT(&arproot, Hn_ARP); - if (net_register_family(arp, &arproot) != 0) { + HOOK_FAMILY_INIT(&as->as_arproot, Hn_ARP); + if (net_register_family(as->as_net_data, &as->as_arproot) != 0) { cmn_err(CE_NOTE, "arp_hook_init: " "net_register_family failed for arp"); } - HOOK_EVENT_INIT(&arp_physical_in_event, NH_PHYSICAL_IN); - arp_physical_in = net_register_event(arp, &arp_physical_in_event); - if (arp_physical_in == NULL) { + HOOK_EVENT_INIT(&as->as_arp_physical_in_event, NH_PHYSICAL_IN); + as->as_arp_physical_in = net_register_event(as->as_net_data, + &as->as_arp_physical_in_event); + if (as->as_arp_physical_in == NULL) { cmn_err(CE_NOTE, "arp_hook_init: " "net_register_event failed for arp/physical_in"); } - HOOK_EVENT_INIT(&arp_physical_out_event, NH_PHYSICAL_OUT); - arp_physical_out = net_register_event(arp, &arp_physical_out_event); - if (arp_physical_out == NULL) { + HOOK_EVENT_INIT(&as->as_arp_physical_out_event, NH_PHYSICAL_OUT); + as->as_arp_physical_out = net_register_event(as->as_net_data, + &as->as_arp_physical_out_event); + if (as->as_arp_physical_out == NULL) { cmn_err(CE_NOTE, "arp_hook_init: " "net_register_event failed for arp/physical_out"); } - HOOK_EVENT_INIT(&arp_nic_events, NH_NIC_EVENTS); - arpnicevents = net_register_event(arp, &arp_nic_events); - if (arpnicevents == NULL) { + HOOK_EVENT_INIT(&as->as_arp_nic_events, NH_NIC_EVENTS); + as->as_arpnicevents = net_register_event(as->as_net_data, + &as->as_arp_nic_events); + if (as->as_arpnicevents == NULL) { cmn_err(CE_NOTE, "arp_hook_init: " 
"net_register_event failed for arp/nic_events"); } } void -arp_hook_destroy() +arp_hook_destroy(arp_stack_t *as) { - if (arpnicevents != NULL) { - if (net_unregister_event(arp, &arp_nic_events) == 0) - arpnicevents = NULL; + if (as->as_arpnicevents != NULL) { + if (net_unregister_event(as->as_net_data, + &as->as_arp_nic_events) == 0) + as->as_arpnicevents = NULL; } - if (arp_physical_out != NULL) { - if (net_unregister_event(arp, &arp_physical_out_event) == 0) - arp_physical_out = NULL; + if (as->as_arp_physical_out != NULL) { + if (net_unregister_event(as->as_net_data, + &as->as_arp_physical_out_event) == 0) + as->as_arp_physical_out = NULL; } - if (arp_physical_in != NULL) { - if (net_unregister_event(arp, &arp_physical_in_event) == 0) - arp_physical_in = NULL; + if (as->as_arp_physical_in != NULL) { + if (net_unregister_event(as->as_net_data, + &as->as_arp_physical_in_event) == 0) + as->as_arp_physical_in = NULL; } - (void) net_unregister_family(arp, &arproot); + (void) net_unregister_family(as->as_net_data, &as->as_arproot); } /* * Determine the name of the lower level interface */ -int -arp_getifname(phy_if_t phy_ifdata, char *buffer, const size_t buflen) +static int +arp_getifname(phy_if_t phy_ifdata, char *buffer, const size_t buflen, + netstack_t *ns) { arl_t *arl; + arp_stack_t *as; ASSERT(buffer != NULL); + ASSERT(ns != NULL); - rw_enter(&arl_g_lock, RW_READER); - for (arl = arl_g_head; arl != NULL; arl = arl->arl_next) { + as = ns->netstack_arp; + rw_enter(&as->as_arl_g_lock, RW_READER); + for (arl = as->as_arl_g_head; arl != NULL; arl = arl->arl_next) { if (arl->arl_index == phy_ifdata) { (void) strlcpy(buffer, arl->arl_name, buflen); - rw_exit(&arl_g_lock); + rw_exit(&as->as_arl_g_lock); return (0); } } - rw_exit(&arl_g_lock); + rw_exit(&as->as_arl_g_lock); return (1); } @@ -188,8 +183,8 @@ arp_getifname(phy_if_t phy_ifdata, char *buffer, const size_t buflen) * Unsupported with ARP. */ /*ARGSUSED*/ -int -arp_getmtu(phy_if_t phy_ifdata, lif_if_t ifdata) +static int +arp_getmtu(phy_if_t phy_ifdata, lif_if_t ifdata, netstack_t *ns) { return (-1); } @@ -198,8 +193,8 @@ arp_getmtu(phy_if_t phy_ifdata, lif_if_t ifdata) * Unsupported with ARP. */ /*ARGSUSED*/ -int -arp_getpmtuenabled(void) +static int +arp_getpmtuenabled(netstack_t *ns) { return (-1); } @@ -208,9 +203,9 @@ arp_getpmtuenabled(void) * Unsupported with ARP. */ /*ARGSUSED*/ -int +static int arp_getlifaddr(phy_if_t phy_ifdata, lif_if_t ifdata, size_t nelem, - net_ifaddr_t type[], void *storage) + net_ifaddr_t type[], void *storage, netstack_t *ns) { return (-1); } @@ -218,17 +213,22 @@ arp_getlifaddr(phy_if_t phy_ifdata, lif_if_t ifdata, size_t nelem, /* * Determine the instance number of the next lower level interface */ -phy_if_t -arp_phygetnext(phy_if_t phy_ifdata) +static phy_if_t +arp_phygetnext(phy_if_t phy_ifdata, netstack_t *ns) { arl_t *arl; int index; + arp_stack_t *as; + + ASSERT(ns != NULL); - rw_enter(&arl_g_lock, RW_READER); + as = ns->netstack_arp; + rw_enter(&as->as_arl_g_lock, RW_READER); if (phy_ifdata == 0) { - arl = arl_g_head; + arl = as->as_arl_g_head; } else { - for (arl = arl_g_head; arl != NULL; arl = arl->arl_next) { + for (arl = as->as_arl_g_head; arl != NULL; + arl = arl->arl_next) { if (arl->arl_index == phy_ifdata) { arl = arl->arl_next; break; @@ -238,7 +238,7 @@ arp_phygetnext(phy_if_t phy_ifdata) index = (arl != NULL) ? 
arl->arl_index : 0; - rw_exit(&arl_g_lock); + rw_exit(&as->as_arl_g_lock); return (index); } @@ -246,24 +246,26 @@ arp_phygetnext(phy_if_t phy_ifdata) /* * Given a network interface name, find its ARP layer instance number. */ -phy_if_t -arp_phylookup(const char *name) +static phy_if_t +arp_phylookup(const char *name, netstack_t *ns) { arl_t *arl; int index; + arp_stack_t *as; ASSERT(name != NULL); + ASSERT(ns != NULL); index = 0; - - rw_enter(&arl_g_lock, RW_READER); - for (arl = arl_g_head; arl != NULL; arl = arl->arl_next) { + as = ns->netstack_arp; + rw_enter(&as->as_arl_g_lock, RW_READER); + for (arl = as->as_arl_g_head; arl != NULL; arl = arl->arl_next) { if (strcmp(name, arl->arl_name) == 0) { index = arl->arl_index; break; } } - rw_exit(&arl_g_lock); + rw_exit(&as->as_arl_g_lock); return (index); @@ -273,8 +275,8 @@ arp_phylookup(const char *name) * Unsupported with ARP. */ /*ARGSUSED*/ -lif_if_t -arp_lifgetnext(phy_if_t ifp, lif_if_t lif) +static lif_if_t +arp_lifgetnext(phy_if_t ifp, lif_if_t lif, netstack_t *ns) { return ((lif_if_t)-1); } @@ -283,8 +285,8 @@ arp_lifgetnext(phy_if_t ifp, lif_if_t lif) * Unsupported with ARP. */ /*ARGSUSED*/ -int -arp_inject(inject_t injection, net_inject_t *neti) +static int +arp_inject(inject_t injection, net_inject_t *neti, netstack_t *ns) { return (-1); } @@ -293,8 +295,8 @@ arp_inject(inject_t injection, net_inject_t *neti) * Unsupported with ARP. */ /*ARGSUSED*/ -phy_if_t -arp_routeto(struct sockaddr *addr) +static phy_if_t +arp_routeto(struct sockaddr *addr, netstack_t *ns) { return ((phy_if_t)-1); } @@ -313,7 +315,7 @@ arp_ispartialchecksum(mblk_t *mb) * Unsupported with ARP. */ /*ARGSUSED*/ -int +static int arp_isvalidchecksum(mblk_t *mb) { return (-1); diff --git a/usr/src/uts/common/inet/arp/arpddi.c b/usr/src/uts/common/inet/arp/arpddi.c index 5ca60b175f..edd5cd4c2a 100644 --- a/usr/src/uts/common/inet/arp/arpddi.c +++ b/usr/src/uts/common/inet/arp/arpddi.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* Copyright (c) 1990 Mentat Inc. */ @@ -42,29 +42,35 @@ #define INET_DEVMTFLAGS IP_DEVMTFLAGS /* since as a driver we're ip */ #define INET_MODMTFLAGS (D_MP | D_MTPERMOD) -static void arp_ddi_destroy(); -static void arp_ddi_init(); - #include "../inetddi.c" +extern void arp_ddi_init(void); +extern void arp_ddi_destroy(void); + int _init(void) { - int error; + int error; - arp_ddi_init(); INET_BECOME_IP(); + /* + * Note: After mod_install succeeds, another thread can enter + * therefore all initialization is done before it and any + * de-initialization needed done if it fails. 
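The note above states the ordering rule for netstack-aware modules, and the hunks that follow apply it: the per-stack callbacks are registered before mod_install() can let another thread into the module, and unregistered again if installation fails. Condensed to its essentials (INET_BECOME_IP() and other details elided; a sketch, not the verbatim file):

	/* Sketch: load/unload sequence for a netstack-aware module. */
	int
	_init(void)
	{
		int	error;

		arp_ddi_init();			/* netstack_register(NS_ARP, ...) */
		error = mod_install(&modlinkage);
		if (error != 0)
			arp_ddi_destroy();	/* netstack_unregister(NS_ARP) */
		return (error);
	}

	int
	_fini(void)
	{
		int	error;

		error = mod_remove(&modlinkage);
		if (error == 0)
			arp_ddi_destroy();
		return (error);
	}

With this in place each IP Instance gets its own arp_stack_t through arp_stack_init() and gives it back through arp_stack_fini() as stacks come and go.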
+ */ + arp_ddi_init(); error = mod_install(&modlinkage); if (error != 0) arp_ddi_destroy(); + return (error); } int _fini(void) { - int error; + int error; error = mod_remove(&modlinkage); if (error == 0) @@ -77,21 +83,3 @@ _info(struct modinfo *modinfop) { return (mod_info(&modlinkage, modinfop)); } - - -static void -arp_ddi_init() -{ - rw_init(&arl_g_lock, "ARP ARl lock", RW_DRIVER, NULL); - arp_net_init(); - arp_hook_init(); -} - - -static void -arp_ddi_destroy() -{ - arp_hook_destroy(); - arp_net_destroy(); - rw_destroy(&arl_g_lock); -} diff --git a/usr/src/uts/common/inet/arp_impl.h b/usr/src/uts/common/inet/arp_impl.h index c6e12bc86c..567e2b88f5 100644 --- a/usr/src/uts/common/inet/arp_impl.h +++ b/usr/src/uts/common/inet/arp_impl.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -37,10 +37,19 @@ extern "C" { #include <sys/types.h> #include <sys/stream.h> #include <net/if.h> +#include <sys/netstack.h> /* ARP kernel hash size; used for mdb support */ #define ARP_HASH_SIZE 256 +/* Named Dispatch Parameter Management Structure */ +typedef struct arpparam_s { + uint32_t arp_param_min; + uint32_t arp_param_max; + uint32_t arp_param_value; + char *arp_param_name; +} arpparam_t; + /* ARL Structure, one per link level device */ typedef struct arl_s { struct arl_s *arl_next; /* ARL chain at arl_g_head */ @@ -62,6 +71,12 @@ typedef struct arl_s { struct arlphy_s *arl_phy; /* physical info, if any */ } arl_t; +/* + * There is no field to get from an arl_t to an arp_stack_t, but this + * macro does it. + */ +#define ARL_TO_ARPSTACK(_arl) (((ar_t *)(_arl)->arl_rq->q_ptr)->ar_as) + /* ARL physical info structure for a link level device */ typedef struct arlphy_s { uint32_t ap_arp_hw_type; /* hardware type */ @@ -78,27 +93,6 @@ typedef struct arlphy_s { ap_link_down : 1; /* DL_NOTE status */ } arlphy_t; -extern arl_t *arl_g_head; /* ARL chain head */ -extern krwlock_t arl_g_lock; - -#define ARL_F_NOARP 0x01 - -#define ARL_S_DOWN 0x00 -#define ARL_S_PENDING 0x01 -#define ARL_S_UP 0x02 - -/* AR Structure, one per upper stream */ -typedef struct ar_s { - queue_t *ar_rq; /* Read queue pointer */ - queue_t *ar_wq; /* Write queue pointer */ - arl_t *ar_arl; /* Associated arl */ - cred_t *ar_credp; /* Credentials associated w/ open */ - struct ar_s *ar_arl_ip_assoc; /* ARL - IP association */ - uint32_t - ar_ip_acked_close : 1, /* IP has acked the close */ - ar_on_ill_stream : 1; /* Module below is IP */ -} ar_t; - /* ARP Cache Entry */ typedef struct ace_s { struct ace_s *ace_next; /* Hash chain next pointer */ @@ -120,23 +114,12 @@ typedef struct ace_s { int ace_xmit_count; } ace_t; -/* - * Hooks structures used inside of arp - */ -extern hook_event_token_t arp_physical_in; -extern hook_event_token_t arp_physical_out; -extern hook_event_token_t arpnicevents; +#define ARPHOOK_INTERESTED_PHYSICAL_IN(as) \ + (as->as_arp_physical_in_event.he_interested) +#define ARPHOOK_INTERESTED_PHYSICAL_OUT(as) \ + (as->as_arp_physical_out_event.he_interested) -extern hook_event_t arp_physical_in_event; -extern hook_event_t arp_physical_out_event; -extern hook_event_t arp_nic_events; - -#define ARPHOOK_INTERESTED_PHYSICAL_IN \ - (arp_physical_in_event.he_interested) -#define ARPHOOK_INTERESTED_PHYSICAL_OUT \ - (arp_physical_out_event.he_interested) - -#define ARP_HOOK_IN(_hook, _event, _ilp, _hdr, _fm, _m) \ +#define ARP_HOOK_IN(_hook, _event, _ilp, _hdr, _fm, 
_m, as) \ \ if ((_hook).he_interested) { \ hook_pkt_event_t info; \ @@ -146,7 +129,8 @@ extern hook_event_t arp_nic_events; info.hpe_hdr = _hdr; \ info.hpe_mp = &(_fm); \ info.hpe_mb = _m; \ - if (hook_run(_event, (hook_data_t)&info) != 0) {\ + if (hook_run(_event, (hook_data_t)&info, \ + as->as_netstack) != 0) { \ if (_fm != NULL) { \ freemsg(_fm); \ _fm = NULL; \ @@ -159,7 +143,7 @@ extern hook_event_t arp_nic_events; } \ } -#define ARP_HOOK_OUT(_hook, _event, _olp, _hdr, _fm, _m) \ +#define ARP_HOOK_OUT(_hook, _event, _olp, _hdr, _fm, _m, as) \ \ if ((_hook).he_interested) { \ hook_pkt_event_t info; \ @@ -169,8 +153,8 @@ extern hook_event_t arp_nic_events; info.hpe_hdr = _hdr; \ info.hpe_mp = &(_fm); \ info.hpe_mb = _m; \ - if (hook_run(_event, \ - (hook_data_t)&info) != 0) { \ + if (hook_run(_event, (hook_data_t)&info, \ + as->as_netstack) != 0) { \ if (_fm != NULL) { \ freemsg(_fm); \ _fm = NULL; \ @@ -183,10 +167,77 @@ extern hook_event_t arp_nic_events; } \ } -extern void arp_hook_init(); -extern void arp_hook_destroy(); -extern void arp_net_init(); -extern void arp_net_destroy(); +#define ACE_EXTERNAL_FLAGS_MASK \ + (ACE_F_PERMANENT | ACE_F_PUBLISH | ACE_F_MAPPING | ACE_F_MYADDR | \ + ACE_F_AUTHORITY) + +/* + * ARP stack instances + */ +struct arp_stack { + netstack_t *as_netstack; /* Common netstack */ + void *as_head; /* AR Instance Data List Head */ + caddr_t as_nd; /* AR Named Dispatch Head */ + struct arl_s *as_arl_head; /* ARL List Head */ + arpparam_t *as_param_arr; /* ndd variable table */ + + /* ARP Cache Entry Hash Table */ + ace_t *as_ce_hash_tbl[ARP_HASH_SIZE]; + ace_t *as_ce_mask_entries; + + /* + * With the introduction of netinfo (neti kernel module), + * it is now possible to access data structures in the ARP module + * without the code being executed in the context of the IP module, + * thus there is no locking being enforced through the use of STREAMS. 
+ */ + krwlock_t as_arl_g_lock; + arl_t *as_arl_g_head; /* ARL List Head */ + + uint32_t as_arp_index_counter; + uint32_t as_arp_counter_wrapped; + + /* arp_neti.c */ + hook_family_t as_arproot; + + /* + * Hooks for ARP + */ + hook_event_t as_arp_physical_in_event; + hook_event_t as_arp_physical_out_event; + hook_event_t as_arp_nic_events; + + hook_event_token_t as_arp_physical_in; + hook_event_token_t as_arp_physical_out; + hook_event_token_t as_arpnicevents; + + net_data_t as_net_data; +}; +typedef struct arp_stack arp_stack_t; + +#define ARL_F_NOARP 0x01 + +#define ARL_S_DOWN 0x00 +#define ARL_S_PENDING 0x01 +#define ARL_S_UP 0x02 + +/* AR Structure, one per upper stream */ +typedef struct ar_s { + queue_t *ar_rq; /* Read queue pointer */ + queue_t *ar_wq; /* Write queue pointer */ + arl_t *ar_arl; /* Associated arl */ + cred_t *ar_credp; /* Credentials associated w/ open */ + struct ar_s *ar_arl_ip_assoc; /* ARL - IP association */ + uint32_t + ar_ip_acked_close : 1, /* IP has acked the close */ + ar_on_ill_stream : 1; /* Module below is IP */ + arp_stack_t *ar_as; +} ar_t; + +extern void arp_hook_init(arp_stack_t *); +extern void arp_hook_destroy(arp_stack_t *); +extern void arp_net_init(arp_stack_t *, netstack_t *); +extern void arp_net_destroy(arp_stack_t *); #endif /* _KERNEL */ diff --git a/usr/src/uts/common/inet/inetddi.c b/usr/src/uts/common/inet/inetddi.c index 476de3dd2f..e6a0395ba0 100644 --- a/usr/src/uts/common/inet/inetddi.c +++ b/usr/src/uts/common/inet/inetddi.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -98,10 +97,10 @@ static struct dev_priv { {"icmp6", PRIVONLY_DEV, PRIV_NET_ICMPACCESS, PRIV_NET_ICMPACCESS}, {"ip", PRIVONLY_DEV, PRIV_NET_RAWACCESS, PRIV_NET_RAWACCESS}, {"ip6", PRIVONLY_DEV, PRIV_NET_RAWACCESS, PRIV_NET_RAWACCESS}, - {"keysock", PRIVONLY_DEV, PRIV_SYS_NET_CONFIG, PRIV_SYS_NET_CONFIG}, - {"ipsecah", PRIVONLY_DEV, PRIV_SYS_NET_CONFIG, PRIV_SYS_NET_CONFIG}, - {"ipsecesp", PRIVONLY_DEV, PRIV_SYS_NET_CONFIG, PRIV_SYS_NET_CONFIG}, - {"spdsock", PRIVONLY_DEV, PRIV_SYS_NET_CONFIG, PRIV_SYS_NET_CONFIG}, + {"keysock", PRIVONLY_DEV, PRIV_SYS_IP_CONFIG, PRIV_SYS_IP_CONFIG}, + {"ipsecah", PRIVONLY_DEV, PRIV_SYS_IP_CONFIG, PRIV_SYS_IP_CONFIG}, + {"ipsecesp", PRIVONLY_DEV, PRIV_SYS_IP_CONFIG, PRIV_SYS_IP_CONFIG}, + {"spdsock", PRIVONLY_DEV, PRIV_SYS_IP_CONFIG, PRIV_SYS_IP_CONFIG}, {NULL, 0, NULL, NULL} }; diff --git a/usr/src/uts/common/inet/ip.h b/usr/src/uts/common/inet/ip.h index ab4e145aed..993cafb6fe 100644 --- a/usr/src/uts/common/inet/ip.h +++ b/usr/src/uts/common/inet/ip.h @@ -47,6 +47,7 @@ extern "C" { #include <sys/hook.h> #include <sys/hook_event.h> #include <sys/hook_impl.h> +#include <inet/ip_stack.h> #ifdef _KERNEL #include <netinet/ip6.h> @@ -225,7 +226,7 @@ typedef struct ipoptp_s #define IP_FORWARD_NEVER 0 #define IP_FORWARD_ALWAYS 1 -#define WE_ARE_FORWARDING (ip_g_forward == IP_FORWARD_ALWAYS) +#define WE_ARE_FORWARDING(ipst) ((ipst)->ips_ip_g_forward == IP_FORWARD_ALWAYS) #define IPH_HDR_LENGTH(ipha) \ ((int)(((ipha_t *)ipha)->ipha_version_and_hdr_length & 0xF) << 2) @@ -447,9 +448,10 @@ typedef enum { * - when the nce is created or reinit-ed * - every time we get a sane arp response for the nce. */ -#define NCE_EXPIRED(nce) (nce->nce_last > 0 && \ - ((nce->nce_flags & NCE_F_PERMANENT) == 0) && \ - ((TICK_TO_MSEC(lbolt64) - nce->nce_last) > ip_ire_arp_interval)) +#define NCE_EXPIRED(nce, ipst) (nce->nce_last > 0 && \ + ((nce->nce_flags & NCE_F_PERMANENT) == 0) && \ + ((TICK_TO_MSEC(lbolt64) - nce->nce_last) > \ + (ipst)->ips_ip_ire_arp_interval)) #endif /* _KERNEL */ @@ -682,6 +684,18 @@ typedef struct ip_m_s { #define IRE_MARK_UNCACHED 0x0080 +/* + * The comment below (and for other netstack_t references) refers + * to the fact that we only do netstack_hold in particular cases, + * such as the references from open streams (ill_t and conn_t's + * pointers). Internally within IP we rely on IP's ability to cleanup e.g. + * ire_t's when an ill goes away. + */ +typedef struct ire_expire_arg_s { + int iea_flush_flag; + ip_stack_t *iea_ipst; /* Does not have a netstack_hold */ +} ire_expire_arg_t; + /* Flags with ire_expire routine */ #define FLUSH_ARP_TIME 0x0001 /* ARP info potentially stale timer */ #define FLUSH_REDIRECT_TIME 0x0002 /* Redirects potentially stale */ @@ -833,6 +847,12 @@ typedef struct ilg_s { * ilm_ipif is used by IPv4 as multicast groups are joined using ipif. * ilm_ill is used by IPv6 as multicast groups are joined using ill. * ilm_ill is NULL for IPv4 and ilm_ipif is NULL for IPv6. + * + * The comment below (and for other netstack_t references) refers + * to the fact that we only do netstack_hold in particular cases, + * such as the references from open streams (ill_t and conn_t's + * pointers). Internally within IP we rely on IP's ability to cleanup e.g. + * ire_t's when an ill goes away. 
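The "Does not have a netstack_hold" remark above, repeated on the cached ip_stack_t pointers added to ire_t, ilm_t, ipsq_t and irb_t below, records the reference-counting convention of this changeset: only references entered through open streams (an ill_t's or conn_t's) hold the netstack; everything else borrows the pointer and relies on IP destroying those objects before the stack itself disappears. Formerly global predicates are then evaluated against whichever held stack the caller already has, roughly like this (xx_input_example is a placeholder; not verbatim kernel code):

	/* Sketch: stack-parameterized macros replacing global predicates. */
	static void
	xx_input_example(ill_t *ill, nce_t *nce)
	{
		/* ill_ipst "corresponds to a netstack_hold" per the ill_t hunk below */
		ip_stack_t *ipst = ill->ill_ipst;

		if (WE_ARE_FORWARDING(ipst)) {
			/* ips_ip_g_forward == IP_FORWARD_ALWAYS in this instance */
		}
		if (NCE_EXPIRED(nce, ipst)) {
			/* staleness judged against ips_ip_ire_arp_interval */
		}
	}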
*/ #define ILM_DELETED 0x1 /* ilm_flags */ typedef struct ilm_s { @@ -853,6 +873,7 @@ typedef struct ilm_s { slist_t *ilm_filter; /* source filter list */ slist_t *ilm_pendsrcs; /* relevant src addrs for pending req */ rtx_state_t ilm_rtx; /* SCR retransmission state */ + ip_stack_t *ilm_ipst; /* Does not have a netstack_hold */ } ilm_t; #define ilm_addr V4_PART_OF_V6(ilm_v6addr) @@ -945,11 +966,11 @@ typedef struct ipsec_latch_s ASSERT((ipl)->ipl_refcnt != 0); \ } -#define IPLATCH_REFRELE(ipl) { \ +#define IPLATCH_REFRELE(ipl, ns) { \ ASSERT((ipl)->ipl_refcnt != 0); \ membar_exit(); \ if (atomic_add_32_nv(&(ipl)->ipl_refcnt, -1) == 0) \ - iplatch_free(ipl); \ + iplatch_free(ipl, ns); \ } /* @@ -1107,25 +1128,25 @@ typedef struct ipsec_selector { * 2) Or if we have not cached policy on the conn and the global policy is * non-empty. */ -#define CONN_INBOUND_POLICY_PRESENT(connp) \ - ((connp)->conn_in_enforce_policy || \ - (!((connp)->conn_policy_cached) && \ - ipsec_inbound_v4_policy_present)) +#define CONN_INBOUND_POLICY_PRESENT(connp, ipss) \ + ((connp)->conn_in_enforce_policy || \ + (!((connp)->conn_policy_cached) && \ + (ipss)->ipsec_inbound_v4_policy_present)) -#define CONN_INBOUND_POLICY_PRESENT_V6(connp) \ - ((connp)->conn_in_enforce_policy || \ - (!(connp)->conn_policy_cached && \ - ipsec_inbound_v6_policy_present)) +#define CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) \ + ((connp)->conn_in_enforce_policy || \ + (!(connp)->conn_policy_cached && \ + (ipss)->ipsec_inbound_v6_policy_present)) -#define CONN_OUTBOUND_POLICY_PRESENT(connp) \ - ((connp)->conn_out_enforce_policy || \ - (!((connp)->conn_policy_cached) && \ - ipsec_outbound_v4_policy_present)) +#define CONN_OUTBOUND_POLICY_PRESENT(connp, ipss) \ + ((connp)->conn_out_enforce_policy || \ + (!((connp)->conn_policy_cached) && \ + (ipss)->ipsec_outbound_v4_policy_present)) -#define CONN_OUTBOUND_POLICY_PRESENT_V6(connp) \ - ((connp)->conn_out_enforce_policy || \ - (!(connp)->conn_policy_cached && \ - ipsec_outbound_v6_policy_present)) +#define CONN_OUTBOUND_POLICY_PRESENT_V6(connp, ipss) \ + ((connp)->conn_out_enforce_policy || \ + (!(connp)->conn_policy_cached && \ + (ipss)->ipsec_outbound_v6_policy_present)) /* * Information cached in IRE for upper layer protocol (ULP). 
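The connection-level IPsec policy checks above follow the same conversion: the global ipsec_*_policy_present flags move into per-stack IPsec state, and the CONN_*_POLICY_PRESENT macros now take that state as an explicit ipss argument. A small usage sketch (the ipsec_stack_t type name is taken from the wider changeset, not from this excerpt; xx_policy_needed is a placeholder):

	/* Sketch: policy-presence check against one instance's IPsec state. */
	static boolean_t
	xx_policy_needed(conn_t *connp, ipsec_stack_t *ipss, boolean_t isv4)
	{
		if (isv4)
			return (CONN_INBOUND_POLICY_PRESENT(connp, ipss));
		return (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss));
	}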
@@ -1451,6 +1472,7 @@ typedef struct ipsq_s { int ipsq_depth; /* debugging aid */ pc_t ipsq_stack[IP_STACK_DEPTH]; /* debugging aid */ #endif + ip_stack_t *ipsq_ipst; /* Does not have a netstack_hold */ } ipsq_t; /* ipsq_flags */ @@ -1542,8 +1564,6 @@ typedef struct ill_group { int illgrp_ill_count; } ill_group_t; -extern ill_group_t *illgrp_head_v6; - /* * Fragmentation hash bucket */ @@ -1582,6 +1602,7 @@ typedef struct irb { uint_t irb_tmp_ire_cnt; /* Num of temporary IRE */ struct ire_s *irb_rr_origin; /* origin for round-robin */ int irb_nire; /* Num of ftable ire's that ref irb */ + ip_stack_t *irb_ipst; /* Does not have a netstack_hold */ } irb_t; #define IRB2RT(irb) (rt_t *)((caddr_t)(irb) - offsetof(rt_t, rt_irb)) @@ -1619,7 +1640,6 @@ typedef union ill_if_u { char illif_filler[CACHE_ALIGN(_ill_if_s_)]; } ill_if_t; - #define illif_next ill_if_s.illif_next #define illif_prev ill_if_s.illif_prev #define illif_avl_by_ppa ill_if_s.illif_avl_by_ppa @@ -1635,7 +1655,7 @@ typedef struct ill_walk_context_s { } ill_walk_context_t; /* - * ill_gheads structure, one for IPV4 and one for IPV6 + * ill_g_heads structure, one for IPV4 and one for IPV6 */ struct _ill_g_head_s_ { ill_if_t *ill_g_list_head; @@ -1650,20 +1670,19 @@ typedef union ill_g_head_u { #define ill_g_list_head ill_g_head_s.ill_g_list_head #define ill_g_list_tail ill_g_head_s.ill_g_list_tail -#pragma align CACHE_ALIGN_SIZE(ill_g_heads) -extern ill_g_head_t ill_g_heads[]; /* ILL List Head */ - +#define IP_V4_ILL_G_LIST(ipst) \ + (ipst)->ips_ill_g_heads[IP_V4_G_HEAD].ill_g_list_head +#define IP_V6_ILL_G_LIST(ipst) \ + (ipst)->ips_ill_g_heads[IP_V6_G_HEAD].ill_g_list_head +#define IP_VX_ILL_G_LIST(i, ipst) \ + (ipst)->ips_ill_g_heads[i].ill_g_list_head -#define IP_V4_ILL_G_LIST ill_g_heads[IP_V4_G_HEAD].ill_g_list_head -#define IP_V6_ILL_G_LIST ill_g_heads[IP_V6_G_HEAD].ill_g_list_head -#define IP_VX_ILL_G_LIST(i) ill_g_heads[i].ill_g_list_head - -#define ILL_START_WALK_V4(ctx_ptr) ill_first(IP_V4_G_HEAD, IP_V4_G_HEAD, \ - ctx_ptr) -#define ILL_START_WALK_V6(ctx_ptr) ill_first(IP_V6_G_HEAD, IP_V6_G_HEAD, \ - ctx_ptr) -#define ILL_START_WALK_ALL(ctx_ptr) ill_first(MAX_G_HEADS, MAX_G_HEADS, \ - ctx_ptr) +#define ILL_START_WALK_V4(ctx_ptr, ipst) \ + ill_first(IP_V4_G_HEAD, IP_V4_G_HEAD, ctx_ptr, ipst) +#define ILL_START_WALK_V6(ctx_ptr, ipst) \ + ill_first(IP_V6_G_HEAD, IP_V6_G_HEAD, ctx_ptr, ipst) +#define ILL_START_WALK_ALL(ctx_ptr, ipst) \ + ill_first(MAX_G_HEADS, MAX_G_HEADS, ctx_ptr, ipst) /* * Capabilities, possible flags for ill_capabilities. 
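The ill_g_heads lists and the ILL_START_WALK_* macros above are likewise per instance now: callers pass the ip_stack_t, and the walk never leaves that stack's interfaces. A sketch of a walk with the new signatures (xx_walk_ills is a placeholder; the per-stack ill-list lock the caller must hold is elided here):

	/* Sketch: iterating the ills that belong to a single IP Instance. */
	static void
	xx_walk_ills(ip_stack_t *ipst)
	{
		ill_walk_context_t ctx;
		ill_t *ill;

		for (ill = ILL_START_WALK_ALL(&ctx, ipst); ill != NULL;
		    ill = ill_next(&ctx, ill)) {
			/* only ills of this ip_stack_t are visited */
		}
	}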
@@ -1973,6 +1992,8 @@ typedef struct ill_s { th_trace_t *ill_trace[IP_TR_HASH_MAX]; boolean_t ill_trace_disable; /* True when alloc fails */ #endif + zoneid_t ill_zoneid; + ip_stack_t *ill_ipst; /* Corresponds to a netstack_hold */ } ill_t; extern void ill_delete_glist(ill_t *); @@ -2376,7 +2397,7 @@ typedef struct tsol_ire_gw_secattr_s { if (atomic_add_32_nv(&(ire)->ire_refcnt, -1) == 0) \ ire_inactive(ire); \ } -#define IRE_REFRELE_NOTR(ire) { \ +#define IRE_REFRELE_NOTR(ire) { \ ASSERT((ire)->ire_refcnt != 0); \ membar_exit(); \ if (atomic_add_32_nv(&(ire)->ire_refcnt, -1) == 0) \ @@ -2530,6 +2551,7 @@ typedef struct ire_s { uint_t ire_stq_ifindex; uint_t ire_defense_count; /* number of ARP conflicts */ uint_t ire_defense_time; /* last time defended (secs) */ + ip_stack_t *ire_ipst; /* Does not have a netstack_hold */ #ifdef IRE_DEBUG th_trace_t *ire_trace[IP_TR_HASH_MAX]; boolean_t ire_trace_disable; /* True when alloc fails */ @@ -2612,8 +2634,8 @@ typedef struct ipt_s { */ #define COMMON_IP_MTU 1500 #define MAX_FRAG_MIN 10 -#define MAX_FRAG_PKTS \ - MAX(MAX_FRAG_MIN, (2 * (ip_reass_queue_bytes / \ +#define MAX_FRAG_PKTS(ipst) \ + MAX(MAX_FRAG_MIN, (2 * (ipst->ips_ip_reass_queue_bytes / \ (COMMON_IP_MTU * ILL_FRAG_HASH_TBL_COUNT)))) /* @@ -2842,104 +2864,96 @@ typedef struct cmd_info_s struct lifreq *ci_lifr; /* the lifreq struct passed down */ } cmd_info_t; -extern krwlock_t ill_g_lock; -extern kmutex_t ip_addr_avail_lock; -extern ipsq_t *ipsq_g_head; - -extern ill_t *ip_timer_ill; /* ILL for IRE expiration timer. */ -extern timeout_id_t ip_ire_expire_id; /* IRE expiration timeout id. */ -extern timeout_id_t ip_ire_reclaim_id; /* IRE recalaim timeout id. */ - -extern kmutex_t ip_mi_lock; -extern krwlock_t ip_g_nd_lock; /* For adding/removing nd variables */ -extern kmutex_t ip_trash_timer_lock; /* Protects ip_ire_expire_id */ - -extern kmutex_t igmp_timer_lock; /* Protects the igmp timer */ -extern kmutex_t mld_timer_lock; /* Protects the mld timer */ - -extern krwlock_t ill_g_usesrc_lock; /* Protects usesrc related fields */ +/* + * List of AH and ESP IPsec acceleration capable ills + */ +typedef struct ipsec_capab_ill_s { + uint_t ill_index; + boolean_t ill_isv6; + struct ipsec_capab_ill_s *next; +} ipsec_capab_ill_t; extern struct kmem_cache *ire_cache; -extern uint_t ip_redirect_cnt; /* Num of redirect routes in ftable */ - extern ipaddr_t ip_g_all_ones; -extern caddr_t ip_g_nd; /* Named Dispatch List Head */ - -extern uint_t ip_loopback_mtu; -extern ipparam_t *ip_param_arr; +extern uint_t ip_loopback_mtu; /* /etc/system */ -extern int ip_g_forward; -extern int ipv6_forward; extern vmem_t *ip_minor_arena; -#define ip_respond_to_address_mask_broadcast ip_param_arr[0].ip_param_value -#define ip_g_resp_to_echo_bcast ip_param_arr[1].ip_param_value -#define ip_g_resp_to_echo_mcast ip_param_arr[2].ip_param_value -#define ip_g_resp_to_timestamp ip_param_arr[3].ip_param_value -#define ip_g_resp_to_timestamp_bcast ip_param_arr[4].ip_param_value -#define ip_g_send_redirects ip_param_arr[5].ip_param_value -#define ip_g_forward_directed_bcast ip_param_arr[6].ip_param_value -#define ip_debug ip_param_arr[7].ip_param_value -#define ip_mrtdebug ip_param_arr[8].ip_param_value -#define ip_timer_interval ip_param_arr[9].ip_param_value -#define ip_ire_arp_interval ip_param_arr[10].ip_param_value -#define ip_ire_redir_interval ip_param_arr[11].ip_param_value -#define ip_def_ttl ip_param_arr[12].ip_param_value -#define ip_forward_src_routed ip_param_arr[13].ip_param_value -#define ip_wroff_extra 
ip_param_arr[14].ip_param_value -#define ip_ire_pathmtu_interval ip_param_arr[15].ip_param_value -#define ip_icmp_return ip_param_arr[16].ip_param_value -#define ip_path_mtu_discovery ip_param_arr[17].ip_param_value -#define ip_ignore_delete_time ip_param_arr[18].ip_param_value -#define ip_ignore_redirect ip_param_arr[19].ip_param_value -#define ip_output_queue ip_param_arr[20].ip_param_value -#define ip_broadcast_ttl ip_param_arr[21].ip_param_value -#define ip_icmp_err_interval ip_param_arr[22].ip_param_value -#define ip_icmp_err_burst ip_param_arr[23].ip_param_value -#define ip_reass_queue_bytes ip_param_arr[24].ip_param_value -#define ip_strict_dst_multihoming ip_param_arr[25].ip_param_value -#define ip_addrs_per_if ip_param_arr[26].ip_param_value -#define ipsec_override_persocket_policy ip_param_arr[27].ip_param_value -#define icmp_accept_clear_messages ip_param_arr[28].ip_param_value -#define igmp_accept_clear_messages ip_param_arr[29].ip_param_value +/* + * ip_g_forward controls IP forwarding. It takes two values: + * 0: IP_FORWARD_NEVER Don't forward packets ever. + * 1: IP_FORWARD_ALWAYS Forward packets for elsewhere. + * + * RFC1122 says there must be a configuration switch to control forwarding, + * but that the default MUST be to not forward packets ever. Implicit + * control based on configuration of multiple interfaces MUST NOT be + * implemented (Section 3.1). SunOS 4.1 did provide the "automatic" capability + * and, in fact, it was the default. That capability is now provided in the + * /etc/rc2.d/S69inet script. + */ + +#define ips_ip_respond_to_address_mask_broadcast ips_param_arr[0].ip_param_value +#define ips_ip_g_resp_to_echo_bcast ips_param_arr[1].ip_param_value +#define ips_ip_g_resp_to_echo_mcast ips_param_arr[2].ip_param_value +#define ips_ip_g_resp_to_timestamp ips_param_arr[3].ip_param_value +#define ips_ip_g_resp_to_timestamp_bcast ips_param_arr[4].ip_param_value +#define ips_ip_g_send_redirects ips_param_arr[5].ip_param_value +#define ips_ip_g_forward_directed_bcast ips_param_arr[6].ip_param_value +#define ips_ip_debug ips_param_arr[7].ip_param_value +#define ips_ip_mrtdebug ips_param_arr[8].ip_param_value +#define ips_ip_timer_interval ips_param_arr[9].ip_param_value +#define ips_ip_ire_arp_interval ips_param_arr[10].ip_param_value +#define ips_ip_ire_redir_interval ips_param_arr[11].ip_param_value +#define ips_ip_def_ttl ips_param_arr[12].ip_param_value +#define ips_ip_forward_src_routed ips_param_arr[13].ip_param_value +#define ips_ip_wroff_extra ips_param_arr[14].ip_param_value +#define ips_ip_ire_pathmtu_interval ips_param_arr[15].ip_param_value +#define ips_ip_icmp_return ips_param_arr[16].ip_param_value +#define ips_ip_path_mtu_discovery ips_param_arr[17].ip_param_value +#define ips_ip_ignore_delete_time ips_param_arr[18].ip_param_value +#define ips_ip_ignore_redirect ips_param_arr[19].ip_param_value +#define ips_ip_output_queue ips_param_arr[20].ip_param_value +#define ips_ip_broadcast_ttl ips_param_arr[21].ip_param_value +#define ips_ip_icmp_err_interval ips_param_arr[22].ip_param_value +#define ips_ip_icmp_err_burst ips_param_arr[23].ip_param_value +#define ips_ip_reass_queue_bytes ips_param_arr[24].ip_param_value +#define ips_ip_strict_dst_multihoming ips_param_arr[25].ip_param_value +#define ips_ip_addrs_per_if ips_param_arr[26].ip_param_value +#define ips_ipsec_override_persocket_policy ips_param_arr[27].ip_param_value +#define ips_icmp_accept_clear_messages ips_param_arr[28].ip_param_value +#define ips_igmp_accept_clear_messages 
ips_param_arr[29].ip_param_value /* IPv6 configuration knobs */ -#define delay_first_probe_time ip_param_arr[30].ip_param_value -#define max_unicast_solicit ip_param_arr[31].ip_param_value -#define ipv6_def_hops ip_param_arr[32].ip_param_value -#define ipv6_icmp_return ip_param_arr[33].ip_param_value -#define ipv6_forward_src_routed ip_param_arr[34].ip_param_value -#define ipv6_resp_echo_mcast ip_param_arr[35].ip_param_value -#define ipv6_send_redirects ip_param_arr[36].ip_param_value -#define ipv6_ignore_redirect ip_param_arr[37].ip_param_value -#define ipv6_strict_dst_multihoming ip_param_arr[38].ip_param_value -#define ip_ire_reclaim_fraction ip_param_arr[39].ip_param_value -#define ipsec_policy_log_interval ip_param_arr[40].ip_param_value -#define pim_accept_clear_messages ip_param_arr[41].ip_param_value -#define ip_ndp_unsolicit_interval ip_param_arr[42].ip_param_value -#define ip_ndp_unsolicit_count ip_param_arr[43].ip_param_value -#define ipv6_ignore_home_address_opt ip_param_arr[44].ip_param_value -#define ip_policy_mask ip_param_arr[45].ip_param_value -#define ip_multirt_resolution_interval ip_param_arr[46].ip_param_value -#define ip_multirt_ttl ip_param_arr[47].ip_param_value -#define ip_multidata_outbound ip_param_arr[48].ip_param_value -#define ip_ndp_defense_interval ip_param_arr[49].ip_param_value -#define ip_max_temp_idle ip_param_arr[50].ip_param_value -#define ip_max_temp_defend ip_param_arr[51].ip_param_value -#define ip_max_defend ip_param_arr[52].ip_param_value -#define ip_defend_interval ip_param_arr[53].ip_param_value -#define ip_dup_recovery ip_param_arr[54].ip_param_value -#define ip_restrict_interzone_loopback ip_param_arr[55].ip_param_value -#define ip_lso_outbound ip_param_arr[56].ip_param_value -#ifdef DEBUG -#define ipv6_drop_inbound_icmpv6 ip_param_arr[57].ip_param_value -#else -#define ipv6_drop_inbound_icmpv6 0 -#endif - -extern hrtime_t ipsec_policy_failure_last; +#define ips_delay_first_probe_time ips_param_arr[30].ip_param_value +#define ips_max_unicast_solicit ips_param_arr[31].ip_param_value +#define ips_ipv6_def_hops ips_param_arr[32].ip_param_value +#define ips_ipv6_icmp_return ips_param_arr[33].ip_param_value +#define ips_ipv6_forward_src_routed ips_param_arr[34].ip_param_value +#define ips_ipv6_resp_echo_mcast ips_param_arr[35].ip_param_value +#define ips_ipv6_send_redirects ips_param_arr[36].ip_param_value +#define ips_ipv6_ignore_redirect ips_param_arr[37].ip_param_value +#define ips_ipv6_strict_dst_multihoming ips_param_arr[38].ip_param_value +#define ips_ip_ire_reclaim_fraction ips_param_arr[39].ip_param_value +#define ips_ipsec_policy_log_interval ips_param_arr[40].ip_param_value +#define ips_pim_accept_clear_messages ips_param_arr[41].ip_param_value +#define ips_ip_ndp_unsolicit_interval ips_param_arr[42].ip_param_value +#define ips_ip_ndp_unsolicit_count ips_param_arr[43].ip_param_value +#define ips_ipv6_ignore_home_address_opt ips_param_arr[44].ip_param_value +#define ips_ip_policy_mask ips_param_arr[45].ip_param_value +#define ips_ip_multirt_resolution_interval ips_param_arr[46].ip_param_value +#define ips_ip_multirt_ttl ips_param_arr[47].ip_param_value +#define ips_ip_multidata_outbound ips_param_arr[48].ip_param_value +#define ips_ip_ndp_defense_interval ips_param_arr[49].ip_param_value +#define ips_ip_max_temp_idle ips_param_arr[50].ip_param_value +#define ips_ip_max_temp_defend ips_param_arr[51].ip_param_value +#define ips_ip_max_defend ips_param_arr[52].ip_param_value +#define ips_ip_defend_interval ips_param_arr[53].ip_param_value 
+#define ips_ip_dup_recovery ips_param_arr[54].ip_param_value +#define ips_ip_restrict_interzone_loopback ips_param_arr[55].ip_param_value +#define ips_ip_lso_outbound ips_param_arr[56].ip_param_value +#define ips_ipv6_drop_inbound_icmpv6 ips_param_arr[57].ip_param_value extern int dohwcksum; /* use h/w cksum if supported by the h/w */ #ifdef ZC_TEST @@ -2950,76 +2964,35 @@ extern char ipif_loopback_name[]; extern nv_t *ire_nv_tbl; -extern time_t ip_g_frag_timeout; -extern clock_t ip_g_frag_timo_ms; - -extern mib2_ipIfStatsEntry_t ip_mib; /* For tcpInErrs and udpNoPorts */ - extern struct module_info ip_mod_info; -extern timeout_id_t igmp_slowtimeout_id; -extern timeout_id_t mld_slowtimeout_id; - -extern uint_t loopback_packets; - -/* - * Hooks structures used inside of ip - */ -extern hook_event_token_t ipv4firewall_physical_in; -extern hook_event_token_t ipv4firewall_physical_out; -extern hook_event_token_t ipv4firewall_forwarding; -extern hook_event_token_t ipv4firewall_loopback_in; -extern hook_event_token_t ipv4firewall_loopback_out; -extern hook_event_token_t ipv4nicevents; - -extern hook_event_token_t ipv6firewall_physical_in; -extern hook_event_token_t ipv6firewall_physical_out; -extern hook_event_token_t ipv6firewall_forwarding; -extern hook_event_token_t ipv6firewall_loopback_in; -extern hook_event_token_t ipv6firewall_loopback_out; -extern hook_event_token_t ipv6nicevents; - -extern hook_event_t ip4_physical_in_event; -extern hook_event_t ip4_physical_out_event; -extern hook_event_t ip4_forwarding_event; -extern hook_event_t ip4_loopback_in_event; -extern hook_event_t ip4_loopback_out_event; -extern hook_event_t ip4_nic_events; - -extern hook_event_t ip6_physical_in_event; -extern hook_event_t ip6_physical_out_event; -extern hook_event_t ip6_forwarding_event; -extern hook_event_t ip6_loopback_in_event; -extern hook_event_t ip6_loopback_out_event; -extern hook_event_t ip6_nic_events; - -#define HOOKS4_INTERESTED_PHYSICAL_IN \ - (ip4_physical_in_event.he_interested) -#define HOOKS6_INTERESTED_PHYSICAL_IN \ - (ip6_physical_in_event.he_interested) -#define HOOKS4_INTERESTED_PHYSICAL_OUT \ - (ip4_physical_out_event.he_interested) -#define HOOKS6_INTERESTED_PHYSICAL_OUT \ - (ip6_physical_out_event.he_interested) -#define HOOKS4_INTERESTED_FORWARDING \ - (ip4_forwarding_event.he_interested) -#define HOOKS6_INTERESTED_FORWARDING \ - (ip6_forwarding_event.he_interested) -#define HOOKS4_INTERESTED_LOOPBACK_IN \ - (ip4_loopback_in_event.he_interested) -#define HOOKS6_INTERESTED_LOOPBACK_IN \ - (ip6_loopback_in_event.he_interested) -#define HOOKS4_INTERESTED_LOOPBACK_OUT \ - (ip4_loopback_out_event.he_interested) -#define HOOKS6_INTERESTED_LOOPBACK_OUT \ - (ip6_loopback_out_event.he_interested) +#define HOOKS4_INTERESTED_PHYSICAL_IN(ipst) \ + ((ipst)->ips_ip4_physical_in_event.he_interested) +#define HOOKS6_INTERESTED_PHYSICAL_IN(ipst) \ + ((ipst)->ips_ip6_physical_in_event.he_interested) +#define HOOKS4_INTERESTED_PHYSICAL_OUT(ipst) \ + ((ipst)->ips_ip4_physical_out_event.he_interested) +#define HOOKS6_INTERESTED_PHYSICAL_OUT(ipst) \ + ((ipst)->ips_ip6_physical_out_event.he_interested) +#define HOOKS4_INTERESTED_FORWARDING(ipst) \ + ((ipst)->ips_ip4_forwarding_event.he_interested) +#define HOOKS6_INTERESTED_FORWARDING(ipst) \ + ((ipst)->ips_ip6_forwarding_event.he_interested) +#define HOOKS4_INTERESTED_LOOPBACK_IN(ipst) \ + ((ipst)->ips_ip4_loopback_in_event.he_interested) +#define HOOKS6_INTERESTED_LOOPBACK_IN(ipst) \ + ((ipst)->ips_ip6_loopback_in_event.he_interested) +#define 
HOOKS4_INTERESTED_LOOPBACK_OUT(ipst) \ + ((ipst)->ips_ip4_loopback_out_event.he_interested) +#define HOOKS6_INTERESTED_LOOPBACK_OUT(ipst) \ + ((ipst)->ips_ip6_loopback_out_event.he_interested) /* * Hooks marcos used inside of ip */ #define IPHA_VHL ipha_version_and_hdr_length -#define FW_HOOKS(_hook, _event, _ilp, _olp, _iph, _fm, _m) \ +#define FW_HOOKS(_hook, _event, _ilp, _olp, _iph, _fm, _m, ipst) \ \ if ((_hook).he_interested) { \ hook_pkt_event_t info; \ @@ -3045,7 +3018,8 @@ extern hook_event_t ip6_nic_events; info.hpe_hdr = _iph; \ info.hpe_mp = &(_fm); \ info.hpe_mb = _m; \ - if (hook_run(_event, (hook_data_t)&info) != 0) { \ + if (hook_run(_event, (hook_data_t)&info, \ + ipst->ips_netstack) != 0) { \ ip2dbg(("%s hook dropped mblk chain %p hdr %p\n",\ (_hook).he_name, (void *)_fm, (void *)_m)); \ if (_fm != NULL) { \ @@ -3060,7 +3034,7 @@ extern hook_event_t ip6_nic_events; } \ } -#define FW_HOOKS6(_hook, _event, _ilp, _olp, _iph, _fm, _m) \ +#define FW_HOOKS6(_hook, _event, _ilp, _olp, _iph, _fm, _m, ipst) \ \ if ((_hook).he_interested) { \ hook_pkt_event_t info; \ @@ -3086,7 +3060,8 @@ extern hook_event_t ip6_nic_events; info.hpe_hdr = _iph; \ info.hpe_mp = &(_fm); \ info.hpe_mb = _m; \ - if (hook_run(_event, (hook_data_t)&info) != 0) { \ + if (hook_run(_event, (hook_data_t)&info, \ + ipst->ips_netstack) != 0) { \ ip2dbg(("%s hook dropped mblk chain %p hdr %p\n",\ (_hook).he_name, (void *)_fm, (void *)_m)); \ if (_fm != NULL) { \ @@ -3134,6 +3109,8 @@ extern uint32_t ipsechw_debug; #define IPSECHW_CALL(f, r, x) {} #endif +extern int ip_debug; + #ifdef IP_DEBUG #include <sys/debug.h> #include <sys/promif.h> @@ -3159,7 +3136,7 @@ struct mac_header_info_s; extern const char *dlpi_prim_str(int); extern const char *dlpi_err_str(int); extern void ill_frag_timer(void *); -extern ill_t *ill_first(int, int, ill_walk_context_t *); +extern ill_t *ill_first(int, int, ill_walk_context_t *, ip_stack_t *); extern ill_t *ill_next(ill_walk_context_t *, ill_t *); extern void ill_frag_timer_start(ill_t *); extern mblk_t *ip_carve_mp(mblk_t **, ssize_t); @@ -3167,10 +3144,12 @@ extern mblk_t *ip_dlpi_alloc(size_t, t_uscalar_t); extern char *ip_dot_addr(ipaddr_t, char *); extern const char *mac_colon_addr(const uint8_t *, size_t, char *, size_t); extern void ip_lwput(queue_t *, mblk_t *); -extern boolean_t icmp_err_rate_limit(void); -extern void icmp_time_exceeded(queue_t *, mblk_t *, uint8_t, zoneid_t); -extern void icmp_unreachable(queue_t *, mblk_t *, uint8_t, zoneid_t); -extern mblk_t *ip_add_info(mblk_t *, ill_t *, uint_t, zoneid_t); +extern boolean_t icmp_err_rate_limit(ip_stack_t *); +extern void icmp_time_exceeded(queue_t *, mblk_t *, uint8_t, zoneid_t, + ip_stack_t *); +extern void icmp_unreachable(queue_t *, mblk_t *, uint8_t, zoneid_t, + ip_stack_t *); +extern mblk_t *ip_add_info(mblk_t *, ill_t *, uint_t, zoneid_t, ip_stack_t *); extern mblk_t *ip_bind_v4(queue_t *, mblk_t *, conn_t *); extern int ip_bind_connected(conn_t *, mblk_t *, ipaddr_t *, uint16_t, ipaddr_t, uint16_t, boolean_t, boolean_t, boolean_t, @@ -3181,9 +3160,12 @@ extern int ip_bind_laddr(conn_t *, mblk_t *, ipaddr_t, uint16_t, extern uint_t ip_cksum(mblk_t *, int, uint32_t); extern int ip_close(queue_t *, int); extern uint16_t ip_csum_hdr(ipha_t *); -extern void ip_proto_not_sup(queue_t *, mblk_t *, uint_t, zoneid_t); -extern void ip_ire_fini(void); -extern void ip_ire_init(void); +extern void ip_proto_not_sup(queue_t *, mblk_t *, uint_t, zoneid_t, + ip_stack_t *); +extern void ip_ire_g_fini(void); +extern void 
ip_ire_g_init(void); +extern void ip_ire_fini(ip_stack_t *); +extern void ip_ire_init(ip_stack_t *); extern int ip_open(queue_t *, dev_t *, int, int, cred_t *); extern int ip_reassemble(mblk_t *, ipf_t *, uint_t, boolean_t, ill_t *, size_t); @@ -3205,7 +3187,7 @@ extern void ip_mib2_add_icmp6_stats(mib2_ipv6IfIcmpEntry_t *, extern void ip_udp_input(queue_t *, mblk_t *, ipha_t *, ire_t *, ill_t *); extern void ip_proto_input(queue_t *, mblk_t *, ipha_t *, ire_t *, ill_t *); extern void ip_rput_other(ipsq_t *, queue_t *, mblk_t *, void *); -extern void ip_setqinfo(queue_t *, minor_t, boolean_t); +extern void ip_setqinfo(queue_t *, minor_t, boolean_t, ip_stack_t *); extern void ip_trash_ire_reclaim(void *); extern void ip_trash_timer_expire(void *); extern void ip_wput(queue_t *, mblk_t *); @@ -3224,13 +3206,13 @@ extern void ip_wsrv(queue_t *); extern char *ip_nv_lookup(nv_t *, int); extern boolean_t ip_local_addr_ok_v6(const in6_addr_t *, const in6_addr_t *); extern boolean_t ip_remote_addr_ok_v6(const in6_addr_t *, const in6_addr_t *); -extern ipaddr_t ip_massage_options(ipha_t *); +extern ipaddr_t ip_massage_options(ipha_t *, netstack_t *); extern ipaddr_t ip_net_mask(ipaddr_t); extern void ip_newroute(queue_t *, mblk_t *, ipaddr_t, ill_t *, conn_t *, - zoneid_t); + zoneid_t, ip_stack_t *); extern ipxmit_state_t ip_xmit_v4(mblk_t *, ire_t *, struct ipsec_out_s *, boolean_t); -extern int ip_hdr_complete(ipha_t *, zoneid_t); +extern int ip_hdr_complete(ipha_t *, zoneid_t, ip_stack_t *); extern struct qinit rinit_ipv6; extern struct qinit winit_ipv6; @@ -3240,9 +3222,6 @@ extern struct qinit winit_tcp; extern struct qinit rinit_acceptor_tcp; extern struct qinit winit_acceptor_tcp; -extern net_data_t ipv4; -extern net_data_t ipv6; - extern void conn_drain_insert(conn_t *connp); extern int conn_ipsec_length(conn_t *connp); extern void ip_wput_ipsec_out(queue_t *, mblk_t *, ipha_t *, ill_t *, @@ -3250,7 +3229,7 @@ extern void ip_wput_ipsec_out(queue_t *, mblk_t *, ipha_t *, ill_t *, extern ipaddr_t ip_get_dst(ipha_t *); extern int ipsec_out_extra_length(mblk_t *); extern int ipsec_in_extra_length(mblk_t *); -extern mblk_t *ipsec_in_alloc(); +extern mblk_t *ipsec_in_alloc(boolean_t, netstack_t *); extern boolean_t ipsec_in_is_secure(mblk_t *); extern void ipsec_out_process(queue_t *, mblk_t *, ire_t *, uint_t); extern void ipsec_out_to_in(mblk_t *); @@ -3271,16 +3250,17 @@ extern void ill_trace_cleanup(ill_t *); extern void ipif_trace_cleanup(ipif_t *); #endif -extern int ip_srcid_insert(const in6_addr_t *, zoneid_t); -extern int ip_srcid_remove(const in6_addr_t *, zoneid_t); -extern void ip_srcid_find_id(uint_t, in6_addr_t *, zoneid_t); -extern uint_t ip_srcid_find_addr(const in6_addr_t *, zoneid_t); +extern int ip_srcid_insert(const in6_addr_t *, zoneid_t, ip_stack_t *); +extern int ip_srcid_remove(const in6_addr_t *, zoneid_t, ip_stack_t *); +extern void ip_srcid_find_id(uint_t, in6_addr_t *, zoneid_t, netstack_t *); +extern uint_t ip_srcid_find_addr(const in6_addr_t *, zoneid_t, netstack_t *); extern int ip_srcid_report(queue_t *, mblk_t *, caddr_t, cred_t *); extern uint8_t ipoptp_next(ipoptp_t *); extern uint8_t ipoptp_first(ipoptp_t *, ipha_t *); extern int ip_opt_get_user(const ipha_t *, uchar_t *); -extern ill_t *ip_grab_attach_ill(ill_t *, mblk_t *, int, boolean_t); +extern ill_t *ip_grab_attach_ill(ill_t *, mblk_t *, int, boolean_t, + ip_stack_t *); extern ire_t *conn_set_outgoing_ill(conn_t *, ire_t *, ill_t **); extern int ipsec_req_from_conn(conn_t *, ipsec_req_t *, int); extern 
int ip_snmp_get(queue_t *q, mblk_t *mctl); @@ -3347,6 +3327,12 @@ typedef struct cgtp_filter_ops { #define CGTP_MCAST_SUCCESS 1 +/* + * The separate CGTP module needs these as globals. It uses the first + * to unregister (since there is no ip_cgtp_filter_unregister() function) + * and it uses the second one to verify that the filter has been + * turned off (a ip_cgtp_filter_active() function would be good for that.) + */ extern cgtp_filter_ops_t *ip_cgtp_filter_ops; extern boolean_t ip_cgtp_filter; @@ -3520,7 +3506,7 @@ extern void ip_soft_ring_assignment(ill_t *, ill_rx_ring_t *, extern void tcp_wput(queue_t *, mblk_t *); extern int ip_fill_mtuinfo(struct in6_addr *, in_port_t, - struct ip6_mtuinfo *); + struct ip6_mtuinfo *, netstack_t *); extern ipif_t *conn_get_held_ipif(conn_t *, ipif_t **, int *); typedef void (*ipsq_func_t)(ipsq_t *, queue_t *, mblk_t *, void *); diff --git a/usr/src/uts/common/inet/ip/icmp.c b/usr/src/uts/common/inet/ip/icmp.c index 475d7530e9..e7439595f4 100644 --- a/usr/src/uts/common/inet/ip/icmp.c +++ b/usr/src/uts/common/inet/ip/icmp.c @@ -50,6 +50,7 @@ #include <sys/isa_defs.h> #include <sys/suntpi.h> #include <sys/xti_inet.h> +#include <sys/netstack.h> #include <net/route.h> #include <net/if.h> @@ -102,14 +103,6 @@ extern uint_t icmp_max_optsize; * exited the shared resource. */ -/* Named Dispatch Parameter Management Structure */ -typedef struct icmpparam_s { - uint_t icmp_param_min; - uint_t icmp_param_max; - uint_t icmp_param_value; - char *icmp_param_name; -} icmpparam_t; - static void icmp_addr_req(queue_t *q, mblk_t *mp); static void icmp_bind(queue_t *q, mblk_t *mp); static void icmp_bind_proto(queue_t *q); @@ -139,7 +132,7 @@ int icmp_opt_set(queue_t *q, uint_t optset_context, int icmp_opt_get(queue_t *q, int level, int name, uchar_t *ptr); static int icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); -static boolean_t icmp_param_register(icmpparam_t *icmppa, int cnt); +static boolean_t icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt); static int icmp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr); static void icmp_rput(queue_t *q, mblk_t *mp); @@ -158,8 +151,11 @@ static void icmp_wput_other(queue_t *q, mblk_t *mp); static void icmp_wput_iocdata(queue_t *q, mblk_t *mp); static void icmp_wput_restricted(queue_t *q, mblk_t *mp); -static void rawip_kstat_init(void); -static void rawip_kstat_fini(void); +static void *rawip_stack_init(netstackid_t stackid, netstack_t *ns); +static void rawip_stack_fini(netstackid_t stackid, void *arg); + +static void *rawip_kstat_init(netstackid_t stackid); +static void rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp); static int rawip_kstat_update(kstat_t *kp, int rw); @@ -181,12 +177,6 @@ struct streamtab icmpinfo = { static sin_t sin_null; /* Zero address for quick clears */ static sin6_t sin6_null; /* Zero address for quick clears */ -static void *icmp_g_head; /* Head for list of open icmp streams. */ -static IDP icmp_g_nd; /* Points to table of ICMP ND variables. */ - -/* MIB-2 stuff for SNMP */ -static mib2_rawip_t rawip_mib; /* SNMP fixed size info */ -static kstat_t *rawip_mibkp; /* kstat exporting rawip_mib data */ /* Default structure copied into T_INFO_ACK messages */ static struct T_info_ack icmp_g_t_info_ack = { @@ -204,8 +194,8 @@ static struct T_info_ack icmp_g_t_info_ack = { }; /* - * Table of ND variables supported by icmp. These are loaded into icmp_g_nd - * in icmp_open. + * Table of ND variables supported by icmp. 
These are loaded into is_nd + * when the stack instance is created. * All of these are alterable, within the min/max values given, at run time. */ static icmpparam_t icmp_param_arr[] = { @@ -219,14 +209,14 @@ static icmpparam_t icmp_param_arr[] = { { 4096, 65536, 8192, "icmp_recv_hiwat"}, { 65536, 1024*1024*1024, 256*1024, "icmp_max_buf"}, }; -#define icmp_wroff_extra icmp_param_arr[0].icmp_param_value -#define icmp_ipv4_ttl icmp_param_arr[1].icmp_param_value -#define icmp_ipv6_hoplimit icmp_param_arr[2].icmp_param_value -#define icmp_bsd_compat icmp_param_arr[3].icmp_param_value -#define icmp_xmit_hiwat icmp_param_arr[4].icmp_param_value -#define icmp_xmit_lowat icmp_param_arr[5].icmp_param_value -#define icmp_recv_hiwat icmp_param_arr[6].icmp_param_value -#define icmp_max_buf icmp_param_arr[7].icmp_param_value +#define is_wroff_extra is_param_arr[0].icmp_param_value +#define is_ipv4_ttl is_param_arr[1].icmp_param_value +#define is_ipv6_hoplimit is_param_arr[2].icmp_param_value +#define is_bsd_compat is_param_arr[3].icmp_param_value +#define is_xmit_hiwat is_param_arr[4].icmp_param_value +#define is_xmit_lowat is_param_arr[5].icmp_param_value +#define is_recv_hiwat is_param_arr[6].icmp_param_value +#define is_max_buf is_param_arr[7].icmp_param_value /* * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message @@ -630,6 +620,7 @@ icmp_close(queue_t *q) { icmp_t *icmp = (icmp_t *)q->q_ptr; int i1; + icmp_stack_t *is = icmp->icmp_is; /* tell IP that if we're not here, he can't trust labels */ if (is_system_labeled()) @@ -655,9 +646,10 @@ icmp_close(queue_t *q) ip6_pkt_free(&icmp->icmp_sticky_ipp); crfree(icmp->icmp_credp); + netstack_rele(icmp->icmp_is->is_netstack); /* Free the icmp structure and release the minor device number. */ - i1 = mi_close_comm(&icmp_g_head, q); + i1 = mi_close_comm(&is->is_head, q); return (i1); } @@ -926,7 +918,7 @@ icmp_icmp_error_ipv6(queue_t *q, mblk_t *mp) udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + opt_length; if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { - BUMP_MIB(&rawip_mib, rawipInErrors); + BUMP_MIB(&icmp->icmp_rawip_mib, rawipInErrors); break; } @@ -1329,6 +1321,9 @@ icmp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) icmp_t *icmp; mblk_t *mp; out_labeled_t *olp; + netstack_t *ns; + icmp_stack_t *is; + zoneid_t zoneid; /* If the stream is already open, return immediately. */ if (q->q_ptr != NULL) @@ -1345,20 +1340,36 @@ icmp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) * has an outer perimeter.) */ + ns = netstack_find_by_cred(credp); + ASSERT(ns != NULL); + is = ns->netstack_icmp; + ASSERT(is != NULL); + + /* + * For exclusive stacks we set the zoneid to zero + * to make ICMP operate as if in the global zone. + */ + if (is->is_netstack->netstack_stackid != GLOBAL_NETSTACKID) + zoneid = GLOBAL_ZONEID; + else + zoneid = crgetzoneid(credp); + /* * Create a icmp_t structure for this stream and link into the * list of open streams. */ - err = mi_open_comm(&icmp_g_head, sizeof (icmp_t), q, devp, + err = mi_open_comm(&is->is_head, sizeof (icmp_t), q, devp, flag, sflag, credp); - if (err != 0) + if (err != 0) { + netstack_rele(is->is_netstack); return (err); + } /* * The receive hiwat is only looked at on the stream head queue. * Store in q_hiwat in order to return on SO_RCVBUF getsockopts. */ - q->q_hiwat = icmp_recv_hiwat; + q->q_hiwat = is->is_recv_hiwat; /* Set the initial state of the stream and the privilege status. 
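 * The endpoint is also tied to its IP Instance here: icmp_is points at
 * the icmp_stack_t found via netstack_find_by_cred() above, and
 * icmp_zoneid is the opening credential's zone (or GLOBAL_ZONEID when
 * the stream belongs to an exclusive-IP stack).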
*/ icmp = (icmp_t *)q->q_ptr; @@ -1377,7 +1388,8 @@ icmp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) if (getpflags(NET_MAC_AWARE, credp) != 0) icmp->icmp_mac_exempt = B_TRUE; - icmp->icmp_zoneid = getzoneid(); + icmp->icmp_zoneid = zoneid; + icmp->icmp_is = is; if (getmajor(*devp) == (major_t)ICMP6_MAJ) { icmp->icmp_ipversion = IPV6_VERSION; @@ -1386,14 +1398,14 @@ icmp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) icmp->icmp_proto = IPPROTO_ICMPV6; icmp->icmp_checksum_off = 2; /* Offset for icmp6_cksum */ icmp->icmp_max_hdr_len = IPV6_HDR_LEN; - icmp->icmp_ttl = (uint8_t)icmp_ipv6_hoplimit; + icmp->icmp_ttl = (uint8_t)is->is_ipv6_hoplimit; } else { icmp->icmp_ipversion = IPV4_VERSION; icmp->icmp_family = AF_INET; /* May be changed by a SO_PROTOTYPE socket option. */ icmp->icmp_proto = IPPROTO_ICMP; icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH; - icmp->icmp_ttl = (uint8_t)icmp_ipv4_ttl; + icmp->icmp_ttl = (uint8_t)is->is_ipv4_ttl; } qprocson(q); @@ -1411,9 +1423,9 @@ icmp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) * Store in q_hiwat in order to return on SO_SNDBUF * getsockopts. */ - WR(q)->q_hiwat = icmp_xmit_hiwat; + WR(q)->q_hiwat = is->is_xmit_hiwat; WR(q)->q_next->q_hiwat = WR(q)->q_hiwat; - WR(q)->q_lowat = icmp_xmit_lowat; + WR(q)->q_lowat = is->is_xmit_lowat; WR(q)->q_next->q_lowat = WR(q)->q_lowat; if (icmp->icmp_family == AF_INET6) { @@ -1423,7 +1435,8 @@ icmp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) goto open_error; } /* Set the Stream head write offset. */ - (void) mi_set_sth_wroff(q, icmp->icmp_max_hdr_len + icmp_wroff_extra); + (void) mi_set_sth_wroff(q, + icmp->icmp_max_hdr_len + is->is_wroff_extra); (void) mi_set_sth_hiwat(q, q->q_hiwat); if (is_system_labeled()) { @@ -1453,7 +1466,8 @@ icmp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) open_error: qprocsoff(q); crfree(credp); - (void) mi_close_comm(&icmp_g_head, q); + (void) mi_close_comm(&is->is_head, q); + netstack_rele(is->is_netstack); return (err); } @@ -1475,6 +1489,8 @@ icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) int icmp_opt_default(queue_t *q, int level, int name, uchar_t *ptr) { + icmp_t *icmp = (icmp_t *)q->q_ptr; + icmp_stack_t *is = icmp->icmp_is; int *i1 = (int *)ptr; switch (level) { @@ -1497,7 +1513,7 @@ icmp_opt_default(queue_t *q, int level, int name, uchar_t *ptr) *i1 = IP_DEFAULT_MULTICAST_LOOP; return (sizeof (int)); case IPV6_UNICAST_HOPS: - *i1 = icmp_ipv6_hoplimit; + *i1 = is->is_ipv6_hoplimit; return (sizeof (int)); } break; @@ -1523,6 +1539,7 @@ icmp_opt_get(queue_t *q, int level, int name, uchar_t *ptr) icmp_t *icmp = (icmp_t *)q->q_ptr; int *i1 = (int *)ptr; ip6_pkt_t *ipp = &icmp->icmp_sticky_ipp; + icmp_stack_t *is = icmp->icmp_is; switch (level) { case SOL_SOCKET: @@ -1817,7 +1834,8 @@ icmp_opt_get(queue_t *q, int level, int name, uchar_t *ptr) return (0); return (ip_fill_mtuinfo(&icmp->icmp_v6dst, 0, - (struct ip6_mtuinfo *)ptr)); + (struct ip6_mtuinfo *)ptr, + is->is_netstack)); case IPV6_TCLASS: if (ipp->ipp_fields & IPPF_TCLASS) *i1 = ipp->ipp_tclass; @@ -1866,6 +1884,7 @@ icmp_opt_set(queue_t *q, uint_t optset_context, int level, int name, void *thisdg_attrs, cred_t *cr, mblk_t *mblk) { icmp_t *icmp = (icmp_t *)q->q_ptr; + icmp_stack_t *is = icmp->icmp_is; int *i1 = (int *)invalp; boolean_t onoff = (*i1 == 0) ? 
0 : 1; boolean_t checkonly; @@ -2022,7 +2041,7 @@ icmp_opt_set(queue_t *q, uint_t optset_context, int level, int name, break; case SO_SNDBUF: - if (*i1 > icmp_max_buf) { + if (*i1 > is->is_max_buf) { *outlenp = 0; return (ENOBUFS); } @@ -2032,7 +2051,7 @@ icmp_opt_set(queue_t *q, uint_t optset_context, int level, int name, } break; case SO_RCVBUF: - if (*i1 > icmp_max_buf) { + if (*i1 > is->is_max_buf) { *outlenp = 0; return (ENOBUFS); } @@ -2107,7 +2126,7 @@ icmp_opt_set(queue_t *q, uint_t optset_context, int level, int name, icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len; (void) mi_set_sth_wroff(RD(q), icmp->icmp_max_hdr_len + - icmp_wroff_extra); + is->is_wroff_extra); break; case IP_HDRINCL: if (!checkonly) @@ -2278,7 +2297,7 @@ icmp_opt_set(queue_t *q, uint_t optset_context, int level, int name, if (!checkonly) { if (*i1 == -1) { icmp->icmp_ttl = ipp->ipp_unicast_hops = - icmp_ipv6_hoplimit; + is->is_ipv6_hoplimit; ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; /* Pass modified value to IP. */ *i1 = ipp->ipp_hoplimit; @@ -2478,7 +2497,8 @@ icmp_opt_set(queue_t *q, uint_t optset_context, int level, int name, if (*i1 > 255 || *i1 < -1) return (EINVAL); if (*i1 == -1) - ipp->ipp_hoplimit = icmp_ipv6_hoplimit; + ipp->ipp_hoplimit = + is->is_ipv6_hoplimit; else ipp->ipp_hoplimit = *i1; ipp->ipp_fields |= IPPF_HOPLIMIT; @@ -2814,6 +2834,7 @@ icmp_opt_set(queue_t *q, uint_t optset_context, int level, int name, static int icmp_build_hdrs(queue_t *q, icmp_t *icmp) { + icmp_stack_t *is = icmp->icmp_is; uchar_t *hdrs; uint_t hdrs_len; ip6_t *ip6h; @@ -2864,7 +2885,7 @@ icmp_build_hdrs(queue_t *q, icmp_t *icmp) if (hdrs_len > icmp->icmp_max_hdr_len) { icmp->icmp_max_hdr_len = hdrs_len; (void) mi_set_sth_wroff(RD(q), icmp->icmp_max_hdr_len + - icmp_wroff_extra); + is->is_wroff_extra); } return (0); } @@ -2889,21 +2910,21 @@ icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) * named dispatch (ND) handler. 
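 * With IP Instances each icmp_stack_t registers its own private copy of
 * icmp_param_arr instead of loading one global table; roughly, what
 * rawip_stack_init() below does is:
 *
 *	pa = kmem_alloc(sizeof (icmp_param_arr), KM_SLEEP);
 *	bcopy(icmp_param_arr, pa, sizeof (icmp_param_arr));
 *	(void) icmp_param_register(&is->is_nd, pa, A_CNT(icmp_param_arr));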
*/ static boolean_t -icmp_param_register(icmpparam_t *icmppa, int cnt) +icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt) { for (; cnt-- > 0; icmppa++) { if (icmppa->icmp_param_name && icmppa->icmp_param_name[0]) { - if (!nd_load(&icmp_g_nd, icmppa->icmp_param_name, + if (!nd_load(ndp, icmppa->icmp_param_name, icmp_param_get, icmp_param_set, (caddr_t)icmppa)) { - nd_free(&icmp_g_nd); + nd_free(ndp); return (B_FALSE); } } } - if (!nd_load(&icmp_g_nd, "icmp_status", icmp_status_report, NULL, + if (!nd_load(ndp, "icmp_status", icmp_status_report, NULL, NULL)) { - nd_free(&icmp_g_nd); + nd_free(ndp); return (B_FALSE); } return (B_TRUE); @@ -2937,7 +2958,8 @@ icmp_rput(queue_t *q, mblk_t *mp) struct T_unitdata_ind *tudi; uchar_t *rptr; struct T_error_ack *tea; - icmp_t *icmp; + icmp_t *icmp = (icmp_t *)q->q_ptr; + icmp_stack_t *is = icmp->icmp_is; sin_t *sin; sin6_t *sin6; ip6_t *ip6h; @@ -2956,7 +2978,6 @@ icmp_rput(queue_t *q, mblk_t *mp) boolean_t icmp_ipv6_recvhoplimit = B_FALSE; uint_t hopstrip; - icmp = (icmp_t *)q->q_ptr; if (icmp->icmp_restricted) { putnext(q, mp); return; @@ -3106,7 +3127,7 @@ icmp_rput(queue_t *q, mblk_t *mp) freemsg(mp); if (options_mp != NULL) freeb(options_mp); - BUMP_MIB(&rawip_mib, rawipInErrors); + BUMP_MIB(&icmp->icmp_rawip_mib, rawipInErrors); return; } ipvers = IPH_HDR_VERSION((ipha_t *)rptr); @@ -3133,7 +3154,7 @@ icmp_rput(queue_t *q, mblk_t *mp) } } } - if (icmp_bsd_compat) { + if (is->is_bsd_compat) { ushort_t len; len = ntohs(ipha->ipha_length); @@ -3149,7 +3170,8 @@ icmp_rput(queue_t *q, mblk_t *mp) freemsg(mp); if (options_mp != NULL) freeb(options_mp); - BUMP_MIB(&rawip_mib, rawipInErrors); + BUMP_MIB(&icmp->icmp_rawip_mib, + rawipInErrors); return; } bcopy(rptr, mp1->b_rptr, hdr_len); @@ -3199,7 +3221,7 @@ icmp_rput(queue_t *q, mblk_t *mp) freemsg(mp); if (options_mp != NULL) freeb(options_mp); - BUMP_MIB(&rawip_mib, rawipInErrors); + BUMP_MIB(&icmp->icmp_rawip_mib, rawipInErrors); return; } mp1->b_cont = mp; @@ -3289,7 +3311,7 @@ icmp_rput(queue_t *q, mblk_t *mp) ASSERT(udi_size == 0); } - BUMP_MIB(&rawip_mib, rawipInDatagrams); + BUMP_MIB(&icmp->icmp_rawip_mib, rawipInDatagrams); putnext(q, mp); return; } @@ -3310,7 +3332,7 @@ icmp_rput(queue_t *q, mblk_t *mp) IPH_HDR_VERSION((ipha_t *)rptr) != IPV6_VERSION || icmp->icmp_family != AF_INET6) { freemsg(mp); - BUMP_MIB(&rawip_mib, rawipInErrors); + BUMP_MIB(&icmp->icmp_rawip_mib, rawipInErrors); return; } @@ -3449,7 +3471,8 @@ icmp_rput(queue_t *q, mblk_t *mp) ip0dbg(("icmp_rput: RAW checksum " "failed %x\n", sum)); freemsg(mp); - BUMP_MIB(&rawip_mib, rawipInCksumErrs); + BUMP_MIB(&icmp->icmp_rawip_mib, + rawipInCksumErrs); return; } } @@ -3516,7 +3539,7 @@ icmp_rput(queue_t *q, mblk_t *mp) mp1 = allocb(udi_size, BPRI_MED); if (mp1 == NULL) { freemsg(mp); - BUMP_MIB(&rawip_mib, rawipInErrors); + BUMP_MIB(&icmp->icmp_rawip_mib, rawipInErrors); return; } mp1->b_cont = mp; @@ -3545,7 +3568,7 @@ icmp_rput(queue_t *q, mblk_t *mp) sin6->sin6_scope_id = 0; sin6->__sin6_src_id = ip_srcid_find_addr(&ip6h->ip6_dst, - icmp->icmp_zoneid); + icmp->icmp_zoneid, is->is_netstack); if (udi_size != 0) { uchar_t *dstopt; @@ -3664,7 +3687,7 @@ icmp_rput(queue_t *q, mblk_t *mp) /* Consumed all of allocated space */ ASSERT(udi_size == 0); } - BUMP_MIB(&rawip_mib, rawipInDatagrams); + BUMP_MIB(&icmp->icmp_rawip_mib, rawipInDatagrams); putnext(q, mp); } @@ -3795,6 +3818,7 @@ icmp_snmp_get(queue_t *q, mblk_t *mpctl) { mblk_t *mpdata; struct opthdr *optp; + icmp_t *icmp = (icmp_t *)q->q_ptr; if (mpctl == NULL || 
(mpdata = mpctl->b_cont) == NULL) { @@ -3805,7 +3829,8 @@ icmp_snmp_get(queue_t *q, mblk_t *mpctl) optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; optp->level = EXPER_RAWIP; optp->name = 0; - (void) snmp_append_data(mpdata, (char *)&rawip_mib, sizeof (rawip_mib)); + (void) snmp_append_data(mpdata, (char *)&icmp->icmp_rawip_mib, + sizeof (icmp->icmp_rawip_mib)); optp->len = msgdsize(mpdata); qreply(q, mpctl); @@ -3840,6 +3865,10 @@ icmp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) char *state; char laddrbuf[INET6_ADDRSTRLEN]; char faddrbuf[INET6_ADDRSTRLEN]; + icmp_stack_t *is; + + icmp = (icmp_t *)q->q_ptr; + is = icmp->icmp_is; (void) mi_mpprintf(mp, "RAWIP " MI_COL_HDRPAD_STR @@ -3848,9 +3877,9 @@ icmp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) /* xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx UNBOUND */ - for (idp = mi_first_ptr(&icmp_g_head); + for (idp = mi_first_ptr(&is->is_head); (icmp = (icmp_t *)idp) != NULL; - idp = mi_next_ptr(&icmp_g_head, idp)) { + idp = mi_next_ptr(&is->is_head, idp)) { if (icmp->icmp_state == TS_UNBND) state = "UNBOUND"; else if (icmp->icmp_state == TS_IDLE) @@ -3933,6 +3962,7 @@ static void icmp_wput_hdrincl(queue_t *q, mblk_t *mp, icmp_t *icmp, ip4_pkt_t *pktinfop, boolean_t use_putnext) { + icmp_stack_t *is = icmp->icmp_is; ipha_t *ipha; int ip_hdr_length; int tp_hdr_len; @@ -3946,7 +3976,8 @@ boolean_t use_putnext) ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len; if ((mp->b_wptr - mp->b_rptr) < IP_SIMPLE_HDR_LENGTH) { if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) { - BUMP_MIB(&rawip_mib, rawipOutErrors); + ASSERT(icmp != NULL); + BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutErrors); freemsg(mp); return; } @@ -3990,7 +4021,8 @@ boolean_t use_putnext) tp_hdr_len)) { if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH + tp_hdr_len)) { - BUMP_MIB(&rawip_mib, rawipOutErrors); + BUMP_MIB(&icmp->icmp_rawip_mib, + rawipOutErrors); freemsg(mp); return; } @@ -4007,12 +4039,12 @@ boolean_t use_putnext) icmp_ud_err(q, mp, EMSGSIZE); return; } - if (!(mp1 = allocb(ip_hdr_length + icmp_wroff_extra + + if (!(mp1 = allocb(ip_hdr_length + is->is_wroff_extra + tp_hdr_len, BPRI_LO))) { icmp_ud_err(q, mp, ENOMEM); return; } - mp1->b_rptr += icmp_wroff_extra; + mp1->b_rptr += is->is_wroff_extra; mp1->b_wptr = mp1->b_rptr + ip_hdr_length; ipha->ipha_length = htons((uint16_t)pkt_len); @@ -4036,7 +4068,7 @@ boolean_t use_putnext) * Massage source route putting first source * route in ipha_dst. 
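 * ip_massage_options() now takes the caller's netstack_t as well, so
 * that it works against that stack instance rather than implicit
 * globals; the endpoint's icmp_is->is_netstack supplies it here.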
*/ - (void) ip_massage_options(ipha); + (void) ip_massage_options(ipha, icmp->icmp_is->is_netstack); } if (pktinfop != NULL) { @@ -4071,14 +4103,15 @@ icmp_update_label(queue_t *q, icmp_t *icmp, mblk_t *mp, ipaddr_t dst) uchar_t opt_storage[IP_MAX_OPT_LENGTH]; err = tsol_compute_label(DB_CREDDEF(mp, icmp->icmp_credp), dst, - opt_storage, icmp->icmp_mac_exempt); + opt_storage, icmp->icmp_mac_exempt, + icmp->icmp_is->is_netstack->netstack_ip); if (err == 0) { err = tsol_update_options(&icmp->icmp_ip_snd_options, &icmp->icmp_ip_snd_options_len, &icmp->icmp_label_len, opt_storage); } if (err != 0) { - BUMP_MIB(&rawip_mib, rawipOutErrors); + BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutErrors); DTRACE_PROBE4( tx__ip__log__drop__updatelabel__icmp, char *, "queue(1) failed to update options(2) on mp(3)", @@ -4104,7 +4137,8 @@ icmp_wput(queue_t *q, mblk_t *mp) int ip_hdr_length; #define tudr ((struct T_unitdata_req *)rptr) size_t ip_len; - icmp_t *icmp; + icmp_t *icmp = (icmp_t *)q->q_ptr; + icmp_stack_t *is = icmp->icmp_is; sin6_t *sin6; sin_t *sin; ipaddr_t v4dst; @@ -4114,7 +4148,6 @@ icmp_wput(queue_t *q, mblk_t *mp) queue_t *ip_wq; boolean_t use_putnext = B_TRUE; - icmp = (icmp_t *)q->q_ptr; if (icmp->icmp_restricted) { icmp_wput_restricted(q, mp); return; @@ -4127,7 +4160,8 @@ icmp_wput(queue_t *q, mblk_t *mp) ipha = (ipha_t *)mp->b_rptr; if (mp->b_wptr - mp->b_rptr < IP_SIMPLE_HDR_LENGTH) { if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) { - BUMP_MIB(&rawip_mib, rawipOutErrors); + BUMP_MIB(&icmp->icmp_rawip_mib, + rawipOutErrors); freemsg(mp); return; } @@ -4171,19 +4205,19 @@ icmp_wput(queue_t *q, mblk_t *mp) if (icmp->icmp_state == TS_UNBND) { /* If a port has not been bound to the stream, fail. */ - BUMP_MIB(&rawip_mib, rawipOutErrors); + BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutErrors); icmp_ud_err(q, mp, EPROTO); return; } mp1 = mp->b_cont; if (mp1 == NULL) { - BUMP_MIB(&rawip_mib, rawipOutErrors); + BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutErrors); icmp_ud_err(q, mp, EPROTO); return; } if ((rptr + tudr->DEST_offset + tudr->DEST_length) > mp->b_wptr) { - BUMP_MIB(&rawip_mib, rawipOutErrors); + BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutErrors); icmp_ud_err(q, mp, EADDRNOTAVAIL); return; } @@ -4194,14 +4228,14 @@ icmp_wput(queue_t *q, mblk_t *mp) if (!OK_32PTR((char *)sin6) || tudr->DEST_length != sizeof (sin6_t) || sin6->sin6_family != AF_INET6) { - BUMP_MIB(&rawip_mib, rawipOutErrors); + BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutErrors); icmp_ud_err(q, mp, EADDRNOTAVAIL); return; } /* No support for mapped addresses on raw sockets */ if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { - BUMP_MIB(&rawip_mib, rawipOutErrors); + BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutErrors); icmp_ud_err(q, mp, EADDRNOTAVAIL); return; } @@ -4218,7 +4252,7 @@ icmp_wput(queue_t *q, mblk_t *mp) if (!OK_32PTR((char *)sin) || tudr->DEST_length != sizeof (sin_t) || sin->sin_family != AF_INET) { - BUMP_MIB(&rawip_mib, rawipOutErrors); + BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutErrors); icmp_ud_err(q, mp, EADDRNOTAVAIL); return; } @@ -4246,7 +4280,7 @@ icmp_wput(queue_t *q, mblk_t *mp) if (icmp_unitdata_opt_process(q, mp, &error, (void *)pktinfop) < 0) { /* failure */ - BUMP_MIB(&rawip_mib, rawipOutErrors); + BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutErrors); icmp_ud_err(q, mp, error); return; } @@ -4298,9 +4332,9 @@ icmp_wput(queue_t *q, mblk_t *mp) if ((uchar_t *)ipha < mp1->b_datap->db_base || mp1->b_datap->db_ref != 1 || !OK_32PTR(ipha)) { - if (!(mp1 = allocb(ip_hdr_length + icmp_wroff_extra, + if (!(mp1 = allocb(ip_hdr_length + 
is->is_wroff_extra, BPRI_LO))) { - BUMP_MIB(&rawip_mib, rawipOutErrors); + BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutErrors); icmp_ud_err(q, mp, ENOMEM); return; } @@ -4367,7 +4401,7 @@ icmp_wput(queue_t *q, mblk_t *mp) * as this can cause problems in layers below. */ if (ip_len > IP_MAXPACKET) { - BUMP_MIB(&rawip_mib, rawipOutErrors); + BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutErrors); icmp_ud_err(q, mp, EMSGSIZE); return; } @@ -4392,11 +4426,11 @@ icmp_wput(queue_t *q, mblk_t *mp) * Massage source route putting first source route in ipha_dst. * Ignore the destination in the T_unitdata_req. */ - (void) ip_massage_options(ipha); + (void) ip_massage_options(ipha, icmp->icmp_is->is_netstack); } freeb(mp); - BUMP_MIB(&rawip_mib, rawipOutDatagrams); + BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutDatagrams); mblk_setcred(mp1, icmp->icmp_credp); if (use_putnext) { putnext(q, mp1); @@ -4415,13 +4449,14 @@ icmp_update_label_v6(queue_t *wq, icmp_t *icmp, mblk_t *mp, in6_addr_t *dst) uchar_t opt_storage[TSOL_MAX_IPV6_OPTION]; err = tsol_compute_label_v6(DB_CREDDEF(mp, icmp->icmp_credp), dst, - opt_storage, icmp->icmp_mac_exempt); + opt_storage, icmp->icmp_mac_exempt, + icmp->icmp_is->is_netstack->netstack_ip); if (err == 0) { err = tsol_update_sticky(&icmp->icmp_sticky_ipp, &icmp->icmp_label_len_v6, opt_storage); } if (err != 0) { - BUMP_MIB(&rawip_mib, rawipOutErrors); + BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutErrors); DTRACE_PROBE4( tx__ip__log__drop__updatelabel__icmp6, char *, "queue(1) failed to update options(2) on mp(3)", @@ -4447,7 +4482,8 @@ icmp_wput_ipv6(queue_t *q, mblk_t *mp, sin6_t *sin6, t_scalar_t tudr_optlen) mblk_t *mp1; int ip_hdr_len = IPV6_HDR_LEN; size_t ip_len; - icmp_t *icmp; + icmp_t *icmp = (icmp_t *)q->q_ptr; + icmp_stack_t *is = icmp->icmp_is; ip6_pkt_t ipp_s; /* For ancillary data options */ ip6_pkt_t *ipp = &ipp_s; ip6_pkt_t *tipp; @@ -4458,8 +4494,6 @@ icmp_wput_ipv6(queue_t *q, mblk_t *mp, sin6_t *sin6, t_scalar_t tudr_optlen) uint8_t *nxthdr_ptr; in6_addr_t ip6_dst; - icmp = (icmp_t *)q->q_ptr; - /* * If the local address is a mapped address return * an error. @@ -4468,7 +4502,7 @@ icmp_wput_ipv6(queue_t *q, mblk_t *mp, sin6_t *sin6, t_scalar_t tudr_optlen) * since it is bound to a mapped address. 
*/ if (IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6src)) { - BUMP_MIB(&rawip_mib, rawipOutErrors); + BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutErrors); icmp_ud_err(q, mp, EADDRNOTAVAIL); return; } @@ -4485,7 +4519,7 @@ icmp_wput_ipv6(queue_t *q, mblk_t *mp, sin6_t *sin6, t_scalar_t tudr_optlen) if (icmp_unitdata_opt_process(q, mp, &error, (void *)ipp) < 0) { /* failure */ - BUMP_MIB(&rawip_mib, rawipOutErrors); + BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutErrors); icmp_ud_err(q, mp, error); return; } @@ -4695,11 +4729,11 @@ no_options: if (ip_hdr_len > icmp->icmp_max_hdr_len) { icmp->icmp_max_hdr_len = ip_hdr_len; (void) mi_set_sth_wroff(RD(q), - icmp->icmp_max_hdr_len + icmp_wroff_extra); + icmp->icmp_max_hdr_len + is->is_wroff_extra); } - mp1 = allocb(ip_hdr_len + icmp_wroff_extra, BPRI_LO); + mp1 = allocb(ip_hdr_len + is->is_wroff_extra, BPRI_LO); if (!mp1) { - BUMP_MIB(&rawip_mib, rawipOutErrors); + BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutErrors); icmp_ud_err(q, mp, ENOMEM); return; } @@ -4800,7 +4834,8 @@ no_options: if (sin6->__sin6_src_id != 0 && IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { ip_srcid_find_id(sin6->__sin6_src_id, - &ip6h->ip6_src, icmp->icmp_zoneid); + &ip6h->ip6_src, icmp->icmp_zoneid, + is->is_netstack); } } @@ -4900,7 +4935,8 @@ no_options: * Notify the application as well. */ icmp_ud_err(q, mp, EPROTO); - BUMP_MIB(&rawip_mib, rawipOutErrors); + BUMP_MIB(&icmp->icmp_rawip_mib, + rawipOutErrors); return; } /* @@ -4909,7 +4945,8 @@ no_options: */ if (rth->ip6r_len & 0x1) { icmp_ud_err(q, mp, EPROTO); - BUMP_MIB(&rawip_mib, rawipOutErrors); + BUMP_MIB(&icmp->icmp_rawip_mib, + rawipOutErrors); return; } /* @@ -4918,7 +4955,8 @@ no_options: * between the first hop (in ip6_dst) and * the destination (in the last routing hdr entry). */ - csum = ip_massage_options_v6(ip6h, rth); + csum = ip_massage_options_v6(ip6h, rth, + icmp->icmp_is->is_netstack); /* * Verify that the first hop isn't a mapped address. * Routers along the path need to do this verification @@ -4926,7 +4964,8 @@ no_options: */ if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { icmp_ud_err(q, mp, EADDRNOTAVAIL); - BUMP_MIB(&rawip_mib, rawipOutErrors); + BUMP_MIB(&icmp->icmp_rawip_mib, + rawipOutErrors); return; } } @@ -4943,7 +4982,7 @@ no_options: * as this can cause problems in layers below. */ if (ip_len > IP_MAXPACKET) { - BUMP_MIB(&rawip_mib, rawipOutErrors); + BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutErrors); icmp_ud_err(q, mp, EMSGSIZE); return; } @@ -4966,7 +5005,8 @@ no_options: cksum_off = ip_hdr_len + icmp->icmp_checksum_off; if (cksum_off + sizeof (uint16_t) > mp1->b_wptr - mp1->b_rptr) { if (!pullupmsg(mp1, cksum_off + sizeof (uint16_t))) { - BUMP_MIB(&rawip_mib, rawipOutErrors); + BUMP_MIB(&icmp->icmp_rawip_mib, + rawipOutErrors); freemsg(mp); return; } @@ -4994,7 +5034,7 @@ no_options: freeb(mp); /* We're done. 
Pass the packet to IP */ - BUMP_MIB(&rawip_mib, rawipOutDatagrams); + BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutDatagrams); mblk_setcred(mp1, icmp->icmp_credp); putnext(q, mp1); } @@ -5117,7 +5157,7 @@ icmp_wput_other(queue_t *q, mblk_t *mp) case ND_SET: /* nd_getset performs the necessary error checking */ case ND_GET: - if (nd_getset(q, icmp_g_nd, mp)) { + if (nd_getset(q, icmp->icmp_is->is_nd, mp)) { qreply(q, mp); return; } @@ -5367,21 +5407,62 @@ icmp_ddi_init(void) optcom_max_optsize(icmp_opt_obj.odb_opt_des_arr, icmp_opt_obj.odb_opt_arr_cnt); - (void) icmp_param_register(icmp_param_arr, A_CNT(icmp_param_arr)); - - rawip_kstat_init(); + /* + * We want to be informed each time a stack is created or + * destroyed in the kernel, so we can maintain the + * set of icmp_stack_t's. + */ + netstack_register(NS_ICMP, rawip_stack_init, NULL, rawip_stack_fini); } void icmp_ddi_destroy(void) { - nd_free(&icmp_g_nd); + netstack_unregister(NS_ICMP); +} + +/* + * Initialize the ICMP stack instance. + */ +static void * +rawip_stack_init(netstackid_t stackid, netstack_t *ns) +{ + icmp_stack_t *is; + icmpparam_t *pa; + + is = (icmp_stack_t *)kmem_zalloc(sizeof (*is), KM_SLEEP); + is->is_netstack = ns; - rawip_kstat_fini(); + pa = (icmpparam_t *)kmem_alloc(sizeof (icmp_param_arr), KM_SLEEP); + is->is_param_arr = pa; + bcopy(icmp_param_arr, is->is_param_arr, sizeof (icmp_param_arr)); + + (void) icmp_param_register(&is->is_nd, + is->is_param_arr, A_CNT(icmp_param_arr)); + is->is_ksp = rawip_kstat_init(stackid); + return (is); } +/* + * Free the ICMP stack instance. + */ static void -rawip_kstat_init(void) { +rawip_stack_fini(netstackid_t stackid, void *arg) +{ + icmp_stack_t *is = (icmp_stack_t *)arg; + + nd_free(&is->is_nd); + kmem_free(is->is_param_arr, sizeof (icmp_param_arr)); + is->is_param_arr = NULL; + + rawip_kstat_fini(stackid, is->is_ksp); + is->is_ksp = NULL; + kmem_free(is, sizeof (*is)); +} + +static void * +rawip_kstat_init(netstackid_t stackid) { + kstat_t *ksp; rawip_named_kstat_t template = { { "inDatagrams", KSTAT_DATA_UINT32, 0 }, @@ -5391,45 +5472,59 @@ rawip_kstat_init(void) { { "outErrors", KSTAT_DATA_UINT32, 0 }, }; - rawip_mibkp = kstat_create("icmp", 0, "rawip", "mib2", + ksp = kstat_create_netstack("icmp", 0, "rawip", "mib2", KSTAT_TYPE_NAMED, NUM_OF_FIELDS(rawip_named_kstat_t), - 0); - if (rawip_mibkp == NULL) - return; - - bcopy(&template, rawip_mibkp->ks_data, sizeof (template)); + 0, stackid); + if (ksp == NULL || ksp->ks_data == NULL) + return (NULL); - rawip_mibkp->ks_update = rawip_kstat_update; + bcopy(&template, ksp->ks_data, sizeof (template)); + ksp->ks_update = rawip_kstat_update; + ksp->ks_private = (void *)(uintptr_t)stackid; - kstat_install(rawip_mibkp); + kstat_install(ksp); + return (ksp); } static void -rawip_kstat_fini(void) { - if (rawip_mibkp) { - kstat_delete(rawip_mibkp); - rawip_mibkp = NULL; +rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp) +{ + if (ksp != NULL) { + ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); + kstat_delete_netstack(ksp, stackid); } } static int -rawip_kstat_update(kstat_t *kp, int rw) { +rawip_kstat_update(kstat_t *ksp, int rw) +{ rawip_named_kstat_t *rawipkp; + netstackid_t stackid = (netstackid_t)(uintptr_t)ksp->ks_private; + netstack_t *ns; + icmp_stack_t *is; - if ((kp == NULL) || (kp->ks_data == NULL)) + if ((ksp == NULL) || (ksp->ks_data == NULL)) return (EIO); if (rw == KSTAT_WRITE) return (EACCES); - rawipkp = (rawip_named_kstat_t *)kp->ks_data; - - rawipkp->inDatagrams.value.ui32 = rawip_mib.rawipInDatagrams; 
- rawipkp->inCksumErrs.value.ui32 = rawip_mib.rawipInCksumErrs; - rawipkp->inErrors.value.ui32 = rawip_mib.rawipInErrors; - rawipkp->outDatagrams.value.ui32 = rawip_mib.rawipOutDatagrams; - rawipkp->outErrors.value.ui32 = rawip_mib.rawipOutErrors; + rawipkp = (rawip_named_kstat_t *)ksp->ks_data; + ns = netstack_find_by_stackid(stackid); + if (ns == NULL) + return (-1); + is = ns->netstack_icmp; + if (is == NULL) { + netstack_rele(ns); + return (-1); + } + rawipkp->inDatagrams.value.ui32 = is->is_rawip_mib.rawipInDatagrams; + rawipkp->inCksumErrs.value.ui32 = is->is_rawip_mib.rawipInCksumErrs; + rawipkp->inErrors.value.ui32 = is->is_rawip_mib.rawipInErrors; + rawipkp->outDatagrams.value.ui32 = is->is_rawip_mib.rawipOutDatagrams; + rawipkp->outErrors.value.ui32 = is->is_rawip_mib.rawipOutErrors; + netstack_rele(ns); return (0); } diff --git a/usr/src/uts/common/inet/ip/icmp_opt_data.c b/usr/src/uts/common/inet/ip/icmp_opt_data.c index 3a898bb384..b4a20417ad 100644 --- a/usr/src/uts/common/inet/ip/icmp_opt_data.c +++ b/usr/src/uts/common/inet/ip/icmp_opt_data.c @@ -155,7 +155,7 @@ opdes_t icmp_opt_arr[] = { (OP_PASSNEXT|OP_NODEFAULT|OP_VARLEN), sizeof (struct in_pktinfo), -1 /* not initialized */ }, -{ IP_NEXTHOP, IPPROTO_IP, OA_RW, OA_RW, OP_CONFIG, OP_PASSNEXT, +{ IP_NEXTHOP, IPPROTO_IP, OA_R, OA_RW, OP_CONFIG, OP_PASSNEXT, sizeof (in_addr_t), -1 /* not initialized */ }, { MRT_INIT, IPPROTO_IP, 0, OA_X, OP_CONFIG, diff --git a/usr/src/uts/common/inet/ip/igmp.c b/usr/src/uts/common/inet/ip/igmp.c index 7e01725f4c..60647c3b73 100644 --- a/usr/src/uts/common/inet/ip/igmp.c +++ b/usr/src/uts/common/inet/ip/igmp.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* Copyright (c) 1990 Mentat Inc. */ @@ -86,27 +86,6 @@ static void mcast_init_rtx(ill_t *ill, rtx_state_t *rtxp, mcast_record_t rtype, slist_t *flist); static mrec_t *mcast_merge_rtx(ilm_t *ilm, mrec_t *rp, slist_t *flist); -/* Following protected by igmp_timer_lock */ -static int igmp_time_to_next; /* Time since last timeout */ -static int igmp_timer_fired_last; -uint_t igmp_deferred_next = INFINITY; -timeout_id_t igmp_timeout_id = 0; -kmutex_t igmp_timer_lock; - -/* Protected by igmp_slowtimeout_lock */ -timeout_id_t igmp_slowtimeout_id = 0; -kmutex_t igmp_slowtimeout_lock; - -/* Following protected by mld_timer_lock */ -static int mld_time_to_next; /* Time since last timeout */ -static int mld_timer_fired_last; -uint_t mld_deferred_next = INFINITY; -timeout_id_t mld_timeout_id = 0; -kmutex_t mld_timer_lock; - -/* Protected by mld_slowtimeout_lock */ -timeout_id_t mld_slowtimeout_id = 0; -kmutex_t mld_slowtimeout_lock; /* * Macros used to do timer len conversions. Timer values are always @@ -124,18 +103,16 @@ kmutex_t mld_slowtimeout_lock; * The unit for next is milliseconds. */ void -igmp_start_timers(unsigned next) +igmp_start_timers(unsigned next, ip_stack_t *ipst) { int time_left; - /* Protected by igmp_timer_lock */ - static boolean_t igmp_timer_setter_active; int ret; ASSERT(next != 0 && next != INFINITY); - mutex_enter(&igmp_timer_lock); + mutex_enter(&ipst->ips_igmp_timer_lock); - if (igmp_timer_setter_active) { + if (ipst->ips_igmp_timer_setter_active) { /* * Serialize timer setters, one at a time. If the * timer is currently being set by someone, @@ -143,21 +120,22 @@ igmp_start_timers(unsigned next) * invoked and return. The current setter will * take care. 
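 * All of the serialization state (ips_igmp_timer_setter_active,
 * ips_igmp_time_to_next, ips_igmp_timeout_id) now lives in the
 * ip_stack_t, protected by ips_igmp_timer_lock, so each IP Instance
 * runs its own IGMP timer; callers simply pass their stack, e.g.
 * igmp_start_timers(next, ill->ill_ipst) from igmp_input().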
*/ - igmp_time_to_next = MIN(igmp_time_to_next, next); - mutex_exit(&igmp_timer_lock); + ipst->ips_igmp_time_to_next = + MIN(ipst->ips_igmp_time_to_next, next); + mutex_exit(&ipst->ips_igmp_timer_lock); return; } else { - igmp_timer_setter_active = B_TRUE; + ipst->ips_igmp_timer_setter_active = B_TRUE; } - if (igmp_timeout_id == 0) { + if (ipst->ips_igmp_timeout_id == 0) { /* * The timer is inactive. We need to start a timer */ - igmp_time_to_next = next; - igmp_timeout_id = timeout(igmp_timeout_handler, NULL, - MSEC_TO_TICK(igmp_time_to_next)); - igmp_timer_setter_active = B_FALSE; - mutex_exit(&igmp_timer_lock); + ipst->ips_igmp_time_to_next = next; + ipst->ips_igmp_timeout_id = timeout(igmp_timeout_handler, + (void *)ipst, MSEC_TO_TICK(ipst->ips_igmp_time_to_next)); + ipst->ips_igmp_timer_setter_active = B_FALSE; + mutex_exit(&ipst->ips_igmp_timer_lock); return; } @@ -167,17 +145,17 @@ igmp_start_timers(unsigned next) * reschedule the timeout if the new 'next' will happen * earlier than the currently scheduled timeout */ - time_left = igmp_timer_fired_last + - MSEC_TO_TICK(igmp_time_to_next) - ddi_get_lbolt(); + time_left = ipst->ips_igmp_timer_fired_last + + MSEC_TO_TICK(ipst->ips_igmp_time_to_next) - ddi_get_lbolt(); if (time_left < MSEC_TO_TICK(next)) { - igmp_timer_setter_active = B_FALSE; - mutex_exit(&igmp_timer_lock); + ipst->ips_igmp_timer_setter_active = B_FALSE; + mutex_exit(&ipst->ips_igmp_timer_lock); return; } - mutex_exit(&igmp_timer_lock); - ret = untimeout(igmp_timeout_id); - mutex_enter(&igmp_timer_lock); + mutex_exit(&ipst->ips_igmp_timer_lock); + ret = untimeout(ipst->ips_igmp_timeout_id); + mutex_enter(&ipst->ips_igmp_timer_lock); /* * The timeout was cancelled, or the timeout handler * completed, while we were blocked in the untimeout. @@ -188,18 +166,19 @@ igmp_start_timers(unsigned next) * if needed. */ if (ret == -1) { - ASSERT(igmp_timeout_id == 0); + ASSERT(ipst->ips_igmp_timeout_id == 0); } else { - ASSERT(igmp_timeout_id != 0); - igmp_timeout_id = 0; + ASSERT(ipst->ips_igmp_timeout_id != 0); + ipst->ips_igmp_timeout_id = 0; } - if (igmp_time_to_next != 0) { - igmp_time_to_next = MIN(igmp_time_to_next, next); - igmp_timeout_id = timeout(igmp_timeout_handler, NULL, - MSEC_TO_TICK(igmp_time_to_next)); + if (ipst->ips_igmp_time_to_next != 0) { + ipst->ips_igmp_time_to_next = + MIN(ipst->ips_igmp_time_to_next, next); + ipst->ips_igmp_timeout_id = timeout(igmp_timeout_handler, + (void *)ipst, MSEC_TO_TICK(ipst->ips_igmp_time_to_next)); } - igmp_timer_setter_active = B_FALSE; - mutex_exit(&igmp_timer_lock); + ipst->ips_igmp_timer_setter_active = B_FALSE; + mutex_exit(&ipst->ips_igmp_timer_lock); } /* @@ -207,17 +186,15 @@ igmp_start_timers(unsigned next) * The unit for next is milliseconds. */ void -mld_start_timers(unsigned next) +mld_start_timers(unsigned next, ip_stack_t *ipst) { int time_left; - /* Protedted by mld_timer_lock */ - static boolean_t mld_timer_setter_active; int ret; ASSERT(next != 0 && next != INFINITY); - mutex_enter(&mld_timer_lock); - if (mld_timer_setter_active) { + mutex_enter(&ipst->ips_mld_timer_lock); + if (ipst->ips_mld_timer_setter_active) { /* * Serialize timer setters, one at a time. If the * timer is currently being set by someone, @@ -225,21 +202,22 @@ mld_start_timers(unsigned next) * invoked and return. The current setter will * take care. 
*/ - mld_time_to_next = MIN(mld_time_to_next, next); - mutex_exit(&mld_timer_lock); + ipst->ips_mld_time_to_next = + MIN(ipst->ips_mld_time_to_next, next); + mutex_exit(&ipst->ips_mld_timer_lock); return; } else { - mld_timer_setter_active = B_TRUE; + ipst->ips_mld_timer_setter_active = B_TRUE; } - if (mld_timeout_id == 0) { + if (ipst->ips_mld_timeout_id == 0) { /* * The timer is inactive. We need to start a timer */ - mld_time_to_next = next; - mld_timeout_id = timeout(mld_timeout_handler, NULL, - MSEC_TO_TICK(mld_time_to_next)); - mld_timer_setter_active = B_FALSE; - mutex_exit(&mld_timer_lock); + ipst->ips_mld_time_to_next = next; + ipst->ips_mld_timeout_id = timeout(mld_timeout_handler, + (void *)ipst, MSEC_TO_TICK(ipst->ips_mld_time_to_next)); + ipst->ips_mld_timer_setter_active = B_FALSE; + mutex_exit(&ipst->ips_mld_timer_lock); return; } @@ -249,17 +227,17 @@ mld_start_timers(unsigned next) * reschedule the timeout if the new 'next' will happen * earlier than the currently scheduled timeout */ - time_left = mld_timer_fired_last + - MSEC_TO_TICK(mld_time_to_next) - ddi_get_lbolt(); + time_left = ipst->ips_mld_timer_fired_last + + MSEC_TO_TICK(ipst->ips_mld_time_to_next) - ddi_get_lbolt(); if (time_left < MSEC_TO_TICK(next)) { - mld_timer_setter_active = B_FALSE; - mutex_exit(&mld_timer_lock); + ipst->ips_mld_timer_setter_active = B_FALSE; + mutex_exit(&ipst->ips_mld_timer_lock); return; } - mutex_exit(&mld_timer_lock); - ret = untimeout(mld_timeout_id); - mutex_enter(&mld_timer_lock); + mutex_exit(&ipst->ips_mld_timer_lock); + ret = untimeout(ipst->ips_mld_timeout_id); + mutex_enter(&ipst->ips_mld_timer_lock); /* * The timeout was cancelled, or the timeout handler * completed, while we were blocked in the untimeout. @@ -270,18 +248,19 @@ mld_start_timers(unsigned next) * if needed. 
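 * A -1 return from untimeout() means the callout was no longer pending
 * and the handler has already cleared ips_mld_timeout_id; any other
 * return means a pending timeout was cancelled and the id is cleared
 * here before rescheduling.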
*/ if (ret == -1) { - ASSERT(mld_timeout_id == 0); + ASSERT(ipst->ips_mld_timeout_id == 0); } else { - ASSERT(mld_timeout_id != 0); - mld_timeout_id = 0; + ASSERT(ipst->ips_mld_timeout_id != 0); + ipst->ips_mld_timeout_id = 0; } - if (mld_time_to_next != 0) { - mld_time_to_next = MIN(mld_time_to_next, next); - mld_timeout_id = timeout(mld_timeout_handler, NULL, - MSEC_TO_TICK(mld_time_to_next)); + if (ipst->ips_mld_time_to_next != 0) { + ipst->ips_mld_time_to_next = + MIN(ipst->ips_mld_time_to_next, next); + ipst->ips_mld_timeout_id = timeout(mld_timeout_handler, + (void *)ipst, MSEC_TO_TICK(ipst->ips_mld_time_to_next)); } - mld_timer_setter_active = B_FALSE; - mutex_exit(&mld_timer_lock); + ipst->ips_mld_timer_setter_active = B_FALSE; + mutex_exit(&ipst->ips_mld_timer_lock); } /* @@ -303,14 +282,16 @@ igmp_input(queue_t *q, mblk_t *mp, ill_t *ill) uint32_t group; uint_t next; ipif_t *ipif; + ip_stack_t *ipst; ASSERT(ill != NULL); ASSERT(!ill->ill_isv6); - ++igmpstat.igps_rcv_total; + ipst = ill->ill_ipst; + ++ipst->ips_igmpstat.igps_rcv_total; mblklen = MBLKL(mp); if (mblklen < 1 || mblklen < (iphlen = IPH_HDR_LENGTH(ipha))) { - ++igmpstat.igps_rcv_tooshort; + ++ipst->ips_igmpstat.igps_rcv_tooshort; goto bad_pkt; } igmplen = ntohs(ipha->ipha_length) - iphlen; @@ -321,7 +302,7 @@ igmp_input(queue_t *q, mblk_t *mp, ill_t *ill) if (MBLKL(mp) < (igmplen + iphlen)) { mblk_t *mp1; if ((mp1 = msgpullup(mp, -1)) == NULL) { - ++igmpstat.igps_rcv_tooshort; + ++ipst->ips_igmpstat.igps_rcv_tooshort; goto bad_pkt; } freemsg(mp); @@ -333,14 +314,14 @@ igmp_input(queue_t *q, mblk_t *mp, ill_t *ill) * Validate lengths */ if (igmplen < IGMP_MINLEN) { - ++igmpstat.igps_rcv_tooshort; + ++ipst->ips_igmpstat.igps_rcv_tooshort; goto bad_pkt; } /* * Validate checksum */ if (IP_CSUM(mp, iphlen, 0)) { - ++igmpstat.igps_rcv_badsum; + ++ipst->ips_igmpstat.igps_rcv_badsum; goto bad_pkt; } @@ -365,14 +346,14 @@ igmp_input(queue_t *q, mblk_t *mp, ill_t *ill) next = igmpv3_query_in((igmp3qa_t *)igmpa, ill, igmplen); } else { - ++igmpstat.igps_rcv_tooshort; + ++ipst->ips_igmpstat.igps_rcv_tooshort; goto bad_pkt; } if (next == 0) goto bad_pkt; if (next != INFINITY) - igmp_start_timers(next); + igmp_start_timers(next, ipst); break; @@ -404,10 +385,10 @@ igmp_input(queue_t *q, mblk_t *mp, ill_t *ill) } mutex_exit(&ill->ill_lock); - ++igmpstat.igps_rcv_reports; + ++ipst->ips_igmpstat.igps_rcv_reports; group = igmpa->igmpa_group; if (!CLASSD(group)) { - ++igmpstat.igps_rcv_badreports; + ++ipst->ips_igmpstat.igps_rcv_badreports; goto bad_pkt; } @@ -445,7 +426,7 @@ igmp_input(queue_t *q, mblk_t *mp, ill_t *ill) ipif = ipif->ipif_next) { ilm = ilm_lookup_ipif(ipif, group); if (ilm != NULL) { - ++igmpstat.igps_rcv_ourreports; + ++ipst->ips_igmpstat.igps_rcv_ourreports; ilm->ilm_timer = INFINITY; ilm->ilm_state = IGMP_OTHERMEMBER; } @@ -478,8 +459,10 @@ igmp_query_in(ipha_t *ipha, igmpa_t *igmpa, ill_t *ill) ilm_t *ilm; int timer; uint_t next; + ip_stack_t *ipst; - ++igmpstat.igps_rcv_queries; + ipst = ill->ill_ipst; + ++ipst->ips_igmpstat.igps_rcv_queries; /* * In the IGMPv2 specification, there are 3 states and a flag. 
@@ -516,7 +499,7 @@ igmp_query_in(ipha_t *ipha, igmpa_t *igmpa, ill_t *ill) if (ipha->ipha_dst != htonl(INADDR_ALLHOSTS_GROUP) || igmpa->igmpa_group != 0) { - ++igmpstat.igps_rcv_badqueries; + ++ipst->ips_igmpstat.igps_rcv_badqueries; return (0); } @@ -529,7 +512,7 @@ igmp_query_in(ipha_t *ipha, igmpa_t *igmpa, ill_t *ill) */ group = igmpa->igmpa_group; if (group != 0 && (!CLASSD(group))) { - ++igmpstat.igps_rcv_badqueries; + ++ipst->ips_igmpstat.igps_rcv_badqueries; return (0); } @@ -609,16 +592,18 @@ igmpv3_query_in(igmp3qa_t *igmp3qa, ill_t *ill, int igmplen) ilm_t *ilm; ipaddr_t *src_array; uint8_t qrv; + ip_stack_t *ipst; + ipst = ill->ill_ipst; /* make sure numsrc matches packet size */ numsrc = ntohs(igmp3qa->igmp3qa_numsrc); if (igmplen < IGMP_V3_QUERY_MINLEN + (numsrc * sizeof (ipaddr_t))) { - ++igmpstat.igps_rcv_tooshort; + ++ipst->ips_igmpstat.igps_rcv_tooshort; return (0); } src_array = (ipaddr_t *)&igmp3qa[1]; - ++igmpstat.igps_rcv_queries; + ++ipst->ips_igmpstat.igps_rcv_queries; if ((mrd = (uint_t)igmp3qa->igmp3qa_mxrc) >= IGMP_V3_MAXRT_FPMIN) { uint_t hdrval, mant, exp; @@ -738,6 +723,7 @@ void igmp_joingroup(ilm_t *ilm) { ill_t *ill; + ip_stack_t *ipst = ilm->ilm_ipst; ill = ilm->ilm_ipif->ipif_ill; @@ -802,10 +788,10 @@ igmp_joingroup(ilm_t *ilm) * acquire the ipsq. Instead we start the timer after we get * out of the ipsq in ipsq_exit. */ - mutex_enter(&igmp_timer_lock); - igmp_deferred_next = MIN(ilm->ilm_rtx.rtx_timer, - igmp_deferred_next); - mutex_exit(&igmp_timer_lock); + mutex_enter(&ipst->ips_igmp_timer_lock); + ipst->ips_igmp_deferred_next = MIN(ilm->ilm_rtx.rtx_timer, + ipst->ips_igmp_deferred_next); + mutex_exit(&ipst->ips_igmp_timer_lock); } if (ip_debug > 1) { @@ -820,6 +806,7 @@ void mld_joingroup(ilm_t *ilm) { ill_t *ill; + ip_stack_t *ipst = ilm->ilm_ipst; ill = ilm->ilm_ill; @@ -880,10 +867,10 @@ mld_joingroup(ilm_t *ilm) * acquire the ipsq. 
Instead we start the timer after we get * out of the ipsq in ipsq_exit */ - mutex_enter(&mld_timer_lock); - mld_deferred_next = MIN(ilm->ilm_rtx.rtx_timer, - mld_deferred_next); - mutex_exit(&mld_timer_lock); + mutex_enter(&ipst->ips_mld_timer_lock); + ipst->ips_mld_deferred_next = MIN(ilm->ilm_rtx.rtx_timer, + ipst->ips_mld_deferred_next); + mutex_exit(&ipst->ips_mld_timer_lock); } if (ip_debug > 1) { @@ -982,6 +969,7 @@ igmp_statechange(ilm_t *ilm, mcast_record_t fmode, slist_t *flist) { ill_t *ill; mrec_t *rp; + ip_stack_t *ipst = ilm->ilm_ipst; ASSERT(ilm != NULL); @@ -1059,10 +1047,10 @@ send_to_in: if (ilm->ilm_rtx.rtx_timer == INFINITY) { MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer, SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY)); - mutex_enter(&igmp_timer_lock); - igmp_deferred_next = MIN(igmp_deferred_next, + mutex_enter(&ipst->ips_igmp_timer_lock); + ipst->ips_igmp_deferred_next = MIN(ipst->ips_igmp_deferred_next, ilm->ilm_rtx.rtx_timer); - mutex_exit(&igmp_timer_lock); + mutex_exit(&ipst->ips_igmp_timer_lock); } mutex_exit(&ill->ill_lock); @@ -1074,6 +1062,7 @@ mld_statechange(ilm_t *ilm, mcast_record_t fmode, slist_t *flist) { ill_t *ill; mrec_t *rp = NULL; + ip_stack_t *ipst = ilm->ilm_ipst; ASSERT(ilm != NULL); @@ -1146,10 +1135,10 @@ send_to_in: if (ilm->ilm_rtx.rtx_timer == INFINITY) { MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer, SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY)); - mutex_enter(&mld_timer_lock); - mld_deferred_next = - MIN(mld_deferred_next, ilm->ilm_rtx.rtx_timer); - mutex_exit(&mld_timer_lock); + mutex_enter(&ipst->ips_mld_timer_lock); + ipst->ips_mld_deferred_next = + MIN(ipst->ips_mld_deferred_next, ilm->ilm_rtx.rtx_timer); + mutex_exit(&ipst->ips_mld_timer_lock); } mutex_exit(&ill->ill_lock); @@ -1392,8 +1381,6 @@ per_ilm_rtxtimer: * The igmp_slowtimeo() function is called thru another timer. 
* igmp_slowtimeout_lock protects the igmp_slowtimeout_id */ - -/* ARGSUSED */ void igmp_timeout_handler(void *arg) { @@ -1403,16 +1390,18 @@ igmp_timeout_handler(void *arg) uint_t next; ill_walk_context_t ctx; boolean_t success; - - mutex_enter(&igmp_timer_lock); - ASSERT(igmp_timeout_id != 0); - igmp_timer_fired_last = ddi_get_lbolt(); - elapsed = igmp_time_to_next; - igmp_time_to_next = 0; - mutex_exit(&igmp_timer_lock); - - rw_enter(&ill_g_lock, RW_READER); - ill = ILL_START_WALK_V4(&ctx); + ip_stack_t *ipst = (ip_stack_t *)arg; + + ASSERT(arg != NULL); + mutex_enter(&ipst->ips_igmp_timer_lock); + ASSERT(ipst->ips_igmp_timeout_id != 0); + ipst->ips_igmp_timer_fired_last = ddi_get_lbolt(); + elapsed = ipst->ips_igmp_time_to_next; + ipst->ips_igmp_time_to_next = 0; + mutex_exit(&ipst->ips_igmp_timer_lock); + + rw_enter(&ipst->ips_ill_g_lock, RW_READER); + ill = ILL_START_WALK_V4(&ctx, ipst); for (; ill != NULL; ill = ill_next(&ctx, ill)) { ASSERT(!ill->ill_isv6); /* @@ -1422,7 +1411,7 @@ igmp_timeout_handler(void *arg) */ if (!ill_waiter_inc(ill)) continue; - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); success = ipsq_enter(ill, B_TRUE); if (success) { next = igmp_timeout_handler_per_ill(ill, elapsed); @@ -1431,18 +1420,18 @@ igmp_timeout_handler(void *arg) ipsq_exit(ill->ill_phyint->phyint_ipsq, B_FALSE, B_TRUE); } - rw_enter(&ill_g_lock, RW_READER); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); ill_waiter_dcr(ill); } - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); - mutex_enter(&igmp_timer_lock); - ASSERT(igmp_timeout_id != 0); - igmp_timeout_id = 0; - mutex_exit(&igmp_timer_lock); + mutex_enter(&ipst->ips_igmp_timer_lock); + ASSERT(ipst->ips_igmp_timeout_id != 0); + ipst->ips_igmp_timeout_id = 0; + mutex_exit(&ipst->ips_igmp_timer_lock); if (global_next != INFINITY) - igmp_start_timers(global_next); + igmp_start_timers(global_next, ipst); } /* @@ -1645,7 +1634,6 @@ per_ilm_rtxtimer: * Returns number of ticks to next event (or 0 if none). 
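 * The timeout argument is now the ip_stack_t whose timer fired (there
 * is no longer a single global MLD timer), so the handler walks only
 * that instance's ills; mld_start_timers() schedules it as, e.g.:
 *
 *	ipst->ips_mld_timeout_id = timeout(mld_timeout_handler,
 *	    (void *)ipst, MSEC_TO_TICK(ipst->ips_mld_time_to_next));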
* MT issues are same as igmp_timeout_handler */ -/* ARGSUSED */ void mld_timeout_handler(void *arg) { @@ -1655,16 +1643,18 @@ mld_timeout_handler(void *arg) uint_t next; ill_walk_context_t ctx; boolean_t success; - - mutex_enter(&mld_timer_lock); - ASSERT(mld_timeout_id != 0); - mld_timer_fired_last = ddi_get_lbolt(); - elapsed = mld_time_to_next; - mld_time_to_next = 0; - mutex_exit(&mld_timer_lock); - - rw_enter(&ill_g_lock, RW_READER); - ill = ILL_START_WALK_V6(&ctx); + ip_stack_t *ipst = (ip_stack_t *)arg; + + ASSERT(arg != NULL); + mutex_enter(&ipst->ips_mld_timer_lock); + ASSERT(ipst->ips_mld_timeout_id != 0); + ipst->ips_mld_timer_fired_last = ddi_get_lbolt(); + elapsed = ipst->ips_mld_time_to_next; + ipst->ips_mld_time_to_next = 0; + mutex_exit(&ipst->ips_mld_timer_lock); + + rw_enter(&ipst->ips_ill_g_lock, RW_READER); + ill = ILL_START_WALK_V6(&ctx, ipst); for (; ill != NULL; ill = ill_next(&ctx, ill)) { ASSERT(ill->ill_isv6); /* @@ -1674,7 +1664,7 @@ mld_timeout_handler(void *arg) */ if (!ill_waiter_inc(ill)) continue; - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); success = ipsq_enter(ill, B_TRUE); if (success) { next = mld_timeout_handler_per_ill(ill, elapsed); @@ -1683,18 +1673,18 @@ mld_timeout_handler(void *arg) ipsq_exit(ill->ill_phyint->phyint_ipsq, B_TRUE, B_FALSE); } - rw_enter(&ill_g_lock, RW_READER); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); ill_waiter_dcr(ill); } - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); - mutex_enter(&mld_timer_lock); - ASSERT(mld_timeout_id != 0); - mld_timeout_id = 0; - mutex_exit(&mld_timer_lock); + mutex_enter(&ipst->ips_mld_timer_lock); + ASSERT(ipst->ips_mld_timeout_id != 0); + ipst->ips_mld_timeout_id = 0; + mutex_exit(&ipst->ips_mld_timer_lock); if (global_next != INFINITY) - mld_start_timers(global_next); + mld_start_timers(global_next, ipst); } /* @@ -1711,16 +1701,17 @@ mld_timeout_handler(void *arg) * in IGMP_AGE_THRESHOLD seconds. * - Resets slowtimeout. */ -/* ARGSUSED */ void igmp_slowtimo(void *arg) { ill_t *ill; ill_if_t *ifp; avl_tree_t *avl_tree; + ip_stack_t *ipst = (ip_stack_t *)arg; + ASSERT(arg != NULL); /* Hold the ill_g_lock so that we can safely walk the ill list */ - rw_enter(&ill_g_lock, RW_READER); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); /* * The ill_if_t list is circular, hence the odd loop parameters. @@ -1730,7 +1721,8 @@ igmp_slowtimo(void *arg) * structure (allowing us to skip if none of the instances have timers * running). */ - for (ifp = IP_V4_ILL_G_LIST; ifp != (ill_if_t *)&IP_V4_ILL_G_LIST; + for (ifp = IP_V4_ILL_G_LIST(ipst); + ifp != (ill_if_t *)&IP_V4_ILL_G_LIST(ipst); ifp = ifp->illif_next) { /* * illif_mcast_v[12] are set using atomics. If an ill hears @@ -1785,11 +1777,11 @@ igmp_slowtimo(void *arg) } } - rw_exit(&ill_g_lock); - mutex_enter(&igmp_slowtimeout_lock); - igmp_slowtimeout_id = timeout(igmp_slowtimo, NULL, + rw_exit(&ipst->ips_ill_g_lock); + mutex_enter(&ipst->ips_igmp_slowtimeout_lock); + ipst->ips_igmp_slowtimeout_id = timeout(igmp_slowtimo, (void *)ipst, MSEC_TO_TICK(MCAST_SLOWTIMO_INTERVAL)); - mutex_exit(&igmp_slowtimeout_lock); + mutex_exit(&ipst->ips_igmp_slowtimeout_lock); } /* @@ -1805,12 +1797,14 @@ mld_slowtimo(void *arg) ill_t *ill; ill_if_t *ifp; avl_tree_t *avl_tree; + ip_stack_t *ipst = (ip_stack_t *)arg; + ASSERT(arg != NULL); /* See comments in igmp_slowtimo() above... 
*/ - rw_enter(&ill_g_lock, RW_READER); - for (ifp = IP_V6_ILL_G_LIST; ifp != (ill_if_t *)&IP_V6_ILL_G_LIST; + rw_enter(&ipst->ips_ill_g_lock, RW_READER); + for (ifp = IP_V6_ILL_G_LIST(ipst); + ifp != (ill_if_t *)&IP_V6_ILL_G_LIST(ipst); ifp = ifp->illif_next) { - if (ifp->illif_mcast_v1 == 0) continue; @@ -1834,11 +1828,11 @@ mld_slowtimo(void *arg) mutex_exit(&ill->ill_lock); } } - rw_exit(&ill_g_lock); - mutex_enter(&mld_slowtimeout_lock); - mld_slowtimeout_id = timeout(mld_slowtimo, NULL, + rw_exit(&ipst->ips_ill_g_lock); + mutex_enter(&ipst->ips_mld_slowtimeout_lock); + ipst->ips_mld_slowtimeout_id = timeout(mld_slowtimo, (void *)ipst, MSEC_TO_TICK(MCAST_SLOWTIMO_INTERVAL)); - mutex_exit(&mld_slowtimeout_lock); + mutex_exit(&ipst->ips_mld_slowtimeout_lock); } /* @@ -1861,6 +1855,7 @@ igmp_sendpkt(ilm_t *ilm, uchar_t type, ipaddr_t addr) mblk_t *first_mp; ipsec_out_t *io; zoneid_t zoneid; + ip_stack_t *ipst = ill->ill_ipst; /* * We need to make sure this packet goes out on an ipif. If @@ -1900,6 +1895,7 @@ igmp_sendpkt(ilm_t *ilm, uchar_t type, ipaddr_t addr) if ((zoneid = ilm->ilm_zoneid) == ALL_ZONES) zoneid = GLOBAL_ZONEID; io->ipsec_out_zoneid = zoneid; + io->ipsec_out_ns = ipst->ips_netstack; /* No netstack_hold */ mp = allocb(size, BPRI_HI); if (mp == NULL) { @@ -1951,7 +1947,7 @@ igmp_sendpkt(ilm_t *ilm, uchar_t type, ipaddr_t addr) ip_wput_multicast(ill->ill_wq, first_mp, ipif, zoneid); - ++igmpstat.igps_snd_reports; + ++ipst->ips_igmpstat.igps_snd_reports; } /* @@ -1979,6 +1975,7 @@ igmpv3_sendrpt(ipif_t *ipif, mrec_t *reclist) mrec_t *next_reclist = reclist; boolean_t morepkts; zoneid_t zoneid; + ip_stack_t *ipst = ill->ill_ipst; /* if there aren't any records, there's nothing to send */ if (reclist == NULL) @@ -2134,7 +2131,7 @@ nextpkt: ip_wput_multicast(ill->ill_wq, first_mp, ipif, zoneid); - ++igmpstat.igps_snd_reports; + ++ipst->ips_igmpstat.igps_snd_reports; if (morepkts) { if (more_src_cnt > 0) { @@ -2172,6 +2169,7 @@ mld_input(queue_t *q, mblk_t *mp, ill_t *ill) in6_addr_t *v6group_ptr, *lcladdr_ptr; uint_t next; int mldlen; + ip_stack_t *ipst = ill->ill_ipst; BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembTotal); @@ -2228,7 +2226,7 @@ mld_input(queue_t *q, mblk_t *mp, ill_t *ill) } if (next != INFINITY) - mld_start_timers(next); + mld_start_timers(next, ipst); break; case MLD_LISTENER_REPORT: { diff --git a/usr/src/uts/common/inet/ip/ip.c b/usr/src/uts/common/inet/ip/ip.c index f94130944a..83e13a8bd1 100644 --- a/usr/src/uts/common/inet/ip/ip.c +++ b/usr/src/uts/common/inet/ip/ip.c @@ -18,6 +18,7 @@ * * CDDL HEADER END */ + /* * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. @@ -130,115 +131,11 @@ * IP_SQUEUE_ENTER: squeue_enter * IP_SQUEUE_FILL: squeue_fill */ -int ip_squeue_enter = 2; +int ip_squeue_enter = 2; /* Setable in /etc/system */ + squeue_func_t ip_input_proc; -/* - * IP statistics. 
- */ -#define IP_STAT(x) (ip_statistics.x.value.ui64++) -#define IP_STAT_UPDATE(x, n) (ip_statistics.x.value.ui64 += (n)) #define SET_BPREV_FLAG(x) ((mblk_t *)(uintptr_t)(x)) -typedef struct ip_stat { - kstat_named_t ipsec_fanout_proto; - kstat_named_t ip_udp_fannorm; - kstat_named_t ip_udp_fanmb; - kstat_named_t ip_udp_fanothers; - kstat_named_t ip_udp_fast_path; - kstat_named_t ip_udp_slow_path; - kstat_named_t ip_udp_input_err; - kstat_named_t ip_tcppullup; - kstat_named_t ip_tcpoptions; - kstat_named_t ip_multipkttcp; - kstat_named_t ip_tcp_fast_path; - kstat_named_t ip_tcp_slow_path; - kstat_named_t ip_tcp_input_error; - kstat_named_t ip_db_ref; - kstat_named_t ip_notaligned1; - kstat_named_t ip_notaligned2; - kstat_named_t ip_multimblk3; - kstat_named_t ip_multimblk4; - kstat_named_t ip_ipoptions; - kstat_named_t ip_classify_fail; - kstat_named_t ip_opt; - kstat_named_t ip_udp_rput_local; - kstat_named_t ipsec_proto_ahesp; - kstat_named_t ip_conn_flputbq; - kstat_named_t ip_conn_walk_drain; - kstat_named_t ip_out_sw_cksum; - kstat_named_t ip_in_sw_cksum; - kstat_named_t ip_trash_ire_reclaim_calls; - kstat_named_t ip_trash_ire_reclaim_success; - kstat_named_t ip_ire_arp_timer_expired; - kstat_named_t ip_ire_redirect_timer_expired; - kstat_named_t ip_ire_pmtu_timer_expired; - kstat_named_t ip_input_multi_squeue; - kstat_named_t ip_tcp_in_full_hw_cksum_err; - kstat_named_t ip_tcp_in_part_hw_cksum_err; - kstat_named_t ip_tcp_in_sw_cksum_err; - kstat_named_t ip_tcp_out_sw_cksum_bytes; - kstat_named_t ip_udp_in_full_hw_cksum_err; - kstat_named_t ip_udp_in_part_hw_cksum_err; - kstat_named_t ip_udp_in_sw_cksum_err; - kstat_named_t ip_udp_out_sw_cksum_bytes; - kstat_named_t ip_frag_mdt_pkt_out; - kstat_named_t ip_frag_mdt_discarded; - kstat_named_t ip_frag_mdt_allocfail; - kstat_named_t ip_frag_mdt_addpdescfail; - kstat_named_t ip_frag_mdt_allocd; -} ip_stat_t; - -static ip_stat_t ip_statistics = { - { "ipsec_fanout_proto", KSTAT_DATA_UINT64 }, - { "ip_udp_fannorm", KSTAT_DATA_UINT64 }, - { "ip_udp_fanmb", KSTAT_DATA_UINT64 }, - { "ip_udp_fanothers", KSTAT_DATA_UINT64 }, - { "ip_udp_fast_path", KSTAT_DATA_UINT64 }, - { "ip_udp_slow_path", KSTAT_DATA_UINT64 }, - { "ip_udp_input_err", KSTAT_DATA_UINT64 }, - { "ip_tcppullup", KSTAT_DATA_UINT64 }, - { "ip_tcpoptions", KSTAT_DATA_UINT64 }, - { "ip_multipkttcp", KSTAT_DATA_UINT64 }, - { "ip_tcp_fast_path", KSTAT_DATA_UINT64 }, - { "ip_tcp_slow_path", KSTAT_DATA_UINT64 }, - { "ip_tcp_input_error", KSTAT_DATA_UINT64 }, - { "ip_db_ref", KSTAT_DATA_UINT64 }, - { "ip_notaligned1", KSTAT_DATA_UINT64 }, - { "ip_notaligned2", KSTAT_DATA_UINT64 }, - { "ip_multimblk3", KSTAT_DATA_UINT64 }, - { "ip_multimblk4", KSTAT_DATA_UINT64 }, - { "ip_ipoptions", KSTAT_DATA_UINT64 }, - { "ip_classify_fail", KSTAT_DATA_UINT64 }, - { "ip_opt", KSTAT_DATA_UINT64 }, - { "ip_udp_rput_local", KSTAT_DATA_UINT64 }, - { "ipsec_proto_ahesp", KSTAT_DATA_UINT64 }, - { "ip_conn_flputbq", KSTAT_DATA_UINT64 }, - { "ip_conn_walk_drain", KSTAT_DATA_UINT64 }, - { "ip_out_sw_cksum", KSTAT_DATA_UINT64 }, - { "ip_in_sw_cksum", KSTAT_DATA_UINT64 }, - { "ip_trash_ire_reclaim_calls", KSTAT_DATA_UINT64 }, - { "ip_trash_ire_reclaim_success", KSTAT_DATA_UINT64 }, - { "ip_ire_arp_timer_expired", KSTAT_DATA_UINT64 }, - { "ip_ire_redirect_timer_expired", KSTAT_DATA_UINT64 }, - { "ip_ire_pmtu_timer_expired", KSTAT_DATA_UINT64 }, - { "ip_input_multi_squeue", KSTAT_DATA_UINT64 }, - { "ip_tcp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, - { "ip_tcp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, - { 
"ip_tcp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, - { "ip_tcp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, - { "ip_udp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, - { "ip_udp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, - { "ip_udp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, - { "ip_udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, - { "ip_frag_mdt_pkt_out", KSTAT_DATA_UINT64 }, - { "ip_frag_mdt_discarded", KSTAT_DATA_UINT64 }, - { "ip_frag_mdt_allocfail", KSTAT_DATA_UINT64 }, - { "ip_frag_mdt_addpdescfail", KSTAT_DATA_UINT64 }, - { "ip_frag_mdt_allocd", KSTAT_DATA_UINT64 }, -}; - -static kstat_t *ip_kstat; - #define TCP6 "tcp6" #define TCP "tcp" #define SCTP "sctp" @@ -249,6 +146,9 @@ major_t TCP_MAJ; major_t SCTP_MAJ; major_t SCTP6_MAJ; +/* + * Setable in /etc/system + */ int ip_poll_normal_ms = 100; int ip_poll_normal_ticks = 0; int ip_modclose_ackwait_ms = 3000; @@ -709,25 +609,29 @@ static int conn_set_held_ipif(conn_t *, ipif_t **, ipif_t *); static mblk_t *ip_wput_attach_llhdr(mblk_t *, ire_t *, ip_proc_t, uint32_t); static void ip_ipsec_out_prepend(mblk_t *, mblk_t *, ill_t *); -static void icmp_frag_needed(queue_t *, mblk_t *, int, zoneid_t); +static void icmp_frag_needed(queue_t *, mblk_t *, int, zoneid_t, + ip_stack_t *); static void icmp_inbound(queue_t *, mblk_t *, boolean_t, ill_t *, int, - uint32_t, boolean_t, boolean_t, ill_t *, zoneid_t); + uint32_t, boolean_t, boolean_t, ill_t *, zoneid_t); static ipaddr_t icmp_get_nexthop_addr(ipha_t *, ill_t *, zoneid_t, mblk_t *mp); static boolean_t icmp_inbound_too_big(icmph_t *, ipha_t *, ill_t *, zoneid_t, - mblk_t *, int); + mblk_t *, int, ip_stack_t *); static void icmp_inbound_error_fanout(queue_t *, ill_t *, mblk_t *, icmph_t *, ipha_t *, int, int, boolean_t, boolean_t, ill_t *, zoneid_t); static void icmp_options_update(ipha_t *); -static void icmp_param_problem(queue_t *, mblk_t *, uint8_t, zoneid_t); +static void icmp_param_problem(queue_t *, mblk_t *, uint8_t, zoneid_t, + ip_stack_t *); static void icmp_pkt(queue_t *, mblk_t *, void *, size_t, boolean_t, - zoneid_t zoneid); -static mblk_t *icmp_pkt_err_ok(mblk_t *); -static void icmp_redirect(mblk_t *); -static void icmp_send_redirect(queue_t *, mblk_t *, ipaddr_t); + zoneid_t zoneid, ip_stack_t *); +static mblk_t *icmp_pkt_err_ok(mblk_t *, ip_stack_t *); +static void icmp_redirect(ill_t *, mblk_t *); +static void icmp_send_redirect(queue_t *, mblk_t *, ipaddr_t, + ip_stack_t *); static void ip_arp_news(queue_t *, mblk_t *); -static boolean_t ip_bind_insert_ire(mblk_t *, ire_t *, iulp_t *); +static boolean_t ip_bind_insert_ire(mblk_t *, ire_t *, iulp_t *, + ip_stack_t *); mblk_t *ip_dlpi_alloc(size_t, t_uscalar_t); char *ip_dot_addr(ipaddr_t, char *); mblk_t *ip_carve_mp(mblk_t **, ssize_t); @@ -740,73 +644,90 @@ static void ip_fanout_tcp(queue_t *, mblk_t *, ill_t *, ipha_t *, uint_t, static void ip_fanout_udp(queue_t *, mblk_t *, ill_t *, ipha_t *, uint32_t, boolean_t, uint_t, boolean_t, boolean_t, ill_t *, zoneid_t); static void ip_lrput(queue_t *, mblk_t *); -ipaddr_t ip_massage_options(ipha_t *); static void ip_mrtun_forward(ire_t *, ill_t *, mblk_t *); ipaddr_t ip_net_mask(ipaddr_t); void ip_newroute(queue_t *, mblk_t *, ipaddr_t, ill_t *, conn_t *, - zoneid_t); + zoneid_t, ip_stack_t *); static void ip_newroute_ipif(queue_t *, mblk_t *, ipif_t *, ipaddr_t, conn_t *, uint32_t, zoneid_t, ip_opt_info_t *); char *ip_nv_lookup(nv_t *, int); static boolean_t ip_check_for_ipsec_opt(queue_t *, mblk_t *); static int ip_param_get(queue_t *, mblk_t *, caddr_t, cred_t *); static int 
ip_param_generic_get(queue_t *, mblk_t *, caddr_t, cred_t *); -static boolean_t ip_param_register(ipparam_t *, size_t, ipndp_t *, - size_t); +static boolean_t ip_param_register(IDP *ndp, ipparam_t *, size_t, + ipndp_t *, size_t); static int ip_param_set(queue_t *, mblk_t *, char *, caddr_t, cred_t *); void ip_rput(queue_t *, mblk_t *); static void ip_rput_dlpi_writer(ipsq_t *dummy_sq, queue_t *q, mblk_t *mp, void *dummy_arg); void ip_rput_forward(ire_t *, ipha_t *, mblk_t *, ill_t *); -static int ip_rput_forward_options(mblk_t *, ipha_t *, ire_t *); +static int ip_rput_forward_options(mblk_t *, ipha_t *, ire_t *, + ip_stack_t *); static boolean_t ip_rput_local_options(queue_t *, mblk_t *, ipha_t *, - ire_t *); + ire_t *, ip_stack_t *); static boolean_t ip_rput_multimblk_ipoptions(queue_t *, ill_t *, - mblk_t *, ipha_t **, ipaddr_t *); -static int ip_rput_options(queue_t *, mblk_t *, ipha_t *, ipaddr_t *); + mblk_t *, ipha_t **, ipaddr_t *, ip_stack_t *); +static int ip_rput_options(queue_t *, mblk_t *, ipha_t *, ipaddr_t *, + ip_stack_t *); static boolean_t ip_rput_fragment(queue_t *, mblk_t **, ipha_t *, uint32_t *, uint16_t *); int ip_snmp_get(queue_t *, mblk_t *); static mblk_t *ip_snmp_get_mib2_ip(queue_t *, mblk_t *, - mib2_ipIfStatsEntry_t *); -static mblk_t *ip_snmp_get_mib2_ip_traffic_stats(queue_t *, mblk_t *); -static mblk_t *ip_snmp_get_mib2_ip6(queue_t *, mblk_t *); -static mblk_t *ip_snmp_get_mib2_icmp(queue_t *, mblk_t *); -static mblk_t *ip_snmp_get_mib2_icmp6(queue_t *, mblk_t *); -static mblk_t *ip_snmp_get_mib2_igmp(queue_t *, mblk_t *); -static mblk_t *ip_snmp_get_mib2_multi(queue_t *, mblk_t *); -static mblk_t *ip_snmp_get_mib2_ip_addr(queue_t *, mblk_t *); -static mblk_t *ip_snmp_get_mib2_ip6_addr(queue_t *, mblk_t *); -static mblk_t *ip_snmp_get_mib2_ip_group_mem(queue_t *, mblk_t *); -static mblk_t *ip_snmp_get_mib2_ip6_group_mem(queue_t *, mblk_t *); -static mblk_t *ip_snmp_get_mib2_ip_group_src(queue_t *, mblk_t *); -static mblk_t *ip_snmp_get_mib2_ip6_group_src(queue_t *, mblk_t *); -static mblk_t *ip_snmp_get_mib2_virt_multi(queue_t *, mblk_t *); -static mblk_t *ip_snmp_get_mib2_multi_rtable(queue_t *, mblk_t *); -static mblk_t *ip_snmp_get_mib2_ip_route_media(queue_t *, mblk_t *); -static mblk_t *ip_snmp_get_mib2_ip6_route_media(queue_t *, mblk_t *); + mib2_ipIfStatsEntry_t *, ip_stack_t *); +static mblk_t *ip_snmp_get_mib2_ip_traffic_stats(queue_t *, mblk_t *, + ip_stack_t *); +static mblk_t *ip_snmp_get_mib2_ip6(queue_t *, mblk_t *, ip_stack_t *); +static mblk_t *ip_snmp_get_mib2_icmp(queue_t *, mblk_t *, ip_stack_t *ipst); +static mblk_t *ip_snmp_get_mib2_icmp6(queue_t *, mblk_t *, ip_stack_t *ipst); +static mblk_t *ip_snmp_get_mib2_igmp(queue_t *, mblk_t *, ip_stack_t *ipst); +static mblk_t *ip_snmp_get_mib2_multi(queue_t *, mblk_t *, ip_stack_t *ipst); +static mblk_t *ip_snmp_get_mib2_ip_addr(queue_t *, mblk_t *, + ip_stack_t *ipst); +static mblk_t *ip_snmp_get_mib2_ip6_addr(queue_t *, mblk_t *, + ip_stack_t *ipst); +static mblk_t *ip_snmp_get_mib2_ip_group_src(queue_t *, mblk_t *, + ip_stack_t *ipst); +static mblk_t *ip_snmp_get_mib2_ip6_group_src(queue_t *, mblk_t *, + ip_stack_t *ipst); +static mblk_t *ip_snmp_get_mib2_ip_group_mem(queue_t *, mblk_t *, + ip_stack_t *ipst); +static mblk_t *ip_snmp_get_mib2_ip6_group_mem(queue_t *, mblk_t *, + ip_stack_t *ipst); +static mblk_t *ip_snmp_get_mib2_virt_multi(queue_t *, mblk_t *, + ip_stack_t *ipst); +static mblk_t *ip_snmp_get_mib2_multi_rtable(queue_t *, mblk_t *, + ip_stack_t *ipst); +static mblk_t 
*ip_snmp_get_mib2_ip_route_media(queue_t *, mblk_t *, + ip_stack_t *ipst); +static mblk_t *ip_snmp_get_mib2_ip6_route_media(queue_t *, mblk_t *, + ip_stack_t *ipst); static void ip_snmp_get2_v4(ire_t *, iproutedata_t *); static void ip_snmp_get2_v6_route(ire_t *, iproutedata_t *); static int ip_snmp_get2_v6_media(nce_t *, iproutedata_t *); int ip_snmp_set(queue_t *, int, int, uchar_t *, int); -static boolean_t ip_source_routed(ipha_t *); +static boolean_t ip_source_routed(ipha_t *, ip_stack_t *); static boolean_t ip_source_route_included(ipha_t *); +static void ip_trash_ire_reclaim_stack(ip_stack_t *); static void ip_wput_frag(ire_t *, mblk_t *, ip_pkt_t, uint32_t, uint32_t, - zoneid_t); -static mblk_t *ip_wput_frag_copyhdr(uchar_t *, int, int); -static void ip_wput_local_options(ipha_t *); + zoneid_t, ip_stack_t *); +static mblk_t *ip_wput_frag_copyhdr(uchar_t *, int, int, ip_stack_t *); +static void ip_wput_local_options(ipha_t *, ip_stack_t *); static int ip_wput_options(queue_t *, mblk_t *, ipha_t *, boolean_t, - zoneid_t); + zoneid_t, ip_stack_t *); -static void conn_drain_init(void); -static void conn_drain_fini(void); +static void conn_drain_init(ip_stack_t *); +static void conn_drain_fini(ip_stack_t *); static void conn_drain_tail(conn_t *connp, boolean_t closing); -static void conn_walk_drain(void); +static void conn_walk_drain(ip_stack_t *); static void conn_walk_fanout_table(connf_t *, uint_t, pfv_t, void *, zoneid_t); +static void *ip_stack_init(netstackid_t stackid, netstack_t *ns); +static void ip_stack_shutdown(netstackid_t stackid, void *arg); +static void ip_stack_fini(netstackid_t stackid, void *arg); + static boolean_t conn_wantpacket(conn_t *, ill_t *, ipha_t *, int, zoneid_t); static void ip_arp_done(ipsq_t *dummy_sq, queue_t *q, mblk_t *mp, @@ -832,12 +753,14 @@ static int ip_int_set(queue_t *, mblk_t *, char *, caddr_t, cred_t *); static squeue_func_t ip_squeue_switch(int); -static void ip_kstat_init(void); -static void ip_kstat_fini(void); +static void *ip_kstat_init(netstackid_t, ip_stack_t *); +static void ip_kstat_fini(netstackid_t, kstat_t *); static int ip_kstat_update(kstat_t *kp, int rw); -static void icmp_kstat_init(void); -static void icmp_kstat_fini(void); +static void *icmp_kstat_init(netstackid_t); +static void icmp_kstat_fini(netstackid_t, kstat_t *); static int icmp_kstat_update(kstat_t *kp, int rw); +static void *ip_kstat2_init(netstackid_t, ip_stat_t *); +static void ip_kstat2_fini(netstackid_t, kstat_t *); static int ip_conn_report(queue_t *, mblk_t *, caddr_t, cred_t *); @@ -847,21 +770,13 @@ static mblk_t *ip_tcp_input(mblk_t *, ipha_t *, ill_t *, boolean_t, static void ip_rput_process_forward(queue_t *, mblk_t *, ire_t *, ipha_t *, ill_t *, boolean_t); -timeout_id_t ip_ire_expire_id; /* IRE expiration timer. */ -static clock_t ip_ire_arp_time_elapsed; /* Time since IRE cache last flushed */ -static clock_t ip_ire_rd_time_elapsed; /* ... redirect IREs last flushed */ -static clock_t ip_ire_pmtu_time_elapsed; /* Time since path mtu increase */ - +static void ip_rput_process_forward(queue_t *, mblk_t *, ire_t *, + ipha_t *, ill_t *, boolean_t); ipaddr_t ip_g_all_ones = IP_HOST_MASK; -clock_t icmp_pkt_err_last = 0; /* Time since last icmp_pkt_err */ -uint_t icmp_pkt_err_sent = 0; /* Number of packets sent in burst */ /* How long, in seconds, we allow frags to hang around. 
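The new ip_stack_init/ip_stack_shutdown/ip_stack_fini prototypes above are the per-instance constructor and destructors for IP state. How they are invoked is not shown in this hunk; presumably ip_ddi_init() registers them with the netstack framework so that every exclusive-stack zone gets its own ip_stack_t built and torn down automatically. A hedged sketch of that registration (assumed, not quoted from the patch):

/*
 * Assumed wiring in ip_ddi_init(): the netstack framework calls
 * ip_stack_init() for each stack that comes into existence and
 * ip_stack_shutdown()/ip_stack_fini() as each stack is destroyed.
 */
netstack_register(NS_IP, ip_stack_init, ip_stack_shutdown, ip_stack_fini);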
*/ #define IP_FRAG_TIMEOUT 60 -time_t ip_g_frag_timeout = IP_FRAG_TIMEOUT; -clock_t ip_g_frag_timo_ms = IP_FRAG_TIMEOUT * 1000; - /* * Threshold which determines whether MDT should be used when * generating IP fragments; payload size must be greater than @@ -869,49 +784,26 @@ clock_t ip_g_frag_timo_ms = IP_FRAG_TIMEOUT * 1000; */ #define IP_WPUT_FRAG_MDT_MIN 32768 +/* Setable in /etc/system only */ int ip_wput_frag_mdt_min = IP_WPUT_FRAG_MDT_MIN; -/* Protected by ip_mi_lock */ -static void *ip_g_head; /* Instance Data List Head */ -kmutex_t ip_mi_lock; /* Lock for list of instances */ - -/* Only modified during _init and _fini thus no locking is needed. */ -caddr_t ip_g_nd; /* Named Dispatch List Head */ - - static long ip_rput_pullups; int dohwcksum = 1; /* use h/w cksum if supported by the hardware */ vmem_t *ip_minor_arena; -/* - * MIB-2 stuff for SNMP (both IP and ICMP) - */ -mib2_ipIfStatsEntry_t ip_mib; -mib2_icmp_t icmp_mib; +int ip_debug; #ifdef DEBUG uint32_t ipsechw_debug = 0; #endif -kstat_t *ip_mibkp; /* kstat exporting ip_mib data */ -kstat_t *icmp_mibkp; /* kstat exporting icmp_mib data */ - -uint_t loopback_packets = 0; - /* * Multirouting/CGTP stuff */ cgtp_filter_ops_t *ip_cgtp_filter_ops; /* CGTP hooks */ int ip_cgtp_filter_rev = CGTP_FILTER_REV; /* CGTP hooks version */ boolean_t ip_cgtp_filter; /* Enable/disable CGTP hooks */ -/* Interval (in ms) between consecutive 'bad MTU' warnings */ -hrtime_t ip_multirt_log_interval = 1000; -/* Time since last warning issued. */ -static hrtime_t multirt_bad_mtu_last_time = 0; - -kmutex_t ip_trash_timer_lock; -krwlock_t ip_g_nd_lock; /* * XXX following really should only be in a header. Would need more @@ -991,17 +883,23 @@ static ipparam_t lcl_param_arr[] = { { 0, 1, 1, "ip_lso_outbound" }, #ifdef DEBUG { 0, 1, 0, "ip6_drop_inbound_icmpv6" }, +#else + { 0, 0, 0, "" }, #endif }; -ipparam_t *ip_param_arr = lcl_param_arr; - -/* Extended NDP table */ +/* + * Extended NDP table + * The addresses for the first two are filled in to be ips_ip_g_forward + * and ips_ipv6_forward at init time. + */ static ipndp_t lcl_ndp_arr[] = { /* getf setf data name */ - { ip_param_generic_get, ip_forward_set, (caddr_t)&ip_g_forward, +#define IPNDP_IP_FORWARDING_OFFSET 0 + { ip_param_generic_get, ip_forward_set, NULL, "ip_forwarding" }, - { ip_param_generic_get, ip_forward_set, (caddr_t)&ipv6_forward, +#define IPNDP_IP6_FORWARDING_OFFSET 1 + { ip_param_generic_get, ip_forward_set, NULL, "ip6_forwarding" }, { ip_ill_report, NULL, NULL, "ip_ill_status" }, @@ -1031,31 +929,14 @@ static ipndp_t lcl_ndp_arr[] = { (caddr_t)&ip_squeue_enter, "ip_squeue_enter" }, { ip_param_generic_get, ip_int_set, (caddr_t)&ip_squeue_fanout, "ip_squeue_fanout" }, - { ip_cgtp_filter_get, ip_cgtp_filter_set, (caddr_t)&ip_cgtp_filter, +#define IPNDP_CGTP_FILTER_OFFSET 16 + { ip_cgtp_filter_get, ip_cgtp_filter_set, NULL, "ip_cgtp_filter" }, { ip_param_generic_get, ip_int_set, - (caddr_t)&ip_soft_rings_cnt, "ip_soft_rings_cnt" } + (caddr_t)&ip_soft_rings_cnt, "ip_soft_rings_cnt" }, }; /* - * ip_g_forward controls IP forwarding. It takes two values: - * 0: IP_FORWARD_NEVER Don't forward packets ever. - * 1: IP_FORWARD_ALWAYS Forward packets for elsewhere. - * - * RFC1122 says there must be a configuration switch to control forwarding, - * but that the default MUST be to not forward packets ever. Implicit - * control based on configuration of multiple interfaces MUST NOT be - * implemented (Section 3.1). 
SunOS 4.1 did provide the "automatic" capability - * and, in fact, it was the default. That capability is now provided in the - * /etc/rc2.d/S69inet script. - */ -int ip_g_forward = IP_FORWARD_DEFAULT; - -/* It also has an IPv6 counterpart. */ - -int ipv6_forward = IP_FORWARD_DEFAULT; - -/* * Table of IP ioctls encoding the various properties of the ioctl and * indexed based on the last byte of the ioctl command. Occasionally there * is a clash, and there is more than 1 ioctl with the same last byte. @@ -1447,13 +1328,8 @@ ip_ioctl_cmd_t ip_misc_ioctl_table[] = { int ip_misc_ioctl_count = sizeof (ip_misc_ioctl_table) / sizeof (ip_ioctl_cmd_t); -static idl_t *conn_drain_list; /* The array of conn drain lists */ -static uint_t conn_drain_list_cnt; /* Total count of conn_drain_list */ -static int conn_drain_list_index; /* Next drain_list to be used */ int conn_drain_nthreads; /* Number of drainers reqd. */ /* Settable in /etc/system */ -uint_t ip_redirect_cnt; /* Num of redirect routes in ftable */ - /* Defined in ip_ire.c */ extern uint32_t ip_ire_max_bucket_cnt, ip6_ire_max_bucket_cnt; extern uint32_t ip_ire_min_bucket_cnt, ip6_ire_min_bucket_cnt; @@ -1474,15 +1350,9 @@ static nv_t ire_nv_arr[] = { nv_t *ire_nv_tbl = ire_nv_arr; -/* Defined in ip_if.c, protect the list of IPsec capable ills */ -extern krwlock_t ipsec_capab_ills_lock; - /* Defined in ip_netinfo.c */ extern ddi_taskq_t *eventq_queue_nic; -/* Packet dropper for IP IPsec processing failures */ -ipdropper_t ip_dropper; - /* Simple ICMP IP Header Template */ static ipha_t icmp_ipha = { IP_SIMPLE_HDR_VERSION, 0, 0, 0, 0, 0, IPPROTO_ICMP @@ -1532,7 +1402,7 @@ static boolean_t skip_sctp_cksum = B_FALSE; * appropriately. */ mblk_t * -ip_prepend_zoneid(mblk_t *mp, zoneid_t zoneid) +ip_prepend_zoneid(mblk_t *mp, zoneid_t zoneid, ip_stack_t *ipst) { mblk_t *first_mp; ipsec_out_t *io; @@ -1545,7 +1415,7 @@ ip_prepend_zoneid(mblk_t *mp, zoneid_t zoneid) return (mp); } - first_mp = ipsec_alloc_ipsec_out(); + first_mp = ipsec_alloc_ipsec_out(ipst->ips_netstack); if (first_mp == NULL) return (NULL); io = (ipsec_out_t *)first_mp->b_rptr; @@ -1580,15 +1450,19 @@ ip_copymsg(mblk_t *mp) nmp = copymsg(mp->b_cont); - if (in->ipsec_info_type == IPSEC_OUT) - return (ipsec_out_tag(mp, nmp)); - else - return (ipsec_in_tag(mp, nmp)); + if (in->ipsec_info_type == IPSEC_OUT) { + return (ipsec_out_tag(mp, nmp, + ((ipsec_out_t *)in)->ipsec_out_ns)); + } else { + return (ipsec_in_tag(mp, nmp, + ((ipsec_in_t *)in)->ipsec_in_ns)); + } } /* Generate an ICMP fragmentation needed message. 
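With the ip_g_forward and ipv6_forward globals removed, the forwarding switches become per-stack fields; the comment on lcl_ndp_arr above says the first two entries are filled in with ips_ip_g_forward and ips_ipv6_forward at init time. A hedged sketch of that wiring as it presumably appears in ip_stack_init() (the array and member names here are assumptions):

/* Point the stack's private copy of the ndd table at its own switches. */
ipst->ips_ip_ndp_arr[IPNDP_IP_FORWARDING_OFFSET].ip_ndp_data =
    (caddr_t)&ipst->ips_ip_g_forward;
ipst->ips_ip_ndp_arr[IPNDP_IP6_FORWARDING_OFFSET].ip_ndp_data =
    (caddr_t)&ipst->ips_ipv6_forward;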
*/ static void -icmp_frag_needed(queue_t *q, mblk_t *mp, int mtu, zoneid_t zoneid) +icmp_frag_needed(queue_t *q, mblk_t *mp, int mtu, zoneid_t zoneid, + ip_stack_t *ipst) { icmph_t icmph; mblk_t *first_mp; @@ -1596,7 +1470,7 @@ icmp_frag_needed(queue_t *q, mblk_t *mp, int mtu, zoneid_t zoneid) EXTRACT_PKT_MP(mp, first_mp, mctl_present); - if (!(mp = icmp_pkt_err_ok(mp))) { + if (!(mp = icmp_pkt_err_ok(mp, ipst))) { if (mctl_present) freeb(first_mp); return; @@ -1606,9 +1480,10 @@ icmp_frag_needed(queue_t *q, mblk_t *mp, int mtu, zoneid_t zoneid) icmph.icmph_type = ICMP_DEST_UNREACHABLE; icmph.icmph_code = ICMP_FRAGMENTATION_NEEDED; icmph.icmph_du_mtu = htons((uint16_t)mtu); - BUMP_MIB(&icmp_mib, icmpOutFragNeeded); - BUMP_MIB(&icmp_mib, icmpOutDestUnreachs); - icmp_pkt(q, first_mp, &icmph, sizeof (icmph_t), mctl_present, zoneid); + BUMP_MIB(&ipst->ips_icmp_mib, icmpOutFragNeeded); + BUMP_MIB(&ipst->ips_icmp_mib, icmpOutDestUnreachs); + icmp_pkt(q, first_mp, &icmph, sizeof (icmph_t), mctl_present, zoneid, + ipst); } /* @@ -1720,8 +1595,10 @@ icmp_inbound(queue_t *q, mblk_t *mp, boolean_t broadcast, ill_t *ill, boolean_t onlink; timestruc_t now; uint32_t ill_index; + ip_stack_t *ipst; ASSERT(ill != NULL); + ipst = ill->ill_ipst; first_mp = mp; if (mctl_present) { @@ -1730,9 +1607,9 @@ icmp_inbound(queue_t *q, mblk_t *mp, boolean_t broadcast, ill_t *ill, } ipha = (ipha_t *)mp->b_rptr; - if (icmp_accept_clear_messages == 0) { + if (ipst->ips_icmp_accept_clear_messages == 0) { first_mp = ipsec_check_global_policy(first_mp, NULL, - ipha, NULL, mctl_present); + ipha, NULL, mctl_present, ipst->ips_netstack); if (first_mp == NULL) return; } @@ -1747,7 +1624,7 @@ icmp_inbound(queue_t *q, mblk_t *mp, boolean_t broadcast, ill_t *ill, if (!tsol_can_accept_raw(mp, B_FALSE)) { ip1dbg(("icmp_inbound: zone %d can't receive raw", zoneid)); - BUMP_MIB(&icmp_mib, icmpInErrors); + BUMP_MIB(&ipst->ips_icmp_mib, icmpInErrors); freemsg(first_mp); return; } @@ -1762,12 +1639,12 @@ icmp_inbound(queue_t *q, mblk_t *mp, boolean_t broadcast, ill_t *ill, ASSERT(ill != NULL); - BUMP_MIB(&icmp_mib, icmpInMsgs); + BUMP_MIB(&ipst->ips_icmp_mib, icmpInMsgs); iph_hdr_length = IPH_HDR_LENGTH(ipha); if ((mp->b_wptr - mp->b_rptr) < (iph_hdr_length + ICMPH_SIZE)) { /* Last chance to get real. */ if (!pullupmsg(mp, iph_hdr_length + ICMPH_SIZE)) { - BUMP_MIB(&icmp_mib, icmpInErrors); + BUMP_MIB(&ipst->ips_icmp_mib, icmpInErrors); freemsg(first_mp); return; } @@ -1777,7 +1654,7 @@ icmp_inbound(queue_t *q, mblk_t *mp, boolean_t broadcast, ill_t *ill, /* ICMP header checksum, including checksum field, should be zero. */ if (sum_valid ? 
(sum != 0 && sum != 0xFFFF) : IP_CSUM(mp, iph_hdr_length, 0)) { - BUMP_MIB(&icmp_mib, icmpInCksumErrs); + BUMP_MIB(&ipst->ips_icmp_mib, icmpInCksumErrs); freemsg(first_mp); return; } @@ -1790,22 +1667,22 @@ icmp_inbound(queue_t *q, mblk_t *mp, boolean_t broadcast, ill_t *ill, interested = B_FALSE; switch (icmph->icmph_type) { case ICMP_ECHO_REPLY: - BUMP_MIB(&icmp_mib, icmpInEchoReps); + BUMP_MIB(&ipst->ips_icmp_mib, icmpInEchoReps); break; case ICMP_DEST_UNREACHABLE: if (icmph->icmph_code == ICMP_FRAGMENTATION_NEEDED) - BUMP_MIB(&icmp_mib, icmpInFragNeeded); + BUMP_MIB(&ipst->ips_icmp_mib, icmpInFragNeeded); interested = B_TRUE; /* Pass up to transport */ - BUMP_MIB(&icmp_mib, icmpInDestUnreachs); + BUMP_MIB(&ipst->ips_icmp_mib, icmpInDestUnreachs); break; case ICMP_SOURCE_QUENCH: interested = B_TRUE; /* Pass up to transport */ - BUMP_MIB(&icmp_mib, icmpInSrcQuenchs); + BUMP_MIB(&ipst->ips_icmp_mib, icmpInSrcQuenchs); break; case ICMP_REDIRECT: - if (!ip_ignore_redirect) + if (!ipst->ips_ip_ignore_redirect) interested = B_TRUE; - BUMP_MIB(&icmp_mib, icmpInRedirects); + BUMP_MIB(&ipst->ips_icmp_mib, icmpInRedirects); break; case ICMP_ECHO_REQUEST: /* @@ -1819,29 +1696,29 @@ icmp_inbound(queue_t *q, mblk_t *mp, boolean_t broadcast, ill_t *ill, interested = B_TRUE; } else if (CLASSD(ipha->ipha_dst)) { /* multicast: respond based on tunable */ - interested = ip_g_resp_to_echo_mcast; + interested = ipst->ips_ip_g_resp_to_echo_mcast; } else if (broadcast) { /* broadcast: respond based on tunable */ - interested = ip_g_resp_to_echo_bcast; + interested = ipst->ips_ip_g_resp_to_echo_bcast; } - BUMP_MIB(&icmp_mib, icmpInEchos); + BUMP_MIB(&ipst->ips_icmp_mib, icmpInEchos); break; case ICMP_ROUTER_ADVERTISEMENT: case ICMP_ROUTER_SOLICITATION: break; case ICMP_TIME_EXCEEDED: interested = B_TRUE; /* Pass up to transport */ - BUMP_MIB(&icmp_mib, icmpInTimeExcds); + BUMP_MIB(&ipst->ips_icmp_mib, icmpInTimeExcds); break; case ICMP_PARAM_PROBLEM: interested = B_TRUE; /* Pass up to transport */ - BUMP_MIB(&icmp_mib, icmpInParmProbs); + BUMP_MIB(&ipst->ips_icmp_mib, icmpInParmProbs); break; case ICMP_TIME_STAMP_REQUEST: /* Response to Time Stamp Requests is local policy. */ - if (ip_g_resp_to_timestamp && + if (ipst->ips_ip_g_resp_to_timestamp && /* So is whether to respond if it was an IP broadcast. */ - (!broadcast || ip_g_resp_to_timestamp_bcast)) { + (!broadcast || ipst->ips_ip_g_resp_to_timestamp_bcast)) { int tstamp_len = 3 * sizeof (uint32_t); if (wptr + tstamp_len > mp->b_wptr) { @@ -1859,32 +1736,33 @@ icmp_inbound(queue_t *q, mblk_t *mp, boolean_t broadcast, ill_t *ill, } interested = B_TRUE; } - BUMP_MIB(&icmp_mib, icmpInTimestamps); + BUMP_MIB(&ipst->ips_icmp_mib, icmpInTimestamps); break; case ICMP_TIME_STAMP_REPLY: - BUMP_MIB(&icmp_mib, icmpInTimestampReps); + BUMP_MIB(&ipst->ips_icmp_mib, icmpInTimestampReps); break; case ICMP_INFO_REQUEST: /* Per RFC 1122 3.2.2.7, ignore this. */ case ICMP_INFO_REPLY: break; case ICMP_ADDRESS_MASK_REQUEST: - if ((ip_respond_to_address_mask_broadcast || !broadcast) && + if ((ipst->ips_ip_respond_to_address_mask_broadcast || + !broadcast) && /* TODO m_pullup of complete header? 
*/ (mp->b_datap->db_lim - wptr) >= IP_ADDR_LEN) interested = B_TRUE; - BUMP_MIB(&icmp_mib, icmpInAddrMasks); + BUMP_MIB(&ipst->ips_icmp_mib, icmpInAddrMasks); break; case ICMP_ADDRESS_MASK_REPLY: - BUMP_MIB(&icmp_mib, icmpInAddrMaskReps); + BUMP_MIB(&ipst->ips_icmp_mib, icmpInAddrMaskReps); break; default: interested = B_TRUE; /* Pass up to transport */ - BUMP_MIB(&icmp_mib, icmpInUnknowns); + BUMP_MIB(&ipst->ips_icmp_mib, icmpInUnknowns); break; } /* See if there is an ICMP client. */ - if (ipcl_proto_search(IPPROTO_ICMP) != NULL) { + if (ipst->ips_ipcl_proto_fanout[IPPROTO_ICMP].connf_head != NULL) { /* If there is an ICMP client and we want one too, copy it. */ mblk_t *first_mp1; @@ -1906,14 +1784,14 @@ icmp_inbound(queue_t *q, mblk_t *mp, boolean_t broadcast, ill_t *ill, * Initiate policy processing for this packet if ip_policy * is true. */ - if (IPP_ENABLED(IPP_LOCAL_IN) && ip_policy) { + if (IPP_ENABLED(IPP_LOCAL_IN, ipst) && ip_policy) { ill_index = ill->ill_phyint->phyint_ifindex; ip_process(IPP_LOCAL_IN, &mp, ill_index); if (mp == NULL) { if (mctl_present) { freeb(first_mp); } - BUMP_MIB(&icmp_mib, icmpInErrors); + BUMP_MIB(&ipst->ips_icmp_mib, icmpInErrors); return; } } @@ -1926,7 +1804,7 @@ icmp_inbound(queue_t *q, mblk_t *mp, boolean_t broadcast, ill_t *ill, first_mp1 = ip_copymsg(first_mp); freemsg(first_mp); if (!first_mp1) { - BUMP_MIB(&icmp_mib, icmpOutDrops); + BUMP_MIB(&ipst->ips_icmp_mib, icmpOutDrops); return; } first_mp = first_mp1; @@ -1954,11 +1832,11 @@ icmp_inbound(queue_t *q, mblk_t *mp, boolean_t broadcast, ill_t *ill, icmph->icmph_type = ICMP_ADDRESS_MASK_REPLY; bcopy(&ipif->ipif_net_mask, wptr, IP_ADDR_LEN); ipif_refrele(ipif); - BUMP_MIB(&icmp_mib, icmpOutAddrMaskReps); + BUMP_MIB(&ipst->ips_icmp_mib, icmpOutAddrMaskReps); break; case ICMP_ECHO_REQUEST: icmph->icmph_type = ICMP_ECHO_REPLY; - BUMP_MIB(&icmp_mib, icmpOutEchoReps); + BUMP_MIB(&ipst->ips_icmp_mib, icmpOutEchoReps); break; case ICMP_TIME_STAMP_REQUEST: { uint32_t *tsp; @@ -1972,7 +1850,7 @@ icmp_inbound(queue_t *q, mblk_t *mp, boolean_t broadcast, ill_t *ill, now.tv_nsec / (NANOSEC / MILLISEC); *tsp++ = htonl(ts); /* Lay in 'receive time' */ *tsp++ = htonl(ts); /* Lay in 'send time' */ - BUMP_MIB(&icmp_mib, icmpOutTimestampReps); + BUMP_MIB(&ipst->ips_icmp_mib, icmpOutTimestampReps); break; } default: @@ -2016,12 +1894,12 @@ icmp_inbound(queue_t *q, mblk_t *mp, boolean_t broadcast, ill_t *ill, if (mctl_present) { freeb(first_mp); } - icmp_redirect(mp); + icmp_redirect(ill, mp); return; case ICMP_DEST_UNREACHABLE: if (icmph->icmph_code == ICMP_FRAGMENTATION_NEEDED) { if (!icmp_inbound_too_big(icmph, ipha, ill, - zoneid, mp, iph_hdr_length)) { + zoneid, mp, iph_hdr_length, ipst)) { freemsg(first_mp); return; } @@ -2086,7 +1964,7 @@ icmp_inbound(queue_t *q, mblk_t *mp, boolean_t broadcast, ill_t *ill, ipif_refrele(ipif); } /* Reset time to live. */ - ipha->ipha_ttl = ip_def_ttl; + ipha->ipha_ttl = ipst->ips_ip_def_ttl; { /* Swap source and destination addresses */ ipaddr_t tmp; @@ -2132,7 +2010,7 @@ icmp_inbound(queue_t *q, mblk_t *mp, boolean_t broadcast, ill_t *ill, * accept packets for them afterwards. 
*/ src_ire = ire_ctable_lookup(ipha->ipha_dst, 0, IRE_LOCAL, - NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE); + NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE, ipst); if (src_ire == NULL) { ipif = ipif_get_next_ipif(NULL, ill); if (ipif == NULL) { @@ -2142,7 +2020,7 @@ icmp_inbound(queue_t *q, mblk_t *mp, boolean_t broadcast, ill_t *ill, } src_ire = ire_ftable_lookup(ipha->ipha_dst, 0, 0, IRE_INTERFACE, ipif, NULL, ALL_ZONES, 0, - NULL, MATCH_IRE_ILL | MATCH_IRE_TYPE); + NULL, MATCH_IRE_ILL | MATCH_IRE_TYPE, ipst); ipif_refrele(ipif); if (src_ire != NULL) { onlink = B_TRUE; @@ -2160,7 +2038,8 @@ icmp_inbound(queue_t *q, mblk_t *mp, boolean_t broadcast, ill_t *ill, * we attach a IPSEC_IN mp and clear ipsec_in_secure. */ ASSERT(first_mp == mp); - if ((first_mp = ipsec_in_alloc(B_TRUE)) == NULL) { + first_mp = ipsec_in_alloc(B_TRUE, ipst->ips_netstack); + if (first_mp == NULL) { BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); freemsg(mp); return; @@ -2182,8 +2061,10 @@ icmp_inbound(queue_t *q, mblk_t *mp, boolean_t broadcast, ill_t *ill, ii->ipsec_in_attach_if = B_TRUE; ii->ipsec_in_ill_index = ill->ill_phyint->phyint_ifindex; ii->ipsec_in_rill_index = recv_ill->ill_phyint->phyint_ifindex; + ii->ipsec_in_ns = ipst->ips_netstack; /* No netstack_hold */ } else { ii = (ipsec_in_t *)first_mp->b_rptr; + ii->ipsec_in_ns = ipst->ips_netstack; /* No netstack_hold */ } ii->ipsec_in_zoneid = zoneid; ASSERT(zoneid != ALL_ZONES); @@ -2191,7 +2072,7 @@ icmp_inbound(queue_t *q, mblk_t *mp, boolean_t broadcast, ill_t *ill, BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); return; } - BUMP_MIB(&icmp_mib, icmpOutMsgs); + BUMP_MIB(&ipst->ips_icmp_mib, icmpOutMsgs); put(WR(q), first_mp); } @@ -2204,6 +2085,7 @@ icmp_get_nexthop_addr(ipha_t *ipha, ill_t *ill, zoneid_t zoneid, mblk_t *mp) int hdr_length = IPH_HDR_LENGTH(ipha); uint16_t *up; uint32_t ports; + ip_stack_t *ipst = ill->ill_ipst; up = (uint16_t *)((uchar_t *)ipha + hdr_length); switch (ipha->ipha_protocol) { @@ -2214,7 +2096,7 @@ icmp_get_nexthop_addr(ipha_t *ipha, ill_t *ill, zoneid_t zoneid, mblk_t *mp) /* do a reverse lookup */ tcph = (tcph_t *)((uchar_t *)ipha + hdr_length); connp = ipcl_tcp_lookup_reversed_ipv4(ipha, tcph, - TCPS_LISTEN); + TCPS_LISTEN, ipst); break; } case IPPROTO_UDP: @@ -2228,7 +2110,8 @@ icmp_get_nexthop_addr(ipha_t *ipha, ill_t *ill, zoneid_t zoneid, mblk_t *mp) dstport = htons(ntohl(ports) & 0xFFFF); srcport = htons(ntohl(ports) >> 16); - connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(dstport)]; + connfp = &ipst->ips_ipcl_udp_fanout[ + IPCL_UDP_HASH(dstport, ipst)]; mutex_enter(&connfp->connf_lock); connp = connfp->connf_head; @@ -2253,10 +2136,11 @@ icmp_get_nexthop_addr(ipha_t *ipha, ill_t *ill, zoneid_t zoneid, mblk_t *mp) ((uint16_t *)&ports)[0] = up[1]; ((uint16_t *)&ports)[1] = up[0]; - if ((connp = sctp_find_conn(&map_src, &map_dst, ports, - 0, zoneid)) == NULL) { + connp = sctp_find_conn(&map_src, &map_dst, ports, + 0, zoneid, ipst->ips_netstack->netstack_sctp); + if (connp == NULL) { connp = ipcl_classify_raw(mp, IPPROTO_SCTP, - zoneid, ports, ipha); + zoneid, ports, ipha, ipst); } else { CONN_INC_REF(connp); SCTP_REFRELE(CONN2SCTP(connp)); @@ -2271,7 +2155,8 @@ icmp_get_nexthop_addr(ipha_t *ipha, ill_t *ill, zoneid_t zoneid, mblk_t *mp) ripha.ipha_dst = ipha->ipha_src; ripha.ipha_protocol = ipha->ipha_protocol; - connfp = &ipcl_proto_fanout[ipha->ipha_protocol]; + connfp = &ipst->ips_ipcl_proto_fanout[ + ipha->ipha_protocol]; mutex_enter(&connfp->connf_lock); connp = connfp->connf_head; for (connp = connfp->connf_head; connp != NULL; @@ 
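A second recurring conversion in these ICMP hunks is classification: the former global ipcl_proto_fanout and ipcl_udp_fanout tables become ips_ipcl_*_fanout arrays hanging off the stack, so a listener bound in one IP instance can never match traffic arriving on another instance's interfaces. A condensed, illustrative sketch of the per-stack fanout walk used above:

	connf_t	*connfp;
	conn_t	*connp;

	/* Only conns bound in this stack appear in this fanout table. */
	connfp = &ipst->ips_ipcl_proto_fanout[IPPROTO_ICMP];
	mutex_enter(&connfp->connf_lock);
	for (connp = connfp->connf_head; connp != NULL;
	    connp = connp->conn_next) {
		/* deliver a copy of the inbound ICMP message to each one */
	}
	mutex_exit(&connfp->connf_lock);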
-2308,7 +2193,8 @@ static int icmp_frag_size_table[] = */ static boolean_t icmp_inbound_too_big(icmph_t *icmph, ipha_t *ipha, ill_t *ill, - zoneid_t zoneid, mblk_t *mp, int iph_hdr_length) + zoneid_t zoneid, mblk_t *mp, int iph_hdr_length, + ip_stack_t *ipst) { ire_t *ire, *first_ire; int mtu; @@ -2345,11 +2231,11 @@ icmp_inbound_too_big(icmph_t *icmph, ipha_t *ipha, ill_t *ill, /* nexthop set */ first_ire = ire_ctable_lookup(ipha->ipha_dst, nexthop_addr, 0, NULL, ALL_ZONES, MBLK_GETLABEL(mp), - MATCH_IRE_MARK_PRIVATE_ADDR | MATCH_IRE_GW); + MATCH_IRE_MARK_PRIVATE_ADDR | MATCH_IRE_GW, ipst); } else { /* nexthop not set */ first_ire = ire_ctable_lookup(ipha->ipha_dst, 0, IRE_CACHE, - NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE); + NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE, ipst); } if (!first_ire) { @@ -2553,9 +2439,13 @@ icmp_inbound_error_fanout(queue_t *q, ill_t *ill, mblk_t *mp, ipsec_in_t *ii; tcph_t *tcph; conn_t *connp; + ip_stack_t *ipst; ASSERT(ill != NULL); + ASSERT(recv_ill != NULL); + ipst = recv_ill->ill_ipst; + first_mp = mp; if (mctl_present) { mp = first_mp->b_cont; @@ -2628,7 +2518,8 @@ icmp_inbound_error_fanout(queue_t *q, ill_t *ill, mblk_t *mp, * in the form we sent it out. */ tcph = (tcph_t *)((uchar_t *)ipha + hdr_length); - connp = ipcl_tcp_lookup_reversed_ipv4(ipha, tcph, TCPS_LISTEN); + connp = ipcl_tcp_lookup_reversed_ipv4(ipha, tcph, TCPS_LISTEN, + ipst); if (connp == NULL) goto discard_pkt; @@ -2677,6 +2568,7 @@ icmp_inbound_error_fanout(queue_t *q, ill_t *ill, mblk_t *mp, case IPPROTO_ESP: case IPPROTO_AH: { int ipsec_rc; + ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; /* * We need a IPSEC_IN in the front to fanout to AH/ESP. @@ -2718,7 +2610,7 @@ icmp_inbound_error_fanout(queue_t *q, ill_t *ill, mblk_t *mp, * to locate the ill. */ ASSERT(first_mp == mp); - first_mp = ipsec_in_alloc(B_TRUE); + first_mp = ipsec_in_alloc(B_TRUE, ipst->ips_netstack); if (first_mp == NULL) { freemsg(mp); BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); @@ -2738,8 +2630,8 @@ icmp_inbound_error_fanout(queue_t *q, ill_t *ill, mblk_t *mp, } ip2dbg(("icmp_inbound_error: ipsec\n")); - if (!ipsec_loaded()) { - ip_proto_not_sup(q, first_mp, 0, zoneid); + if (!ipsec_loaded(ipss)) { + ip_proto_not_sup(q, first_mp, 0, zoneid, ipst); return; } @@ -3139,9 +3031,8 @@ icmp_options_update(ipha_t *ipha) /* * Process received ICMP Redirect messages. */ -/* ARGSUSED */ static void -icmp_redirect(mblk_t *mp) +icmp_redirect(ill_t *ill, mblk_t *mp) { ipha_t *ipha; int iph_hdr_length; @@ -3153,12 +3044,16 @@ icmp_redirect(mblk_t *mp) ipaddr_t src, dst, gateway; iulp_t ulp_info = { 0 }; int error; + ip_stack_t *ipst; + + ASSERT(ill != NULL); + ipst = ill->ill_ipst; ipha = (ipha_t *)mp->b_rptr; iph_hdr_length = IPH_HDR_LENGTH(ipha); if (((mp->b_wptr - mp->b_rptr) - iph_hdr_length) < sizeof (icmph_t) + IP_SIMPLE_HDR_LENGTH) { - BUMP_MIB(&icmp_mib, icmpInErrors); + BUMP_MIB(&ipst->ips_icmp_mib, icmpInErrors); freemsg(mp); return; } @@ -3169,14 +3064,14 @@ icmp_redirect(mblk_t *mp) gateway = icmph->icmph_rd_gateway; /* Make sure the new gateway is reachable somehow. */ ire = ire_route_lookup(gateway, 0, 0, IRE_INTERFACE, NULL, NULL, - ALL_ZONES, NULL, MATCH_IRE_TYPE); + ALL_ZONES, NULL, MATCH_IRE_TYPE, ipst); /* * Make sure we had a route for the dest in question and that * that route was pointing to the old gateway (the source of the * redirect packet.) 
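icmp_inbound_too_big() above is the path-MTU-discovery receiver: it selects a new MTU for the matching IRE when a "fragmentation needed" error arrives. When the offending router predates RFC 1191 and reports an MTU of zero, the usual fallback, which is what the icmp_frag_size_table named in the hunk header exists for, is to step down a plateau table. A hedged, self-contained sketch of that fallback (plateau values are illustrative, in the spirit of RFC 1191, not copied from this file):

static const int frag_plateaus[] = {
	32000, 17914, 8166, 4352, 2002, 1496, 1006, 508, 296, 68
};

/* Return the next plateau strictly below the current path MTU. */
static int
next_lower_mtu(int cur_mtu)
{
	int i;

	for (i = 0; i < sizeof (frag_plateaus) / sizeof (int); i++) {
		if (frag_plateaus[i] < cur_mtu)
			return (frag_plateaus[i]);
	}
	return (68);		/* IPv4 minimum; give up below this */
}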
*/ prev_ire = ire_route_lookup(dst, 0, src, 0, NULL, NULL, ALL_ZONES, - NULL, MATCH_IRE_GW); + NULL, MATCH_IRE_GW, ipst); /* * Check that * the redirect was not from ourselves @@ -3185,7 +3080,7 @@ icmp_redirect(mblk_t *mp) if (!prev_ire || !ire || ire->ire_type == IRE_LOCAL) { - BUMP_MIB(&icmp_mib, icmpInBadRedirects); + BUMP_MIB(&ipst->ips_icmp_mib, icmpInBadRedirects); freemsg(mp); if (ire != NULL) ire_refrele(ire); @@ -3217,7 +3112,8 @@ icmp_redirect(mblk_t *mp) tmp_ire = ire_ftable_lookup(dst, 0, gateway, 0, NULL, &sire, ALL_ZONES, 0, NULL, - (MATCH_IRE_RECURSIVE | MATCH_IRE_GW | MATCH_IRE_DEFAULT)); + (MATCH_IRE_RECURSIVE | MATCH_IRE_GW | MATCH_IRE_DEFAULT), + ipst); if (sire != NULL) { bcopy(&sire->ire_uinfo, &ulp_info, sizeof (iulp_t)); /* @@ -3249,7 +3145,7 @@ icmp_redirect(mblk_t *mp) break; default: freemsg(mp); - BUMP_MIB(&icmp_mib, icmpInBadRedirects); + BUMP_MIB(&ipst->ips_icmp_mib, icmpInBadRedirects); ire_refrele(ire); return; } @@ -3278,7 +3174,8 @@ icmp_redirect(mblk_t *mp) (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), &ulp_info, NULL, - NULL); + NULL, + ipst); if (ire == NULL) { freemsg(mp); @@ -3287,14 +3184,14 @@ icmp_redirect(mblk_t *mp) } error = ire_add(&ire, NULL, NULL, NULL, B_FALSE); ire_refrele(save_ire); - atomic_inc_32(&ip_redirect_cnt); + atomic_inc_32(&ipst->ips_ip_redirect_cnt); if (error == 0) { ire_refrele(ire); /* Held in ire_add_v4 */ /* tell routing sockets that we received a redirect */ ip_rts_change(RTM_REDIRECT, dst, gateway, IP_HOST_MASK, 0, src, (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0, - (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR)); + (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR), ipst); } /* @@ -3303,7 +3200,7 @@ icmp_redirect(mblk_t *mp) * modifying an existing redirect. */ prev_ire = ire_ftable_lookup(dst, 0, src, IRE_HOST, NULL, NULL, - ALL_ZONES, 0, NULL, (MATCH_IRE_GW | MATCH_IRE_TYPE)); + ALL_ZONES, 0, NULL, (MATCH_IRE_GW | MATCH_IRE_TYPE), ipst); if (prev_ire != NULL) { if (prev_ire ->ire_flags & RTF_DYNAMIC) ire_delete(prev_ire); @@ -3317,7 +3214,8 @@ icmp_redirect(mblk_t *mp) * Generate an ICMP parameter problem message. */ static void -icmp_param_problem(queue_t *q, mblk_t *mp, uint8_t ptr, zoneid_t zoneid) +icmp_param_problem(queue_t *q, mblk_t *mp, uint8_t ptr, zoneid_t zoneid, + ip_stack_t *ipst) { icmph_t icmph; boolean_t mctl_present; @@ -3325,7 +3223,7 @@ icmp_param_problem(queue_t *q, mblk_t *mp, uint8_t ptr, zoneid_t zoneid) EXTRACT_PKT_MP(mp, first_mp, mctl_present); - if (!(mp = icmp_pkt_err_ok(mp))) { + if (!(mp = icmp_pkt_err_ok(mp, ipst))) { if (mctl_present) freeb(first_mp); return; @@ -3334,8 +3232,9 @@ icmp_param_problem(queue_t *q, mblk_t *mp, uint8_t ptr, zoneid_t zoneid) bzero(&icmph, sizeof (icmph_t)); icmph.icmph_type = ICMP_PARAM_PROBLEM; icmph.icmph_pp_ptr = ptr; - BUMP_MIB(&icmp_mib, icmpOutParmProbs); - icmp_pkt(q, first_mp, &icmph, sizeof (icmph_t), mctl_present, zoneid); + BUMP_MIB(&ipst->ips_icmp_mib, icmpOutParmProbs); + icmp_pkt(q, first_mp, &icmph, sizeof (icmph_t), mctl_present, zoneid, + ipst); } /* @@ -3352,7 +3251,7 @@ icmp_param_problem(queue_t *q, mblk_t *mp, uint8_t ptr, zoneid_t zoneid) */ static void icmp_pkt(queue_t *q, mblk_t *mp, void *stuff, size_t len, - boolean_t mctl_present, zoneid_t zoneid) + boolean_t mctl_present, zoneid_t zoneid, ip_stack_t *ipst) { ipaddr_t dst; icmph_t *icmph; @@ -3394,7 +3293,8 @@ icmp_pkt(queue_t *q, mblk_t *mp, void *stuff, size_t len, * Convert the IPSEC_IN to IPSEC_OUT. 
*/ if (!ipsec_in_to_out(ipsec_mp, ipha, NULL)) { - BUMP_MIB(&ip_mib, ipIfStatsOutDiscards); + BUMP_MIB(&ipst->ips_ip_mib, + ipIfStatsOutDiscards); return; } io = (ipsec_out_t *)ipsec_mp->b_rptr; @@ -3422,9 +3322,10 @@ icmp_pkt(queue_t *q, mblk_t *mp, void *stuff, size_t len, */ ipsec_in_t *ii; ASSERT(DB_TYPE(mp) == M_DATA); - if ((ipsec_mp = ipsec_in_alloc(B_TRUE)) == NULL) { + ipsec_mp = ipsec_in_alloc(B_TRUE, ipst->ips_netstack); + if (ipsec_mp == NULL) { freemsg(mp); - BUMP_MIB(&ip_mib, ipIfStatsOutDiscards); + BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); return; } ii = (ipsec_in_t *)ipsec_mp->b_rptr; @@ -3445,7 +3346,7 @@ icmp_pkt(queue_t *q, mblk_t *mp, void *stuff, size_t len, * Convert the IPSEC_IN to IPSEC_OUT. */ if (!ipsec_in_to_out(ipsec_mp, ipha, NULL)) { - BUMP_MIB(&ip_mib, ipIfStatsOutDiscards); + BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); return; } io = (ipsec_out_t *)ipsec_mp->b_rptr; @@ -3455,7 +3356,7 @@ icmp_pkt(queue_t *q, mblk_t *mp, void *stuff, size_t len, dst = ipha->ipha_src; ire = ire_route_lookup(ipha->ipha_dst, 0, 0, (IRE_LOCAL|IRE_LOOPBACK), - NULL, NULL, zoneid, NULL, MATCH_IRE_TYPE); + NULL, NULL, zoneid, NULL, MATCH_IRE_TYPE, ipst); if (ire != NULL && (ire->ire_zoneid == zoneid || ire->ire_zoneid == ALL_ZONES)) { src = ipha->ipha_dst; @@ -3463,9 +3364,10 @@ icmp_pkt(queue_t *q, mblk_t *mp, void *stuff, size_t len, if (ire != NULL) ire_refrele(ire); ire = ire_route_lookup(dst, 0, 0, 0, NULL, NULL, zoneid, NULL, - (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE|MATCH_IRE_ZONEONLY)); + (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE|MATCH_IRE_ZONEONLY), + ipst); if (ire == NULL) { - BUMP_MIB(&ip_mib, ipIfStatsOutNoRoutes); + BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutNoRoutes); freemsg(ipsec_mp); return; } @@ -3481,18 +3383,18 @@ icmp_pkt(queue_t *q, mblk_t *mp, void *stuff, size_t len, */ if (io == NULL) { /* This is not a IPSEC_OUT type control msg */ - BUMP_MIB(&ip_mib, ipIfStatsOutNoRoutes); + BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutNoRoutes); freemsg(ipsec_mp); return; } ill = ill_lookup_on_ifindex(io->ipsec_out_ill_index, B_FALSE, - NULL, NULL, NULL, NULL); + NULL, NULL, NULL, NULL, ipst); if (ill != NULL) { ipif = ipif_get_next_ipif(NULL, ill); ill_refrele(ill); } if (ipif == NULL) { - BUMP_MIB(&ip_mib, ipIfStatsOutNoRoutes); + BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutNoRoutes); freemsg(ipsec_mp); return; } @@ -3512,7 +3414,7 @@ icmp_pkt(queue_t *q, mblk_t *mp, void *stuff, size_t len, (uchar_t *)ipha + len_needed + 1 <= mp->b_wptr) { len_needed += IPH_HDR_LENGTH(((uchar_t *)ipha + len_needed)); } - len_needed += ip_icmp_return; + len_needed += ipst->ips_ip_icmp_return; msg_len = msgdsize(mp); if (msg_len > len_needed) { (void) adjmsg(mp, len_needed - msg_len); @@ -3520,7 +3422,7 @@ icmp_pkt(queue_t *q, mblk_t *mp, void *stuff, size_t len, } mp1 = allocb(sizeof (icmp_ipha) + len, BPRI_HI); if (mp1 == NULL) { - BUMP_MIB(&icmp_mib, icmpOutErrors); + BUMP_MIB(&ipst->ips_icmp_mib, icmpOutErrors); freemsg(ipsec_mp); return; } @@ -3551,7 +3453,7 @@ icmp_pkt(queue_t *q, mblk_t *mp, void *stuff, size_t len, *ipha = icmp_ipha; ipha->ipha_src = src; ipha->ipha_dst = dst; - ipha->ipha_ttl = ip_def_ttl; + ipha->ipha_ttl = ipst->ips_ip_def_ttl; msg_len += sizeof (icmp_ipha) + len; if (msg_len > IP_MAXPACKET) { (void) adjmsg(mp, IP_MAXPACKET - msg_len); @@ -3564,7 +3466,7 @@ icmp_pkt(queue_t *q, mblk_t *mp, void *stuff, size_t len, icmph->icmph_checksum = IP_CSUM(mp, (int32_t)sizeof (ipha_t), 0); if (icmph->icmph_checksum == 0) icmph->icmph_checksum = 0xFFFF; - 
BUMP_MIB(&icmp_mib, icmpOutMsgs); + BUMP_MIB(&ipst->ips_icmp_mib, icmpOutMsgs); put(q, ipsec_mp); } @@ -3579,42 +3481,43 @@ icmp_pkt(queue_t *q, mblk_t *mp, void *stuff, size_t len, * icmp_pkt_err_sent - number of packets sent in current burst */ boolean_t -icmp_err_rate_limit(void) +icmp_err_rate_limit(ip_stack_t *ipst) { clock_t now = TICK_TO_MSEC(lbolt); uint_t refilled; /* Number of packets refilled in tbf since last */ - uint_t err_interval = ip_icmp_err_interval; /* Guard against changes */ + /* Guard against changes by loading into local variable */ + uint_t err_interval = ipst->ips_ip_icmp_err_interval; if (err_interval == 0) return (B_FALSE); - if (icmp_pkt_err_last > now) { + if (ipst->ips_icmp_pkt_err_last > now) { /* 100HZ lbolt in ms for 32bit arch wraps every 49.7 days */ - icmp_pkt_err_last = 0; - icmp_pkt_err_sent = 0; + ipst->ips_icmp_pkt_err_last = 0; + ipst->ips_icmp_pkt_err_sent = 0; } /* * If we are in a burst update the token bucket filter. * Update the "last" time to be close to "now" but make sure * we don't loose precision. */ - if (icmp_pkt_err_sent != 0) { - refilled = (now - icmp_pkt_err_last)/err_interval; - if (refilled > icmp_pkt_err_sent) { - icmp_pkt_err_sent = 0; + if (ipst->ips_icmp_pkt_err_sent != 0) { + refilled = (now - ipst->ips_icmp_pkt_err_last)/err_interval; + if (refilled > ipst->ips_icmp_pkt_err_sent) { + ipst->ips_icmp_pkt_err_sent = 0; } else { - icmp_pkt_err_sent -= refilled; - icmp_pkt_err_last += refilled * err_interval; + ipst->ips_icmp_pkt_err_sent -= refilled; + ipst->ips_icmp_pkt_err_last += refilled * err_interval; } } - if (icmp_pkt_err_sent == 0) { + if (ipst->ips_icmp_pkt_err_sent == 0) { /* Start of new burst */ - icmp_pkt_err_last = now; + ipst->ips_icmp_pkt_err_last = now; } - if (icmp_pkt_err_sent < ip_icmp_err_burst) { - icmp_pkt_err_sent++; + if (ipst->ips_icmp_pkt_err_sent < ipst->ips_ip_icmp_err_burst) { + ipst->ips_icmp_pkt_err_sent++; ip1dbg(("icmp_err_rate_limit: %d sent in burst\n", - icmp_pkt_err_sent)); + ipst->ips_icmp_pkt_err_sent)); return (B_FALSE); } ip1dbg(("icmp_err_rate_limit: dropped\n")); @@ -3628,7 +3531,7 @@ icmp_err_rate_limit(void) * ICMP error packet should be sent. 
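The rate limiter above is a small token bucket: ips_icmp_pkt_err_sent counts errors in the current burst, and elapsed time refills the bucket at one error per ips_ip_icmp_err_interval milliseconds. A worked example with assumed tunable values (illustrative, not defaults quoted from this file):

/*
 * Assume ips_ip_icmp_err_interval = 100 ms and ips_ip_icmp_err_burst = 10.
 *
 *   t = 0 ms     10 errors in a row are allowed; the 11th is dropped
 *                (ips_icmp_pkt_err_sent has reached the burst limit).
 *   t = 250 ms   refilled = (250 - 0) / 100 = 2, so the counter drops
 *                to 8 and two more errors may go out immediately.
 *   t = 2000 ms  refilled exceeds the outstanding count, the counter
 *                resets to 0, and a fresh burst of 10 begins.
 */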
*/ static mblk_t * -icmp_pkt_err_ok(mblk_t *mp) +icmp_pkt_err_ok(mblk_t *mp, ip_stack_t *ipst) { icmph_t *icmph; ipha_t *ipha; @@ -3640,20 +3543,20 @@ icmp_pkt_err_ok(mblk_t *mp) return (NULL); ipha = (ipha_t *)mp->b_rptr; if (ip_csum_hdr(ipha)) { - BUMP_MIB(&ip_mib, ipIfStatsInCksumErrs); + BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsInCksumErrs); freemsg(mp); return (NULL); } src_ire = ire_ctable_lookup(ipha->ipha_dst, 0, IRE_BROADCAST, - NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE); + NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE, ipst); dst_ire = ire_ctable_lookup(ipha->ipha_src, 0, IRE_BROADCAST, - NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE); + NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE, ipst); if (src_ire != NULL || dst_ire != NULL || CLASSD(ipha->ipha_dst) || CLASSD(ipha->ipha_src) || (ntohs(ipha->ipha_fragment_offset_and_flags) & IPH_OFFSET)) { /* Note: only errors to the fragment with offset 0 */ - BUMP_MIB(&icmp_mib, icmpOutDrops); + BUMP_MIB(&ipst->ips_icmp_mib, icmpOutDrops); freemsg(mp); if (src_ire != NULL) ire_refrele(src_ire); @@ -3669,7 +3572,7 @@ icmp_pkt_err_ok(mblk_t *mp) len_needed = IPH_HDR_LENGTH(ipha) + ICMPH_SIZE; if (mp->b_wptr - mp->b_rptr < len_needed) { if (!pullupmsg(mp, len_needed)) { - BUMP_MIB(&icmp_mib, icmpInErrors); + BUMP_MIB(&ipst->ips_icmp_mib, icmpInErrors); freemsg(mp); return (NULL); } @@ -3683,7 +3586,7 @@ icmp_pkt_err_ok(mblk_t *mp) case ICMP_TIME_EXCEEDED: case ICMP_PARAM_PROBLEM: case ICMP_REDIRECT: - BUMP_MIB(&icmp_mib, icmpOutDrops); + BUMP_MIB(&ipst->ips_icmp_mib, icmpOutDrops); freemsg(mp); return (NULL); default: @@ -3696,11 +3599,11 @@ icmp_pkt_err_ok(mblk_t *mp) */ if (is_system_labeled() && !tsol_can_reply_error(mp)) { ip2dbg(("icmp_pkt_err_ok: can't respond to packet\n")); - BUMP_MIB(&icmp_mib, icmpOutDrops); + BUMP_MIB(&ipst->ips_icmp_mib, icmpOutDrops); freemsg(mp); return (NULL); } - if (icmp_err_rate_limit()) { + if (icmp_err_rate_limit(ipst)) { /* * Only send ICMP error packets every so often. * This should be done on a per port/source basis, @@ -3716,7 +3619,7 @@ icmp_pkt_err_ok(mblk_t *mp) * Generate an ICMP redirect message. */ static void -icmp_send_redirect(queue_t *q, mblk_t *mp, ipaddr_t gateway) +icmp_send_redirect(queue_t *q, mblk_t *mp, ipaddr_t gateway, ip_stack_t *ipst) { icmph_t icmph; @@ -3726,7 +3629,7 @@ icmp_send_redirect(queue_t *q, mblk_t *mp, ipaddr_t gateway) */ ASSERT(mp->b_datap->db_type == M_DATA); - if (!(mp = icmp_pkt_err_ok(mp))) { + if (!(mp = icmp_pkt_err_ok(mp, ipst))) { return; } @@ -3734,16 +3637,17 @@ icmp_send_redirect(queue_t *q, mblk_t *mp, ipaddr_t gateway) icmph.icmph_type = ICMP_REDIRECT; icmph.icmph_code = 1; icmph.icmph_rd_gateway = gateway; - BUMP_MIB(&icmp_mib, icmpOutRedirects); + BUMP_MIB(&ipst->ips_icmp_mib, icmpOutRedirects); /* Redirects sent by router, and router is global zone */ - icmp_pkt(q, mp, &icmph, sizeof (icmph_t), B_FALSE, GLOBAL_ZONEID); + icmp_pkt(q, mp, &icmph, sizeof (icmph_t), B_FALSE, GLOBAL_ZONEID, ipst); } /* * Generate an ICMP time exceeded message. 
*/ void -icmp_time_exceeded(queue_t *q, mblk_t *mp, uint8_t code, zoneid_t zoneid) +icmp_time_exceeded(queue_t *q, mblk_t *mp, uint8_t code, zoneid_t zoneid, + ip_stack_t *ipst) { icmph_t icmph; boolean_t mctl_present; @@ -3751,7 +3655,7 @@ icmp_time_exceeded(queue_t *q, mblk_t *mp, uint8_t code, zoneid_t zoneid) EXTRACT_PKT_MP(mp, first_mp, mctl_present); - if (!(mp = icmp_pkt_err_ok(mp))) { + if (!(mp = icmp_pkt_err_ok(mp, ipst))) { if (mctl_present) freeb(first_mp); return; @@ -3760,15 +3664,17 @@ icmp_time_exceeded(queue_t *q, mblk_t *mp, uint8_t code, zoneid_t zoneid) bzero(&icmph, sizeof (icmph_t)); icmph.icmph_type = ICMP_TIME_EXCEEDED; icmph.icmph_code = code; - BUMP_MIB(&icmp_mib, icmpOutTimeExcds); - icmp_pkt(q, first_mp, &icmph, sizeof (icmph_t), mctl_present, zoneid); + BUMP_MIB(&ipst->ips_icmp_mib, icmpOutTimeExcds); + icmp_pkt(q, first_mp, &icmph, sizeof (icmph_t), mctl_present, zoneid, + ipst); } /* * Generate an ICMP unreachable message. */ void -icmp_unreachable(queue_t *q, mblk_t *mp, uint8_t code, zoneid_t zoneid) +icmp_unreachable(queue_t *q, mblk_t *mp, uint8_t code, zoneid_t zoneid, + ip_stack_t *ipst) { icmph_t icmph; mblk_t *first_mp; @@ -3776,7 +3682,7 @@ icmp_unreachable(queue_t *q, mblk_t *mp, uint8_t code, zoneid_t zoneid) EXTRACT_PKT_MP(mp, first_mp, mctl_present); - if (!(mp = icmp_pkt_err_ok(mp))) { + if (!(mp = icmp_pkt_err_ok(mp, ipst))) { if (mctl_present) freeb(first_mp); return; @@ -3785,10 +3691,10 @@ icmp_unreachable(queue_t *q, mblk_t *mp, uint8_t code, zoneid_t zoneid) bzero(&icmph, sizeof (icmph_t)); icmph.icmph_type = ICMP_DEST_UNREACHABLE; icmph.icmph_code = code; - BUMP_MIB(&icmp_mib, icmpOutDestUnreachs); + BUMP_MIB(&ipst->ips_icmp_mib, icmpOutDestUnreachs); ip2dbg(("send icmp destination unreachable code %d\n", code)); icmp_pkt(q, first_mp, (char *)&icmph, sizeof (icmph_t), mctl_present, - zoneid); + zoneid, ipst); } /* @@ -3814,6 +3720,7 @@ ipif_dup_recovery(void *arg) mblk_t *arp_add_mp; mblk_t *arp_del_mp; area_t *area; + ip_stack_t *ipst = ill->ill_ipst; ipif->ipif_recovery_id = 0; @@ -3853,10 +3760,10 @@ alloc_fail: */ freemsg(arp_add_mp); mutex_enter(&ill->ill_lock); - if (ip_dup_recovery > 0 && ipif->ipif_recovery_id == 0 && + if (ipst->ips_ip_dup_recovery > 0 && ipif->ipif_recovery_id == 0 && !(ipif->ipif_state_flags & IPIF_CONDEMNED)) { ipif->ipif_recovery_id = timeout(ipif_dup_recovery, ipif, - MSEC_TO_TICK(ip_dup_recovery)); + MSEC_TO_TICK(ipst->ips_ip_dup_recovery)); } mutex_exit(&ill->ill_lock); } @@ -3878,6 +3785,7 @@ ip_arp_excl(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) char sbuf[INET_ADDRSTRLEN]; const char *failtype; boolean_t bring_up; + ip_stack_t *ipst = ill->ill_ipst; switch (((arcn_t *)mp->b_rptr)->arcn_code) { case AR_CN_READY: @@ -3922,9 +3830,10 @@ ip_arp_excl(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) !(ipif->ipif_flags & (IPIF_DHCPRUNNING|IPIF_TEMPORARY)) && ill->ill_net_type == IRE_IF_RESOLVER && !(ipif->ipif_state_flags & IPIF_CONDEMNED) && - ip_dup_recovery > 0 && ipif->ipif_recovery_id == 0) { + ipst->ips_ip_dup_recovery > 0 && + ipif->ipif_recovery_id == 0) { ipif->ipif_recovery_id = timeout(ipif_dup_recovery, - ipif, MSEC_TO_TICK(ip_dup_recovery)); + ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery)); continue; } @@ -3979,9 +3888,9 @@ ip_arp_excl(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) if (!(ipif->ipif_flags & (IPIF_DHCPRUNNING|IPIF_TEMPORARY)) && ill->ill_net_type == IRE_IF_RESOLVER && !(ipif->ipif_state_flags & IPIF_CONDEMNED) && - ip_dup_recovery > 0) { + 
ipst->ips_ip_dup_recovery > 0) { ipif->ipif_recovery_id = timeout(ipif_dup_recovery, - ipif, MSEC_TO_TICK(ip_dup_recovery)); + ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery)); } mutex_exit(&ill->ill_lock); } @@ -4029,6 +3938,17 @@ ip_arp_news(queue_t *q, mblk_t *mp) boolean_t isv6 = B_FALSE; ipif_t *ipif; ill_t *ill; + ip_stack_t *ipst; + + if (CONN_Q(q)) { + conn_t *connp = Q_TO_CONN(q); + + ipst = connp->conn_netstack->netstack_ip; + } else { + ill_t *ill = (ill_t *)q->q_ptr; + + ipst = ill->ill_ipst; + } if ((mp->b_wptr - mp->b_rptr) < sizeof (arcn_t) || !mp->b_cont) { if (q->q_next) { @@ -4083,11 +4003,12 @@ ip_arp_news(queue_t *q, mblk_t *mp) (void) mac_colon_addr((uint8_t *)(arh + 1), arh->arh_hlen, hbuf, sizeof (hbuf)); (void) ip_dot_addr(src, sbuf); - if (isv6) - ire = ire_cache_lookup_v6(&v6src, ALL_ZONES, NULL); - else - ire = ire_cache_lookup(src, ALL_ZONES, NULL); - + if (isv6) { + ire = ire_cache_lookup_v6(&v6src, ALL_ZONES, NULL, + ipst); + } else { + ire = ire_cache_lookup(src, ALL_ZONES, NULL, ipst); + } if (ire != NULL && IRE_IS_LOCAL(ire)) { uint32_t now; uint32_t maxage; @@ -4104,15 +4025,15 @@ ip_arp_news(queue_t *q, mblk_t *mp) ASSERT(ipif != NULL); now = gethrestime_sec(); maxage = now - ire->ire_create_time; - if (maxage > ip_max_temp_idle) - maxage = ip_max_temp_idle; + if (maxage > ipst->ips_ip_max_temp_idle) + maxage = ipst->ips_ip_max_temp_idle; lused = drv_hztousec(ddi_get_lbolt() - ire->ire_last_used_time) / MICROSEC + 1; if (lused >= maxage && (ipif->ipif_flags & (IPIF_DHCPRUNNING | IPIF_TEMPORARY))) - maxdefense = ip_max_temp_defend; + maxdefense = ipst->ips_ip_max_temp_defend; else - maxdefense = ip_max_defend; + maxdefense = ipst->ips_ip_max_defend; /* * Now figure out how many times we've defended @@ -4121,7 +4042,8 @@ ip_arp_news(queue_t *q, mblk_t *mp) */ mutex_enter(&ire->ire_lock); if ((defs = ire->ire_defense_count) > 0 && - now - ire->ire_defense_time > ip_defend_interval) { + now - ire->ire_defense_time > + ipst->ips_ip_defend_interval) { ire->ire_defense_count = defs = 0; } ire->ire_defense_count++; @@ -4171,7 +4093,7 @@ ip_arp_news(queue_t *q, mblk_t *mp) * Delete the IRE cache entry and NCE for this * v6 address */ - ip_ire_clookup_and_delete_v6(&v6src); + ip_ire_clookup_and_delete_v6(&v6src, ipst); /* * If v6src is a non-zero, it's a router address * as below. Do the same sort of thing to clean @@ -4180,7 +4102,7 @@ ip_arp_news(queue_t *q, mblk_t *mp) */ if (!IN6_IS_ADDR_UNSPECIFIED(&v6src)) { ire_walk_v6(ire_delete_cache_gw_v6, - (char *)&v6src, ALL_ZONES); + (char *)&v6src, ALL_ZONES, ipst); } } else { nce_hw_map_t hwm; @@ -4201,7 +4123,7 @@ ip_arp_news(queue_t *q, mblk_t *mp) hwm.hwm_addr = src; hwm.hwm_hwlen = arh->arh_hlen; hwm.hwm_hwaddr = (uchar_t *)(arh + 1); - ndp_walk_common(&ndp4, NULL, + ndp_walk_common(ipst->ips_ndp4, NULL, (pfi_t)nce_delete_hw_changed, &hwm, ALL_ZONES); } break; @@ -4213,7 +4135,7 @@ ip_arp_news(queue_t *q, mblk_t *mp) if (!(ill->ill_phyint->phyint_flags & PHYI_RUNNING)) break; ipif = ipif_lookup_addr(src, ill, ALL_ZONES, NULL, NULL, - NULL, NULL); + NULL, NULL, ipst); if (ipif != NULL) { /* * If this is a duplicate recovery, then we now need to @@ -4241,7 +4163,7 @@ ip_arp_news(queue_t *q, mblk_t *mp) ipif->ipif_addr_ready = 1; ipif_refrele(ipif); } - ire = ire_cache_lookup(src, ALL_ZONES, MBLK_GETLABEL(mp)); + ire = ire_cache_lookup(src, ALL_ZONES, MBLK_GETLABEL(mp), ipst); if (ire != NULL) { ire->ire_defense_count = 0; ire_refrele(ire); @@ -4266,7 +4188,8 @@ ip_arp_news(queue_t *q, mblk_t *mp) * application. 
*/ mblk_t * -ip_add_info(mblk_t *data_mp, ill_t *ill, uint_t flags, zoneid_t zoneid) +ip_add_info(mblk_t *data_mp, ill_t *ill, uint_t flags, zoneid_t zoneid, + ip_stack_t *ipst) { mblk_t *mp; ip_pktinfo_t *pinfo; @@ -4305,7 +4228,7 @@ ip_add_info(mblk_t *data_mp, ill_t *ill, uint_t flags, zoneid_t zoneid) * ZONEONLY. */ ire = ire_ctable_lookup(ipha->ipha_dst, 0, 0, ipif, - zoneid, NULL, MATCH_IRE_ILL_GROUP); + zoneid, NULL, MATCH_IRE_ILL_GROUP, ipst); if (ire == NULL) { /* * packet must have come on a different @@ -4315,7 +4238,7 @@ ip_add_info(mblk_t *data_mp, ill_t *ill, uint_t flags, zoneid_t zoneid) * for SECATTR and ZONEONLY. */ ire = ire_ctable_lookup(ipha->ipha_dst, 0, 0, - ipif, zoneid, NULL, NULL); + ipif, zoneid, NULL, NULL, ipst); } if (ire == NULL) { @@ -4639,6 +4562,7 @@ ip_bind_laddr(conn_t *connp, mblk_t *mp, ipaddr_t src_addr, uint16_t lport, mblk_t *policy_mp; ipif_t *ipif; zoneid_t zoneid; + ip_stack_t *ipst = connp->conn_netstack->netstack_ip; if (ipsec_policy_set) { policy_mp = mp->b_cont; @@ -4657,7 +4581,7 @@ ip_bind_laddr(conn_t *connp, mblk_t *mp, ipaddr_t src_addr, uint16_t lport, if (src_addr) { src_ire = ire_route_lookup(src_addr, 0, 0, 0, - NULL, NULL, zoneid, NULL, MATCH_IRE_ZONEONLY); + NULL, NULL, zoneid, NULL, MATCH_IRE_ZONEONLY, ipst); /* * If an address other than 0.0.0.0 is requested, * we verify that it is a valid address for bind @@ -4691,7 +4615,7 @@ ip_bind_laddr(conn_t *connp, mblk_t *mp, ipaddr_t src_addr, uint16_t lport, *mp->b_wptr++ = (char)connp->conn_ulp; if ((ipif = ipif_lookup_addr(src_addr, NULL, zoneid, CONNP_TO_WQ(connp), mp, ip_wput_nondata, - &error)) != NULL) { + &error, ipst)) != NULL) { ipif_refrele(ipif); } else if (error == EINPROGRESS) { if (src_ire != NULL) @@ -4709,7 +4633,8 @@ ip_bind_laddr(conn_t *connp, mblk_t *mp, ipaddr_t src_addr, uint16_t lport, src_ire = ire_ctable_lookup( INADDR_BROADCAST, INADDR_ANY, IRE_BROADCAST, NULL, zoneid, NULL, - (MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY)); + (MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY), + ipst); if (src_ire == NULL || !ire_requested) error = EADDRNOTAVAIL; } else { @@ -4767,7 +4692,7 @@ ip_bind_laddr(conn_t *connp, mblk_t *mp, ipaddr_t src_addr, uint16_t lport, if (error == 0) { if (ire_requested) { - if (!ip_bind_insert_ire(mp, src_ire, NULL)) { + if (!ip_bind_insert_ire(mp, src_ire, NULL, ipst)) { error = -1; /* Falls through to bad_addr */ } @@ -4836,6 +4761,7 @@ ip_bind_connected(conn_t *connp, mblk_t *mp, ipaddr_t *src_addrp, ill_t *ill = NULL; zoneid_t zoneid; ipaddr_t src_addr = *src_addrp; + ip_stack_t *ipst = connp->conn_netstack->netstack_ip; src_ire = dst_ire = NULL; protocol = *mp->b_wptr & 0xFF; @@ -4857,7 +4783,7 @@ ip_bind_connected(conn_t *connp, mblk_t *mp, ipaddr_t *src_addrp, NULL, zoneid, MBLK_GETLABEL(mp), (MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | MATCH_IRE_RJ_BHOLE | - MATCH_IRE_SECATTR)); + MATCH_IRE_SECATTR), ipst); } else { /* * If conn_dontroute is set or if conn_nexthop_set is set, @@ -4867,8 +4793,7 @@ ip_bind_connected(conn_t *connp, mblk_t *mp, ipaddr_t *src_addrp, ipif_t *ipif; ipif = ipif_lookup_onlink_addr(connp->conn_dontroute ? 
- dst_addr : connp->conn_nexthop_v4, - connp->conn_zoneid); + dst_addr : connp->conn_nexthop_v4, zoneid, ipst); if (ipif == NULL) { error = ENETUNREACH; goto bad_addr; @@ -4879,13 +4804,13 @@ ip_bind_connected(conn_t *connp, mblk_t *mp, ipaddr_t *src_addrp, if (connp->conn_nexthop_set) { dst_ire = ire_route_lookup(connp->conn_nexthop_v4, 0, 0, 0, NULL, NULL, zoneid, MBLK_GETLABEL(mp), - MATCH_IRE_SECATTR); + MATCH_IRE_SECATTR, ipst); } else { dst_ire = ire_route_lookup(dst_addr, 0, 0, 0, NULL, &sire, zoneid, MBLK_GETLABEL(mp), (MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | - MATCH_IRE_SECATTR)); + MATCH_IRE_SECATTR), ipst); } } /* @@ -4943,7 +4868,7 @@ ip_bind_connected(conn_t *connp, mblk_t *mp, ipaddr_t *src_addrp, if (dst_ire != NULL && is_system_labeled() && !IPCL_IS_TCP(connp) && tsol_compute_label(DB_CREDDEF(mp, connp->conn_cred), dst_addr, NULL, - connp->conn_mac_exempt) != 0) { + connp->conn_mac_exempt, ipst) != 0) { error = EHOSTUNREACH; if (ip_debug > 2) { pr_addr_dbg("ip_bind_connected: no label for dst %s\n", @@ -4983,10 +4908,11 @@ ip_bind_connected(conn_t *connp, mblk_t *mp, ipaddr_t *src_addrp, if (!ipsec_policy_set && dst_ire != NULL && !(dst_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK | IRE_BROADCAST)) && (ill = ire_to_ill(dst_ire), ill != NULL)) { - if (ip_lso_outbound && ILL_LSO_CAPABLE(ill)) { + if (ipst->ips_ip_lso_outbound && ILL_LSO_CAPABLE(ill)) { lso_dst_ire = dst_ire; IRE_REFHOLD(lso_dst_ire); - } else if (ip_multidata_outbound && ILL_MDT_CAPABLE(ill)) { + } else if (ipst->ips_ip_multidata_outbound && + ILL_MDT_CAPABLE(ill)) { md_dst_ire = dst_ire; IRE_REFHOLD(md_dst_ire); } @@ -5003,7 +4929,7 @@ ip_bind_connected(conn_t *connp, mblk_t *mp, ipaddr_t *src_addrp, src_ire = ire_ftable_lookup(dst_addr, 0, 0, 0, NULL, NULL, zoneid, 0, NULL, MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | - MATCH_IRE_RJ_BHOLE); + MATCH_IRE_RJ_BHOLE, ipst); if (src_ire == NULL) { error = EHOSTUNREACH; goto bad_addr; @@ -5118,7 +5044,7 @@ ip_bind_connected(conn_t *connp, mblk_t *mp, ipaddr_t *src_addrp, */ ASSERT(src_ire == NULL); src_ire = ire_route_lookup(src_addr, 0, 0, 0, NULL, - NULL, zoneid, NULL, MATCH_IRE_ZONEONLY); + NULL, zoneid, NULL, MATCH_IRE_ZONEONLY, ipst); /* src_ire must be a local|loopback */ if (!IRE_IS_LOCAL(src_ire)) { if (ip_debug > 2) { @@ -5180,7 +5106,7 @@ ip_bind_connected(conn_t *connp, mblk_t *mp, ipaddr_t *src_addrp, if (sire != NULL) { ulp_info = &(sire->ire_uinfo); } - if (!ip_bind_insert_ire(mp, dst_ire, ulp_info)) { + if (!ip_bind_insert_ire(mp, dst_ire, ulp_info, ipst)) { error = -1; goto bad_addr; } @@ -5282,7 +5208,7 @@ bad_addr: * Prefers dst_ire over src_ire. */ static boolean_t -ip_bind_insert_ire(mblk_t *mp, ire_t *ire, iulp_t *ulp_info) +ip_bind_insert_ire(mblk_t *mp, ire_t *ire, iulp_t *ulp_info, ip_stack_t *ipst) { mblk_t *mp1; ire_t *ret_ire = NULL; @@ -5307,7 +5233,7 @@ ip_bind_insert_ire(mblk_t *mp, ire_t *ire, iulp_t *ulp_info) * Pass the latest setting of the ip_path_mtu_discovery and * copy the ulp info if any. */ - ret_ire->ire_frag_flag |= (ip_path_mtu_discovery) ? + ret_ire->ire_frag_flag |= (ipst->ips_ip_path_mtu_discovery) ? 
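These ip_bind_connected() hunks replace module-global tunables (ip_lso_outbound, ip_multidata_outbound, ip_path_mtu_discovery) with fields of the ip_stack_t obtained from the conn. A small model of consulting per-stack tunables at bind time, with hypothetical names for the stack and destination state:

    #include <stdbool.h>
    #include <stdint.h>

    #define IPH_DF 0x4000               /* IPv4 don't-fragment flag */

    /* Hypothetical per-stack tunables; each stack instance owns its copy. */
    struct ip_stack {
        bool lso_outbound;
        bool multidata_outbound;
        bool path_mtu_discovery;
    };

    enum offload { OFFLOAD_NONE, OFFLOAD_LSO, OFFLOAD_MDT };

    struct dest_choice {
        bool         ill_lso_capable;
        bool         ill_mdt_capable;
        enum offload choice;
        uint16_t     frag_flag;
    };

    /* Pick an offload strategy and DF policy from the stack, not from globals. */
    static void
    apply_stack_policy(struct dest_choice *dc, const struct ip_stack *ipst)
    {
        if (ipst->lso_outbound && dc->ill_lso_capable)
            dc->choice = OFFLOAD_LSO;
        else if (ipst->multidata_outbound && dc->ill_mdt_capable)
            dc->choice = OFFLOAD_MDT;
        else
            dc->choice = OFFLOAD_NONE;
        dc->frag_flag |= ipst->path_mtu_discovery ? IPH_DF : 0;
    }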
IPH_DF : 0; if (ulp_info != NULL) { bcopy(ulp_info, &(ret_ire->ire_uinfo), @@ -5436,6 +5362,7 @@ ip_modclose(ill_t *ill) ipif_t *ipif; queue_t *q = ill->ill_rq; hook_nic_event_t *info; + ip_stack_t *ipst = ill->ill_ipst; clock_t timeout; /* @@ -5536,8 +5463,16 @@ ip_modclose(ill_t *ill) mutex_exit(&ill->ill_lock); + /* + * ill_delete_tail drops reference on ill_ipst, but we need to keep + * it held until the end of the function since the cleanup + * below needs to be able to use the ip_stack_t. + */ + netstack_hold(ipst->ips_netstack); + /* qprocsoff is called in ill_delete_tail */ ill_delete_tail(ill); + ASSERT(ill->ill_ipst == NULL); /* * Walk through all upper (conn) streams and qenable @@ -5548,11 +5483,11 @@ ip_modclose(ill_t *ill) * get unblocked. */ ip1dbg(("ip_wsrv: walking\n")); - conn_walk_drain(); + conn_walk_drain(ipst); - mutex_enter(&ip_mi_lock); - mi_close_unlink(&ip_g_head, (IDP)ill); - mutex_exit(&ip_mi_lock); + mutex_enter(&ipst->ips_ip_mi_lock); + mi_close_unlink(&ipst->ips_ip_g_head, (IDP)ill); + mutex_exit(&ipst->ips_ip_mi_lock); /* * credp could be null if the open didn't succeed and ip_modopen @@ -5566,7 +5501,8 @@ ip_modclose(ill_t *ill) * event taskq. */ if ((info = ill->ill_nic_event_info) != NULL) { - if (ddi_taskq_dispatch(eventq_queue_nic, ip_ne_queue_func, + if (ddi_taskq_dispatch(eventq_queue_nic, + ip_ne_queue_func, (void *)info, DDI_SLEEP) == DDI_FAILURE) { ip2dbg(("ip_ioctl_finish:ddi_taskq_dispatch failed\n")); if (info->hne_data != NULL) @@ -5576,6 +5512,12 @@ ip_modclose(ill_t *ill) ill->ill_nic_event_info = NULL; } + /* + * Now we are done with the module close pieces that + * need the netstack_t. + */ + netstack_rele(ipst->ips_netstack); + mi_close_free((IDP)ill); q->q_ptr = WR(q)->q_ptr = NULL; @@ -5594,8 +5536,10 @@ ip_quiesce_conn(conn_t *connp) boolean_t drain_cleanup_reqd = B_FALSE; boolean_t conn_ioctl_cleanup_reqd = B_FALSE; boolean_t ilg_cleanup_reqd = B_FALSE; + ip_stack_t *ipst; ASSERT(!IPCL_IS_TCP(connp)); + ipst = connp->conn_netstack->netstack_ip; /* * Mark the conn as closing, and this conn must not be @@ -5649,8 +5593,9 @@ ip_quiesce_conn(conn_t *connp) if (drain_cleanup_reqd) conn_drain_tail(connp, B_TRUE); - if (connp->conn_rq == ip_g_mrouter || connp->conn_wq == ip_g_mrouter) - (void) ip_mrouter_done(NULL); + if (connp->conn_rq == ipst->ips_ip_g_mrouter || + connp->conn_wq == ipst->ips_ip_g_mrouter) + (void) ip_mrouter_done(NULL, ipst); if (ilg_cleanup_reqd) ilg_delete_all(connp); @@ -5713,11 +5658,11 @@ ip_close(queue_t *q, int flags) ASSERT(!IPCL_IS_UDP(connp)); if (connp->conn_latch != NULL) { - IPLATCH_REFRELE(connp->conn_latch); + IPLATCH_REFRELE(connp->conn_latch, connp->conn_netstack); connp->conn_latch = NULL; } if (connp->conn_policy != NULL) { - IPPH_REFRELE(connp->conn_policy); + IPPH_REFRELE(connp->conn_policy, connp->conn_netstack); connp->conn_policy = NULL; } if (connp->conn_ipsec_opt_mp != NULL) { @@ -5838,39 +5783,155 @@ ip_csum_hdr(ipha_t *ipha) return ((uint16_t)sum); } +/* + * Called when the module is about to be unloaded + */ void ip_ddi_destroy(void) { - ipv4_hook_destroy(); - ipv6_hook_destroy(); - ip_net_destroy(); - tnet_fini(); - tcp_ddi_destroy(); - sctp_ddi_destroy(); - ipsec_loader_destroy(); - ipsec_policy_destroy(); - ipsec_kstat_destroy(); - nd_free(&ip_g_nd); - mutex_destroy(&igmp_timer_lock); - mutex_destroy(&mld_timer_lock); - mutex_destroy(&igmp_slowtimeout_lock); - mutex_destroy(&mld_slowtimeout_lock); - mutex_destroy(&ip_mi_lock); - mutex_destroy(&rts_clients.connf_lock); - ip_ire_fini(); - 
ip6_asp_free(); - conn_drain_fini(); - ipcl_destroy(); + + sctp_ddi_g_destroy(); + tcp_ddi_g_destroy(); + ipsec_policy_g_destroy(); + ipcl_g_destroy(); + ip_net_g_destroy(); + ip_ire_g_fini(); inet_minor_destroy(ip_minor_arena); - icmp_kstat_fini(); - ip_kstat_fini(); - rw_destroy(&ipsec_capab_ills_lock); - rw_destroy(&ill_g_usesrc_lock); - ip_drop_unregister(&ip_dropper); + + netstack_unregister(NS_IP); +} + +/* + * First step in cleanup. + */ +/* ARGSUSED */ +static void +ip_stack_shutdown(netstackid_t stackid, void *arg) +{ + ip_stack_t *ipst = (ip_stack_t *)arg; + +#ifdef NS_DEBUG + printf("ip_stack_shutdown(%p, stack %d)\n", (void *)ipst, stackid); +#endif + + /* Get rid of loopback interfaces and their IREs */ + ip_loopback_cleanup(ipst); } +/* + * Free the IP stack instance. + */ +static void +ip_stack_fini(netstackid_t stackid, void *arg) +{ + ip_stack_t *ipst = (ip_stack_t *)arg; + int ret; + +#ifdef NS_DEBUG + printf("ip_stack_fini(%p, stack %d)\n", (void *)ipst, stackid); +#endif + ipv4_hook_destroy(ipst); + ipv6_hook_destroy(ipst); + ip_net_destroy(ipst); + + rw_destroy(&ipst->ips_srcid_lock); + + ip_kstat_fini(stackid, ipst->ips_ip_mibkp); + ipst->ips_ip_mibkp = NULL; + icmp_kstat_fini(stackid, ipst->ips_icmp_mibkp); + ipst->ips_icmp_mibkp = NULL; + ip_kstat2_fini(stackid, ipst->ips_ip_kstat); + ipst->ips_ip_kstat = NULL; + bzero(&ipst->ips_ip_statistics, sizeof (ipst->ips_ip_statistics)); + ip6_kstat_fini(stackid, ipst->ips_ip6_kstat); + ipst->ips_ip6_kstat = NULL; + bzero(&ipst->ips_ip6_statistics, sizeof (ipst->ips_ip6_statistics)); + + nd_free(&ipst->ips_ip_g_nd); + kmem_free(ipst->ips_param_arr, sizeof (lcl_param_arr)); + ipst->ips_param_arr = NULL; + kmem_free(ipst->ips_ndp_arr, sizeof (lcl_ndp_arr)); + ipst->ips_ndp_arr = NULL; + + ip_mrouter_stack_destroy(ipst); + + mutex_destroy(&ipst->ips_ip_mi_lock); + rw_destroy(&ipst->ips_ipsec_capab_ills_lock); + rw_destroy(&ipst->ips_ill_g_usesrc_lock); + rw_destroy(&ipst->ips_ip_g_nd_lock); + + ret = untimeout(ipst->ips_igmp_timeout_id); + if (ret == -1) { + ASSERT(ipst->ips_igmp_timeout_id == 0); + } else { + ASSERT(ipst->ips_igmp_timeout_id != 0); + ipst->ips_igmp_timeout_id = 0; + } + ret = untimeout(ipst->ips_igmp_slowtimeout_id); + if (ret == -1) { + ASSERT(ipst->ips_igmp_slowtimeout_id == 0); + } else { + ASSERT(ipst->ips_igmp_slowtimeout_id != 0); + ipst->ips_igmp_slowtimeout_id = 0; + } + ret = untimeout(ipst->ips_mld_timeout_id); + if (ret == -1) { + ASSERT(ipst->ips_mld_timeout_id == 0); + } else { + ASSERT(ipst->ips_mld_timeout_id != 0); + ipst->ips_mld_timeout_id = 0; + } + ret = untimeout(ipst->ips_mld_slowtimeout_id); + if (ret == -1) { + ASSERT(ipst->ips_mld_slowtimeout_id == 0); + } else { + ASSERT(ipst->ips_mld_slowtimeout_id != 0); + ipst->ips_mld_slowtimeout_id = 0; + } + ret = untimeout(ipst->ips_ip_ire_expire_id); + if (ret == -1) { + ASSERT(ipst->ips_ip_ire_expire_id == 0); + } else { + ASSERT(ipst->ips_ip_ire_expire_id != 0); + ipst->ips_ip_ire_expire_id = 0; + } + + mutex_destroy(&ipst->ips_igmp_timer_lock); + mutex_destroy(&ipst->ips_mld_timer_lock); + mutex_destroy(&ipst->ips_igmp_slowtimeout_lock); + mutex_destroy(&ipst->ips_mld_slowtimeout_lock); + mutex_destroy(&ipst->ips_ip_addr_avail_lock); + rw_destroy(&ipst->ips_ill_g_lock); + ip_ire_fini(ipst); + ip6_asp_free(ipst); + conn_drain_fini(ipst); + ipcl_destroy(ipst); + + mutex_destroy(&ipst->ips_ndp4->ndp_g_lock); + mutex_destroy(&ipst->ips_ndp6->ndp_g_lock); + kmem_free(ipst->ips_ndp4, sizeof (ndp_g_t)); + ipst->ips_ndp4 = NULL; + 
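The new ip_stack_fini() above repeatedly cancels per-stack timeouts and checks the bookkeeping: untimeout(9F) returns -1 when the id was not outstanding, in which case the stored id must already be 0. A compact user-space model of that cancel-and-clear idiom, with a fake stand-in for untimeout:

    #include <assert.h>

    typedef long timeout_id_t;          /* 0 means "nothing outstanding" */

    static int
    fake_untimeout(timeout_id_t id)     /* stand-in for untimeout(9F) */
    {
        return (id == 0 ? -1 : 0);
    }

    static void
    cancel_stack_timeout(timeout_id_t *idp)
    {
        if (fake_untimeout(*idp) == -1) {
            assert(*idp == 0);          /* never armed, nothing to clear */
        } else {
            assert(*idp != 0);
            *idp = 0;                   /* mark as no longer outstanding */
        }
    }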
kmem_free(ipst->ips_ndp6, sizeof (ndp_g_t)); + ipst->ips_ndp6 = NULL; + + if (ipst->ips_loopback_ksp != NULL) { + kstat_delete_netstack(ipst->ips_loopback_ksp, stackid); + ipst->ips_loopback_ksp = NULL; + } + + kmem_free(ipst->ips_phyint_g_list, sizeof (phyint_list_t)); + ipst->ips_phyint_g_list = NULL; + kmem_free(ipst->ips_ill_g_heads, sizeof (ill_g_head_t) * MAX_G_HEADS); + ipst->ips_ill_g_heads = NULL; + + kmem_free(ipst, sizeof (*ipst)); +} + +/* + * Called when the IP kernel module is loaded into the kernel + */ void ip_ddi_init(void) { @@ -5881,30 +5942,6 @@ ip_ddi_init(void) ip_input_proc = ip_squeue_switch(ip_squeue_enter); - /* IP's IPsec code calls the packet dropper */ - ip_drop_register(&ip_dropper, "IP IPsec processing"); - - if (!ip_g_nd) { - if (!ip_param_register(lcl_param_arr, A_CNT(lcl_param_arr), - lcl_ndp_arr, A_CNT(lcl_ndp_arr))) { - nd_free(&ip_g_nd); - } - } - - ipsec_loader_init(); - ipsec_policy_init(); - ipsec_kstat_init(); - rw_init(&ip_g_nd_lock, NULL, RW_DEFAULT, NULL); - mutex_init(&igmp_timer_lock, NULL, MUTEX_DEFAULT, NULL); - mutex_init(&mld_timer_lock, NULL, MUTEX_DEFAULT, NULL); - mutex_init(&igmp_slowtimeout_lock, NULL, MUTEX_DEFAULT, NULL); - mutex_init(&mld_slowtimeout_lock, NULL, MUTEX_DEFAULT, NULL); - mutex_init(&ip_mi_lock, NULL, MUTEX_DEFAULT, NULL); - mutex_init(&ip_addr_avail_lock, NULL, MUTEX_DEFAULT, NULL); - rw_init(&ill_g_lock, NULL, RW_DEFAULT, NULL); - rw_init(&ipsec_capab_ills_lock, NULL, RW_DEFAULT, NULL); - rw_init(&ill_g_usesrc_lock, NULL, RW_DEFAULT, NULL); - /* * For IP and TCP the minor numbers should start from 2 since we have 4 * initial devices: ip, ip6, tcp, tcp6. @@ -5915,33 +5952,121 @@ ip_ddi_init(void) "ip_ddi_init: ip_minor_arena creation failed\n"); } - ipcl_init(); - mutex_init(&rts_clients.connf_lock, NULL, MUTEX_DEFAULT, NULL); - ip_ire_init(); - ip6_asp_init(); - ipif_init(); - conn_drain_init(); - tcp_ddi_init(); - sctp_ddi_init(); - ip_poll_normal_ticks = MSEC_TO_TICK_ROUNDUP(ip_poll_normal_ms); - if ((ip_kstat = kstat_create("ip", 0, "ipstat", - "net", KSTAT_TYPE_NAMED, - sizeof (ip_statistics) / sizeof (kstat_named_t), - KSTAT_FLAG_VIRTUAL)) != NULL) { - ip_kstat->ks_data = &ip_statistics; - kstat_install(ip_kstat); - } - ip_kstat_init(); - ip6_kstat_init(); - icmp_kstat_init(); - ipsec_loader_start(); + ipcl_g_init(); + ip_ire_g_init(); + ip_net_g_init(); + + /* + * We want to be informed each time a stack is created or + * destroyed in the kernel, so we can maintain the + * set of udp_stack_t's. + */ + netstack_register(NS_IP, ip_stack_init, ip_stack_shutdown, + ip_stack_fini); + + ipsec_policy_g_init(); + tcp_ddi_g_init(); + sctp_ddi_g_init(); + tnet_init(); +} + +/* + * Initialize the IP stack instance. 
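ip_ddi_init() now hands three callbacks to the netstack framework so an IP stack instance is created, shut down, and destroyed together with its netstack. The following is a self-contained user-space model of that registration contract; the framework names below (register_stack_module, stack_ops) are invented for illustration and are not the real netstack API:

    #include <stdio.h>
    #include <stdlib.h>

    typedef int stackid_t;

    /* create returns per-instance state, shutdown does early cleanup
     * (e.g. tearing down loopback), destroy frees everything. */
    struct stack_ops {
        void *(*create)(stackid_t);
        void  (*shutdown)(stackid_t, void *);
        void  (*destroy)(stackid_t, void *);
    };

    static struct stack_ops registered;

    static void
    register_stack_module(struct stack_ops ops)   /* cf. netstack_register() */
    {
        registered = ops;
    }

    /* --- example client module --- */
    struct ip_stack { stackid_t id; };

    static void *ip_create(stackid_t id)
    {
        struct ip_stack *ipst = calloc(1, sizeof (*ipst));
        if (ipst != NULL)
            ipst->id = id;
        return ipst;
    }
    static void ip_shutdown(stackid_t id, void *arg) { (void)id; (void)arg; }
    static void ip_destroy(stackid_t id, void *arg)  { (void)id; free(arg); }

    int
    main(void)
    {
        register_stack_module((struct stack_ops){ ip_create, ip_shutdown,
            ip_destroy });

        /* The framework would drive this when a zone's stack comes and goes. */
        void *inst = registered.create(1);
        registered.shutdown(1, inst);
        registered.destroy(1, inst);
        return 0;
    }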
+ */ +static void * +ip_stack_init(netstackid_t stackid, netstack_t *ns) +{ + ip_stack_t *ipst; + ipparam_t *pa; + ipndp_t *na; + +#ifdef NS_DEBUG + printf("ip_stack_init(stack %d)\n", stackid); +#endif - ip_net_init(); - ipv4_hook_init(); - ipv6_hook_init(); + ipst = (ip_stack_t *)kmem_zalloc(sizeof (*ipst), KM_SLEEP); + ipst->ips_netstack = ns; + + ipst->ips_ill_g_heads = kmem_zalloc(sizeof (ill_g_head_t) * MAX_G_HEADS, + KM_SLEEP); + ipst->ips_phyint_g_list = kmem_zalloc(sizeof (phyint_list_t), + KM_SLEEP); + ipst->ips_ndp4 = kmem_zalloc(sizeof (ndp_g_t), KM_SLEEP); + ipst->ips_ndp6 = kmem_zalloc(sizeof (ndp_g_t), KM_SLEEP); + mutex_init(&ipst->ips_ndp4->ndp_g_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&ipst->ips_ndp6->ndp_g_lock, NULL, MUTEX_DEFAULT, NULL); + + rw_init(&ipst->ips_ip_g_nd_lock, NULL, RW_DEFAULT, NULL); + mutex_init(&ipst->ips_igmp_timer_lock, NULL, MUTEX_DEFAULT, NULL); + ipst->ips_igmp_deferred_next = INFINITY; + mutex_init(&ipst->ips_mld_timer_lock, NULL, MUTEX_DEFAULT, NULL); + ipst->ips_mld_deferred_next = INFINITY; + mutex_init(&ipst->ips_igmp_slowtimeout_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&ipst->ips_mld_slowtimeout_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&ipst->ips_ip_mi_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&ipst->ips_ip_addr_avail_lock, NULL, MUTEX_DEFAULT, NULL); + rw_init(&ipst->ips_ill_g_lock, NULL, RW_DEFAULT, NULL); + rw_init(&ipst->ips_ipsec_capab_ills_lock, NULL, RW_DEFAULT, NULL); + rw_init(&ipst->ips_ill_g_usesrc_lock, NULL, RW_DEFAULT, NULL); + + ipcl_init(ipst); + ip_ire_init(ipst); + ip6_asp_init(ipst); + ipif_init(ipst); + conn_drain_init(ipst); + ip_mrouter_stack_init(ipst); + + ipst->ips_ip_g_frag_timeout = IP_FRAG_TIMEOUT; + ipst->ips_ip_g_frag_timo_ms = IP_FRAG_TIMEOUT * 1000; + + ipst->ips_ip_multirt_log_interval = 1000; + + ipst->ips_ip_g_forward = IP_FORWARD_DEFAULT; + ipst->ips_ipv6_forward = IP_FORWARD_DEFAULT; + ipst->ips_ill_index = 1; + + ipst->ips_saved_ip_g_forward = -1; + ipst->ips_reg_vif_num = ALL_VIFS; /* Index to Register vif */ + + pa = (ipparam_t *)kmem_alloc(sizeof (lcl_param_arr), KM_SLEEP); + ipst->ips_param_arr = pa; + bcopy(lcl_param_arr, ipst->ips_param_arr, sizeof (lcl_param_arr)); + + na = (ipndp_t *)kmem_alloc(sizeof (lcl_ndp_arr), KM_SLEEP); + ipst->ips_ndp_arr = na; + bcopy(lcl_ndp_arr, ipst->ips_ndp_arr, sizeof (lcl_ndp_arr)); + ipst->ips_ndp_arr[IPNDP_IP_FORWARDING_OFFSET].ip_ndp_data = + (caddr_t)&ipst->ips_ip_g_forward; + ipst->ips_ndp_arr[IPNDP_IP6_FORWARDING_OFFSET].ip_ndp_data = + (caddr_t)&ipst->ips_ipv6_forward; + ASSERT(strcmp(ipst->ips_ndp_arr[IPNDP_CGTP_FILTER_OFFSET].ip_ndp_name, + "ip_cgtp_filter") == 0); + ipst->ips_ndp_arr[IPNDP_CGTP_FILTER_OFFSET].ip_ndp_data = + (caddr_t)&ip_cgtp_filter; + + (void) ip_param_register(&ipst->ips_ip_g_nd, + ipst->ips_param_arr, A_CNT(lcl_param_arr), + ipst->ips_ndp_arr, A_CNT(lcl_ndp_arr)); + + ipst->ips_ip_mibkp = ip_kstat_init(stackid, ipst); + ipst->ips_icmp_mibkp = icmp_kstat_init(stackid); + ipst->ips_ip_kstat = ip_kstat2_init(stackid, &ipst->ips_ip_statistics); + ipst->ips_ip6_kstat = + ip6_kstat_init(stackid, &ipst->ips_ip6_statistics); + + ipst->ips_ipmp_enable_failback = B_TRUE; + + ipst->ips_ip_src_id = 1; + rw_init(&ipst->ips_srcid_lock, NULL, RW_DEFAULT, NULL); + + ip_net_init(ipst, ns); + ipv4_hook_init(ipst); + ipv6_hook_init(ipst); + + return (ipst); } /* @@ -6108,12 +6233,14 @@ mac_colon_addr(const uint8_t *addr, size_t alen, char *buf, size_t buflen) */ static boolean_t ip_fanout_send_icmp(queue_t *q, mblk_t *mp, uint_t 
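ip_stack_init() above copies the read-only templates lcl_param_arr and lcl_ndp_arr into per-stack arrays and points selected entries at per-stack storage such as ips_ip_g_forward, so ndd writes in one zone cannot leak into another. A minimal sketch of the copy step, using illustrative names rather than the real ipparam_t layout:

    #include <stdlib.h>
    #include <string.h>

    struct tunable {
        const char *name;
        int         min, max, value;
    };

    /* Template shared by all stacks; never written after module load. */
    static const struct tunable param_template[] = {
        { "ip_forwarding", 0, 1,   0   },
        { "ip_def_ttl",    1, 255, 255 },
    };

    struct ip_stack {
        struct tunable *params;     /* private, writable copy */
        size_t          nparams;
    };

    static int
    stack_params_init(struct ip_stack *ipst)
    {
        ipst->nparams = sizeof (param_template) / sizeof (param_template[0]);
        ipst->params = malloc(sizeof (param_template));
        if (ipst->params == NULL)
            return -1;
        memcpy(ipst->params, param_template, sizeof (param_template));
        return 0;                   /* each instance is now tuned on its own */
    }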
flags, - uint_t icmp_type, uint_t icmp_code, boolean_t mctl_present, zoneid_t zoneid) + uint_t icmp_type, uint_t icmp_code, boolean_t mctl_present, + zoneid_t zoneid, ip_stack_t *ipst) { ipha_t *ipha; mblk_t *first_mp; boolean_t secure; unsigned char db_type; + ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; first_mp = mp; if (mctl_present) { @@ -6140,9 +6267,9 @@ ip_fanout_send_icmp(queue_t *q, mblk_t *mp, uint_t flags, * there is no "conn", we are checking with global policy. */ ipha = (ipha_t *)mp->b_rptr; - if (secure || ipsec_inbound_v4_policy_present) { + if (secure || ipss->ipsec_inbound_v4_policy_present) { first_mp = ipsec_check_global_policy(first_mp, NULL, - ipha, NULL, mctl_present); + ipha, NULL, mctl_present, ipst->ips_netstack); if (first_mp == NULL) return (B_FALSE); } @@ -6152,7 +6279,7 @@ ip_fanout_send_icmp(queue_t *q, mblk_t *mp, uint_t flags, if (flags & IP_FF_SEND_ICMP) { if (flags & IP_FF_HDR_COMPLETE) { - if (ip_hdr_complete(ipha, zoneid)) { + if (ip_hdr_complete(ipha, zoneid, ipst)) { freemsg(first_mp); return (B_TRUE); } @@ -6169,7 +6296,8 @@ ip_fanout_send_icmp(queue_t *q, mblk_t *mp, uint_t flags, } switch (icmp_type) { case ICMP_DEST_UNREACHABLE: - icmp_unreachable(WR(q), first_mp, icmp_code, zoneid); + icmp_unreachable(WR(q), first_mp, icmp_code, zoneid, + ipst); break; default: freemsg(first_mp); @@ -6189,7 +6317,8 @@ ip_fanout_send_icmp(queue_t *q, mblk_t *mp, uint_t flags, * is consumed by this function. */ void -ip_proto_not_sup(queue_t *q, mblk_t *ipsec_mp, uint_t flags, zoneid_t zoneid) +ip_proto_not_sup(queue_t *q, mblk_t *ipsec_mp, uint_t flags, zoneid_t zoneid, + ip_stack_t *ipst) { mblk_t *mp; ipha_t *ipha; @@ -6204,19 +6333,20 @@ ip_proto_not_sup(queue_t *q, mblk_t *ipsec_mp, uint_t flags, zoneid_t zoneid) ipha = (ipha_t *)mp->b_rptr; /* Get ill from index in ipsec_in_t. */ ill = ill_lookup_on_ifindex(ii->ipsec_in_ill_index, - (IPH_HDR_VERSION(ipha) == IPV6_VERSION), NULL, NULL, NULL, NULL); + (IPH_HDR_VERSION(ipha) == IPV6_VERSION), NULL, NULL, NULL, NULL, + ipst); if (ill != NULL) { if (IPH_HDR_VERSION(ipha) == IP_VERSION) { if (ip_fanout_send_icmp(q, mp, flags, ICMP_DEST_UNREACHABLE, - ICMP_PROTOCOL_UNREACHABLE, B_FALSE, zoneid)) { + ICMP_PROTOCOL_UNREACHABLE, B_FALSE, zoneid, ipst)) { BUMP_MIB(ill->ill_ip_mib, ipIfStatsInUnknownProtos); } } else { if (ip_fanout_send_icmp_v6(q, mp, flags, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_NEXTHEADER, - 0, B_FALSE, zoneid)) { + 0, B_FALSE, zoneid, ipst)) { BUMP_MIB(ill->ill_ip_mib, ipIfStatsInUnknownProtos); } @@ -6306,8 +6436,10 @@ ip_fanout_proto(queue_t *q, mblk_t *mp, ill_t *ill, ipha_t *ipha, uint_t flags, connf_t *connfp; boolean_t shared_addr; mib2_ipIfStatsEntry_t *mibptr; + ip_stack_t *ipst = recv_ill->ill_ipst; + ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; - mibptr = (ill != NULL) ? ill->ill_ip_mib : &ip_mib; + mibptr = (ill != NULL) ? ill->ill_ip_mib : &ipst->ips_ip_mib; if (mctl_present) { mp = first_mp->b_cont; secure = ipsec_in_is_secure(first_mp); @@ -6332,7 +6464,7 @@ ip_fanout_proto(queue_t *q, mblk_t *mp, ill_t *ill, ipha_t *ipha, uint_t flags, zoneid = tsol_packet_to_zoneid(mp); } - connfp = &ipcl_proto_fanout[protocol]; + connfp = &ipst->ips_ipcl_proto_fanout[protocol]; mutex_enter(&connfp->connf_lock); connp = connfp->connf_head; for (connp = connfp->connf_head; connp != NULL; @@ -6355,7 +6487,7 @@ ip_fanout_proto(queue_t *q, mblk_t *mp, ill_t *ill, ipha_t *ipha, uint_t flags, /* * Check for IPPROTO_ENCAP... 
*/ - if (protocol == IPPROTO_ENCAP && ip_g_mrouter) { + if (protocol == IPPROTO_ENCAP && ipst->ips_ip_g_mrouter) { /* * If an IPsec mblk is here on a multicast * tunnel (using ip_mroute stuff), check policy here, @@ -6366,11 +6498,11 @@ ip_fanout_proto(queue_t *q, mblk_t *mp, ill_t *ill, ipha_t *ipha, uint_t flags, * ip_mroute_decap will never be called. */ first_mp = ipsec_check_global_policy(first_mp, connp, - ipha, NULL, mctl_present); + ipha, NULL, mctl_present, ipst->ips_netstack); if (first_mp != NULL) { if (mctl_present) freeb(first_mp); - ip_mroute_decap(q, mp); + ip_mroute_decap(q, mp, ill); } /* Else we already freed everything! */ } else { /* @@ -6378,7 +6510,7 @@ ip_fanout_proto(queue_t *q, mblk_t *mp, ill_t *ill, ipha_t *ipha, uint_t flags, */ if (ip_fanout_send_icmp(q, first_mp, flags, ICMP_DEST_UNREACHABLE, ICMP_PROTOCOL_UNREACHABLE, - mctl_present, zoneid)) { + mctl_present, zoneid, ipst)) { BUMP_MIB(mibptr, ipIfStatsInUnknownProtos); } } @@ -6425,7 +6557,7 @@ ip_fanout_proto(queue_t *q, mblk_t *mp, ill_t *ill, ipha_t *ipha, uint_t flags, if (flags & IP_FF_RAWIP) { BUMP_MIB(mibptr, rawipIfStatsInOverflows); } else { - BUMP_MIB(&icmp_mib, icmpInOverflows); + BUMP_MIB(&ipst->ips_icmp_mib, icmpInOverflows); } freemsg(first_mp1); @@ -6435,7 +6567,8 @@ ip_fanout_proto(queue_t *q, mblk_t *mp, ill_t *ill, ipha_t *ipha, uint_t flags, * let "tun" do it instead. */ if (!IPCL_IS_IPTUN(connp) && - (CONN_INBOUND_POLICY_PRESENT(connp) || secure)) { + (CONN_INBOUND_POLICY_PRESENT(connp, ipss) || + secure)) { first_mp1 = ipsec_check_inbound_policy (first_mp1, connp, ipha, NULL, mctl_present); @@ -6483,7 +6616,7 @@ ip_fanout_proto(queue_t *q, mblk_t *mp, ill_t *ill, ipha_t *ipha, uint_t flags, */ ASSERT(recv_ill != NULL); mp1 = ip_add_info(mp1, recv_ill, - in_flags, IPCL_ZONEID(connp)); + in_flags, IPCL_ZONEID(connp), ipst); } BUMP_MIB(mibptr, ipIfStatsHCInDelivers); if (mctl_present) @@ -6505,7 +6638,7 @@ ip_fanout_proto(queue_t *q, mblk_t *mp, ill_t *ill, ipha_t *ipha, uint_t flags, * If this packet is coming from icmp_inbound_error_fanout ip_policy * will be set to false. 
*/ - if (IPP_ENABLED(IPP_LOCAL_IN) && ip_policy) { + if (IPP_ENABLED(IPP_LOCAL_IN, ipst) && ip_policy) { ill_index = ill->ill_phyint->phyint_ifindex; ip_process(IPP_LOCAL_IN, &mp, ill_index); if (mp == NULL) { @@ -6522,7 +6655,7 @@ ip_fanout_proto(queue_t *q, mblk_t *mp, ill_t *ill, ipha_t *ipha, uint_t flags, if (flags & IP_FF_RAWIP) { BUMP_MIB(mibptr, rawipIfStatsInOverflows); } else { - BUMP_MIB(&icmp_mib, icmpInOverflows); + BUMP_MIB(&ipst->ips_icmp_mib, icmpInOverflows); } freemsg(first_mp); @@ -6540,7 +6673,7 @@ ip_fanout_proto(queue_t *q, mblk_t *mp, ill_t *ill, ipha_t *ipha, uint_t flags, return; } - if ((CONN_INBOUND_POLICY_PRESENT(connp) || secure)) { + if ((CONN_INBOUND_POLICY_PRESENT(connp, ipss) || secure)) { first_mp = ipsec_check_inbound_policy(first_mp, connp, ipha, NULL, mctl_present); } @@ -6585,7 +6718,7 @@ ip_fanout_proto(queue_t *q, mblk_t *mp, ill_t *ill, ipha_t *ipha, uint_t flags, */ ASSERT(recv_ill != NULL); mp = ip_add_info(mp, recv_ill, - in_flags, IPCL_ZONEID(connp)); + in_flags, IPCL_ZONEID(connp), ipst); } BUMP_MIB(mibptr, ipIfStatsHCInDelivers); putnext(rq, mp); @@ -6617,6 +6750,8 @@ ip_fanout_tcp(queue_t *q, mblk_t *mp, ill_t *recv_ill, ipha_t *ipha, tcph_t *tcph; boolean_t syn_present = B_FALSE; conn_t *connp; + ip_stack_t *ipst = recv_ill->ill_ipst; + ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; ASSERT(recv_ill != NULL); @@ -6632,15 +6767,15 @@ ip_fanout_tcp(queue_t *q, mblk_t *mp, ill_t *recv_ill, ipha_t *ipha, ip_hdr_len = IPH_HDR_LENGTH(mp->b_rptr); - if ((connp = ipcl_classify_v4(mp, IPPROTO_TCP, ip_hdr_len, zoneid)) == - NULL) { + if ((connp = ipcl_classify_v4(mp, IPPROTO_TCP, ip_hdr_len, + zoneid, ipst)) == NULL) { /* * No connected connection or listener. Send a * TH_RST via tcp_xmit_listeners_reset. */ /* Initiate IPPf processing, if needed. 
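Throughout these fanout paths the IPP_ENABLED() test gains an ip_stack_t argument, so "is a policy callout loaded here?" is answered per stack instead of from a global mask. A tiny model of such a per-stack check; the enum and field names are assumptions for illustration:

    #include <stdbool.h>
    #include <stdint.h>

    /* Callout positions, in the spirit of IPP_LOCAL_IN / IPP_FWD_OUT. */
    enum ipp_point { POINT_FWD_IN, POINT_FWD_OUT, POINT_LOCAL_IN, POINT_LOCAL_OUT };

    /* Hypothetical per-stack state: which callout points have actions loaded. */
    struct ip_stack {
        uint32_t policy_mask;
    };

    #define IPP_ENABLED_MODEL(pt, ipst) \
            (((ipst)->policy_mask & (1u << (pt))) != 0)

    static bool
    local_in_policy_active(const struct ip_stack *ipst)
    {
        return IPP_ENABLED_MODEL(POINT_LOCAL_IN, ipst);
    }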
*/ - if (IPP_ENABLED(IPP_LOCAL_IN)) { + if (IPP_ENABLED(IPP_LOCAL_IN, ipst)) { uint32_t ill_index; ill_index = recv_ill->ill_phyint->phyint_ifindex; ip_process(IPP_LOCAL_IN, &first_mp, ill_index); @@ -6650,7 +6785,8 @@ ip_fanout_tcp(queue_t *q, mblk_t *mp, ill_t *recv_ill, ipha_t *ipha, BUMP_MIB(recv_ill->ill_ip_mib, ipIfStatsHCInDelivers); ip2dbg(("ip_fanout_tcp: no listener; send reset to zone %d\n", zoneid)); - tcp_xmit_listeners_reset(first_mp, ip_hdr_len, zoneid); + tcp_xmit_listeners_reset(first_mp, ip_hdr_len, zoneid, + ipst->ips_netstack->netstack_tcp); return; } @@ -6671,8 +6807,9 @@ ip_fanout_tcp(queue_t *q, mblk_t *mp, ill_t *recv_ill, ipha_t *ipha, */ ASSERT(Q_TO_CONN(q) != NULL); if (do_tcp_fusion && - !CONN_INBOUND_POLICY_PRESENT(connp) && !secure && - !IPP_ENABLED(IPP_LOCAL_IN) && !ip_policy && + !CONN_INBOUND_POLICY_PRESENT(connp, ipss) && + !secure && + !IPP_ENABLED(IPP_LOCAL_IN, ipst) && !ip_policy && IPCL_IS_TCP(Q_TO_CONN(q))) { ASSERT(Q_TO_CONN(q)->conn_sqp != NULL); sqp = Q_TO_CONN(q)->conn_sqp; @@ -6695,7 +6832,8 @@ ip_fanout_tcp(queue_t *q, mblk_t *mp, ill_t *recv_ill, ipha_t *ipha, return; } if (flags & TH_ACK) { - tcp_xmit_listeners_reset(first_mp, ip_hdr_len, zoneid); + tcp_xmit_listeners_reset(first_mp, ip_hdr_len, zoneid, + ipst->ips_netstack->netstack_tcp); CONN_DEC_REF(connp); return; } @@ -6705,7 +6843,7 @@ ip_fanout_tcp(queue_t *q, mblk_t *mp, ill_t *recv_ill, ipha_t *ipha, return; } - if (CONN_INBOUND_POLICY_PRESENT(connp) || secure) { + if (CONN_INBOUND_POLICY_PRESENT(connp, ipss) || secure) { first_mp = ipsec_check_inbound_policy(first_mp, connp, ipha, NULL, mctl_present); if (first_mp == NULL) { @@ -6744,7 +6882,7 @@ ip_fanout_tcp(queue_t *q, mblk_t *mp, ill_t *recv_ill, ipha_t *ipha, * Initiate policy processing here if needed. If we get here from * icmp_inbound_error_fanout, ip_policy is false. */ - if (IPP_ENABLED(IPP_LOCAL_IN) && ip_policy) { + if (IPP_ENABLED(IPP_LOCAL_IN, ipst) && ip_policy) { ill_index = recv_ill->ill_phyint->phyint_ifindex; ip_process(IPP_LOCAL_IN, &mp, ill_index); if (mp == NULL) { @@ -6771,7 +6909,8 @@ ip_fanout_tcp(queue_t *q, mblk_t *mp, ill_t *recv_ill, ipha_t *ipha, * Since tcp does not support IP_RECVPKTINFO for V4, only pass * IPF_RECVIF. */ - mp = ip_add_info(mp, recv_ill, IPF_RECVIF, IPCL_ZONEID(connp)); + mp = ip_add_info(mp, recv_ill, IPF_RECVIF, IPCL_ZONEID(connp), + ipst); if (mp == NULL) { BUMP_MIB(recv_ill->ill_ip_mib, ipIfStatsInDiscards); CONN_DEC_REF(connp); @@ -6819,6 +6958,8 @@ ip_fanout_udp_conn(conn_t *connp, mblk_t *first_mp, mblk_t *mp, boolean_t mctl_present = (first_mp != NULL); uint32_t in_flags = 0; /* set to IP_RECVSLLA and/or IP_RECVIF */ uint32_t ill_index; + ip_stack_t *ipst = recv_ill->ill_ipst; + ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; ASSERT(ill != NULL); @@ -6833,7 +6974,7 @@ ip_fanout_udp_conn(conn_t *connp, mblk_t *first_mp, mblk_t *mp, return; } - if (CONN_INBOUND_POLICY_PRESENT(connp) || secure) { + if (CONN_INBOUND_POLICY_PRESENT(connp, ipss) || secure) { first_mp = ipsec_check_inbound_policy(first_mp, connp, ipha, NULL, mctl_present); if (first_mp == NULL) { @@ -6872,7 +7013,7 @@ ip_fanout_udp_conn(conn_t *connp, mblk_t *first_mp, mblk_t *mp, * Initiate IPPF processing here, if needed. Note first_mp won't be * freed if the packet is dropped. The caller will do so. 
*/ - if (IPP_ENABLED(IPP_LOCAL_IN) && ip_policy) { + if (IPP_ENABLED(IPP_LOCAL_IN, ipst) && ip_policy) { ill_index = recv_ill->ill_phyint->phyint_ifindex; ip_process(IPP_LOCAL_IN, &mp, ill_index); if (mp == NULL) { @@ -6887,7 +7028,8 @@ ip_fanout_udp_conn(conn_t *connp, mblk_t *first_mp, mblk_t *mp, * else original mblk is returned */ ASSERT(recv_ill != NULL); - mp = ip_add_info(mp, recv_ill, in_flags, IPCL_ZONEID(connp)); + mp = ip_add_info(mp, recv_ill, in_flags, IPCL_ZONEID(connp), + ipst); } BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); /* Send it upstream */ @@ -6928,6 +7070,10 @@ ip_fanout_udp(queue_t *q, mblk_t *mp, ill_t *ill, ipha_t *ipha, zoneid_t last_zoneid; boolean_t reuseaddr; boolean_t shared_addr; + ip_stack_t *ipst; + + ASSERT(recv_ill != NULL); + ipst = recv_ill->ill_ipst; first_mp = mp; if (mctl_present) { @@ -6948,12 +7094,16 @@ ip_fanout_udp(queue_t *q, mblk_t *mp, ill_t *ill, ipha_t *ipha, shared_addr = (zoneid == ALL_ZONES); if (shared_addr) { + /* + * No need to handle exclusive-stack zones since ALL_ZONES + * only applies to the shared stack. + */ zoneid = tsol_mlp_findzone(IPPROTO_UDP, dstport); if (zoneid == ALL_ZONES) zoneid = tsol_packet_to_zoneid(mp); } - connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(dstport)]; + connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(dstport, ipst)]; mutex_enter(&connfp->connf_lock); connp = connfp->connf_head; if (!broadcast && !CLASSD(dst)) { @@ -6981,7 +7131,7 @@ ip_fanout_udp(queue_t *q, mblk_t *mp, ill_t *ill, ipha_t *ipha, mutex_exit(&connfp->connf_lock); ip_fanout_udp_conn(connp, first_mp, mp, secure, ill, ipha, flags, recv_ill, ip_policy); - IP_STAT(ip_udp_fannorm); + IP_STAT(ipst, ip_udp_fannorm); CONN_DEC_REF(connp); return; } @@ -7055,7 +7205,8 @@ ip_fanout_udp(queue_t *q, mblk_t *mp, ill_t *ill, ipha_t *ipha, if (first_mp != NULL) { ASSERT(((ipsec_info_t *)first_mp->b_rptr)-> ipsec_info_type == IPSEC_IN); - first_mp1 = ipsec_in_tag(first_mp, NULL); + first_mp1 = ipsec_in_tag(first_mp, NULL, + ipst->ips_netstack); if (first_mp1 == NULL) { freemsg(mp1); connp = first_connp; @@ -7078,7 +7229,7 @@ ip_fanout_udp(queue_t *q, mblk_t *mp, ill_t *ill, ipha_t *ipha, mutex_enter(&connfp->connf_lock); /* Follow the next pointer before releasing the conn. */ next_connp = connp->conn_next; - IP_STAT(ip_udp_fanmb); + IP_STAT(ipst, ip_udp_fanmb); CONN_DEC_REF(connp); connp = next_connp; } @@ -7087,14 +7238,14 @@ ip_fanout_udp(queue_t *q, mblk_t *mp, ill_t *ill, ipha_t *ipha, mutex_exit(&connfp->connf_lock); ip_fanout_udp_conn(connp, first_mp, mp, secure, ill, ipha, flags, recv_ill, ip_policy); - IP_STAT(ip_udp_fanmb); + IP_STAT(ipst, ip_udp_fanmb); CONN_DEC_REF(connp); return; notfound: mutex_exit(&connfp->connf_lock); - IP_STAT(ip_udp_fanothers); + IP_STAT(ipst, ip_udp_fanothers); /* * IPv6 endpoints bound to unicast or multicast IPv4-mapped addresses * have already been matched above, since they live in the IPv4 @@ -7105,7 +7256,7 @@ notfound: * uses the IPv4 destination. */ IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &v6src); - connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(dstport)]; + connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(dstport, ipst)]; mutex_enter(&connfp->connf_lock); connp = connfp->connf_head; if (!broadcast && !CLASSD(dst)) { @@ -7135,7 +7286,8 @@ notfound: first_mp->b_cont = mp; else first_mp = mp; - if (ipcl_proto_search(IPPROTO_UDP) != NULL) { + if (ipst->ips_ipcl_proto_fanout[IPPROTO_UDP]. 
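The UDP fanout above now indexes a table hanging off the ip_stack_t (ips_ipcl_udp_fanout), and the hash macro takes the stack so the table size can differ per instance. A small model of a per-stack port hash, with assumed structure names:

    #include <stdint.h>
    #include <stddef.h>
    #include <stdlib.h>

    struct conn;                        /* opaque endpoint for the sketch */

    struct fanout_bucket {
        struct conn *head;
    };

    struct ip_stack {
        struct fanout_bucket *udp_fanout;
        size_t                udp_fanout_size;  /* per-stack table size */
    };

    static int
    stack_fanout_init(struct ip_stack *ipst, size_t size)
    {
        ipst->udp_fanout = calloc(size, sizeof (struct fanout_bucket));
        if (ipst->udp_fanout == NULL)
            return -1;
        ipst->udp_fanout_size = size;
        return 0;
    }

    /* Hash a destination port into this stack's own table. */
    static struct fanout_bucket *
    udp_fanout_bucket(struct ip_stack *ipst, uint16_t dstport)
    {
        return &ipst->udp_fanout[dstport % ipst->udp_fanout_size];
    }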
+ connf_head != NULL) { ip_fanout_proto(q, first_mp, ill, ipha, flags | IP_FF_RAWIP, mctl_present, ip_policy, recv_ill, zoneid); @@ -7143,7 +7295,7 @@ notfound: if (ip_fanout_send_icmp(q, first_mp, flags, ICMP_DEST_UNREACHABLE, ICMP_PORT_UNREACHABLE, - mctl_present, zoneid)) { + mctl_present, zoneid, ipst)) { BUMP_MIB(ill->ill_ip_mib, udpIfStatsNoPorts); } @@ -7190,7 +7342,8 @@ notfound: first_mp->b_cont = mp; else first_mp = mp; - if (ipcl_proto_search(IPPROTO_UDP) != NULL) { + if (ipst->ips_ipcl_proto_fanout[IPPROTO_UDP].connf_head != + NULL) { ip_fanout_proto(q, first_mp, ill, ipha, flags | IP_FF_RAWIP, mctl_present, ip_policy, recv_ill, zoneid); @@ -7239,7 +7392,8 @@ notfound: if (first_mp != NULL) { ASSERT(((ipsec_info_t *)first_mp->b_rptr)-> ipsec_info_type == IPSEC_IN); - first_mp1 = ipsec_in_tag(first_mp, NULL); + first_mp1 = ipsec_in_tag(first_mp, NULL, + ipst->ips_netstack); if (first_mp1 == NULL) { freemsg(mp1); connp = first_connp; @@ -7279,12 +7433,12 @@ notfound: * errors. */ int -ip_hdr_complete(ipha_t *ipha, zoneid_t zoneid) +ip_hdr_complete(ipha_t *ipha, zoneid_t zoneid, ip_stack_t *ipst) { ire_t *ire; if (ipha->ipha_src == INADDR_ANY) { - ire = ire_lookup_local(zoneid); + ire = ire_lookup_local(zoneid, ipst); if (ire == NULL) { ip1dbg(("ip_hdr_complete: no source IRE\n")); return (1); @@ -7292,7 +7446,7 @@ ip_hdr_complete(ipha_t *ipha, zoneid_t zoneid) ipha->ipha_src = ire->ire_addr; ire_refrele(ire); } - ipha->ipha_ttl = ip_def_ttl; + ipha->ipha_ttl = ipst->ips_ip_def_ttl; ipha->ipha_hdr_checksum = 0; ipha->ipha_hdr_checksum = ip_csum_hdr(ipha); return (0); @@ -7338,7 +7492,7 @@ ip_lwput(queue_t *q, mblk_t *mp) * destination (either ipha_dst or the last entry in a source route.) */ ipaddr_t -ip_massage_options(ipha_t *ipha) +ip_massage_options(ipha_t *ipha, netstack_t *ns) { ipoptp_t opts; uchar_t *opt; @@ -7347,6 +7501,7 @@ ip_massage_options(ipha_t *ipha) ipaddr_t dst; int i; ire_t *ire; + ip_stack_t *ipst = ns->netstack_ip; ip2dbg(("ip_massage_options\n")); dst = ipha->ipha_dst; @@ -7382,7 +7537,7 @@ ip_massage_options(ipha_t *ipha) * for source route? 
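ip_hdr_complete() above now pulls the default TTL from the stack's ips_ip_def_ttl and then recomputes the header checksum. The checksum itself is the standard one's-complement sum over the IPv4 header (with the checksum field zeroed first); a self-contained reference version:

    #include <stdint.h>
    #include <stddef.h>

    /*
     * Standard IPv4 header checksum: one's-complement of the one's-complement
     * sum of the header taken as 16-bit big-endian words.  hdr_len is in
     * bytes and is always even for an IPv4 header; the caller zeroes the
     * checksum field before calling.
     */
    static uint16_t
    ipv4_hdr_checksum(const uint8_t *hdr, size_t hdr_len)
    {
        uint32_t sum = 0;

        for (size_t i = 0; i < hdr_len; i += 2)
            sum += (uint32_t)hdr[i] << 8 | hdr[i + 1];

        while (sum >> 16)
            sum = (sum & 0xffff) + (sum >> 16);

        return (uint16_t)~sum;
    }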
*/ ire = ire_ctable_lookup(dst, 0, IRE_LOCAL, NULL, - ALL_ZONES, NULL, MATCH_IRE_TYPE); + ALL_ZONES, NULL, MATCH_IRE_TYPE, ipst); if (ire != NULL) { ire_refrele(ire); off += IP_ADDR_LEN; @@ -7442,13 +7597,14 @@ ip_mrtun_forward(ire_t *ire, ill_t *in_ill, mblk_t *mp) uint32_t ill_index; ipxmit_state_t pktxmit_state; ill_t *out_ill; + ip_stack_t *ipst = in_ill->ill_ipst; ASSERT(ire != NULL); ASSERT(ire->ire_ipif->ipif_net_type == IRE_IF_NORESOLVER); ASSERT(ire->ire_stq != NULL); /* Initiate read side IPPF processing */ - if (IPP_ENABLED(IPP_FWD_IN)) { + if (IPP_ENABLED(IPP_FWD_IN, ipst)) { ill_index = in_ill->ill_phyint->phyint_ifindex; ip_process(IPP_FWD_IN, &mp, ill_index); if (mp == NULL) { @@ -7493,7 +7649,7 @@ ip_mrtun_forward(ire_t *ire, ill_t *in_ill, mblk_t *mp) ip_ipsec_out_prepend(first_mp, mp, in_ill); /* Sent by forwarding path, and router is global zone */ icmp_time_exceeded(q, first_mp, ICMP_TTL_EXCEEDED, - GLOBAL_ZONEID); + GLOBAL_ZONEID, ipst); return; } @@ -7511,8 +7667,9 @@ ip_mrtun_forward(ire_t *ire, ill_t *in_ill, mblk_t *mp) DTRACE_PROBE4(ip4__forwarding__start, ill_t *, in_ill, ill_t *, out_ill, ipha_t *, ipha, mblk_t *, mp); - FW_HOOKS(ip4_forwarding_event, ipv4firewall_forwarding, - in_ill, out_ill, ipha, mp, mp); + FW_HOOKS(ipst->ips_ip4_forwarding_event, + ipst->ips_ipv4firewall_forwarding, + in_ill, out_ill, ipha, mp, mp, ipst); DTRACE_PROBE1(ip4__forwarding__end, mblk_t *, mp); @@ -7540,7 +7697,7 @@ ip_mrtun_forward(ire_t *ire, ill_t *in_ill, mblk_t *mp) } /* Initiate write side IPPF processing */ - if (IPP_ENABLED(IPP_FWD_OUT)) { + if (IPP_ENABLED(IPP_FWD_OUT, ipst)) { ip_process(IPP_FWD_OUT, &mp, ill_index); if (mp == NULL) { ip2dbg(("ip_mrtun_forward: outbound pkt "\ @@ -7556,7 +7713,7 @@ ip_mrtun_forward(ire_t *ire, ill_t *in_ill, mblk_t *mp) ip_ipsec_out_prepend(first_mp, mp, in_ill); mp = first_mp; - ip_wput_frag(ire, mp, IB_PKT, max_frag, 0, GLOBAL_ZONEID); + ip_wput_frag(ire, mp, IB_PKT, max_frag, 0, GLOBAL_ZONEID, ipst); return; } @@ -7566,8 +7723,9 @@ ip_mrtun_forward(ire_t *ire, ill_t *in_ill, mblk_t *mp) DTRACE_PROBE4(ip4__physical__out__start, ill_t *, NULL, ill_t *, out_ill, ipha_t *, ipha, mblk_t *, mp); - FW_HOOKS(ip4_physical_out_event, ipv4firewall_physical_out, - NULL, out_ill, ipha, mp, mp); + FW_HOOKS(ipst->ips_ip4_physical_out_event, + ipst->ips_ipv4firewall_physical_out, + NULL, out_ill, ipha, mp, mp, ipst); DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, mp); if (mp == NULL) return; @@ -7623,6 +7781,7 @@ ip_ipsec_out_prepend(mblk_t *first_mp, mblk_t *mp, ill_t *xmit_ill) io->ipsec_out_ill_index = xmit_ill->ill_phyint->phyint_ifindex; io->ipsec_out_xmit_if = B_TRUE; + io->ipsec_out_ns = xmit_ill->ill_ipst->ips_netstack; } /* @@ -7695,19 +7854,22 @@ ip_newroute_get_dst_ill(ill_t *dst_ill) * Helper function for the IPIF_NOFAILOVER/ATTACH_IF interface attachment case. 
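The FW_HOOKS invocations in ip_mrtun_forward() above now dispatch per-stack events (ips_ip4_forwarding_event, ips_ipv4firewall_physical_out), which is what lets each zone run its own virtual IPFilter over only its own traffic. A rough model of dispatching a hook list owned by the stack; the types here are simplified placeholders:

    #include <stddef.h>

    struct packet;                      /* opaque for the sketch */

    typedef int (*hook_fn_t)(struct packet *);

    struct hook_event {                 /* e.g. "ipv4 forwarding" for one stack */
        hook_fn_t funcs[8];
        size_t    nfuncs;
    };

    struct ip_stack {
        struct hook_event fw_forwarding;
        struct hook_event fw_physical_out;
    };

    /* Run every callback registered on this stack's event; nonzero drops. */
    static int
    run_hooks(const struct hook_event *ev, struct packet *pkt)
    {
        for (size_t i = 0; i < ev->nfuncs; i++) {
            int rc = ev->funcs[i](pkt);
            if (rc != 0)
                return rc;
        }
        return 0;
    }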
*/ ill_t * -ip_grab_attach_ill(ill_t *ill, mblk_t *first_mp, int ifindex, boolean_t isv6) +ip_grab_attach_ill(ill_t *ill, mblk_t *first_mp, int ifindex, boolean_t isv6, + ip_stack_t *ipst) { ill_t *ret_ill; ASSERT(ifindex != 0); - ret_ill = ill_lookup_on_ifindex(ifindex, isv6, NULL, NULL, NULL, NULL); + ret_ill = ill_lookup_on_ifindex(ifindex, isv6, NULL, NULL, NULL, NULL, + ipst); if (ret_ill == NULL || (ret_ill->ill_phyint->phyint_flags & PHYI_OFFLINE)) { if (isv6) { if (ill != NULL) { BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); } else { - BUMP_MIB(&ip6_mib, ipIfStatsOutDiscards); + BUMP_MIB(&ipst->ips_ip6_mib, + ipIfStatsOutDiscards); } ip1dbg(("ip_grab_attach_ill (IPv6): " "bad ifindex %d.\n", ifindex)); @@ -7715,7 +7877,8 @@ ip_grab_attach_ill(ill_t *ill, mblk_t *first_mp, int ifindex, boolean_t isv6) if (ill != NULL) { BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); } else { - BUMP_MIB(&ip_mib, ipIfStatsOutDiscards); + BUMP_MIB(&ipst->ips_ip_mib, + ipIfStatsOutDiscards); } ip1dbg(("ip_grab_attach_ill (IPv4): " "bad ifindex %d.\n", ifindex)); @@ -7753,7 +7916,7 @@ ip_grab_attach_ill(ill_t *ill, mblk_t *first_mp, int ifindex, boolean_t isv6) */ void ip_newroute(queue_t *q, mblk_t *mp, ipaddr_t dst, ill_t *in_ill, conn_t *connp, - zoneid_t zoneid) + zoneid_t zoneid, ip_stack_t *ipst) { areq_t *areq; ipaddr_t gw = 0; @@ -7812,7 +7975,7 @@ ip_newroute(queue_t *q, mblk_t *mp, ipaddr_t dst, ill_t *in_ill, conn_t *connp, if (mctl_present && io->ipsec_out_attach_if) { /* ip_grab_attach_ill returns a held ill */ attach_ill = ip_grab_attach_ill(NULL, first_mp, - io->ipsec_out_ill_index, B_FALSE); + io->ipsec_out_ill_index, B_FALSE, ipst); /* Failure case frees things for us. */ if (attach_ill == NULL) @@ -7895,7 +8058,7 @@ ip_newroute(queue_t *q, mblk_t *mp, ipaddr_t dst, ill_t *in_ill, conn_t *connp, * destination address via the specified nexthop. 
*/ ire = ire_cache_lookup(nexthop_addr, zoneid, - MBLK_GETLABEL(mp)); + MBLK_GETLABEL(mp), ipst); if (ire != NULL) { gw = nexthop_addr; ire_marks |= IRE_MARK_PRIVATE_ADDR; @@ -7903,7 +8066,8 @@ ip_newroute(queue_t *q, mblk_t *mp, ipaddr_t dst, ill_t *in_ill, conn_t *connp, ire = ire_ftable_lookup(nexthop_addr, 0, 0, IRE_INTERFACE, NULL, NULL, zoneid, 0, MBLK_GETLABEL(mp), - MATCH_IRE_TYPE | MATCH_IRE_SECATTR); + MATCH_IRE_TYPE | MATCH_IRE_SECATTR, + ipst); if (ire != NULL) { dst = nexthop_addr; } @@ -7913,7 +8077,8 @@ ip_newroute(queue_t *q, mblk_t *mp, ipaddr_t dst, ill_t *in_ill, conn_t *connp, NULL, &sire, zoneid, 0, MBLK_GETLABEL(mp), MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_PARENT | - MATCH_IRE_SECATTR | MATCH_IRE_COMPLETE); + MATCH_IRE_SECATTR | MATCH_IRE_COMPLETE, + ipst); } else { /* * attach_ill is set only for communicating with @@ -7929,7 +8094,7 @@ ip_newroute(queue_t *q, mblk_t *mp, ipaddr_t dst, ill_t *in_ill, conn_t *connp, ire = ire_ftable_lookup(dst, 0, 0, 0, attach_ipif, &sire, zoneid, 0, MBLK_GETLABEL(mp), MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL | - MATCH_IRE_SECATTR); + MATCH_IRE_SECATTR, ipst); ipif_refrele(attach_ipif); } ip3dbg(("ip_newroute: ire_ftable_lookup() " @@ -7970,7 +8135,7 @@ ip_newroute(queue_t *q, mblk_t *mp, ipaddr_t dst, ill_t *in_ill, conn_t *connp, ASSERT(sire != NULL); multirt_is_resolvable = ire_multirt_lookup(&ire, &sire, multirt_flags, - MBLK_GETLABEL(mp)); + MBLK_GETLABEL(mp), ipst); ip3dbg(("ip_newroute: multirt_is_resolvable %d, " "ire %p, sire %p\n", @@ -8031,7 +8196,7 @@ ip_newroute(queue_t *q, mblk_t *mp, ipaddr_t dst, ill_t *in_ill, conn_t *connp, return; } ip_rts_change(RTM_MISS, dst, 0, 0, 0, 0, 0, 0, - RTA_DST); + RTA_DST, ipst); if (attach_ill != NULL) ill_refrele(attach_ill); goto icmp_err_ret; @@ -8211,7 +8376,7 @@ ip_newroute(queue_t *q, mblk_t *mp, ipaddr_t dst, ill_t *in_ill, conn_t *connp, * address still exists. */ src_ipif = ipif_lookup_addr(sire->ire_src_addr, NULL, - zoneid, NULL, NULL, NULL, NULL); + zoneid, NULL, NULL, NULL, NULL, ipst); } if (src_ipif == NULL) { ire_marks |= IRE_MARK_USESRC_CHECK; @@ -8288,13 +8453,13 @@ ip_newroute(queue_t *q, mblk_t *mp, ipaddr_t dst, ill_t *in_ill, conn_t *connp, */ if ((sire != NULL) && (sire->ire_flags & RTF_MULTIRT)) { /* Force TTL of multirouted packets */ - if ((ip_multirt_ttl > 0) && - (ipha->ipha_ttl > ip_multirt_ttl)) { + if ((ipst->ips_ip_multirt_ttl > 0) && + (ipha->ipha_ttl > ipst->ips_ip_multirt_ttl)) { ip2dbg(("ip_newroute: forcing multirt TTL " "to %d (was %d), dst 0x%08x\n", - ip_multirt_ttl, ipha->ipha_ttl, + ipst->ips_ip_multirt_ttl, ipha->ipha_ttl, ntohl(sire->ire_addr))); - ipha->ipha_ttl = ip_multirt_ttl; + ipha->ipha_ttl = ipst->ips_ip_multirt_ttl; } } /* @@ -8440,7 +8605,8 @@ ip_newroute(queue_t *q, mblk_t *mp, ipaddr_t dst, ill_t *in_ill, conn_t *connp, (sire != NULL) ? 
&(sire->ire_uinfo) : &(save_ire->ire_uinfo), NULL, - gcgrp); + gcgrp, + ipst); if (ire == NULL) { if (gcgrp != NULL) { @@ -8608,7 +8774,8 @@ ip_newroute(queue_t *q, mblk_t *mp, ipaddr_t dst, ill_t *in_ill, conn_t *connp, (RTF_SETSRC | RTF_MULTIRT) : 0, /* flags */ &(save_ire->ire_uinfo), NULL, - gcgrp); + gcgrp, + ipst); if (dst_ill->ill_phys_addr_length == IP_ADDR_LEN) freeb(res_mp); @@ -8776,7 +8943,8 @@ ip_newroute(queue_t *q, mblk_t *mp, ipaddr_t dst, ill_t *in_ill, conn_t *connp, 0, /* flags if any */ &(save_ire->ire_uinfo), NULL, - NULL); + NULL, + ipst); if (ire == NULL) { ire_refrele(save_ire); @@ -8936,13 +9104,13 @@ ip_newroute(queue_t *q, mblk_t *mp, ipaddr_t dst, ill_t *in_ill, conn_t *connp, if (in_ill != NULL) { BUMP_MIB(in_ill->ill_ip_mib, ipIfStatsInDiscards); } else { - BUMP_MIB(&ip_mib, ipIfStatsInDiscards); + BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsInDiscards); } } else { if (dst_ill != NULL) { BUMP_MIB(dst_ill->ill_ip_mib, ipIfStatsOutDiscards); } else { - BUMP_MIB(&ip_mib, ipIfStatsOutDiscards); + BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); } } ASSERT(copy_mp == NULL); @@ -8973,7 +9141,7 @@ icmp_err_ret: if (in_ill != NULL) { BUMP_MIB(in_ill->ill_ip_mib, ipIfStatsInNoRoutes); } else { - BUMP_MIB(&ip_mib, ipIfStatsInNoRoutes); + BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsInNoRoutes); } q = WR(q); } else { @@ -8981,12 +9149,12 @@ icmp_err_ret: * There is no outgoing ill, so just increment the * system MIB. */ - BUMP_MIB(&ip_mib, ipIfStatsOutNoRoutes); + BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutNoRoutes); /* * Since ip_wput() isn't close to finished, we fill * in enough of the header for credible error reporting. */ - if (ip_hdr_complete(ipha, zoneid)) { + if (ip_hdr_complete(ipha, zoneid, ipst)) { /* Failed */ MULTIRT_DEBUG_UNTAG(first_mp); freemsg(first_mp); @@ -9010,12 +9178,12 @@ icmp_err_ret: } ire_refrele(ire); } - if (ip_source_routed(ipha)) { + if (ip_source_routed(ipha, ipst)) { icmp_unreachable(q, first_mp, ICMP_SOURCE_ROUTE_FAILED, - zoneid); + zoneid, ipst); return; } - icmp_unreachable(q, first_mp, ICMP_HOST_UNREACHABLE, zoneid); + icmp_unreachable(q, first_mp, ICMP_HOST_UNREACHABLE, zoneid, ipst); } ip_opt_info_t zero_info; @@ -9069,6 +9237,7 @@ ip_newroute_ipif(queue_t *q, mblk_t *mp, ipif_t *ipif, ipaddr_t dst, mblk_t *copy_mp = NULL; boolean_t multirt_resolve_next; ipaddr_t ipha_dst; + ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; /* * CGTP goes in a loop which looks up a new ipif, do an ipif_refhold @@ -9141,7 +9310,7 @@ ip_newroute_ipif(queue_t *q, mblk_t *mp, ipif_t *ipif, ipaddr_t dst, if (mctl_present && io->ipsec_out_attach_if) { attach_ill = ip_grab_attach_ill(NULL, first_mp, - io->ipsec_out_ill_index, B_FALSE); + io->ipsec_out_ill_index, B_FALSE, ipst); /* Failure case frees things for us. */ if (attach_ill == NULL) { @@ -9223,7 +9392,7 @@ ip_newroute_ipif(queue_t *q, mblk_t *mp, ipif_t *ipif, ipaddr_t dst, * address still exists. */ src_ipif = ipif_lookup_addr(fire->ire_src_addr, NULL, - zoneid, NULL, NULL, NULL, NULL); + zoneid, NULL, NULL, NULL, NULL, ipst); } if (((ipif->ipif_flags & IPIF_DEPRECATED) || (connp != NULL && ipif->ipif_zoneid != zoneid && @@ -9394,7 +9563,8 @@ ip_newroute_ipif(queue_t *q, mblk_t *mp, ipif_t *ipif, ipaddr_t dst, (save_ire == NULL ? 
&ire_uinfo_null : &save_ire->ire_uinfo), NULL, - NULL); + NULL, + ipst); freeb(res_mp); @@ -9472,7 +9642,7 @@ ip_newroute_ipif(queue_t *q, mblk_t *mp, ipif_t *ipif, ipaddr_t dst, if ((flags & RTF_MULTIRT) && (copy_mp != NULL)) { boolean_t need_resolve = ire_multirt_need_resolve(ipha_dst, - MBLK_GETLABEL(copy_mp)); + MBLK_GETLABEL(copy_mp), ipst); if (!need_resolve) { MULTIRT_DEBUG_UNTAG(copy_mp); freemsg(copy_mp); @@ -9493,7 +9663,7 @@ ip_newroute_ipif(queue_t *q, mblk_t *mp, ipif_t *ipif, ipaddr_t dst, */ ipif_refrele(ipif); ipif = ipif_lookup_group(ipha_dst, - zoneid); + zoneid, ipst); ip2dbg(("ip_newroute_ipif: " "multirt dst %08x, ipif %p\n", htonl(dst), (void *)ipif)); @@ -9557,7 +9727,8 @@ ip_newroute_ipif(queue_t *q, mblk_t *mp, ipif_t *ipif, ipaddr_t dst, (save_ire == NULL ? &ire_uinfo_null : &save_ire->ire_uinfo), NULL, - NULL); + NULL, + ipst); if (save_ire != NULL) { ire_refrele(save_ire); @@ -9650,7 +9821,7 @@ ip_newroute_ipif(queue_t *q, mblk_t *mp, ipif_t *ipif, ipaddr_t dst, if ((flags & RTF_MULTIRT) && (copy_mp != NULL)) { boolean_t need_resolve = ire_multirt_need_resolve(ipha_dst, - MBLK_GETLABEL(copy_mp)); + MBLK_GETLABEL(copy_mp), ipst); if (!need_resolve) { MULTIRT_DEBUG_UNTAG(copy_mp); freemsg(copy_mp); @@ -9671,7 +9842,7 @@ ip_newroute_ipif(queue_t *q, mblk_t *mp, ipif_t *ipif, ipaddr_t dst, */ ipif_refrele(ipif); ipif = ipif_lookup_group(ipha_dst, - zoneid); + zoneid, ipst); if (ipif != NULL) { mp = copy_mp; copy_mp = NULL; @@ -9710,7 +9881,7 @@ err_ret: * Since ip_wput() isn't close to finished, we fill * in enough of the header for credible error reporting. */ - if (ip_hdr_complete((ipha_t *)mp->b_rptr, zoneid)) { + if (ip_hdr_complete((ipha_t *)mp->b_rptr, zoneid, ipst)) { /* Failed */ freemsg(first_mp); if (ire != NULL) @@ -9731,7 +9902,7 @@ err_ret: } ire_refrele(ire); } - icmp_unreachable(q, first_mp, ICMP_HOST_UNREACHABLE, zoneid); + icmp_unreachable(q, first_mp, ICMP_HOST_UNREACHABLE, zoneid, ipst); } /* Name/Value Table Lookup Routine */ @@ -9748,12 +9919,6 @@ ip_nv_lookup(nv_t *nv, int value) } /* - * one day it can be patched to 1 from /etc/system for machines that have few - * fast network interfaces feeding multiple cpus. - */ -int ill_stream_putlocks = 0; - -/* * This is a module open, i.e. this is a control stream for access * to a DLPI device. We allocate an ill_t as the instance data in * this case. @@ -9761,12 +9926,11 @@ int ill_stream_putlocks = 0; int ip_modopen(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) { - uint32_t mem_cnt; - uint32_t cpu_cnt; - uint32_t min_cnt; - pgcnt_t mem_avail; ill_t *ill; int err; + zoneid_t zoneid; + netstack_t *ns; + ip_stack_t *ipst; /* * Prevent unprivileged processes from pushing IP so that @@ -9775,8 +9939,24 @@ ip_modopen(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) if (secpolicy_net_rawaccess(credp) != 0) return (EPERM); + ns = netstack_find_by_cred(credp); + ASSERT(ns != NULL); + ipst = ns->netstack_ip; + ASSERT(ipst != NULL); + + /* + * For exclusive stacks we set the zoneid to zero + * to make IP operate as if in the global zone. 
+ */ + if (ipst->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID) + zoneid = GLOBAL_ZONEID; + else + zoneid = crgetzoneid(credp); + ill = (ill_t *)mi_open_alloc_sleep(sizeof (ill_t)); q->q_ptr = WR(q)->q_ptr = ill; + ill->ill_ipst = ipst; + ill->ill_zoneid = zoneid; /* * ill_init initializes the ill fields and then sends down @@ -9785,6 +9965,7 @@ ip_modopen(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) err = ill_init(q, ill); if (err != 0) { mi_free(ill); + netstack_rele(ipst->ips_netstack); q->q_ptr = NULL; WR(q)->q_ptr = NULL; return (err); @@ -9818,41 +9999,13 @@ ip_modopen(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) return (err); } - /* - * ip_ire_max_bucket_cnt is sized below based on the memory - * size and the cpu speed of the machine. This is upper - * bounded by the compile time value of ip_ire_max_bucket_cnt - * and is lower bounded by the compile time value of - * ip_ire_min_bucket_cnt. Similar logic applies to - * ip6_ire_max_bucket_cnt. - */ - mem_avail = kmem_avail(); - mem_cnt = (mem_avail >> ip_ire_mem_ratio) / - ip_cache_table_size / sizeof (ire_t); - cpu_cnt = CPU->cpu_type_info.pi_clock >> ip_ire_cpu_ratio; - - min_cnt = MIN(cpu_cnt, mem_cnt); - if (min_cnt < ip_ire_min_bucket_cnt) - min_cnt = ip_ire_min_bucket_cnt; - if (ip_ire_max_bucket_cnt > min_cnt) { - ip_ire_max_bucket_cnt = min_cnt; - } - - mem_cnt = (mem_avail >> ip_ire_mem_ratio) / - ip6_cache_table_size / sizeof (ire_t); - min_cnt = MIN(cpu_cnt, mem_cnt); - if (min_cnt < ip6_ire_min_bucket_cnt) - min_cnt = ip6_ire_min_bucket_cnt; - if (ip6_ire_max_bucket_cnt > min_cnt) { - ip6_ire_max_bucket_cnt = min_cnt; - } - ill->ill_credp = credp; crhold(credp); - mutex_enter(&ip_mi_lock); - err = mi_open_link(&ip_g_head, (IDP)ill, devp, flag, sflag, credp); - mutex_exit(&ip_mi_lock); + mutex_enter(&ipst->ips_ip_mi_lock); + err = mi_open_link(&ipst->ips_ip_g_head, (IDP)ill, devp, flag, sflag, + credp); + mutex_exit(&ipst->ips_ip_mi_lock); if (err) { (void) ip_close(q, 0); return (err); @@ -9866,6 +10019,9 @@ ip_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) { conn_t *connp; major_t maj; + zoneid_t zoneid; + netstack_t *ns; + ip_stack_t *ipst; TRACE_1(TR_FAC_IP, TR_IP_OPEN, "ip_open: q %p", q); @@ -9878,11 +10034,34 @@ ip_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) return (ip_modopen(q, devp, flag, sflag, credp)); } + ns = netstack_find_by_cred(credp); + ASSERT(ns != NULL); + ipst = ns->netstack_ip; + ASSERT(ipst != NULL); + + /* + * For exclusive stacks we set the zoneid to zero + * to make IP operate as if in the global zone. + */ + if (ipst->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID) + zoneid = GLOBAL_ZONEID; + else + zoneid = crgetzoneid(credp); + /* * We are opening as a device. This is an IP client stream, and we * allocate an conn_t as the instance data. */ - connp = ipcl_conn_create(IPCL_IPCCONN, KM_SLEEP); + connp = ipcl_conn_create(IPCL_IPCCONN, KM_SLEEP, ipst->ips_netstack); + + /* + * ipcl_conn_create did a netstack_hold. 
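Both ip_modopen() and ip_open() above find the netstack from the opening credential and, for an exclusive-IP stack, behave as if the opener were in the global zone of that stack. A tiny model of that zoneid selection, with placeholder types standing in for netstack_t and the credential:

    #include <stdbool.h>

    #define GLOBAL_ZONEID 0

    typedef int zoneid_t;

    struct netstack {
        int  stackid;
        bool is_shared_stack;       /* the one global, shared IP stack */
    };

    /*
     * On an exclusive-IP stack every zone using it is treated as the
     * "global zone" of that stack; only on the shared stack does the
     * credential's zone id matter.
     */
    static zoneid_t
    open_zoneid(const struct netstack *ns, zoneid_t cred_zoneid)
    {
        return ns->is_shared_stack ? cred_zoneid : GLOBAL_ZONEID;
    }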
Undo the hold that was + * done by netstack_find_by_cred() + */ + netstack_rele(ipst->ips_netstack); + + connp->conn_zoneid = zoneid; + connp->conn_upq = q; q->q_ptr = WR(q)->q_ptr = connp; @@ -9893,7 +10072,7 @@ ip_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) if (geteminor(*devp) == IPV6_MINOR) { connp->conn_flags |= IPCL_ISV6; connp->conn_af_isv6 = B_TRUE; - ip_setqinfo(q, geteminor(*devp), B_FALSE); + ip_setqinfo(q, geteminor(*devp), B_FALSE, ipst); connp->conn_src_preferences = IPV6_PREFER_SRC_DEFAULT; } else { connp->conn_af_isv6 = B_FALSE; @@ -9901,6 +10080,7 @@ ip_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) } if ((connp->conn_dev = inet_minor_alloc(ip_minor_arena)) == 0) { + /* CONN_DEC_REF takes care of netstack_rele() */ q->q_ptr = WR(q)->q_ptr = NULL; CONN_DEC_REF(connp); return (EBUSY); @@ -9922,8 +10102,6 @@ ip_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) if (getpflags(NET_MAC_AWARE, credp) != 0) connp->conn_mac_exempt = B_TRUE; - connp->conn_zoneid = getzoneid(); - /* * This should only happen for ndd, netstat, raw socket or other SCTP * administrative ops. In these cases, we just need a normal conn_t @@ -9960,21 +10138,25 @@ ip_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) * is just an optimization to enter the best wput routine directly. */ void -ip_setqinfo(queue_t *q, minor_t minor, boolean_t bump_mib) +ip_setqinfo(queue_t *q, minor_t minor, boolean_t bump_mib, ip_stack_t *ipst) { ASSERT(q->q_flag & QREADR); ASSERT(WR(q)->q_next == NULL); ASSERT(q->q_ptr != NULL); if (minor == IPV6_MINOR) { - if (bump_mib) - BUMP_MIB(&ip6_mib, ipIfStatsOutSwitchIPVersion); + if (bump_mib) { + BUMP_MIB(&ipst->ips_ip6_mib, + ipIfStatsOutSwitchIPVersion); + } q->q_qinfo = &rinit_ipv6; WR(q)->q_qinfo = &winit_ipv6; (Q_TO_CONN(q))->conn_pkt_isv6 = B_TRUE; } else { - if (bump_mib) - BUMP_MIB(&ip_mib, ipIfStatsOutSwitchIPVersion); + if (bump_mib) { + BUMP_MIB(&ipst->ips_ip_mib, + ipIfStatsOutSwitchIPVersion); + } q->q_qinfo = &iprinit; WR(q)->q_qinfo = &ipwinit; (Q_TO_CONN(q))->conn_pkt_isv6 = B_FALSE; @@ -10100,15 +10282,13 @@ conn_restart_ipsec_waiter(conn_t *connp, void *arg) * Called from the ipsec_loader thread, outside any perimeter, to tell * ip qenable any of the queues waiting for the ipsec loader to * complete. - * - * Use ip_mi_lock to be safe here: all modifications of the mi lists - * are done with this lock held, so it's guaranteed that none of the - * links will change along the way. */ void -ip_ipsec_load_complete() +ip_ipsec_load_complete(ipsec_stack_t *ipss) { - ipcl_walk(conn_restart_ipsec_waiter, NULL); + netstack_t *ns = ipss->ipsec_netstack; + + ipcl_walk(conn_restart_ipsec_waiter, NULL, ns->netstack_ip); } /* @@ -10119,28 +10299,28 @@ ip_ipsec_load_complete() static boolean_t ip_check_for_ipsec_opt(queue_t *q, mblk_t *mp) { - conn_t *connp; + conn_t *connp = Q_TO_CONN(q); + ipsec_stack_t *ipss = connp->conn_netstack->netstack_ipsec; /* * Take IPsec requests and treat them special. */ if (ipsec_opt_present(mp)) { /* First check if IPsec is loaded. 
*/ - mutex_enter(&ipsec_loader_lock); - if (ipsec_loader_state != IPSEC_LOADER_WAIT) { - mutex_exit(&ipsec_loader_lock); + mutex_enter(&ipss->ipsec_loader_lock); + if (ipss->ipsec_loader_state != IPSEC_LOADER_WAIT) { + mutex_exit(&ipss->ipsec_loader_lock); return (B_FALSE); } - connp = Q_TO_CONN(q); mutex_enter(&connp->conn_lock); connp->conn_state_flags |= CONN_IPSEC_LOAD_WAIT; ASSERT(connp->conn_ipsec_opt_mp == NULL); connp->conn_ipsec_opt_mp = mp; mutex_exit(&connp->conn_lock); - mutex_exit(&ipsec_loader_lock); + mutex_exit(&ipss->ipsec_loader_lock); - ipsec_loader_loadnow(); + ipsec_loader_loadnow(ipss); return (B_TRUE); } return (B_FALSE); @@ -10173,6 +10353,9 @@ ipsec_set_req(cred_t *cr, conn_t *connp, ipsec_req_t *req) int fam; boolean_t is_pol_reset; int error = 0; + netstack_t *ns = connp->conn_netstack; + ip_stack_t *ipst = ns->netstack_ip; + ipsec_stack_t *ipss = ns->netstack_ipsec; #define REQ_MASK (IPSEC_PREF_REQUIRED|IPSEC_PREF_NEVER) @@ -10202,12 +10385,12 @@ ipsec_set_req(cred_t *cr, conn_t *connp, ipsec_req_t *req) * IPsec may not have been loaded for a request with zero * policies, so we don't fail in this case. */ - mutex_enter(&ipsec_loader_lock); - if (ipsec_loader_state != IPSEC_LOADER_SUCCEEDED) { - mutex_exit(&ipsec_loader_lock); + mutex_enter(&ipss->ipsec_loader_lock); + if (ipss->ipsec_loader_state != IPSEC_LOADER_SUCCEEDED) { + mutex_exit(&ipss->ipsec_loader_lock); return (EPROTONOSUPPORT); } - mutex_exit(&ipsec_loader_lock); + mutex_exit(&ipss->ipsec_loader_lock); /* * Test for valid requests. Invalid algorithms @@ -10227,7 +10410,7 @@ ipsec_set_req(cred_t *cr, conn_t *connp, ipsec_req_t *req) if (((ah_req & IPSEC_PREF_NEVER) || (esp_req & IPSEC_PREF_NEVER) || (se_req & IPSEC_PREF_NEVER)) && - secpolicy_net_config(cr, B_FALSE) != 0) { + secpolicy_ip_config(cr, B_FALSE) != 0) { return (EPERM); } @@ -10262,7 +10445,7 @@ ipsec_set_req(cred_t *cr, conn_t *connp, ipsec_req_t *req) */ if (is_pol_reset) { if (connp->conn_policy != NULL) { - IPPH_REFRELE(connp->conn_policy); + IPPH_REFRELE(connp->conn_policy, ipst->ips_netstack); connp->conn_policy = NULL; } connp->conn_flags &= ~IPCL_CHECK_POLICY; @@ -10272,11 +10455,12 @@ ipsec_set_req(cred_t *cr, conn_t *connp, ipsec_req_t *req) return (0); } - ph = connp->conn_policy = ipsec_polhead_split(connp->conn_policy); + ph = connp->conn_policy = ipsec_polhead_split(connp->conn_policy, + ipst->ips_netstack); if (ph == NULL) goto enomem; - ipsec_actvec_from_req(req, &actp, &nact); + ipsec_actvec_from_req(req, &actp, &nact, ipst->ips_netstack); if (actp == NULL) goto enomem; @@ -10287,11 +10471,13 @@ ipsec_set_req(cred_t *cr, conn_t *connp, ipsec_req_t *req) bzero(&sel, sizeof (sel)); sel.ipsl_valid = IPSL_IPV4; - pin4 = ipsec_policy_create(&sel, actp, nact, IPSEC_PRIO_SOCKET, NULL); + pin4 = ipsec_policy_create(&sel, actp, nact, IPSEC_PRIO_SOCKET, NULL, + ipst->ips_netstack); if (pin4 == NULL) goto enomem; - pout4 = ipsec_policy_create(&sel, actp, nact, IPSEC_PRIO_SOCKET, NULL); + pout4 = ipsec_policy_create(&sel, actp, nact, IPSEC_PRIO_SOCKET, NULL, + ipst->ips_netstack); if (pout4 == NULL) goto enomem; @@ -10302,12 +10488,12 @@ ipsec_set_req(cred_t *cr, conn_t *connp, ipsec_req_t *req) */ sel.ipsl_valid = IPSL_IPV6; pin6 = ipsec_policy_create(&sel, actp, nact, - IPSEC_PRIO_SOCKET, NULL); + IPSEC_PRIO_SOCKET, NULL, ipst->ips_netstack); if (pin6 == NULL) goto enomem; pout6 = ipsec_policy_create(&sel, actp, nact, - IPSEC_PRIO_SOCKET, NULL); + IPSEC_PRIO_SOCKET, NULL, ipst->ips_netstack); if (pout6 == NULL) goto enomem; @@ 
-10368,13 +10554,13 @@ enomem: if (actp != NULL) ipsec_actvec_free(actp, nact); if (pin4 != NULL) - IPPOL_REFRELE(pin4); + IPPOL_REFRELE(pin4, ipst->ips_netstack); if (pout4 != NULL) - IPPOL_REFRELE(pout4); + IPPOL_REFRELE(pout4, ipst->ips_netstack); if (pin6 != NULL) - IPPOL_REFRELE(pin6); + IPPOL_REFRELE(pin6, ipst->ips_netstack); if (pout6 != NULL) - IPPOL_REFRELE(pout6); + IPPOL_REFRELE(pout6, ipst->ips_netstack); return (ENOMEM); } @@ -10391,6 +10577,7 @@ ip_opt_set_ipif(conn_t *connp, ipaddr_t addr, boolean_t checkonly, int option, int error; ill_t *ill; int zoneid; + ip_stack_t *ipst = connp->conn_netstack->netstack_ip; ip2dbg(("ip_opt_set_ipif: ipaddr %X\n", addr)); @@ -10399,11 +10586,11 @@ ip_opt_set_ipif(conn_t *connp, ipaddr_t addr, boolean_t checkonly, int option, zoneid = IPCL_ZONEID(connp); if (option == IP_NEXTHOP) { ipif = ipif_lookup_onlink_addr(addr, - connp->conn_zoneid); + connp->conn_zoneid, ipst); } else { ipif = ipif_lookup_addr(addr, NULL, zoneid, CONNP_TO_WQ(connp), first_mp, ip_restart_optmgmt, - &error); + &error, ipst); } if (ipif == NULL) { if (error == EINPROGRESS) @@ -10501,12 +10688,13 @@ ip_opt_set_ill(conn_t *connp, int ifindex, boolean_t isv6, boolean_t checkonly, { ill_t *ill = NULL; int error = 0; + ip_stack_t *ipst = connp->conn_netstack->netstack_ip; ip2dbg(("ip_opt_set_ill: ifindex %d\n", ifindex)); if (ifindex != 0) { ASSERT(connp != NULL); ill = ill_lookup_on_ifindex(ifindex, isv6, CONNP_TO_WQ(connp), - first_mp, ip_restart_optmgmt, &error); + first_mp, ip_restart_optmgmt, &error, ipst); if (ill != NULL) { if (checkonly) { /* not supported by the virtual network iface */ @@ -10699,6 +10887,7 @@ ip_opt_set(queue_t *q, uint_t optset_context, int level, int name, boolean_t checkonly; ire_t *ire; boolean_t found; + ip_stack_t *ipst = connp->conn_netstack->netstack_ip; switch (optset_context) { @@ -10823,7 +11012,7 @@ ip_opt_set(queue_t *q, uint_t optset_context, int level, int name, case IPPROTO_IP: switch (name) { case IP_NEXTHOP: - if (secpolicy_net_config(cr, B_FALSE) != 0) + if (secpolicy_ip_config(cr, B_FALSE) != 0) return (EPERM); /* FALLTHRU */ case IP_MULTICAST_IF: @@ -10913,7 +11102,7 @@ ip_opt_set(queue_t *q, uint_t optset_context, int level, int name, */ ire = ire_ftable_lookup(group, IP_HOST_MASK, 0, IRE_HOST, NULL, NULL, ALL_ZONES, 0, NULL, - MATCH_IRE_MASK | MATCH_IRE_TYPE); + MATCH_IRE_MASK | MATCH_IRE_TYPE, ipst); if (ire != NULL) { if (ire->ire_flags & RTF_MULTIRT) { error = ip_multirt_apply_membership( @@ -11021,7 +11210,7 @@ ip_opt_set(queue_t *q, uint_t optset_context, int level, int name, */ ire = ire_ftable_lookup(grp, IP_HOST_MASK, 0, IRE_HOST, NULL, NULL, ALL_ZONES, 0, NULL, - MATCH_IRE_MASK | MATCH_IRE_TYPE); + MATCH_IRE_MASK | MATCH_IRE_TYPE, ipst); if (ire != NULL) { if (ire->ire_flags & RTF_MULTIRT) { error = ip_multirt_apply_membership( @@ -11103,7 +11292,7 @@ ip_opt_set(queue_t *q, uint_t optset_context, int level, int name, case MRT_ADD_MFC: case MRT_DEL_MFC: case MRT_ASSERT: - if ((error = secpolicy_net_config(cr, B_FALSE)) != 0) { + if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) { *outlenp = 0; return (error); } @@ -11254,7 +11443,7 @@ ip_opt_set(queue_t *q, uint_t optset_context, int level, int name, */ ire = ire_ftable_lookup_v6(&groupv6, &ipv6_all_ones, 0, IRE_HOST, NULL, NULL, ALL_ZONES, 0, NULL, - MATCH_IRE_MASK | MATCH_IRE_TYPE); + MATCH_IRE_MASK | MATCH_IRE_TYPE, ipst); if (ire != NULL) { if (ire->ire_flags & RTF_MULTIRT) { error = ip_multirt_apply_membership_v6( @@ -11340,7 +11529,7 @@ ip_opt_set(queue_t 
*q, uint_t optset_context, int level, int name, */ ire = ire_ftable_lookup_v6(&v6grp, &ipv6_all_ones, 0, IRE_HOST, NULL, NULL, ALL_ZONES, 0, NULL, - MATCH_IRE_MASK | MATCH_IRE_TYPE); + MATCH_IRE_MASK | MATCH_IRE_TYPE, ipst); if (ire != NULL) { if (ire->ire_flags & RTF_MULTIRT) { error = ip_multirt_apply_membership_v6( @@ -11463,7 +11652,7 @@ ip_opt_set(queue_t *q, uint_t optset_context, int level, int name, sin6 = (struct sockaddr_in6 *)invalp; ire = ire_route_lookup_v6(&sin6->sin6_addr, 0, 0, 0, NULL, NULL, connp->conn_zoneid, - NULL, MATCH_IRE_DEFAULT); + NULL, MATCH_IRE_DEFAULT, ipst); if (ire == NULL) { *outlenp = 0; @@ -11544,6 +11733,7 @@ int ip_opt_default(queue_t *q, int level, int name, uchar_t *ptr) { int *i1 = (int *)ptr; + ip_stack_t *ipst = CONNQ_TO_IPST(q); switch (level) { case IPPROTO_IP: @@ -11560,7 +11750,7 @@ ip_opt_default(queue_t *q, int level, int name, uchar_t *ptr) case IPPROTO_IPV6: switch (name) { case IPV6_UNICAST_HOPS: - *i1 = ipv6_def_hops; + *i1 = ipst->ips_ipv6_def_hops; return (sizeof (int)); case IPV6_MULTICAST_HOPS: *i1 = IP_DEFAULT_MULTICAST_TTL; @@ -11586,9 +11776,10 @@ ip_opt_default(queue_t *q, int level, int name, uchar_t *ptr) */ int ip_fill_mtuinfo(struct in6_addr *in6, in_port_t port, - struct ip6_mtuinfo *mtuinfo) + struct ip6_mtuinfo *mtuinfo, netstack_t *ns) { ire_t *ire; + ip_stack_t *ipst = ns->netstack_ip; if (IN6_IS_ADDR_UNSPECIFIED(in6)) return (-1); @@ -11598,7 +11789,7 @@ ip_fill_mtuinfo(struct in6_addr *in6, in_port_t port, mtuinfo->ip6m_addr.sin6_port = port; mtuinfo->ip6m_addr.sin6_addr = *in6; - ire = ire_cache_lookup_v6(in6, ALL_ZONES, NULL); + ire = ire_cache_lookup_v6(in6, ALL_ZONES, NULL, ipst); if (ire != NULL) { mtuinfo->ip6m_mtu = ire->ire_max_frag; ire_refrele(ire); @@ -11655,7 +11846,8 @@ ip_opt_get(queue_t *q, int level, int name, uchar_t *ptr) return (sizeof (int)); case IPV6_PATHMTU: return (ip_fill_mtuinfo(&connp->conn_remv6, 0, - (struct ip6_mtuinfo *)ptr)); + (struct ip6_mtuinfo *)ptr, + connp->conn_netstack)); default: break; } @@ -11697,8 +11889,11 @@ ip_forward_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *ioc_cr) long new_value; int *forwarding_value = (int *)cp; ill_t *walker; - boolean_t isv6 = (forwarding_value == &ipv6_forward); + boolean_t isv6; ill_walk_context_t ctx; + ip_stack_t *ipst = CONNQ_TO_IPST(q); + + isv6 = (forwarding_value == &ipst->ips_ipv6_forward); if (ddi_strtol(value, NULL, 10, &new_value) != 0 || new_value < 0 || new_value > 1) { @@ -11713,17 +11908,17 @@ ip_forward_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *ioc_cr) * * Bring all the ill's up to date with the new global value. 
*/ - rw_enter(&ill_g_lock, RW_READER); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); if (isv6) - walker = ILL_START_WALK_V6(&ctx); + walker = ILL_START_WALK_V6(&ctx, ipst); else - walker = ILL_START_WALK_V4(&ctx); + walker = ILL_START_WALK_V4(&ctx, ipst); for (; walker != NULL; walker = ill_next(&ctx, walker)) { (void) ill_forward_set(q, mp, (new_value != 0), (caddr_t)walker); } - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); return (0); } @@ -11734,14 +11929,14 @@ ip_forward_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *ioc_cr) * not to acquire any locks */ static boolean_t -ip_param_register(ipparam_t *ippa, size_t ippa_cnt, +ip_param_register(IDP *ndp, ipparam_t *ippa, size_t ippa_cnt, ipndp_t *ipnd, size_t ipnd_cnt) { for (; ippa_cnt-- > 0; ippa++) { if (ippa->ip_param_name && ippa->ip_param_name[0]) { - if (!nd_load(&ip_g_nd, ippa->ip_param_name, + if (!nd_load(ndp, ippa->ip_param_name, ip_param_get, ip_param_set, (caddr_t)ippa)) { - nd_free(&ip_g_nd); + nd_free(ndp); return (B_FALSE); } } @@ -11749,10 +11944,10 @@ ip_param_register(ipparam_t *ippa, size_t ippa_cnt, for (; ipnd_cnt-- > 0; ipnd++) { if (ipnd->ip_ndp_name && ipnd->ip_ndp_name[0]) { - if (!nd_load(&ip_g_nd, ipnd->ip_ndp_name, + if (!nd_load(ndp, ipnd->ip_ndp_name, ipnd->ip_ndp_getf, ipnd->ip_ndp_setf, ipnd->ip_ndp_data)) { - nd_free(&ip_g_nd); + nd_free(ndp); return (B_FALSE); } } @@ -12133,13 +12328,15 @@ ip_udp_check(queue_t *q, conn_t *connp, ill_t *ill, ipha_t *ipha, { uint32_t ill_index; uint_t in_flags; /* IPF_RECVSLLA and/or IPF_RECVIF */ + ip_stack_t *ipst = connp->conn_netstack->netstack_ip; + ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; ASSERT(ipha->ipha_protocol == IPPROTO_UDP); /* The ill_index of the incoming ILL */ ill_index = ((ill_t *)q->q_ptr)->ill_phyint->phyint_ifindex; /* pass packet up to the transport */ - if (CONN_INBOUND_POLICY_PRESENT(connp) || mctl_present) { + if (CONN_INBOUND_POLICY_PRESENT(connp, ipss) || mctl_present) { *first_mpp = ipsec_check_inbound_policy(*first_mpp, connp, ipha, NULL, mctl_present); if (*first_mpp == NULL) { @@ -12148,7 +12345,7 @@ ip_udp_check(queue_t *q, conn_t *connp, ill_t *ill, ipha_t *ipha, } /* Initiate IPPF processing for fastpath UDP */ - if (IPP_ENABLED(IPP_LOCAL_IN)) { + if (IPP_ENABLED(IPP_LOCAL_IN, ipst)) { ip_process(IPP_LOCAL_IN, mpp, ill_index); if (*mpp == NULL) { ip2dbg(("ip_input_ipsec_process: UDP pkt " @@ -12198,7 +12395,8 @@ ip_udp_check(queue_t *q, conn_t *connp, ill_t *ill, ipha_t *ipha, * If the call fails then the original mblk is * returned. */ - *mpp = ip_add_info(*mpp, ill, in_flags, IPCL_ZONEID(connp)); + *mpp = ip_add_info(*mpp, ill, in_flags, IPCL_ZONEID(connp), + ipst); } return (B_TRUE); @@ -12241,6 +12439,7 @@ ip_rput_fragment(queue_t *q, mblk_t **mpp, ipha_t *ipha, uint8_t ecn_info = 0; uint32_t packet_size; boolean_t pruned = B_FALSE; + ip_stack_t *ipst = ill->ill_ipst; if (cksum_val != NULL) *cksum_val = 0; @@ -12251,7 +12450,7 @@ ip_rput_fragment(queue_t *q, mblk_t **mpp, ipha_t *ipha, * Drop the fragmented as early as possible, if * we don't have resource(s) to re-assemble. 
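The ip_forward_set() and ip_param_register() hunks above follow the conversion pattern used throughout this change: a module-global tunable (ip_g_nd, ipv6_forward, ip_reass_queue_bytes) becomes a field of ip_stack_t, and code that used to consult the global now receives or derives the stack instance explicitly. The standalone sketch below models that idea only; the names (ip_stack, ip_stack_create, forwarding_set) are invented for illustration and are not the illumos interfaces.

	/*
	 * Simplified model of the "global tunable -> per-stack field"
	 * conversion.  All names are illustrative, not illumos code.
	 */
	#include <stdio.h>
	#include <stdlib.h>

	struct ip_stack {
		int	ips_ip_def_ttl;		/* was a module global */
		int	ips_ipv6_forward;	/* was a module global */
		long	ips_reass_queue_bytes;	/* was a module global */
	};

	/* Each stack instance gets its own copy of the defaults. */
	static struct ip_stack *
	ip_stack_create(void)
	{
		struct ip_stack *ipst = malloc(sizeof (*ipst));

		if (ipst == NULL)
			return (NULL);
		ipst->ips_ip_def_ttl = 255;
		ipst->ips_ipv6_forward = 0;
		ipst->ips_reass_queue_bytes = 1000000;
		return (ipst);
	}

	/* Callers pass the instance instead of touching a global. */
	static void
	forwarding_set(struct ip_stack *ipst, int on)
	{
		ipst->ips_ipv6_forward = on;
	}

	int
	main(void)
	{
		struct ip_stack *global = ip_stack_create();
		struct ip_stack *zone = ip_stack_create();

		forwarding_set(global, 1);	/* affects one instance only */
		printf("global=%d zone=%d\n",
		    global->ips_ipv6_forward, zone->ips_ipv6_forward);
		free(global);
		free(zone);
		return (0);
	}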
*/ - if (ip_reass_queue_bytes == 0) { + if (ipst->ips_ip_reass_queue_bytes == 0) { freemsg(mp); return (B_FALSE); } @@ -12342,10 +12541,10 @@ ip_rput_fragment(queue_t *q, mblk_t **mpp, ipha_t *ipha, /* If the reassembly list for this ILL will get too big, prune it */ if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >= - ip_reass_queue_bytes) { + ipst->ips_ip_reass_queue_bytes) { ill_frag_prune(ill, - (ip_reass_queue_bytes < msg_len) ? 0 : - (ip_reass_queue_bytes - msg_len)); + (ipst->ips_ip_reass_queue_bytes < msg_len) ? 0 : + (ipst->ips_ip_reass_queue_bytes - msg_len)); pruned = B_TRUE; } @@ -12398,7 +12597,7 @@ ip_rput_fragment(queue_t *q, mblk_t **mpp, ipha_t *ipha, return (B_FALSE); } - if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS) { + if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS(ipst)) { /* * Too many fragmented packets in this hash * bucket. Free the oldest. @@ -12662,7 +12861,8 @@ reass_done: * the mp. caller is responsible for decrementing ire ref cnt. */ static boolean_t -ip_options_cksum(queue_t *q, ill_t *ill, mblk_t *mp, ipha_t *ipha, ire_t *ire) +ip_options_cksum(queue_t *q, ill_t *ill, mblk_t *mp, ipha_t *ipha, ire_t *ire, + ip_stack_t *ipst) { mblk_t *first_mp; boolean_t mctl_present; @@ -12679,14 +12879,15 @@ ip_options_cksum(queue_t *q, ill_t *ill, mblk_t *mp, ipha_t *ipha, ire_t *ire) if (ill != NULL) { BUMP_MIB(ill->ill_ip_mib, ipIfStatsInCksumErrs); } else { - BUMP_MIB(&ip_mib, ipIfStatsInCksumErrs); + BUMP_MIB(&ipst->ips_ip_mib, + ipIfStatsInCksumErrs); } freemsg(first_mp); return (B_FALSE); } } - if (!ip_rput_local_options(q, mp, ipha, ire)) { + if (!ip_rput_local_options(q, mp, ipha, ire, ipst)) { if (mctl_present) freeb(first_mp); return (B_FALSE); @@ -12710,6 +12911,10 @@ ip_udp_input(queue_t *q, mblk_t *mp, ipha_t *ipha, ire_t *ire, uint16_t *up; ill_t *ill = (ill_t *)q->q_ptr; uint16_t reass_hck_flags = 0; + ip_stack_t *ipst; + + ASSERT(recv_ill != NULL); + ipst = recv_ill->ill_ipst; #define rptr ((uchar_t *)ipha) @@ -12798,7 +13003,7 @@ ip_udp_input(queue_t *q, mblk_t *mp, ipha_t *ipha, ire_t *ire, hck_flags = DB_CKSUMFLAGS(mp); if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) - IP_STAT(ip_in_sw_cksum); + IP_STAT(ipst, ip_in_sw_cksum); IP_CKSUM_RECV(hck_flags, u1, (uchar_t *)(rptr + DB_CKSUMSTART(mp)), @@ -12808,11 +13013,11 @@ ip_udp_input(queue_t *q, mblk_t *mp, ipha_t *ipha, ire_t *ire, if (cksum_err) { BUMP_MIB(ill->ill_ip_mib, udpIfStatsInCksumErrs); if (hck_flags & HCK_FULLCKSUM) - IP_STAT(ip_udp_in_full_hw_cksum_err); + IP_STAT(ipst, ip_udp_in_full_hw_cksum_err); else if (hck_flags & HCK_PARTIALCKSUM) - IP_STAT(ip_udp_in_part_hw_cksum_err); + IP_STAT(ipst, ip_udp_in_part_hw_cksum_err); else - IP_STAT(ip_udp_in_sw_cksum_err); + IP_STAT(ipst, ip_udp_in_sw_cksum_err); freemsg(first_mp); return; @@ -12824,9 +13029,9 @@ ip_udp_input(queue_t *q, mblk_t *mp, ipha_t *ipha, ire_t *ire, goto udpslowpath; if ((connp = ipcl_classify_v4(mp, IPPROTO_UDP, IP_SIMPLE_HDR_LENGTH, - ire->ire_zoneid)) != NULL) { + ire->ire_zoneid, ipst)) != NULL) { ASSERT(connp->conn_upq != NULL); - IP_STAT(ip_udp_fast_path); + IP_STAT(ipst, ip_udp_fast_path); if (CONN_UDP_FLOWCTLD(connp)) { freemsg(mp); @@ -12866,7 +13071,7 @@ ip_udp_input(queue_t *q, mblk_t *mp, ipha_t *ipha, ire_t *ire, goto udpslowpath; ipoptions: - if (!ip_options_cksum(q, ill, mp, ipha, ire)) { + if (!ip_options_cksum(q, ill, mp, ipha, ire, ipst)) { goto slow_done; } @@ -12914,7 +13119,7 @@ udppullup: boolean_t cksum_err; if ((reass_hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) - IP_STAT(ip_in_sw_cksum); + 
IP_STAT(ipst, ip_in_sw_cksum); IP_CKSUM_RECV_REASS(reass_hck_flags, (int32_t)((uchar_t *)up - (uchar_t *)ipha), @@ -12925,11 +13130,11 @@ udppullup: BUMP_MIB(ill->ill_ip_mib, udpIfStatsInCksumErrs); if (reass_hck_flags & HCK_FULLCKSUM) - IP_STAT(ip_udp_in_full_hw_cksum_err); + IP_STAT(ipst, ip_udp_in_full_hw_cksum_err); else if (reass_hck_flags & HCK_PARTIALCKSUM) - IP_STAT(ip_udp_in_part_hw_cksum_err); + IP_STAT(ipst, ip_udp_in_part_hw_cksum_err); else - IP_STAT(ip_udp_in_sw_cksum_err); + IP_STAT(ipst, ip_udp_in_sw_cksum_err); freemsg(first_mp); goto slow_done; @@ -12946,7 +13151,7 @@ udpslowpath: mctl_present, B_TRUE, recv_ill, ire->ire_zoneid); slow_done: - IP_STAT(ip_udp_slow_path); + IP_STAT(ipst, ip_udp_slow_path); return; #undef iphs @@ -12973,6 +13178,8 @@ ip_tcp_input(mblk_t *mp, ipha_t *ipha, ill_t *recv_ill, boolean_t mctl_present, zoneid_t zoneid = ire->ire_zoneid; boolean_t cksum_err; uint16_t hck_flags = 0; + ip_stack_t *ipst = recv_ill->ill_ipst; + ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; #define rptr ((uchar_t *)ipha) @@ -13032,7 +13239,7 @@ ip_tcp_input(mblk_t *mp, ipha_t *ipha, ill_t *recv_ill, boolean_t mctl_present, /* does packet contain IP+TCP headers? */ len = mp->b_wptr - rptr; if (len < (IP_SIMPLE_HDR_LENGTH + TCP_MIN_HEADER_LENGTH)) { - IP_STAT(ip_tcppullup); + IP_STAT(ipst, ip_tcppullup); goto tcppullup; } @@ -13044,7 +13251,7 @@ ip_tcp_input(mblk_t *mp, ipha_t *ipha, ill_t *recv_ill, boolean_t mctl_present, * otherwise we are still in the fast path */ if (len < (offset << 2) + IP_SIMPLE_HDR_LENGTH) { - IP_STAT(ip_tcpoptions); + IP_STAT(ipst, ip_tcpoptions); goto tcpoptions; } @@ -13052,7 +13259,7 @@ ip_tcp_input(mblk_t *mp, ipha_t *ipha, ill_t *recv_ill, boolean_t mctl_present, if ((mp1 = mp->b_cont) != NULL) { /* more then two? */ if (mp1->b_cont != NULL) { - IP_STAT(ip_multipkttcp); + IP_STAT(ipst, ip_multipkttcp); goto multipkttcp; } len += mp1->b_wptr - mp1->b_rptr; @@ -13082,7 +13289,7 @@ ip_tcp_input(mblk_t *mp, ipha_t *ipha, ill_t *recv_ill, boolean_t mctl_present, hck_flags = DB_CKSUMFLAGS(mp); if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) - IP_STAT(ip_in_sw_cksum); + IP_STAT(ipst, ip_in_sw_cksum); IP_CKSUM_RECV(hck_flags, u1, (uchar_t *)(rptr + DB_CKSUMSTART(mp)), @@ -13093,19 +13300,19 @@ ip_tcp_input(mblk_t *mp, ipha_t *ipha, ill_t *recv_ill, boolean_t mctl_present, BUMP_MIB(ill->ill_ip_mib, tcpIfStatsInErrs); if (hck_flags & HCK_FULLCKSUM) - IP_STAT(ip_tcp_in_full_hw_cksum_err); + IP_STAT(ipst, ip_tcp_in_full_hw_cksum_err); else if (hck_flags & HCK_PARTIALCKSUM) - IP_STAT(ip_tcp_in_part_hw_cksum_err); + IP_STAT(ipst, ip_tcp_in_part_hw_cksum_err); else - IP_STAT(ip_tcp_in_sw_cksum_err); + IP_STAT(ipst, ip_tcp_in_sw_cksum_err); goto error; } try_again: - if ((connp = ipcl_classify_v4(mp, IPPROTO_TCP, ip_hdr_len, zoneid)) == - NULL) { + if ((connp = ipcl_classify_v4(mp, IPPROTO_TCP, ip_hdr_len, + zoneid, ipst)) == NULL) { /* Send the TH_RST */ goto no_conn; } @@ -13124,7 +13331,7 @@ try_again: * can avoid extra work. 
*/ if (IPCL_IS_TCP4_CONNECTED_NO_POLICY(connp) && !mctl_present && - !IPP_ENABLED(IPP_LOCAL_IN)) { + !IPP_ENABLED(IPP_LOCAL_IN, ipst)) { ASSERT(first_mp == mp); BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); SET_SQUEUE(mp, tcp_rput_data, connp); @@ -13138,13 +13345,13 @@ try_again: DB_CKSUMSTART(mp) = (intptr_t)ip_squeue_get(ill_ring); if (IPCL_IS_FULLY_BOUND(connp) && !mctl_present && - !CONN_INBOUND_POLICY_PRESENT(connp)) { + !CONN_INBOUND_POLICY_PRESENT(connp, ipss)) { BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); SET_SQUEUE(mp, connp->conn_recv, connp); return (mp); } else if (IPCL_IS_BOUND(connp) && !mctl_present && - !CONN_INBOUND_POLICY_PRESENT(connp)) { + !CONN_INBOUND_POLICY_PRESENT(connp, ipss)) { BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); ip_squeue_enter_unbound++; @@ -13168,7 +13375,8 @@ try_again: return (NULL); } if (flags & TH_ACK) { - tcp_xmit_listeners_reset(first_mp, ip_hdr_len, zoneid); + tcp_xmit_listeners_reset(first_mp, ip_hdr_len, zoneid, + ipst->ips_netstack->netstack_tcp); CONN_DEC_REF(connp); return (NULL); } @@ -13178,7 +13386,7 @@ try_again: return (NULL); } - if (CONN_INBOUND_POLICY_PRESENT(connp) || mctl_present) { + if (CONN_INBOUND_POLICY_PRESENT(connp, ipss) || mctl_present) { first_mp = ipsec_check_inbound_policy(first_mp, connp, ipha, NULL, mctl_present); if (first_mp == NULL) { @@ -13212,7 +13420,7 @@ try_again: } /* Initiate IPPF processing for fastpath */ - if (IPP_ENABLED(IPP_LOCAL_IN)) { + if (IPP_ENABLED(IPP_LOCAL_IN, ipst)) { uint32_t ill_index; ill_index = recv_ill->ill_phyint->phyint_ifindex; @@ -13242,7 +13450,7 @@ try_again: * make sure IPF_RECVIF is passed to ip_add_info. */ mp = ip_add_info(mp, recv_ill, flags|IPF_RECVIF, - IPCL_ZONEID(connp)); + IPCL_ZONEID(connp), ipst); if (mp == NULL) { BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); CONN_DEC_REF(connp); @@ -13272,7 +13480,7 @@ try_again: no_conn: /* Initiate IPPf processing, if needed. */ - if (IPP_ENABLED(IPP_LOCAL_IN)) { + if (IPP_ENABLED(IPP_LOCAL_IN, ipst)) { uint32_t ill_index; ill_index = recv_ill->ill_phyint->phyint_ifindex; ip_process(IPP_LOCAL_IN, &first_mp, ill_index); @@ -13282,10 +13490,11 @@ no_conn: } BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); - tcp_xmit_listeners_reset(first_mp, IPH_HDR_LENGTH(mp->b_rptr), zoneid); + tcp_xmit_listeners_reset(first_mp, IPH_HDR_LENGTH(mp->b_rptr), zoneid, + ipst->ips_netstack->netstack_tcp); return (NULL); ipoptions: - if (!ip_options_cksum(q, ill, first_mp, ipha, ire)) { + if (!ip_options_cksum(q, ill, first_mp, ipha, ire, ipst)) { goto slow_done; } @@ -13384,13 +13593,13 @@ multipkttcp: /* * Not M_DATA mblk or its a dup, so do the checksum now. 
*/ - IP_STAT(ip_in_sw_cksum); + IP_STAT(ipst, ip_in_sw_cksum); if (IP_CSUM(mp, (int32_t)((uchar_t *)up - rptr), u1) != 0) { BUMP_MIB(ill->ill_ip_mib, tcpIfStatsInErrs); goto error; } - IP_STAT(ip_tcp_slow_path); + IP_STAT(ipst, ip_tcp_slow_path); goto try_again; #undef iphs #undef rptr @@ -13418,6 +13627,12 @@ ip_sctp_input(mblk_t *mp, ipha_t *ipha, ill_t *recv_ill, boolean_t mctl_present, uint_t ipif_seqid; in6_addr_t map_src, map_dst; ill_t *ill = (ill_t *)q->q_ptr; + ip_stack_t *ipst; + sctp_stack_t *sctps; + + ASSERT(recv_ill != NULL); + ipst = recv_ill->ill_ipst; + sctps = ipst->ips_netstack->netstack_sctp; #define rptr ((uchar_t *)ipha) @@ -13495,7 +13710,7 @@ find_sctp_client: sctph->sh_chksum = 0; calcsum = sctp_cksum(mp, u1); if (calcsum != pktsum) { - BUMP_MIB(&sctp_mib, sctpChecksumError); + BUMP_MIB(&sctps->sctps_mib, sctpChecksumError); goto error; } sctph->sh_chksum = pktsum; @@ -13510,7 +13725,7 @@ find_sctp_client: IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &map_dst); IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &map_src); if ((connp = sctp_fanout(&map_src, &map_dst, ports, ipif_seqid, zoneid, - mp)) == NULL) { + mp, sctps)) == NULL) { /* Check for raw socket or OOTB handling */ goto no_conn; } @@ -13527,7 +13742,7 @@ no_conn: ipoptions: DB_CKSUMFLAGS(mp) = 0; - if (!ip_options_cksum(q, ill, first_mp, ipha, ire)) + if (!ip_options_cksum(q, ill, first_mp, ipha, ire, ipst)) goto slow_done; UPDATE_IB_PKT_COUNT(ire); @@ -13565,7 +13780,7 @@ slow_done: static boolean_t ip_rput_multimblk_ipoptions(queue_t *q, ill_t *ill, mblk_t *mp, ipha_t **iphapp, - ipaddr_t *dstp) + ipaddr_t *dstp, ip_stack_t *ipst) { uint_t opt_len; ipha_t *ipha; @@ -13573,7 +13788,7 @@ ip_rput_multimblk_ipoptions(queue_t *q, ill_t *ill, mblk_t *mp, ipha_t **iphapp, uint_t pkt_len; ASSERT(ill != NULL); - IP_STAT(ip_ipoptions); + IP_STAT(ipst, ip_ipoptions); ipha = *iphapp; #define rptr ((uchar_t *)ipha) @@ -13616,8 +13831,8 @@ ip_rput_multimblk_ipoptions(queue_t *q, ill_t *ill, mblk_t *mp, ipha_t **iphapp, * destination address, which may have been affected * by source routing. */ - IP_STAT(ip_opt); - if (ip_rput_options(q, mp, ipha, dstp) == -1) { + IP_STAT(ipst, ip_opt); + if (ip_rput_options(q, mp, ipha, dstp, ipst) == -1) { BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); return (B_FALSE); } @@ -13646,11 +13861,14 @@ ip_rput_noire(queue_t *q, ill_t *in_ill, mblk_t *mp, int ll_multicast, ill_t *ill; ire_t *ire; boolean_t check_multirt = B_FALSE; + ip_stack_t *ipst; ipha = (ipha_t *)mp->b_rptr; ill = (ill_t *)q->q_ptr; ASSERT(ill != NULL); + ipst = ill->ill_ipst; + /* * No IRE for this destination, so it can't be for us. * Unless we are forwarding, drop the packet. @@ -13663,7 +13881,7 @@ ip_rput_noire(queue_t *q, ill_t *in_ill, mblk_t *mp, int ll_multicast, freemsg(mp); return (NULL); } - if (!(ill->ill_flags & ILLF_ROUTER) && !ip_source_routed(ipha)) { + if (!(ill->ill_flags & ILLF_ROUTER) && !ip_source_routed(ipha, ipst)) { BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); freemsg(mp); return (NULL); @@ -13695,15 +13913,15 @@ ip_rput_noire(queue_t *q, ill_t *in_ill, mblk_t *mp, int ll_multicast, /* * Now hand the packet to ip_newroute. 
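The receive-path hunks above (ip_udp_input, ip_tcp_input, ip_sctp_input, ip_rput_noire) all open the same way: take the receiving ill, derive ip_stack_t through its ill_ipst back-pointer, and from then on pass ipst to every helper and statistics macro. The minimal model below illustrates that back-pointer arrangement; the types, field names, and counter are invented for the example.

	/*
	 * Minimal model of the ill -> ip_stack back-pointer used by the
	 * receive path; names are invented for illustration.
	 */
	#include <assert.h>
	#include <stdio.h>

	struct ip_stack {
		unsigned long	ips_in_packets;	/* per-stack counter */
	};

	struct ill {				/* one network interface */
		const char	*ill_name;
		struct ip_stack	*ill_ipst;	/* owning stack instance */
	};

	static void
	ip_input_model(struct ill *recv_ill)
	{
		struct ip_stack *ipst;

		assert(recv_ill != NULL);
		ipst = recv_ill->ill_ipst;	/* derive the instance once */
		ipst->ips_in_packets++;		/* then use it throughout */
	}

	int
	main(void)
	{
		struct ip_stack stack0 = { 0 };
		struct ip_stack stack1 = { 0 };
		struct ill nic0 = { "nic0", &stack0 };
		struct ill nic1 = { "nic1", &stack1 };

		ip_input_model(&nic0);
		ip_input_model(&nic0);
		ip_input_model(&nic1);
		printf("stack0=%lu stack1=%lu\n",
		    stack0.ips_in_packets, stack1.ips_in_packets);
		return (0);
	}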
*/ - ip_newroute(q, mp, dst, in_ill, NULL, GLOBAL_ZONEID); + ip_newroute(q, mp, dst, in_ill, NULL, GLOBAL_ZONEID, ipst); return (NULL); } ire = ire_forward(dst, &check_multirt, NULL, NULL, - MBLK_GETLABEL(mp)); + MBLK_GETLABEL(mp), ipst); if (ire == NULL && check_multirt) { /* Let ip_newroute handle CGTP */ - ip_newroute(q, mp, dst, in_ill, NULL, GLOBAL_ZONEID); + ip_newroute(q, mp, dst, in_ill, NULL, GLOBAL_ZONEID, ipst); return (NULL); } @@ -13714,11 +13932,12 @@ ip_rput_noire(queue_t *q, ill_t *in_ill, mblk_t *mp, int ll_multicast, /* send icmp unreachable */ q = WR(q); /* Sent by forwarding path, and router is global zone */ - if (ip_source_routed(ipha)) { + if (ip_source_routed(ipha, ipst)) { icmp_unreachable(q, mp, ICMP_SOURCE_ROUTE_FAILED, - GLOBAL_ZONEID); + GLOBAL_ZONEID, ipst); } else { - icmp_unreachable(q, mp, ICMP_HOST_UNREACHABLE, GLOBAL_ZONEID); + icmp_unreachable(q, mp, ICMP_HOST_UNREACHABLE, GLOBAL_ZONEID, + ipst); } return (NULL); @@ -13729,7 +13948,7 @@ ip_rput_noire(queue_t *q, ill_t *in_ill, mblk_t *mp, int ll_multicast, * check ip header length and align it. */ static boolean_t -ip_check_and_align_header(queue_t *q, mblk_t *mp) +ip_check_and_align_header(queue_t *q, mblk_t *mp, ip_stack_t *ipst) { ssize_t len; ill_t *ill; @@ -13741,9 +13960,9 @@ ip_check_and_align_header(queue_t *q, mblk_t *mp) ill = (ill_t *)q->q_ptr; if (!OK_32PTR(mp->b_rptr)) - IP_STAT(ip_notaligned1); + IP_STAT(ipst, ip_notaligned1); else - IP_STAT(ip_notaligned2); + IP_STAT(ipst, ip_notaligned2); /* Guard against bogus device drivers */ if (len < 0) { /* clear b_prev - used by ip_mroute_decap */ @@ -13779,6 +13998,7 @@ ip_rput_notforus(queue_t **qp, mblk_t *mp, ire_t *ire, ill_t *ill) queue_t *q; ill_t *ire_ill; uint_t ill_ifindex; + ip_stack_t *ipst = ill->ill_ipst; q = *qp; /* @@ -13822,7 +14042,7 @@ ip_rput_notforus(queue_t **qp, mblk_t *mp, ire_t *ire, ill_t *ill) } if (check_multi && - ip_strict_dst_multihoming && + ipst->ips_ip_strict_dst_multihoming && ((ill->ill_flags & ire->ire_ipif->ipif_ill->ill_flags & ILLF_ROUTER) == 0)) { @@ -13856,7 +14076,7 @@ ip_fast_forward(ire_t *ire, ipaddr_t dst, ill_t *ill, mblk_t *mp) uint32_t sum; queue_t *dev_q; boolean_t check_multirt = B_FALSE; - + ip_stack_t *ipst = ill->ill_ipst; ipha = (ipha_t *)mp->b_rptr; @@ -13874,7 +14094,7 @@ ip_fast_forward(ire_t *ire, ipaddr_t dst, ill_t *ill, mblk_t *mp) goto drop; } src_ire = ire_ctable_lookup(ipha->ipha_src, 0, IRE_BROADCAST, NULL, - ALL_ZONES, NULL, MATCH_IRE_TYPE); + ALL_ZONES, NULL, MATCH_IRE_TYPE, ipst); if (src_ire != NULL) { BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); @@ -13884,7 +14104,7 @@ ip_fast_forward(ire_t *ire, ipaddr_t dst, ill_t *ill, mblk_t *mp) /* No ire cache of nexthop. So first create one */ if (ire == NULL) { - ire = ire_forward(dst, &check_multirt, NULL, NULL, NULL); + ire = ire_forward(dst, &check_multirt, NULL, NULL, NULL, ipst); /* * We only come to ip_fast_forward if ip_cgtp_filter is * is not set. 
So upon return from ire_forward @@ -13898,12 +14118,14 @@ ip_fast_forward(ire_t *ire, ipaddr_t dst, ill_t *ill, mblk_t *mp) mp->b_prev = mp->b_next = 0; /* send icmp unreachable */ /* Sent by forwarding path, and router is global zone */ - if (ip_source_routed(ipha)) { + if (ip_source_routed(ipha, ipst)) { icmp_unreachable(ill->ill_wq, mp, - ICMP_SOURCE_ROUTE_FAILED, GLOBAL_ZONEID); + ICMP_SOURCE_ROUTE_FAILED, GLOBAL_ZONEID, + ipst); } else { icmp_unreachable(ill->ill_wq, mp, - ICMP_HOST_UNREACHABLE, GLOBAL_ZONEID); + ICMP_HOST_UNREACHABLE, GLOBAL_ZONEID, + ipst); } return (ire); } @@ -13941,8 +14163,9 @@ ip_fast_forward(ire_t *ire, ipaddr_t dst, ill_t *ill, mblk_t *mp) DTRACE_PROBE4(ip4__forwarding__start, ill_t *, ill, ill_t *, stq_ill, ipha_t *, ipha, mblk_t *, mp); - FW_HOOKS(ip4_forwarding_event, ipv4firewall_forwarding, - ill, stq_ill, ipha, mp, mp); + FW_HOOKS(ipst->ips_ip4_forwarding_event, + ipst->ips_ipv4firewall_forwarding, + ill, stq_ill, ipha, mp, mp, ipst); DTRACE_PROBE1(ip4__forwarding__end, mblk_t *, mp); @@ -13972,9 +14195,9 @@ ip_fast_forward(ire_t *ire, ipaddr_t dst, ill_t *ill, mblk_t *mp) DTRACE_PROBE4(ip4__physical__out__start, ill_t *, NULL, ill_t *, stq_ill, ipha_t *, ipha, mblk_t *, mp); - FW_HOOKS(ip4_physical_out_event, - ipv4firewall_physical_out, - NULL, stq_ill, ipha, mp, mpip); + FW_HOOKS(ipst->ips_ip4_physical_out_event, + ipst->ips_ipv4firewall_physical_out, + NULL, stq_ill, ipha, mp, mpip, ipst); DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, mp); if (mp == NULL) @@ -14017,6 +14240,7 @@ ip_rput_process_forward(queue_t *q, mblk_t *mp, ire_t *ire, ipha_t *ipha, ill_group_t *ire_group; queue_t *dev_q; ire_t *src_ire; + ip_stack_t *ipst = ill->ill_ipst; ASSERT(ire->ire_stq != NULL); @@ -14037,7 +14261,7 @@ ip_rput_process_forward(queue_t *q, mblk_t *mp, ire_t *ire, ipha_t *ipha, * the check again for code-reusability */ src_ire = ire_ctable_lookup(ipha->ipha_src, 0, IRE_BROADCAST, NULL, - ALL_ZONES, NULL, MATCH_IRE_TYPE); + ALL_ZONES, NULL, MATCH_IRE_TYPE, ipst); if (src_ire != NULL || ntohl(ipha->ipha_dst) == INADDR_ANY || IN_BADCLASS(ntohl(ipha->ipha_dst))) { if (src_ire != NULL) @@ -14065,10 +14289,10 @@ ip_rput_process_forward(queue_t *q, mblk_t *mp, ire_t *ire, ipha_t *ipha, if (((ill->ill_flags & ((ill_t *)ire->ire_stq->q_ptr)->ill_flags & ILLF_ROUTER) == 0) && - !(ip_source_routed(ipha) && (ire->ire_rfq == q || + !(ip_source_routed(ipha, ipst) && (ire->ire_rfq == q || (ill_group != NULL && ill_group == ire_group)))) { BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); - if (ip_source_routed(ipha)) { + if (ip_source_routed(ipha, ipst)) { q = WR(q); /* * Clear the indication that this may have @@ -14077,7 +14301,7 @@ ip_rput_process_forward(queue_t *q, mblk_t *mp, ire_t *ire, ipha_t *ipha, DB_CKSUMFLAGS(mp) = 0; /* Sent by forwarding path, and router is global zone */ icmp_unreachable(q, mp, - ICMP_SOURCE_ROUTE_FAILED, GLOBAL_ZONEID); + ICMP_SOURCE_ROUTE_FAILED, GLOBAL_ZONEID, ipst); return; } goto drop_pkt; @@ -14087,7 +14311,7 @@ ip_rput_process_forward(queue_t *q, mblk_t *mp, ire_t *ire, ipha_t *ipha, /* Packet is being forwarded. Turning off hwcksum flag. */ DB_CKSUMFLAGS(mp) = 0; - if (ip_g_send_redirects) { + if (ipst->ips_ip_g_send_redirects) { /* * Check whether the incoming interface and outgoing * interface is part of the same group. 
If so, @@ -14113,7 +14337,7 @@ ip_rput_process_forward(queue_t *q, mblk_t *mp, ire_t *ire, ipha_t *ipha, */ if ((ire->ire_rfq == q || (ill_group != NULL && ill_group == ire_group)) && - !ip_source_routed(ipha)) { + !ip_source_routed(ipha, ipst)) { nhop = (ire->ire_gateway_addr != 0 ? ire->ire_gateway_addr : ire->ire_addr); @@ -14146,7 +14370,7 @@ ip_rput_process_forward(queue_t *q, mblk_t *mp, ire_t *ire, ipha_t *ipha, */ nhop_ire = ire_ftable_lookup(nhop, 0, 0, IRE_INTERFACE, NULL, NULL, ALL_ZONES, - 0, NULL, MATCH_IRE_TYPE); + 0, NULL, MATCH_IRE_TYPE, ipst); if (nhop_ire != NULL) { if ((src & nhop_ire->ire_mask) == @@ -14159,7 +14383,7 @@ ip_rput_process_forward(queue_t *q, mblk_t *mp, ire_t *ire, ipha_t *ipha, mp1 = copyb(mp); if (mp1 != NULL) { icmp_send_redirect(WR(q), mp1, - nhop); + nhop, ipst); } } ire_refrele(nhop_ire); @@ -14188,6 +14412,7 @@ ip_rput_process_broadcast(queue_t **qp, mblk_t *mp, ire_t *ire, ipha_t *ipha, { queue_t *q; uint16_t hcksumflags; + ip_stack_t *ipst = ill->ill_ipst; q = *qp; @@ -14237,7 +14462,7 @@ ip_rput_process_broadcast(queue_t **qp, mblk_t *mp, ire_t *ire, ipha_t *ipha, return (NULL); } new_ire = ire_ctable_lookup(dst, 0, 0, - ipif, ALL_ZONES, NULL, MATCH_IRE_ILL); + ipif, ALL_ZONES, NULL, MATCH_IRE_ILL, ipst); ipif_refrele(ipif); if (new_ire != NULL) { @@ -14267,7 +14492,7 @@ ip_rput_process_broadcast(queue_t **qp, mblk_t *mp, ire_t *ire, ipha_t *ipha, ire = new_ire; } } else if (cgtp_flt_pkt == CGTP_IP_PKT_NOT_CGTP) { - if (!ip_g_forward_directed_bcast) { + if (!ipst->ips_ip_g_forward_directed_bcast) { /* * Free the message if * ip_g_forward_directed_bcast is turned @@ -14297,7 +14522,7 @@ ip_rput_process_broadcast(queue_t **qp, mblk_t *mp, ire_t *ire, ipha_t *ipha, } } } - if (ip_g_forward_directed_bcast && ll_multicast == 0) { + if (ipst->ips_ip_g_forward_directed_bcast && ll_multicast == 0) { /* * Verify that there are not more then one * IRE_BROADCAST with this broadcast address which @@ -14356,7 +14581,7 @@ ip_rput_process_broadcast(queue_t **qp, mblk_t *mp, ire_t *ire, ipha_t *ipha, ire_refrele(ire); return (NULL); } - ipha->ipha_ttl = ip_broadcast_ttl + 1; + ipha->ipha_ttl = ipst->ips_ip_broadcast_ttl + 1; ipha->ipha_hdr_checksum = 0; ipha->ipha_hdr_checksum = ip_csum_hdr(ipha); ip_rput_process_forward(q, mp, ire, ipha, @@ -14379,6 +14604,8 @@ static boolean_t ip_rput_process_multicast(queue_t *q, mblk_t *mp, ill_t *ill, ipha_t *ipha, int *ll_multicast, ipaddr_t *dstp) { + ip_stack_t *ipst = ill->ill_ipst; + BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastPkts); UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastOctets, ntohs(ipha->ipha_length)); @@ -14387,7 +14614,7 @@ ip_rput_process_multicast(queue_t *q, mblk_t *mp, ill_t *ill, ipha_t *ipha, * Forward packets only if we have joined the allmulti * group on this interface. 
*/ - if (ip_g_mrouter && ill->ill_join_allmulti) { + if (ipst->ips_ip_g_mrouter && ill->ill_join_allmulti) { int retval; /* @@ -14467,6 +14694,7 @@ ip_rput_process_notdata(queue_t *q, mblk_t **first_mpp, ill_t *ill, boolean_t must_copy = B_FALSE; struct iocblk *iocp; ipha_t *ipha; + ip_stack_t *ipst = ill->ill_ipst; #define rptr ((uchar_t *)ipha) @@ -14509,7 +14737,8 @@ ip_rput_process_notdata(queue_t *q, mblk_t **first_mpp, ill_t *ill, if (ill != NULL) { BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); } else { - BUMP_MIB(&ip_mib, ipIfStatsInDiscards); + BUMP_MIB(&ipst->ips_ip_mib, + ipIfStatsInDiscards); } return (B_TRUE); } @@ -14674,10 +14903,12 @@ void ip_rput(queue_t *q, mblk_t *mp) { ill_t *ill; + ip_stack_t *ipst; TRACE_1(TR_FAC_IP, TR_IP_RPUT_START, "ip_rput_start: q %p", q); ill = (ill_t *)q->q_ptr; + ipst = ill->ill_ipst; if (ill->ill_state_flags & (ILL_CONDEMNED | ILL_LL_SUBNET_PENDING)) { union DL_primitives *dl; @@ -14722,7 +14953,7 @@ ip_rput(queue_t *q, mblk_t *mp) if (mp->b_datap->db_ref > 1) { mblk_t *mp1; boolean_t adjusted = B_FALSE; - IP_STAT(ip_db_ref); + IP_STAT(ipst, ip_db_ref); /* * The IP_RECVSLLA option depends on having the link layer @@ -14813,6 +15044,7 @@ ip_input(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain, mblk_t *mp; mblk_t *dmp; int cnt = 0; + ip_stack_t *ipst = ill->ill_ipst; ASSERT(mp_chain != NULL); ASSERT(ill != NULL); @@ -14854,7 +15086,7 @@ ip_input(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain, */ if (!OK_32PTR(dmp->b_rptr) || MBLKL(dmp) < IP_SIMPLE_HDR_LENGTH) { - if (!ip_check_and_align_header(q, dmp)) + if (!ip_check_and_align_header(q, dmp, ipst)) continue; } } @@ -14910,7 +15142,7 @@ ip_input(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain, continue; } (void) adjmsg(mp, -len); - IP_STAT(ip_multimblk3); + IP_STAT(ipst, ip_multimblk3); } } @@ -14936,8 +15168,9 @@ ip_input(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain, ill_t *, ill, ill_t *, NULL, ipha_t *, ipha, mblk_t *, first_mp); - FW_HOOKS(ip4_physical_in_event, ipv4firewall_physical_in, - ill, NULL, ipha, first_mp, mp); + FW_HOOKS(ipst->ips_ip4_physical_in_event, + ipst->ips_ipv4firewall_physical_in, + ill, NULL, ipha, first_mp, mp, ipst); DTRACE_PROBE1(ip4__physical__in__end, mblk_t *, first_mp); @@ -14987,7 +15220,8 @@ ip_input(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain, opt_len == 0 && ipha->ipha_protocol != IPPROTO_RSVP && !ll_multicast && !CLASSD(dst)) { if (ire == NULL) - ire = ire_cache_lookup(dst, ALL_ZONES, NULL); + ire = ire_cache_lookup(dst, ALL_ZONES, NULL, + ipst); /* incoming packet is for forwarding */ if (ire == NULL || (ire->ire_type & IRE_CACHE)) { @@ -15011,11 +15245,11 @@ ip_input(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain, /* Full-blown slow path */ if (opt_len != 0) { if (len != 0) - IP_STAT(ip_multimblk4); + IP_STAT(ipst, ip_multimblk4); else - IP_STAT(ip_ipoptions); + IP_STAT(ipst, ip_ipoptions); if (!ip_rput_multimblk_ipoptions(q, ill, mp, &ipha, - &dst)) + &dst, ipst)) continue; } @@ -15024,9 +15258,13 @@ ip_input(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain, * the incoming packet. Packets identified as duplicates * must be discarded. Filtering is active only if the * the ip_cgtp_filter ndd variable is non-zero. + * + * Only applies to the shared stack since the filter_ops + * do not carry an ip_stack_t or zoneid. 
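The comment added just above captures why CGTP filtering stays restricted to the shared stack: the registered filter_ops callback carries neither an ip_stack_t nor a zoneid, so there is no way to tell which instance a packet belongs to, and the code therefore only consults the hook when netstack_stackid is GLOBAL_NETSTACKID (the test immediately below). The toy sketch that follows models that restriction with made-up names; it is not the real filter_ops interface.

	/*
	 * Toy model of a legacy hook with no per-stack context argument,
	 * honoured only for the global instance.  Names are invented.
	 */
	#include <stdio.h>

	#define	GLOBAL_STACKID	0

	typedef int (*legacy_filter_t)(const char *pkt);	/* no stack arg */

	static legacy_filter_t registered_filter;

	static int
	drop_all(const char *pkt)
	{
		(void) pkt;
		return (1);		/* nonzero: drop */
	}

	static void
	deliver(int stackid, const char *pkt)
	{
		/* Only consult the legacy hook for the shared stack. */
		if (stackid == GLOBAL_STACKID && registered_filter != NULL &&
		    registered_filter(pkt)) {
			printf("stack %d: dropped by legacy filter\n", stackid);
			return;
		}
		printf("stack %d: delivered %s\n", stackid, pkt);
	}

	int
	main(void)
	{
		registered_filter = drop_all;
		deliver(0, "pkt-a");	/* filtered: global stack */
		deliver(3, "pkt-b");	/* not filtered: non-global instance */
		return (0);
	}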
*/ cgtp_flt_pkt = CGTP_IP_PKT_NOT_CGTP; - if (ip_cgtp_filter && (ip_cgtp_filter_ops != NULL)) { + if (ip_cgtp_filter && (ip_cgtp_filter_ops != NULL) && + ipst->ips_netstack->netstack_stackid == GLOBAL_NETSTACKID) { cgtp_flt_pkt = ip_cgtp_filter_ops->cfo_filter(q, mp); if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) { @@ -15046,7 +15284,8 @@ ip_input(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain, * forwarded like unicast traffic. */ if (ipha->ipha_protocol == IPPROTO_RSVP && - ipcl_proto_search(IPPROTO_RSVP) != NULL) { + ipst->ips_ipcl_proto_fanout[IPPROTO_RSVP].connf_head != + NULL) { /* RSVP packet and rsvpd running. Treat as ours */ ip2dbg(("ip_input: RSVP for us: 0x%x\n", ntohl(dst))); /* @@ -15087,7 +15326,7 @@ ip_input(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain, if (ire == NULL) { ire = ire_cache_lookup(dst, ALL_ZONES, - MBLK_GETLABEL(mp)); + MBLK_GETLABEL(mp), ipst); } /* @@ -15231,7 +15470,7 @@ local: * often unless interrupt binding * changes. */ - IP_STAT(ip_input_multi_squeue); + IP_STAT(ipst, ip_input_multi_squeue); squeue_enter_chain(curr_sqp, head, tail, cnt, SQTAG_IP_INPUT); curr_sqp = GET_SQUEUE(mp); @@ -15285,11 +15524,11 @@ local: ip_ring->rr_normal_blank_time, ip_ring->rr_normal_pkt_cnt); } - } + } TRACE_2(TR_FAC_IP, TR_IP_RPUT_END, "ip_input_end: q %p (%S)", q, "end"); -#undef rptr +#undef rptr } static void @@ -15450,6 +15689,7 @@ ip_rput_dlpi_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg) boolean_t ioctl_aborted = B_FALSE; boolean_t log = B_TRUE; hook_nic_event_t *info; + ip_stack_t *ipst; ip1dbg(("ip_rput_dlpi_writer ..")); ill = (ill_t *)q->q_ptr; @@ -15457,6 +15697,8 @@ ip_rput_dlpi_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg) ASSERT(IAM_WRITER_ILL(ill)); + ipst = ill->ill_ipst; + /* * ipsq_pending_mp and ipsq_pending_ipif track each other. i.e. * both are null or non-null. However we can assert that only @@ -15713,7 +15955,8 @@ ip_rput_dlpi_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg) info->hne_event = NE_UP; info->hne_data = NULL; info->hne_datalen = 0; - info->hne_family = ill->ill_isv6 ? ipv6 : ipv4; + info->hne_family = ill->ill_isv6 ? + ipst->ips_ipv6_net_data : ipst->ips_ipv4_net_data; } else ip2dbg(("ip_rput_dlpi_writer: could not attach UP nic " "event information for %s (ENOMEM)\n", @@ -15902,10 +16145,10 @@ ip_rput_dlpi_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg) mutex_exit(&ill->ill_lock); if (need_ire_walk_v4) ire_walk_v4(ill_mtu_change, (char *)ill, - ALL_ZONES); + ALL_ZONES, ipst); if (need_ire_walk_v6) ire_walk_v6(ill_mtu_change, (char *)ill, - ALL_ZONES); + ALL_ZONES, ipst); break; case DL_NOTE_LINK_UP: case DL_NOTE_LINK_DOWN: { @@ -16203,6 +16446,7 @@ ip_rput_other(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg) if (ta->ifta_flags & (IFTUN_SRC | IFTUN_DST)) { ipif_set_tun_llink(ill, ta); } + } if (mp1 != NULL) { /* @@ -16365,13 +16609,14 @@ ip_rput_forward(ire_t *ire, ipha_t *ipha, mblk_t *mp, ill_t *in_ill) uint32_t ill_index; ill_t *out_ill; mib2_ipIfStatsEntry_t *mibptr; + ip_stack_t *ipst = in_ill->ill_ipst; /* Get the ill_index of the incoming ILL */ ill_index = (in_ill != NULL) ? in_ill->ill_phyint->phyint_ifindex : 0; - mibptr = (in_ill != NULL) ? in_ill->ill_ip_mib : &ip_mib; + mibptr = (in_ill != NULL) ? 
in_ill->ill_ip_mib : &ipst->ips_ip_mib; /* Initiate Read side IPPF processing */ - if (IPP_ENABLED(IPP_FWD_IN)) { + if (IPP_ENABLED(IPP_FWD_IN, ipst)) { ip_process(IPP_FWD_IN, &mp, ill_index); if (mp == NULL) { ip2dbg(("ip_rput_forward: pkt dropped/deferred "\ @@ -16403,7 +16648,7 @@ ip_rput_forward(ire_t *ire, ipha_t *ipha, mblk_t *mp, ill_t *in_ill) if (q != NULL) { /* Sent by forwarding path, and router is global zone */ icmp_time_exceeded(q, mp, ICMP_TTL_EXCEEDED, - GLOBAL_ZONEID); + GLOBAL_ZONEID, ipst); } else freemsg(mp); return; @@ -16426,8 +16671,9 @@ ip_rput_forward(ire_t *ire, ipha_t *ipha, mblk_t *mp, ill_t *in_ill) DTRACE_PROBE4(ip4__forwarding__start, ill_t *, in_ill, ill_t *, out_ill, ipha_t *, ipha, mblk_t *, mp); - FW_HOOKS(ip4_forwarding_event, ipv4firewall_forwarding, - in_ill, out_ill, ipha, mp, mp); + FW_HOOKS(ipst->ips_ip4_forwarding_event, + ipst->ips_ipv4firewall_forwarding, + in_ill, out_ill, ipha, mp, mp, ipst); DTRACE_PROBE1(ip4__forwarding__end, mblk_t *, mp); @@ -16454,7 +16700,7 @@ ip_rput_forward(ire_t *ire, ipha_t *ipha, mblk_t *mp, ill_t *in_ill) BUMP_MIB(mibptr, ipIfStatsInCksumErrs); goto drop_pkt; } - if (ip_rput_forward_options(mp, ipha, ire)) { + if (ip_rput_forward_options(mp, ipha, ire, ipst)) { BUMP_MIB(mibptr, ipIfStatsForwProhibits); return; } @@ -16476,7 +16722,7 @@ ip_rput_forward(ire_t *ire, ipha_t *ipha, mblk_t *mp, ill_t *in_ill) goto drop_pkt; } /* Initiate Write side IPPF processing */ - if (IPP_ENABLED(IPP_FWD_OUT)) { + if (IPP_ENABLED(IPP_FWD_OUT, ipst)) { ip_process(IPP_FWD_OUT, &mp, ill_index); if (mp == NULL) { ip2dbg(("ip_rput_forward: pkt dropped/deferred"\ @@ -16484,15 +16730,16 @@ ip_rput_forward(ire_t *ire, ipha_t *ipha, mblk_t *mp, ill_t *in_ill) return; } } - ip_wput_frag(ire, mp, IB_PKT, max_frag, 0, GLOBAL_ZONEID); + ip_wput_frag(ire, mp, IB_PKT, max_frag, 0, GLOBAL_ZONEID, ipst); ip2dbg(("ip_rput_forward:sent to ip_wput_frag\n")); return; } DTRACE_PROBE4(ip4__physical__out__start, ill_t *, NULL, ill_t *, out_ill, ipha_t *, ipha, mblk_t *, mp); - FW_HOOKS(ip4_physical_out_event, ipv4firewall_physical_out, - NULL, out_ill, ipha, mp, mp); + FW_HOOKS(ipst->ips_ip4_physical_out_event, + ipst->ips_ipv4firewall_physical_out, + NULL, out_ill, ipha, mp, mp, ipst); DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, mp); if (mp == NULL) return; @@ -16513,6 +16760,7 @@ void ip_rput_forward_multicast(ipaddr_t dst, mblk_t *mp, ipif_t *ipif) { ire_t *ire; + ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; ASSERT(!ipif->ipif_isv6); /* @@ -16523,8 +16771,9 @@ ip_rput_forward_multicast(ipaddr_t dst, mblk_t *mp, ipif_t *ipif) */ if (ipif->ipif_flags & IPIF_POINTOPOINT) dst = ipif->ipif_pp_dst_addr; + ire = ire_ctable_lookup(dst, 0, 0, ipif, ALL_ZONES, MBLK_GETLABEL(mp), - MATCH_IRE_ILL_GROUP | MATCH_IRE_SECATTR); + MATCH_IRE_ILL_GROUP | MATCH_IRE_SECATTR, ipst); if (ire == NULL) { /* * Mark this packet to make it be delivered to @@ -16543,7 +16792,7 @@ ip_rput_forward_multicast(ipaddr_t dst, mblk_t *mp, ipif_t *ipif) /* Update any source route, record route or timestamp options */ static int -ip_rput_forward_options(mblk_t *mp, ipha_t *ipha, ire_t *ire) +ip_rput_forward_options(mblk_t *mp, ipha_t *ipha, ire_t *ire, ip_stack_t *ipst) { ipoptp_t opts; uchar_t *opt; @@ -16570,7 +16819,7 @@ ip_rput_forward_options(mblk_t *mp, ipha_t *ipha, ire_t *ire) case IPOPT_SSRR: case IPOPT_LSRR: /* Check if adminstratively disabled */ - if (!ip_forward_src_routed) { + if (!ipst->ips_ip_forward_src_routed) { if (ire->ire_stq != NULL) { /* * Sent by forwarding path, 
and router @@ -16578,7 +16827,7 @@ ip_rput_forward_options(mblk_t *mp, ipha_t *ipha, ire_t *ire) */ icmp_unreachable(ire->ire_stq, mp, ICMP_SOURCE_ROUTE_FAILED, - GLOBAL_ZONEID); + GLOBAL_ZONEID, ipst); } else { ip0dbg(("ip_rput_forward_options: " "unable to send unreach\n")); @@ -16588,7 +16837,7 @@ ip_rput_forward_options(mblk_t *mp, ipha_t *ipha, ire_t *ire) } dst_ire = ire_ctable_lookup(dst, 0, IRE_LOCAL, - NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE); + NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE, ipst); if (dst_ire == NULL) { /* * Must be partial since ip_rput_options @@ -16618,7 +16867,7 @@ ip_rput_forward_options(mblk_t *mp, ipha_t *ipha, ire_t *ire) * once as consecutive hops in source route. */ tmp_ire = ire_ctable_lookup(dst, 0, IRE_LOCAL, - NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE); + NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE, ipst); if (tmp_ire != NULL) { ire_refrele(tmp_ire); off += IP_ADDR_LEN; @@ -16656,8 +16905,7 @@ ip_rput_forward_options(mblk_t *mp, ipha_t *ipha, ire_t *ire) bcopy((char *)opt + off, &dst, IP_ADDR_LEN); dst_ire = ire_ctable_lookup(dst, 0, IRE_LOCAL, NULL, ALL_ZONES, NULL, - MATCH_IRE_TYPE); - + MATCH_IRE_TYPE, ipst); if (dst_ire == NULL) { /* Not for us */ break; @@ -16731,9 +16979,14 @@ ip_fanout_proto_again(mblk_t *ipsec_mp, ill_t *ill, ill_t *recv_ill, ire_t *ire) boolean_t ill_need_rele = B_FALSE; boolean_t rill_need_rele = B_FALSE; boolean_t ire_need_rele = B_FALSE; + netstack_t *ns; + ip_stack_t *ipst; ii = (ipsec_in_t *)ipsec_mp->b_rptr; ASSERT(ii->ipsec_in_ill_index != 0); + ns = ii->ipsec_in_ns; + ASSERT(ii->ipsec_in_ns != NULL); + ipst = ns->netstack_ip; mp = ipsec_mp->b_cont; ASSERT(mp != NULL); @@ -16746,13 +16999,13 @@ ip_fanout_proto_again(mblk_t *ipsec_mp, ill_t *ill, ill_t *recv_ill, ire_t *ire) * or ip_rput_data_v6 was called. 
*/ ill = ill_lookup_on_ifindex(ii->ipsec_in_ill_index, - !ii->ipsec_in_v4, NULL, NULL, NULL, NULL); + !ii->ipsec_in_v4, NULL, NULL, NULL, NULL, ipst); ill_need_rele = B_TRUE; if (ii->ipsec_in_ill_index != ii->ipsec_in_rill_index) { recv_ill = ill_lookup_on_ifindex( ii->ipsec_in_rill_index, !ii->ipsec_in_v4, - NULL, NULL, NULL, NULL); + NULL, NULL, NULL, NULL, ipst); rill_need_rele = B_TRUE; } else { recv_ill = ill; @@ -16835,7 +17088,7 @@ ip_fanout_proto_again(mblk_t *ipsec_mp, ill_t *ill, ill_t *recv_ill, ire_t *ire) if (ire == NULL) { ire = ire_cache_lookup(dst, ii->ipsec_in_zoneid, - MBLK_GETLABEL(mp)); + MBLK_GETLABEL(mp), ipst); if (ire == NULL) { if (ill_need_rele) ill_refrele(ill); @@ -16912,6 +17165,7 @@ ill_frag_timer(void *arg) { ill_t *ill = (ill_t *)arg; boolean_t frag_pending; + ip_stack_t *ipst = ill->ill_ipst; mutex_enter(&ill->ill_lock); ASSERT(!ill->ill_fragtimer_executing); @@ -16923,7 +17177,7 @@ ill_frag_timer(void *arg) ill->ill_fragtimer_executing = 1; mutex_exit(&ill->ill_lock); - frag_pending = ill_frag_timeout(ill, ip_g_frag_timeout); + frag_pending = ill_frag_timeout(ill, ipst->ips_ip_g_frag_timeout); /* * Restart the timer, if we have fragments pending or if someone @@ -16940,6 +17194,8 @@ ill_frag_timer(void *arg) void ill_frag_timer_start(ill_t *ill) { + ip_stack_t *ipst = ill->ill_ipst; + ASSERT(MUTEX_HELD(&ill->ill_lock)); /* If the ill is closing or opening don't proceed */ @@ -16964,7 +17220,7 @@ ill_frag_timer_start(ill_t *ill) * called */ ill->ill_frag_timer_id = timeout(ill_frag_timer, ill, - MSEC_TO_TICK(ip_g_frag_timo_ms >> 1)); + MSEC_TO_TICK(ipst->ips_ip_g_frag_timo_ms >> 1)); ill->ill_fragtimer_needrestart = 0; } } @@ -16992,6 +17248,10 @@ ip_proto_input(queue_t *q, mblk_t *mp, ipha_t *ipha, ire_t *ire, mblk_t *first_mp = mp; mblk_t *hada_mp = NULL; ipha_t *inner_ipha; + ip_stack_t *ipst; + + ASSERT(recv_ill != NULL); + ipst = recv_ill->ill_ipst; TRACE_1(TR_FAC_IP, TR_IP_RPUT_LOCL_START, "ip_rput_locl_start: q %p", q); @@ -17050,7 +17310,7 @@ ip_proto_input(queue_t *q, mblk_t *mp, ipha_t *ipha, ire_t *ire, + IP_SIMPLE_HDR_LENGTH_IN_WORDS); if (u1) { - if (!ip_options_cksum(q, ill, mp, ipha, ire)) { + if (!ip_options_cksum(q, ill, mp, ipha, ire, ipst)) { if (hada_mp != NULL) freemsg(hada_mp); return; @@ -17203,9 +17463,9 @@ ip_proto_input(queue_t *q, mblk_t *mp, ipha_t *ipha, ire_t *ire, * If we are not willing to accept IGMP packets in clear, * then check with global policy. */ - if (igmp_accept_clear_messages == 0) { + if (ipst->ips_igmp_accept_clear_messages == 0) { first_mp = ipsec_check_global_policy(first_mp, NULL, - ipha, NULL, mctl_present); + ipha, NULL, mctl_present, ipst->ips_netstack); if (first_mp == NULL) return; } @@ -17232,7 +17492,8 @@ ip_proto_input(queue_t *q, mblk_t *mp, ipha_t *ipha, ire_t *ire, first_mp->b_cont = mp; else first_mp = mp; - if (ipcl_proto_search(ipha->ipha_protocol) == NULL) { + if (ipst->ips_ipcl_proto_fanout[ipha->ipha_protocol]. + connf_head != NULL) { /* No user-level listener for IGMP packets */ goto drop_pkt; } @@ -17243,9 +17504,9 @@ ip_proto_input(queue_t *q, mblk_t *mp, ipha_t *ipha, ire_t *ire, * If we are not willing to accept PIM packets in clear, * then check with global policy. 
*/ - if (pim_accept_clear_messages == 0) { + if (ipst->ips_pim_accept_clear_messages == 0) { first_mp = ipsec_check_global_policy(first_mp, NULL, - ipha, NULL, mctl_present); + ipha, NULL, mctl_present, ipst->ips_netstack); if (first_mp == NULL) return; } @@ -17255,7 +17516,7 @@ ip_proto_input(queue_t *q, mblk_t *mp, ipha_t *ipha, ire_t *ire, BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); return; } - if (pim_input(q, mp) != 0) { + if (pim_input(q, mp, ill) != 0) { /* Bad packet - discarded by pim_input */ TRACE_2(TR_FAC_IP, TR_IP_RPUT_LOCL_END, "ip_rput_locl_end: q %p (%S)", q, "pim"); @@ -17269,7 +17530,8 @@ ip_proto_input(queue_t *q, mblk_t *mp, ipha_t *ipha, ire_t *ire, * be reinitialized. */ ipha = (ipha_t *)mp->b_rptr; - if (ipcl_proto_search(ipha->ipha_protocol) == NULL) { + if (ipst->ips_ipcl_proto_fanout[ipha->ipha_protocol]. + connf_head != NULL) { /* No user-level listener for PIM packets */ goto drop_pkt; } @@ -17334,8 +17596,9 @@ ip_proto_input(queue_t *q, mblk_t *mp, ipha_t *ipha, ire_t *ire, * If AH/ESP was present, we would have already * allocated the first_mp. */ - if ((first_mp = ipsec_in_alloc(B_TRUE)) == - NULL) { + first_mp = ipsec_in_alloc(B_TRUE, + ipst->ips_netstack); + if (first_mp == NULL) { ip1dbg(("ip_proto_input: IPSEC_IN " "allocation failure.\n")); BUMP_MIB(ill->ill_ip_mib, @@ -17380,6 +17643,8 @@ ip_proto_input(queue_t *q, mblk_t *mp, ipha_t *ipha, ire_t *ire, break; case IPPROTO_AH: case IPPROTO_ESP: { + ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; + /* * Fast path for AH/ESP. If this is the first time * we are sending a datagram to AH/ESP, allocate @@ -17389,11 +17654,13 @@ ip_proto_input(queue_t *q, mblk_t *mp, ipha_t *ipha, ire_t *ire, int ipsec_rc; ipsec_in_t *ii; + netstack_t *ns = ipst->ips_netstack; - IP_STAT(ipsec_proto_ahesp); + IP_STAT(ipst, ipsec_proto_ahesp); if (!mctl_present) { ASSERT(first_mp == mp); - if ((first_mp = ipsec_in_alloc(B_TRUE)) == NULL) { + first_mp = ipsec_in_alloc(B_TRUE, ns); + if (first_mp == NULL) { ip1dbg(("ip_proto_input: IPSEC_IN " "allocation failure.\n")); freemsg(hada_mp); /* okay ifnull */ @@ -17426,15 +17693,16 @@ ip_proto_input(queue_t *q, mblk_t *mp, ipha_t *ipha, ire_t *ire, ii = (ipsec_in_t *)first_mp->b_rptr; } - if (!ipsec_loaded()) { + if (!ipsec_loaded(ipss)) { ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, - ire->ire_zoneid); + ire->ire_zoneid, ipst); return; } + ns = ipst->ips_netstack; /* select inbound SA and have IPsec process the pkt */ if (ipha->ipha_protocol == IPPROTO_ESP) { - esph_t *esph = ipsec_inbound_esp_sa(first_mp); + esph_t *esph = ipsec_inbound_esp_sa(first_mp, ns); if (esph == NULL) return; ASSERT(ii->ipsec_in_esp_sa != NULL); @@ -17442,7 +17710,7 @@ ip_proto_input(queue_t *q, mblk_t *mp, ipha_t *ipha, ire_t *ire, ipsec_rc = ii->ipsec_in_esp_sa->ipsa_input_func( first_mp, esph); } else { - ah_t *ah = ipsec_inbound_ah_sa(first_mp); + ah_t *ah = ipsec_inbound_ah_sa(first_mp, ns); if (ah == NULL) return; ASSERT(ii->ipsec_in_ah_sa != NULL); @@ -17502,7 +17770,8 @@ drop_pkt: * The options have already been checked for sanity in ip_rput_options(). 
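The ip_snmp_get*() hunks that follow walk only the ills owned by one ip_stack_t and fold each interface's counters into that instance's aggregate (the role played there by ip_mib2_add_ip_stats and global_ip_mib), instead of reporting a single module-wide ip_mib. The self-contained sketch below shows just that walk-and-sum step; the structures and field names are stand-ins, not the illumos MIB definitions.

	/*
	 * Simplified model of the per-stack MIB walk: sum the counters of
	 * every interface owned by one stack instance into its aggregate.
	 * Structures and field names are illustrative only.
	 */
	#include <stdio.h>

	#define	MAX_ILLS	4

	struct ip_mib {
		unsigned long	in_receives;
		unsigned long	in_discards;
	};

	struct ill {
		struct ip_mib	ill_ip_mib;	/* per-interface counters */
	};

	struct ip_stack {
		struct ill	ills[MAX_ILLS];	/* interfaces of this stack */
		int		nills;
	};

	static void
	mib_add(struct ip_mib *sum, const struct ip_mib *one)
	{
		sum->in_receives += one->in_receives;
		sum->in_discards += one->in_discards;
	}

	int
	main(void)
	{
		struct ip_stack ipst = {
			.ills = {
				{ { 100, 1 } },
				{ { 250, 0 } },
			},
			.nills = 2,
		};
		struct ip_mib total = { 0, 0 };
		int i;

		for (i = 0; i < ipst.nills; i++)
			mib_add(&total, &ipst.ills[i].ill_ip_mib);
		printf("receives=%lu discards=%lu\n",
		    total.in_receives, total.in_discards);
		return (0);
	}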
*/ static boolean_t -ip_rput_local_options(queue_t *q, mblk_t *mp, ipha_t *ipha, ire_t *ire) +ip_rput_local_options(queue_t *q, mblk_t *mp, ipha_t *ipha, ire_t *ire, + ip_stack_t *ipst) { ipoptp_t opts; uchar_t *opt; @@ -17585,7 +17854,8 @@ ip_rput_local_options(queue_t *q, mblk_t *mp, ipha_t *ipha, ire_t *ire) off = opt[IPOPT_OFFSET] - 1; bcopy((char *)opt + off, &dst, IP_ADDR_LEN); dst_ire = ire_ctable_lookup(dst, 0, IRE_LOCAL, - NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE); + NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE, + ipst); if (dst_ire == NULL) { /* Not for us */ break; @@ -17645,11 +17915,11 @@ bad_src_route: /* make sure we clear any indication of a hardware checksum */ DB_CKSUMFLAGS(mp) = 0; - zoneid = ipif_lookup_addr_zoneid(ipha->ipha_dst, ill); + zoneid = ipif_lookup_addr_zoneid(ipha->ipha_dst, ill, ipst); if (zoneid == ALL_ZONES) freemsg(mp); else - icmp_unreachable(q, mp, ICMP_SOURCE_ROUTE_FAILED, zoneid); + icmp_unreachable(q, mp, ICMP_SOURCE_ROUTE_FAILED, zoneid, ipst); return (B_FALSE); } @@ -17661,7 +17931,8 @@ bad_src_route: * and mp freed. */ static int -ip_rput_options(queue_t *q, mblk_t *mp, ipha_t *ipha, ipaddr_t *dstp) +ip_rput_options(queue_t *q, mblk_t *mp, ipha_t *ipha, ipaddr_t *dstp, + ip_stack_t *ipst) { ipoptp_t opts; uchar_t *opt; @@ -17692,7 +17963,7 @@ ip_rput_options(queue_t *q, mblk_t *mp, ipha_t *ipha, ipaddr_t *dstp) case IPOPT_SSRR: case IPOPT_LSRR: ire = ire_ctable_lookup(dst, 0, IRE_LOCAL, NULL, - ALL_ZONES, NULL, MATCH_IRE_TYPE); + ALL_ZONES, NULL, MATCH_IRE_TYPE, ipst); if (ire == NULL) { if (optval == IPOPT_SSRR) { ip1dbg(("ip_rput_options: not next" @@ -17736,7 +18007,7 @@ ip_rput_options(queue_t *q, mblk_t *mp, ipha_t *ipha, ipaddr_t *dstp) * for source route? */ ire = ire_ctable_lookup(dst, 0, IRE_LOCAL, NULL, - ALL_ZONES, NULL, MATCH_IRE_TYPE); + ALL_ZONES, NULL, MATCH_IRE_TYPE, ipst); if (ire != NULL) { ire_refrele(ire); @@ -17757,7 +18028,7 @@ ip_rput_options(queue_t *q, mblk_t *mp, ipha_t *ipha, ipaddr_t *dstp) ire = ire_ftable_lookup(dst, 0, 0, IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, MBLK_GETLABEL(mp), - MATCH_IRE_TYPE | MATCH_IRE_SECATTR); + MATCH_IRE_TYPE | MATCH_IRE_SECATTR, ipst); if (ire == NULL) { ip1dbg(("ip_rput_options: SSRR not " "directly reachable: 0x%x\n", @@ -17841,11 +18112,11 @@ param_prob: /* make sure we clear any indication of a hardware checksum */ DB_CKSUMFLAGS(mp) = 0; /* Don't know whether this is for non-global or global/forwarding */ - zoneid = ipif_lookup_addr_zoneid(dst, ill); + zoneid = ipif_lookup_addr_zoneid(dst, ill, ipst); if (zoneid == ALL_ZONES) freemsg(mp); else - icmp_param_problem(q, mp, (uint8_t)code, zoneid); + icmp_param_problem(q, mp, (uint8_t)code, zoneid, ipst); return (-1); bad_src_route: @@ -17857,11 +18128,11 @@ bad_src_route: /* make sure we clear any indication of a hardware checksum */ DB_CKSUMFLAGS(mp) = 0; - zoneid = ipif_lookup_addr_zoneid(dst, ill); + zoneid = ipif_lookup_addr_zoneid(dst, ill, ipst); if (zoneid == ALL_ZONES) freemsg(mp); else - icmp_unreachable(q, mp, ICMP_SOURCE_ROUTE_FAILED, zoneid); + icmp_unreachable(q, mp, ICMP_SOURCE_ROUTE_FAILED, zoneid, ipst); return (-1); } @@ -17900,75 +18171,89 @@ bad_src_route: int ip_snmp_get(queue_t *q, mblk_t *mpctl) { + ip_stack_t *ipst; + sctp_stack_t *sctps; + + + if (q->q_next != NULL) { + ipst = ILLQ_TO_IPST(q); + } else { + ipst = CONNQ_TO_IPST(q); + } + ASSERT(ipst != NULL); + sctps = ipst->ips_netstack->netstack_sctp; + if (mpctl == NULL || mpctl->b_cont == NULL) { return (0); } - if ((mpctl = ip_snmp_get_mib2_ip_traffic_stats(q, mpctl)) 
== NULL) { + if ((mpctl = ip_snmp_get_mib2_ip_traffic_stats(q, mpctl, + ipst)) == NULL) { return (1); } - if ((mpctl = ip_snmp_get_mib2_ip6(q, mpctl)) == NULL) { + if ((mpctl = ip_snmp_get_mib2_ip6(q, mpctl, ipst)) == NULL) { return (1); } - if ((mpctl = ip_snmp_get_mib2_icmp(q, mpctl)) == NULL) { + if ((mpctl = ip_snmp_get_mib2_icmp(q, mpctl, ipst)) == NULL) { return (1); } - if ((mpctl = ip_snmp_get_mib2_icmp6(q, mpctl)) == NULL) { + if ((mpctl = ip_snmp_get_mib2_icmp6(q, mpctl, ipst)) == NULL) { return (1); } - if ((mpctl = ip_snmp_get_mib2_igmp(q, mpctl)) == NULL) { + if ((mpctl = ip_snmp_get_mib2_igmp(q, mpctl, ipst)) == NULL) { return (1); } - if ((mpctl = ip_snmp_get_mib2_multi(q, mpctl)) == NULL) { + if ((mpctl = ip_snmp_get_mib2_multi(q, mpctl, ipst)) == NULL) { return (1); } - if ((mpctl = ip_snmp_get_mib2_ip_addr(q, mpctl)) == NULL) { + if ((mpctl = ip_snmp_get_mib2_ip_addr(q, mpctl, ipst)) == NULL) { return (1); } - if ((mpctl = ip_snmp_get_mib2_ip6_addr(q, mpctl)) == NULL) { + if ((mpctl = ip_snmp_get_mib2_ip6_addr(q, mpctl, ipst)) == NULL) { return (1); } - if ((mpctl = ip_snmp_get_mib2_ip_group_mem(q, mpctl)) == NULL) { + if ((mpctl = ip_snmp_get_mib2_ip_group_mem(q, mpctl, ipst)) == NULL) { return (1); } - if ((mpctl = ip_snmp_get_mib2_ip6_group_mem(q, mpctl)) == NULL) { + if ((mpctl = ip_snmp_get_mib2_ip6_group_mem(q, mpctl, ipst)) == NULL) { return (1); } - if ((mpctl = ip_snmp_get_mib2_ip_group_src(q, mpctl)) == NULL) { + if ((mpctl = ip_snmp_get_mib2_ip_group_src(q, mpctl, ipst)) == NULL) { return (1); } - if ((mpctl = ip_snmp_get_mib2_ip6_group_src(q, mpctl)) == NULL) { + if ((mpctl = ip_snmp_get_mib2_ip6_group_src(q, mpctl, ipst)) == NULL) { return (1); } - if ((mpctl = ip_snmp_get_mib2_virt_multi(q, mpctl)) == NULL) { + if ((mpctl = ip_snmp_get_mib2_virt_multi(q, mpctl, ipst)) == NULL) { return (1); } - if ((mpctl = ip_snmp_get_mib2_multi_rtable(q, mpctl)) == NULL) { + if ((mpctl = ip_snmp_get_mib2_multi_rtable(q, mpctl, ipst)) == NULL) { return (1); } - if ((mpctl = ip_snmp_get_mib2_ip_route_media(q, mpctl)) == NULL) { + if ((mpctl = ip_snmp_get_mib2_ip_route_media(q, mpctl, ipst)) == NULL) { return (1); } - if ((mpctl = ip_snmp_get_mib2_ip6_route_media(q, mpctl)) == NULL) { + mpctl = ip_snmp_get_mib2_ip6_route_media(q, mpctl, ipst); + if (mpctl == NULL) { return (1); } - if ((mpctl = sctp_snmp_get_mib2(q, mpctl)) == NULL) { + if ((mpctl = sctp_snmp_get_mib2(q, mpctl, sctps)) == NULL) { return (1); } freemsg(mpctl); @@ -17978,7 +18263,8 @@ ip_snmp_get(queue_t *q, mblk_t *mpctl) /* Get global (legacy) IPv4 statistics */ static mblk_t * -ip_snmp_get_mib2_ip(queue_t *q, mblk_t *mpctl, mib2_ipIfStatsEntry_t *ipmib) +ip_snmp_get_mib2_ip(queue_t *q, mblk_t *mpctl, mib2_ipIfStatsEntry_t *ipmib, + ip_stack_t *ipst) { mib2_ip_t old_ip_mib; struct opthdr *optp; @@ -17994,11 +18280,11 @@ ip_snmp_get_mib2_ip(queue_t *q, mblk_t *mpctl, mib2_ipIfStatsEntry_t *ipmib) optp->level = MIB2_IP; optp->name = 0; SET_MIB(old_ip_mib.ipForwarding, - (WE_ARE_FORWARDING ? 1 : 2)); + (WE_ARE_FORWARDING(ipst) ? 
1 : 2)); SET_MIB(old_ip_mib.ipDefaultTTL, - (uint32_t)ip_def_ttl); + (uint32_t)ipst->ips_ip_def_ttl); SET_MIB(old_ip_mib.ipReasmTimeout, - ip_g_frag_timeout); + ipst->ips_ip_g_frag_timeout); SET_MIB(old_ip_mib.ipAddrEntrySize, sizeof (mib2_ipAddrEntry_t)); SET_MIB(old_ip_mib.ipRouteEntrySize, @@ -18072,7 +18358,7 @@ ip_snmp_get_mib2_ip(queue_t *q, mblk_t *mpctl, mib2_ipIfStatsEntry_t *ipmib) /* Per interface IPv4 statistics */ static mblk_t * -ip_snmp_get_mib2_ip_traffic_stats(queue_t *q, mblk_t *mpctl) +ip_snmp_get_mib2_ip_traffic_stats(queue_t *q, mblk_t *mpctl, ip_stack_t *ipst) { struct opthdr *optp; mblk_t *mp2ctl; @@ -18090,36 +18376,44 @@ ip_snmp_get_mib2_ip_traffic_stats(queue_t *q, mblk_t *mpctl) optp->level = MIB2_IP; optp->name = MIB2_IP_TRAFFIC_STATS; /* Include "unknown interface" ip_mib */ - ip_mib.ipIfStatsIPVersion = MIB2_INETADDRESSTYPE_ipv4; - ip_mib.ipIfStatsIfIndex = MIB2_UNKNOWN_INTERFACE; /* Flag to netstat */ - SET_MIB(ip_mib.ipIfStatsForwarding, (WE_ARE_FORWARDING ? 1 : 2)); - SET_MIB(ip_mib.ipIfStatsDefaultTTL, (uint32_t)ip_def_ttl); - SET_MIB(ip_mib.ipIfStatsEntrySize, sizeof (mib2_ipIfStatsEntry_t)); - SET_MIB(ip_mib.ipIfStatsAddrEntrySize, sizeof (mib2_ipAddrEntry_t)); - SET_MIB(ip_mib.ipIfStatsRouteEntrySize, sizeof (mib2_ipRouteEntry_t)); - SET_MIB(ip_mib.ipIfStatsNetToMediaEntrySize, + ipst->ips_ip_mib.ipIfStatsIPVersion = MIB2_INETADDRESSTYPE_ipv4; + ipst->ips_ip_mib.ipIfStatsIfIndex = + MIB2_UNKNOWN_INTERFACE; /* Flag to netstat */ + SET_MIB(ipst->ips_ip_mib.ipIfStatsForwarding, + (ipst->ips_ip_g_forward ? 1 : 2)); + SET_MIB(ipst->ips_ip_mib.ipIfStatsDefaultTTL, + (uint32_t)ipst->ips_ip_def_ttl); + SET_MIB(ipst->ips_ip_mib.ipIfStatsEntrySize, + sizeof (mib2_ipIfStatsEntry_t)); + SET_MIB(ipst->ips_ip_mib.ipIfStatsAddrEntrySize, + sizeof (mib2_ipAddrEntry_t)); + SET_MIB(ipst->ips_ip_mib.ipIfStatsRouteEntrySize, + sizeof (mib2_ipRouteEntry_t)); + SET_MIB(ipst->ips_ip_mib.ipIfStatsNetToMediaEntrySize, sizeof (mib2_ipNetToMediaEntry_t)); - SET_MIB(ip_mib.ipIfStatsMemberEntrySize, sizeof (ip_member_t)); - SET_MIB(ip_mib.ipIfStatsGroupSourceEntrySize, sizeof (ip_grpsrc_t)); + SET_MIB(ipst->ips_ip_mib.ipIfStatsMemberEntrySize, + sizeof (ip_member_t)); + SET_MIB(ipst->ips_ip_mib.ipIfStatsGroupSourceEntrySize, + sizeof (ip_grpsrc_t)); - if (!snmp_append_data2(mpctl->b_cont, &mp_tail, (char *)&ip_mib, - (int)sizeof (ip_mib))) { + if (!snmp_append_data2(mpctl->b_cont, &mp_tail, + (char *)&ipst->ips_ip_mib, (int)sizeof (ipst->ips_ip_mib))) { ip1dbg(("ip_snmp_get_mib2_ip_traffic_stats: " "failed to allocate %u bytes\n", - (uint_t)sizeof (ip_mib))); + (uint_t)sizeof (ipst->ips_ip_mib))); } - bcopy(&ip_mib, &global_ip_mib, sizeof (global_ip_mib)); + bcopy(&ipst->ips_ip_mib, &global_ip_mib, sizeof (global_ip_mib)); - rw_enter(&ill_g_lock, RW_READER); - ill = ILL_START_WALK_V4(&ctx); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); + ill = ILL_START_WALK_V4(&ctx, ipst); for (; ill != NULL; ill = ill_next(&ctx, ill)) { ill->ill_ip_mib->ipIfStatsIfIndex = ill->ill_phyint->phyint_ifindex; SET_MIB(ill->ill_ip_mib->ipIfStatsForwarding, - (WE_ARE_FORWARDING ? 1 : 2)); + (ipst->ips_ip_g_forward ? 
1 : 2)); SET_MIB(ill->ill_ip_mib->ipIfStatsDefaultTTL, - (uint32_t)ip_def_ttl); + (uint32_t)ipst->ips_ip_def_ttl); ip_mib2_add_ip_stats(&global_ip_mib, ill->ill_ip_mib); if (!snmp_append_data2(mpctl->b_cont, &mp_tail, @@ -18130,7 +18424,7 @@ ip_snmp_get_mib2_ip_traffic_stats(queue_t *q, mblk_t *mpctl) (uint_t)sizeof (*ill->ill_ip_mib))); } } - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); optp->len = (t_uscalar_t)msgdsize(mpctl->b_cont); ip3dbg(("ip_snmp_get_mib2_ip_traffic_stats: " @@ -18138,12 +18432,12 @@ ip_snmp_get_mib2_ip_traffic_stats(queue_t *q, mblk_t *mpctl) (int)optp->level, (int)optp->name, (int)optp->len)); qreply(q, mpctl); - return (ip_snmp_get_mib2_ip(q, mp2ctl, &global_ip_mib)); + return (ip_snmp_get_mib2_ip(q, mp2ctl, &global_ip_mib, ipst)); } /* Global IPv4 ICMP statistics */ static mblk_t * -ip_snmp_get_mib2_icmp(queue_t *q, mblk_t *mpctl) +ip_snmp_get_mib2_icmp(queue_t *q, mblk_t *mpctl, ip_stack_t *ipst) { struct opthdr *optp; mblk_t *mp2ctl; @@ -18156,10 +18450,10 @@ ip_snmp_get_mib2_icmp(queue_t *q, mblk_t *mpctl) optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; optp->level = MIB2_ICMP; optp->name = 0; - if (!snmp_append_data(mpctl->b_cont, (char *)&icmp_mib, - (int)sizeof (icmp_mib))) { + if (!snmp_append_data(mpctl->b_cont, (char *)&ipst->ips_icmp_mib, + (int)sizeof (ipst->ips_icmp_mib))) { ip1dbg(("ip_snmp_get_mib2_icmp: failed to allocate %u bytes\n", - (uint_t)sizeof (icmp_mib))); + (uint_t)sizeof (ipst->ips_icmp_mib))); } optp->len = (t_uscalar_t)msgdsize(mpctl->b_cont); ip3dbg(("ip_snmp_get_mib2_icmp: level %d, name %d, len %d\n", @@ -18170,7 +18464,7 @@ ip_snmp_get_mib2_icmp(queue_t *q, mblk_t *mpctl) /* Global IPv4 IGMP statistics */ static mblk_t * -ip_snmp_get_mib2_igmp(queue_t *q, mblk_t *mpctl) +ip_snmp_get_mib2_igmp(queue_t *q, mblk_t *mpctl, ip_stack_t *ipst) { struct opthdr *optp; mblk_t *mp2ctl; @@ -18183,10 +18477,10 @@ ip_snmp_get_mib2_igmp(queue_t *q, mblk_t *mpctl) optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; optp->level = EXPER_IGMP; optp->name = 0; - if (!snmp_append_data(mpctl->b_cont, (char *)&igmpstat, - (int)sizeof (igmpstat))) { + if (!snmp_append_data(mpctl->b_cont, (char *)&ipst->ips_igmpstat, + (int)sizeof (ipst->ips_igmpstat))) { ip1dbg(("ip_snmp_get_mib2_igmp: failed to allocate %u bytes\n", - (uint_t)sizeof (igmpstat))); + (uint_t)sizeof (ipst->ips_igmpstat))); } optp->len = (t_uscalar_t)msgdsize(mpctl->b_cont); ip3dbg(("ip_snmp_get_mib2_igmp: level %d, name %d, len %d\n", @@ -18197,7 +18491,7 @@ ip_snmp_get_mib2_igmp(queue_t *q, mblk_t *mpctl) /* Global IPv4 Multicast Routing statistics */ static mblk_t * -ip_snmp_get_mib2_multi(queue_t *q, mblk_t *mpctl) +ip_snmp_get_mib2_multi(queue_t *q, mblk_t *mpctl, ip_stack_t *ipst) { struct opthdr *optp; mblk_t *mp2ctl; @@ -18210,7 +18504,7 @@ ip_snmp_get_mib2_multi(queue_t *q, mblk_t *mpctl) optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; optp->level = EXPER_DVMRP; optp->name = 0; - if (!ip_mroute_stats(mpctl->b_cont)) { + if (!ip_mroute_stats(mpctl->b_cont, ipst)) { ip0dbg(("ip_mroute_stats: failed\n")); } optp->len = (t_uscalar_t)msgdsize(mpctl->b_cont); @@ -18222,7 +18516,7 @@ ip_snmp_get_mib2_multi(queue_t *q, mblk_t *mpctl) /* IPv4 address information */ static mblk_t * -ip_snmp_get_mib2_ip_addr(queue_t *q, mblk_t *mpctl) +ip_snmp_get_mib2_ip_addr(queue_t *q, mblk_t *mpctl, ip_stack_t *ipst) { struct opthdr *optp; mblk_t *mp2ctl; @@ -18246,8 +18540,8 @@ ip_snmp_get_mib2_ip_addr(queue_t *q, mblk_t *mpctl) 
optp->name = MIB2_IP_ADDR; zoneid = Q_TO_CONN(q)->conn_zoneid; - rw_enter(&ill_g_lock, RW_READER); - ill = ILL_START_WALK_V4(&ctx); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); + ill = ILL_START_WALK_V4(&ctx, ipst); for (; ill != NULL; ill = ill_next(&ctx, ill)) { for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { @@ -18294,7 +18588,7 @@ ip_snmp_get_mib2_ip_addr(queue_t *q, mblk_t *mpctl) } } } - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); optp->len = (t_uscalar_t)msgdsize(mpctl->b_cont); ip3dbg(("ip_snmp_get_mib2_ip_addr: level %d, name %d, len %d\n", @@ -18305,7 +18599,7 @@ ip_snmp_get_mib2_ip_addr(queue_t *q, mblk_t *mpctl) /* IPv6 address information */ static mblk_t * -ip_snmp_get_mib2_ip6_addr(queue_t *q, mblk_t *mpctl) +ip_snmp_get_mib2_ip6_addr(queue_t *q, mblk_t *mpctl, ip_stack_t *ipst) { struct opthdr *optp; mblk_t *mp2ctl; @@ -18328,8 +18622,8 @@ ip_snmp_get_mib2_ip6_addr(queue_t *q, mblk_t *mpctl) optp->name = MIB2_IP6_ADDR; zoneid = Q_TO_CONN(q)->conn_zoneid; - rw_enter(&ill_g_lock, RW_READER); - ill = ILL_START_WALK_V6(&ctx); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); + ill = ILL_START_WALK_V6(&ctx, ipst); for (; ill != NULL; ill = ill_next(&ctx, ill)) { for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { @@ -18395,7 +18689,7 @@ ip_snmp_get_mib2_ip6_addr(queue_t *q, mblk_t *mpctl) } } } - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); optp->len = (t_uscalar_t)msgdsize(mpctl->b_cont); ip3dbg(("ip_snmp_get_mib2_ip6_addr: level %d, name %d, len %d\n", @@ -18406,7 +18700,7 @@ ip_snmp_get_mib2_ip6_addr(queue_t *q, mblk_t *mpctl) /* IPv4 multicast group membership. */ static mblk_t * -ip_snmp_get_mib2_ip_group_mem(queue_t *q, mblk_t *mpctl) +ip_snmp_get_mib2_ip_group_mem(queue_t *q, mblk_t *mpctl, ip_stack_t *ipst) { struct opthdr *optp; mblk_t *mp2ctl; @@ -18430,8 +18724,8 @@ ip_snmp_get_mib2_ip_group_mem(queue_t *q, mblk_t *mpctl) optp->level = MIB2_IP; optp->name = EXPER_IP_GROUP_MEMBERSHIP; - rw_enter(&ill_g_lock, RW_READER); - ill = ILL_START_WALK_V4(&ctx); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); + ill = ILL_START_WALK_V4(&ctx, ipst); for (; ill != NULL; ill = ill_next(&ctx, ill)) { ILM_WALKER_HOLD(ill); for (ipif = ill->ill_ipif; ipif != NULL; @@ -18462,7 +18756,7 @@ ip_snmp_get_mib2_ip_group_mem(queue_t *q, mblk_t *mpctl) } ILM_WALKER_RELE(ill); } - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); optp->len = (t_uscalar_t)msgdsize(mpctl->b_cont); ip3dbg(("ip_snmp_get: level %d, name %d, len %d\n", (int)optp->level, (int)optp->name, (int)optp->len)); @@ -18472,7 +18766,7 @@ ip_snmp_get_mib2_ip_group_mem(queue_t *q, mblk_t *mpctl) /* IPv6 multicast group membership. 
*/ static mblk_t * -ip_snmp_get_mib2_ip6_group_mem(queue_t *q, mblk_t *mpctl) +ip_snmp_get_mib2_ip6_group_mem(queue_t *q, mblk_t *mpctl, ip_stack_t *ipst) { struct opthdr *optp; mblk_t *mp2ctl; @@ -18494,8 +18788,8 @@ ip_snmp_get_mib2_ip6_group_mem(queue_t *q, mblk_t *mpctl) optp->level = MIB2_IP6; optp->name = EXPER_IP6_GROUP_MEMBERSHIP; - rw_enter(&ill_g_lock, RW_READER); - ill = ILL_START_WALK_V6(&ctx); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); + ill = ILL_START_WALK_V6(&ctx, ipst); for (; ill != NULL; ill = ill_next(&ctx, ill)) { ILM_WALKER_HOLD(ill); ipm6.ipv6GroupMemberIfIndex = ill->ill_phyint->phyint_ifindex; @@ -18517,7 +18811,7 @@ ip_snmp_get_mib2_ip6_group_mem(queue_t *q, mblk_t *mpctl) } ILM_WALKER_RELE(ill); } - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); optp->len = (t_uscalar_t)msgdsize(mpctl->b_cont); ip3dbg(("ip_snmp_get: level %d, name %d, len %d\n", @@ -18528,7 +18822,7 @@ ip_snmp_get_mib2_ip6_group_mem(queue_t *q, mblk_t *mpctl) /* IP multicast filtered sources */ static mblk_t * -ip_snmp_get_mib2_ip_group_src(queue_t *q, mblk_t *mpctl) +ip_snmp_get_mib2_ip_group_src(queue_t *q, mblk_t *mpctl, ip_stack_t *ipst) { struct opthdr *optp; mblk_t *mp2ctl; @@ -18554,8 +18848,8 @@ ip_snmp_get_mib2_ip_group_src(queue_t *q, mblk_t *mpctl) optp->level = MIB2_IP; optp->name = EXPER_IP_GROUP_SOURCES; - rw_enter(&ill_g_lock, RW_READER); - ill = ILL_START_WALK_V4(&ctx); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); + ill = ILL_START_WALK_V4(&ctx, ipst); for (; ill != NULL; ill = ill_next(&ctx, ill)) { ILM_WALKER_HOLD(ill); for (ipif = ill->ill_ipif; ipif != NULL; @@ -18593,7 +18887,7 @@ ip_snmp_get_mib2_ip_group_src(queue_t *q, mblk_t *mpctl) } ILM_WALKER_RELE(ill); } - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); optp->len = (t_uscalar_t)msgdsize(mpctl->b_cont); ip3dbg(("ip_snmp_get: level %d, name %d, len %d\n", (int)optp->level, (int)optp->name, (int)optp->len)); @@ -18603,7 +18897,7 @@ ip_snmp_get_mib2_ip_group_src(queue_t *q, mblk_t *mpctl) /* IPv6 multicast filtered sources. */ static mblk_t * -ip_snmp_get_mib2_ip6_group_src(queue_t *q, mblk_t *mpctl) +ip_snmp_get_mib2_ip6_group_src(queue_t *q, mblk_t *mpctl, ip_stack_t *ipst) { struct opthdr *optp; mblk_t *mp2ctl; @@ -18627,8 +18921,8 @@ ip_snmp_get_mib2_ip6_group_src(queue_t *q, mblk_t *mpctl) optp->level = MIB2_IP6; optp->name = EXPER_IP6_GROUP_SOURCES; - rw_enter(&ill_g_lock, RW_READER); - ill = ILL_START_WALK_V6(&ctx); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); + ill = ILL_START_WALK_V6(&ctx, ipst); for (; ill != NULL; ill = ill_next(&ctx, ill)) { ILM_WALKER_HOLD(ill); ips6.ipv6GroupSourceIfIndex = ill->ill_phyint->phyint_ifindex; @@ -18652,7 +18946,7 @@ ip_snmp_get_mib2_ip6_group_src(queue_t *q, mblk_t *mpctl) } ILM_WALKER_RELE(ill); } - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); optp->len = (t_uscalar_t)msgdsize(mpctl->b_cont); ip3dbg(("ip_snmp_get: level %d, name %d, len %d\n", @@ -18663,7 +18957,7 @@ ip_snmp_get_mib2_ip6_group_src(queue_t *q, mblk_t *mpctl) /* Multicast routing virtual interface table. 
*/ static mblk_t * -ip_snmp_get_mib2_virt_multi(queue_t *q, mblk_t *mpctl) +ip_snmp_get_mib2_virt_multi(queue_t *q, mblk_t *mpctl, ip_stack_t *ipst) { struct opthdr *optp; mblk_t *mp2ctl; @@ -18676,7 +18970,7 @@ ip_snmp_get_mib2_virt_multi(queue_t *q, mblk_t *mpctl) optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; optp->level = EXPER_DVMRP; optp->name = EXPER_DVMRP_VIF; - if (!ip_mroute_vif(mpctl->b_cont)) { + if (!ip_mroute_vif(mpctl->b_cont, ipst)) { ip0dbg(("ip_mroute_vif: failed\n")); } optp->len = (t_uscalar_t)msgdsize(mpctl->b_cont); @@ -18688,7 +18982,7 @@ ip_snmp_get_mib2_virt_multi(queue_t *q, mblk_t *mpctl) /* Multicast routing table. */ static mblk_t * -ip_snmp_get_mib2_multi_rtable(queue_t *q, mblk_t *mpctl) +ip_snmp_get_mib2_multi_rtable(queue_t *q, mblk_t *mpctl, ip_stack_t *ipst) { struct opthdr *optp; mblk_t *mp2ctl; @@ -18701,7 +18995,7 @@ ip_snmp_get_mib2_multi_rtable(queue_t *q, mblk_t *mpctl) optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; optp->level = EXPER_DVMRP; optp->name = EXPER_DVMRP_MRT; - if (!ip_mroute_mrt(mpctl->b_cont)) { + if (!ip_mroute_mrt(mpctl->b_cont, ipst)) { ip0dbg(("ip_mroute_mrt: failed\n")); } optp->len = (t_uscalar_t)msgdsize(mpctl->b_cont); @@ -18716,7 +19010,7 @@ ip_snmp_get_mib2_multi_rtable(queue_t *q, mblk_t *mpctl) * in one IRE walk. */ static mblk_t * -ip_snmp_get_mib2_ip_route_media(queue_t *q, mblk_t *mpctl) +ip_snmp_get_mib2_ip_route_media(queue_t *q, mblk_t *mpctl, ip_stack_t *ipst) { struct opthdr *optp; mblk_t *mp2ctl; /* Returned */ @@ -18750,15 +19044,16 @@ ip_snmp_get_mib2_ip_route_media(queue_t *q, mblk_t *mpctl) ird.ird_attrs.lp_head = mp4ctl->b_cont; zoneid = Q_TO_CONN(q)->conn_zoneid; - ire_walk_v4(ip_snmp_get2_v4, &ird, zoneid); + ire_walk_v4(ip_snmp_get2_v4, &ird, zoneid, ipst); if (zoneid == GLOBAL_ZONEID) { /* - * Those IREs are used by Mobile-IP; since mipagent(1M) requires - * the sys_net_config privilege, it can only run in the global - * zone, so we don't display these IREs in the other zones. + * Those IREs are used by Mobile-IP; since mipagent(1M) + * requires the sys_net_config or sys_ip_config privilege, + * it can only run in the global zone or an exclusive-IP zone, + * and both those have a conn_zoneid == GLOBAL_ZONEID. */ - ire_walk_srcif_table_v4(ip_snmp_get2_v4, &ird); - ire_walk_ill_mrtun(0, 0, ip_snmp_get2_v4, &ird, NULL); + ire_walk_srcif_table_v4(ip_snmp_get2_v4, &ird, ipst); + ire_walk_ill_mrtun(0, 0, ip_snmp_get2_v4, &ird, NULL, ipst); } /* ipRouteEntryTable in mpctl */ @@ -18799,7 +19094,7 @@ ip_snmp_get_mib2_ip_route_media(queue_t *q, mblk_t *mpctl) * ipv6NetToMediaEntryTable in an NDP walk. 
*/ static mblk_t * -ip_snmp_get_mib2_ip6_route_media(queue_t *q, mblk_t *mpctl) +ip_snmp_get_mib2_ip6_route_media(queue_t *q, mblk_t *mpctl, ip_stack_t *ipst) { struct opthdr *optp; mblk_t *mp2ctl; /* Returned */ @@ -18833,7 +19128,7 @@ ip_snmp_get_mib2_ip6_route_media(queue_t *q, mblk_t *mpctl) ird.ird_attrs.lp_head = mp4ctl->b_cont; zoneid = Q_TO_CONN(q)->conn_zoneid; - ire_walk_v6(ip_snmp_get2_v6_route, &ird, zoneid); + ire_walk_v6(ip_snmp_get2_v6_route, &ird, zoneid, ipst); optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; optp->level = MIB2_IP6; @@ -18844,7 +19139,7 @@ ip_snmp_get_mib2_ip6_route_media(queue_t *q, mblk_t *mpctl) qreply(q, mpctl); /* ipv6NetToMediaEntryTable in mp3ctl */ - ndp_walk(NULL, ip_snmp_get2_v6_media, &ird); + ndp_walk(NULL, ip_snmp_get2_v6_media, &ird, ipst); optp = (struct opthdr *)&mp3ctl->b_rptr[sizeof (struct T_optmgmt_ack)]; optp->level = MIB2_IP6; @@ -18873,7 +19168,7 @@ ip_snmp_get_mib2_ip6_route_media(queue_t *q, mblk_t *mpctl) * IPv6 mib: One per ill */ static mblk_t * -ip_snmp_get_mib2_ip6(queue_t *q, mblk_t *mpctl) +ip_snmp_get_mib2_ip6(queue_t *q, mblk_t *mpctl, ip_stack_t *ipst) { struct opthdr *optp; mblk_t *mp2ctl; @@ -18892,44 +19187,55 @@ ip_snmp_get_mib2_ip6(queue_t *q, mblk_t *mpctl) optp->level = MIB2_IP6; optp->name = 0; /* Include "unknown interface" ip6_mib */ - ip6_mib.ipIfStatsIPVersion = MIB2_INETADDRESSTYPE_ipv6; - ip6_mib.ipIfStatsIfIndex = MIB2_UNKNOWN_INTERFACE; /* Flag to netstat */ - SET_MIB(ip6_mib.ipIfStatsForwarding, ipv6_forward ? 1 : 2); - SET_MIB(ip6_mib.ipIfStatsDefaultHopLimit, ipv6_def_hops); - SET_MIB(ip6_mib.ipIfStatsEntrySize, + ipst->ips_ip6_mib.ipIfStatsIPVersion = MIB2_INETADDRESSTYPE_ipv6; + ipst->ips_ip6_mib.ipIfStatsIfIndex = + MIB2_UNKNOWN_INTERFACE; /* Flag to netstat */ + SET_MIB(ipst->ips_ip6_mib.ipIfStatsForwarding, + ipst->ips_ipv6_forward ? 
1 : 2); + SET_MIB(ipst->ips_ip6_mib.ipIfStatsDefaultHopLimit, + ipst->ips_ipv6_def_hops); + SET_MIB(ipst->ips_ip6_mib.ipIfStatsEntrySize, sizeof (mib2_ipIfStatsEntry_t)); - SET_MIB(ip6_mib.ipIfStatsAddrEntrySize, sizeof (mib2_ipv6AddrEntry_t)); - SET_MIB(ip6_mib.ipIfStatsRouteEntrySize, + SET_MIB(ipst->ips_ip6_mib.ipIfStatsAddrEntrySize, + sizeof (mib2_ipv6AddrEntry_t)); + SET_MIB(ipst->ips_ip6_mib.ipIfStatsRouteEntrySize, sizeof (mib2_ipv6RouteEntry_t)); - SET_MIB(ip6_mib.ipIfStatsNetToMediaEntrySize, + SET_MIB(ipst->ips_ip6_mib.ipIfStatsNetToMediaEntrySize, sizeof (mib2_ipv6NetToMediaEntry_t)); - SET_MIB(ip6_mib.ipIfStatsMemberEntrySize, sizeof (ipv6_member_t)); - SET_MIB(ip6_mib.ipIfStatsGroupSourceEntrySize, sizeof (ipv6_grpsrc_t)); + SET_MIB(ipst->ips_ip6_mib.ipIfStatsMemberEntrySize, + sizeof (ipv6_member_t)); + SET_MIB(ipst->ips_ip6_mib.ipIfStatsGroupSourceEntrySize, + sizeof (ipv6_grpsrc_t)); /* * Synchronize 64- and 32-bit counters */ - SYNC32_MIB(&ip6_mib, ipIfStatsInReceives, ipIfStatsHCInReceives); - SYNC32_MIB(&ip6_mib, ipIfStatsInDelivers, ipIfStatsHCInDelivers); - SYNC32_MIB(&ip6_mib, ipIfStatsOutRequests, ipIfStatsHCOutRequests); - SYNC32_MIB(&ip6_mib, ipIfStatsOutForwDatagrams, + SYNC32_MIB(&ipst->ips_ip6_mib, ipIfStatsInReceives, + ipIfStatsHCInReceives); + SYNC32_MIB(&ipst->ips_ip6_mib, ipIfStatsInDelivers, + ipIfStatsHCInDelivers); + SYNC32_MIB(&ipst->ips_ip6_mib, ipIfStatsOutRequests, + ipIfStatsHCOutRequests); + SYNC32_MIB(&ipst->ips_ip6_mib, ipIfStatsOutForwDatagrams, ipIfStatsHCOutForwDatagrams); - SYNC32_MIB(&ip6_mib, ipIfStatsOutMcastPkts, ipIfStatsHCOutMcastPkts); - SYNC32_MIB(&ip6_mib, ipIfStatsInMcastPkts, ipIfStatsHCInMcastPkts); + SYNC32_MIB(&ipst->ips_ip6_mib, ipIfStatsOutMcastPkts, + ipIfStatsHCOutMcastPkts); + SYNC32_MIB(&ipst->ips_ip6_mib, ipIfStatsInMcastPkts, + ipIfStatsHCInMcastPkts); - if (!snmp_append_data2(mpctl->b_cont, &mp_tail, (char *)&ip6_mib, - (int)sizeof (ip6_mib))) { + if (!snmp_append_data2(mpctl->b_cont, &mp_tail, + (char *)&ipst->ips_ip6_mib, (int)sizeof (ipst->ips_ip6_mib))) { ip1dbg(("ip_snmp_get_mib2_ip6: failed to allocate %u bytes\n", - (uint_t)sizeof (ip6_mib))); + (uint_t)sizeof (ipst->ips_ip6_mib))); } - rw_enter(&ill_g_lock, RW_READER); - ill = ILL_START_WALK_V6(&ctx); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); + ill = ILL_START_WALK_V6(&ctx, ipst); for (; ill != NULL; ill = ill_next(&ctx, ill)) { ill->ill_ip_mib->ipIfStatsIfIndex = ill->ill_phyint->phyint_ifindex; SET_MIB(ill->ill_ip_mib->ipIfStatsForwarding, - ipv6_forward ? 1 : 2); + ipst->ips_ipv6_forward ? 
1 : 2); SET_MIB(ill->ill_ip_mib->ipIfStatsDefaultHopLimit, ill->ill_max_hops); @@ -18957,7 +19263,7 @@ ip_snmp_get_mib2_ip6(queue_t *q, mblk_t *mpctl) (uint_t)sizeof (*ill->ill_ip_mib))); } } - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); optp->len = (t_uscalar_t)msgdsize(mpctl->b_cont); ip3dbg(("ip_snmp_get_mib2_ip6: level %d, name %d, len %d\n", @@ -18970,7 +19276,7 @@ ip_snmp_get_mib2_ip6(queue_t *q, mblk_t *mpctl) * ICMPv6 mib: One per ill */ static mblk_t * -ip_snmp_get_mib2_icmp6(queue_t *q, mblk_t *mpctl) +ip_snmp_get_mib2_icmp6(queue_t *q, mblk_t *mpctl, ip_stack_t *ipst) { struct opthdr *optp; mblk_t *mp2ctl; @@ -18988,16 +19294,19 @@ ip_snmp_get_mib2_icmp6(queue_t *q, mblk_t *mpctl) optp->level = MIB2_ICMP6; optp->name = 0; /* Include "unknown interface" icmp6_mib */ - icmp6_mib.ipv6IfIcmpIfIndex = MIB2_UNKNOWN_INTERFACE; /* netstat flag */ - icmp6_mib.ipv6IfIcmpEntrySize = sizeof (mib2_ipv6IfIcmpEntry_t); - if (!snmp_append_data2(mpctl->b_cont, &mp_tail, (char *)&icmp6_mib, - (int)sizeof (icmp6_mib))) { + ipst->ips_icmp6_mib.ipv6IfIcmpIfIndex = + MIB2_UNKNOWN_INTERFACE; /* netstat flag */ + ipst->ips_icmp6_mib.ipv6IfIcmpEntrySize = + sizeof (mib2_ipv6IfIcmpEntry_t); + if (!snmp_append_data2(mpctl->b_cont, &mp_tail, + (char *)&ipst->ips_icmp6_mib, + (int)sizeof (ipst->ips_icmp6_mib))) { ip1dbg(("ip_snmp_get_mib2_icmp6: failed to allocate %u bytes\n", - (uint_t)sizeof (icmp6_mib))); + (uint_t)sizeof (ipst->ips_icmp6_mib))); } - rw_enter(&ill_g_lock, RW_READER); - ill = ILL_START_WALK_V6(&ctx); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); + ill = ILL_START_WALK_V6(&ctx, ipst); for (; ill != NULL; ill = ill_next(&ctx, ill)) { ill->ill_icmp6_mib->ipv6IfIcmpIfIndex = ill->ill_phyint->phyint_ifindex; @@ -19009,7 +19318,7 @@ ip_snmp_get_mib2_icmp6(queue_t *q, mblk_t *mpctl) (uint_t)sizeof (*ill->ill_icmp6_mib))); } } - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); optp->len = (t_uscalar_t)msgdsize(mpctl->b_cont); ip3dbg(("ip_snmp_get_mib2_icmp6: level %d, name %d, len %d\n", @@ -19542,7 +19851,7 @@ ip_mib2_add_icmp6_stats(mib2_ipv6IfIcmpEntry_t *o1, mib2_ipv6IfIcmpEntry_t *o2) * have already been checked. */ static boolean_t -ip_source_routed(ipha_t *ipha) +ip_source_routed(ipha_t *ipha, ip_stack_t *ipst) { ipoptp_t opts; uchar_t *opt; @@ -19573,7 +19882,7 @@ ip_source_routed(ipha_t *ipha) * entries left in the source route return (true). */ ire = ire_ctable_lookup(dst, 0, IRE_LOCAL, NULL, - ALL_ZONES, NULL, MATCH_IRE_TYPE); + ALL_ZONES, NULL, MATCH_IRE_TYPE, ipst); if (ire == NULL) { ip2dbg(("ip_source_routed: not next" " source route 0x%x\n", @@ -19622,11 +19931,14 @@ ip_source_route_included(ipha_t *ipha) /* * Called when the IRE expiration timer fires. */ -/* ARGSUSED */ void ip_trash_timer_expire(void *args) { - int flush_flag = 0; + int flush_flag = 0; + ire_expire_arg_t iea; + ip_stack_t *ipst = (ip_stack_t *)args; + + iea.iea_ipst = ipst; /* No netstack_hold */ /* * ip_ire_expire_id is protected by ip_trash_timer_lock. 
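The hunks above and below convert the IRE expiration timer to per-stack state: the timeout callback now receives the ip_stack_t that armed it, and the flush flag is handed to ire_expire() wrapped together with that stack pointer in an ire_expire_arg_t. A minimal sketch of the re-arming pattern, using the ips_* fields and timeout(9F) call shown in the diff; the function name and the elided expiry work are illustrative only:

/*
 * Sketch of a per-stack timer callback re-arming itself, as done at the
 * end of ip_trash_timer_expire() in this patch. The stack pointer is the
 * timeout argument, so each IP instance drives its own expiry cycle.
 */
static void
ip_trash_timer_expire_sketch(void *args)
{
	ip_stack_t *ipst = (ip_stack_t *)args;	/* stack that armed us */

	/* ... walk and expire IRE cache entries of this stack only ... */

	mutex_enter(&ipst->ips_ip_trash_timer_lock);
	ipst->ips_ip_ire_expire_id = timeout(ip_trash_timer_expire_sketch,
	    (void *)ipst, MSEC_TO_TICK(ipst->ips_ip_timer_interval));
	mutex_exit(&ipst->ips_ip_trash_timer_lock);
}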
@@ -19634,31 +19946,34 @@ ip_trash_timer_expire(void *args) * that occurs due to an almost immediate timer firing will not * progress beyond this point until the current invocation is done */ - mutex_enter(&ip_trash_timer_lock); - ip_ire_expire_id = 0; - mutex_exit(&ip_trash_timer_lock); + mutex_enter(&ipst->ips_ip_trash_timer_lock); + ipst->ips_ip_ire_expire_id = 0; + mutex_exit(&ipst->ips_ip_trash_timer_lock); /* Periodic timer */ - if (ip_ire_arp_time_elapsed >= ip_ire_arp_interval) { + if (ipst->ips_ip_ire_arp_time_elapsed >= + ipst->ips_ip_ire_arp_interval) { /* * Remove all IRE_CACHE entries since they might * contain arp information. */ flush_flag |= FLUSH_ARP_TIME; - ip_ire_arp_time_elapsed = 0; - IP_STAT(ip_ire_arp_timer_expired); + ipst->ips_ip_ire_arp_time_elapsed = 0; + IP_STAT(ipst, ip_ire_arp_timer_expired); } - if (ip_ire_rd_time_elapsed >= ip_ire_redir_interval) { + if (ipst->ips_ip_ire_rd_time_elapsed >= + ipst->ips_ip_ire_redir_interval) { /* Remove all redirects */ flush_flag |= FLUSH_REDIRECT_TIME; - ip_ire_rd_time_elapsed = 0; - IP_STAT(ip_ire_redirect_timer_expired); + ipst->ips_ip_ire_rd_time_elapsed = 0; + IP_STAT(ipst, ip_ire_redirect_timer_expired); } - if (ip_ire_pmtu_time_elapsed >= ip_ire_pathmtu_interval) { + if (ipst->ips_ip_ire_pmtu_time_elapsed >= + ipst->ips_ip_ire_pathmtu_interval) { /* Increase path mtu */ flush_flag |= FLUSH_MTU_TIME; - ip_ire_pmtu_time_elapsed = 0; - IP_STAT(ip_ire_pmtu_timer_expired); + ipst->ips_ip_ire_pmtu_time_elapsed = 0; + IP_STAT(ipst, ip_ire_pmtu_timer_expired); } /* @@ -19666,14 +19981,18 @@ ip_trash_timer_expire(void *args) * ftable, that is, no need to walk the ftable in that case. */ if (flush_flag & (FLUSH_MTU_TIME|FLUSH_ARP_TIME)) { + iea.iea_flush_flag = flush_flag; ire_walk_ill_tables(MATCH_IRE_TYPE, IRE_CACHETABLE, ire_expire, - (char *)(uintptr_t)flush_flag, IP_MASK_TABLE_SIZE, 0, NULL, - ip_cache_table_size, ip_cache_table, NULL, ALL_ZONES); + (char *)(uintptr_t)&iea, IP_MASK_TABLE_SIZE, 0, NULL, + ipst->ips_ip_cache_table_size, ipst->ips_ip_cache_table, + NULL, ALL_ZONES, ipst); } - if ((flush_flag & FLUSH_REDIRECT_TIME) && ip_redirect_cnt > 0) { + if ((flush_flag & FLUSH_REDIRECT_TIME) && + ipst->ips_ip_redirect_cnt > 0) { + iea.iea_flush_flag = flush_flag; ire_walk_ill_tables(MATCH_IRE_TYPE, IRE_FORWARDTABLE, - ire_expire, (char *)(uintptr_t)flush_flag, - IP_MASK_TABLE_SIZE, 0, NULL, 0, NULL, NULL, ALL_ZONES); + ire_expire, (char *)(uintptr_t)&iea, IP_MASK_TABLE_SIZE, + 0, NULL, 0, NULL, NULL, ALL_ZONES, ipst); } if (flush_flag & FLUSH_MTU_TIME) { /* @@ -19682,13 +20001,14 @@ ip_trash_timer_expire(void *args) * needed since NUD handles stale entries. */ flush_flag = FLUSH_MTU_TIME; - ire_walk_v6(ire_expire, (char *)(uintptr_t)flush_flag, - ALL_ZONES); + iea.iea_flush_flag = flush_flag; + ire_walk_v6(ire_expire, (char *)(uintptr_t)&iea, + ALL_ZONES, ipst); } - ip_ire_arp_time_elapsed += ip_timer_interval; - ip_ire_rd_time_elapsed += ip_timer_interval; - ip_ire_pmtu_time_elapsed += ip_timer_interval; + ipst->ips_ip_ire_arp_time_elapsed += ipst->ips_ip_timer_interval; + ipst->ips_ip_ire_rd_time_elapsed += ipst->ips_ip_timer_interval; + ipst->ips_ip_ire_pmtu_time_elapsed += ipst->ips_ip_timer_interval; /* * Hold the lock to serialize timeout calls and prevent @@ -19697,10 +20017,10 @@ ip_trash_timer_expire(void *args) * to start before the return value of timeout has been stored * in ip_ire_expire_id by the current invocation. 
*/ - mutex_enter(&ip_trash_timer_lock); - ip_ire_expire_id = timeout(ip_trash_timer_expire, NULL, - MSEC_TO_TICK(ip_timer_interval)); - mutex_exit(&ip_trash_timer_lock); + mutex_enter(&ipst->ips_ip_trash_timer_lock); + ipst->ips_ip_ire_expire_id = timeout(ip_trash_timer_expire, + (void *)ipst, MSEC_TO_TICK(ipst->ips_ip_timer_interval)); + mutex_exit(&ipst->ips_ip_trash_timer_lock); } /* @@ -19711,6 +20031,20 @@ ip_trash_timer_expire(void *args) void ip_trash_ire_reclaim(void *args) { + netstack_handle_t nh; + netstack_t *ns; + + netstack_next_init(&nh); + while ((ns = netstack_next(&nh)) != NULL) { + ip_trash_ire_reclaim_stack(ns->netstack_ip); + netstack_rele(ns); + } + netstack_next_fini(&nh); +} + +static void +ip_trash_ire_reclaim_stack(ip_stack_t *ipst) +{ ire_cache_count_t icc; ire_cache_reclaim_t icr; ncc_cache_count_t ncc; @@ -19730,23 +20064,24 @@ ip_trash_ire_reclaim(void *args) icc.icc_offlink = 0; icc.icc_pmtu = 0; icc.icc_onlink = 0; - ire_walk(ire_cache_count, (char *)&icc); + ire_walk(ire_cache_count, (char *)&icc, ipst); /* * Free NCEs for IPv6 like the onlink ires. */ ncc.ncc_total = 0; ncc.ncc_host = 0; - ndp_walk(NULL, (pfi_t)ndp_cache_count, (uchar_t *)&ncc); + ndp_walk(NULL, (pfi_t)ndp_cache_count, (uchar_t *)&ncc, ipst); ASSERT(icc.icc_total == icc.icc_unused + icc.icc_offlink + icc.icc_pmtu + icc.icc_onlink); - delete_cnt = icc.icc_total/ip_ire_reclaim_fraction; - IP_STAT(ip_trash_ire_reclaim_calls); + delete_cnt = icc.icc_total/ipst->ips_ip_ire_reclaim_fraction; + IP_STAT(ipst, ip_trash_ire_reclaim_calls); if (delete_cnt == 0) return; - IP_STAT(ip_trash_ire_reclaim_success); + IP_STAT(ipst, ip_trash_ire_reclaim_success); /* Always delete all unused offlink entries */ + icr.icr_ipst = ipst; icr.icr_unused = 1; if (delete_cnt <= icc.icc_unused) { /* @@ -19799,20 +20134,20 @@ ip_trash_ire_reclaim(void *args) #ifdef DEBUG ip1dbg(("IP reclaim: target %d out of %d current %d/%d/%d/%d " "fractions %d/%d/%d/%d\n", - icc.icc_total/ip_ire_reclaim_fraction, icc.icc_total, + icc.icc_total/ipst->ips_ip_ire_reclaim_fraction, icc.icc_total, icc.icc_unused, icc.icc_offlink, icc.icc_pmtu, icc.icc_onlink, icr.icr_unused, icr.icr_offlink, icr.icr_pmtu, icr.icr_onlink)); #endif - ire_walk(ire_cache_reclaim, (char *)&icr); + ire_walk(ire_cache_reclaim, (char *)&icr, ipst); if (ncr.ncr_host != 0) ndp_walk(NULL, (pfi_t)ndp_cache_reclaim, - (uchar_t *)&ncr); + (uchar_t *)&ncr, ipst); #ifdef DEBUG icc.icc_total = 0; icc.icc_unused = 0; icc.icc_offlink = 0; icc.icc_pmtu = 0; icc.icc_onlink = 0; - ire_walk(ire_cache_count, (char *)&icc); + ire_walk(ire_cache_count, (char *)&icc, ipst); ip1dbg(("IP reclaim: result total %d %d/%d/%d/%d\n", icc.icc_total, icc.icc_unused, icc.icc_offlink, icc.icc_pmtu, icc.icc_onlink)); @@ -19911,6 +20246,7 @@ ip_output_options(void *arg, mblk_t *mp, void *arg2, int caller, boolean_t ignore_nexthop = B_FALSE; boolean_t ip_nexthop = B_FALSE; ipaddr_t nexthop_addr; + ip_stack_t *ipst; #ifdef _BIG_ENDIAN #define V_HLEN (v_hlen_tos_len >> 24) @@ -19934,6 +20270,7 @@ ip_output_options(void *arg, mblk_t *mp, void *arg2, int caller, connp = (conn_t *)arg; ASSERT(connp != NULL); zoneid = connp->conn_zoneid; + ipst = connp->conn_netstack->netstack_ip; /* is queue flow controlled? */ if ((q->q_first != NULL || connp->conn_draining) && @@ -19950,7 +20287,7 @@ ip_output_options(void *arg, mblk_t *mp, void *arg2, int caller, * originating from tcp should have been directed over to * tcp_multisend() in the first place. 
*/ - BUMP_MIB(&ip_mib, ipIfStatsOutDiscards); + BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); freemsg(mp); return; } else if (DB_TYPE(mp) != M_DATA) @@ -19986,7 +20323,7 @@ ip_output_options(void *arg, mblk_t *mp, void *arg2, int caller, (ipha->ipha_version_and_hdr_length & 0xf0) == (IPV4_VERSION << 4) && !connp->conn_ulp_labeled) { err = tsol_check_label(BEST_CRED(mp, connp), &mp, &adjust, - connp->conn_mac_exempt); + connp->conn_mac_exempt, ipst); ipha = (ipha_t *)mp->b_rptr; if (err != 0) { first_mp = mp; @@ -20009,7 +20346,7 @@ ip_output_options(void *arg, mblk_t *mp, void *arg2, int caller, */ ire = ire_ctable_lookup(ipha->ipha_src, 0, (IRE_LOCAL|IRE_LOOPBACK), NULL, IPCL_ZONEID(connp), - NULL, MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY); + NULL, MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY, ipst); if (ire == NULL) goto drop_pkt; ire_refrele(ire); @@ -20025,7 +20362,8 @@ ip_output_options(void *arg, mblk_t *mp, void *arg2, int caller, connp->conn_nofailover_ill == NULL) { xmit_ill = ill_lookup_on_ifindex( - infop->ip_opt_ill_index, B_FALSE, NULL, NULL, NULL, NULL); + infop->ip_opt_ill_index, B_FALSE, NULL, NULL, NULL, NULL, + ipst); if (xmit_ill == NULL || IS_VNI(xmit_ill)) goto drop_pkt; @@ -20050,8 +20388,8 @@ ip_output_options(void *arg, mblk_t *mp, void *arg2, int caller, */ if (connp->conn_out_enforce_policy || (connp->conn_latch != NULL)) { if (((mp = ipsec_attach_ipsec_out(mp, connp, NULL, - ipha->ipha_protocol)) == NULL)) { - BUMP_MIB(&ip_mib, ipIfStatsOutDiscards); + ipha->ipha_protocol, ipst->ips_netstack)) == NULL)) { + BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); if (need_decref) CONN_DEC_REF(connp); return; @@ -20077,7 +20415,7 @@ ip_output_options(void *arg, mblk_t *mp, void *arg2, int caller, attach_ill = conn_get_held_ill(connp, &connp->conn_nofailover_ill, &err); if (err == ILL_LOOKUP_FAILED) { - BUMP_MIB(&ip_mib, ipIfStatsOutDiscards); + BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); if (need_decref) CONN_DEC_REF(connp); freemsg(first_mp); @@ -20107,7 +20445,7 @@ ip_output_options(void *arg, mblk_t *mp, void *arg2, int caller, * destination only SO_DONTROUTE and IP_NEXTHOP go through * the standard path not IP_XMIT_IF. */ - ire = ire_cache_lookup(dst, zoneid, MBLK_GETLABEL(mp)); + ire = ire_cache_lookup(dst, zoneid, MBLK_GETLABEL(mp), ipst); if ((ire == NULL) || ((ire->ire_type != IRE_BROADCAST) && (ire->ire_type != IRE_LOOPBACK))) { if ((connp->conn_dontroute || @@ -20144,7 +20482,8 @@ ip_output_options(void *arg, mblk_t *mp, void *arg2, int caller, xmit_ill = conn_get_held_ill(connp, &connp->conn_xmit_if_ill, &err); if (err == ILL_LOOKUP_FAILED) { - BUMP_MIB(&ip_mib, ipIfStatsOutDiscards); + BUMP_MIB(&ipst->ips_ip_mib, + ipIfStatsOutDiscards); if (attach_ill != NULL) ill_refrele(attach_ill); if (need_decref) @@ -20174,7 +20513,7 @@ standard_path: */ if (IP_FLOW_CONTROLLED_ULP(connp->conn_ulp) && !connp->conn_fully_bound) { - ire = ire_cache_lookup(dst, zoneid, MBLK_GETLABEL(mp)); + ire = ire_cache_lookup(dst, zoneid, MBLK_GETLABEL(mp), ipst); if (ire == NULL) goto noirefound; TRACE_2(TR_FAC_IP, TR_IP_WPUT_END, @@ -20191,13 +20530,13 @@ standard_path: * The TTL of such packets is bounded by the * ip_multirt_ttl ndd variable. 
*/ - if ((ip_multirt_ttl > 0) && - (ipha->ipha_ttl > ip_multirt_ttl)) { + if ((ipst->ips_ip_multirt_ttl > 0) && + (ipha->ipha_ttl > ipst->ips_ip_multirt_ttl)) { ip2dbg(("ip_wput: forcing multirt TTL to %d " "(was %d), dst 0x%08x\n", - ip_multirt_ttl, ipha->ipha_ttl, + ipst->ips_ip_multirt_ttl, ipha->ipha_ttl, ntohl(ire->ire_addr))); - ipha->ipha_ttl = ip_multirt_ttl; + ipha->ipha_ttl = ipst->ips_ip_multirt_ttl; } /* * We look at this point if there are pending @@ -20211,7 +20550,7 @@ standard_path: */ multirt_need_resolve = ire_multirt_need_resolve(ire->ire_addr, - MBLK_GETLABEL(first_mp)); + MBLK_GETLABEL(first_mp), ipst); ip2dbg(("ip_wput[TCP]: ire %p, " "multirt_need_resolve %d, first_mp %p\n", (void *)ire, multirt_need_resolve, @@ -20231,7 +20570,7 @@ standard_path: * ire_multirt_need_resolve() deemed it necessary. */ if (copy_mp != NULL) { - ip_newroute(q, copy_mp, dst, NULL, connp, zoneid); + ip_newroute(q, copy_mp, dst, NULL, connp, zoneid, ipst); } if (need_decref) CONN_DEC_REF(connp); @@ -20281,7 +20620,7 @@ standard_path: if (ire != NULL && sctp_ire == NULL) IRE_REFRELE_NOTR(ire); - ire = ire_cache_lookup(dst, zoneid, MBLK_GETLABEL(mp)); + ire = ire_cache_lookup(dst, zoneid, MBLK_GETLABEL(mp), ipst); if (ire == NULL) goto noirefound; IRE_REFHOLD_NOTR(ire); @@ -20321,13 +20660,13 @@ standard_path: * The TTL of such packets is bounded by the * ip_multirt_ttl ndd variable. */ - if ((ip_multirt_ttl > 0) && - (ipha->ipha_ttl > ip_multirt_ttl)) { + if ((ipst->ips_ip_multirt_ttl > 0) && + (ipha->ipha_ttl > ipst->ips_ip_multirt_ttl)) { ip2dbg(("ip_wput: forcing multirt TTL to %d " "(was %d), dst 0x%08x\n", - ip_multirt_ttl, ipha->ipha_ttl, + ipst->ips_ip_multirt_ttl, ipha->ipha_ttl, ntohl(ire->ire_addr))); - ipha->ipha_ttl = ip_multirt_ttl; + ipha->ipha_ttl = ipst->ips_ip_multirt_ttl; } /* @@ -20341,7 +20680,7 @@ standard_path: * to initiate additional route resolutions. */ multirt_need_resolve = ire_multirt_need_resolve(ire->ire_addr, - MBLK_GETLABEL(first_mp)); + MBLK_GETLABEL(first_mp), ipst); ip2dbg(("ip_wput[not TCP]: ire %p, " "multirt_need_resolve %d, first_mp %p\n", (void *)ire, multirt_need_resolve, (void *)first_mp)); @@ -20360,7 +20699,7 @@ standard_path: * ire_multirt_resolvable() deemed it necessary */ if (copy_mp != NULL) { - ip_newroute(q, copy_mp, dst, NULL, connp, zoneid); + ip_newroute(q, copy_mp, dst, NULL, connp, zoneid, ipst); } if (need_decref) CONN_DEC_REF(connp); @@ -20382,6 +20721,8 @@ qnext: * * 3) ICMP replies also could come here. 
*/ + ipst = ILLQ_TO_IPST(q); + if (DB_TYPE(mp) != M_DATA) { notdata: if (DB_TYPE(mp) == M_CTL) { @@ -20447,7 +20788,8 @@ qnext: ASSERT(cmp != NULL); freeb(mp); - ill_ipsec_capab_send_all(satype, cmp, sa); + ill_ipsec_capab_send_all(satype, cmp, sa, + ipst->ips_netstack); return; } else { /* @@ -20504,7 +20846,7 @@ qnext: ASSERT(io->ipsec_out_ill_index != 0); ifindex = io->ipsec_out_ill_index; ill = ill_lookup_on_ifindex(ifindex, B_FALSE, - NULL, NULL, NULL, NULL); + NULL, NULL, NULL, NULL, ipst); /* * ipsec_out_xmit_if bit is used to tell * ip_wput to use the ill to send outgoing data @@ -20519,7 +20861,7 @@ qnext: "for xmit_ill %d\n", ifindex)); freemsg(first_mp); - BUMP_MIB(&ip_mib, + BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); ASSERT(!need_decref); return; @@ -20543,7 +20885,8 @@ qnext: "(BIND TO IPIF_NOFAILOVER) %d\n", ifindex)); freemsg(first_mp); - BUMP_MIB(&ip_mib, ipIfStatsOutDiscards); + BUMP_MIB(&ipst->ips_ip_mib, + ipIfStatsOutDiscards); ASSERT(!need_decref); return; } @@ -20578,7 +20921,7 @@ qnext: (*mp->b_rptr & 0xf0) == (IPV4_VERSION << 4) && !connp->conn_ulp_labeled) { err = tsol_check_label(BEST_CRED(mp, connp), &mp, - &adjust, connp->conn_mac_exempt); + &adjust, connp->conn_mac_exempt, ipst); ipha = (ipha_t *)mp->b_rptr; if (first_mp != NULL) first_mp->b_cont = mp; @@ -20604,8 +20947,10 @@ qnext: */ if (connp->conn_out_enforce_policy) { if (((mp = ipsec_attach_ipsec_out(mp, connp, - NULL, ipha->ipha_protocol)) == NULL)) { - BUMP_MIB(&ip_mib, ipIfStatsOutDiscards); + NULL, ipha->ipha_protocol, + ipst->ips_netstack)) == NULL)) { + BUMP_MIB(&ipst->ips_ip_mib, + ipIfStatsOutDiscards); if (need_decref) CONN_DEC_REF(connp); return; @@ -20647,9 +20992,9 @@ qnext: */ #ifdef notyet if (q->q_next == NULL) /* Avoid ill queue */ - ip_setqinfo(RD(q), B_TRUE, B_TRUE); + ip_setqinfo(RD(q), B_TRUE, B_TRUE, ipst); #endif - BUMP_MIB(&ip_mib, ipIfStatsOutWrongIPVersion); + BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutWrongIPVersion); ASSERT(xmit_ill == NULL); if (attach_ill != NULL) ill_refrele(attach_ill); @@ -20689,9 +21034,10 @@ qnext: * Move first entry from any source route into ipha_dst and * verify the options */ - if (ip_wput_options(q, first_mp, ipha, mctl_present, zoneid)) { + if (ip_wput_options(q, first_mp, ipha, mctl_present, + zoneid, ipst)) { ASSERT(xmit_ill == NULL); - BUMP_MIB(&ip_mib, ipIfStatsOutDiscards); + BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); if (attach_ill != NULL) ill_refrele(attach_ill); TRACE_2(TR_FAC_IP, TR_IP_WPUT_END, @@ -20770,7 +21116,8 @@ qnext: if (err == ILL_LOOKUP_FAILED) { ip1dbg(("ip_wput: No ill for " "IP_XMIT_IF\n")); - BUMP_MIB(&ip_mib, ipIfStatsOutNoRoutes); + BUMP_MIB(&ipst->ips_ip_mib, + ipIfStatsOutNoRoutes); goto drop_pkt; } } @@ -20781,7 +21128,8 @@ qnext: if (err == IPIF_LOOKUP_FAILED) { ip1dbg(("ip_wput: No ipif for " "multicast\n")); - BUMP_MIB(&ip_mib, ipIfStatsOutNoRoutes); + BUMP_MIB(&ipst->ips_ip_mib, + ipIfStatsOutNoRoutes); goto drop_pkt; } } @@ -20790,7 +21138,8 @@ qnext: if (ipif == NULL) { ip1dbg(("ip_wput: No ipif for " "IP_XMIT_IF\n")); - BUMP_MIB(&ip_mib, ipIfStatsOutNoRoutes); + BUMP_MIB(&ipst->ips_ip_mib, + ipIfStatsOutNoRoutes); goto drop_pkt; } } else if (ipif == NULL || ipif->ipif_isv6) { @@ -20812,11 +21161,12 @@ qnext: */ if (ipif != NULL) ipif_refrele(ipif); - ipif = ipif_lookup_group(dst, zoneid); + ipif = ipif_lookup_group(dst, zoneid, ipst); if (ipif == NULL) { ip1dbg(("ip_wput: No ipif for " "multicast\n")); - BUMP_MIB(&ip_mib, ipIfStatsOutNoRoutes); + BUMP_MIB(&ipst->ips_ip_mib, + ipIfStatsOutNoRoutes); 
goto drop_pkt; } err = conn_set_held_ipif(connp, @@ -20825,7 +21175,8 @@ qnext: ipif_refrele(ipif); ip1dbg(("ip_wput: No ipif for " "multicast\n")); - BUMP_MIB(&ip_mib, ipIfStatsOutNoRoutes); + BUMP_MIB(&ipst->ips_ip_mib, + ipIfStatsOutNoRoutes); goto drop_pkt; } } @@ -20858,6 +21209,7 @@ qnext: io->ipsec_out_type = IPSEC_OUT; io->ipsec_out_len = sizeof (ipsec_out_t); io->ipsec_out_use_global_policy = B_TRUE; + io->ipsec_out_ns = ipst->ips_netstack; first_mp->b_cont = mp; mctl_present = B_TRUE; } @@ -20937,7 +21289,7 @@ qnext: ire = NULL; if (xmit_ill == NULL) { ire = ire_ctable_lookup(dst, 0, 0, ipif, - zoneid, MBLK_GETLABEL(mp), match_flags); + zoneid, MBLK_GETLABEL(mp), match_flags, ipst); } /* @@ -21006,7 +21358,7 @@ qnext: } } } else { - ire = ire_cache_lookup(dst, zoneid, MBLK_GETLABEL(mp)); + ire = ire_cache_lookup(dst, zoneid, MBLK_GETLABEL(mp), ipst); if ((ire != NULL) && (ire->ire_type & (IRE_BROADCAST | IRE_LOCAL | IRE_LOOPBACK))) { ignore_dontroute = B_TRUE; @@ -21037,11 +21389,13 @@ dontroute: */ if (connp->conn_xmit_if_ill == NULL) { /* If suitable ipif not found, drop packet */ - dst_ipif = ipif_lookup_onlink_addr(dst, zoneid); + dst_ipif = ipif_lookup_onlink_addr(dst, zoneid, + ipst); if (dst_ipif == NULL) { ip1dbg(("ip_wput: no route for " "dst using SO_DONTROUTE\n")); - BUMP_MIB(&ip_mib, ipIfStatsOutNoRoutes); + BUMP_MIB(&ipst->ips_ip_mib, + ipIfStatsOutNoRoutes); mp->b_prev = mp->b_next = NULL; if (first_mp == NULL) first_mp = mp; @@ -21063,7 +21417,7 @@ dontroute: ip1dbg(("ip_wput: no route for" " dst using" " SO_DONTROUTE\n")); - BUMP_MIB(&ip_mib, + BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutNoRoutes); mp->b_prev = mp->b_next = NULL; if (first_mp == NULL) @@ -21101,7 +21455,7 @@ send_from_ill: goto discard_pkt; } ire = ire_ctable_lookup(dst, 0, 0, attach_ipif, - zoneid, MBLK_GETLABEL(mp), match_flags); + zoneid, MBLK_GETLABEL(mp), match_flags, ipst); ipif_refrele(attach_ipif); } else if (xmit_ill != NULL || (connp != NULL && connp->conn_xmit_if_ill != NULL)) { @@ -21117,7 +21471,8 @@ send_from_ill: xmit_ill = conn_get_held_ill(connp, &connp->conn_xmit_if_ill, &err); if (err == ILL_LOOKUP_FAILED) { - BUMP_MIB(&ip_mib, ipIfStatsOutDiscards); + BUMP_MIB(&ipst->ips_ip_mib, + ipIfStatsOutDiscards); if (need_decref) CONN_DEC_REF(connp); freemsg(first_mp); @@ -21154,7 +21509,7 @@ send_from_ill: match_flags = MATCH_IRE_ILL_GROUP | MATCH_IRE_SECATTR; ire = ire_ctable_lookup(dst, 0, 0, ipif, zoneid, - MBLK_GETLABEL(mp), match_flags); + MBLK_GETLABEL(mp), match_flags, ipst); /* * If an ire exists use it or else create * an ire but don't add it to the cache. 
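The ip_wput()/ip_output_options() hunks around this point show how the write-side path obtains its ip_stack_t and then threads it through every lookup and counter: conn-originated packets take connp->conn_netstack->netstack_ip, packets arriving on an ill write queue use ILLQ_TO_IPST(q), and formerly global symbols such as ip_mib become ipst->ips_ip_mib. A condensed sketch of that pattern, assuming the names used in the diff; the wrapper function itself is hypothetical:

/*
 * Sketch of the per-stack conversion pattern applied throughout the
 * IPv4 output path in this patch: derive ipst once, then pass it to
 * lookups and name it in MIB updates.
 */
static void
ip_output_ipst_example(conn_t *connp, queue_t *q, ipaddr_t dst,
    zoneid_t zoneid, mblk_t *mp)
{
	ip_stack_t *ipst;
	ire_t *ire;

	if (connp != NULL)
		ipst = connp->conn_netstack->netstack_ip; /* conn path */
	else
		ipst = ILLQ_TO_IPST(q);		/* ill write queue path */

	/* Cache lookup and MIB counters now name the stack explicitly. */
	ire = ire_cache_lookup(dst, zoneid, MBLK_GETLABEL(mp), ipst);
	if (ire == NULL) {
		BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutNoRoutes);
		return;
	}
	ire_refrele(ire);
}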
@@ -21189,9 +21544,10 @@ send_from_ill: match_flags = MATCH_IRE_MARK_PRIVATE_ADDR | MATCH_IRE_GW; ire = ire_ctable_lookup(dst, nexthop_addr, 0, - NULL, zoneid, MBLK_GETLABEL(mp), match_flags); + NULL, zoneid, MBLK_GETLABEL(mp), match_flags, ipst); } else { - ire = ire_cache_lookup(dst, zoneid, MBLK_GETLABEL(mp)); + ire = ire_cache_lookup(dst, zoneid, MBLK_GETLABEL(mp), + ipst); } if (!ire) { /* @@ -21225,6 +21581,7 @@ send_from_ill: sizeof (ipsec_out_t); io->ipsec_out_use_global_policy = B_TRUE; + io->ipsec_out_ns = ipst->ips_netstack; first_mp->b_cont = mp; mctl_present = B_TRUE; } @@ -21248,7 +21605,8 @@ noirefound: */ mp->b_prev = NULL; mp->b_next = NULL; - ip_newroute(q, first_mp, dst, NULL, connp, zoneid); + ip_newroute(q, first_mp, dst, NULL, connp, zoneid, + ipst); TRACE_2(TR_FAC_IP, TR_IP_WPUT_END, "ip_wput_end: q %p (%S)", q, "newroute"); if (attach_ill != NULL) @@ -21276,13 +21634,13 @@ noirefound: * The TTL of such packets is bounded by the * ip_multirt_ttl ndd variable. */ - if ((ip_multirt_ttl > 0) && - (ipha->ipha_ttl > ip_multirt_ttl)) { + if ((ipst->ips_ip_multirt_ttl > 0) && + (ipha->ipha_ttl > ipst->ips_ip_multirt_ttl)) { ip2dbg(("ip_wput: forcing multirt TTL to %d " "(was %d), dst 0x%08x\n", - ip_multirt_ttl, ipha->ipha_ttl, + ipst->ips_ip_multirt_ttl, ipha->ipha_ttl, ntohl(ire->ire_addr))); - ipha->ipha_ttl = ip_multirt_ttl; + ipha->ipha_ttl = ipst->ips_ip_multirt_ttl; } /* * At this point, we check to see if there are any pending @@ -21295,7 +21653,7 @@ noirefound: * to initiate additional route resolutions. */ multirt_need_resolve = ire_multirt_need_resolve(ire->ire_addr, - MBLK_GETLABEL(first_mp)); + MBLK_GETLABEL(first_mp), ipst); ip2dbg(("ip_wput[noirefound]: ire %p, " "multirt_need_resolve %d, first_mp %p\n", (void *)ire, multirt_need_resolve, (void *)first_mp)); @@ -21318,7 +21676,7 @@ noirefound: */ if (copy_mp != NULL) { if (CLASSD(dst)) { - ipif_t *ipif = ipif_lookup_group(dst, zoneid); + ipif_t *ipif = ipif_lookup_group(dst, zoneid, ipst); if (ipif) { ASSERT(infop->ip_opt_ill_index == 0); ip_newroute_ipif(q, copy_mp, ipif, dst, connp, @@ -21330,7 +21688,7 @@ noirefound: copy_mp = NULL; } } else { - ip_newroute(q, copy_mp, dst, NULL, connp, zoneid); + ip_newroute(q, copy_mp, dst, NULL, connp, zoneid, ipst); } } if (attach_ill != NULL) @@ -21344,15 +21702,15 @@ noirefound: icmp_parameter_problem: /* could not have originated externally */ ASSERT(mp->b_prev == NULL); - if (ip_hdr_complete(ipha, zoneid) == 0) { - BUMP_MIB(&ip_mib, ipIfStatsOutNoRoutes); + if (ip_hdr_complete(ipha, zoneid, ipst) == 0) { + BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutNoRoutes); /* it's the IP header length that's in trouble */ - icmp_param_problem(q, first_mp, 0, zoneid); + icmp_param_problem(q, first_mp, 0, zoneid, ipst); first_mp = NULL; } discard_pkt: - BUMP_MIB(&ip_mib, ipIfStatsOutDiscards); + BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); drop_pkt: ip1dbg(("ip_wput: dropped packet\n")); if (ire != NULL) @@ -21418,9 +21776,10 @@ conn_get_held_ipif(conn_t *connp, ipif_t **ipifp, int *err) { ipif_t *ipif; ill_t *ill; + ip_stack_t *ipst = connp->conn_netstack->netstack_ip; *err = 0; - rw_enter(&ill_g_lock, RW_READER); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); mutex_enter(&connp->conn_lock); ipif = *ipifp; if (ipif != NULL) { @@ -21430,7 +21789,7 @@ conn_get_held_ipif(conn_t *connp, ipif_t **ipifp, int *err) ipif_refhold_locked(ipif); mutex_exit(&ill->ill_lock); mutex_exit(&connp->conn_lock); - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); return (ipif); } else { *err 
= IPIF_LOOKUP_FAILED; @@ -21438,7 +21797,7 @@ conn_get_held_ipif(conn_t *connp, ipif_t **ipifp, int *err) mutex_exit(&ill->ill_lock); } mutex_exit(&connp->conn_lock); - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); return (NULL); } @@ -21491,7 +21850,8 @@ conn_set_held_ipif(conn_t *connp, ipif_t **ipifp, ipif_t *ipif) * NOTE : This function does not ire_refrele the ire argument passed in. */ static void -ip_wput_ire_fragmentit(mblk_t *ipsec_mp, ire_t *ire, zoneid_t zoneid) +ip_wput_ire_fragmentit(mblk_t *ipsec_mp, ire_t *ire, zoneid_t zoneid, + ip_stack_t *ipst) { ipha_t *ipha; mblk_t *mp; @@ -21549,7 +21909,7 @@ ip_wput_ire_fragmentit(mblk_t *ipsec_mp, ire_t *ire, zoneid_t zoneid) ip_source_route_included(ipha)) || CLASSD(ipha->ipha_dst)); ip_wput_frag(ire, ipsec_mp, OB_PKT, max_frag, - (dont_use ? 0 : frag_flag), zoneid); + (dont_use ? 0 : frag_flag), zoneid, ipst); } /* @@ -21666,6 +22026,12 @@ ip_wput_ire_parse_ipsec_out(mblk_t *mp, ipha_t *ipha, ip6_t *ip6h, ire_t *ire, ipsec_out_t *io; mblk_t *first_mp; boolean_t policy_present; + ip_stack_t *ipst; + ipsec_stack_t *ipss; + + ASSERT(ire != NULL); + ipst = ire->ire_ipst; + ipss = ipst->ips_netstack->netstack_ipsec; first_mp = mp; if (mp->b_datap->db_type == M_CTL) { @@ -21748,9 +22114,9 @@ ip_wput_ire_parse_ipsec_out(mblk_t *mp, ipha_t *ipha, ip6_t *ip6h, ire_t *ire, * ipsec_out and make sure that we don't apply global policy. */ if (ipha != NULL) - policy_present = ipsec_outbound_v4_policy_present; + policy_present = ipss->ipsec_outbound_v4_policy_present; else - policy_present = ipsec_outbound_v6_policy_present; + policy_present = ipss->ipsec_outbound_v6_policy_present; if (!policy_present) return (mp); @@ -21896,6 +22262,8 @@ ip_wput_ire(queue_t *q, mblk_t *mp, ire_t *ire, conn_t *connp, int caller, boolean_t multirt_send = B_FALSE; int err; ipxmit_state_t pktxmit_state; + ip_stack_t *ipst = ire->ire_ipst; + ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; TRACE_1(TR_FAC_IP, TR_IP_WPUT_IRE_START, "ip_wput_ire_start: q %p", q); @@ -22028,10 +22396,10 @@ ip_wput_ire(queue_t *q, mblk_t *mp, ire_t *ire, conn_t *connp, int caller, src_ire = ire_ftable_lookup(ipha->ipha_dst, 0, 0, 0, NULL, NULL, zoneid, 0, NULL, (MATCH_IRE_RECURSIVE | - MATCH_IRE_DEFAULT | MATCH_IRE_RJ_BHOLE)); + MATCH_IRE_DEFAULT | MATCH_IRE_RJ_BHOLE), ipst); if (src_ire != NULL && !(src_ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) && - (!ip_restrict_interzone_loopback || + (!ipst->ips_ip_restrict_interzone_loopback || ire_local_same_ill_group(ire, src_ire))) { if (ipha->ipha_src == INADDR_ANY && !unspec_src) ipha->ipha_src = src_ire->ire_src_addr; @@ -22040,7 +22408,7 @@ ip_wput_ire(queue_t *q, mblk_t *mp, ire_t *ire, conn_t *connp, int caller, ire_refrele(ire); if (conn_outgoing_ill != NULL) ill_refrele(conn_outgoing_ill); - BUMP_MIB(&ip_mib, ipIfStatsOutNoRoutes); + BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutNoRoutes); if (src_ire != NULL) { if (src_ire->ire_flags & RTF_BLACKHOLE) { ire_refrele(src_ire); @@ -22049,18 +22417,19 @@ ip_wput_ire(queue_t *q, mblk_t *mp, ire_t *ire, conn_t *connp, int caller, } ire_refrele(src_ire); } - if (ip_hdr_complete(ipha, zoneid)) { + if (ip_hdr_complete(ipha, zoneid, ipst)) { /* Failed */ freemsg(mp); return; } - icmp_unreachable(q, mp, ICMP_HOST_UNREACHABLE, zoneid); + icmp_unreachable(q, mp, ICMP_HOST_UNREACHABLE, zoneid, + ipst); return; } } if (mp->b_datap->db_type == M_CTL || - ipsec_outbound_v4_policy_present) { + ipss->ipsec_outbound_v4_policy_present) { mp = ip_wput_ire_parse_ipsec_out(mp, ipha, NULL, ire, 
connp, unspec_src, zoneid); if (mp == NULL) { @@ -22193,7 +22562,7 @@ another:; ire->ire_zoneid != zoneid) { ire_t *src_ire = ire_ctable_lookup(dst, 0, IRE_BROADCAST, ire->ire_ipif, zoneid, NULL, - (MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP)); + (MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP), ipst); if (src_ire != NULL) { src = src_ire->ire_src_addr; ire_refrele(src_ire); @@ -22206,7 +22575,8 @@ another:; BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); } else { - BUMP_MIB(&ip_mib, ipIfStatsOutDiscards); + BUMP_MIB(&ipst->ips_ip_mib, + ipIfStatsOutDiscards); } return; } @@ -22264,8 +22634,8 @@ another:; hlen, LENGTH, max_frag, ipsec_len, cksum); /* Software checksum? */ if (DB_CKSUMFLAGS(mp) == 0) { - IP_STAT(ip_out_sw_cksum); - IP_STAT_UPDATE( + IP_STAT(ipst, ip_out_sw_cksum); + IP_STAT_UPDATE(ipst, ip_udp_out_sw_cksum_bytes, LENGTH - hlen); } @@ -22286,8 +22656,8 @@ another:; LENGTH, max_frag, ipsec_len, cksum); /* Software checksum? */ if (DB_CKSUMFLAGS(mp) == 0) { - IP_STAT(ip_out_sw_cksum); - IP_STAT_UPDATE(ip_tcp_out_sw_cksum_bytes, + IP_STAT(ipst, ip_out_sw_cksum); + IP_STAT_UPDATE(ipst, ip_tcp_out_sw_cksum_bytes, LENGTH - hlen); } } else { @@ -22431,8 +22801,9 @@ another:; DTRACE_PROBE4(ip4__physical__out__start, ill_t *, NULL, ill_t *, ire->ire_ipif->ipif_ill, ipha_t *, ipha, mblk_t *, mp); - FW_HOOKS(ip4_physical_out_event, ipv4firewall_physical_out, - NULL, ire->ire_ipif->ipif_ill, ipha, mp, mp); + FW_HOOKS(ipst->ips_ip4_physical_out_event, + ipst->ips_ipv4firewall_physical_out, + NULL, ire->ire_ipif->ipif_ill, ipha, mp, mp, ipst); DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, mp); if (mp == NULL) goto release_ire_and_ill; @@ -22502,7 +22873,7 @@ broadcast: if ((connp != NULL) && connp->conn_dontroute) ipha->ipha_ttl = 1; else - ipha->ipha_ttl = ip_broadcast_ttl; + ipha->ipha_ttl = ipst->ips_ip_broadcast_ttl; /* * Note that we are not doing a IRB_REFHOLD here. @@ -22690,8 +23061,8 @@ broadcast: /* hlen gets the number of uchar_ts in the IP header */ hlen = (V_HLEN & 0xF) << 2; up = IPH_TCPH_CHECKSUMP(ipha, hlen); - IP_STAT(ip_out_sw_cksum); - IP_STAT_UPDATE(ip_tcp_out_sw_cksum_bytes, + IP_STAT(ipst, ip_out_sw_cksum); + IP_STAT_UPDATE(ipst, ip_tcp_out_sw_cksum_bytes, LENGTH - hlen); *up = IP_CSUM(mp, hlen, cksum + IP_TCP_CSUM_COMP); if (*up == 0) @@ -22723,7 +23094,8 @@ broadcast: * than reorder packets, we just drop this * packet. */ - if (ip_output_queue && connp != NULL && + if (ipst->ips_ip_output_queue && + connp != NULL && caller != IRE_SEND) { if (caller == IP_WSRV) { connp->conn_did_putbq = 1; @@ -22741,7 +23113,7 @@ broadcast: if (canput(stq->q_next)) connp->conn_did_putbq = 0; - IP_STAT(ip_conn_flputbq); + IP_STAT(ipst, ip_conn_flputbq); } else { /* * We are not the service proc. @@ -22784,8 +23156,8 @@ broadcast: ipsec_len, cksum); /* Software checksum? */ if (DB_CKSUMFLAGS(mp) == 0) { - IP_STAT(ip_out_sw_cksum); - IP_STAT_UPDATE( + IP_STAT(ipst, ip_out_sw_cksum); + IP_STAT_UPDATE(ipst, ip_udp_out_sw_cksum_bytes, LENGTH - hlen); } @@ -22871,7 +23243,8 @@ broadcast: * SO_DONTROUTE option to accomplish this. 
*/ - if (ip_g_mrouter && !conn_dontroute && ill != NULL) { + if (ipst->ips_ip_g_mrouter && !conn_dontroute && + ill != NULL) { /* Unconditionally redo the checksum */ ipha->ipha_hdr_checksum = 0; ipha->ipha_hdr_checksum = ip_csum_hdr(ipha); @@ -23028,9 +23401,9 @@ broadcast: ill_t *, NULL, ill_t *, out_ill, ipha_t *, ipha, mblk_t *, mp); - FW_HOOKS(ip4_physical_out_event, - ipv4firewall_physical_out, - NULL, out_ill, ipha, mp, mp); + FW_HOOKS(ipst->ips_ip4_physical_out_event, + ipst->ips_ipv4firewall_physical_out, + NULL, out_ill, ipha, mp, mp, ipst); DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, mp); if (mp == NULL) @@ -23126,7 +23499,7 @@ release_ire_and_ill_2: ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha); icmp_frag_needed(ire->ire_stq, first_mp, - max_frag, zoneid); + max_frag, zoneid, ipst); if (!next_mp) { ire_refrele(ire); if (conn_outgoing_ill != NULL) { @@ -23171,7 +23544,7 @@ release_ire_and_ill_2: * ip_wput_ire_fragmentit->ip_wput_frag */ - if (IPP_ENABLED(IPP_LOCAL_OUT)) { + if (IPP_ENABLED(IPP_LOCAL_OUT, ipst)) { ip_process(IPP_LOCAL_OUT, &mp, ill_index); if (mp == NULL) { @@ -23199,13 +23572,13 @@ release_ire_and_ill_2: "ip_wput_ire_end: q %p (%S)", q, "last fragmentation"); ip_wput_ire_fragmentit(mp, ire, - zoneid); + zoneid, ipst); ire_refrele(ire); if (conn_outgoing_ill != NULL) ill_refrele(conn_outgoing_ill); return; } - ip_wput_ire_fragmentit(mp, ire, zoneid); + ip_wput_ire_fragmentit(mp, ire, zoneid, ipst); } } } else { @@ -23229,9 +23602,9 @@ release_ire_and_ill_2: ill_t *, NULL, ill_t *, out_ill, ipha_t *, ipha, mblk_t *, first_mp); - FW_HOOKS(ip4_loopback_out_event, - ipv4firewall_loopback_out, - NULL, out_ill, ipha, first_mp, mp); + FW_HOOKS(ipst->ips_ip4_loopback_out_event, + ipst->ips_ipv4firewall_loopback_out, + NULL, out_ill, ipha, first_mp, mp, ipst); DTRACE_PROBE1(ip4__loopback__out_end, mblk_t *, first_mp); @@ -23255,8 +23628,9 @@ release_ire_and_ill_2: ill_t *, NULL, ill_t *, out_ill, ipha_t *, ipha, mblk_t *, first_mp); - FW_HOOKS(ip4_loopback_out_event, ipv4firewall_loopback_out, - NULL, out_ill, ipha, first_mp, mp); + FW_HOOKS(ipst->ips_ip4_loopback_out_event, + ipst->ips_ipv4firewall_loopback_out, + NULL, out_ill, ipha, first_mp, mp, ipst); DTRACE_PROBE1(ip4__loopback__out__end, mblk_t *, first_mp); @@ -23341,6 +23715,7 @@ ip_mdinfo_return(ire_t *dst_ire, conn_t *connp, char *ill_name, { mblk_t *mp; boolean_t rc = B_FALSE; + ip_stack_t *ipst = connp->conn_netstack->netstack_ip; ASSERT(dst_ire != NULL); ASSERT(connp != NULL); @@ -23375,7 +23750,7 @@ ip_mdinfo_return(ire_t *dst_ire, conn_t *connp, char *ill_name, break; /* Outbound IPQoS enabled? */ - if (IPP_ENABLED(IPP_LOCAL_OUT)) { + if (IPP_ENABLED(IPP_LOCAL_OUT, ipst)) { /* * In this case, we disable MDT for this and all * future connections going over the interface. @@ -23462,6 +23837,7 @@ ip_lsoinfo_return(ire_t *dst_ire, conn_t *connp, char *ill_name, ill_lso_capab_t *lso_cap) { mblk_t *mp; + ip_stack_t *ipst = connp->conn_netstack->netstack_ip; ASSERT(dst_ire != NULL); ASSERT(connp != NULL); @@ -23473,9 +23849,9 @@ ip_lsoinfo_return(ire_t *dst_ire, conn_t *connp, char *ill_name, CONN_IPSEC_OUT_ENCAPSULATED(connp) || (dst_ire->ire_flags & RTF_MULTIRT) || !CONN_IS_LSO_MD_FASTPATH(connp) || - (IPP_ENABLED(IPP_LOCAL_OUT))) { + (IPP_ENABLED(IPP_LOCAL_OUT, ipst))) { connp->conn_lso_ok = B_FALSE; - if (IPP_ENABLED(IPP_LOCAL_OUT)) { + if (IPP_ENABLED(IPP_LOCAL_OUT, ipst)) { /* * Disable LSO for this and all future connections going * over the interface. 
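The repeated FW_HOOKS conversions in ip_wput_ire() above follow the same scheme: the firewall hook event and hook list move into the ip_stack_t, so each IP instance can run its own IPFilter rules, and the debug counters move behind IP_STAT(ipst, ...). A short sketch of one physical-out hook invocation, mirroring the argument order shown in the diff; the enclosing function is illustrative:

/*
 * Sketch of a per-stack firewall hook call on the IPv4 physical-out
 * path. FW_HOOKS may drop the packet, in which case mp is set to NULL.
 */
static void
ip_physical_out_hook_example(ire_t *ire, ipha_t *ipha, mblk_t *mp)
{
	ip_stack_t *ipst = ire->ire_ipst;
	ill_t *out_ill = ire->ire_ipif->ipif_ill;

	FW_HOOKS(ipst->ips_ip4_physical_out_event,
	    ipst->ips_ipv4firewall_physical_out,
	    NULL, out_ill, ipha, mp, mp, ipst);
	if (mp == NULL)
		return;			/* hook consumed the packet */

	IP_STAT(ipst, ip_out_sw_cksum);	/* stats are per stack as well */
}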
@@ -23675,6 +24051,7 @@ ip_wput_frag_mdt(ire_t *ire, mblk_t *mp, ip_pkt_t pkt_type, int len, multidata_t *mmd; ip_pdescinfo_t pdi; ill_t *ill; + ip_stack_t *ipst = ire->ire_ipst; ASSERT(DB_TYPE(mp) == M_DATA); ASSERT(MBLKL(mp) > sizeof (ipha_t)); @@ -23691,7 +24068,7 @@ ip_wput_frag_mdt(ire_t *ire, mblk_t *mp, ip_pkt_t pkt_type, int len, ASSERT(pkts > 1); /* Allocate a message block which will hold all the IP Headers. */ - wroff = ip_wroff_extra; + wroff = ipst->ips_ip_wroff_extra; hdr_chunk_len = wroff + IP_SIMPLE_HDR_LENGTH; i1 = pkts * hdr_chunk_len; @@ -23707,14 +24084,14 @@ ip_wput_frag_mdt(ire_t *ire, mblk_t *mp, ip_pkt_t pkt_type, int len, if (md_mp == NULL) { freemsg(hdr_mp); } else { -free_mmd: IP_STAT(ip_frag_mdt_discarded); +free_mmd: IP_STAT(ipst, ip_frag_mdt_discarded); freemsg(md_mp); } - IP_STAT(ip_frag_mdt_allocfail); + IP_STAT(ipst, ip_frag_mdt_allocfail); BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); return; } - IP_STAT(ip_frag_mdt_allocd); + IP_STAT(ipst, ip_frag_mdt_allocd); /* * Add a payload buffer to the Multidata; this operation must not @@ -23825,7 +24202,7 @@ free_mmd: IP_STAT(ip_frag_mdt_discarded); (void *)mmd, (void *)&pdi, error); /* NOTREACHED */ } - IP_STAT(ip_frag_mdt_addpdescfail); + IP_STAT(ipst, ip_frag_mdt_addpdescfail); /* Free unattached payload message blocks as well */ md_mp->b_cont = mp->b_cont; goto free_mmd; @@ -23854,7 +24231,7 @@ free_mmd: IP_STAT(ip_frag_mdt_discarded); ASSERT(mp->b_wptr == pld_ptr); /* Update IP statistics */ - IP_STAT_UPDATE(ip_frag_mdt_pkt_out, pkts); + IP_STAT_UPDATE(ipst, ip_frag_mdt_pkt_out, pkts); UPDATE_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates, pkts); BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs); @@ -23903,7 +24280,7 @@ pbuf_panic: */ static void ip_wput_frag(ire_t *ire, mblk_t *mp_orig, ip_pkt_t pkt_type, uint32_t max_frag, - uint32_t frag_flag, zoneid_t zoneid) + uint32_t frag_flag, zoneid_t zoneid, ip_stack_t *ipst) { int i1; mblk_t *ll_hdr_mp; @@ -23933,7 +24310,7 @@ ip_wput_frag(ire_t *ire, mblk_t *mp_orig, ip_pkt_t pkt_type, uint32_t max_frag, mib2_ipIfStatsEntry_t *mibptr = NULL; ill = ire_to_ill(ire); - mibptr = (ill != NULL) ? ill->ill_ip_mib : &ip_mib; + mibptr = (ill != NULL) ? ill->ill_ip_mib : &ipst->ips_ip_mib; BUMP_MIB(mibptr, ipIfStatsOutFragReqds); @@ -23990,7 +24367,8 @@ ip_wput_frag(ire_t *ire, mblk_t *mp_orig, ip_pkt_t pkt_type, uint32_t max_frag, */ ipha->ipha_hdr_checksum = 0; ipha->ipha_hdr_checksum = ip_csum_hdr(ipha); - icmp_frag_needed(ire->ire_stq, first_mp, max_frag, zoneid); + icmp_frag_needed(ire->ire_stq, first_mp, max_frag, zoneid, + ipst); TRACE_1(TR_FAC_IP, TR_IP_WPUT_FRAG_END, "ip_wput_frag_end:(%S)", "don't fragment"); @@ -24028,8 +24406,10 @@ ip_wput_frag(ire_t *ire, mblk_t *mp_orig, ip_pkt_t pkt_type, uint32_t max_frag, /* Check if we can use MDT to send out the frags. 
*/ ASSERT(!IRE_IS_LOCAL(ire)); - if (hdr_len == IP_SIMPLE_HDR_LENGTH && ip_multidata_outbound && - !(ire->ire_flags & RTF_MULTIRT) && !IPP_ENABLED(IPP_LOCAL_OUT) && + if (hdr_len == IP_SIMPLE_HDR_LENGTH && + ipst->ips_ip_multidata_outbound && + !(ire->ire_flags & RTF_MULTIRT) && + !IPP_ENABLED(IPP_LOCAL_OUT, ipst) && ill != NULL && ILL_MDT_CAPABLE(ill) && IP_CAN_FRAG_MDT(mp, IP_SIMPLE_HDR_LENGTH, len)) { ASSERT(ill->ill_mdt_capab != NULL); @@ -24049,7 +24429,7 @@ ip_wput_frag(ire_t *ire, mblk_t *mp_orig, ip_pkt_t pkt_type, uint32_t max_frag, } /* Get a copy of the header for the trailing frags */ - hdr_mp = ip_wput_frag_copyhdr((uchar_t *)ipha, hdr_len, offset); + hdr_mp = ip_wput_frag_copyhdr((uchar_t *)ipha, hdr_len, offset, ipst); if (!hdr_mp) { BUMP_MIB(mibptr, ipIfStatsOutFragFails); freemsg(mp); @@ -24296,8 +24676,9 @@ ip_wput_frag(ire_t *ire, mblk_t *mp_orig, ip_pkt_t pkt_type, uint32_t max_frag, ill_t *, NULL, ill_t *, out_ill, ipha_t *, ipha, mblk_t *, xmit_mp); - FW_HOOKS(ip4_physical_out_event, ipv4firewall_physical_out, - NULL, out_ill, ipha, xmit_mp, mp); + FW_HOOKS(ipst->ips_ip4_physical_out_event, + ipst->ips_ipv4firewall_physical_out, + NULL, out_ill, ipha, xmit_mp, mp, ipst); DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, xmit_mp); @@ -24593,9 +24974,9 @@ ip_wput_frag(ire_t *ire, mblk_t *mp_orig, ip_pkt_t pkt_type, uint32_t max_frag, ill_t *, NULL, ill_t *, out_ill, ipha_t *, ipha, mblk_t *, xmit_mp); - FW_HOOKS(ip4_physical_out_event, - ipv4firewall_physical_out, - NULL, out_ill, ipha, xmit_mp, mp); + FW_HOOKS(ipst->ips_ip4_physical_out_event, + ipst->ips_ipv4firewall_physical_out, + NULL, out_ill, ipha, xmit_mp, mp, ipst); DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, xmit_mp); @@ -24695,7 +25076,7 @@ drop_pkt: * Copy the header plus those options which have the copy bit set */ static mblk_t * -ip_wput_frag_copyhdr(uchar_t *rptr, int hdr_len, int offset) +ip_wput_frag_copyhdr(uchar_t *rptr, int hdr_len, int offset, ip_stack_t *ipst) { mblk_t *mp; uchar_t *up; @@ -24704,13 +25085,13 @@ ip_wput_frag_copyhdr(uchar_t *rptr, int hdr_len, int offset) * Quick check if we need to look for options without the copy bit * set */ - mp = allocb(ip_wroff_extra + hdr_len, BPRI_HI); + mp = allocb(ipst->ips_ip_wroff_extra + hdr_len, BPRI_HI); if (!mp) return (mp); - mp->b_rptr += ip_wroff_extra; + mp->b_rptr += ipst->ips_ip_wroff_extra; if (hdr_len == IP_SIMPLE_HDR_LENGTH || offset != 0) { bcopy(rptr, mp->b_rptr, hdr_len); - mp->b_wptr += hdr_len + ip_wroff_extra; + mp->b_wptr += hdr_len + ipst->ips_ip_wroff_extra; return (mp); } up = mp->b_rptr; @@ -24766,6 +25147,7 @@ ip_wput_local(queue_t *q, ill_t *ill, ipha_t *ipha, mblk_t *mp, ire_t *ire, boolean_t mctl_present; int ire_type; #define rptr ((uchar_t *)ipha) + ip_stack_t *ipst = ill->ill_ipst; TRACE_1(TR_FAC_IP, TR_IP_WPUT_LOCAL_START, "ip_wput_local_start: q %p", q); @@ -24813,20 +25195,21 @@ ip_wput_local(queue_t *q, ill_t *ill, ipha_t *ipha, mblk_t *mp, ire_t *ire, ill_t *, ill, ill_t *, NULL, ipha_t *, ipha, mblk_t *, first_mp); - FW_HOOKS(ip4_loopback_in_event, ipv4firewall_loopback_in, - ill, NULL, ipha, first_mp, mp); + FW_HOOKS(ipst->ips_ip4_loopback_in_event, + ipst->ips_ipv4firewall_loopback_in, + ill, NULL, ipha, first_mp, mp, ipst); DTRACE_PROBE1(ip4__loopback__in__end, mblk_t *, first_mp); if (first_mp == NULL) return; - loopback_packets++; + ipst->ips_loopback_packets++; ip2dbg(("ip_wput_local: from 0x%x to 0x%x in zone %d\n", ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst), zoneid)); if (!IS_SIMPLE_IPH(ipha)) { - 
ip_wput_local_options(ipha); + ip_wput_local_options(ipha, ipst); } protocol = ipha->ipha_protocol; @@ -25033,7 +25416,7 @@ ip_wput_local(queue_t *q, ill_t *ill, ipha_t *ipha, mblk_t *mp, ire_t *ire, * The options have been sanity checked by ip_wput_options(). */ static void -ip_wput_local_options(ipha_t *ipha) +ip_wput_local_options(ipha_t *ipha, ip_stack_t *ipst) { ipoptp_t opts; uchar_t *opt; @@ -25108,7 +25491,8 @@ ip_wput_local_options(ipha_t *ipha) off = opt[IPOPT_OFFSET] - 1; bcopy((char *)opt + off, &dst, IP_ADDR_LEN); ire = ire_ctable_lookup(dst, 0, IRE_LOCAL, - NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE); + NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE, + ipst); if (ire == NULL) { /* Not for us */ break; @@ -25171,6 +25555,7 @@ ip_wput_multicast(queue_t *q, mblk_t *mp, ipif_t *ipif, zoneid_t zoneid) ire_t *ire; ipaddr_t dst; mblk_t *first_mp; + ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; /* igmp_sendpkt always allocates a ipsec_out_t */ ASSERT(mp->b_datap->db_type == M_CTL); @@ -25205,7 +25590,7 @@ ip_wput_multicast(queue_t *q, mblk_t *mp, ipif_t *ipif, zoneid_t zoneid) * ip_newroute_ipif. */ ire = ire_ctable_lookup(dst, 0, 0, ipif, zoneid, NULL, - MATCH_IRE_ILL); + MATCH_IRE_ILL, ipst); if (!ire) { /* * Mark this packet to make it be delivered to @@ -25255,6 +25640,7 @@ ip_wput_attach_llhdr(mblk_t *mp, ire_t *ire, ip_proc_t proc, uint32_t ill_index) mblk_t *mp1; boolean_t qos_done = B_FALSE; uchar_t *ll_hdr; + ip_stack_t *ipst = ire->ire_ipst; #define rptr ((uchar_t *)ipha) @@ -25264,7 +25650,7 @@ ip_wput_attach_llhdr(mblk_t *mp, ire_t *ire, ip_proc_t proc, uint32_t ill_index) if ((mp1 = ire->ire_nce->nce_fp_mp) != NULL) { ASSERT(DB_TYPE(mp1) == M_DATA); /* Initiate IPPF processing */ - if ((proc != 0) && IPP_ENABLED(proc)) { + if ((proc != 0) && IPP_ENABLED(proc, ipst)) { UNLOCK_IRE_FP_MP(ire); ip_process(proc, &mp, ill_index); if (mp == NULL) @@ -25336,7 +25722,7 @@ unlock_err: */ if (DB_CRED(mp) != NULL) mblk_setcred(mp1, DB_CRED(mp)); - if (!qos_done && (proc != 0) && IPP_ENABLED(proc)) { + if (!qos_done && (proc != 0) && IPP_ENABLED(proc, ipst)) { ip_process(proc, &mp1, ill_index); if (mp1 == NULL) return (NULL); @@ -25368,10 +25754,13 @@ ip_wput_ipsec_out_v6(queue_t *q, mblk_t *ipsec_mp, ip6_t *ip6h, ill_t *ill, zoneid_t zoneid; boolean_t ill_need_rele = B_FALSE; boolean_t ire_need_rele = B_FALSE; + ip_stack_t *ipst; mp = ipsec_mp->b_cont; ip6h1 = (ip6_t *)mp->b_rptr; io = (ipsec_out_t *)ipsec_mp->b_rptr; + ASSERT(io->ipsec_out_ns != NULL); + ipst = io->ipsec_out_ns->netstack_ip; ill_index = io->ipsec_out_ill_index; if (io->ipsec_out_reachable) { flags |= IPV6_REACHABILITY_CONFIRMATION; @@ -25389,7 +25778,7 @@ ip_wput_ipsec_out_v6(queue_t *q, mblk_t *ipsec_mp, ip6_t *ip6h, ill_t *ill, if (ill_index != 0) { if (ill == NULL) { ill = ip_grab_attach_ill(NULL, ipsec_mp, ill_index, - B_TRUE); + B_TRUE, ipst); /* Failure case frees things for us. 
*/ if (ill == NULL) @@ -25435,7 +25824,7 @@ ip_wput_ipsec_out_v6(queue_t *q, mblk_t *ipsec_mp, ip6_t *ip6h, ill_t *ill, ire = ire_arg; } else { ire = ire_ctable_lookup_v6(v6dstp, 0, 0, ipif, - zoneid, MBLK_GETLABEL(mp), match_flags); + zoneid, MBLK_GETLABEL(mp), match_flags, ipst); ire_need_rele = B_TRUE; } if (ire != NULL) { @@ -25477,14 +25866,15 @@ ip_wput_ipsec_out_v6(queue_t *q, mblk_t *ipsec_mp, ip6_t *ip6h, ill_t *ill, return; } ire = ire_ctable_lookup_v6(v6dstp, 0, 0, ipif, - zoneid, MBLK_GETLABEL(mp), match_flags); + zoneid, MBLK_GETLABEL(mp), match_flags, ipst); ire_need_rele = B_TRUE; ipif_refrele(ipif); } else { if (ire_arg != NULL) { ire = ire_arg; } else { - ire = ire_cache_lookup_v6(v6dstp, zoneid, NULL); + ire = ire_cache_lookup_v6(v6dstp, zoneid, NULL, + ipst); ire_need_rele = B_TRUE; } } @@ -25515,7 +25905,7 @@ ip_wput_ipsec_out_v6(queue_t *q, mblk_t *ipsec_mp, ip6_t *ip6h, ill_t *ill, } ip_newroute_v6(q, ipsec_mp, v6dstp, &ip6h->ip6_src, ill, - zoneid); + zoneid, ipst); } if (ill != NULL && ill_need_rele) ill_refrele(ill); @@ -25536,8 +25926,9 @@ send: ill_t *, NULL, ill_t *, out_ill, ip6_t *, ip6h1, mblk_t *, ipsec_mp); - FW_HOOKS6(ip6_loopback_out_event, ipv6firewall_loopback_out, - NULL, out_ill, ip6h1, ipsec_mp, mp); + FW_HOOKS6(ipst->ips_ip6_loopback_out_event, + ipst->ips_ipv6firewall_loopback_out, + NULL, out_ill, ip6h1, ipsec_mp, mp, ipst); DTRACE_PROBE1(ip6__loopback__out__end, mblk_t *, ipsec_mp); @@ -25672,6 +26063,8 @@ ip_wput_ipsec_out(queue_t *q, mblk_t *ipsec_mp, ipha_t *ipha, ill_t *ill, uint32_t cksum; uint16_t *up; ipxmit_state_t pktxmit_state; + ip_stack_t *ipst; + #ifdef _BIG_ENDIAN #define LENGTH (v_hlen_tos_len & 0xFFFF) #else @@ -25689,11 +26082,14 @@ ip_wput_ipsec_out(queue_t *q, mblk_t *ipsec_mp, ipha_t *ipha, ill_t *ill, attach_if = io->ipsec_out_attach_if; zoneid = io->ipsec_out_zoneid; ASSERT(zoneid != ALL_ZONES); + ipst = io->ipsec_out_ns->netstack_ip; + ASSERT(io->ipsec_out_ns != NULL); + match_flags = MATCH_IRE_ILL_GROUP | MATCH_IRE_SECATTR; if (ill_index != 0) { if (ill == NULL) { ill = ip_grab_attach_ill(NULL, ipsec_mp, - ill_index, B_FALSE); + ill_index, B_FALSE, ipst); /* Failure case frees things for us. */ if (ill == NULL) @@ -25725,13 +26121,13 @@ ip_wput_ipsec_out(queue_t *q, mblk_t *ipsec_mp, ipha_t *ipha, ill_t *ill, */ conn_dontroute = io->ipsec_out_dontroute; if (ill_index == 0) - ipif = ipif_lookup_group(dst, zoneid); + ipif = ipif_lookup_group(dst, zoneid, ipst); else (void) ipif_lookup_zoneid(ill, zoneid, 0, &ipif); if (ipif == NULL) { ip1dbg(("ip_wput_ipsec_out: No ipif for" " multicast\n")); - BUMP_MIB(&ip_mib, ipIfStatsOutNoRoutes); + BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutNoRoutes); freemsg(ipsec_mp); goto done; } @@ -25741,14 +26137,14 @@ ip_wput_ipsec_out(queue_t *q, mblk_t *ipsec_mp, ipha_t *ipha, ill_t *ill, * an ire to send this downstream. */ ire = ire_ctable_lookup(dst, 0, 0, ipif, zoneid, - MBLK_GETLABEL(mp), match_flags); + MBLK_GETLABEL(mp), match_flags, ipst); if (ire != NULL) { ill_t *ill1; /* * Do the multicast forwarding now, as the IPSEC * processing has been done. 
*/ - if (ip_g_mrouter && !conn_dontroute && + if (ipst->ips_ip_g_mrouter && !conn_dontroute && (ill1 = ire_to_ill(ire))) { if (ip_mforward(ill1, ipha, mp)) { freemsg(ipsec_mp); @@ -25787,14 +26183,14 @@ ip_wput_ipsec_out(queue_t *q, mblk_t *ipsec_mp, ipha_t *ipha, ill_t *ill, } else { if (attach_if) { ire = ire_ctable_lookup(dst, 0, 0, ill->ill_ipif, - zoneid, MBLK_GETLABEL(mp), match_flags); + zoneid, MBLK_GETLABEL(mp), match_flags, ipst); } else { if (ire_arg != NULL) { ire = ire_arg; ire_need_rele = B_FALSE; } else { ire = ire_cache_lookup(dst, zoneid, - MBLK_GETLABEL(mp)); + MBLK_GETLABEL(mp), ipst); } } if (ire != NULL) { @@ -25837,11 +26233,12 @@ ip_wput_ipsec_out(queue_t *q, mblk_t *ipsec_mp, ipha_t *ipha, ill_t *ill, */ ipha->ipha_ident = IP_HDR_INCLUDED; ip_newroute(q, ipsec_mp, dst, NULL, - (CONN_Q(q) ? Q_TO_CONN(q) : NULL), zoneid); + (CONN_Q(q) ? Q_TO_CONN(q) : NULL), zoneid, ipst); } goto done; send: - if (ipha->ipha_protocol == IPPROTO_UDP && udp_compute_checksum()) { + if (ipha->ipha_protocol == IPPROTO_UDP && + udp_compute_checksum(ipst->ips_netstack)) { /* * ESP NAT-Traversal packet. * @@ -25849,8 +26246,8 @@ send: */ offset = IP_SIMPLE_HDR_LENGTH + UDP_CHECKSUM_OFFSET; - IP_STAT(ip_out_sw_cksum); - IP_STAT_UPDATE(ip_udp_out_sw_cksum_bytes, + IP_STAT(ipst, ip_out_sw_cksum); + IP_STAT_UPDATE(ipst, ip_udp_out_sw_cksum_bytes, ntohs(htons(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH)); #define iphs ((uint16_t *)ipha) cksum = IP_UDP_CSUM_COMP + iphs[6] + iphs[7] + iphs[8] + @@ -25898,8 +26295,9 @@ send: ill_t *, NULL, ill_t *, out_ill, ipha_t *, ipha1, mblk_t *, ipsec_mp); - FW_HOOKS(ip4_loopback_out_event, ipv4firewall_loopback_out, - NULL, out_ill, ipha1, ipsec_mp, mp); + FW_HOOKS(ipst->ips_ip4_loopback_out_event, + ipst->ips_ipv4firewall_loopback_out, + NULL, out_ill, ipha1, ipsec_mp, mp, ipst); DTRACE_PROBE1(ip4__loopback__out__end, mblk_t *, ipsec_mp); @@ -25931,7 +26329,7 @@ send: "fragmented accelerated packet!\n")); freemsg(ipsec_mp); } else { - ip_wput_ire_fragmentit(ipsec_mp, ire, zoneid); + ip_wput_ire_fragmentit(ipsec_mp, ire, zoneid, ipst); } if (ire_need_rele) ire_refrele(ire); @@ -26112,8 +26510,9 @@ send: DTRACE_PROBE4(ip4__physical__out__start, ill_t *, NULL, ill_t *, ire->ire_ipif->ipif_ill, ipha_t *, ipha1, mblk_t *, mp); - FW_HOOKS(ip4_physical_out_event, ipv4firewall_physical_out, - NULL, ire->ire_ipif->ipif_ill, ipha1, mp, mp); + FW_HOOKS(ipst->ips_ip4_physical_out_event, + ipst->ips_ipv4firewall_physical_out, + NULL, ire->ire_ipif->ipif_ill, ipha1, mp, mp, ipst); DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, mp); if (mp == NULL) goto drop_pkt; @@ -26195,13 +26594,14 @@ ipsec_out_is_accelerated(mblk_t *ipsec_mp, ipsa_t *sa, ill_t *ill, ire_t *ire) ipsec_out_t *io; mblk_t *data_mp; uint_t plen, overhead; + ip_stack_t *ipst; if ((sa->ipsa_flags & IPSA_F_HW) == 0) return; if (ill == NULL) return; - + ipst = ill->ill_ipst; /* * Destination address is a broadcast or multicast. Punt. */ @@ -26274,7 +26674,7 @@ ipsec_out_is_accelerated(mblk_t *ipsec_mp, ipsa_t *sa, ill_t *ill, ire_t *ire) * specified by the SA? 
*/ if (!ipsec_capab_match(ill, io->ipsec_out_capab_ill_index, - ill->ill_isv6, sa)) { + ill->ill_isv6, sa, ipst->ips_netstack)) { return; } @@ -26409,10 +26809,13 @@ ipsec_out_process(queue_t *q, mblk_t *ipsec_mp, ire_t *ire, uint_t ill_index) zoneid_t zoneid; ipsec_status_t ipsec_rc; boolean_t ill_need_rele = B_FALSE; + ip_stack_t *ipst; + ipsec_stack_t *ipss; io = (ipsec_out_t *)ipsec_mp->b_rptr; ASSERT(io->ipsec_out_type == IPSEC_OUT); ASSERT(io->ipsec_out_len == sizeof (ipsec_out_t)); + ipst = io->ipsec_out_ns->netstack_ip; mp = ipsec_mp->b_cont; /* @@ -26421,14 +26824,14 @@ ipsec_out_process(queue_t *q, mblk_t *ipsec_mp, ire_t *ire, uint_t ill_index) * We can check for ipsec_out_proc_begin even for such packets, as * they will always be false (asserted below). */ - if (IPP_ENABLED(IPP_LOCAL_OUT) && !io->ipsec_out_proc_begin) { + if (IPP_ENABLED(IPP_LOCAL_OUT, ipst) && !io->ipsec_out_proc_begin) { ip_process(IPP_LOCAL_OUT, &mp, io->ipsec_out_ill_index != 0 ? io->ipsec_out_ill_index : ill_index); if (mp == NULL) { ip2dbg(("ipsec_out_process: packet dropped "\ "during IPPF processing\n")); freeb(ipsec_mp); - BUMP_MIB(&ip_mib, ipIfStatsOutDiscards); + BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); return; } } @@ -26447,15 +26850,17 @@ ipsec_out_process(queue_t *q, mblk_t *ipsec_mp, ire_t *ire, uint_t ill_index) (io->ipsec_out_act != NULL)); ASSERT(io->ipsec_out_failed == B_FALSE); - if (!ipsec_loaded()) { + ipss = ipst->ips_netstack->netstack_ipsec; + if (!ipsec_loaded(ipss)) { ipha = (ipha_t *)ipsec_mp->b_cont->b_rptr; if (IPH_HDR_VERSION(ipha) == IP_VERSION) { - BUMP_MIB(&ip_mib, ipIfStatsOutDiscards); + BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); } else { - BUMP_MIB(&ip6_mib, ipIfStatsOutDiscards); + BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); } ip_drop_packet(ipsec_mp, B_FALSE, NULL, ire, - &ipdrops_ip_ipsec_not_loaded, &ip_dropper); + DROPPER(ipss, ipds_ip_ipsec_not_loaded), + &ipss->ipsec_dropper); return; } @@ -26504,7 +26909,8 @@ ipsec_out_process(queue_t *q, mblk_t *ipsec_mp, ire_t *ire, uint_t ill_index) if (ill != NULL) { BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); } else { - BUMP_MIB(&ip_mib, ipIfStatsOutDiscards); + BUMP_MIB(&ipst->ips_ip_mib, + ipIfStatsOutDiscards); } return; } @@ -26563,7 +26969,8 @@ ipsec_out_process(queue_t *q, mblk_t *ipsec_mp, ire_t *ire, uint_t ill_index) if (ill != NULL) { BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); } else { - BUMP_MIB(&ip_mib, ipIfStatsOutDiscards); + BUMP_MIB(&ipst->ips_ip_mib, + ipIfStatsOutDiscards); } /* FALLTHRU */ case IPSEC_STATUS_PENDING: @@ -26577,7 +26984,7 @@ ipsec_out_process(queue_t *q, mblk_t *ipsec_mp, ire_t *ire, uint_t ill_index) if (ire == NULL) { int idx = io->ipsec_out_capab_ill_index; ill = ill_lookup_on_ifindex(idx, B_FALSE, - NULL, NULL, NULL, NULL); + NULL, NULL, NULL, NULL, ipst); ill_need_rele = B_TRUE; } else { ill = (ill_t *)ire->ire_stq->q_ptr; @@ -26593,7 +27000,8 @@ ipsec_out_process(queue_t *q, mblk_t *ipsec_mp, ire_t *ire, uint_t ill_index) if (ill != NULL) { BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); } else { - BUMP_MIB(&ip_mib, ipIfStatsOutDiscards); + BUMP_MIB(&ipst->ips_ip_mib, + ipIfStatsOutDiscards); } /* FALLTHRU */ case IPSEC_STATUS_PENDING: @@ -26965,13 +27373,20 @@ ip_wput_nondata(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg) struct iocblk *iocp; ip_ioctl_cmd_t *ipip; cred_t *cr; - conn_t *connp = NULL; + conn_t *connp; int cmd, err; nce_t *nce; ipif_t *ipif; + ip_stack_t *ipst; + char *proto_str; - if (CONN_Q(q)) + if (CONN_Q(q)) { connp = 
Q_TO_CONN(q); + ipst = connp->conn_netstack->netstack_ip; + } else { + connp = NULL; + ipst = ILLQ_TO_IPST(q); + } cr = DB_CREDDEF(mp, GET_QUEUE_CRED(q)); @@ -27098,6 +27513,10 @@ nak: freemsg(mp); return; case M_IOCACK: + /* /dev/ip shouldn't see this */ + if (CONN_Q(q)) + goto nak; + /* Finish socket ioctls passed through to ARP. */ ip_sioctl_iocack(q, mp); return; @@ -27124,6 +27543,10 @@ nak: freemsg(mp); return; case IRE_DB_REQ_TYPE: + if (connp == NULL) { + proto_str = "IRE_DB_REQ_TYPE"; + goto protonak; + } /* An Upper Level Protocol wants a copy of an IRE. */ ip_ire_req(q, mp); return; @@ -27216,7 +27639,10 @@ nak: case O_T_BIND_REQ: case T_BIND_REQ: { /* Request can get queued in bind */ - ASSERT(connp != NULL); + if (connp == NULL) { + proto_str = "O_T_BIND_REQ/T_BIND_REQ"; + goto protonak; + } /* * Both TCP and UDP call ip_bind_{v4,v6}() directly * instead of going through this path. We only get @@ -27265,7 +27691,11 @@ nak: ip2dbg(("ip_wput: T_SVR4_OPTMGMT_REQ flags %x\n", ((struct T_optmgmt_req *)mp->b_rptr)->MGMT_flags)); - ASSERT(connp != NULL); + if (connp == NULL) { + proto_str = "T_SVR4_OPTMGMT_REQ"; + goto protonak; + } + if (!snmpcom_req(q, mp, ip_snmp_set, ip_snmp_get, cr)) { /* @@ -27296,7 +27726,11 @@ nak: * Call tpi_optcom_req so that it can * generate the ack. */ - ASSERT(connp != NULL); + if (connp == NULL) { + proto_str = "T_OPTMGMT_REQ"; + goto protonak; + } + ASSERT(ipsq == NULL); /* * We don't come here for restart. ip_restart_optmgmt @@ -27314,6 +27748,10 @@ nak: } return; case T_UNBIND_REQ: + if (connp == NULL) { + proto_str = "T_UNBIND_REQ"; + goto protonak; + } mp = ip_unbind(q, mp); qreply(q, mp); return; @@ -27500,7 +27938,7 @@ nak: inbound_ill = ill_lookup_on_ifindex(ifindex, B_TRUE, NULL, NULL, NULL, - NULL); + NULL, ipst); mp1->b_prev = NULL; if (inbound_ill != NULL) fwdq = inbound_ill->ill_rq; @@ -27631,7 +28069,7 @@ nak: ire = ire_ctable_lookup(fake_ire->ire_addr, fake_ire->ire_gateway_addr, IRE_CACHE, ipif, fake_ire->ire_zoneid, NULL, - (MATCH_IRE_GW|MATCH_IRE_IPIF|MATCH_IRE_ZONEONLY)); + (MATCH_IRE_GW|MATCH_IRE_IPIF|MATCH_IRE_ZONEONLY), ipst); ipif_refrele(ipif); if (ire == NULL) { /* @@ -27704,6 +28142,12 @@ nak: putnext(q, mp); } else freemsg(mp); + return; + +protonak: + cmn_err(CE_NOTE, "IP doesn't process %s as a module", proto_str); + if ((mp = mi_tpi_err_ack_alloc(mp, TPROTO, EINVAL)) != NULL) + qreply(q, mp); } /* @@ -27714,7 +28158,7 @@ nak: */ static int ip_wput_options(queue_t *q, mblk_t *ipsec_mp, ipha_t *ipha, - boolean_t mctl_present, zoneid_t zoneid) + boolean_t mctl_present, zoneid_t zoneid, ip_stack_t *ipst) { ipoptp_t opts; uchar_t *opt; @@ -27761,7 +28205,7 @@ ip_wput_options(queue_t *q, mblk_t *ipsec_mp, ipha_t *ipha, ire = ire_ftable_lookup(dst, 0, 0, IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, MBLK_GETLABEL(mp), - MATCH_IRE_TYPE | MATCH_IRE_SECATTR); + MATCH_IRE_TYPE | MATCH_IRE_SECATTR, ipst); if (ire == NULL) { ip1dbg(("ip_wput_options: SSRR not" " directly reachable: 0x%x\n", @@ -27834,12 +28278,12 @@ param_prob: * Since ip_wput() isn't close to finished, we fill * in enough of the header for credible error reporting. 
*/ - if (ip_hdr_complete((ipha_t *)mp->b_rptr, zoneid)) { + if (ip_hdr_complete((ipha_t *)mp->b_rptr, zoneid, ipst)) { /* Failed */ freemsg(ipsec_mp); return (-1); } - icmp_param_problem(q, ipsec_mp, (uint8_t)code, zoneid); + icmp_param_problem(q, ipsec_mp, (uint8_t)code, zoneid, ipst); return (-1); bad_src_route: @@ -27847,12 +28291,12 @@ bad_src_route: * Since ip_wput() isn't close to finished, we fill * in enough of the header for credible error reporting. */ - if (ip_hdr_complete((ipha_t *)mp->b_rptr, zoneid)) { + if (ip_hdr_complete((ipha_t *)mp->b_rptr, zoneid, ipst)) { /* Failed */ freemsg(ipsec_mp); return (-1); } - icmp_unreachable(q, ipsec_mp, ICMP_SOURCE_ROUTE_FAILED, zoneid); + icmp_unreachable(q, ipsec_mp, ICMP_SOURCE_ROUTE_FAILED, zoneid, ipst); return (-1); } @@ -27864,42 +28308,43 @@ bad_src_route: #define CONN_MAXDRAINCNT 64 static void -conn_drain_init(void) +conn_drain_init(ip_stack_t *ipst) { int i; - conn_drain_list_cnt = conn_drain_nthreads; + ipst->ips_conn_drain_list_cnt = conn_drain_nthreads; - if ((conn_drain_list_cnt == 0) || - (conn_drain_list_cnt > CONN_MAXDRAINCNT)) { + if ((ipst->ips_conn_drain_list_cnt == 0) || + (ipst->ips_conn_drain_list_cnt > CONN_MAXDRAINCNT)) { /* * Default value of the number of drainers is the * number of cpus, subject to maximum of 8 drainers. */ if (boot_max_ncpus != -1) - conn_drain_list_cnt = MIN(boot_max_ncpus, 8); + ipst->ips_conn_drain_list_cnt = MIN(boot_max_ncpus, 8); else - conn_drain_list_cnt = MIN(max_ncpus, 8); + ipst->ips_conn_drain_list_cnt = MIN(max_ncpus, 8); } - conn_drain_list = kmem_zalloc(conn_drain_list_cnt * sizeof (idl_t), - KM_SLEEP); + ipst->ips_conn_drain_list = kmem_zalloc(ipst->ips_conn_drain_list_cnt * + sizeof (idl_t), KM_SLEEP); - for (i = 0; i < conn_drain_list_cnt; i++) { - mutex_init(&conn_drain_list[i].idl_lock, NULL, + for (i = 0; i < ipst->ips_conn_drain_list_cnt; i++) { + mutex_init(&ipst->ips_conn_drain_list[i].idl_lock, NULL, MUTEX_DEFAULT, NULL); } } static void -conn_drain_fini(void) +conn_drain_fini(ip_stack_t *ipst) { int i; - for (i = 0; i < conn_drain_list_cnt; i++) - mutex_destroy(&conn_drain_list[i].idl_lock); - kmem_free(conn_drain_list, conn_drain_list_cnt * sizeof (idl_t)); - conn_drain_list = NULL; + for (i = 0; i < ipst->ips_conn_drain_list_cnt; i++) + mutex_destroy(&ipst->ips_conn_drain_list[i].idl_lock); + kmem_free(ipst->ips_conn_drain_list, + ipst->ips_conn_drain_list_cnt * sizeof (idl_t)); + ipst->ips_conn_drain_list = NULL; } /* @@ -27923,6 +28368,7 @@ conn_drain_insert(conn_t *connp) { idl_t *idl; uint_t index; + ip_stack_t *ipst = connp->conn_netstack->netstack_ip; mutex_enter(&connp->conn_lock); if (connp->conn_state_flags & CONN_CLOSING) { @@ -27939,13 +28385,13 @@ conn_drain_insert(conn_t *connp) * Atomicity of load/stores is enough to make sure that * conn_drain_list_index is always within bounds. */ - index = conn_drain_list_index; - ASSERT(index < conn_drain_list_cnt); - connp->conn_idl = &conn_drain_list[index]; + index = ipst->ips_conn_drain_list_index; + ASSERT(index < ipst->ips_conn_drain_list_cnt); + connp->conn_idl = &ipst->ips_conn_drain_list[index]; index++; - if (index == conn_drain_list_cnt) + if (index == ipst->ips_conn_drain_list_cnt) index = 0; - conn_drain_list_index = index; + ipst->ips_conn_drain_list_index = index; } mutex_exit(&connp->conn_lock); @@ -28129,7 +28575,7 @@ ip_wsrv(queue_t *q) * Hence the if check above. 
*/ ip1dbg(("ip_wsrv: walking\n")); - conn_walk_drain(); + conn_walk_drain(ill->ill_ipst); } return; } @@ -28195,18 +28641,21 @@ ip_wsrv(queue_t *q) * Applies to both IPv4 and IPv6. */ static void -conn_walk_fanout(pfv_t func, void *arg, zoneid_t zoneid) +conn_walk_fanout(pfv_t func, void *arg, zoneid_t zoneid, ip_stack_t *ipst) { - conn_walk_fanout_table(ipcl_udp_fanout, ipcl_udp_fanout_size, + conn_walk_fanout_table(ipst->ips_ipcl_udp_fanout, + ipst->ips_ipcl_udp_fanout_size, func, arg, zoneid); - conn_walk_fanout_table(ipcl_conn_fanout, ipcl_conn_fanout_size, + conn_walk_fanout_table(ipst->ips_ipcl_conn_fanout, + ipst->ips_ipcl_conn_fanout_size, func, arg, zoneid); - conn_walk_fanout_table(ipcl_bind_fanout, ipcl_bind_fanout_size, + conn_walk_fanout_table(ipst->ips_ipcl_bind_fanout, + ipst->ips_ipcl_bind_fanout_size, func, arg, zoneid); - conn_walk_fanout_table(ipcl_proto_fanout, - A_CNT(ipcl_proto_fanout), func, arg, zoneid); - conn_walk_fanout_table(ipcl_proto_fanout_v6, - A_CNT(ipcl_proto_fanout_v6), func, arg, zoneid); + conn_walk_fanout_table(ipst->ips_ipcl_proto_fanout, + IPPROTO_MAX, func, arg, zoneid); + conn_walk_fanout_table(ipst->ips_ipcl_proto_fanout_v6, + IPPROTO_MAX, func, arg, zoneid); } /* @@ -28219,15 +28668,15 @@ conn_walk_fanout(pfv_t func, void *arg, zoneid_t zoneid) * in turn qenable the next conn, when it is done/blocked/closing. */ static void -conn_walk_drain(void) +conn_walk_drain(ip_stack_t *ipst) { int i; idl_t *idl; - IP_STAT(ip_conn_walk_drain); + IP_STAT(ipst, ip_conn_walk_drain); - for (i = 0; i < conn_drain_list_cnt; i++) { - idl = &conn_drain_list[i]; + for (i = 0; i < ipst->ips_conn_drain_list_cnt; i++) { + idl = &ipst->ips_conn_drain_list[i]; mutex_enter(&idl->idl_lock); if (idl->idl_conn == NULL) { mutex_exit(&idl->idl_lock); @@ -28314,6 +28763,8 @@ conn_report1(conn_t *connp, void *mp) static int ip_conn_report(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *ioc_cr) { + conn_t *connp = Q_TO_CONN(q); + (void) mi_mpprintf(mp, "CONN " MI_COL_HDRPAD_STR "rfq " MI_COL_HDRPAD_STR @@ -28332,7 +28783,8 @@ ip_conn_report(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *ioc_cr) return (0); } - conn_walk_fanout(conn_report1, mp->b_cont, Q_TO_CONN(q)->conn_zoneid); + conn_walk_fanout(conn_report1, mp->b_cont, connp->conn_zoneid, + connp->conn_netstack->netstack_ip); return (0); } @@ -28349,6 +28801,7 @@ conn_wantpacket(conn_t *connp, ill_t *ill, ipha_t *ipha, int fanout_flags, ipif_t *ipif; ire_t *ire; ipaddr_t dst, src; + ip_stack_t *ipst = connp->conn_netstack->netstack_ip; dst = ipha->ipha_dst; src = ipha->ipha_src; @@ -28400,7 +28853,7 @@ conn_wantpacket(conn_t *connp, ill_t *ill, ipha_t *ipha, int fanout_flags, return (B_FALSE); ire = ire_ctable_lookup(dst, 0, IRE_BROADCAST, ipif, connp->conn_zoneid, NULL, - (MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP)); + (MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP), ipst); ipif_refrele(ipif); if (ire != NULL) { ire_refrele(ire); @@ -28653,6 +29106,7 @@ ip_multirt_apply_membership(int (*fn)(conn_t *, boolean_t, ipaddr_t, ipaddr_t, irb_t *irb; int error = 0; opt_restart_t *or; + ip_stack_t *ipst = ire->ire_ipst; irb = ire->ire_bucket; ASSERT(irb != NULL); @@ -28669,7 +29123,7 @@ ip_multirt_apply_membership(int (*fn)(conn_t *, boolean_t, ipaddr_t, ipaddr_t, ire_gw = ire_ftable_lookup(ire->ire_gateway_addr, 0, 0, IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, NULL, - MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE); + MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE, ipst); /* No resolver exists for the gateway; skip this ire. 
*/ if (ire_gw == NULL) continue; @@ -28725,17 +29179,18 @@ ip_multirt_bad_mtu(ire_t *ire, uint32_t max_frag) { hrtime_t current = gethrtime(); char buf[INET_ADDRSTRLEN]; + ip_stack_t *ipst = ire->ire_ipst; /* Convert interval in ms to hrtime in ns */ - if (multirt_bad_mtu_last_time + - ((hrtime_t)ip_multirt_log_interval * (hrtime_t)1000000) <= + if (ipst->ips_multirt_bad_mtu_last_time + + ((hrtime_t)ipst->ips_ip_multirt_log_interval * (hrtime_t)1000000) <= current) { cmn_err(CE_WARN, "ip: ignoring multiroute " "to %s, incorrect MTU %u (expected %u)\n", ip_dot_addr(ire->ire_addr, buf), ire->ire_max_frag, max_frag); - multirt_bad_mtu_last_time = current; + ipst->ips_multirt_bad_mtu_last_time = current; } } @@ -28749,6 +29204,14 @@ static int ip_cgtp_filter_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *ioc_cr) { boolean_t *ip_cgtp_filter_value = (boolean_t *)cp; + ip_stack_t *ipst = CONNQ_TO_IPST(q); + + /* + * Only applies to the shared stack since the filter_ops + * do not carry an ip_stack_t or zoneid. + */ + if (ipst->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID) + return (ENOTSUP); (void) mi_mpprintf(mp, "%d", (int)*ip_cgtp_filter_value); return (0); @@ -28768,6 +29231,17 @@ ip_cgtp_filter_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, { long new_value; boolean_t *ip_cgtp_filter_value = (boolean_t *)cp; + ip_stack_t *ipst = CONNQ_TO_IPST(q); + + if (secpolicy_net_config(ioc_cr, B_FALSE) != 0) + return (EPERM); + + /* + * Only applies to the shared stack since the filter_ops + * do not carry an ip_stack_t or zoneid. + */ + if (ipst->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID) + return (ENOTSUP); if (ddi_strtol(value, NULL, 10, &new_value) != 0 || new_value < 0 || new_value > 1) { @@ -28783,7 +29257,8 @@ ip_cgtp_filter_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, (ip_cgtp_filter_ops->cfo_filter_rev != CGTP_FILTER_REV)) { cmn_err(CE_WARN, "IP: CGTP filtering version mismatch " "(module hooks version %d, expecting %d)\n", - ip_cgtp_filter_ops->cfo_filter_rev, CGTP_FILTER_REV); + ip_cgtp_filter_ops->cfo_filter_rev, + CGTP_FILTER_REV); return (ENOTSUP); } @@ -28800,9 +29275,10 @@ ip_cgtp_filter_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, if (ip_cgtp_filter_ops != NULL) { int res; - if ((res = ip_cgtp_filter_ops->cfo_change_state(new_value))) { + + res = ip_cgtp_filter_ops->cfo_change_state(new_value); + if (res) return (res); - } } *ip_cgtp_filter_value = (boolean_t)new_value; @@ -28817,7 +29293,15 @@ ip_cgtp_filter_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, int ip_cgtp_filter_supported(void) { - return (ip_cgtp_filter_rev); + ip_stack_t *ipst; + int ret; + + ipst = netstack_find_by_stackid(GLOBAL_NETSTACKID)->netstack_ip; + if (ipst == NULL) + return (-1); + ret = ip_cgtp_filter_rev; + netstack_rele(ipst->ips_netstack); + return (ret); } @@ -28826,14 +29310,24 @@ ip_cgtp_filter_supported(void) * or by invoking this function. In the first case, the version number * of the registered structure is checked at hooks activation time * in ip_cgtp_filter_set(). + * + * Only applies to the shared stack since the filter_ops + * do not carry an ip_stack_t or zoneid. 
*/ int ip_cgtp_filter_register(cgtp_filter_ops_t *ops) { + ip_stack_t *ipst; + if (ops->cfo_filter_rev != CGTP_FILTER_REV) return (ENOTSUP); + ipst = netstack_find_by_stackid(GLOBAL_NETSTACKID)->netstack_ip; + if (ipst == NULL) + return (EINVAL); + ip_cgtp_filter_ops = ops; + netstack_rele(ipst->ips_netstack); return (0); } @@ -28863,6 +29357,9 @@ ip_input_proc_set(queue_t *q, mblk_t *mp, char *value, int *v = (int *)addr; long new_value; + if (secpolicy_net_config(cr, B_FALSE) != 0) + return (EPERM); + if (ddi_strtol(value, NULL, 10, &new_value) != 0) return (EINVAL); @@ -28879,6 +29376,9 @@ ip_int_set(queue_t *q, mblk_t *mp, char *value, int *v = (int *)addr; long new_value; + if (secpolicy_net_config(cr, B_FALSE) != 0) + return (EPERM); + if (ddi_strtol(value, NULL, 10, &new_value) != 0) return (EINVAL); @@ -28886,9 +29386,89 @@ ip_int_set(queue_t *q, mblk_t *mp, char *value, return (0); } +static void * +ip_kstat2_init(netstackid_t stackid, ip_stat_t *ip_statisticsp) +{ + kstat_t *ksp; + + ip_stat_t template = { + { "ipsec_fanout_proto", KSTAT_DATA_UINT64 }, + { "ip_udp_fannorm", KSTAT_DATA_UINT64 }, + { "ip_udp_fanmb", KSTAT_DATA_UINT64 }, + { "ip_udp_fanothers", KSTAT_DATA_UINT64 }, + { "ip_udp_fast_path", KSTAT_DATA_UINT64 }, + { "ip_udp_slow_path", KSTAT_DATA_UINT64 }, + { "ip_udp_input_err", KSTAT_DATA_UINT64 }, + { "ip_tcppullup", KSTAT_DATA_UINT64 }, + { "ip_tcpoptions", KSTAT_DATA_UINT64 }, + { "ip_multipkttcp", KSTAT_DATA_UINT64 }, + { "ip_tcp_fast_path", KSTAT_DATA_UINT64 }, + { "ip_tcp_slow_path", KSTAT_DATA_UINT64 }, + { "ip_tcp_input_error", KSTAT_DATA_UINT64 }, + { "ip_db_ref", KSTAT_DATA_UINT64 }, + { "ip_notaligned1", KSTAT_DATA_UINT64 }, + { "ip_notaligned2", KSTAT_DATA_UINT64 }, + { "ip_multimblk3", KSTAT_DATA_UINT64 }, + { "ip_multimblk4", KSTAT_DATA_UINT64 }, + { "ip_ipoptions", KSTAT_DATA_UINT64 }, + { "ip_classify_fail", KSTAT_DATA_UINT64 }, + { "ip_opt", KSTAT_DATA_UINT64 }, + { "ip_udp_rput_local", KSTAT_DATA_UINT64 }, + { "ipsec_proto_ahesp", KSTAT_DATA_UINT64 }, + { "ip_conn_flputbq", KSTAT_DATA_UINT64 }, + { "ip_conn_walk_drain", KSTAT_DATA_UINT64 }, + { "ip_out_sw_cksum", KSTAT_DATA_UINT64 }, + { "ip_in_sw_cksum", KSTAT_DATA_UINT64 }, + { "ip_trash_ire_reclaim_calls", KSTAT_DATA_UINT64 }, + { "ip_trash_ire_reclaim_success", KSTAT_DATA_UINT64 }, + { "ip_ire_arp_timer_expired", KSTAT_DATA_UINT64 }, + { "ip_ire_redirect_timer_expired", KSTAT_DATA_UINT64 }, + { "ip_ire_pmtu_timer_expired", KSTAT_DATA_UINT64 }, + { "ip_input_multi_squeue", KSTAT_DATA_UINT64 }, + { "ip_tcp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, + { "ip_tcp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, + { "ip_tcp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, + { "ip_tcp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, + { "ip_udp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, + { "ip_udp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, + { "ip_udp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, + { "ip_udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, + { "ip_frag_mdt_pkt_out", KSTAT_DATA_UINT64 }, + { "ip_frag_mdt_discarded", KSTAT_DATA_UINT64 }, + { "ip_frag_mdt_allocfail", KSTAT_DATA_UINT64 }, + { "ip_frag_mdt_addpdescfail", KSTAT_DATA_UINT64 }, + { "ip_frag_mdt_allocd", KSTAT_DATA_UINT64 }, + }; + + ksp = kstat_create_netstack("ip", 0, "ipstat", "net", + KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), + KSTAT_FLAG_VIRTUAL, stackid); + + if (ksp == NULL) + return (NULL); + + bcopy(&template, ip_statisticsp, sizeof (template)); + ksp->ks_data = (void *)ip_statisticsp; + ksp->ks_private = (void 
*)(uintptr_t)stackid; + + kstat_install(ksp); + return (ksp); +} + static void -ip_kstat_init(void) +ip_kstat2_fini(netstackid_t stackid, kstat_t *ksp) +{ + if (ksp != NULL) { + ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); + kstat_delete_netstack(ksp, stackid); + } +} + +static void * +ip_kstat_init(netstackid_t stackid, ip_stack_t *ipst) { + kstat_t *ksp; + ip_named_kstat_t template = { { "forwarding", KSTAT_DATA_UINT32, 0 }, { "defaultTTL", KSTAT_DATA_UINT32, 0 }, @@ -28930,15 +29510,15 @@ ip_kstat_init(void) { "outSwitchIPv6", KSTAT_DATA_UINT32, 0 }, }; - ip_mibkp = kstat_create("ip", 0, "ip", "mib2", KSTAT_TYPE_NAMED, + ksp = kstat_create_netstack("ip", 0, "ip", "mib2", KSTAT_TYPE_NAMED, NUM_OF_FIELDS(ip_named_kstat_t), - 0); - if (!ip_mibkp) - return; + 0, stackid); + if (ksp == NULL || ksp->ks_data == NULL) + return (NULL); - template.forwarding.value.ui32 = WE_ARE_FORWARDING ? 1:2; - template.defaultTTL.value.ui32 = (uint32_t)ip_def_ttl; - template.reasmTimeout.value.ui32 = ip_g_frag_timeout; + template.forwarding.value.ui32 = WE_ARE_FORWARDING(ipst) ? 1:2; + template.defaultTTL.value.ui32 = (uint32_t)ipst->ips_ip_def_ttl; + template.reasmTimeout.value.ui32 = ipst->ips_ip_g_frag_timeout; template.addrEntrySize.value.i32 = sizeof (mib2_ipAddrEntry_t); template.routeEntrySize.value.i32 = sizeof (mib2_ipRouteEntry_t); @@ -28947,20 +29527,20 @@ ip_kstat_init(void) template.memberEntrySize.value.i32 = sizeof (ipv6_member_t); - bcopy(&template, ip_mibkp->ks_data, sizeof (template)); - - ip_mibkp->ks_update = ip_kstat_update; + bcopy(&template, ksp->ks_data, sizeof (template)); + ksp->ks_update = ip_kstat_update; + ksp->ks_private = (void *)(uintptr_t)stackid; - kstat_install(ip_mibkp); + kstat_install(ksp); + return (ksp); } static void -ip_kstat_fini(void) +ip_kstat_fini(netstackid_t stackid, kstat_t *ksp) { - - if (ip_mibkp != NULL) { - kstat_delete(ip_mibkp); - ip_mibkp = NULL; + if (ksp != NULL) { + ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); + kstat_delete_netstack(ksp, stackid); } } @@ -28971,21 +29551,32 @@ ip_kstat_update(kstat_t *kp, int rw) mib2_ipIfStatsEntry_t ipmib; ill_walk_context_t ctx; ill_t *ill; + netstackid_t stackid = (zoneid_t)(uintptr_t)kp->ks_private; + netstack_t *ns; + ip_stack_t *ipst; - if (!kp || !kp->ks_data) + if (kp == NULL || kp->ks_data == NULL) return (EIO); if (rw == KSTAT_WRITE) return (EACCES); + ns = netstack_find_by_stackid(stackid); + if (ns == NULL) + return (-1); + ipst = ns->netstack_ip; + if (ipst == NULL) { + netstack_rele(ns); + return (-1); + } ipkp = (ip_named_kstat_t *)kp->ks_data; - bcopy(&ip_mib, &ipmib, sizeof (ipmib)); - rw_enter(&ill_g_lock, RW_READER); - ill = ILL_START_WALK_V4(&ctx); + bcopy(&ipst->ips_ip_mib, &ipmib, sizeof (ipmib)); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); + ill = ILL_START_WALK_V4(&ctx, ipst); for (; ill != NULL; ill = ill_next(&ctx, ill)) ip_mib2_add_ip_stats(&ipmib, ill->ill_ip_mib); - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); ipkp->forwarding.value.ui32 = ipmib.ipIfStatsForwarding; ipkp->defaultTTL.value.ui32 = ipmib.ipIfStatsDefaultTTL; @@ -28999,7 +29590,7 @@ ip_kstat_update(kstat_t *kp, int rw) ipkp->outRequests.value.ui64 = ipmib.ipIfStatsHCOutRequests; ipkp->outDiscards.value.ui32 = ipmib.ipIfStatsOutDiscards; ipkp->outNoRoutes.value.ui32 = ipmib.ipIfStatsOutNoRoutes; - ipkp->reasmTimeout.value.ui32 = ip_g_frag_timeout; + ipkp->reasmTimeout.value.ui32 = ipst->ips_ip_g_frag_timeout; ipkp->reasmReqds.value.ui32 = ipmib.ipIfStatsReasmReqds; 
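
For readers skimming the hunks above: the recurring change in this part of the diff is that the formerly global "ip" mib kstat is now created once per netstack, with the stack id stashed in ks_private so the update callback can locate the owning ip_stack_t. Below is a condensed sketch of that pattern, not part of the commit itself: the example_* names and the single "forwarding" counter are hypothetical stand-ins, while kstat_create_netstack(), netstack_find_by_stackid(), netstack_rele() and the ks_private convention are taken from the ip_kstat_init()/ip_kstat_update() hunks shown here.

/*
 * Condensed sketch of the per-netstack kstat pattern used above.
 * Illustrative only; the example_* identifiers are hypothetical.
 */
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/kstat.h>
#include <sys/netstack.h>
#include <inet/ip.h>		/* ip_stack_t, ips_ip_mib */

typedef struct example_named_kstat {
	kstat_named_t	forwarding;
} example_named_kstat_t;

static int
example_kstat_update(kstat_t *kp, int rw)
{
	example_named_kstat_t	*ekp;
	netstackid_t		stackid;
	netstack_t		*ns;
	ip_stack_t		*ipst;

	if (kp == NULL || kp->ks_data == NULL)
		return (EIO);
	if (rw == KSTAT_WRITE)
		return (EACCES);

	/* Resolve the owning stack from the id stashed at create time. */
	stackid = (netstackid_t)(uintptr_t)kp->ks_private;
	ns = netstack_find_by_stackid(stackid);
	if (ns == NULL)
		return (-1);
	ipst = ns->netstack_ip;
	if (ipst == NULL) {
		netstack_rele(ns);
		return (-1);
	}

	/* Report from this stack's private MIB, not a global. */
	ekp = (example_named_kstat_t *)kp->ks_data;
	ekp->forwarding.value.ui32 = ipst->ips_ip_mib.ipIfStatsForwarding;

	netstack_rele(ns);
	return (0);
}

static kstat_t *
example_kstat_init(netstackid_t stackid)
{
	kstat_t			*ksp;
	example_named_kstat_t	template = {
		{ "forwarding", KSTAT_DATA_UINT32, 0 },
	};

	ksp = kstat_create_netstack("ip", 0, "example", "mib2",
	    KSTAT_TYPE_NAMED,
	    sizeof (template) / sizeof (kstat_named_t), 0, stackid);
	if (ksp == NULL || ksp->ks_data == NULL)
		return (NULL);

	bcopy(&template, ksp->ks_data, sizeof (template));
	ksp->ks_update = example_kstat_update;
	/* Only the stackid rides in ks_private, never a raw pointer. */
	ksp->ks_private = (void *)(uintptr_t)stackid;
	kstat_install(ksp);
	return (ksp);
}

Note the design choice this mirrors: ks_private carries just the stackid rather than an ip_stack_t pointer, so the kstat holds no long-lived reference; each update call takes a netstack reference via netstack_find_by_stackid() and drops it with netstack_rele() before returning.
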
ipkp->reasmOKs.value.ui32 = ipmib.ipIfStatsReasmOKs; ipkp->reasmFails.value.ui32 = ipmib.ipIfStatsReasmFails; @@ -29024,12 +29615,16 @@ ip_kstat_update(kstat_t *kp, int rw) ipkp->outIPv6.value.ui32 = ipmib.ipIfStatsOutWrongIPVersion; ipkp->outSwitchIPv6.value.ui32 = ipmib.ipIfStatsOutSwitchIPVersion; + netstack_rele(ns); + return (0); } -static void -icmp_kstat_init(void) +static void * +icmp_kstat_init(netstackid_t stackid) { + kstat_t *ksp; + icmp_named_kstat_t template = { { "inMsgs", KSTAT_DATA_UINT32 }, { "inErrors", KSTAT_DATA_UINT32 }, @@ -29066,26 +29661,27 @@ icmp_kstat_init(void) { "inBadRedirects", KSTAT_DATA_UINT32 }, }; - icmp_mibkp = kstat_create("ip", 0, "icmp", "mib2", KSTAT_TYPE_NAMED, + ksp = kstat_create_netstack("ip", 0, "icmp", "mib2", KSTAT_TYPE_NAMED, NUM_OF_FIELDS(icmp_named_kstat_t), - 0); - if (icmp_mibkp == NULL) - return; + 0, stackid); + if (ksp == NULL || ksp->ks_data == NULL) + return (NULL); - bcopy(&template, icmp_mibkp->ks_data, sizeof (template)); + bcopy(&template, ksp->ks_data, sizeof (template)); - icmp_mibkp->ks_update = icmp_kstat_update; + ksp->ks_update = icmp_kstat_update; + ksp->ks_private = (void *)(uintptr_t)stackid; - kstat_install(icmp_mibkp); + kstat_install(ksp); + return (ksp); } static void -icmp_kstat_fini(void) +icmp_kstat_fini(netstackid_t stackid, kstat_t *ksp) { - - if (icmp_mibkp != NULL) { - kstat_delete(icmp_mibkp); - icmp_mibkp = NULL; + if (ksp != NULL) { + ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); + kstat_delete_netstack(ksp, stackid); } } @@ -29093,6 +29689,9 @@ static int icmp_kstat_update(kstat_t *kp, int rw) { icmp_named_kstat_t *icmpkp; + netstackid_t stackid = (zoneid_t)(uintptr_t)kp->ks_private; + netstack_t *ns; + ip_stack_t *ipst; if ((kp == NULL) || (kp->ks_data == NULL)) return (EIO); @@ -29100,42 +29699,62 @@ icmp_kstat_update(kstat_t *kp, int rw) if (rw == KSTAT_WRITE) return (EACCES); + ns = netstack_find_by_stackid(stackid); + if (ns == NULL) + return (-1); + ipst = ns->netstack_ip; + if (ipst == NULL) { + netstack_rele(ns); + return (-1); + } icmpkp = (icmp_named_kstat_t *)kp->ks_data; - icmpkp->inMsgs.value.ui32 = icmp_mib.icmpInMsgs; - icmpkp->inErrors.value.ui32 = icmp_mib.icmpInErrors; - icmpkp->inDestUnreachs.value.ui32 = icmp_mib.icmpInDestUnreachs; - icmpkp->inTimeExcds.value.ui32 = icmp_mib.icmpInTimeExcds; - icmpkp->inParmProbs.value.ui32 = icmp_mib.icmpInParmProbs; - icmpkp->inSrcQuenchs.value.ui32 = icmp_mib.icmpInSrcQuenchs; - icmpkp->inRedirects.value.ui32 = icmp_mib.icmpInRedirects; - icmpkp->inEchos.value.ui32 = icmp_mib.icmpInEchos; - icmpkp->inEchoReps.value.ui32 = icmp_mib.icmpInEchoReps; - icmpkp->inTimestamps.value.ui32 = icmp_mib.icmpInTimestamps; - icmpkp->inTimestampReps.value.ui32 = icmp_mib.icmpInTimestampReps; - icmpkp->inAddrMasks.value.ui32 = icmp_mib.icmpInAddrMasks; - icmpkp->inAddrMaskReps.value.ui32 = icmp_mib.icmpInAddrMaskReps; - icmpkp->outMsgs.value.ui32 = icmp_mib.icmpOutMsgs; - icmpkp->outErrors.value.ui32 = icmp_mib.icmpOutErrors; - icmpkp->outDestUnreachs.value.ui32 = icmp_mib.icmpOutDestUnreachs; - icmpkp->outTimeExcds.value.ui32 = icmp_mib.icmpOutTimeExcds; - icmpkp->outParmProbs.value.ui32 = icmp_mib.icmpOutParmProbs; - icmpkp->outSrcQuenchs.value.ui32 = icmp_mib.icmpOutSrcQuenchs; - icmpkp->outRedirects.value.ui32 = icmp_mib.icmpOutRedirects; - icmpkp->outEchos.value.ui32 = icmp_mib.icmpOutEchos; - icmpkp->outEchoReps.value.ui32 = icmp_mib.icmpOutEchoReps; - icmpkp->outTimestamps.value.ui32 = icmp_mib.icmpOutTimestamps; - 
icmpkp->outTimestampReps.value.ui32 = icmp_mib.icmpOutTimestampReps; - icmpkp->outAddrMasks.value.ui32 = icmp_mib.icmpOutAddrMasks; - icmpkp->outAddrMaskReps.value.ui32 = icmp_mib.icmpOutAddrMaskReps; - icmpkp->inCksumErrs.value.ui32 = icmp_mib.icmpInCksumErrs; - icmpkp->inUnknowns.value.ui32 = icmp_mib.icmpInUnknowns; - icmpkp->inFragNeeded.value.ui32 = icmp_mib.icmpInFragNeeded; - icmpkp->outFragNeeded.value.ui32 = icmp_mib.icmpOutFragNeeded; - icmpkp->outDrops.value.ui32 = icmp_mib.icmpOutDrops; - icmpkp->inOverflows.value.ui32 = icmp_mib.icmpInOverflows; - icmpkp->inBadRedirects.value.ui32 = icmp_mib.icmpInBadRedirects; - + icmpkp->inMsgs.value.ui32 = ipst->ips_icmp_mib.icmpInMsgs; + icmpkp->inErrors.value.ui32 = ipst->ips_icmp_mib.icmpInErrors; + icmpkp->inDestUnreachs.value.ui32 = + ipst->ips_icmp_mib.icmpInDestUnreachs; + icmpkp->inTimeExcds.value.ui32 = ipst->ips_icmp_mib.icmpInTimeExcds; + icmpkp->inParmProbs.value.ui32 = ipst->ips_icmp_mib.icmpInParmProbs; + icmpkp->inSrcQuenchs.value.ui32 = ipst->ips_icmp_mib.icmpInSrcQuenchs; + icmpkp->inRedirects.value.ui32 = ipst->ips_icmp_mib.icmpInRedirects; + icmpkp->inEchos.value.ui32 = ipst->ips_icmp_mib.icmpInEchos; + icmpkp->inEchoReps.value.ui32 = ipst->ips_icmp_mib.icmpInEchoReps; + icmpkp->inTimestamps.value.ui32 = ipst->ips_icmp_mib.icmpInTimestamps; + icmpkp->inTimestampReps.value.ui32 = + ipst->ips_icmp_mib.icmpInTimestampReps; + icmpkp->inAddrMasks.value.ui32 = ipst->ips_icmp_mib.icmpInAddrMasks; + icmpkp->inAddrMaskReps.value.ui32 = + ipst->ips_icmp_mib.icmpInAddrMaskReps; + icmpkp->outMsgs.value.ui32 = ipst->ips_icmp_mib.icmpOutMsgs; + icmpkp->outErrors.value.ui32 = ipst->ips_icmp_mib.icmpOutErrors; + icmpkp->outDestUnreachs.value.ui32 = + ipst->ips_icmp_mib.icmpOutDestUnreachs; + icmpkp->outTimeExcds.value.ui32 = ipst->ips_icmp_mib.icmpOutTimeExcds; + icmpkp->outParmProbs.value.ui32 = ipst->ips_icmp_mib.icmpOutParmProbs; + icmpkp->outSrcQuenchs.value.ui32 = + ipst->ips_icmp_mib.icmpOutSrcQuenchs; + icmpkp->outRedirects.value.ui32 = ipst->ips_icmp_mib.icmpOutRedirects; + icmpkp->outEchos.value.ui32 = ipst->ips_icmp_mib.icmpOutEchos; + icmpkp->outEchoReps.value.ui32 = ipst->ips_icmp_mib.icmpOutEchoReps; + icmpkp->outTimestamps.value.ui32 = + ipst->ips_icmp_mib.icmpOutTimestamps; + icmpkp->outTimestampReps.value.ui32 = + ipst->ips_icmp_mib.icmpOutTimestampReps; + icmpkp->outAddrMasks.value.ui32 = + ipst->ips_icmp_mib.icmpOutAddrMasks; + icmpkp->outAddrMaskReps.value.ui32 = + ipst->ips_icmp_mib.icmpOutAddrMaskReps; + icmpkp->inCksumErrs.value.ui32 = ipst->ips_icmp_mib.icmpInCksumErrs; + icmpkp->inUnknowns.value.ui32 = ipst->ips_icmp_mib.icmpInUnknowns; + icmpkp->inFragNeeded.value.ui32 = ipst->ips_icmp_mib.icmpInFragNeeded; + icmpkp->outFragNeeded.value.ui32 = + ipst->ips_icmp_mib.icmpOutFragNeeded; + icmpkp->outDrops.value.ui32 = ipst->ips_icmp_mib.icmpOutDrops; + icmpkp->inOverflows.value.ui32 = ipst->ips_icmp_mib.icmpInOverflows; + icmpkp->inBadRedirects.value.ui32 = + ipst->ips_icmp_mib.icmpInBadRedirects; + + netstack_rele(ns); return (0); } @@ -29158,6 +29777,8 @@ ip_fanout_sctp_raw(mblk_t *mp, ill_t *recv_ill, ipha_t *ipha, boolean_t isv4, mblk_t *first_mp; boolean_t secure; ip6_t *ip6h; + ip_stack_t *ipst = recv_ill->ill_ipst; + ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; first_mp = mp; if (mctl_present) { @@ -29169,7 +29790,7 @@ ip_fanout_sctp_raw(mblk_t *mp, ill_t *recv_ill, ipha_t *ipha, boolean_t isv4, } ip6h = (isv4) ? 
NULL : (ip6_t *)ipha; - connp = ipcl_classify_raw(mp, IPPROTO_SCTP, zoneid, ports, ipha); + connp = ipcl_classify_raw(mp, IPPROTO_SCTP, zoneid, ports, ipha, ipst); if (connp == NULL) { sctp_ootb_input(first_mp, recv_ill, ipif_seqid, zoneid, mctl_present); @@ -29182,8 +29803,8 @@ ip_fanout_sctp_raw(mblk_t *mp, ill_t *recv_ill, ipha_t *ipha, boolean_t isv4, freemsg(first_mp); return; } - if ((isv4 ? CONN_INBOUND_POLICY_PRESENT(connp) : - CONN_INBOUND_POLICY_PRESENT_V6(connp)) || secure) { + if ((isv4 ? CONN_INBOUND_POLICY_PRESENT(connp, ipss) : + CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss)) || secure) { first_mp = ipsec_check_inbound_policy(first_mp, connp, (isv4 ? ipha : NULL), ip6h, mctl_present); if (first_mp == NULL) { @@ -29200,8 +29821,8 @@ ip_fanout_sctp_raw(mblk_t *mp, ill_t *recv_ill, ipha_t *ipha, boolean_t isv4, freeb(first_mp); /* Initiate IPPF processing here if needed. */ - if ((isv4 && IPP_ENABLED(IPP_LOCAL_IN) && ip_policy) || - (!isv4 && IP6_IN_IPP(flags))) { + if ((isv4 && IPP_ENABLED(IPP_LOCAL_IN, ipst) && ip_policy) || + (!isv4 && IP6_IN_IPP(flags, ipst))) { ip_process(IPP_LOCAL_IN, &mp, recv_ill->ill_phyint->phyint_ifindex); if (mp == NULL) { @@ -29228,7 +29849,7 @@ ip_fanout_sctp_raw(mblk_t *mp, ill_t *recv_ill, ipha_t *ipha, boolean_t isv4, } if (isv4) { mp = ip_add_info(mp, recv_ill, in_flags, - IPCL_ZONEID(connp)); + IPCL_ZONEID(connp), ipst); } else { mp = ip_add_info_v6(mp, recv_ill, &ip6h->ip6_dst); if (mp == NULL) { diff --git a/usr/src/uts/common/inet/ip/ip6.c b/usr/src/uts/common/inet/ip/ip6.c index d16ba24c51..49938b8eac 100644 --- a/usr/src/uts/common/inet/ip/ip6.c +++ b/usr/src/uts/common/inet/ip/ip6.c @@ -111,58 +111,6 @@ extern squeue_func_t ip_input_proc; /* - * IP statistics. - */ -#define IP6_STAT(x) (ip6_statistics.x.value.ui64++) -#define IP6_STAT_UPDATE(x, n) (ip6_statistics.x.value.ui64 += (n)) - -typedef struct ip6_stat { - kstat_named_t ip6_udp_fast_path; - kstat_named_t ip6_udp_slow_path; - kstat_named_t ip6_udp_fannorm; - kstat_named_t ip6_udp_fanmb; - kstat_named_t ip6_out_sw_cksum; - kstat_named_t ip6_in_sw_cksum; - kstat_named_t ip6_tcp_in_full_hw_cksum_err; - kstat_named_t ip6_tcp_in_part_hw_cksum_err; - kstat_named_t ip6_tcp_in_sw_cksum_err; - kstat_named_t ip6_tcp_out_sw_cksum_bytes; - kstat_named_t ip6_udp_in_full_hw_cksum_err; - kstat_named_t ip6_udp_in_part_hw_cksum_err; - kstat_named_t ip6_udp_in_sw_cksum_err; - kstat_named_t ip6_udp_out_sw_cksum_bytes; - kstat_named_t ip6_frag_mdt_pkt_out; - kstat_named_t ip6_frag_mdt_discarded; - kstat_named_t ip6_frag_mdt_allocfail; - kstat_named_t ip6_frag_mdt_addpdescfail; - kstat_named_t ip6_frag_mdt_allocd; -} ip6_stat_t; - -static ip6_stat_t ip6_statistics = { - { "ip6_udp_fast_path", KSTAT_DATA_UINT64 }, - { "ip6_udp_slow_path", KSTAT_DATA_UINT64 }, - { "ip6_udp_fannorm", KSTAT_DATA_UINT64 }, - { "ip6_udp_fanmb", KSTAT_DATA_UINT64 }, - { "ip6_out_sw_cksum", KSTAT_DATA_UINT64 }, - { "ip6_in_sw_cksum", KSTAT_DATA_UINT64 }, - { "ip6_tcp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, - { "ip6_tcp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, - { "ip6_tcp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, - { "ip6_tcp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, - { "ip6_udp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, - { "ip6_udp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, - { "ip6_udp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, - { "ip6_udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, - { "ip6_frag_mdt_pkt_out", KSTAT_DATA_UINT64 }, - { "ip6_frag_mdt_discarded", KSTAT_DATA_UINT64 }, - { "ip6_frag_mdt_allocfail", 
KSTAT_DATA_UINT64 }, - { "ip6_frag_mdt_addpdescfail", KSTAT_DATA_UINT64 }, - { "ip6_frag_mdt_allocd", KSTAT_DATA_UINT64 }, -}; - -static kstat_t *ip6_kstat; - -/* * Naming conventions: * These rules should be judiciously applied * if there is a need to identify something as IPv6 versus IPv4 @@ -180,13 +128,6 @@ static kstat_t *ip6_kstat; */ /* - * IPv6 mibs when the interface (ill) is not known. - * When the ill is known the per-interface mib in the ill is used. - */ -mib2_ipIfStatsEntry_t ip6_mib; -mib2_ipv6IfIcmpEntry_t icmp6_mib; - -/* * ip6opt_ls is used to enable IPv6 (via /etc/system on TX systems). * We need to do this because we didn't obtain the IP6OPT_LS (0x0a) * from IANA. This mechanism will remain in effect until an official @@ -194,9 +135,6 @@ mib2_ipv6IfIcmpEntry_t icmp6_mib; */ uchar_t ip6opt_ls; -uint_t ipv6_ire_default_count; /* Number of IPv6 IRE_DEFAULT entries */ -uint_t ipv6_ire_default_index; /* Walking IPv6 index used to mod in */ - const in6_addr_t ipv6_all_ones = { 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU }; const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 }; @@ -239,11 +177,6 @@ const in6_addr_t ipv6_solicited_node_mcast = { 0x000002ffU, 0, 0x01000000U, 0x000000ffU }; #endif /* _BIG_ENDIAN */ -/* - * Used by icmp_send_redirect_v6 for picking random src. - */ -uint_t icmp_redirect_v6_src_index; - /* Leave room for ip_newroute to tack on the src and target addresses */ #define OK_RESOLVER_MP_V6(mp) \ ((mp) && ((mp)->b_wptr - (mp)->b_rptr) >= (2 * IPV6_ADDR_LEN)) @@ -251,13 +184,13 @@ uint_t icmp_redirect_v6_src_index; static void icmp_inbound_too_big_v6(queue_t *, mblk_t *, ill_t *ill, boolean_t, zoneid_t); static void icmp_pkt_v6(queue_t *, mblk_t *, void *, size_t, - const in6_addr_t *, boolean_t, zoneid_t); + const in6_addr_t *, boolean_t, zoneid_t, ip_stack_t *); static void icmp_redirect_v6(queue_t *, mblk_t *, ill_t *ill); static int ip_bind_connected_v6(conn_t *, mblk_t *, in6_addr_t *, uint16_t, const in6_addr_t *, ip6_pkt_t *, uint16_t, boolean_t, boolean_t, boolean_t, boolean_t); static boolean_t ip_bind_insert_ire_v6(mblk_t *, ire_t *, const in6_addr_t *, - iulp_t *); + iulp_t *, ip_stack_t *); static int ip_bind_laddr_v6(conn_t *, mblk_t *, const in6_addr_t *, uint16_t, boolean_t, boolean_t, boolean_t); static void ip_fanout_proto_v6(queue_t *, mblk_t *, ip6_t *, ill_t *, @@ -267,10 +200,10 @@ static void ip_fanout_tcp_v6(queue_t *, mblk_t *, ip6_t *, ill_t *, static void ip_fanout_udp_v6(queue_t *, mblk_t *, ip6_t *, uint32_t, ill_t *, ill_t *, uint_t, boolean_t, zoneid_t); static int ip_process_options_v6(queue_t *, mblk_t *, ip6_t *, - uint8_t *, uint_t, uint8_t); + uint8_t *, uint_t, uint8_t, ip_stack_t *); static mblk_t *ip_rput_frag_v6(queue_t *, mblk_t *, ip6_t *, ip6_frag_t *, uint_t, uint_t *, uint32_t *, uint16_t *); -static boolean_t ip_source_routed_v6(ip6_t *, mblk_t *); +static boolean_t ip_source_routed_v6(ip6_t *, mblk_t *, ip_stack_t *); static void ip_wput_ire_v6(queue_t *, mblk_t *, ire_t *, int, int, conn_t *, int, int, int, zoneid_t); @@ -340,6 +273,7 @@ icmp_inbound_v6(queue_t *q, mblk_t *mp, ill_t *ill, uint_t hdr_length, ire_t *ire; mblk_t *first_mp; ipsec_in_t *ii; + ip_stack_t *ipst = ill->ill_ipst; ASSERT(ill != NULL); first_mp = mp; @@ -364,9 +298,9 @@ icmp_inbound_v6(queue_t *q, mblk_t *mp, ill_t *ill, uint_t hdr_length, } ip6h = (ip6_t *)mp->b_rptr; } - if (icmp_accept_clear_messages == 0) { + if (ipst->ips_icmp_accept_clear_messages == 0) { first_mp = ipsec_check_global_policy(first_mp, NULL, - NULL, ip6h, 
mctl_present); + NULL, ip6h, mctl_present, ipst->ips_netstack); if (first_mp == NULL) return; } @@ -393,7 +327,7 @@ icmp_inbound_v6(queue_t *q, mblk_t *mp, ill_t *ill, uint_t hdr_length, interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK); /* Initiate IPPF processing here */ - if (IP6_IN_IPP(flags)) { + if (IP6_IN_IPP(flags, ipst)) { /* * If the ifindex changes due to SIOCSLIFINDEX @@ -430,7 +364,7 @@ icmp_inbound_v6(queue_t *q, mblk_t *mp, ill_t *ill, uint_t hdr_length, case ICMP6_ECHO_REQUEST: BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos); if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && - !ipv6_resp_echo_mcast) + !ipst->ips_ipv6_resp_echo_mcast) break; /* @@ -495,7 +429,7 @@ icmp_inbound_v6(queue_t *q, mblk_t *mp, ill_t *ill, uint_t hdr_length, } /* set the hop limit */ - ip6h->ip6_hops = ipv6_def_hops; + ip6h->ip6_hops = ipst->ips_ipv6_def_hops; /* * Prepare for checksum by putting icmp length in the icmp @@ -538,7 +472,7 @@ icmp_inbound_v6(queue_t *q, mblk_t *mp, ill_t *ill, uint_t hdr_length, * meant to our LOCAL address. */ ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES, - NULL); + NULL, ipst); if (ire == NULL || ire->ire_type != IRE_LOCAL) { mp = ip_add_info_v6(mp, NULL, &ip6h->ip6_dst); if (mp == NULL) { @@ -571,7 +505,8 @@ icmp_inbound_v6(queue_t *q, mblk_t *mp, ill_t *ill, uint_t hdr_length, * we attach a IPSEC_IN mp and clear ipsec_in_secure. */ ASSERT(first_mp == mp); - if ((first_mp = ipsec_in_alloc(B_FALSE)) == NULL) { + first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); + if (first_mp == NULL) { BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); freemsg(mp); return; @@ -623,7 +558,7 @@ icmp_inbound_v6(queue_t *q, mblk_t *mp, ill_t *ill, uint_t hdr_length, case ND_REDIRECT: { BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects); - if (ipv6_ignore_redirect) + if (ipst->ips_ipv6_ignore_redirect) break; /* @@ -681,6 +616,7 @@ icmp_inbound_too_big_v6(queue_t *q, mblk_t *mp, ill_t *ill, uint32_t mtu; ire_t *ire, *first_ire; mblk_t *first_mp; + ip_stack_t *ipst = ill->ill_ipst; first_mp = mp; if (mctl_present) @@ -740,7 +676,7 @@ icmp_inbound_too_big_v6(queue_t *q, mblk_t *mp, ill_t *ill, if (IN6_IS_ADDR_LINKLOCAL(&inner_ip6h->ip6_dst)) { first_ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, - MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP); + MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP, ipst); if (first_ire == NULL) { if (ip_debug > 2) { @@ -795,7 +731,8 @@ icmp_inbound_too_big_v6(queue_t *q, mblk_t *mp, ill_t *ill, * for non-link local destinations we match only on the IRE type */ ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, - IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, MATCH_IRE_TYPE); + IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, MATCH_IRE_TYPE, + ipst); if (ire == NULL) { if (ip_debug > 2) { /* ip1dbg */ @@ -889,6 +826,7 @@ icmp_inbound_error_fanout_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ipsec_in_t *ii; tcpha_t *tcpha; conn_t *connp; + ip_stack_t *ipst = ill->ill_ipst; first_mp = mp; if (mctl_present) { @@ -938,7 +876,7 @@ icmp_inbound_error_fanout_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, * this information to a set of listeners. A separate * list could be kept to keep the cost of this down. 
*/ - ipcl_walk(pkt_too_big, (void *)mp); + ipcl_walk(pkt_too_big, (void *)mp, ipst); } /* Try to pass the ICMP message to clients who need it */ @@ -990,7 +928,7 @@ icmp_inbound_error_fanout_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, tcpha = (tcpha_t *)((char *)ip6h + hdr_length); connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, - TCPS_LISTEN, ill->ill_phyint->phyint_ifindex); + TCPS_LISTEN, ill->ill_phyint->phyint_ifindex, ipst); if (connp == NULL) { goto drop_pkt; } @@ -1019,6 +957,7 @@ icmp_inbound_error_fanout_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, case IPPROTO_ESP: case IPPROTO_AH: { int ipsec_rc; + ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; /* * We need a IPSEC_IN in the front to fanout to AH/ESP. @@ -1059,7 +998,7 @@ icmp_inbound_error_fanout_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, * to locate the ill. */ ASSERT(first_mp == mp); - first_mp = ipsec_in_alloc(B_FALSE); + first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); ASSERT(ill != NULL); if (first_mp == NULL) { freemsg(mp); @@ -1077,8 +1016,8 @@ icmp_inbound_error_fanout_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; } - if (!ipsec_loaded()) { - ip_proto_not_sup(q, first_mp, 0, zoneid); + if (!ipsec_loaded(ipss)) { + ip_proto_not_sup(q, first_mp, 0, zoneid, ipst); return; } @@ -1234,6 +1173,7 @@ icmp_redirect_v6(queue_t *q, mblk_t *mp, ill_t *ill) iulp_t ulp_info = { 0 }; ill_t *prev_ire_ill; ipif_t *ipif; + ip_stack_t *ipst = ill->ill_ipst; ip6h = (ip6_t *)mp->b_rptr; if (ip6h->ip6_nxt != IPPROTO_ICMPV6) @@ -1298,7 +1238,7 @@ icmp_redirect_v6(queue_t *q, mblk_t *mp, ill_t *ill) prev_ire = ire_route_lookup_v6(dst, 0, src, 0, ipif, NULL, ALL_ZONES, NULL, MATCH_IRE_GW | MATCH_IRE_ILL_GROUP | - MATCH_IRE_DEFAULT); + MATCH_IRE_DEFAULT, ipst); /* * Check that @@ -1343,7 +1283,8 @@ icmp_redirect_v6(queue_t *q, mblk_t *mp, ill_t *ill) tmp_ire = ire_ftable_lookup_v6(dst, 0, gateway, 0, NULL, &sire, ALL_ZONES, 0, NULL, - (MATCH_IRE_RECURSIVE | MATCH_IRE_GW | MATCH_IRE_DEFAULT)); + (MATCH_IRE_RECURSIVE | MATCH_IRE_GW | MATCH_IRE_DEFAULT), + ipst); if (sire != NULL) { bcopy(&sire->ire_uinfo, &ulp_info, sizeof (iulp_t)); ASSERT(tmp_ire != NULL); @@ -1416,7 +1357,8 @@ icmp_redirect_v6(queue_t *q, mblk_t *mp, ill_t *ill) (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), &ulp_info, NULL, - NULL); + NULL, + ipst); } else { queue_t *stq; @@ -1444,7 +1386,8 @@ icmp_redirect_v6(queue_t *q, mblk_t *mp, ill_t *ill) (RTF_DYNAMIC | RTF_HOST), &ulp_info, NULL, - NULL); + NULL, + ipst); } /* Release reference from earlier ipif_get_next_ipif() */ @@ -1461,7 +1404,7 @@ icmp_redirect_v6(queue_t *q, mblk_t *mp, ill_t *ill) &rd->nd_rd_target, &ipv6_all_ones, 0, &ire->ire_src_addr_v6, (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0, - (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR)); + (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR), ipst); /* * Delete any existing IRE_HOST type ires for this destination. 
@@ -1470,7 +1413,8 @@ icmp_redirect_v6(queue_t *q, mblk_t *mp, ill_t *ill) */ redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST, ire->ire_ipif, NULL, ALL_ZONES, 0, NULL, - (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP)); + (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP), + ipst); ire_refrele(ire); /* Held in ire_add_v6 */ @@ -1493,7 +1437,7 @@ fail_redirect: } static ill_t * -ip_queue_to_ill_v6(queue_t *q) +ip_queue_to_ill_v6(queue_t *q, ip_stack_t *ipst) { ill_t *ill; @@ -1507,7 +1451,7 @@ ip_queue_to_ill_v6(queue_t *q) ill = NULL; } else { ill = ill_lookup_on_name(ipif_loopback_name, B_FALSE, B_TRUE, - NULL, NULL, NULL, NULL, NULL); + NULL, NULL, NULL, NULL, NULL, ipst); } if (ill == NULL) ip0dbg(("ip_queue_to_ill_v6: no ill\n")); @@ -1524,7 +1468,7 @@ ip_queue_to_ill_v6(queue_t *q) */ static in6_addr_t * icmp_pick_source_v6(queue_t *wq, in6_addr_t *origsrc, in6_addr_t *origdst, - in6_addr_t *src, zoneid_t zoneid) + in6_addr_t *src, zoneid_t zoneid, ip_stack_t *ipst) { ill_t *ill; ire_t *ire; @@ -1538,7 +1482,8 @@ icmp_pick_source_v6(queue_t *wq, in6_addr_t *origsrc, in6_addr_t *origdst, } ire = ire_route_lookup_v6(origdst, 0, 0, (IRE_LOCAL|IRE_LOOPBACK), - NULL, NULL, zoneid, NULL, (MATCH_IRE_TYPE|MATCH_IRE_ZONEONLY)); + NULL, NULL, zoneid, NULL, (MATCH_IRE_TYPE|MATCH_IRE_ZONEONLY), + ipst); if (ire != NULL) { /* Destined to one of our addresses */ *src = *origdst; @@ -1553,9 +1498,9 @@ icmp_pick_source_v6(queue_t *wq, in6_addr_t *origsrc, in6_addr_t *origdst, /* What is the route back to the original source? */ ire = ire_route_lookup_v6(origsrc, 0, 0, 0, NULL, NULL, zoneid, NULL, - (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE)); + (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE), ipst); if (ire == NULL) { - BUMP_MIB(&ip6_mib, ipIfStatsOutNoRoutes); + BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); return (NULL); } /* @@ -1579,9 +1524,9 @@ icmp_pick_source_v6(queue_t *wq, in6_addr_t *origsrc, in6_addr_t *origdst, */ ire = ire_route_lookup_v6(origsrc, 0, 0, 0, NULL, NULL, zoneid, NULL, - (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE)); + (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE), ipst); if (ire == NULL) { - BUMP_MIB(&ip6_mib, ipIfStatsOutNoRoutes); + BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); return (NULL); } ASSERT(ire != NULL); @@ -1606,7 +1551,8 @@ icmp_pick_source_v6(queue_t *wq, in6_addr_t *origsrc, in6_addr_t *origdst, */ static void icmp_pkt_v6(queue_t *q, mblk_t *mp, void *stuff, size_t len, - const in6_addr_t *v6src_ptr, boolean_t mctl_present, zoneid_t zoneid) + const in6_addr_t *v6src_ptr, boolean_t mctl_present, zoneid_t zoneid, + ip_stack_t *ipst) { ip6_t *ip6h; in6_addr_t v6dst; @@ -1619,7 +1565,7 @@ icmp_pkt_v6(queue_t *q, mblk_t *mp, void *stuff, size_t len, mblk_t *ipsec_mp; ipsec_out_t *io; - ill = ip_queue_to_ill_v6(q); + ill = ip_queue_to_ill_v6(q, ipst); if (ill == NULL) { freemsg(mp); return; @@ -1674,7 +1620,8 @@ icmp_pkt_v6(queue_t *q, mblk_t *mp, void *stuff, size_t len, */ ipsec_in_t *ii; ASSERT(mp->b_datap->db_type == M_DATA); - if ((ipsec_mp = ipsec_in_alloc(B_FALSE)) == NULL) { + ipsec_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); + if (ipsec_mp == NULL) { freemsg(mp); BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); ill_refrele(ill); @@ -1709,14 +1656,14 @@ icmp_pkt_v6(queue_t *q, mblk_t *mp, void *stuff, size_t len, v6src = *v6src_ptr; } else { if (icmp_pick_source_v6(q, &ip6h->ip6_src, &ip6h->ip6_dst, - &v6src, zoneid) == NULL) { + &v6src, zoneid, ipst) == NULL) { freemsg(ipsec_mp); ill_refrele(ill); return; } } v6dst = ip6h->ip6_src; - 
len_needed = ipv6_icmp_return - IPV6_HDR_LEN - len; + len_needed = ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len; msg_len = msgdsize(mp); if (msg_len > len_needed) { if (!adjmsg(mp, len_needed - msg_len)) { @@ -1755,7 +1702,7 @@ icmp_pkt_v6(queue_t *q, mblk_t *mp, void *stuff, size_t len, ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; ip6h->ip6_nxt = IPPROTO_ICMPV6; - ip6h->ip6_hops = ipv6_def_hops; + ip6h->ip6_hops = ipst->ips_ipv6_def_hops; ip6h->ip6_dst = v6dst; ip6h->ip6_src = v6src; msg_len += IPV6_HDR_LEN + len; @@ -1857,7 +1804,7 @@ icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6) */ static mblk_t * icmp_pkt_err_ok_v6(queue_t *q, mblk_t *mp, - boolean_t llbcast, boolean_t mcast_ok) + boolean_t llbcast, boolean_t mcast_ok, ip_stack_t *ipst) { ip6_t *ip6h; @@ -1883,9 +1830,9 @@ icmp_pkt_err_ok_v6(queue_t *q, mblk_t *mp, if (!pullupmsg(mp, len_needed)) { ill_t *ill; - ill = ip_queue_to_ill_v6(q); + ill = ip_queue_to_ill_v6(q, ipst); if (ill == NULL) { - BUMP_MIB(&icmp6_mib, + BUMP_MIB(&ipst->ips_icmp6_mib, ipv6IfIcmpInErrors); } else { BUMP_MIB(ill->ill_icmp6_mib, @@ -1915,7 +1862,7 @@ icmp_pkt_err_ok_v6(queue_t *q, mblk_t *mp, freemsg(mp); return (NULL); } - if (icmp_err_rate_limit()) { + if (icmp_err_rate_limit(ipst)) { /* * Only send ICMP error packets every so often. * This should be done on a per port/source basis, @@ -1946,6 +1893,7 @@ icmp_send_redirect_v6(queue_t *q, mblk_t *mp, in6_addr_t *targetp, int max_redir_hdr_data_len; int pkt_len; in6_addr_t *srcp; + ip_stack_t *ipst = ill->ill_ipst; /* * We are called from ip_rput where we could @@ -1953,7 +1901,7 @@ icmp_send_redirect_v6(queue_t *q, mblk_t *mp, in6_addr_t *targetp, */ ASSERT(mp->b_datap->db_type == M_DATA); - mp = icmp_pkt_err_ok_v6(q, mp, llbcast, B_FALSE); + mp = icmp_pkt_err_ok_v6(q, mp, llbcast, B_FALSE, ipst); if (mp == NULL) return; nce = ndp_lookup_v6(ill, targetp, B_FALSE); @@ -1991,7 +1939,8 @@ icmp_send_redirect_v6(queue_t *q, mblk_t *mp, in6_addr_t *targetp, rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + ll_opt_len); rdh->nd_opt_rh_type = (uint8_t)ND_OPT_REDIRECTED_HEADER; /* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */ - max_redir_hdr_data_len = (ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8; + max_redir_hdr_data_len = + (ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8; pkt_len = msgdsize(mp); /* Make sure mp is 8 byte aligned */ if (pkt_len > max_redir_hdr_data_len) { @@ -2005,7 +1954,7 @@ icmp_send_redirect_v6(queue_t *q, mblk_t *mp, in6_addr_t *targetp, rdh->nd_opt_rh_reserved1 = 0; rdh->nd_opt_rh_reserved2 = 0; /* ipif_v6src_addr contains the link-local source address */ - rw_enter(&ill_g_lock, RW_READER); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); if (ill->ill_group != NULL) { /* * The receiver of the redirect will verify whether it @@ -2022,16 +1971,16 @@ icmp_send_redirect_v6(queue_t *q, mblk_t *mp, in6_addr_t *targetp, int cnt = ill->ill_group->illgrp_ill_count; ill = ill->ill_group->illgrp_ill; - cnt = ++icmp_redirect_v6_src_index % cnt; + cnt = ++ipst->ips_icmp_redirect_v6_src_index % cnt; while (cnt--) ill = ill->ill_group_next; srcp = &ill->ill_ipif->ipif_v6src_addr; } else { srcp = &ill->ill_ipif->ipif_v6src_addr; } - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); /* Redirects sent by router, and router is global zone */ - icmp_pkt_v6(q, mp, buf, len, srcp, B_FALSE, GLOBAL_ZONEID); + icmp_pkt_v6(q, mp, buf, len, srcp, B_FALSE, GLOBAL_ZONEID, ipst); kmem_free(buf, len); } @@ -2039,7 +1988,8 @@ icmp_send_redirect_v6(queue_t *q, mblk_t *mp, 
in6_addr_t *targetp, /* Generate an ICMP time exceeded message. (May be called as writer.) */ void icmp_time_exceeded_v6(queue_t *q, mblk_t *mp, uint8_t code, - boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid) + boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, + ip_stack_t *ipst) { icmp6_t icmp6; boolean_t mctl_present; @@ -2047,7 +1997,7 @@ icmp_time_exceeded_v6(queue_t *q, mblk_t *mp, uint8_t code, EXTRACT_PKT_MP(mp, first_mp, mctl_present); - mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); + mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); if (mp == NULL) { if (mctl_present) freeb(first_mp); @@ -2057,7 +2007,7 @@ icmp_time_exceeded_v6(queue_t *q, mblk_t *mp, uint8_t code, icmp6.icmp6_type = ICMP6_TIME_EXCEEDED; icmp6.icmp6_code = code; icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, - zoneid); + zoneid, ipst); } /* @@ -2065,7 +2015,8 @@ icmp_time_exceeded_v6(queue_t *q, mblk_t *mp, uint8_t code, */ void icmp_unreachable_v6(queue_t *q, mblk_t *mp, uint8_t code, - boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid) + boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, + ip_stack_t *ipst) { icmp6_t icmp6; boolean_t mctl_present; @@ -2073,7 +2024,7 @@ icmp_unreachable_v6(queue_t *q, mblk_t *mp, uint8_t code, EXTRACT_PKT_MP(mp, first_mp, mctl_present); - mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); + mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); if (mp == NULL) { if (mctl_present) freeb(first_mp); @@ -2083,7 +2034,7 @@ icmp_unreachable_v6(queue_t *q, mblk_t *mp, uint8_t code, icmp6.icmp6_type = ICMP6_DST_UNREACH; icmp6.icmp6_code = code; icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, - zoneid); + zoneid, ipst); } /* @@ -2091,7 +2042,7 @@ icmp_unreachable_v6(queue_t *q, mblk_t *mp, uint8_t code, */ static void icmp_pkt2big_v6(queue_t *q, mblk_t *mp, uint32_t mtu, - boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid) + boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, ip_stack_t *ipst) { icmp6_t icmp6; mblk_t *first_mp; @@ -2099,7 +2050,7 @@ icmp_pkt2big_v6(queue_t *q, mblk_t *mp, uint32_t mtu, EXTRACT_PKT_MP(mp, first_mp, mctl_present); - mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); + mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); if (mp == NULL) { if (mctl_present) freeb(first_mp); @@ -2111,7 +2062,7 @@ icmp_pkt2big_v6(queue_t *q, mblk_t *mp, uint32_t mtu, icmp6.icmp6_mtu = htonl(mtu); icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, - zoneid); + zoneid, ipst); } /* @@ -2120,7 +2071,8 @@ icmp_pkt2big_v6(queue_t *q, mblk_t *mp, uint32_t mtu, */ static void icmp_param_problem_v6(queue_t *q, mblk_t *mp, uint8_t code, - uint32_t offset, boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid) + uint32_t offset, boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, + ip_stack_t *ipst) { icmp6_t icmp6; boolean_t mctl_present; @@ -2128,7 +2080,7 @@ icmp_param_problem_v6(queue_t *q, mblk_t *mp, uint8_t code, EXTRACT_PKT_MP(mp, first_mp, mctl_present); - mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); + mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); if (mp == NULL) { if (mctl_present) freeb(first_mp); @@ -2139,7 +2091,7 @@ icmp_param_problem_v6(queue_t *q, mblk_t *mp, uint8_t code, icmp6.icmp6_code = code; icmp6.icmp6_pptr = htonl(offset); icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, - zoneid); + zoneid, ipst); } /* @@ -2175,6 +2127,7 @@ ip_bind_v6(queue_t *q, mblk_t *mp, conn_t *connp, ip6_pkt_t *ipp) boolean_t 
orig_pkt_isv6 = connp->conn_pkt_isv6; ipa6_conn_x_t *acx6; boolean_t verify_dst; + ip_stack_t *ipst = connp->conn_netstack->netstack_ip; ASSERT(connp->conn_af_isv6); len = mp->b_wptr - mp->b_rptr; @@ -2199,7 +2152,8 @@ ip_bind_v6(queue_t *q, mblk_t *mp, conn_t *connp, ip6_pkt_t *ipp) if (tbr->ADDR_length == 0) { if ((protocol == IPPROTO_TCP || protocol == IPPROTO_SCTP || protocol == IPPROTO_ESP || protocol == IPPROTO_AH) && - ipcl_proto_fanout_v6[protocol].connf_head != NULL) { + ipst->ips_ipcl_proto_fanout_v6[protocol].connf_head != + NULL) { /* * TCP, SCTP, AH, and ESP have single protocol fanouts. * Do not allow others to bind to these. @@ -2380,9 +2334,9 @@ ip_bind_v6(queue_t *q, mblk_t *mp, conn_t *connp, ip6_pkt_t *ipp) if ((orig_pkt_isv6 != connp->conn_pkt_isv6) && !(IPCL_IS_TCP(connp) || IPCL_IS_UDP(connp))) { if (connp->conn_pkt_isv6) - ip_setqinfo(RD(q), IPV6_MINOR, B_TRUE); + ip_setqinfo(RD(q), IPV6_MINOR, B_TRUE, ipst); else - ip_setqinfo(RD(q), IPV4_MINOR, B_TRUE); + ip_setqinfo(RD(q), IPV4_MINOR, B_TRUE, ipst); } /* @@ -2447,6 +2401,7 @@ ip_bind_laddr_v6(conn_t *connp, mblk_t *mp, const in6_addr_t *v6src, ipif_t *ipif = NULL; mblk_t *policy_mp; zoneid_t zoneid; + ip_stack_t *ipst = connp->conn_netstack->netstack_ip; if (ipsec_policy_set) policy_mp = mp->b_cont; @@ -2461,7 +2416,7 @@ ip_bind_laddr_v6(conn_t *connp, mblk_t *mp, const in6_addr_t *v6src, if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) { src_ire = ire_route_lookup_v6(v6src, 0, 0, - 0, NULL, NULL, zoneid, NULL, MATCH_IRE_ZONEONLY); + 0, NULL, NULL, zoneid, NULL, MATCH_IRE_ZONEONLY, ipst); /* * If an address other than in6addr_any is requested, * we verify that it is a valid address for bind @@ -2498,7 +2453,7 @@ ip_bind_laddr_v6(conn_t *connp, mblk_t *mp, const in6_addr_t *v6src, * ip_wput_v6 */ multi_ipif = ipif_lookup_group_v6( - &ipv6_unspecified_group, zoneid); + &ipv6_unspecified_group, zoneid, ipst); } mutex_exit(&connp->conn_lock); save_ire = src_ire; @@ -2517,7 +2472,8 @@ ip_bind_laddr_v6(conn_t *connp, mblk_t *mp, const in6_addr_t *v6src, } else { *mp->b_wptr++ = (char)connp->conn_ulp; ipif = ipif_lookup_addr_v6(v6src, NULL, zoneid, - CONNP_TO_WQ(connp), mp, ip_wput_nondata, &error); + CONNP_TO_WQ(connp), mp, ip_wput_nondata, &error, + ipst); if (ipif == NULL) { if (error == EINPROGRESS) { if (src_ire != NULL) @@ -2580,7 +2536,8 @@ ip_bind_laddr_v6(conn_t *connp, mblk_t *mp, const in6_addr_t *v6src, } if (error == 0) { if (ire_requested) { - if (!ip_bind_insert_ire_v6(mp, src_ire, v6src, NULL)) { + if (!ip_bind_insert_ire_v6(mp, src_ire, v6src, NULL, + ipst)) { error = -1; goto bad_addr; } @@ -2685,6 +2642,7 @@ ip_bind_connected_v6(conn_t *connp, mblk_t *mp, in6_addr_t *v6src, ipif_t *src_ipif = NULL; zoneid_t zoneid; boolean_t ill_held = B_FALSE; + ip_stack_t *ipst = connp->conn_netstack->netstack_ip; src_ire = dst_ire = NULL; /* @@ -2721,7 +2679,7 @@ ip_bind_connected_v6(conn_t *connp, mblk_t *mp, in6_addr_t *v6src, zoneid, 0, &ipif); } else { /* Look for default like ip_wput_v6 */ - ipif = ipif_lookup_group_v6(v6dst, zoneid); + ipif = ipif_lookup_group_v6(v6dst, zoneid, ipst); } mutex_exit(&connp->conn_lock); if (ipif == NULL || !ire_requested || @@ -2743,7 +2701,8 @@ ip_bind_connected_v6(conn_t *connp, mblk_t *mp, in6_addr_t *v6src, dst_ire = ire_route_lookup_v6(v6dst, NULL, NULL, 0, NULL, &sire, zoneid, MBLK_GETLABEL(mp), MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | - MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR); + MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR, + ipst); /* * We also 
prevent ire's with src address INADDR_ANY to * be used, which are created temporarily for @@ -2797,7 +2756,7 @@ ip_bind_connected_v6(conn_t *connp, mblk_t *mp, in6_addr_t *v6src, if (dst_ire != NULL && is_system_labeled() && !IPCL_IS_TCP(connp) && tsol_compute_label_v6(DB_CREDDEF(mp, connp->conn_cred), v6dst, NULL, - connp->conn_mac_exempt) != 0) { + connp->conn_mac_exempt, ipst) != 0) { error = EHOSTUNREACH; if (ip_debug > 2) { pr_addr_dbg("ip_bind_connected: no label for dst %s\n", @@ -2835,7 +2794,8 @@ ip_bind_connected_v6(conn_t *connp, mblk_t *mp, in6_addr_t *v6src, * calling ip_newroute_v6(). This is why we further check on the * IRE during Multidata packet transmission in tcp_multisend(). */ - if (ip_multidata_outbound && !ipsec_policy_set && dst_ire != NULL && + if (ipst->ips_ip_multidata_outbound && !ipsec_policy_set && + dst_ire != NULL && !(dst_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK | IRE_BROADCAST)) && (md_ill = ire_to_ill(dst_ire), md_ill != NULL) && ILL_MDT_CAPABLE(md_ill)) { @@ -2850,7 +2810,7 @@ ip_bind_connected_v6(conn_t *connp, mblk_t *mp, in6_addr_t *v6src, src_ire = ire_ftable_lookup_v6(v6dst, 0, 0, 0, NULL, NULL, zoneid, 0, NULL, MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | - MATCH_IRE_RJ_BHOLE); + MATCH_IRE_RJ_BHOLE, ipst); if (src_ire == NULL) { error = EHOSTUNREACH; goto bad_addr; @@ -2896,7 +2856,8 @@ ip_bind_connected_v6(conn_t *connp, mblk_t *mp, in6_addr_t *v6src, if_index = ipp->ipp_ifindex; dst_ill = ill_lookup_on_ifindex( - if_index, B_TRUE, NULL, NULL, NULL, NULL); + if_index, B_TRUE, NULL, NULL, NULL, NULL, + ipst); if (dst_ill == NULL) { ip1dbg(("ip_bind_connected_v6:" " bad ifindex %d\n", if_index)); @@ -2926,7 +2887,7 @@ ip_bind_connected_v6(conn_t *connp, mblk_t *mp, in6_addr_t *v6src, /* No need to hold ill here */ dst_ill = dst_ire->ire_ipif->ipif_ill; } - if (!ip6_asp_can_lookup()) { + if (!ip6_asp_can_lookup(ipst)) { *mp->b_wptr++ = (char)protocol; ip6_asp_pending_op(CONNP_TO_WQ(connp), mp, ip_bind_connected_resume_v6); @@ -2936,7 +2897,7 @@ ip_bind_connected_v6(conn_t *connp, mblk_t *mp, in6_addr_t *v6src, src_ipif = ipif_select_source_v6(dst_ill, v6dst, RESTRICT_TO_NONE, connp->conn_src_preferences, zoneid); - ip6_asp_table_refrele(); + ip6_asp_table_refrele(ipst); if (src_ipif == NULL) { pr_addr_dbg("ip_bind_connected_v6: " "no usable source address for " @@ -2954,7 +2915,7 @@ ip_bind_connected_v6(conn_t *connp, mblk_t *mp, in6_addr_t *v6src, * UP interface for hard binding. */ src_ire = ire_route_lookup_v6(v6src, 0, 0, 0, NULL, - NULL, zoneid, NULL, MATCH_IRE_ZONEONLY); + NULL, zoneid, NULL, MATCH_IRE_ZONEONLY, ipst); /* src_ire must be a local|loopback */ if (!IRE_IS_LOCAL(src_ire)) { @@ -3017,7 +2978,8 @@ ip_bind_connected_v6(conn_t *connp, mblk_t *mp, in6_addr_t *v6src, if (sire != NULL) ulp_info = &(sire->ire_uinfo); - if (!ip_bind_insert_ire_v6(mp, dst_ire, v6dst, ulp_info)) { + if (!ip_bind_insert_ire_v6(mp, dst_ire, v6dst, ulp_info, + ipst)) { error = -1; goto bad_addr; } @@ -3116,9 +3078,10 @@ refrele_and_quit: * Insert the ire in b_cont. Returns false if it fails (due to lack of space). * Makes the IRE be IRE_BROADCAST if dst is a multicast address. 
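The bind-path hunks above (ip_bind_v6, ip_bind_laddr_v6, ip_bind_connected_v6) all start the same way: the conn_t identifies its IP instance once, and that ipst is then threaded into every route and ipif lookup. A hedged sketch of that pattern, using a hypothetical example_bind_check(); the ipst derivation and the trailing ipst argument mirror the hunks.

    /*
     * Sketch only: example_bind_check() is not part of the patch.
     * The ipst derivation and the extra ipst argument on the lookup
     * mirror ip_bind_laddr_v6()/ip_bind_connected_v6() above.
     */
    static int
    example_bind_check(conn_t *connp, in6_addr_t *v6src, zoneid_t zoneid)
    {
        ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
        ire_t *src_ire;

        /* Formerly a lookup against the one global IP state. */
        src_ire = ire_route_lookup_v6(v6src, 0, 0, 0, NULL, NULL,
            zoneid, NULL, MATCH_IRE_ZONEONLY, ipst);
        if (src_ire == NULL)
            return (EADDRNOTAVAIL);
        ire_refrele(src_ire);
        return (0);
    }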
*/ +/* ARGSUSED4 */ static boolean_t ip_bind_insert_ire_v6(mblk_t *mp, ire_t *ire, const in6_addr_t *dst, - iulp_t *ulp_info) + iulp_t *ulp_info, ip_stack_t *ipst) { mblk_t *mp1; ire_t *ret_ire; @@ -3227,6 +3190,8 @@ ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, boolean_t secure, shared_addr; conn_t *connp, *first_connp, *next_connp; connf_t *connfp; + ip_stack_t *ipst = inill->ill_ipst; + ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; if (mctl_present) { mp = first_mp->b_cont; @@ -3252,7 +3217,7 @@ ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, zoneid = tsol_packet_to_zoneid(mp); } - connfp = &ipcl_proto_fanout_v6[nexthdr]; + connfp = &ipst->ips_ipcl_proto_fanout_v6[nexthdr]; mutex_enter(&connfp->connf_lock); connp = connfp->connf_head; for (connp = connfp->connf_head; connp != NULL; @@ -3274,7 +3239,7 @@ ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, mutex_exit(&connfp->connf_lock); if (ip_fanout_send_icmp_v6(q, first_mp, flags, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_NEXTHEADER, - nexthdr_offset, mctl_present, zoneid)) { + nexthdr_offset, mctl_present, zoneid, ipst)) { BUMP_MIB(ill->ill_ip_mib, ipIfStatsInUnknownProtos); } @@ -3356,7 +3321,8 @@ ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, * it instead. */ if (!IPCL_IS_IPTUN(connp) && - (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure)) { + (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || + secure)) { first_mp1 = ipsec_check_inbound_policy (first_mp1, connp, NULL, ip6h, mctl_present); @@ -3380,7 +3346,7 @@ ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, mutex_exit(&connfp->connf_lock); /* Initiate IPPF processing */ - if (IP6_IN_IPP(flags)) { + if (IP6_IN_IPP(flags, ipst)) { uint_t ifindex; mutex_enter(&ill->ill_lock); @@ -3443,7 +3409,7 @@ ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, * it instead. */ if (nexthdr != IPPROTO_ENCAP && nexthdr != IPPROTO_IPV6 && - (CONN_INBOUND_POLICY_PRESENT(connp) || secure)) { + (CONN_INBOUND_POLICY_PRESENT(connp, ipss) || secure)) { first_mp = ipsec_check_inbound_policy(first_mp, connp, NULL, ip6h, mctl_present); if (first_mp == NULL) { @@ -3466,12 +3432,13 @@ ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, int ip_fanout_send_icmp_v6(queue_t *q, mblk_t *mp, uint_t flags, uint_t icmp_type, uint8_t icmp_code, uint_t nexthdr_offset, - boolean_t mctl_present, zoneid_t zoneid) + boolean_t mctl_present, zoneid_t zoneid, ip_stack_t *ipst) { ip6_t *ip6h; mblk_t *first_mp; boolean_t secure; unsigned char db_type; + ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; first_mp = mp; if (mctl_present) { @@ -3498,9 +3465,9 @@ ip_fanout_send_icmp_v6(queue_t *q, mblk_t *mp, uint_t flags, * there is no "conn", we are checking with global policy. 
*/ ip6h = (ip6_t *)mp->b_rptr; - if (secure || ipsec_inbound_v6_policy_present) { + if (secure || ipss->ipsec_inbound_v6_policy_present) { first_mp = ipsec_check_global_policy(first_mp, NULL, - NULL, ip6h, mctl_present); + NULL, ip6h, mctl_present, ipst->ips_netstack); if (first_mp == NULL) return (0); } @@ -3510,7 +3477,7 @@ ip_fanout_send_icmp_v6(queue_t *q, mblk_t *mp, uint_t flags, if (flags & IP_FF_SEND_ICMP) { if (flags & IP_FF_HDR_COMPLETE) { - if (ip_hdr_complete_v6(ip6h, zoneid)) { + if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { freemsg(first_mp); return (1); } @@ -3518,11 +3485,11 @@ ip_fanout_send_icmp_v6(queue_t *q, mblk_t *mp, uint_t flags, switch (icmp_type) { case ICMP6_DST_UNREACH: icmp_unreachable_v6(WR(q), first_mp, icmp_code, - B_FALSE, B_FALSE, zoneid); + B_FALSE, B_FALSE, zoneid, ipst); break; case ICMP6_PARAM_PROB: icmp_param_problem_v6(WR(q), first_mp, icmp_code, - nexthdr_offset, B_FALSE, B_FALSE, zoneid); + nexthdr_offset, B_FALSE, B_FALSE, zoneid, ipst); break; default: #ifdef DEBUG @@ -3555,6 +3522,8 @@ ip_fanout_tcp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, ill_t *inill, conn_t *connp; tcph_t *tcph; boolean_t syn_present = B_FALSE; + ip_stack_t *ipst = inill->ill_ipst; + ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; first_mp = mp; if (mctl_present) { @@ -3565,7 +3534,7 @@ ip_fanout_tcp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, ill_t *inill, secure = B_FALSE; } - connp = ipcl_classify_v6(mp, IPPROTO_TCP, hdr_len, zoneid); + connp = ipcl_classify_v6(mp, IPPROTO_TCP, hdr_len, zoneid, ipst); if (connp == NULL || !conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid)) { @@ -3578,7 +3547,8 @@ ip_fanout_tcp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, ill_t *inill, ASSERT((dp->db_struioflag & STRUIO_IP) == 0); /* Initiate IPPf processing, if needed. */ - if (IPP_ENABLED(IPP_LOCAL_IN) && (flags & IP6_NO_IPPOLICY)) { + if (IPP_ENABLED(IPP_LOCAL_IN, ipst) && + (flags & IP6_NO_IPPOLICY)) { ill_index = ill->ill_phyint->phyint_ifindex; ip_process(IPP_LOCAL_IN, &first_mp, ill_index); if (first_mp == NULL) { @@ -3588,7 +3558,8 @@ ip_fanout_tcp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, ill_t *inill, } } BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); - tcp_xmit_listeners_reset(first_mp, hdr_len, zoneid); + tcp_xmit_listeners_reset(first_mp, hdr_len, zoneid, + ipst->ips_netstack->netstack_tcp); if (connp != NULL) CONN_DEC_REF(connp); return; @@ -3604,8 +3575,9 @@ ip_fanout_tcp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, ill_t *inill, * squeue to be that of the active connect's. 
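In the fanout hunks here the instance comes from the receiving ill instead, and the per-stack IPsec state hangs off the owning netstack; the classifier tables (ips_ipcl_proto_fanout_v6, ips_ipcl_udp_fanout) are likewise per instance. A minimal sketch, assuming a hypothetical example_fanout_select(); the derivations and field names come from the hunks above.

    /*
     * Sketch only: example_fanout_select() is not part of the patch.
     */
    static connf_t *
    example_fanout_select(ill_t *inill, uint8_t nexthdr)
    {
        ip_stack_t    *ipst = inill->ill_ipst;
        ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec;
        connf_t       *connfp;

        /* was: &ipcl_proto_fanout_v6[nexthdr] (one global table) */
        connfp = &ipst->ips_ipcl_proto_fanout_v6[nexthdr];

        /*
         * ipss is what CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss)
         * consults in the hunks above; it is unused in this sketch.
         */
        (void) ipss;
        return (connfp);
    }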
*/ if ((flags & IP_FF_LOOPBACK) && do_tcp_fusion && - !CONN_INBOUND_POLICY_PRESENT_V6(connp) && !secure && - !IP6_IN_IPP(flags)) { + !CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) && + !secure && + !IP6_IN_IPP(flags, ipst)) { ASSERT(Q_TO_CONN(q) != NULL); sqp = Q_TO_CONN(q)->conn_sqp; } else { @@ -3634,7 +3606,8 @@ ip_fanout_tcp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, ill_t *inill, return; } if (flags & TH_ACK) { - tcp_xmit_listeners_reset(first_mp, hdr_len, zoneid); + tcp_xmit_listeners_reset(first_mp, hdr_len, zoneid, + ipst->ips_netstack->netstack_tcp); CONN_DEC_REF(connp); return; } @@ -3644,7 +3617,7 @@ ip_fanout_tcp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, ill_t *inill, return; } - if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) { + if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { first_mp = ipsec_check_inbound_policy(first_mp, connp, NULL, ip6h, mctl_present); if (first_mp == NULL) { @@ -3680,7 +3653,7 @@ ip_fanout_tcp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, ill_t *inill, } /* Initiate IPPF processing */ - if (IP6_IN_IPP(flags)) { + if (IP6_IN_IPP(flags, ipst)) { uint_t ifindex; mutex_enter(&ill->ill_lock); @@ -3764,6 +3737,8 @@ ip_fanout_udp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, uint32_t ports, mblk_t *mp1, *first_mp1; in6_addr_t src; boolean_t shared_addr; + ip_stack_t *ipst = inill->ill_ipst; + ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; first_mp = mp; if (mctl_present) { @@ -3782,6 +3757,10 @@ ip_fanout_udp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, uint32_t ports, shared_addr = (zoneid == ALL_ZONES); if (shared_addr) { + /* + * No need to handle exclusive-stack zones since ALL_ZONES + * only applies to the shared stack. + */ zoneid = tsol_mlp_findzone(IPPROTO_UDP, dstport); /* * If no shared MLP is found, tsol_mlp_findzone returns @@ -3795,7 +3774,7 @@ ip_fanout_udp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, uint32_t ports, } /* Attempt to find a client stream based on destination port. */ - connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(dstport)]; + connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(dstport, ipst)]; mutex_enter(&connfp->connf_lock); connp = connfp->connf_head; if (!IN6_IS_ADDR_MULTICAST(&dst)) { @@ -3828,7 +3807,7 @@ ip_fanout_udp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, uint32_t ports, CONN_DEC_REF(connp); return; } - if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) { + if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { first_mp = ipsec_check_inbound_policy(first_mp, connp, NULL, ip6h, mctl_present); if (first_mp == NULL) { @@ -3837,7 +3816,7 @@ ip_fanout_udp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, uint32_t ports, } } /* Initiate IPPF processing */ - if (IP6_IN_IPP(flags)) { + if (IP6_IN_IPP(flags, ipst)) { uint_t ifindex; mutex_enter(&ill->ill_lock); @@ -3878,7 +3857,7 @@ ip_fanout_udp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, uint32_t ports, /* Send it upstream */ CONN_UDP_RECV(connp, mp); - IP6_STAT(ip6_udp_fannorm); + IP6_STAT(ipst, ip6_udp_fannorm); CONN_DEC_REF(connp); if (mctl_present) freeb(first_mp); @@ -3960,8 +3939,7 @@ ip_fanout_udp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, uint32_t ports, goto next_one; } - if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || - secure) { + if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { first_mp1 = ipsec_check_inbound_policy (first_mp1, connp, NULL, ip6h, mctl_present); @@ -3978,7 +3956,7 @@ next_one: mutex_enter(&connfp->connf_lock); /* Follow the next pointer before releasing the conn. 
*/ next_conn = connp->conn_next; - IP6_STAT(ip6_udp_fanmb); + IP6_STAT(ipst, ip6_udp_fanmb); CONN_DEC_REF(connp); connp = next_conn; } @@ -3987,7 +3965,7 @@ next_one: mutex_exit(&connfp->connf_lock); /* Initiate IPPF processing */ - if (IP6_IN_IPP(flags)) { + if (IP6_IN_IPP(flags, ipst)) { uint_t ifindex; mutex_enter(&ill->ill_lock); @@ -4027,7 +4005,7 @@ next_one: BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); freemsg(mp); } else { - if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) { + if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { first_mp = ipsec_check_inbound_policy(first_mp, connp, NULL, ip6h, mctl_present); if (first_mp == NULL) { @@ -4041,7 +4019,7 @@ next_one: /* Send it upstream */ CONN_UDP_RECV(connp, mp); } - IP6_STAT(ip6_udp_fanmb); + IP6_STAT(ipst, ip6_udp_fanmb); CONN_DEC_REF(connp); if (mctl_present) freeb(first_mp); @@ -4054,14 +4032,14 @@ notfound: * there a client that wants all * unclaimed datagrams? */ - if (ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) { + if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) { ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, IPPROTO_UDP, 0, flags | IP_FF_RAWIP | IP_FF_IPINFO, mctl_present, zoneid); } else { if (ip_fanout_send_icmp_v6(q, first_mp, flags, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0, - mctl_present, zoneid)) { + mctl_present, zoneid, ipst)) { BUMP_MIB(ill->ill_ip_mib, udpIfStatsNoPorts); } } @@ -4189,12 +4167,12 @@ done: } int -ip_hdr_complete_v6(ip6_t *ip6h, zoneid_t zoneid) +ip_hdr_complete_v6(ip6_t *ip6h, zoneid_t zoneid, ip_stack_t *ipst) { ire_t *ire; if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { - ire = ire_lookup_local_v6(zoneid); + ire = ire_lookup_local_v6(zoneid, ipst); if (ire == NULL) { ip1dbg(("ip_hdr_complete_v6: no source IRE\n")); return (1); @@ -4203,7 +4181,7 @@ ip_hdr_complete_v6(ip6_t *ip6h, zoneid_t zoneid) ire_refrele(ire); } ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; - ip6h->ip6_hops = ipv6_def_hops; + ip6h->ip6_hops = ipst->ips_ipv6_def_hops; return (0); } @@ -4390,7 +4368,7 @@ ip_newroute_get_dst_ill_v6(ill_t *dst_ill) /* ARGSUSED */ void ip_newroute_v6(queue_t *q, mblk_t *mp, const in6_addr_t *v6dstp, - const in6_addr_t *v6srcp, ill_t *ill, zoneid_t zoneid) + const in6_addr_t *v6srcp, ill_t *ill, zoneid_t zoneid, ip_stack_t *ipst) { in6_addr_t v6gw; in6_addr_t dst; @@ -4449,7 +4427,7 @@ ip_newroute_v6(queue_t *q, mblk_t *mp, const in6_addr_t *v6dstp, ((ip6i_t *)ip6h)->ip6i_flags & IP6I_ATTACH_IF) { attach_ill = ip_grab_attach_ill(ill, first_mp, (ip6i_present ? ((ip6i_t *)ip6h)->ip6i_ifindex : - io->ipsec_out_ill_index), B_TRUE); + io->ipsec_out_ill_index), B_TRUE, ipst); /* Failure case frees things for us. 
*/ if (attach_ill == NULL) return; @@ -4493,7 +4471,7 @@ ip_newroute_v6(queue_t *q, mblk_t *mp, const in6_addr_t *v6dstp, MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR; ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, NULL, &sire, zoneid, 0, MBLK_GETLABEL(mp), - match_flags); + match_flags, ipst); /* * ire_add_then_send -> ip_newroute_v6 in the CGTP case passes * in a NULL ill, but the packet could be a neighbor @@ -4519,7 +4497,7 @@ ip_newroute_v6(queue_t *q, mblk_t *mp, const in6_addr_t *v6dstp, } match_flags |= MATCH_IRE_PARENT | MATCH_IRE_SECATTR; ire = ire_ftable_lookup_v6(v6dstp, NULL, NULL, 0, ill->ill_ipif, - &sire, zoneid, 0, MBLK_GETLABEL(mp), match_flags); + &sire, zoneid, 0, MBLK_GETLABEL(mp), match_flags, ipst); } ip3dbg(("ip_newroute_v6: ire_ftable_lookup_v6() " @@ -4566,7 +4544,7 @@ ip_newroute_v6(queue_t *q, mblk_t *mp, const in6_addr_t *v6dstp, * the destination contained in sire. */ multirt_is_resolvable = ire_multirt_lookup_v6(&ire, - &sire, multirt_flags, MBLK_GETLABEL(mp)); + &sire, multirt_flags, MBLK_GETLABEL(mp), ipst); ip3dbg(("ip_newroute_v6: multirt_is_resolvable %d, " "ire %p, sire %p\n", @@ -4632,7 +4610,7 @@ ip_newroute_v6(queue_t *q, mblk_t *mp, const in6_addr_t *v6dstp, return; } ip_rts_change_v6(RTM_MISS, v6dstp, 0, 0, 0, 0, 0, 0, - RTA_DST); + RTA_DST, ipst); goto icmp_err_ret; } @@ -4781,7 +4759,7 @@ ip_newroute_v6(queue_t *q, mblk_t *mp, const in6_addr_t *v6dstp, ASSERT(src_ipif == NULL); if (ire->ire_type == IRE_IF_RESOLVER && !IN6_IS_ADDR_UNSPECIFIED(&v6gw) && - ip6_asp_can_lookup()) { + ip6_asp_can_lookup(ipst)) { /* * The ire cache entry we're adding is for the * gateway itself. The source address in this case @@ -4800,9 +4778,9 @@ ip_newroute_v6(queue_t *q, mblk_t *mp, const in6_addr_t *v6dstp, */ src_ipif = ipif_lookup_addr_v6( &sire->ire_src_addr_v6, NULL, zoneid, - NULL, NULL, NULL, NULL); + NULL, NULL, NULL, NULL, ipst); } - if (src_ipif == NULL && ip6_asp_can_lookup()) { + if (src_ipif == NULL && ip6_asp_can_lookup(ipst)) { uint_t restrict_ill = RESTRICT_TO_NONE; if (ip6i_present && ((ip6i_t *)ip6h)->ip6i_flags @@ -4953,7 +4931,8 @@ ip_newroute_v6(queue_t *q, mblk_t *mp, const in6_addr_t *v6dstp, (RTF_SETSRC | RTF_MULTIRT), &(sire->ire_uinfo), NULL, - gcgrp); + gcgrp, + ipst); if (ire == NULL) { if (gcgrp != NULL) { @@ -5004,7 +4983,7 @@ ip_newroute_v6(queue_t *q, mblk_t *mp, const in6_addr_t *v6dstp, } ire_add_then_send(q, ire, xmit_mp); if (ip6_asp_table_held) { - ip6_asp_table_refrele(); + ip6_asp_table_refrele(ipst); ip6_asp_table_held = B_FALSE; } ire_refrele(save_ire); @@ -5114,7 +5093,8 @@ ip_newroute_v6(queue_t *q, mblk_t *mp, const in6_addr_t *v6dstp, (RTF_SETSRC | RTF_MULTIRT) : 0, &(save_ire->ire_uinfo), NULL, - gcgrp); + gcgrp, + ipst); freeb(dlureq_mp); @@ -5166,7 +5146,7 @@ ip_newroute_v6(queue_t *q, mblk_t *mp, const in6_addr_t *v6dstp, } ire_add_then_send(q, ire, xmit_mp); if (ip6_asp_table_held) { - ip6_asp_table_refrele(); + ip6_asp_table_refrele(ipst); ip6_asp_table_held = B_FALSE; } @@ -5250,7 +5230,7 @@ ip_newroute_v6(queue_t *q, mblk_t *mp, const in6_addr_t *v6dstp, ip1dbg(("ip_newroute_v6:ILLF_XRESOLV\n")); if (ip6_asp_table_held) { - ip6_asp_table_refrele(); + ip6_asp_table_refrele(ipst); ip6_asp_table_held = B_FALSE; } ire = ire_create_mp_v6( @@ -5273,7 +5253,8 @@ ip_newroute_v6(queue_t *q, mblk_t *mp, const in6_addr_t *v6dstp, 0, /* flags if any */ &(save_ire->ire_uinfo), NULL, - NULL); + NULL, + ipst); ire_refrele(save_ire); if (ire == NULL) { @@ -5462,7 +5443,8 @@ ip_newroute_v6(queue_t *q, mblk_t *mp, const 
in6_addr_t *v6dstp, 0, /* flags if any */ &(save_ire->ire_uinfo), NULL, - gcgrp); + gcgrp, + ipst); if (ire == NULL) { if (gcgrp != NULL) { @@ -5502,7 +5484,7 @@ ip_newroute_v6(queue_t *q, mblk_t *mp, const in6_addr_t *v6dstp, */ ire_add_then_send(q, ire, first_mp); if (ip6_asp_table_held) { - ip6_asp_table_refrele(); + ip6_asp_table_refrele(ipst); ip6_asp_table_held = B_FALSE; } @@ -5556,7 +5538,7 @@ ip_newroute_v6(queue_t *q, mblk_t *mp, const in6_addr_t *v6dstp, * address is resolved. */ if (ip6_asp_table_held) { - ip6_asp_table_refrele(); + ip6_asp_table_refrele(ipst); ip6_asp_table_held = B_FALSE; } ASSERT(ire->ire_nce == NULL); @@ -5609,7 +5591,7 @@ ip_newroute_v6(queue_t *q, mblk_t *mp, const in6_addr_t *v6dstp, break; } if (ip6_asp_table_held) { - ip6_asp_table_refrele(); + ip6_asp_table_refrele(ipst); ip6_asp_table_held = B_FALSE; } } while (multirt_resolve_next); @@ -5633,9 +5615,9 @@ err_ret: ill_refrele(ill); } else { if (mp->b_prev != NULL) { - BUMP_MIB(&ip6_mib, ipIfStatsInDiscards); + BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInDiscards); } else { - BUMP_MIB(&ip6_mib, ipIfStatsOutDiscards); + BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); } } /* Did this packet originate externally? */ @@ -5657,7 +5639,7 @@ err_ret: icmp_err_ret: if (ip6_asp_table_held) - ip6_asp_table_refrele(); + ip6_asp_table_refrele(ipst); if (src_ipif != NULL) ipif_refrele(src_ipif); if (dst_ill != NULL) { @@ -5691,7 +5673,7 @@ icmp_err_ret: if (ill != NULL) { BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes); } else { - BUMP_MIB(&ip6_mib, ipIfStatsInNoRoutes); + BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInNoRoutes); } mp->b_next = NULL; mp->b_prev = NULL; @@ -5700,9 +5682,9 @@ icmp_err_ret: if (ill != NULL) { BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutNoRoutes); } else { - BUMP_MIB(&ip6_mib, ipIfStatsOutNoRoutes); + BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); } - if (ip_hdr_complete_v6(ip6h, zoneid)) { + if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { /* Failed */ if (copy_mp != NULL) { MULTIRT_DEBUG_UNTAG(copy_mp); @@ -5745,7 +5727,7 @@ icmp_err_ret: AF_INET6, v6dstp); } icmp_unreachable_v6(WR(q), first_mp, ICMP6_DST_UNREACH_NOROUTE, - B_FALSE, B_FALSE, zoneid); + B_FALSE, B_FALSE, zoneid, ipst); } /* @@ -5779,6 +5761,7 @@ ip_newroute_ipif_v6(queue_t *q, mblk_t *mp, ipif_t *ipif, boolean_t ipif_held = B_FALSE; boolean_t ill_held = B_FALSE; boolean_t ip6_asp_table_held = B_FALSE; + ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; /* * This loop is run only once in most cases. @@ -5842,7 +5825,7 @@ ip_newroute_ipif_v6(queue_t *q, mblk_t *mp, ipif_t *ipif, attach_ill = ip_grab_attach_ill(ill, first_mp, (ip6i_present ? ((ip6i_t *)ip6h)->ip6i_ifindex : - io->ipsec_out_ill_index), B_TRUE); + io->ipsec_out_ill_index), B_TRUE, ipst); /* Failure case frees things for us. 
*/ if (attach_ill == NULL) return; @@ -5927,9 +5910,9 @@ ip_newroute_ipif_v6(queue_t *q, mblk_t *mp, ipif_t *ipif, */ src_ipif = ipif_lookup_addr_v6(&fire->ire_src_addr_v6, - NULL, zoneid, NULL, NULL, NULL, NULL); + NULL, zoneid, NULL, NULL, NULL, NULL, ipst); } - if (src_ipif == NULL && ip6_asp_can_lookup()) { + if (src_ipif == NULL && ip6_asp_can_lookup(ipst)) { ip6_asp_table_held = B_TRUE; src_ipif = ipif_select_source_v6(dst_ill, v6dstp, RESTRICT_TO_NONE, IPV6_PREFER_SRC_DEFAULT, zoneid); @@ -6053,7 +6036,8 @@ ip_newroute_ipif_v6(queue_t *q, mblk_t *mp, ipif_t *ipif, 0, &ire_uinfo_null, NULL, - NULL); + NULL, + ipst); freeb(dlureq_mp); @@ -6081,7 +6065,7 @@ ip_newroute_ipif_v6(queue_t *q, mblk_t *mp, ipif_t *ipif, ire_add_then_send(q, ire, first_mp); if (ip6_asp_table_held) { - ip6_asp_table_refrele(); + ip6_asp_table_refrele(ipst); ip6_asp_table_held = B_FALSE; } @@ -6101,7 +6085,7 @@ ip_newroute_ipif_v6(queue_t *q, mblk_t *mp, ipif_t *ipif, if (copy_mp != NULL) { boolean_t need_resolve = ire_multirt_need_resolve_v6(v6dstp, - MBLK_GETLABEL(copy_mp)); + MBLK_GETLABEL(copy_mp), ipst); if (!need_resolve) { MULTIRT_DEBUG_UNTAG(copy_mp); freemsg(copy_mp); @@ -6125,7 +6109,7 @@ ip_newroute_ipif_v6(queue_t *q, mblk_t *mp, ipif_t *ipif, ipif_held = B_FALSE; } ipif = ipif_lookup_group_v6(v6dstp, - zoneid); + zoneid, ipst); ip2dbg(("ip_newroute_ipif: " "multirt dst %08x, ipif %p\n", ntohl(V4_PART_OF_V6((*v6dstp))), @@ -6185,7 +6169,8 @@ ip_newroute_ipif_v6(queue_t *q, mblk_t *mp, ipif_t *ipif, 0, &ire_uinfo_null, NULL, - NULL); + NULL, + ipst); if (ire == NULL) { ire_refrele(save_ire); @@ -6212,7 +6197,7 @@ ip_newroute_ipif_v6(queue_t *q, mblk_t *mp, ipif_t *ipif, */ ire_add_then_send(q, ire, first_mp); if (ip6_asp_table_held) { - ip6_asp_table_refrele(); + ip6_asp_table_refrele(ipst); ip6_asp_table_held = B_FALSE; } @@ -6232,7 +6217,7 @@ ip_newroute_ipif_v6(queue_t *q, mblk_t *mp, ipif_t *ipif, if (copy_mp != NULL) { boolean_t need_resolve = ire_multirt_need_resolve_v6(v6dstp, - MBLK_GETLABEL(copy_mp)); + MBLK_GETLABEL(copy_mp), ipst); if (!need_resolve) { MULTIRT_DEBUG_UNTAG(copy_mp); freemsg(copy_mp); @@ -6257,7 +6242,7 @@ ip_newroute_ipif_v6(queue_t *q, mblk_t *mp, ipif_t *ipif, ipif_held = B_FALSE; } ipif = ipif_lookup_group_v6( - v6dstp, zoneid); + v6dstp, zoneid, ipst); ip2dbg(("ip_newroute_ipif: " "multirt dst %08x, " "ipif %p\n", @@ -6293,7 +6278,7 @@ ip_newroute_ipif_v6(queue_t *q, mblk_t *mp, ipif_t *ipif, * address is resolved. 
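ip_newroute_v6() and ip_newroute_ipif_v6() above also make the IPv6 address-selection policy table per instance: ip6_asp_can_lookup() and ip6_asp_table_refrele() now take ipst, bracketing ipif_select_source_v6(). A hedged sketch of that hold/release pairing; example_select_src() is hypothetical and omits the deferred ip6_asp_pending_op() path used in the real code.

    /*
     * Sketch only: example_select_src() is not part of the patch.
     * It mirrors the ip6_asp_can_lookup(ipst)/ip6_asp_table_refrele(ipst)
     * pairing around ipif_select_source_v6() above.
     */
    static ipif_t *
    example_select_src(ill_t *dst_ill, const in6_addr_t *v6dstp,
        zoneid_t zoneid, ip_stack_t *ipst)
    {
        ipif_t *src_ipif = NULL;

        if (ip6_asp_can_lookup(ipst)) {
            src_ipif = ipif_select_source_v6(dst_ill, v6dstp,
                RESTRICT_TO_NONE, IPV6_PREFER_SRC_DEFAULT, zoneid);
            ip6_asp_table_refrele(ipst);
        }
        return (src_ipif);
    }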
*/ if (ip6_asp_table_held) { - ip6_asp_table_refrele(); + ip6_asp_table_refrele(ipst); ip6_asp_table_held = B_FALSE; } ire_delete(ire); @@ -6310,7 +6295,7 @@ ip_newroute_ipif_v6(queue_t *q, mblk_t *mp, ipif_t *ipif, if (copy_mp != NULL) { boolean_t need_resolve = ire_multirt_need_resolve_v6(v6dstp, - MBLK_GETLABEL(copy_mp)); + MBLK_GETLABEL(copy_mp), ipst); if (!need_resolve) { MULTIRT_DEBUG_UNTAG(copy_mp); freemsg(copy_mp); @@ -6335,7 +6320,7 @@ ip_newroute_ipif_v6(queue_t *q, mblk_t *mp, ipif_t *ipif, ipif_held = B_FALSE; } ipif = ipif_lookup_group_v6( - v6dstp, zoneid); + v6dstp, zoneid, ipst); ip2dbg(("ip_newroute_ipif: " "multirt dst %08x, " "ipif %p\n", @@ -6373,14 +6358,14 @@ ip_newroute_ipif_v6(queue_t *q, mblk_t *mp, ipif_t *ipif, break; } if (ip6_asp_table_held) { - ip6_asp_table_refrele(); + ip6_asp_table_refrele(ipst); ip6_asp_table_held = B_FALSE; } } while (multirt_resolve_next); err_ret: if (ip6_asp_table_held) - ip6_asp_table_refrele(); + ip6_asp_table_refrele(ipst); if (ire != NULL) ire_refrele(ire); if (fire != NULL) @@ -6428,7 +6413,7 @@ err_ret: */ static int ip_process_options_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, - uint8_t *optptr, uint_t optlen, uint8_t hdr_type) + uint8_t *optptr, uint_t optlen, uint8_t hdr_type, ip_stack_t *ipst) { uint8_t opt_type; uint_t optused; @@ -6522,7 +6507,7 @@ ip_process_options_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, struct ip6_opt_home_address *oh; in6_addr_t tmp; - if (ipv6_ignore_home_address_opt) + if (ipst->ips_ipv6_ignore_home_address_opt) goto opt_error; if (hdr_type != IPPROTO_DSTOPTS) @@ -6576,7 +6561,7 @@ ip_process_options_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, opt_error: /* Determine which zone should send error */ zoneid = ipif_lookup_addr_zoneid_v6( - &ip6h->ip6_dst, ill); + &ip6h->ip6_dst, ill, ipst); switch (IP6OPT_TYPE(opt_type)) { case IP6OPT_TYPE_SKIP: optused = 2 + optptr[1]; @@ -6601,7 +6586,7 @@ ip_process_options_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ICMP6_PARAMPROB_OPTION, (uint32_t)(optptr - (uint8_t *)ip6h), - B_FALSE, B_FALSE, zoneid); + B_FALSE, B_FALSE, zoneid, ipst); return (-1); case IP6OPT_TYPE_FORCEICMP: if (zoneid == ALL_ZONES) { @@ -6612,7 +6597,7 @@ ip_process_options_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ICMP6_PARAMPROB_OPTION, (uint32_t)(optptr - (uint8_t *)ip6h), - B_FALSE, B_TRUE, zoneid); + B_FALSE, B_TRUE, zoneid, ipst); return (-1); default: ASSERT(0); @@ -6626,13 +6611,13 @@ ip_process_options_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, bad_opt: /* Determine which zone should send error */ - zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill); + zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); if (zoneid == ALL_ZONES) { freemsg(first_mp); } else { icmp_param_problem_v6(WR(q), first_mp, ICMP6_PARAMPROB_OPTION, (uint32_t)(optptr - (uint8_t *)ip6h), - B_FALSE, B_FALSE, zoneid); + B_FALSE, B_FALSE, zoneid, ipst); } return (-1); } @@ -6650,10 +6635,11 @@ ip_process_rthdr(queue_t *q, mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth, uint_t numaddr; in6_addr_t *addrptr; in6_addr_t tmp; + ip_stack_t *ipst = ill->ill_ipst; ASSERT(rth->ip6r_segleft != 0); - if (!ipv6_forward_src_routed) { + if (!ipst->ips_ipv6_forward_src_routed) { /* XXX Check for source routed out same interface? 
*/ BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); @@ -6669,7 +6655,7 @@ ip_process_rthdr(queue_t *q, mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth, icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, (uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h), - B_FALSE, B_FALSE, GLOBAL_ZONEID); + B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); return; } rthdr = (ip6_rthdr0_t *)rth; @@ -6685,7 +6671,7 @@ ip_process_rthdr(queue_t *q, mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth, icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, (uint32_t)((uchar_t *)&rthdr->ip6r0_len - (uchar_t *)ip6h), - B_FALSE, B_FALSE, GLOBAL_ZONEID); + B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); return; } numaddr = rthdr->ip6r0_len / 2; @@ -6698,7 +6684,7 @@ ip_process_rthdr(queue_t *q, mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth, ICMP6_PARAMPROB_HEADER, (uint32_t)((uchar_t *)&rthdr->ip6r0_segleft - (uchar_t *)ip6h), - B_FALSE, B_FALSE, GLOBAL_ZONEID); + B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); return; } addrptr += (numaddr - rthdr->ip6r0_segleft); @@ -6720,7 +6706,7 @@ ip_process_rthdr(queue_t *q, mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth, goto hada_drop; /* Sent by forwarding path, and router is global zone */ icmp_unreachable_v6(WR(q), mp, ICMP6_DST_UNREACH_NOROUTE, - B_FALSE, B_FALSE, GLOBAL_ZONEID); + B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); return; } if (ip_check_v6_mblk(mp, ill) == 0) { @@ -6749,8 +6735,10 @@ ip_rput_v6(queue_t *q, mblk_t *mp) struct iocblk *iocp; uint_t flags = 0; mblk_t *dl_mp; + ip_stack_t *ipst; ill = (ill_t *)q->q_ptr; + ipst = ill->ill_ipst; if (ill->ill_state_flags & ILL_CONDEMNED) { union DL_primitives *dl; @@ -6945,8 +6933,9 @@ ip_rput_v6(queue_t *q, mblk_t *mp) ill_t *, ill, ill_t *, NULL, ip6_t *, ip6h, mblk_t *, first_mp); - FW_HOOKS6(ip6_physical_in_event, ipv6firewall_physical_in, - ill, NULL, ip6h, first_mp, mp); + FW_HOOKS6(ipst->ips_ip6_physical_in_event, + ipst->ips_ipv6firewall_physical_in, + ill, NULL, ip6h, first_mp, mp, ipst); DTRACE_PROBE1(ip6__physical__in__end, mblk_t *, first_mp); @@ -7104,6 +7093,9 @@ ipsec_early_ah_v6(queue_t *q, mblk_t *first_mp, boolean_t mctl_present, ipsec_in_t *ii = NULL; ah_t *ah; ipsec_status_t ipsec_rc; + ip_stack_t *ipst = ill->ill_ipst; + netstack_t *ns = ipst->ips_netstack; + ipsec_stack_t *ipss = ns->netstack_ipsec; ASSERT((hada_mp == NULL) || (!mctl_present)); @@ -7122,7 +7114,8 @@ ipsec_early_ah_v6(queue_t *q, mblk_t *first_mp, boolean_t mctl_present, ASSERT(nexthdr == IPPROTO_AH); if (!mctl_present) { mp = first_mp; - if ((first_mp = ipsec_in_alloc(B_FALSE)) == NULL) { + first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); + if (first_mp == NULL) { ip1dbg(("ipsec_early_ah_v6: IPSEC_IN " "allocation failure.\n")); BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); @@ -7150,12 +7143,12 @@ ipsec_early_ah_v6(queue_t *q, mblk_t *first_mp, boolean_t mctl_present, ii->ipsec_in_da = hada_mp; } - if (!ipsec_loaded()) { - ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, zoneid); + if (!ipsec_loaded(ipss)) { + ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, zoneid, ipst); return (B_TRUE); } - ah = ipsec_inbound_ah_sa(first_mp); + ah = ipsec_inbound_ah_sa(first_mp, ns); if (ah == NULL) return (B_TRUE); ASSERT(ii->ipsec_in_ah_sa != NULL); @@ -7168,7 +7161,7 @@ ipsec_early_ah_v6(queue_t *q, mblk_t *first_mp, boolean_t mctl_present, ip_fanout_proto_again(first_mp, ill, ill, ire); break; case IPSEC_STATUS_FAILED: - BUMP_MIB(&ip6_mib, ipIfStatsInDiscards); + BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInDiscards); break; 
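ipsec_early_ah_v6() above shows the derivation chain used on the inbound IPsec path: ill to ip_stack_t to netstack to ipsec_stack_t, with ipsec_in_alloc() and ipsec_loaded() now told which stack they serve. A condensed, hedged restatement; example_early_ipsec() is hypothetical.

    /*
     * Sketch only: example_early_ipsec() is not part of the patch; the
     * derivations match the ipsec_early_ah_v6() hunk above.
     */
    static mblk_t *
    example_early_ipsec(ill_t *ill, mblk_t *mp)
    {
        ip_stack_t    *ipst = ill->ill_ipst;
        netstack_t    *ns = ipst->ips_netstack;
        ipsec_stack_t *ipss = ns->netstack_ipsec;
        mblk_t        *first_mp;

        if (!ipsec_loaded(ipss))
            return (NULL);

        /* was: ipsec_in_alloc(B_FALSE), now tied to a netstack */
        first_mp = ipsec_in_alloc(B_FALSE, ns);
        if (first_mp == NULL) {
            BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
            freemsg(mp);
            return (NULL);
        }
        first_mp->b_cont = mp;
        return (first_mp);
    }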
case IPSEC_STATUS_PENDING: /* no action needed */ @@ -7279,6 +7272,7 @@ ip_rput_data_v6(queue_t *q, ill_t *inill, mblk_t *mp, ip6_t *ip6h, uint32_t reass_sum; boolean_t cksum_err; mblk_t *mp1; + ip_stack_t *ipst = inill->ill_ipst; EXTRACT_PKT_MP(mp, first_mp, mctl_present); @@ -7361,7 +7355,7 @@ ip_rput_data_v6(queue_t *q, ill_t *inill, mblk_t *mp, ip6_t *ip6h, whereptr += ehdrlen; remlen -= ehdrlen; switch (ip_process_options_v6(q, first_mp, ip6h, optptr, - ehdrlen - 2, IPPROTO_HOPOPTS)) { + ehdrlen - 2, IPPROTO_HOPOPTS, ipst)) { case -1: /* * Packet has been consumed and any @@ -7464,10 +7458,10 @@ ip_rput_data_v6(queue_t *q, ill_t *inill, mblk_t *mp, ip6_t *ip6h, if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst)) { ire = ire_ctable_lookup_v6(&ip6h->ip6_dst, NULL, IRE_CACHE|IRE_LOCAL, ill->ill_ipif, ALL_ZONES, NULL, - MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP); + MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP, ipst); } else { ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES, - MBLK_GETLABEL(mp)); + MBLK_GETLABEL(mp), ipst); } if (ire == NULL) { /* @@ -7490,7 +7484,7 @@ ip_rput_data_v6(queue_t *q, ill_t *inill, mblk_t *mp, ip6_t *ip6h, /* Sent by forwarding path, and router is global zone */ icmp_time_exceeded_v6(WR(q), first_mp, ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE, - GLOBAL_ZONEID); + GLOBAL_ZONEID, ipst); return; } /* @@ -7507,7 +7501,7 @@ ip_rput_data_v6(queue_t *q, ill_t *inill, mblk_t *mp, ip6_t *ip6h, ill->ill_phyint->phyint_ifindex; ip_newroute_v6(q, mp, &ip6h->ip6_dst, &ip6h->ip6_src, IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) ? ill : NULL, - ALL_ZONES); + ALL_ZONES, ipst); return; } ipif_id = ire->ire_ipif->ipif_seqid; @@ -7557,7 +7551,7 @@ ip_rput_data_v6(queue_t *q, ill_t *inill, mblk_t *mp, ip6_t *ip6h, /* Sent by forwarding path, and router is global zone */ icmp_time_exceeded_v6(WR(q), mp, ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE, - GLOBAL_ZONEID); + GLOBAL_ZONEID, ipst); ire_refrele(ire); return; } @@ -7592,7 +7586,7 @@ ip_rput_data_v6(queue_t *q, ill_t *inill, mblk_t *mp, ip6_t *ip6h, BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTooBigErrors); /* Sent by forwarding path, and router is global zone */ icmp_pkt2big_v6(WR(q), mp, ire->ire_max_frag, - ll_multicast, B_TRUE, GLOBAL_ZONEID); + ll_multicast, B_TRUE, GLOBAL_ZONEID, ipst); ire_refrele(ire); return; } @@ -7619,7 +7613,7 @@ ip_rput_data_v6(queue_t *q, ill_t *inill, mblk_t *mp, ip6_t *ip6h, return; } /* TBD add site-local check at site boundary? */ - } else if (ipv6_send_redirects) { + } else if (ipst->ips_ipv6_send_redirects) { in6_addr_t *v6targ; in6_addr_t gw_addr_v6; ire_t *src_ire_v6 = NULL; @@ -7628,7 +7622,7 @@ ip_rput_data_v6(queue_t *q, ill_t *inill, mblk_t *mp, ip6_t *ip6h, * Don't send a redirect when forwarding a source * routed packet. 
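The forwarding hunks in this area read the former ndd globals through the instance as well, for example ips_ipv6_send_redirects and ips_ipv6_forward_src_routed, and ip_source_routed_v6() now takes ipst so its IRE lookup stays inside the right stack. A small hedged sketch; example_may_redirect() is hypothetical.

    /*
     * Sketch only: example_may_redirect() is not part of the patch.
     */
    static boolean_t
    example_may_redirect(ip6_t *ip6h, mblk_t *mp, ip_stack_t *ipst)
    {
        /* was: the global ndd variable ipv6_send_redirects */
        if (!ipst->ips_ipv6_send_redirects)
            return (B_FALSE);

        /* Never redirect a source-routed packet; per-stack check. */
        if (ip_source_routed_v6(ip6h, mp, ipst))
            return (B_FALSE);

        return (B_TRUE);
    }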
*/ - if (ip_source_routed_v6(ip6h, mp)) + if (ip_source_routed_v6(ip6h, mp, ipst)) goto forward; mutex_enter(&ire->ire_lock); @@ -7653,7 +7647,8 @@ ip_rput_data_v6(queue_t *q, ill_t *inill, mblk_t *mp, ip6_t *ip6h, src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src, NULL, NULL, IRE_INTERFACE, ire->ire_ipif, NULL, ALL_ZONES, 0, NULL, - MATCH_IRE_IPIF | MATCH_IRE_TYPE); + MATCH_IRE_IPIF | MATCH_IRE_TYPE, + ipst); if (src_ire_v6 != NULL) { /* @@ -7679,8 +7674,9 @@ forward: ill_t *, inill, ill_t *, outill, ip6_t *, ip6h, mblk_t *, mp); - FW_HOOKS6(ip6_forwarding_event, ipv6firewall_forwarding, - inill, outill, ip6h, mp, mp); + FW_HOOKS6(ipst->ips_ip6_forwarding_event, + ipst->ips_ipv6firewall_forwarding, + inill, outill, ip6h, mp, mp, ipst); DTRACE_PROBE1(ip6__forwarding__end, mblk_t *, mp); @@ -7739,7 +7735,8 @@ forward: } ASSERT(!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)); - if (check_multi && ipv6_strict_dst_multihoming && no_forward) { + if (check_multi && ipst->ips_ipv6_strict_dst_multihoming && + no_forward) { /* * This packet came in on an interface other than the * one associated with the destination address @@ -7862,7 +7859,7 @@ ipv6forus: mp1 = mp->b_cont; if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) - IP6_STAT(ip6_in_sw_cksum); + IP6_STAT(ipst, ip6_in_sw_cksum); IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), @@ -7872,13 +7869,15 @@ ipv6forus: if (cksum_err) { BUMP_MIB(ill->ill_ip_mib, tcpIfStatsInErrs); - if (hck_flags & HCK_FULLCKSUM) - IP6_STAT(ip6_tcp_in_full_hw_cksum_err); - else if (hck_flags & HCK_PARTIALCKSUM) - IP6_STAT(ip6_tcp_in_part_hw_cksum_err); - else - IP6_STAT(ip6_tcp_in_sw_cksum_err); - + if (hck_flags & HCK_FULLCKSUM) { + IP6_STAT(ipst, + ip6_tcp_in_full_hw_cksum_err); + } else if (hck_flags & HCK_PARTIALCKSUM) { + IP6_STAT(ipst, + ip6_tcp_in_part_hw_cksum_err); + } else { + IP6_STAT(ipst, ip6_tcp_in_sw_cksum_err); + } freemsg(first_mp); return; } @@ -7893,6 +7892,9 @@ tcp_fanout: sctp_hdr_t *sctph; uint32_t calcsum, pktsum; uint_t hdr_len = pkt_len - remlen; + sctp_stack_t *sctps; + + sctps = inill->ill_ipst->ips_netstack->netstack_sctp; /* SCTP needs all of the SCTP header */ if (remlen < sizeof (*sctph)) { @@ -7916,14 +7918,14 @@ tcp_fanout: sctph->sh_chksum = 0; calcsum = sctp_cksum(mp, hdr_len); if (calcsum != pktsum) { - BUMP_MIB(&sctp_mib, sctpChecksumError); + BUMP_MIB(&sctps->sctps_mib, sctpChecksumError); freemsg(mp); return; } sctph->sh_chksum = pktsum; ports = *(uint32_t *)(mp->b_rptr + hdr_len); if ((connp = sctp_fanout(&ip6h->ip6_src, &ip6h->ip6_dst, - ports, ipif_id, zoneid, mp)) == NULL) { + ports, ipif_id, zoneid, mp, sctps)) == NULL) { ip_fanout_sctp_raw(first_mp, ill, (ipha_t *)ip6h, B_FALSE, ports, mctl_present, @@ -8021,18 +8023,20 @@ tcp_fanout: } if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) - IP6_STAT(ip6_in_sw_cksum); + IP6_STAT(ipst, ip6_in_sw_cksum); if (cksum_err) { BUMP_MIB(ill->ill_ip_mib, udpIfStatsInCksumErrs); if (hck_flags & HCK_FULLCKSUM) - IP6_STAT(ip6_udp_in_full_hw_cksum_err); + IP6_STAT(ipst, + ip6_udp_in_full_hw_cksum_err); else if (hck_flags & HCK_PARTIALCKSUM) - IP6_STAT(ip6_udp_in_part_hw_cksum_err); + IP6_STAT(ipst, + ip6_udp_in_part_hw_cksum_err); else - IP6_STAT(ip6_udp_in_sw_cksum_err); + IP6_STAT(ipst, ip6_udp_in_sw_cksum_err); freemsg(first_mp); return; @@ -8071,7 +8075,7 @@ tcp_fanout: icmp_fanout: /* Check variable for testing applications */ - if (ipv6_drop_inbound_icmpv6) { + if (ipst->ips_ipv6_drop_inbound_icmpv6) { freemsg(first_mp); return; } @@ -8118,7 
+8122,6 @@ tcp_fanout: hdr_len, mctl_present, 0, zoneid, dl_mp); } - } /* FALLTHRU */ default: { /* @@ -8193,7 +8196,7 @@ tcp_fanout: * defined/implemented yet ). */ switch (ip_process_options_v6(q, first_mp, ip6h, optptr, - ehdrlen - 2, IPPROTO_DSTOPTS)) { + ehdrlen - 2, IPPROTO_DSTOPTS, ipst)) { case -1: /* * Packet has been consumed and any needed @@ -8261,8 +8264,13 @@ tcp_fanout: * duplicates must be discarded. Filtering is active * only if the the ip_cgtp_filter ndd variable is * non-zero. + * + * Only applies to the shared stack since the + * filter_ops do not carry an ip_stack_t or zoneid. */ - if (ip_cgtp_filter && (ip_cgtp_filter_ops != NULL)) { + if (ip_cgtp_filter && (ip_cgtp_filter_ops != NULL) && + ipst->ips_netstack->netstack_stackid == + GLOBAL_NETSTACKID) { int cgtp_flt_pkt = ip_cgtp_filter_ops->cfo_filter_v6( inill->ill_rq, ip6h, fraghdr); @@ -8309,9 +8317,9 @@ tcp_fanout: icmp_param_problem_v6(WR(q), first_mp, ICMP6_PARAMPROB_NEXTHEADER, prev_nexthdr_offset, - B_FALSE, B_FALSE, zoneid); + B_FALSE, B_FALSE, zoneid, ipst); return; - + } case IPPROTO_ROUTING: { uint_t ehdrlen; ip6_rthdr_t *rthdr; @@ -8368,11 +8376,14 @@ tcp_fanout: ipsec_in_t *ii; int ipsec_rc; + ipsec_stack_t *ipss; + ipss = ipst->ips_netstack->netstack_ipsec; if (!mctl_present) { ASSERT(first_mp == mp); - if ((first_mp = ipsec_in_alloc(B_FALSE)) == - NULL) { + first_mp = ipsec_in_alloc(B_FALSE, + ipst->ips_netstack); + if (first_mp == NULL) { ip1dbg(("ip_rput_data_v6: IPSEC_IN " "allocation failure.\n")); BUMP_MIB(ill->ill_ip_mib, @@ -8405,15 +8416,16 @@ tcp_fanout: ii = (ipsec_in_t *)first_mp->b_rptr; } - if (!ipsec_loaded()) { + if (!ipsec_loaded(ipss)) { ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, - ire->ire_zoneid); + ire->ire_zoneid, ipst); return; } /* select inbound SA and have IPsec process the pkt */ if (nexthdr == IPPROTO_ESP) { - esph_t *esph = ipsec_inbound_esp_sa(first_mp); + esph_t *esph = ipsec_inbound_esp_sa(first_mp, + ipst->ips_netstack); if (esph == NULL) return; ASSERT(ii->ipsec_in_esp_sa != NULL); @@ -8422,7 +8434,8 @@ tcp_fanout: ipsec_rc = ii->ipsec_in_esp_sa->ipsa_input_func( first_mp, esph); } else { - ah_t *ah = ipsec_inbound_ah_sa(first_mp); + ah_t *ah = ipsec_inbound_ah_sa(first_mp, + ipst->ips_netstack); if (ah == NULL) return; ASSERT(ii->ipsec_in_ah_sa != NULL); @@ -8469,7 +8482,8 @@ udp_fanout: if (mctl_present || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { connp = NULL; } else { - connp = ipcl_classify_v6(mp, IPPROTO_UDP, hdr_len, zoneid); + connp = ipcl_classify_v6(mp, IPPROTO_UDP, hdr_len, zoneid, + ipst); if ((connp != NULL) && (connp->conn_upq == NULL)) { CONN_DEC_REF(connp); connp = NULL; @@ -8481,7 +8495,7 @@ udp_fanout: ports = *(uint32_t *)(mp->b_rptr + hdr_len + UDP_PORTS_OFFSET); - IP6_STAT(ip6_udp_slow_path); + IP6_STAT(ipst, ip6_udp_slow_path); ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, inill, (flags|IP_FF_SEND_ICMP|IP_FF_IPINFO), mctl_present, zoneid); @@ -8496,7 +8510,7 @@ udp_fanout: } /* Initiate IPPF processing */ - if (IP6_IN_IPP(flags)) { + if (IP6_IN_IPP(flags, ipst)) { ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); if (mp == NULL) { BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); @@ -8515,7 +8529,7 @@ udp_fanout: } } - IP6_STAT(ip6_udp_fast_path); + IP6_STAT(ipst, ip6_udp_fast_path); BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); /* Send it upstream */ @@ -8567,7 +8581,7 @@ ip_rput_frag_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, boolean_t pruned = B_FALSE; uint32_t sum_val; uint16_t sum_flags; - + ip_stack_t *ipst = ill->ill_ipst; 
if (cksum_val != NULL) *cksum_val = 0; @@ -8638,14 +8652,14 @@ ip_rput_frag_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, zoneid_t zoneid; BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); - zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill); + zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); if (zoneid == ALL_ZONES) { freemsg(mp); return (NULL); } icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, (uint32_t)((char *)&ip6h->ip6_plen - - (char *)ip6h), B_FALSE, B_FALSE, zoneid); + (char *)ip6h), B_FALSE, B_FALSE, zoneid, ipst); return (NULL); } @@ -8664,14 +8678,14 @@ ip_rput_frag_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, zoneid_t zoneid; BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); - zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill); + zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); if (zoneid == ALL_ZONES) { freemsg(mp); return (NULL); } icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, (uint32_t)((char *)&fraghdr->ip6f_offlg - - (char *)ip6h), B_FALSE, B_FALSE, zoneid); + (char *)ip6h), B_FALSE, B_FALSE, zoneid, ipst); return (NULL); } @@ -8687,7 +8701,7 @@ ip_rput_frag_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, * Drop the fragmented as early as possible, if * we don't have resource(s) to re-assemble. */ - if (ip_reass_queue_bytes == 0) { + if (ipst->ips_ip_reass_queue_bytes == 0) { freemsg(mp); return (NULL); } @@ -8722,9 +8736,10 @@ ip_rput_frag_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, */ if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >= - ip_reass_queue_bytes) { - ill_frag_prune(ill, (ip_reass_queue_bytes < msg_len) ? 0 - : (ip_reass_queue_bytes - msg_len)); + ipst->ips_ip_reass_queue_bytes) { + ill_frag_prune(ill, + (ipst->ips_ip_reass_queue_bytes < msg_len) ? 0 : + (ipst->ips_ip_reass_queue_bytes - msg_len)); pruned = B_TRUE; } @@ -8790,7 +8805,7 @@ ip_rput_frag_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, return (NULL); } - if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS) { + if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS(ipst)) { /* * Too many fragmented packets in this hash bucket. * Free the oldest. @@ -9171,7 +9186,7 @@ done: * want to send redirects. */ static boolean_t -ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp) +ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp, ip_stack_t *ipst) { uint8_t nexthdr; in6_addr_t *addrptr; @@ -9241,7 +9256,8 @@ ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp) if (addrptr != NULL) { ire = ire_ctable_lookup_v6(addrptr, NULL, IRE_LOCAL, NULL, ALL_ZONES, NULL, - MATCH_IRE_TYPE); + MATCH_IRE_TYPE, + ipst); if (ire != NULL) { ire_refrele(ire); return (B_TRUE); @@ -9315,6 +9331,16 @@ ip_output_v6(void *arg, mblk_t *mp, void *arg2, int caller) ill_t *saved_ill = NULL; boolean_t conn_lock_held; boolean_t need_decref = B_FALSE; + ip_stack_t *ipst; + + if (q->q_next != NULL) { + ill = (ill_t *)q->q_ptr; + ipst = ill->ill_ipst; + } else { + connp = (conn_t *)arg; + ASSERT(connp != NULL); + ipst = connp->conn_netstack->netstack_ip; + } /* * Highest bit in version field is Reachability Confirmation bit @@ -9363,7 +9389,7 @@ ip_output_v6(void *arg, mblk_t *mp, void *arg2, int caller) * originating from tcp should have been directed over to * tcp_multisend() in the first place. 
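ip_output_v6() above has to resolve its instance before touching any state, because it can be entered either as a module (q->q_next != NULL, so q_ptr is an ill) or from a conn. A condensed restatement of that derivation, as a hedged sketch; example_output_stack() is hypothetical.

    /*
     * Sketch only: condensed from the ip_output_v6() hunk above.
     */
    static ip_stack_t *
    example_output_stack(queue_t *q, void *arg)
    {
        if (q->q_next != NULL) {
            /* Plumbed as a module: the queue belongs to an ill. */
            ill_t *ill = (ill_t *)q->q_ptr;
            return (ill->ill_ipst);
        } else {
            /* Driver open: arg is the conn_t of the sender. */
            conn_t *connp = (conn_t *)arg;
            ASSERT(connp != NULL);
            return (connp->conn_netstack->netstack_ip);
        }
    }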
*/ - BUMP_MIB(&ip6_mib, ipIfStatsOutDiscards); + BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); freemsg(mp); return; } else if (DB_TYPE(mp) == M_CTL) { @@ -9416,7 +9442,7 @@ ip_output_v6(void *arg, mblk_t *mp, void *arg2, int caller) if (mctltype == IPSEC_IN || IPVER(ip6h) != IPV6_VERSION || io->ipsec_out_proc_begin) { - mibptr = &ip6_mib; + mibptr = &ipst->ips_ip6_mib; goto notv6; } } @@ -9428,12 +9454,11 @@ ip_output_v6(void *arg, mblk_t *mp, void *arg2, int caller) ip6h = (ip6_t *)mp->b_rptr; if (IPVER(ip6h) != IPV6_VERSION) { - mibptr = &ip6_mib; + mibptr = &ipst->ips_ip6_mib; goto notv6; } if (q->q_next != NULL) { - ill = (ill_t *)q->q_ptr; /* * We don't know if this ill will be used for IPv6 * until the ILLF_IPV6 flag is set via SIOCSLIFNAME. @@ -9483,7 +9508,7 @@ ip_output_v6(void *arg, mblk_t *mp, void *arg2, int caller) (void) putq(q, mp); return; } - mibptr = &ip6_mib; + mibptr = &ipst->ips_ip6_mib; unspec_src = connp->conn_unspec_src; do_outrequests = B_TRUE; if (mp->b_flag & MSGHASREF) { @@ -9506,7 +9531,7 @@ ip_output_v6(void *arg, mblk_t *mp, void *arg2, int caller) ASSERT(first_mp == mp); /* XXX Any better way to get the protocol fast ? */ if (((mp = ipsec_attach_ipsec_out(mp, connp, NULL, - connp->conn_ulp)) == NULL)) { + connp->conn_ulp, ipst->ips_netstack)) == NULL)) { BUMP_MIB(mibptr, ipIfStatsOutDiscards); if (need_decref) CONN_DEC_REF(connp); @@ -9619,7 +9644,7 @@ ip_output_v6(void *arg, mblk_t *mp, void *arg2, int caller) if (ill != NULL) ill_refrele(ill); ill = ill_lookup_on_ifindex(ip6i->ip6i_ifindex, 1, - NULL, NULL, NULL, NULL); + NULL, NULL, NULL, NULL, ipst); if (ill == NULL) { if (do_outrequests) { BUMP_MIB(mibptr, @@ -9675,7 +9700,7 @@ ip_output_v6(void *arg, mblk_t *mp, void *arg2, int caller) 0, 0, (IRE_LOCAL|IRE_LOOPBACK), NULL, NULL, connp != NULL ? IPCL_ZONEID(connp) : zoneid, NULL, - MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY); + MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY, ipst); if (ire == NULL) { if (do_outrequests) BUMP_MIB(mibptr, @@ -9876,7 +9901,8 @@ ip_output_v6(void *arg, mblk_t *mp, void *arg2, int caller) * as it does not really have a real destination to * talk to. */ - ire = ire_cache_lookup_v6(v6dstp, zoneid, MBLK_GETLABEL(mp)); + ire = ire_cache_lookup_v6(v6dstp, zoneid, MBLK_GETLABEL(mp), + ipst); } else { /* * IRE_MARK_CONDEMNED is marked in ire_delete. We don't @@ -9903,7 +9929,7 @@ ip_output_v6(void *arg, mblk_t *mp, void *arg2, int caller) IRE_REFRELE_NOTR(ire); ire = ire_cache_lookup_v6(v6dstp, zoneid, - MBLK_GETLABEL(mp)); + MBLK_GETLABEL(mp), ipst); if (ire != NULL) { IRE_REFHOLD_NOTR(ire); @@ -9957,17 +9983,18 @@ ip_output_v6(void *arg, mblk_t *mp, void *arg2, int caller) * NDP packets must have a hop limit of 255; don't * change the hop limit in that case. 
*/ - if ((ip_multirt_ttl > 0) && - (ip6h->ip6_hops > ip_multirt_ttl) && + if ((ipst->ips_ip_multirt_ttl > 0) && + (ip6h->ip6_hops > ipst->ips_ip_multirt_ttl) && (ip6h->ip6_hops != IPV6_MAX_HOPS)) { if (ip_debug > 3) { ip2dbg(("ip_wput_v6: forcing multirt " "hop limit to %d (was %d) ", - ip_multirt_ttl, ip6h->ip6_hops)); + ipst->ips_ip_multirt_ttl, + ip6h->ip6_hops)); pr_addr_dbg("v6dst %s\n", AF_INET6, &ire->ire_addr_v6); } - ip6h->ip6_hops = ip_multirt_ttl; + ip6h->ip6_hops = ipst->ips_ip_multirt_ttl; } /* @@ -9982,7 +10009,7 @@ ip_output_v6(void *arg, mblk_t *mp, void *arg2, int caller) */ multirt_need_resolve = ire_multirt_need_resolve_v6(&ire->ire_addr_v6, - MBLK_GETLABEL(first_mp)); + MBLK_GETLABEL(first_mp), ipst); ip2dbg(("ip_wput_v6: ire %p, " "multirt_need_resolve %d, first_mp %p\n", (void *)ire, multirt_need_resolve, @@ -10015,7 +10042,7 @@ ip_output_v6(void *arg, mblk_t *mp, void *arg2, int caller) ip6h = (ip6_t *)copy_mp->b_rptr; } ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, - &ip6h->ip6_src, NULL, zoneid); + &ip6h->ip6_src, NULL, zoneid, ipst); } if (ill != NULL) ill_refrele(ill); @@ -10099,7 +10126,7 @@ ip_output_v6(void *arg, mblk_t *mp, void *arg2, int caller) BUMP_MIB(mibptr, ipIfStatsHCOutRequests); if (need_decref) CONN_DEC_REF(connp); - ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, NULL, zoneid); + ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, NULL, zoneid, ipst); if (ill != NULL) ill_refrele(ill); return; @@ -10241,7 +10268,7 @@ multicast_discard: } else { mutex_exit(&connp->conn_lock); conn_lock_held = B_FALSE; - ipif = ipif_lookup_group_v6(v6dstp, zoneid); + ipif = ipif_lookup_group_v6(v6dstp, zoneid, ipst); if (ipif == NULL) { ip1dbg(("ip_output_v6: multicast no ipif\n")); goto multicast_discard; @@ -10309,7 +10336,8 @@ multicast_discard: ASSERT(io->ipsec_out_type == IPSEC_OUT); } else { ASSERT(mp == first_mp); - if ((first_mp = ipsec_alloc_ipsec_out()) == NULL) { + if ((first_mp = ipsec_alloc_ipsec_out(ipst->ips_netstack)) == + NULL) { BUMP_MIB(mibptr, ipIfStatsOutDiscards); freemsg(mp); if (ill != NULL) @@ -10361,7 +10389,7 @@ send_from_ill: * It is used only when ire_cache_lookup is used above. */ ire = ire_ctable_lookup_v6(v6dstp, 0, 0, ill->ill_ipif, - zoneid, MBLK_GETLABEL(mp), match_flags); + zoneid, MBLK_GETLABEL(mp), match_flags, ipst); if (ire != NULL) { /* * Check if the ire has the RTF_MULTIRT flag, inherited @@ -10375,17 +10403,18 @@ send_from_ill: * NDP packets must have a hop limit of 255; don't * change the hop limit in that case. 
*/ - if ((ip_multirt_ttl > 0) && - (ip6h->ip6_hops > ip_multirt_ttl) && + if ((ipst->ips_ip_multirt_ttl > 0) && + (ip6h->ip6_hops > ipst->ips_ip_multirt_ttl) && (ip6h->ip6_hops != IPV6_MAX_HOPS)) { if (ip_debug > 3) { ip2dbg(("ip_wput_v6: forcing multirt " "hop limit to %d (was %d) ", - ip_multirt_ttl, ip6h->ip6_hops)); + ipst->ips_ip_multirt_ttl, + ip6h->ip6_hops)); pr_addr_dbg("v6dst %s\n", AF_INET6, &ire->ire_addr_v6); } - ip6h->ip6_hops = ip_multirt_ttl; + ip6h->ip6_hops = ipst->ips_ip_multirt_ttl; } /* @@ -10400,7 +10429,7 @@ send_from_ill: */ multirt_need_resolve = ire_multirt_need_resolve_v6(&ire->ire_addr_v6, - MBLK_GETLABEL(first_mp)); + MBLK_GETLABEL(first_mp), ipst); ip2dbg(("ip_wput_v6[send_from_ill]: ire %p, " "multirt_need_resolve %d, first_mp %p\n", (void *)ire, multirt_need_resolve, @@ -10440,7 +10469,7 @@ send_from_ill: } if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { ipif = ipif_lookup_group_v6(&ip6h->ip6_dst, - zoneid); + zoneid, ipst); if (ipif == NULL) { ip1dbg(("ip_wput_v6: No ipif for " "multicast\n")); @@ -10453,7 +10482,7 @@ send_from_ill: ipif_refrele(ipif); } else { ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, - &ip6h->ip6_src, ill, zoneid); + &ip6h->ip6_src, ill, zoneid, ipst); } } ill_refrele(ill); @@ -10532,7 +10561,7 @@ send_from_ill: unspec_src, zoneid); } else { ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, ill, - zoneid); + zoneid, ipst); } ill_refrele(ill); return; @@ -10552,7 +10581,7 @@ notv6: /* The 'q' is the default SCTP queue */ connp = (conn_t *)arg; } else { - ip_setqinfo(RD(q), IPV4_MINOR, B_TRUE); + ip_setqinfo(RD(q), IPV4_MINOR, B_TRUE, ipst); } } BUMP_MIB(mibptr, ipIfStatsOutWrongIPVersion); @@ -10601,6 +10630,7 @@ ip_wput_local_v6(queue_t *q, ill_t *ill, ip6_t *ip6h, mblk_t *first_mp, mib2_ipIfStatsEntry_t *mibptr; ilm_t *ilm; uint_t nexthdr_offset; + ip_stack_t *ipst = ill->ill_ipst; if (DB_TYPE(mp) == M_CTL) { io = (ipsec_out_t *)mp->b_rptr; @@ -10630,8 +10660,9 @@ ip_wput_local_v6(queue_t *q, ill_t *ill, ip6_t *ip6h, mblk_t *first_mp, ill_t *, ill, ill_t *, NULL, ip6_t *, ip6h, mblk_t *, first_mp); - FW_HOOKS6(ip6_loopback_in_event, ipv6firewall_loopback_in, - ill, NULL, ip6h, first_mp, mp); + FW_HOOKS6(ipst->ips_ip6_loopback_in_event, + ipst->ips_ipv6firewall_loopback_in, + ill, NULL, ip6h, first_mp, mp, ipst); DTRACE_PROBE1(ip6__loopback__in__end, mblk_t *, first_mp); @@ -10731,7 +10762,7 @@ ip_wput_local_v6(queue_t *q, ill_t *ill, ip6_t *ip6h, mblk_t *first_mp, icmp_update_out_mib_v6(ill, icmp6); /* Check variable for testing applications */ - if (ipv6_drop_inbound_icmpv6) { + if (ipst->ips_ipv6_drop_inbound_icmpv6) { freemsg(first_mp); return; } @@ -10850,6 +10881,8 @@ ip_wput_ire_v6(queue_t *q, mblk_t *mp, ire_t *ire, int unspec_src, boolean_t conn_multicast_loop; /* conn value for multicast */ boolean_t multicast_forward; /* Should we forward ? 
*/ int max_frag; + ip_stack_t *ipst = ire->ire_ipst; + ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; ill = ire_to_ill(ire); first_mp = mp; @@ -10922,10 +10955,10 @@ ip_wput_ire_v6(queue_t *q, mblk_t *mp, ire_t *ire, int unspec_src, src_ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0, 0, NULL, NULL, zoneid, 0, NULL, (MATCH_IRE_RECURSIVE | - MATCH_IRE_DEFAULT | MATCH_IRE_RJ_BHOLE)); + MATCH_IRE_DEFAULT | MATCH_IRE_RJ_BHOLE), ipst); if (src_ire != NULL && !(src_ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) && - (!ip_restrict_interzone_loopback || + (!ipst->ips_ip_restrict_interzone_loopback || ire_local_same_ill_group(ire, src_ire))) { if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && !unspec_src) { @@ -10942,19 +10975,20 @@ ip_wput_ire_v6(queue_t *q, mblk_t *mp, ire_t *ire, int unspec_src, } ire_refrele(src_ire); } - if (ip_hdr_complete_v6(ip6h, zoneid)) { + if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { /* Failed */ freemsg(first_mp); return; } icmp_unreachable_v6(q, first_mp, ICMP6_DST_UNREACH_NOROUTE, B_FALSE, B_FALSE, - zoneid); + zoneid, ipst); return; } } - if (mp->b_datap->db_type == M_CTL || ipsec_outbound_v6_policy_present) { + if (mp->b_datap->db_type == M_CTL || + ipss->ipsec_outbound_v6_policy_present) { mp = ip_wput_ire_parse_ipsec_out(first_mp, NULL, ip6h, ire, connp, unspec_src, zoneid); if (mp == NULL) { @@ -11042,9 +11076,11 @@ ip_wput_ire_v6(queue_t *q, mblk_t *mp, ire_t *ire, int unspec_src, ip6_t *, nip6h, mblk_t *, nmp); - FW_HOOKS6(ip6_loopback_out_event, - ipv6firewall_loopback_out, - NULL, ill, nip6h, nmp, mp_ip6h); + FW_HOOKS6( + ipst->ips_ip6_loopback_out_event, + ipst->ips_ipv6firewall_loopback_out, + NULL, ill, nip6h, nmp, mp_ip6h, + ipst); DTRACE_PROBE1( ip6__loopback__out__end, @@ -11108,7 +11144,7 @@ ip_wput_ire_v6(queue_t *q, mblk_t *mp, ire_t *ire, int unspec_src, * the queue to enqueue the packet and we discard * the packet. */ - if (ip_output_queue && connp != NULL && + if (ipst->ips_ip_output_queue && connp != NULL && !mctl_present && caller != IRE_SEND) { if (caller == IP_WSRV) { connp->conn_did_putbq = 1; @@ -11261,8 +11297,9 @@ ip_wput_ire_v6(queue_t *q, mblk_t *mp, ire_t *ire, int unspec_src, /* Software checksum? */ if (DB_CKSUMFLAGS(mp) == 0) { - IP6_STAT(ip6_out_sw_cksum); - IP6_STAT_UPDATE(ip6_tcp_out_sw_cksum_bytes, + IP6_STAT(ipst, ip6_out_sw_cksum); + IP6_STAT_UPDATE(ipst, + ip6_tcp_out_sw_cksum_bytes, (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - hdr_length); } @@ -11308,8 +11345,9 @@ ip_wput_ire_v6(queue_t *q, mblk_t *mp, ire_t *ire, int unspec_src, /* Software checksum? */ if (DB_CKSUMFLAGS(mp) == 0) { - IP6_STAT(ip6_out_sw_cksum); - IP6_STAT_UPDATE(ip6_udp_out_sw_cksum_bytes, + IP6_STAT(ipst, ip6_out_sw_cksum); + IP6_STAT_UPDATE(ipst, + ip6_udp_out_sw_cksum_bytes, (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - hdr_length); } @@ -11399,7 +11437,7 @@ ip_wput_ire_v6(queue_t *q, mblk_t *mp, ire_t *ire, int unspec_src, (ire->ire_frag_flag & IPH_FRAG_HDR)) { if (connp != NULL && (flags & IP6I_DONTFRAG)) { icmp_pkt2big_v6(ire->ire_stq, first_mp, - max_frag, B_FALSE, B_TRUE, zoneid); + max_frag, B_FALSE, B_TRUE, zoneid, ipst); return; } @@ -11425,7 +11463,7 @@ ip_wput_ire_v6(queue_t *q, mblk_t *mp, ire_t *ire, int unspec_src, IPV6_HDR_LEN, max_frag)); ASSERT(mp == first_mp); /* Initiate IPPF processing */ - if (IPP_ENABLED(IPP_LOCAL_OUT)) { + if (IPP_ENABLED(IPP_LOCAL_OUT, ipst)) { ip_process(IPP_LOCAL_OUT, &mp, ill_index); if (mp == NULL) { return; @@ -11450,7 +11488,7 @@ ip_wput_ire_v6(queue_t *q, mblk_t *mp, ire_t *ire, int unspec_src, * generate. 
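A hedged sketch of what the IP6_STAT(ipst, ...) / IP6_STAT_UPDATE(ipst, ...) rewrites above amount to: the counters move from file-scope globals into per-stack storage, so each netstack accounts its own software checksums. The struct and macro bodies below are simplified stand-ins (the real counters are kstat_named_t values), written as a self-contained program:

#include <stdio.h>
#include <stdint.h>

typedef struct {
	uint64_t ip6_out_sw_cksum;
	uint64_t ip6_tcp_out_sw_cksum_bytes;
} ip6_stat_sketch_t;

typedef struct {
	ip6_stat_sketch_t ips_ip6_statistics;	/* one copy per "netstack" */
} ip_stack_sketch_t;

#define	IP6_STAT(ipst, x)		((ipst)->ips_ip6_statistics.x++)
#define	IP6_STAT_UPDATE(ipst, x, n)	((ipst)->ips_ip6_statistics.x += (n))

int
main(void)
{
	ip_stack_sketch_t st = { { 0, 0 } };

	IP6_STAT(&st, ip6_out_sw_cksum);
	IP6_STAT_UPDATE(&st, ip6_tcp_out_sw_cksum_bytes, 1460);
	printf("%llu %llu\n",
	    (unsigned long long)st.ips_ip6_statistics.ip6_out_sw_cksum,
	    (unsigned long long)st.ips_ip6_statistics.ip6_tcp_out_sw_cksum_bytes);
	return (0);
}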
*/ icmp_pkt2big_v6(ire->ire_stq, first_mp, - max_frag, B_FALSE, B_TRUE, zoneid); + max_frag, B_FALSE, B_TRUE, zoneid, ipst); return; } if (attach_index != 0) @@ -11480,8 +11518,9 @@ ip_wput_ire_v6(queue_t *q, mblk_t *mp, ire_t *ire, int unspec_src, DTRACE_PROBE4(ip6__loopback__out__start, ill_t *, NULL, ill_t *, ill, ip6_t *, ip6h, mblk_t *, first_mp); - FW_HOOKS6(ip6_loopback_out_event, ipv6firewall_loopback_out, - NULL, ill, ip6h, first_mp, mp); + FW_HOOKS6(ipst->ips_ip6_loopback_out_event, + ipst->ips_ipv6firewall_loopback_out, + NULL, ill, ip6h, first_mp, mp, ipst); DTRACE_PROBE1(ip6__loopback__out__end, mblk_t *, first_mp); if (first_mp != NULL) ip_wput_local_v6(RD(q), ill, ip6h, first_mp, ire, 0); @@ -11507,6 +11546,7 @@ ip_wput_frag_mdt_v6(mblk_t *mp, ire_t *ire, size_t max_chunk, uint16_t offset; queue_t *stq = ire->ire_stq; ill_t *ill = (ill_t *)stq->q_ptr; + ip_stack_t *ipst = ill->ill_ipst; ASSERT(DB_TYPE(mp) == M_DATA); ASSERT(MBLKL(mp) > unfragmentable_len); @@ -11523,7 +11563,7 @@ ip_wput_frag_mdt_v6(mblk_t *mp, ire_t *ire, size_t max_chunk, ASSERT(pkts > 1); /* Allocate a message block which will hold all the IP Headers. */ - wroff = ip_wroff_extra; + wroff = ipst->ips_ip_wroff_extra; hdr_chunk_len = wroff + unfragmentable_len + sizeof (ip6_frag_t); i1 = pkts * hdr_chunk_len; @@ -11539,14 +11579,14 @@ ip_wput_frag_mdt_v6(mblk_t *mp, ire_t *ire, size_t max_chunk, if (md_mp == NULL) { freemsg(hdr_mp); } else { -free_mmd: IP6_STAT(ip6_frag_mdt_discarded); +free_mmd: IP6_STAT(ipst, ip6_frag_mdt_discarded); freemsg(md_mp); } - IP6_STAT(ip6_frag_mdt_allocfail); + IP6_STAT(ipst, ip6_frag_mdt_allocfail); BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); return; } - IP6_STAT(ip6_frag_mdt_allocd); + IP6_STAT(ipst, ip6_frag_mdt_allocd); /* * Add a payload buffer to the Multidata; this operation must not @@ -11655,7 +11695,7 @@ free_mmd: IP6_STAT(ip6_frag_mdt_discarded); (void *)mmd, (void *)&pdi, error); /* NOTREACHED */ } - IP6_STAT(ip6_frag_mdt_addpdescfail); + IP6_STAT(ipst, ip6_frag_mdt_addpdescfail); /* Free unattached payload message blocks as well */ md_mp->b_cont = mp->b_cont; goto free_mmd; @@ -11695,7 +11735,7 @@ free_mmd: IP6_STAT(ip6_frag_mdt_discarded); UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets, (ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN)) + pkts * (unfragmentable_len + sizeof (ip6_frag_t))); - IP6_STAT_UPDATE(ip6_frag_mdt_pkt_out, pkts); + IP6_STAT_UPDATE(ipst, ip6_frag_mdt_pkt_out, pkts); ire->ire_ob_pkt_count += pkts; if (ire->ire_ipif != NULL) @@ -11748,6 +11788,7 @@ ip_wput_frag_v6(mblk_t *mp, ire_t *ire, uint_t reachable, conn_t *connp, uint8_t nexthdr; uint_t prev_nexthdr_offset; uint8_t *ptr; + ip_stack_t *ipst = ire->ire_ipst; ASSERT(ire->ire_type == IRE_CACHE); ill = (ill_t *)ire->ire_stq->q_ptr; @@ -11806,7 +11847,7 @@ ip_wput_frag_v6(mblk_t *mp, ire_t *ire, uint_t reachable, conn_t *connp, /* Check if we can use MDT to send out the frags. */ ASSERT(!IRE_IS_LOCAL(ire)); - if (ip_multidata_outbound && reachable == 0 && + if (ipst->ips_ip_multidata_outbound && reachable == 0 && !(ire->ire_flags & RTF_MULTIRT) && ILL_MDT_CAPABLE(ill) && IP_CAN_FRAG_MDT(mp, unfragmentable_len, max_chunk)) { ip_wput_frag_mdt_v6(mp, ire, max_chunk, unfragmentable_len, @@ -11820,14 +11861,14 @@ ip_wput_frag_v6(mblk_t *mp, ire_t *ire, uint_t reachable, conn_t *connp, * fragment header. This (or a copy) will be used as the * first mblk for each fragment we send. 
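An illustrative sketch of the arithmetic behind the IPv6 fragmentation path shown here: the fragmentable payload is carved into pieces that fit max_frag after the unfragmentable headers plus the 8-byte fragment header are subtracted, and non-final pieces are rounded down to an 8-byte boundary. The exact kernel computation may differ; this is only the general shape:

#include <stdio.h>
#include <stdint.h>

static uint32_t
frag_chunk_size(uint32_t max_frag, uint32_t unfragmentable_len)
{
	/* leave room for the unfragmentable part and the ip6_frag_t header */
	uint32_t room = max_frag - unfragmentable_len - 8;

	return (room & ~7u);		/* round down to an 8-byte boundary */
}

int
main(void)
{
	uint32_t chunk = frag_chunk_size(1500, 40);	/* plain IPv6 header */
	uint32_t payload = 4000;
	uint32_t frags = (payload + chunk - 1) / chunk;

	printf("chunk=%u frags=%u\n", chunk, frags);
	return (0);
}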
*/ - hmp = allocb(unfragmentable_len + sizeof (ip6_frag_t) + ip_wroff_extra, - BPRI_HI); + hmp = allocb(unfragmentable_len + sizeof (ip6_frag_t) + + ipst->ips_ip_wroff_extra, BPRI_HI); if (hmp == NULL) { BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); freemsg(mp); return; } - hmp->b_rptr += ip_wroff_extra; + hmp->b_rptr += ipst->ips_ip_wroff_extra; hmp->b_wptr = hmp->b_rptr + unfragmentable_len + sizeof (ip6_frag_t); fip6h = (ip6_t *)hmp->b_rptr; @@ -12038,6 +12079,7 @@ ip_xmit_v6(mblk_t *mp, ire_t *ire, uint_t flags, conn_t *connp, ire_t *save_ire = ire; boolean_t multirt_send = B_FALSE; mblk_t *next_mp = NULL; + ip_stack_t *ipst = ire->ire_ipst; ip6h = (ip6_t *)mp->b_rptr; ASSERT(!IN6_IS_ADDR_V4MAPPED(&ire->ire_addr_v6)); @@ -12216,7 +12258,7 @@ ip_xmit_v6(mblk_t *mp, ire_t *ire, uint_t flags, conn_t *connp, ((ill_t *)stq->q_ptr)->ill_phyint->phyint_ifindex; /* Initiate IPPF processing */ - if (IP6_OUT_IPP(flags)) { + if (IP6_OUT_IPP(flags, ipst)) { ip_process(IPP_LOCAL_OUT, &mp, ill_index); if (mp == NULL) { BUMP_MIB(ill->ill_ip_mib, @@ -12323,9 +12365,9 @@ ip_xmit_v6(mblk_t *mp, ire_t *ire, uint_t flags, conn_t *connp, ill_t *, NULL, ill_t *, out_ill, ip6_t *, ip6h, mblk_t *, mp); - FW_HOOKS6(ip6_physical_out_event, - ipv6firewall_physical_out, - NULL, out_ill, ip6h, mp, mp_ip6h); + FW_HOOKS6(ipst->ips_ip6_physical_out_event, + ipst->ips_ipv6firewall_physical_out, + NULL, out_ill, ip6h, mp, mp_ip6h, ipst); DTRACE_PROBE1(ip6__physical__out__end, mblk_t *, mp); @@ -12507,7 +12549,7 @@ ip_xmit_v6(mblk_t *mp, ire_t *ire, uint_t flags, conn_t *connp, nce->nce_state = ND_DELAY; mutex_exit(&nce->nce_lock); NDP_RESTART_TIMER(nce, - delay_first_probe_time); + ipst->ips_delay_first_probe_time); if (ip_debug > 3) { /* ip2dbg */ pr_addr_dbg("ip_xmit_v6: state" @@ -12572,8 +12614,8 @@ ip_xmit_v6(mblk_t *mp, ire_t *ire, uint_t flags, conn_t *connp, * position in the queue to enqueue the packet and we discard * the packet. */ - if (ip_output_queue && (connp != NULL) && (io == NULL) && - (caller != IRE_SEND)) { + if (ipst->ips_ip_output_queue && (connp != NULL) && + (io == NULL) && (caller != IRE_SEND)) { if (caller == IP_WSRV) { connp->conn_did_putbq = 1; (void) putbq(connp->conn_wq, mp); @@ -12885,8 +12927,9 @@ ip_find_rthdr_v6(ip6_t *ip6h, uint8_t *endptr) * (last hop in the routing header when the packet is sent) and * the first hop (ip6_dst when the packet is sent) */ +/* ARGSUSED2 */ uint32_t -ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth) +ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth, netstack_t *ns) { uint_t numaddr; uint_t i; @@ -12971,6 +13014,7 @@ ip_multirt_apply_membership_v6(int (*fn)(conn_t *, boolean_t, irb_t *irb; int index, error = 0; opt_restart_t *or; + ip_stack_t *ipst = ire->ire_ipst; irb = ire->ire_bucket; ASSERT(irb != NULL); @@ -12987,7 +13031,7 @@ ip_multirt_apply_membership_v6(int (*fn)(conn_t *, boolean_t, ire_gw = ire_ftable_lookup_v6(&ire->ire_gateway_addr_v6, 0, 0, IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, NULL, - MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE); + MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE, ipst); /* No resolver exists for the gateway; skip this ire. 
*/ if (ire_gw == NULL) continue; @@ -13030,14 +13074,52 @@ ip_multirt_apply_membership_v6(int (*fn)(conn_t *, boolean_t, } void -ip6_kstat_init(void) +*ip6_kstat_init(netstackid_t stackid, ip6_stat_t *ip6_statisticsp) +{ + kstat_t *ksp; + + ip6_stat_t template = { + { "ip6_udp_fast_path", KSTAT_DATA_UINT64 }, + { "ip6_udp_slow_path", KSTAT_DATA_UINT64 }, + { "ip6_udp_fannorm", KSTAT_DATA_UINT64 }, + { "ip6_udp_fanmb", KSTAT_DATA_UINT64 }, + { "ip6_out_sw_cksum", KSTAT_DATA_UINT64 }, + { "ip6_in_sw_cksum", KSTAT_DATA_UINT64 }, + { "ip6_tcp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, + { "ip6_tcp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, + { "ip6_tcp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, + { "ip6_tcp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, + { "ip6_udp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, + { "ip6_udp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, + { "ip6_udp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, + { "ip6_udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, + { "ip6_frag_mdt_pkt_out", KSTAT_DATA_UINT64 }, + { "ip6_frag_mdt_discarded", KSTAT_DATA_UINT64 }, + { "ip6_frag_mdt_allocfail", KSTAT_DATA_UINT64 }, + { "ip6_frag_mdt_addpdescfail", KSTAT_DATA_UINT64 }, + { "ip6_frag_mdt_allocd", KSTAT_DATA_UINT64 }, + }; + ksp = kstat_create_netstack("ip", 0, "ip6stat", "net", + KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), + KSTAT_FLAG_VIRTUAL, stackid); + + if (ksp == NULL) + return (NULL); + + bcopy(&template, ip6_statisticsp, sizeof (template)); + ksp->ks_data = (void *)ip6_statisticsp; + ksp->ks_private = (void *)(uintptr_t)stackid; + + kstat_install(ksp); + return (ksp); +} + +void +ip6_kstat_fini(netstackid_t stackid, kstat_t *ksp) { - if ((ip6_kstat = kstat_create("ip", 0, "ip6stat", - "net", KSTAT_TYPE_NAMED, - sizeof (ip6_statistics) / sizeof (kstat_named_t), - KSTAT_FLAG_VIRTUAL)) != NULL) { - ip6_kstat->ks_data = &ip6_statistics; - kstat_install(ip6_kstat); + if (ksp != NULL) { + ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); + kstat_delete_netstack(ksp, stackid); } } @@ -13097,6 +13179,7 @@ ip6_set_pktinfo(cred_t *cr, conn_t *connp, struct in6_pktinfo *pkti, mblk_t *mp) ill_t *ill; ire_t *ire; int error; + ip_stack_t *ipst = connp->conn_netstack->netstack_ip; /* * Verify the source address and ifindex. Privileged users can use @@ -13106,7 +13189,7 @@ ip6_set_pktinfo(cred_t *cr, conn_t *connp, struct in6_pktinfo *pkti, mblk_t *mp) if (pkti->ipi6_ifindex != 0) { ASSERT(connp != NULL); ill = ill_lookup_on_ifindex(pkti->ipi6_ifindex, B_TRUE, - CONNP_TO_WQ(connp), mp, ip_restart_optmgmt, &error); + CONNP_TO_WQ(connp), mp, ip_restart_optmgmt, &error, ipst); if (ill == NULL) { /* * We just want to know if the interface exists, we @@ -13123,7 +13206,7 @@ ip6_set_pktinfo(cred_t *cr, conn_t *connp, struct in6_pktinfo *pkti, mblk_t *mp) secpolicy_net_rawaccess(cr) != 0) { ire = ire_route_lookup_v6(&pkti->ipi6_addr, 0, 0, (IRE_LOCAL|IRE_LOOPBACK), NULL, NULL, - connp->conn_zoneid, NULL, MATCH_IRE_TYPE); + connp->conn_zoneid, NULL, MATCH_IRE_TYPE, ipst); if (ire != NULL) ire_refrele(ire); else diff --git a/usr/src/uts/common/inet/ip/ip6_asp.c b/usr/src/uts/common/inet/ip/ip6_asp.c index d9f9f40bd9..ee5e163960 100644 --- a/usr/src/uts/common/inet/ip/ip6_asp.c +++ b/usr/src/uts/common/inet/ip/ip6_asp.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. 
+ * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -42,6 +41,7 @@ #include <inet/ip6.h> #include <inet/ip6_asp.h> #include <inet/ip_ire.h> +#include <inet/ipclassifier.h> #define IN6ADDR_MASK128_INIT \ { 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU } @@ -78,45 +78,36 @@ static ip6_asp_t default_ip6_asp_table[] = { "Default", 40 } }; -/* pending binds */ -static mblk_t *ip6_asp_pending_ops = NULL, *ip6_asp_pending_ops_tail = NULL; - -/* Synchronize updates with table usage */ -static mblk_t *ip6_asp_pending_update = NULL; /* pending table updates */ - -static boolean_t ip6_asp_uip = B_FALSE; /* table update in progress */ -static kmutex_t ip6_asp_lock; /* protect all the above */ -static uint32_t ip6_asp_refcnt = 0; /* outstanding references */ - /* * The IPv6 Default Address Selection policy table. * Until someone up above reconfigures the policy table, use the global * default. The table needs no lock since the only way to alter it is * through the SIOCSIP6ADDRPOLICY which is exclusive in ip. */ -static ip6_asp_t *ip6_asp_table = default_ip6_asp_table; -/* The number of policy entries in the table */ -static uint_t ip6_asp_table_count = - sizeof (default_ip6_asp_table) / sizeof (ip6_asp_t); - static void ip6_asp_copy(ip6_asp_t *, ip6_asp_t *, uint_t); -static void ip6_asp_check_for_updates(); +static void ip6_asp_check_for_updates(ip_stack_t *); void -ip6_asp_init(void) +ip6_asp_init(ip_stack_t *ipst) { /* Initialize the table lock */ - mutex_init(&ip6_asp_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&ipst->ips_ip6_asp_lock, NULL, MUTEX_DEFAULT, NULL); + + ipst->ips_ip6_asp_table = default_ip6_asp_table; + + ipst->ips_ip6_asp_table_count = + sizeof (default_ip6_asp_table) / sizeof (ip6_asp_t); } void -ip6_asp_free(void) +ip6_asp_free(ip_stack_t *ipst) { - if (ip6_asp_table != default_ip6_asp_table) { - kmem_free(ip6_asp_table, - ip6_asp_table_count * sizeof (ip6_asp_t)); + if (ipst->ips_ip6_asp_table != default_ip6_asp_table) { + kmem_free(ipst->ips_ip6_asp_table, + ipst->ips_ip6_asp_table_count * sizeof (ip6_asp_t)); + ipst->ips_ip6_asp_table = NULL; } - mutex_destroy(&ip6_asp_lock); + mutex_destroy(&ipst->ips_ip6_asp_lock); } /* @@ -124,15 +115,15 @@ ip6_asp_free(void) * count and return true. 
*/ boolean_t -ip6_asp_can_lookup() +ip6_asp_can_lookup(ip_stack_t *ipst) { - mutex_enter(&ip6_asp_lock); - if (ip6_asp_uip) { - mutex_exit(&ip6_asp_lock); + mutex_enter(&ipst->ips_ip6_asp_lock); + if (ipst->ips_ip6_asp_uip) { + mutex_exit(&ipst->ips_ip6_asp_lock); return (B_FALSE); } - IP6_ASP_TABLE_REFHOLD(); - mutex_exit(&ip6_asp_lock); + IP6_ASP_TABLE_REFHOLD(ipst); + mutex_exit(&ipst->ips_ip6_asp_lock); return (B_TRUE); } @@ -140,6 +131,8 @@ ip6_asp_can_lookup() void ip6_asp_pending_op(queue_t *q, mblk_t *mp, aspfunc_t func) { + conn_t *connp = Q_TO_CONN(q); + ip_stack_t *ipst = connp->conn_netstack->netstack_ip; ASSERT((mp->b_prev == NULL) && (mp->b_queue == NULL) && (mp->b_next == NULL)); @@ -147,32 +140,33 @@ ip6_asp_pending_op(queue_t *q, mblk_t *mp, aspfunc_t func) mp->b_prev = (void *)func; mp->b_next = NULL; - mutex_enter(&ip6_asp_lock); - if (ip6_asp_pending_ops == NULL) { - ASSERT(ip6_asp_pending_ops_tail == NULL); - ip6_asp_pending_ops = ip6_asp_pending_ops_tail = mp; + mutex_enter(&ipst->ips_ip6_asp_lock); + if (ipst->ips_ip6_asp_pending_ops == NULL) { + ASSERT(ipst->ips_ip6_asp_pending_ops_tail == NULL); + ipst->ips_ip6_asp_pending_ops = + ipst->ips_ip6_asp_pending_ops_tail = mp; } else { - ip6_asp_pending_ops_tail->b_next = mp; - ip6_asp_pending_ops_tail = mp; + ipst->ips_ip6_asp_pending_ops_tail->b_next = mp; + ipst->ips_ip6_asp_pending_ops_tail = mp; } - mutex_exit(&ip6_asp_lock); + mutex_exit(&ipst->ips_ip6_asp_lock); } static void -ip6_asp_complete_op() +ip6_asp_complete_op(ip_stack_t *ipst) { mblk_t *mp; queue_t *q; aspfunc_t func; - mutex_enter(&ip6_asp_lock); - while (ip6_asp_pending_ops != NULL) { - mp = ip6_asp_pending_ops; - ip6_asp_pending_ops = mp->b_next; + mutex_enter(&ipst->ips_ip6_asp_lock); + while (ipst->ips_ip6_asp_pending_ops != NULL) { + mp = ipst->ips_ip6_asp_pending_ops; + ipst->ips_ip6_asp_pending_ops = mp->b_next; mp->b_next = NULL; - if (ip6_asp_pending_ops == NULL) - ip6_asp_pending_ops_tail = NULL; - mutex_exit(&ip6_asp_lock); + if (ipst->ips_ip6_asp_pending_ops == NULL) + ipst->ips_ip6_asp_pending_ops_tail = NULL; + mutex_exit(&ipst->ips_ip6_asp_lock); q = (queue_t *)mp->b_queue; func = (aspfunc_t)mp->b_prev; @@ -182,9 +176,9 @@ ip6_asp_complete_op() (*func)(NULL, q, mp, NULL); - mutex_enter(&ip6_asp_lock); + mutex_enter(&ipst->ips_ip6_asp_lock); } - mutex_exit(&ip6_asp_lock); + mutex_exit(&ipst->ips_ip6_asp_lock); } /* @@ -192,9 +186,9 @@ ip6_asp_complete_op() * saved update to the table, if any. */ void -ip6_asp_table_refrele() +ip6_asp_table_refrele(ip_stack_t *ipst) { - IP6_ASP_TABLE_REFRELE(); + IP6_ASP_TABLE_REFRELE(ipst); } /* @@ -209,15 +203,15 @@ ip6_asp_table_refrele() * better than O(n). */ char * -ip6_asp_lookup(const in6_addr_t *addr, uint32_t *precedence) +ip6_asp_lookup(const in6_addr_t *addr, uint32_t *precedence, ip_stack_t *ipst) { ip6_asp_t *aspp; ip6_asp_t *match = NULL; ip6_asp_t *default_policy; - aspp = ip6_asp_table; + aspp = ipst->ips_ip6_asp_table; /* The default entry must always be the last one */ - default_policy = aspp + ip6_asp_table_count - 1; + default_policy = aspp + ipst->ips_ip6_asp_table_count - 1; while (match == NULL) { if (aspp == default_policy) { @@ -242,24 +236,25 @@ ip6_asp_lookup(const in6_addr_t *addr, uint32_t *precedence) * ip_sioctl_ip6addrpolicy() has already done it for us. 
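A minimal single-threaded sketch (with assumed names) of the gating pattern that ip6_asp_can_lookup(), ip6_asp_table_refrele() and the pending-update list implement per stack: lookups take a reference unless a table update is in progress, and a deferred SIOCSIP6ADDRPOLICY update is only applied once the last reference drops:

#include <stdio.h>
#include <stdbool.h>

struct asp_gate {
	unsigned refcnt;		/* outstanding lookups */
	bool	update_in_progress;
	bool	update_pending;
};

static bool
asp_can_lookup(struct asp_gate *g)
{
	if (g->update_in_progress)
		return (false);
	g->refcnt++;
	return (true);
}

static void
asp_refrele(struct asp_gate *g)
{
	if (--g->refcnt == 0 && g->update_pending) {
		g->update_pending = false;
		printf("applying deferred policy-table update\n");
	}
}

int
main(void)
{
	struct asp_gate g = { 0, false, false };

	if (asp_can_lookup(&g)) {
		g.update_pending = true;	/* update arrives mid-lookup */
		asp_refrele(&g);		/* last reader applies it */
	}
	return (0);
}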
*/ void -ip6_asp_check_for_updates() +ip6_asp_check_for_updates(ip_stack_t *ipst) { ip6_asp_t *table; size_t table_size; mblk_t *data_mp, *mp; struct iocblk *iocp; - mutex_enter(&ip6_asp_lock); - if (ip6_asp_pending_update == NULL || ip6_asp_refcnt > 0) { - mutex_exit(&ip6_asp_lock); + mutex_enter(&ipst->ips_ip6_asp_lock); + if (ipst->ips_ip6_asp_pending_update == NULL || + ipst->ips_ip6_asp_refcnt > 0) { + mutex_exit(&ipst->ips_ip6_asp_lock); return; } - mp = ip6_asp_pending_update; - ip6_asp_pending_update = NULL; + mp = ipst->ips_ip6_asp_pending_update; + ipst->ips_ip6_asp_pending_update = NULL; ASSERT(mp->b_prev != NULL); - ip6_asp_uip = B_TRUE; + ipst->ips_ip6_asp_uip = B_TRUE; iocp = (struct iocblk *)mp->b_rptr; data_mp = mp->b_cont; @@ -271,7 +266,7 @@ ip6_asp_check_for_updates() table_size = iocp->ioc_count; } - ip6_asp_replace(mp, table, table_size, B_TRUE, + ip6_asp_replace(mp, table, table_size, B_TRUE, ipst, iocp->ioc_flag & IOC_MODELS); } @@ -282,10 +277,10 @@ ip6_asp_check_for_updates() * table. The caller is responsible for making sure that there are exactly * new_count policy entries in new_table. */ -/*ARGSUSED4*/ +/*ARGSUSED5*/ void ip6_asp_replace(mblk_t *mp, ip6_asp_t *new_table, size_t new_size, - boolean_t locked, model_t datamodel) + boolean_t locked, ip_stack_t *ipst, model_t datamodel) { int ret_val = 0; ip6_asp_t *tmp_table; @@ -310,44 +305,44 @@ ip6_asp_replace(mblk_t *mp, ip6_asp_t *new_table, size_t new_size, if (!locked) - mutex_enter(&ip6_asp_lock); + mutex_enter(&ipst->ips_ip6_asp_lock); /* * Check if we are in the process of creating any IRE using the * current information. If so, wait till that is done. */ - if (!locked && ip6_asp_refcnt > 0) { + if (!locked && ipst->ips_ip6_asp_refcnt > 0) { /* Save this request for later processing */ - if (ip6_asp_pending_update == NULL) { - ip6_asp_pending_update = mp; + if (ipst->ips_ip6_asp_pending_update == NULL) { + ipst->ips_ip6_asp_pending_update = mp; } else { /* Let's not queue multiple requests for now */ ip1dbg(("ip6_asp_replace: discarding request\n")); - mutex_exit(&ip6_asp_lock); + mutex_exit(&ipst->ips_ip6_asp_lock); ret_val = EAGAIN; goto replace_end; } - mutex_exit(&ip6_asp_lock); + mutex_exit(&ipst->ips_ip6_asp_lock); return; } /* Prevent lookups till the table have been updated */ if (!locked) - ip6_asp_uip = B_TRUE; + ipst->ips_ip6_asp_uip = B_TRUE; - ASSERT(ip6_asp_refcnt == 0); + ASSERT(ipst->ips_ip6_asp_refcnt == 0); if (new_table == NULL) { /* * This is a special case. The user wants to revert * back to using the default table. 
*/ - if (ip6_asp_table == default_ip6_asp_table) + if (ipst->ips_ip6_asp_table == default_ip6_asp_table) goto unlock_end; - kmem_free(ip6_asp_table, - ip6_asp_table_count * sizeof (ip6_asp_t)); - ip6_asp_table = default_ip6_asp_table; - ip6_asp_table_count = + kmem_free(ipst->ips_ip6_asp_table, + ipst->ips_ip6_asp_table_count * sizeof (ip6_asp_t)); + ipst->ips_ip6_asp_table = default_ip6_asp_table; + ipst->ips_ip6_asp_table_count = sizeof (default_ip6_asp_table) / sizeof (ip6_asp_t); goto unlock_end; } @@ -413,12 +408,12 @@ ip6_asp_replace(mblk_t *mp, ip6_asp_t *new_table, size_t new_size, ip1dbg(("ip6_asp_replace: bad table: no default entry\n")); goto unlock_end; } - if (ip6_asp_table != default_ip6_asp_table) { - kmem_free(ip6_asp_table, - ip6_asp_table_count * sizeof (ip6_asp_t)); + if (ipst->ips_ip6_asp_table != default_ip6_asp_table) { + kmem_free(ipst->ips_ip6_asp_table, + ipst->ips_ip6_asp_table_count * sizeof (ip6_asp_t)); } - ip6_asp_table = tmp_table; - ip6_asp_table_count = count; + ipst->ips_ip6_asp_table = tmp_table; + ipst->ips_ip6_asp_table_count = count; /* * The user has changed the address selection policy table. IPv6 @@ -426,11 +421,11 @@ ip6_asp_replace(mblk_t *mp, ip6_asp_t *new_table, size_t new_size, * IRE_HOST_REDIRECT entries used the old table, so we need to * clear the cache. */ - ire_walk_v6(ire_delete_cache_v6, NULL, ALL_ZONES); + ire_walk_v6(ire_delete_cache_v6, NULL, ALL_ZONES, ipst); unlock_end: - ip6_asp_uip = B_FALSE; - mutex_exit(&ip6_asp_lock); + ipst->ips_ip6_asp_uip = B_FALSE; + mutex_exit(&ipst->ips_ip6_asp_lock); replace_end: /* Reply to the ioctl */ @@ -446,7 +441,7 @@ replace_end: DB_TYPE(mp) = (iocp->ioc_error == 0) ? M_IOCACK : M_IOCNAK; qreply(q, mp); check_binds: - ip6_asp_complete_op(); + ip6_asp_complete_op(ipst); } /* @@ -501,7 +496,7 @@ ip6_asp_copy(ip6_asp_t *src_table, ip6_asp_t *dst_table, uint_t count) * dtable. 
*/ int -ip6_asp_get(ip6_asp_t *dtable, size_t dtable_size) +ip6_asp_get(ip6_asp_t *dtable, size_t dtable_size, ip_stack_t *ipst) { uint_t dtable_count; @@ -510,12 +505,12 @@ ip6_asp_get(ip6_asp_t *dtable, size_t dtable_size) return (-1); dtable_count = dtable_size / sizeof (ip6_asp_t); - bcopy(ip6_asp_table, dtable, - MIN(ip6_asp_table_count, dtable_count) * + bcopy(ipst->ips_ip6_asp_table, dtable, + MIN(ipst->ips_ip6_asp_table_count, dtable_count) * sizeof (ip6_asp_t)); } - return (ip6_asp_table_count); + return (ipst->ips_ip6_asp_table_count); } /* diff --git a/usr/src/uts/common/inet/ip/ip6_if.c b/usr/src/uts/common/inet/ip/ip6_if.c index 5937c0ee26..c47a70f4ab 100644 --- a/usr/src/uts/common/inet/ip/ip6_if.c +++ b/usr/src/uts/common/inet/ip/ip6_if.c @@ -42,6 +42,7 @@ #include <sys/kstat.h> #include <sys/debug.h> #include <sys/zone.h> +#include <sys/policy.h> #include <sys/systm.h> #include <sys/param.h> @@ -79,18 +80,18 @@ static in6_addr_t ipv6_ll_template = static ipif_t * ipif_lookup_interface_v6(const in6_addr_t *if_addr, const in6_addr_t *dst, - queue_t *q, mblk_t *mp, ipsq_func_t func, int *error); + queue_t *q, mblk_t *mp, ipsq_func_t func, int *error, ip_stack_t *ipst); /* * ipif_lookup_group_v6 */ ipif_t * -ipif_lookup_group_v6(const in6_addr_t *group, zoneid_t zoneid) +ipif_lookup_group_v6(const in6_addr_t *group, zoneid_t zoneid, ip_stack_t *ipst) { ire_t *ire; ipif_t *ipif; - ire = ire_lookup_multi_v6(group, zoneid); + ire = ire_lookup_multi_v6(group, zoneid, ipst); if (ire == NULL) return (NULL); ipif = ire->ire_ipif; @@ -103,12 +104,12 @@ ipif_lookup_group_v6(const in6_addr_t *group, zoneid_t zoneid) * ill_lookup_group_v6 */ ill_t * -ill_lookup_group_v6(const in6_addr_t *group, zoneid_t zoneid) +ill_lookup_group_v6(const in6_addr_t *group, zoneid_t zoneid, ip_stack_t *ipst) { ire_t *ire; ill_t *ill; - ire = ire_lookup_multi_v6(group, zoneid); + ire = ire_lookup_multi_v6(group, zoneid, ipst); if (ire == NULL) return (NULL); ill = ire->ire_ipif->ipif_ill; @@ -123,7 +124,7 @@ ill_lookup_group_v6(const in6_addr_t *group, zoneid_t zoneid) */ static ipif_t * ipif_lookup_interface_v6(const in6_addr_t *if_addr, const in6_addr_t *dst, - queue_t *q, mblk_t *mp, ipsq_func_t func, int *error) + queue_t *q, mblk_t *mp, ipsq_func_t func, int *error, ip_stack_t *ipst) { ipif_t *ipif; ill_t *ill; @@ -139,8 +140,8 @@ ipif_lookup_interface_v6(const in6_addr_t *if_addr, const in6_addr_t *dst, * This is done to avoid returning non-point-to-point * ipif instead of unnumbered point-to-point ipif. 
*/ - rw_enter(&ill_g_lock, RW_READER); - ill = ILL_START_WALK_V6(&ctx); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); + ill = ILL_START_WALK_V6(&ctx, ipst); for (; ill != NULL; ill = ill_next(&ctx, ill)) { GRAB_CONN_LOCK(q); mutex_enter(&ill->ill_lock); @@ -156,13 +157,13 @@ ipif_lookup_interface_v6(const in6_addr_t *if_addr, const in6_addr_t *dst, ipif_refhold_locked(ipif); mutex_exit(&ill->ill_lock); RELEASE_CONN_LOCK(q); - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); return (ipif); } else if (IPIF_CAN_WAIT(ipif, q)) { ipsq = ill->ill_phyint->phyint_ipsq; mutex_enter(&ipsq->ipsq_lock); mutex_exit(&ill->ill_lock); - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); ipsq_enq(ipsq, q, mp, func, NEW_OP, ill); mutex_exit(&ipsq->ipsq_lock); @@ -175,10 +176,10 @@ ipif_lookup_interface_v6(const in6_addr_t *if_addr, const in6_addr_t *dst, mutex_exit(&ill->ill_lock); RELEASE_CONN_LOCK(q); } - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); /* lookup the ipif based on interface address */ ipif = ipif_lookup_addr_v6(if_addr, NULL, ALL_ZONES, q, mp, func, - error); + error, ipst); ASSERT(ipif == NULL || ipif->ipif_isv6); return (ipif); } @@ -193,7 +194,7 @@ ipif_lookup_interface_v6(const in6_addr_t *if_addr, const in6_addr_t *dst, /* ARGSUSED */ ipif_t * ipif_lookup_addr_v6(const in6_addr_t *addr, ill_t *match_ill, zoneid_t zoneid, - queue_t *q, mblk_t *mp, ipsq_func_t func, int *error) + queue_t *q, mblk_t *mp, ipsq_func_t func, int *error, ip_stack_t *ipst) { ipif_t *ipif; ill_t *ill; @@ -204,13 +205,13 @@ ipif_lookup_addr_v6(const in6_addr_t *addr, ill_t *match_ill, zoneid_t zoneid, if (error != NULL) *error = 0; - rw_enter(&ill_g_lock, RW_READER); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); /* * Repeat twice, first based on local addresses and * next time for pointopoint. */ repeat: - ill = ILL_START_WALK_V6(&ctx); + ill = ILL_START_WALK_V6(&ctx, ipst); for (; ill != NULL; ill = ill_next(&ctx, ill)) { if (match_ill != NULL && ill != match_ill) { continue; @@ -234,13 +235,13 @@ repeat: ipif_refhold_locked(ipif); mutex_exit(&ill->ill_lock); RELEASE_CONN_LOCK(q); - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); return (ipif); } else if (IPIF_CAN_WAIT(ipif, q)) { ipsq = ill->ill_phyint->phyint_ipsq; mutex_enter(&ipsq->ipsq_lock); mutex_exit(&ill->ill_lock); - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); ipsq_enq(ipsq, q, mp, func, NEW_OP, ill); mutex_exit(&ipsq->ipsq_lock); @@ -256,7 +257,7 @@ repeat: /* If we already did the ptp case, then we are done */ if (ptp) { - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); if (error != NULL) *error = ENXIO; return (NULL); @@ -274,7 +275,8 @@ repeat: * Return the zoneid for the ipif. ALL_ZONES if none found. */ zoneid_t -ipif_lookup_addr_zoneid_v6(const in6_addr_t *addr, ill_t *match_ill) +ipif_lookup_addr_zoneid_v6(const in6_addr_t *addr, ill_t *match_ill, + ip_stack_t *ipst) { ipif_t *ipif; ill_t *ill; @@ -282,13 +284,13 @@ ipif_lookup_addr_zoneid_v6(const in6_addr_t *addr, ill_t *match_ill) ill_walk_context_t ctx; zoneid_t zoneid; - rw_enter(&ill_g_lock, RW_READER); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); /* * Repeat twice, first based on local addresses and * next time for pointopoint. 
*/ repeat: - ill = ILL_START_WALK_V6(&ctx); + ill = ILL_START_WALK_V6(&ctx, ipst); for (; ill != NULL; ill = ill_next(&ctx, ill)) { if (match_ill != NULL && ill != match_ill) { continue; @@ -306,7 +308,7 @@ repeat: !(ipif->ipif_state_flags & IPIF_CONDEMNED)) { zoneid = ipif->ipif_zoneid; mutex_exit(&ill->ill_lock); - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); /* * If ipif_zoneid was ALL_ZONES then we have * a trusted extensions shared IP address. @@ -322,7 +324,7 @@ repeat: /* If we already did the ptp case, then we are done */ if (ptp) { - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); return (ALL_ZONES); } ptp = B_TRUE; @@ -393,7 +395,7 @@ int ip_rt_add_v6(const in6_addr_t *dst_addr, const in6_addr_t *mask, const in6_addr_t *gw_addr, const in6_addr_t *src_addr, int flags, ipif_t *ipif_arg, ire_t **ire_arg, queue_t *q, mblk_t *mp, ipsq_func_t func, - struct rtsa_s *sp) + struct rtsa_s *sp, ip_stack_t *ipst) { ire_t *ire; ire_t *gw_ire = NULL; @@ -428,7 +430,7 @@ ip_rt_add_v6(const in6_addr_t *dst_addr, const in6_addr_t *mask, * Get the ipif, if any, corresponding to the gw_addr */ ipif = ipif_lookup_interface_v6(gw_addr, dst_addr, q, mp, func, - &error); + &error, ipst); if (ipif != NULL) ipif_refheld = B_TRUE; else if (error == EINPROGRESS) { @@ -448,7 +450,7 @@ ip_rt_add_v6(const in6_addr_t *dst_addr, const in6_addr_t *mask, IN6_ARE_ADDR_EQUAL(dst_addr, &ipv6_loopback) && IN6_ARE_ADDR_EQUAL(mask, &ipv6_all_ones)) { ire = ire_ctable_lookup_v6(dst_addr, 0, IRE_LOOPBACK, - ipif, ALL_ZONES, NULL, match_flags); + ipif, ALL_ZONES, NULL, match_flags, ipst); if (ire != NULL) { ire_refrele(ire); if (ipif_refheld) @@ -477,7 +479,8 @@ ip_rt_add_v6(const in6_addr_t *dst_addr, const in6_addr_t *mask, flags, &ire_uinfo_null, NULL, - NULL); + NULL, + ipst); if (ire == NULL) { if (ipif_refheld) ipif_refrele(ipif); @@ -594,7 +597,7 @@ ip_rt_add_v6(const in6_addr_t *dst_addr, const in6_addr_t *mask, */ match_flags |= MATCH_IRE_MASK; ire = ire_ftable_lookup_v6(dst_addr, mask, 0, IRE_INTERFACE, - ipif, NULL, ALL_ZONES, 0, NULL, match_flags); + ipif, NULL, ALL_ZONES, 0, NULL, match_flags, ipst); if (ire != NULL) { ire_refrele(ire); if (ipif_refheld) @@ -627,7 +630,8 @@ ip_rt_add_v6(const in6_addr_t *dst_addr, const in6_addr_t *mask, flags, &ire_uinfo_null, NULL, - NULL); + NULL, + ipst); if (ire == NULL) { if (ipif_refheld) ipif_refrele(ipif); @@ -677,7 +681,7 @@ ip_rt_add_v6(const in6_addr_t *dst_addr, const in6_addr_t *mask, if (ipif_arg != NULL) match_flags |= MATCH_IRE_ILL; gw_ire = ire_ftable_lookup_v6(gw_addr, 0, 0, IRE_INTERFACE, ipif_arg, - NULL, ALL_ZONES, 0, NULL, match_flags); + NULL, ALL_ZONES, 0, NULL, match_flags, ipst); if (gw_ire == NULL) return (ENETUNREACH); @@ -699,7 +703,7 @@ ip_rt_add_v6(const in6_addr_t *dst_addr, const in6_addr_t *mask, /* check for a duplicate entry */ ire = ire_ftable_lookup_v6(dst_addr, mask, gw_addr, type, ipif_arg, NULL, ALL_ZONES, 0, NULL, - match_flags | MATCH_IRE_MASK | MATCH_IRE_GW); + match_flags | MATCH_IRE_MASK | MATCH_IRE_GW, ipst); if (ire != NULL) { ire_refrele(gw_ire); ire_refrele(ire); @@ -758,7 +762,9 @@ ip_rt_add_v6(const in6_addr_t *dst_addr, const in6_addr_t *mask, flags, &gw_ire->ire_uinfo, /* Inherit ULP info from gw */ gc, /* security attribute */ - NULL); + NULL, + ipst); + /* * The ire holds a reference to the 'gc' and the 'gc' holds a * reference to the 'gcgrp'. 
We can now release the extra reference @@ -801,6 +807,7 @@ ip_rt_add_v6(const in6_addr_t *dst_addr, const in6_addr_t *mask, * IP source address cannot be a multicast. */ if ((ip_cgtp_filter_ops != NULL) && + ipst->ips_netstack->netstack_stackid == GLOBAL_NETSTACKID && !IN6_IS_ADDR_MULTICAST(&(ire->ire_addr_v6))) { int res = ip_cgtp_filter_ops->cfo_add_dest_v6( &ire->ire_addr_v6, @@ -823,7 +830,7 @@ ip_rt_add_v6(const in6_addr_t *dst_addr, const in6_addr_t *mask, */ if (gc != NULL) { ASSERT(gcgrp != NULL); - ire_clookup_delete_cache_gw_v6(gw_addr, ALL_ZONES); + ire_clookup_delete_cache_gw_v6(gw_addr, ALL_ZONES, ipst); } save_ire: @@ -889,7 +896,7 @@ save_ire: int ip_rt_delete_v6(const in6_addr_t *dst_addr, const in6_addr_t *mask, const in6_addr_t *gw_addr, uint_t rtm_addrs, int flags, ipif_t *ipif_arg, - queue_t *q, mblk_t *mp, ipsq_func_t func) + queue_t *q, mblk_t *mp, ipsq_func_t func, ip_stack_t *ipst) { ire_t *ire = NULL; ipif_t *ipif; @@ -934,7 +941,8 @@ ip_rt_delete_v6(const in6_addr_t *dst_addr, const in6_addr_t *mask, * For more detail on specifying routes by gateway address and by * interface index, see the comments in ip_rt_add_v6(). */ - ipif = ipif_lookup_interface_v6(gw_addr, dst_addr, q, mp, func, &err); + ipif = ipif_lookup_interface_v6(gw_addr, dst_addr, q, mp, func, &err, + ipst); if (ipif != NULL) { ipif_refheld = B_TRUE; if (ipif_arg != NULL) { @@ -948,11 +956,11 @@ ip_rt_delete_v6(const in6_addr_t *dst_addr, const in6_addr_t *mask, if (ipif->ipif_ire_type == IRE_LOOPBACK) ire = ire_ctable_lookup_v6(dst_addr, 0, IRE_LOOPBACK, - ipif, ALL_ZONES, NULL, match_flags); + ipif, ALL_ZONES, NULL, match_flags, ipst); if (ire == NULL) ire = ire_ftable_lookup_v6(dst_addr, mask, 0, IRE_INTERFACE, ipif, NULL, ALL_ZONES, 0, NULL, - match_flags); + match_flags, ipst); } else if (err == EINPROGRESS) { return (err); } else { @@ -982,7 +990,7 @@ ip_rt_delete_v6(const in6_addr_t *dst_addr, const in6_addr_t *mask, else type = IRE_PREFIX; ire = ire_ftable_lookup_v6(dst_addr, mask, gw_addr, type, - ipif_arg, NULL, ALL_ZONES, 0, NULL, match_flags); + ipif_arg, NULL, ALL_ZONES, 0, NULL, match_flags, ipst); } if (ipif_refheld) { @@ -999,7 +1007,8 @@ ip_rt_delete_v6(const in6_addr_t *dst_addr, const in6_addr_t *mask, * Packets coming from that address will no longer be * filtered to remove duplicates. 
*/ - if (ip_cgtp_filter_ops != NULL) { + if (ip_cgtp_filter_ops != NULL && + ipst->ips_netstack->netstack_stackid == GLOBAL_NETSTACKID) { err = ip_cgtp_filter_ops->cfo_del_dest_v6( &ire->ire_addr_v6, &ire->ire_gateway_addr_v6); } @@ -1269,6 +1278,7 @@ ipif_ndp_setup_multicast(ipif_t *ipif, nce_t **ret_nce) phyint_t *phyi = ill->ill_phyint; uint32_t hw_extract_start; dl_unitdata_req_t *dlur; + ip_stack_t *ipst = ill->ill_ipst; if (ret_nce != NULL) *ret_nce = NULL; @@ -1325,7 +1335,7 @@ ipif_ndp_setup_multicast(ipif_t *ipif, nce_t **ret_nce) if ((ipif->ipif_flags & IPIF_BROADCAST) || (ill->ill_flags & ILLF_MULTICAST) || (phyi->phyint_flags & PHYI_MULTI_BCAST)) { - mutex_enter(&ndp6.ndp_g_lock); + mutex_enter(&ipst->ips_ndp6->ndp_g_lock); err = ndp_add(ill, phys_addr, &v6_mcast_addr, /* v6 address */ @@ -1337,7 +1347,7 @@ ipif_ndp_setup_multicast(ipif_t *ipif, nce_t **ret_nce) &mnce, NULL, NULL); - mutex_exit(&ndp6.ndp_g_lock); + mutex_exit(&ipst->ips_ndp6->ndp_g_lock); if (err == 0) { if (ret_nce != NULL) { *ret_nce = mnce; @@ -1488,7 +1498,7 @@ ipif_ndp_down(ipif_t *ipif) */ if (ipif->ipif_ill->ill_ipif_up_count == 0) { ndp_walk(ipif->ipif_ill, (pfi_t)ndp_delete_per_ill, - (uchar_t *)ipif->ipif_ill); + (uchar_t *)ipif->ipif_ill, ipif->ipif_ill->ill_ipst); } } @@ -1502,6 +1512,7 @@ ipif_recover_ire_v6(ipif_t *ipif) mblk_t *mp; ire_t **ipif_saved_irep; ire_t **irep; + ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; ip1dbg(("ipif_recover_ire_v6(%s:%u)", ipif->ipif_ill->ill_name, ipif->ipif_id)); @@ -1601,7 +1612,8 @@ ipif_recover_ire_v6(ipif_t *ipif) ifrt->ifrt_flags, &ifrt->ifrt_iulp_info, NULL, - NULL); + NULL, + ipst); if (ire == NULL) { mutex_exit(&ipif->ipif_saved_ire_lock); kmem_free(ipif_saved_irep, @@ -1762,11 +1774,13 @@ typedef struct dstinfo { * rule_ifprefix and rule_prefix. */ typedef enum {CAND_AVOID, CAND_TIE, CAND_PREFER} rule_res_t; -typedef rule_res_t (*rulef_t)(cand_t *, cand_t *, const dstinfo_t *); +typedef rule_res_t (*rulef_t)(cand_t *, cand_t *, const dstinfo_t *, + ip_stack_t *); /* Prefer an address if it is equal to the destination address. */ +/* ARGSUSED3 */ static rule_res_t -rule_isdst(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo) +rule_isdst(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, ip_stack_t *ipst) { if (!bc->cand_isdst_set) { bc->cand_isdst = @@ -1791,8 +1805,9 @@ rule_isdst(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo) * prefer addresses that are of greater scope than the destination over * those that are of lesser scope than the destination. 
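An illustrative sketch of how the rulef_t chain used by ipif_select_source_v6() is evaluated: walk the ordered rule list, stop as soon as a rule prefers the new candidate or says to avoid it, and fall through on CAND_TIE. The two demo rules are deliberately simplified (the real scope rule is finer-grained), and all types and names here are stand-ins:

#include <stdio.h>

typedef enum { CAND_AVOID, CAND_TIE, CAND_PREFER } rule_res_t;
typedef struct { int scope; int deprecated; } cand_t;
typedef struct { int dst_scope; } dstinfo_t;
typedef rule_res_t (*rulef_t)(const cand_t *, const cand_t *, const dstinfo_t *);

static rule_res_t
rule_deprecated(const cand_t *bc, const cand_t *cc, const dstinfo_t *d)
{
	(void) d;
	if (bc->deprecated == cc->deprecated)
		return (CAND_TIE);
	return (cc->deprecated ? CAND_AVOID : CAND_PREFER);
}

static rule_res_t
rule_scope(const cand_t *bc, const cand_t *cc, const dstinfo_t *d)
{
	int bm = (bc->scope == d->dst_scope);	/* simplified scope match */
	int cm = (cc->scope == d->dst_scope);

	if (bm == cm)
		return (CAND_TIE);
	return (cm ? CAND_PREFER : CAND_AVOID);
}

static int
prefer_new(const cand_t *best, const cand_t *curr, const dstinfo_t *d)
{
	rulef_t rules[] = { rule_deprecated, rule_scope, NULL };

	for (int i = 0; rules[i] != NULL; i++) {
		rule_res_t r = rules[i](best, curr, d);
		if (r == CAND_PREFER)
			return (1);
		if (r == CAND_AVOID)
			return (0);
	}
	return (0);	/* all ties: keep the current best candidate */
}

int
main(void)
{
	dstinfo_t d = { 14 };				/* global-scope dst */
	cand_t best = { 2, 0 };				/* link-local source */
	cand_t curr = { 14, 0 };			/* global source */

	printf("prefer new candidate: %d\n", prefer_new(&best, &curr, &d));
	return (0);
}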
*/ +/* ARGSUSED3 */ static rule_res_t -rule_scope(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo) +rule_scope(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, ip_stack_t *ipst) { if (!bc->cand_scope_set) { bc->cand_scope = ip_addr_scope_v6(&bc->cand_srcaddr); @@ -1822,7 +1837,8 @@ rule_scope(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo) */ /* ARGSUSED2 */ static rule_res_t -rule_deprecated(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo) +rule_deprecated(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, + ip_stack_t *ipst) { if (!bc->cand_isdeprecated_set) { bc->cand_isdeprecated = @@ -1850,7 +1866,8 @@ rule_deprecated(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo) */ /* ARGSUSED2 */ static rule_res_t -rule_preferred(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo) +rule_preferred(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, + ip_stack_t *ipst) { if (!bc->cand_ispreferred_set) { bc->cand_ispreferred = ((bc->cand_flags & IPIF_PREFERRED) != 0); @@ -1873,8 +1890,10 @@ rule_preferred(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo) * to an interface that is in the same IPMP group as the outgoing * interface. */ +/* ARGSUSED3 */ static rule_res_t -rule_interface(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo) +rule_interface(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, + ip_stack_t *ipst) { ill_t *dstill = dstinfo->dst_ill; @@ -1909,18 +1928,18 @@ rule_interface(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo) * Prefer source addresses whose label matches the destination's label. */ static rule_res_t -rule_label(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo) +rule_label(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, ip_stack_t *ipst) { char *label; if (!bc->cand_matchedlabel_set) { - label = ip6_asp_lookup(&bc->cand_srcaddr, NULL); + label = ip6_asp_lookup(&bc->cand_srcaddr, NULL, ipst); bc->cand_matchedlabel = ip6_asp_labelcmp(label, dstinfo->dst_label); bc->cand_matchedlabel_set = B_TRUE; } - label = ip6_asp_lookup(&cc->cand_srcaddr, NULL); + label = ip6_asp_lookup(&cc->cand_srcaddr, NULL, ipst); cc->cand_matchedlabel = ip6_asp_labelcmp(label, dstinfo->dst_label); cc->cand_matchedlabel_set = B_TRUE; @@ -1937,8 +1956,10 @@ rule_label(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo) * the logic of this rule and prefer temporary addresses by using the * IPV6_SRC_PREFERENCES socket option. */ +/* ARGSUSED3 */ static rule_res_t -rule_temporary(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo) +rule_temporary(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, + ip_stack_t *ipst) { if (!bc->cand_istmp_set) { bc->cand_istmp = ((bc->cand_flags & IPIF_TEMPORARY) != 0); @@ -1964,8 +1985,10 @@ rule_temporary(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo) * under the interface mask. This gets us on the same subnet before applying * any Solaris-specific rules. 
*/ +/* ARGSUSED3 */ static rule_res_t -rule_ifprefix(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo) +rule_ifprefix(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, + ip_stack_t *ipst) { if (!bc->cand_pref_eq_set) { bc->cand_pref_eq = V6_MASK_EQ_2(bc->cand_srcaddr, @@ -2009,7 +2032,8 @@ rule_ifprefix(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo) */ /* ARGSUSED2 */ static rule_res_t -rule_zone_specific(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo) +rule_zone_specific(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, + ip_stack_t *ipst) { if ((bc->cand_zoneid == ALL_ZONES) == (cc->cand_zoneid == ALL_ZONES)) @@ -2033,7 +2057,8 @@ rule_zone_specific(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo) */ /* ARGSUSED2 */ static rule_res_t -rule_addr_type(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo) +rule_addr_type(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, + ip_stack_t *ipst) { #define ATYPE(x) \ ((x) & IPIF_DHCPRUNNING) ? 1 : ((x) & IPIF_ADDRCONF) ? 3 : 2 @@ -2055,8 +2080,9 @@ rule_addr_type(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo) * addresses with the destination, and pick the address with the longest string * of leading zeros, as per CommonPrefixLen() defined in RFC 3484. */ +/* ARGSUSED3 */ static rule_res_t -rule_prefix(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo) +rule_prefix(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, ip_stack_t *ipst) { if (!bc->cand_common_pref_set) { bc->cand_common_pref = ip_common_prefix_v6(&bc->cand_srcaddr, @@ -2082,7 +2108,8 @@ rule_prefix(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo) */ /* ARGSUSED */ static rule_res_t -rule_must_be_last(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo) +rule_must_be_last(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, + ip_stack_t *ipst) { return (CAND_AVOID); } @@ -2129,6 +2156,7 @@ ipif_select_source_v6(ill_t *dstill, const in6_addr_t *dst, boolean_t first_candidate = B_TRUE; rule_res_t rule_result; tsol_tpc_t *src_rhtp, *dst_rhtp; + ip_stack_t *ipst = dstill->ill_ipst; /* * The list of ordering rules. They are applied in the order they @@ -2164,7 +2192,7 @@ ipif_select_source_v6(ill_t *dstill, const in6_addr_t *dst, if (dstill->ill_usesrc_ifindex != 0) { if ((usesrc_ill = ill_lookup_on_ifindex(dstill->ill_usesrc_ifindex, B_TRUE, - NULL, NULL, NULL, NULL)) != NULL) { + NULL, NULL, NULL, NULL, ipst)) != NULL) { dstinfo.dst_ill = usesrc_ill; } else { return (NULL); @@ -2195,10 +2223,10 @@ ipif_select_source_v6(ill_t *dstill, const in6_addr_t *dst, dstinfo.dst_addr = dst; dstinfo.dst_scope = ip_addr_scope_v6(dst); - dstinfo.dst_label = ip6_asp_lookup(dst, NULL); + dstinfo.dst_label = ip6_asp_lookup(dst, NULL, ipst); dstinfo.dst_prefer_src_tmp = ((src_prefs & IPV6_PREFER_SRC_TMP) != 0); - rw_enter(&ill_g_lock, RW_READER); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); /* * Section three of the I-D states that for multicast and * link-local destinations, the candidate set must be restricted to @@ -2208,7 +2236,7 @@ ipif_select_source_v6(ill_t *dstill, const in6_addr_t *dst, * otherwise will almost certainly cause problems. 
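A stand-alone version of CommonPrefixLen() from RFC 3484, the comparison rule_prefix() relies on: count the leading bits two IPv6 addresses have in common. The helper name is assumed; this is not the kernel's ip_common_prefix_v6():

#include <stdio.h>
#include <netinet/in.h>
#include <arpa/inet.h>

static int
common_prefix_len(const struct in6_addr *a, const struct in6_addr *b)
{
	int bits = 0;

	for (int i = 0; i < 16; i++) {
		unsigned char x = a->s6_addr[i] ^ b->s6_addr[i];

		if (x == 0) {		/* whole byte matches */
			bits += 8;
			continue;
		}
		while ((x & 0x80) == 0) {	/* count matching leading bits */
			bits++;
			x <<= 1;
		}
		break;
	}
	return (bits);
}

int
main(void)
{
	struct in6_addr a, b;

	inet_pton(AF_INET6, "2001:db8:1::1", &a);
	inet_pton(AF_INET6, "2001:db8:2::1", &b);
	printf("common prefix: %d bits\n", common_prefix_len(&a, &b));
	return (0);
}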
*/ if (IN6_IS_ADDR_LINKLOCAL(dst) || IN6_IS_ADDR_MULTICAST(dst) || - ipv6_strict_dst_multihoming || usesrc_ill != NULL) { + ipst->ips_ipv6_strict_dst_multihoming || usesrc_ill != NULL) { if (restrict_ill == RESTRICT_TO_NONE) dstinfo.dst_restrict_ill = RESTRICT_TO_GROUP; else @@ -2234,7 +2262,7 @@ ipif_select_source_v6(ill_t *dstill, const in6_addr_t *dst, ill = dstinfo.dst_ill; } } else { - ill = ILL_START_WALK_V6(&ctx); + ill = ILL_START_WALK_V6(&ctx, ipst); } while (ill != NULL) { @@ -2311,7 +2339,8 @@ ipif_select_source_v6(ill_t *dstill, const in6_addr_t *dst, for (index = 0; rules[index] != NULL; index++) { /* Apply a comparison rule. */ rule_result = - (rules[index])(&best_c, &curr_c, &dstinfo); + (rules[index])(&best_c, &curr_c, &dstinfo, + ipst); if (rule_result == CAND_AVOID) { /* * The best candidate is still the @@ -2370,7 +2399,7 @@ next_ill: TPC_RELE(dst_rhtp); if (ipif == NULL) { - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); return (NULL); } @@ -2378,11 +2407,11 @@ next_ill: if (IPIF_CAN_LOOKUP(ipif)) { ipif_refhold_locked(ipif); mutex_exit(&ipif->ipif_ill->ill_lock); - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); return (ipif); } mutex_exit(&ipif->ipif_ill->ill_lock); - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); ip1dbg(("ipif_select_source_v6 cannot lookup ipif %p" " returning null \n", (void *)ipif)); @@ -2409,6 +2438,7 @@ ipif_recreate_interface_routes_v6(ipif_t *old_ipif, ipif_t *ipif) ipif_t *nipif = NULL; boolean_t nipif_refheld = B_FALSE; boolean_t ip6_asp_table_held = B_FALSE; + ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; ill = ipif->ipif_ill; @@ -2479,7 +2509,7 @@ ipif_recreate_interface_routes_v6(ipif_t *old_ipif, ipif_t *ipif) * address selection to ipif's assigned to the same link as the * interface. */ - if (ip6_asp_can_lookup()) { + if (ip6_asp_can_lookup(ipst)) { ip6_asp_table_held = B_TRUE; nipif = ipif_select_source_v6(ill, &ipif->ipif_v6subnet, RESTRICT_TO_GROUP, IPV6_PREFER_SRC_DEFAULT, @@ -2510,7 +2540,8 @@ ipif_recreate_interface_routes_v6(ipif_t *old_ipif, ipif_t *ipif) 0, &ire_uinfo_null, NULL, - NULL); + NULL, + ipst); if (ire != NULL) { ire_t *ret_ire; @@ -2537,7 +2568,7 @@ ipif_recreate_interface_routes_v6(ipif_t *old_ipif, ipif_t *ipif) */ ire_refrele(ipif_ire); if (ip6_asp_table_held) - ip6_asp_table_refrele(); + ip6_asp_table_refrele(ipst); if (nipif_refheld) ipif_refrele(nipif); } @@ -2779,6 +2810,7 @@ ipif_up_done_v6(ipif_t *ipif) boolean_t ire_added = B_FALSE; boolean_t loopback = B_FALSE; boolean_t ip6_asp_table_held = B_FALSE; + ip_stack_t *ipst = ill->ill_ipst; ip1dbg(("ipif_up_done_v6(%s:%u)\n", ipif->ipif_ill->ill_name, ipif->ipif_id)); @@ -2847,7 +2879,7 @@ ipif_up_done_v6(ipif_t *ipif) * Can't use our source address. Select a different * source address for the IRE_INTERFACE and IRE_LOCAL */ - if (ip6_asp_can_lookup()) { + if (ip6_asp_can_lookup(ipst)) { ip6_asp_table_held = B_TRUE; src_ipif = ipif_select_source_v6(ipif->ipif_ill, &ipif->ipif_v6subnet, RESTRICT_TO_NONE, @@ -2881,13 +2913,13 @@ ipif_up_done_v6(ipif_t *ipif) /* Register the source address for __sin6_src_id */ err = ip_srcid_insert(&ipif->ipif_v6lcl_addr, - ipif->ipif_zoneid); + ipif->ipif_zoneid, ipst); if (err != 0) { ip0dbg(("ipif_up_done_v6: srcid_insert %d\n", err)); if (src_ipif_held) ipif_refrele(src_ipif); if (ip6_asp_table_held) - ip6_asp_table_refrele(); + ip6_asp_table_refrele(ipst); return (err); } /* @@ -2917,7 +2949,8 @@ ipif_up_done_v6(ipif_t *ipif) (ipif->ipif_flags & IPIF_PRIVATE) ? 
RTF_PRIVATE : 0, &ire_uinfo_null, NULL, - NULL); + NULL, + ipst); } /* @@ -2959,7 +2992,8 @@ ipif_up_done_v6(ipif_t *ipif) (ipif->ipif_flags & IPIF_PRIVATE) ? RTF_PRIVATE : 0, &ire_uinfo_null, NULL, - NULL); + NULL, + ipst); } /* @@ -2987,7 +3021,7 @@ ipif_up_done_v6(ipif_t *ipif) */ isdup = ire_ftable_lookup_v6(first_addr, &prefix_mask, 0, IRE_IF_NORESOLVER, ill->ill_ipif, NULL, ALL_ZONES, 0, NULL, - (MATCH_IRE_SRC | MATCH_IRE_MASK)); + (MATCH_IRE_SRC | MATCH_IRE_MASK), ipst); if (isdup == NULL) { ip1dbg(("ipif_up_done_v6: creating if IRE %d for %s", @@ -3012,7 +3046,8 @@ ipif_up_done_v6(ipif_t *ipif) RTF_UP, &ire_uinfo_null, NULL, - NULL); + NULL, + ipst); } else { ire_refrele(isdup); } @@ -3036,13 +3071,13 @@ ipif_up_done_v6(ipif_t *ipif) * now under ill_g_lock, and if it fails got bad, and remove * from group also */ - rw_enter(&ill_g_lock, RW_READER); - mutex_enter(&ip_addr_avail_lock); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); + mutex_enter(&ipst->ips_ip_addr_avail_lock); ill->ill_ipif_up_count++; ipif->ipif_flags |= IPIF_UP; err = ip_addr_availability_check(ipif); - mutex_exit(&ip_addr_avail_lock); - rw_exit(&ill_g_lock); + mutex_exit(&ipst->ips_ip_addr_avail_lock); + rw_exit(&ipst->ips_ill_g_lock); if (err != 0) { /* @@ -3087,7 +3122,7 @@ ipif_up_done_v6(ipif_t *ipif) (void) ire_add(irep1, NULL, NULL, NULL, B_FALSE); } if (ip6_asp_table_held) { - ip6_asp_table_refrele(); + ip6_asp_table_refrele(ipst); ip6_asp_table_held = B_FALSE; } ire_added = B_TRUE; @@ -3110,7 +3145,7 @@ ipif_up_done_v6(ipif_t *ipif) ASSERT(phyi->phyint_groupname != NULL); if (ill->ill_ipif_up_count == 1) { ASSERT(ill->ill_group == NULL); - err = illgrp_insert(&illgrp_head_v6, ill, + err = illgrp_insert(&ipst->ips_illgrp_head_v6, ill, phyi->phyint_groupname, NULL, B_TRUE); if (err != 0) { ip1dbg(("ipif_up_done_v6: illgrp allocation " @@ -3181,7 +3216,7 @@ ipif_up_done_v6(ipif_t *ipif) bad: if (ip6_asp_table_held) - ip6_asp_table_refrele(); + ip6_asp_table_refrele(ipst); /* * We don't have to bother removing from ill groups because * @@ -3206,7 +3241,7 @@ bad: } } - (void) ip_srcid_remove(&ipif->ipif_v6lcl_addr, ipif->ipif_zoneid); + (void) ip_srcid_remove(&ipif->ipif_v6lcl_addr, ipif->ipif_zoneid, ipst); if (ipif_saved_irep != NULL) { kmem_free(ipif_saved_irep, @@ -3302,6 +3337,8 @@ ip_siocsetndp_v6(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, struct lifreq *lifr; lif_nd_req_t *lnr; + ASSERT(!(q->q_flag & QREADR) && q->q_next == NULL); + lifr = (struct lifreq *)mp->b_cont->b_cont->b_rptr; lnr = &lifr->lifr_nd; /* Only allow for logical unit zero i.e. not on "le0:17" */ diff --git a/usr/src/uts/common/inet/ip/ip6_ire.c b/usr/src/uts/common/inet/ip/ip6_ire.c index 2778a75312..655417a58f 100644 --- a/usr/src/uts/common/inet/ip/ip6_ire.c +++ b/usr/src/uts/common/inet/ip/ip6_ire.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ /* @@ -62,9 +62,6 @@ #include <sys/tsol/label.h> #include <sys/tsol/tnet.h> -irb_t *ip_forwarding_table_v6[IP6_MASK_TABLE_SIZE]; -/* This is dynamically allocated in ip_ire_init */ -irb_t *ip_cache_table_v6; static ire_t ire_null; static ire_t *ire_ihandle_lookup_onlink_v6(ire_t *cire); @@ -85,6 +82,7 @@ int ip_ire_report_v6(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *ioc_cr) { zoneid_t zoneid; + ip_stack_t *ipst; (void) mi_mpprintf(mp, "IRE " MI_COL_HDRPAD_STR @@ -118,9 +116,10 @@ ip_ire_report_v6(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *ioc_cr) zoneid = Q_TO_CONN(q)->conn_zoneid; if (zoneid == GLOBAL_ZONEID) zoneid = ALL_ZONES; + ipst = CONNQ_TO_IPST(q); - ire_walk_v6(ire_report_ftable_v6, (char *)mp->b_cont, zoneid); - ire_walk_v6(ire_report_ctable_v6, (char *)mp->b_cont, zoneid); + ire_walk_v6(ire_report_ftable_v6, (char *)mp->b_cont, zoneid, ipst); + ire_walk_v6(ire_report_ctable_v6, (char *)mp->b_cont, zoneid, ipst); return (0); } @@ -266,7 +265,7 @@ ire_init_v6(ire_t *ire, const in6_addr_t *v6addr, mblk_t *fp_mp, queue_t *rfq, queue_t *stq, ushort_t type, mblk_t *dlureq_mp, ipif_t *ipif, const in6_addr_t *v6cmask, uint32_t phandle, uint32_t ihandle, uint_t flags, const iulp_t *ulp_info, - tsol_gc_t *gc, tsol_gcgrp_t *gcgrp) + tsol_gc_t *gc, tsol_gcgrp_t *gcgrp, ip_stack_t *ipst) { /* @@ -301,7 +300,7 @@ ire_init_v6(ire_t *ire, const in6_addr_t *v6addr, } } - BUMP_IRE_STATS(ire_stats_v6, ire_stats_alloced); + BUMP_IRE_STATS(ipst->ips_ire_stats_v6, ire_stats_alloced); ire->ire_addr_v6 = *v6addr; if (v6src_addr != NULL) @@ -328,7 +327,7 @@ ire_init_v6(ire_t *ire, const in6_addr_t *v6addr, /* ire_init_common will free the mblks upon encountering any failure */ if (!ire_init_common(ire, max_fragp, fp_mp, rfq, stq, type, dlureq_mp, ipif, NULL, phandle, ihandle, flags, IPV6_VERSION, ulp_info, - gc, gcgrp)) + gc, gcgrp, ipst)) return (NULL); return (ire); @@ -345,7 +344,7 @@ ire_create_mp_v6(const in6_addr_t *v6addr, const in6_addr_t *v6mask, mblk_t *fp_mp, queue_t *rfq, queue_t *stq, ushort_t type, mblk_t *dlureq_mp, ipif_t *ipif, const in6_addr_t *v6cmask, uint32_t phandle, uint32_t ihandle, uint_t flags, const iulp_t *ulp_info, - tsol_gc_t *gc, tsol_gcgrp_t *gcgrp) + tsol_gc_t *gc, tsol_gcgrp_t *gcgrp, ip_stack_t *ipst) { ire_t *ire; ire_t *ret_ire; @@ -370,7 +369,7 @@ ire_create_mp_v6(const in6_addr_t *v6addr, const in6_addr_t *v6mask, ret_ire = ire_init_v6(ire, v6addr, v6mask, v6src_addr, v6gateway, NULL, fp_mp, rfq, stq, type, dlureq_mp, ipif, v6cmask, phandle, - ihandle, flags, ulp_info, gc, gcgrp); + ihandle, flags, ulp_info, gc, gcgrp, ipst); if (ret_ire == NULL) { freeb(ire->ire_mp); @@ -391,7 +390,7 @@ ire_create_v6(const in6_addr_t *v6addr, const in6_addr_t *v6mask, uint_t *max_fragp, mblk_t *fp_mp, queue_t *rfq, queue_t *stq, ushort_t type, mblk_t *dlureq_mp, ipif_t *ipif, const in6_addr_t *v6cmask, uint32_t phandle, uint32_t ihandle, uint_t flags, const iulp_t *ulp_info, - tsol_gc_t *gc, tsol_gcgrp_t *gcgrp) + tsol_gc_t *gc, tsol_gcgrp_t *gcgrp, ip_stack_t *ipst) { ire_t *ire; ire_t *ret_ire; @@ -407,7 +406,7 @@ ire_create_v6(const in6_addr_t *v6addr, const in6_addr_t *v6mask, ret_ire = ire_init_v6(ire, v6addr, v6mask, v6src_addr, v6gateway, max_fragp, fp_mp, rfq, stq, type, dlureq_mp, ipif, v6cmask, phandle, - ihandle, flags, ulp_info, gc, gcgrp); + ihandle, flags, ulp_info, gc, gcgrp, ipst); if (ret_ire == NULL) { kmem_cache_free(ire_cache, ire); @@ -428,7 +427,7 @@ ire_create_v6(const in6_addr_t *v6addr, const in6_addr_t *v6mask, * Supports link-local addresses by 
following the ipif/ill when recursing. */ ire_t * -ire_lookup_multi_v6(const in6_addr_t *group, zoneid_t zoneid) +ire_lookup_multi_v6(const in6_addr_t *group, zoneid_t zoneid, ip_stack_t *ipst) { ire_t *ire; ipif_t *ipif = NULL; @@ -436,7 +435,7 @@ ire_lookup_multi_v6(const in6_addr_t *group, zoneid_t zoneid) in6_addr_t gw_addr_v6; ire = ire_ftable_lookup_v6(group, 0, 0, 0, NULL, NULL, - zoneid, 0, NULL, MATCH_IRE_DEFAULT); + zoneid, 0, NULL, MATCH_IRE_DEFAULT, ipst); /* We search a resolvable ire in case of multirouting. */ if ((ire != NULL) && (ire->ire_flags & RTF_MULTIRT)) { @@ -447,7 +446,7 @@ ire_lookup_multi_v6(const in6_addr_t *group, zoneid_t zoneid) * IRE_REFRELE the original ire and change it. */ (void) ire_multirt_lookup_v6(&cire, &ire, MULTIRT_CACHEGW, - NULL); + NULL, ipst); if (cire != NULL) ire_refrele(cire); } @@ -476,7 +475,7 @@ ire_lookup_multi_v6(const in6_addr_t *group, zoneid_t zoneid) ire_refrele(ire); ire = ire_ftable_lookup_v6(&gw_addr_v6, 0, 0, IRE_INTERFACE, ipif, NULL, zoneid, 0, - NULL, match_flags); + NULL, match_flags, ipst); return (ire); case IRE_IF_NORESOLVER: case IRE_IF_RESOLVER: @@ -493,15 +492,15 @@ ire_lookup_multi_v6(const in6_addr_t *group, zoneid_t zoneid) * Preference for IRE_LOCAL entries. */ ire_t * -ire_lookup_local_v6(zoneid_t zoneid) +ire_lookup_local_v6(zoneid_t zoneid, ip_stack_t *ipst) { ire_t *ire; irb_t *irb; ire_t *maybe = NULL; int i; - for (i = 0; i < ip6_cache_table_size; i++) { - irb = &ip_cache_table_v6[i]; + for (i = 0; i < ipst->ips_ip6_cache_table_size; i++) { + irb = &ipst->ips_ip_cache_table_v6[i]; if (irb->irb_ire == NULL) continue; rw_enter(&irb->irb_lock, RW_READER); @@ -604,6 +603,7 @@ ire_add_v6(ire_t **ire_p, queue_t *q, mblk_t *mp, ipsq_func_t func) boolean_t ndp_g_lock_held = B_FALSE; ire_t *ire = *ire_p; int error; + ip_stack_t *ipst = ire->ire_ipst; ASSERT(ire->ire_ipversion == IPV6_VERSION); ASSERT(ire->ire_mp == NULL); /* Calls should go through ire_add */ @@ -648,44 +648,48 @@ ire_add_v6(ire_t **ire_p, queue_t *q, mblk_t *mp, ipsq_func_t func) if ((ire->ire_type & IRE_CACHETABLE) == 0) { /* IRE goes into Forward Table */ mask_table_index = ip_mask_to_plen_v6(&ire->ire_mask_v6); - if ((ip_forwarding_table_v6[mask_table_index]) == NULL) { + if ((ipst->ips_ip_forwarding_table_v6[mask_table_index]) == + NULL) { irb_t *ptr; int i; - ptr = (irb_t *)mi_zalloc((ip6_ftable_hash_size * - sizeof (irb_t))); + ptr = (irb_t *)mi_zalloc(( + ipst->ips_ip6_ftable_hash_size * sizeof (irb_t))); if (ptr == NULL) { ire_delete(ire); *ire_p = NULL; return (ENOMEM); } - for (i = 0; i < ip6_ftable_hash_size; i++) { + for (i = 0; i < ipst->ips_ip6_ftable_hash_size; i++) { rw_init(&ptr[i].irb_lock, NULL, RW_DEFAULT, NULL); } - mutex_enter(&ire_ft_init_lock); - if (ip_forwarding_table_v6[mask_table_index] == NULL) { - ip_forwarding_table_v6[mask_table_index] = ptr; - mutex_exit(&ire_ft_init_lock); + mutex_enter(&ipst->ips_ire_ft_init_lock); + if (ipst->ips_ip_forwarding_table_v6[ + mask_table_index] == NULL) { + ipst->ips_ip_forwarding_table_v6[ + mask_table_index] = ptr; + mutex_exit(&ipst->ips_ire_ft_init_lock); } else { /* * Some other thread won the race in * initializing the forwarding table at the * same index. 
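A sketch of the two-level structure that ire_add_v6() fills in above: the first index is the prefix length of the route's mask (so the per-stack table has one slot per possible /0../128), the second a hash of the masked destination. The hash function and sizes below are illustrative, not the kernel's:

#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <netinet/in.h>
#include <arpa/inet.h>

#define	IP6_MASK_TABLE_SIZE	129	/* prefix lengths 0..128 */
#define	FTABLE_HASH_SIZE	32

static unsigned
masked_hash(const struct in6_addr *addr, const struct in6_addr *mask)
{
	uint32_t h = 0;

	for (int i = 0; i < 16; i++)
		h = h * 31 + (addr->s6_addr[i] & mask->s6_addr[i]);
	return (h % FTABLE_HASH_SIZE);
}

int
main(void)
{
	struct in6_addr dst, mask;

	inet_pton(AF_INET6, "2001:db8::", &dst);
	memset(&mask, 0, sizeof (mask));
	memset(&mask, 0xff, 8);			/* a /64 mask */
	printf("bucket: table[%d][%u]\n", 64, masked_hash(&dst, &mask));
	return (0);
}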
*/ - mutex_exit(&ire_ft_init_lock); - for (i = 0; i < ip6_ftable_hash_size; i++) { + mutex_exit(&ipst->ips_ire_ft_init_lock); + for (i = 0; i < ipst->ips_ip6_ftable_hash_size; + i++) { rw_destroy(&ptr[i].irb_lock); } mi_free(ptr); } } - irb_ptr = &(ip_forwarding_table_v6[mask_table_index][ + irb_ptr = &(ipst->ips_ip_forwarding_table_v6[mask_table_index][ IRE_ADDR_MASK_HASH_V6(ire->ire_addr_v6, ire->ire_mask_v6, - ip6_ftable_hash_size)]); + ipst->ips_ip6_ftable_hash_size)]); } else { - irb_ptr = &(ip_cache_table_v6[IRE_ADDR_HASH_V6( - ire->ire_addr_v6, ip6_cache_table_size)]); + irb_ptr = &(ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6( + ire->ire_addr_v6, ipst->ips_ip6_cache_table_size)]); } /* * For xresolv interfaces (v6 interfaces with an external @@ -767,7 +771,7 @@ ire_add_v6(ire_t **ire_p, queue_t *q, mblk_t *mp, ipsq_func_t func) * To avoid lock order problems, get the ndp6.ndp_g_lock now itself. */ if (ire->ire_type == IRE_CACHE) { - mutex_enter(&ndp6.ndp_g_lock); + mutex_enter(&ipst->ips_ndp6->ndp_g_lock); ndp_g_lock_held = B_TRUE; } @@ -779,7 +783,7 @@ ire_add_v6(ire_t **ire_p, queue_t *q, mblk_t *mp, ipsq_func_t func) error = ire_atomic_start(irb_ptr, ire, q, mp, func); if (error != 0) { if (ndp_g_lock_held) - mutex_exit(&ndp6.ndp_g_lock); + mutex_exit(&ipst->ips_ndp6->ndp_g_lock); /* * We don't know whether it is a valid ipif or not. * So, set it to NULL. This assumes that the ire has not added @@ -866,7 +870,7 @@ ire_add_v6(ire_t **ire_p, queue_t *q, mblk_t *mp, ipsq_func_t func) (void *)ire1, (void *)ire)); IRE_REFHOLD(ire1); if (ndp_g_lock_held) - mutex_exit(&ndp6.ndp_g_lock); + mutex_exit(&ipst->ips_ndp6->ndp_g_lock); ire_atomic_end(irb_ptr, ire); ire_delete(ire); if (pire != NULL) { @@ -941,7 +945,7 @@ ire_add_v6(ire_t **ire_p, queue_t *q, mblk_t *mp, ipsq_func_t func) (nce->nce_state == ND_UNREACHABLE)) { failed: if (ndp_g_lock_held) - mutex_exit(&ndp6.ndp_g_lock); + mutex_exit(&ipst->ips_ndp6->ndp_g_lock); if (nce != NULL) mutex_exit(&nce->nce_lock); ire_atomic_end(irb_ptr, ire); @@ -997,8 +1001,8 @@ failed: * We keep a count of default gateways which is used when * assigning them as routes. */ - ipv6_ire_default_count++; - ASSERT(ipv6_ire_default_count != 0); /* Wraparound */ + ipst->ips_ipv6_ire_default_count++; + ASSERT(ipst->ips_ipv6_ire_default_count != 0); /* Wraparound */ } /* Insert at *irep */ ire1 = *irep; @@ -1034,7 +1038,7 @@ failed: * up the reference count on this yet. */ IRE_REFHOLD_LOCKED(ire); - BUMP_IRE_STATS(ire_stats_v6, ire_stats_inserted); + BUMP_IRE_STATS(ipst->ips_ire_stats_v6, ire_stats_inserted); irb_ptr->irb_ire_cnt++; if (ire->ire_marks & IRE_MARK_TEMPORARY) irb_ptr->irb_tmp_ire_cnt++; @@ -1050,7 +1054,7 @@ failed: } if (ndp_g_lock_held) - mutex_exit(&ndp6.ndp_g_lock); + mutex_exit(&ipst->ips_ndp6->ndp_g_lock); ire_atomic_end(irb_ptr, ire); if (pire != NULL) { @@ -1072,7 +1076,8 @@ failed: if (ip_mask_to_plen_v6(&ire->ire_mask_v6) == IPV6_ABITS) { ire_t *lire; lire = ire_ctable_lookup_v6(&ire->ire_addr_v6, NULL, - IRE_CACHE, NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE); + IRE_CACHE, NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE, + ipst); if (lire != NULL) { ire_refrele(lire); ire_flush_cache_v6(ire, IRE_FLUSH_ADD); @@ -1093,7 +1098,7 @@ failed: * when a default gateway is going away. 
*/ static void -ire_delete_host_redirects_v6(const in6_addr_t *gateway) +ire_delete_host_redirects_v6(const in6_addr_t *gateway, ip_stack_t *ipst) { irb_t *irb_ptr; irb_t *irb; @@ -1102,10 +1107,10 @@ ire_delete_host_redirects_v6(const in6_addr_t *gateway) int i; /* get the hash table for HOST routes */ - irb_ptr = ip_forwarding_table_v6[(IP6_MASK_TABLE_SIZE - 1)]; + irb_ptr = ipst->ips_ip_forwarding_table_v6[(IP6_MASK_TABLE_SIZE - 1)]; if (irb_ptr == NULL) return; - for (i = 0; (i < ip6_ftable_hash_size); i++) { + for (i = 0; (i < ipst->ips_ip6_ftable_hash_size); i++) { irb = &irb_ptr[i]; IRB_REFHOLD(irb); for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { @@ -1131,16 +1136,17 @@ ire_delete_host_redirects_v6(const in6_addr_t *gateway) * called by ip_arp_news and the match is always only on the address. */ void -ip_ire_clookup_and_delete_v6(const in6_addr_t *addr) +ip_ire_clookup_and_delete_v6(const in6_addr_t *addr, ip_stack_t *ipst) { irb_t *irb; ire_t *cire; boolean_t found = B_FALSE; - irb = &ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr, ip6_cache_table_size)]; + irb = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr, + ipst->ips_ip6_cache_table_size)]; IRB_REFHOLD(irb); for (cire = irb->irb_ire; cire != NULL; cire = cire->ire_next) { - if (cire->ire_marks == IRE_MARK_CONDEMNED) + if (cire->ire_marks & IRE_MARK_CONDEMNED) continue; if (IN6_ARE_ADDR_EQUAL(&cire->ire_addr_v6, addr)) { @@ -1171,6 +1177,7 @@ void ire_delete_v6(ire_t *ire) { in6_addr_t gw_addr_v6; + ip_stack_t *ipst = ire->ire_ipst; ASSERT(ire->ire_refcnt >= 1); ASSERT(ire->ire_ipversion == IPV6_VERSION); @@ -1186,7 +1193,7 @@ ire_delete_v6(ire_t *ire) mutex_enter(&ire->ire_lock); gw_addr_v6 = ire->ire_gateway_addr_v6; mutex_exit(&ire->ire_lock); - ire_delete_host_redirects_v6(&gw_addr_v6); + ire_delete_host_redirects_v6(&gw_addr_v6, ipst); } } @@ -1276,6 +1283,7 @@ ire_flush_cache_v6(ire_t *ire, int flag) int i; ire_t *cire; irb_t *irb; + ip_stack_t *ipst = ire->ire_ipst; if (ire->ire_type & IRE_CACHE) return; @@ -1293,8 +1301,8 @@ ire_flush_cache_v6(ire_t *ire, int flag) * due to the addition of * new IRE. */ - for (i = 0; i < ip6_cache_table_size; i++) { - irb = &ip_cache_table_v6[i]; + for (i = 0; i < ipst->ips_ip6_cache_table_size; i++) { + irb = &ipst->ips_ip_cache_table_v6[i]; if ((cire = irb->irb_ire) == NULL) continue; IRB_REFHOLD(irb); @@ -1364,8 +1372,8 @@ ire_flush_cache_v6(ire_t *ire, int flag) * handle in the IRE as this IRE is * being deleted/changed. 
*/ - for (i = 0; i < ip6_cache_table_size; i++) { - irb = &ip_cache_table_v6[i]; + for (i = 0; i < ipst->ips_ip6_cache_table_size; i++) { + irb = &ipst->ips_ip_cache_table_v6[i]; if ((cire = irb->irb_ire) == NULL) continue; IRB_REFHOLD(irb); @@ -1567,7 +1575,7 @@ ire_match_args_v6(ire_t *ire, const in6_addr_t *addr, const in6_addr_t *mask, ire_t * ire_route_lookup_v6(const in6_addr_t *addr, const in6_addr_t *mask, const in6_addr_t *gateway, int type, const ipif_t *ipif, ire_t **pire, - zoneid_t zoneid, const ts_label_t *tsl, int flags) + zoneid_t zoneid, const ts_label_t *tsl, int flags, ip_stack_t *ipst) { ire_t *ire = NULL; @@ -1589,13 +1597,13 @@ ire_route_lookup_v6(const in6_addr_t *addr, const in6_addr_t *mask, */ if ((flags & MATCH_IRE_TYPE) == 0 || (type & IRE_CACHETABLE) != 0) { ire = ire_ctable_lookup_v6(addr, gateway, type, ipif, zoneid, - tsl, flags); + tsl, flags, ipst); if (ire != NULL) return (ire); } if ((flags & MATCH_IRE_TYPE) == 0 || (type & IRE_FORWARDTABLE) != 0) { ire = ire_ftable_lookup_v6(addr, mask, gateway, type, ipif, - pire, zoneid, 0, tsl, flags); + pire, zoneid, 0, tsl, flags, ipst); } return (ire); } @@ -1634,7 +1642,8 @@ ire_route_lookup_v6(const in6_addr_t *addr, const in6_addr_t *mask, ire_t * ire_ftable_lookup_v6(const in6_addr_t *addr, const in6_addr_t *mask, const in6_addr_t *gateway, int type, const ipif_t *ipif, ire_t **pire, - zoneid_t zoneid, uint32_t ihandle, const ts_label_t *tsl, int flags) + zoneid_t zoneid, uint32_t ihandle, const ts_label_t *tsl, int flags, + ip_stack_t *ipst) { irb_t *irb_ptr; ire_t *rire; @@ -1674,10 +1683,11 @@ ire_ftable_lookup_v6(const in6_addr_t *addr, const in6_addr_t *mask, uint_t masklen; masklen = ip_mask_to_plen_v6(mask); - if (ip_forwarding_table_v6[masklen] == NULL) + if (ipst->ips_ip_forwarding_table_v6[masklen] == NULL) return (NULL); - irb_ptr = &(ip_forwarding_table_v6[masklen][ - IRE_ADDR_MASK_HASH_V6(*addr, *mask, ip6_ftable_hash_size)]); + irb_ptr = &(ipst->ips_ip_forwarding_table_v6[masklen][ + IRE_ADDR_MASK_HASH_V6(*addr, *mask, + ipst->ips_ip6_ftable_hash_size)]); rw_enter(&irb_ptr->irb_lock, RW_READER); for (ire = irb_ptr->irb_ire; ire != NULL; ire = ire->ire_next) { @@ -1697,12 +1707,12 @@ ire_ftable_lookup_v6(const in6_addr_t *addr, const in6_addr_t *mask, for (i = (IP6_MASK_TABLE_SIZE - 1); i > 0; i--) { in6_addr_t tmpmask; - if ((ip_forwarding_table_v6[i]) == NULL) + if ((ipst->ips_ip_forwarding_table_v6[i]) == NULL) continue; (void) ip_plen_to_mask_v6(i, &tmpmask); - irb_ptr = &ip_forwarding_table_v6[i][ + irb_ptr = &ipst->ips_ip_forwarding_table_v6[i][ IRE_ADDR_MASK_HASH_V6(*addr, tmpmask, - ip6_ftable_hash_size)]; + ipst->ips_ip6_ftable_hash_size)]; rw_enter(&irb_ptr->irb_lock, RW_READER); for (ire = irb_ptr->irb_ire; ire != NULL; ire = ire->ire_next) { @@ -1731,11 +1741,11 @@ ire_ftable_lookup_v6(const in6_addr_t *addr, const in6_addr_t *mask, */ if ((flags & (MATCH_IRE_TYPE | MATCH_IRE_MASK)) == MATCH_IRE_TYPE && (type & (IRE_DEFAULT | IRE_INTERFACE))) { - if (ip_forwarding_table_v6[0] != NULL) { + if (ipst->ips_ip_forwarding_table_v6[0] != NULL) { /* addr & mask is zero for defaults */ - irb_ptr = &ip_forwarding_table_v6[0][ + irb_ptr = &ipst->ips_ip_forwarding_table_v6[0][ IRE_ADDR_HASH_V6(ipv6_all_zeros, - ip6_ftable_hash_size)]; + ipst->ips_ip6_ftable_hash_size)]; rw_enter(&irb_ptr->irb_lock, RW_READER); for (ire = irb_ptr->irb_ire; ire != NULL; ire = ire->ire_next) { @@ -1766,9 +1776,9 @@ ire_ftable_lookup_v6(const in6_addr_t *addr, const in6_addr_t *mask, uint_t g_index; uint_t index; - if 
(ip_forwarding_table_v6[0] == NULL) + if (ipst->ips_ip_forwarding_table_v6[0] == NULL) return (NULL); - irb_ptr = &(ip_forwarding_table_v6[0])[0]; + irb_ptr = &(ipst->ips_ip_forwarding_table_v6[0])[0]; /* * Keep a tab on the bucket while looking the IRE_DEFAULT @@ -1790,9 +1800,9 @@ ire_ftable_lookup_v6(const in6_addr_t *addr, const in6_addr_t *mask, * the IRE bucket, ipv6_ire_default_count can only increase so * we can't reach the end of the hash list unexpectedly. */ - if (ipv6_ire_default_count != 0) { - g_index = ipv6_ire_default_index++; - index = g_index % ipv6_ire_default_count; + if (ipst->ips_ipv6_ire_default_count != 0) { + g_index = ipst->ips_ipv6_ire_default_index++; + index = g_index % ipst->ips_ipv6_ire_default_count; while (index != 0) { if (!(ire->ire_type & IRE_INTERFACE)) index--; @@ -1847,7 +1857,8 @@ ire_ftable_lookup_v6(const in6_addr_t *addr, const in6_addr_t *mask, match_flags = MATCH_IRE_ILL_GROUP | MATCH_IRE_SECATTR; rire = ire_ctable_lookup_v6(&gw_addr_v6, NULL, - 0, ire->ire_ipif, zoneid, tsl, match_flags); + 0, ire->ire_ipif, zoneid, tsl, match_flags, + ipst); if (rire != NULL) { nce = rire->ire_nce; if (nce != NULL && @@ -1868,7 +1879,8 @@ ire_ftable_lookup_v6(const in6_addr_t *addr, const in6_addr_t *mask, saved_ire = NULL; } ire_refrele(rire); - } else if (ipv6_ire_default_count > 1 && + } else if (ipst-> + ips_ipv6_ire_default_count > 1 && zoneid != ALL_ZONES) { /* * When we're in a local zone, we're @@ -1883,7 +1895,7 @@ ire_ftable_lookup_v6(const in6_addr_t *addr, const in6_addr_t *mask, rire = ire_route_lookup_v6(&gw_addr_v6, NULL, NULL, IRE_INTERFACE, ire->ire_ipif, NULL, - zoneid, tsl, match_flags); + zoneid, tsl, match_flags, ipst); if (rire != NULL) { ire_refrele(rire); saved_ire = ire; @@ -1987,7 +1999,7 @@ found_ire_held: mutex_exit(&ire->ire_lock); ire = ire_route_lookup_v6(&gw_addr_v6, NULL, NULL, 0, - ire->ire_ipif, NULL, zoneid, tsl, match_flags); + ire->ire_ipif, NULL, zoneid, tsl, match_flags, ipst); if (ire == NULL) { /* * In this case we have to deal with the @@ -2034,7 +2046,7 @@ found_ire_held: ire_refrele(ire); ire = ire_route_lookup_v6(&gw_addr_v6, NULL, NULL, (IRE_CACHETABLE | IRE_INTERFACE), gw_ipif, NULL, zoneid, - NULL, match_flags); + NULL, match_flags, ipst); if (ire == NULL) { /* * In this case we have to deal with the @@ -2082,12 +2094,14 @@ found_ire_held: * be created on demand by ip_newroute_v6. 
*/ void -ire_clookup_delete_cache_gw_v6(const in6_addr_t *addr, zoneid_t zoneid) +ire_clookup_delete_cache_gw_v6(const in6_addr_t *addr, zoneid_t zoneid, + ip_stack_t *ipst) { irb_t *irb; ire_t *ire; - irb = &ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr, ip6_cache_table_size)]; + irb = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr, + ipst->ips_ip6_cache_table_size)]; IRB_REFHOLD(irb); for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { if (ire->ire_marks & IRE_MARK_CONDEMNED) @@ -2101,7 +2115,7 @@ ire_clookup_delete_cache_gw_v6(const in6_addr_t *addr, zoneid_t zoneid) } IRB_REFRELE(irb); - ire_walk_v6(ire_delete_cache_gw_v6, (char *)addr, zoneid); + ire_walk_v6(ire_delete_cache_gw_v6, (char *)addr, zoneid, ipst); } /* @@ -2113,7 +2127,7 @@ ire_clookup_delete_cache_gw_v6(const in6_addr_t *addr, zoneid_t zoneid) ire_t * ire_ctable_lookup_v6(const in6_addr_t *addr, const in6_addr_t *gateway, int type, const ipif_t *ipif, zoneid_t zoneid, const ts_label_t *tsl, - int flags) + int flags, ip_stack_t *ipst) { ire_t *ire; irb_t *irb_ptr; @@ -2128,8 +2142,8 @@ ire_ctable_lookup_v6(const in6_addr_t *addr, const in6_addr_t *gateway, (ipif == NULL)) return (NULL); - irb_ptr = &ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr, - ip6_cache_table_size)]; + irb_ptr = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr, + ipst->ips_ip6_cache_table_size)]; rw_enter(&irb_ptr->irb_lock, RW_READER); for (ire = irb_ptr->irb_ire; ire; ire = ire->ire_next) { if (ire->ire_marks & IRE_MARK_CONDEMNED) @@ -2167,13 +2181,13 @@ ire_ctable_lookup_v6(const in6_addr_t *addr, const in6_addr_t *gateway, */ ire_t * ire_cache_lookup_v6(const in6_addr_t *addr, zoneid_t zoneid, - const ts_label_t *tsl) + const ts_label_t *tsl, ip_stack_t *ipst) { irb_t *irb_ptr; ire_t *ire; - irb_ptr = &ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr, - ip6_cache_table_size)]; + irb_ptr = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr, + ipst->ips_ip6_cache_table_size)]; rw_enter(&irb_ptr->irb_lock, RW_READER); for (ire = irb_ptr->irb_ire; ire; ire = ire->ire_next) { if (ire->ire_marks & (IRE_MARK_CONDEMNED|IRE_MARK_HIDDEN)) @@ -2198,9 +2212,9 @@ ire_cache_lookup_v6(const in6_addr_t *addr, zoneid_t zoneid, } if (ire->ire_type == IRE_LOCAL) { - if (ip_restrict_interzone_loopback && + if (ipst->ips_ip_restrict_interzone_loopback && !ire_local_ok_across_zones(ire, zoneid, - (void *)addr, tsl)) + (void *)addr, tsl, ipst)) continue; IRE_REFHOLD(ire); @@ -2230,6 +2244,7 @@ ire_ihandle_lookup_onlink_v6(ire_t *cire) int i; int j; irb_t *irb_ptr; + ip_stack_t *ipst = cire->ire_ipst; ASSERT(cire != NULL); @@ -2241,7 +2256,7 @@ ire_ihandle_lookup_onlink_v6(ire_t *cire) */ ire = ire_ftable_lookup_v6(&cire->ire_addr_v6, &cire->ire_cmask_v6, NULL, IRE_INTERFACE, NULL, NULL, ALL_ZONES, cire->ire_ihandle, - NULL, match_flags); + NULL, match_flags, ipst); if (ire != NULL) return (ire); /* @@ -2271,10 +2286,10 @@ ire_ihandle_lookup_onlink_v6(ire_t *cire) * case because we don't know the mask) */ i = ip_mask_to_plen_v6(&cire->ire_cmask_v6); - if ((ip_forwarding_table_v6[i]) == NULL) + if ((ipst->ips_ip_forwarding_table_v6[i]) == NULL) return (NULL); - for (j = 0; j < ip6_ftable_hash_size; j++) { - irb_ptr = &ip_forwarding_table_v6[i][j]; + for (j = 0; j < ipst->ips_ip6_ftable_hash_size; j++) { + irb_ptr = &ipst->ips_ip_forwarding_table_v6[i][j]; rw_enter(&irb_ptr->irb_lock, RW_READER); for (ire = irb_ptr->irb_ire; ire != NULL; ire = ire->ire_next) { @@ -2309,6 +2324,7 @@ ire_ihandle_lookup_offlink_v6(ire_t *cire, ire_t *pire) int match_flags; in6_addr_t gw_addr; 
ipif_t *gw_ipif; + ip_stack_t *ipst = cire->ire_ipst; ASSERT(cire != NULL && pire != NULL); @@ -2327,7 +2343,7 @@ ire_ihandle_lookup_offlink_v6(ire_t *cire, ire_t *pire) */ ire = ire_ftable_lookup_v6(&cire->ire_addr_v6, &cire->ire_cmask_v6, 0, IRE_INTERFACE, pire->ire_ipif, NULL, ALL_ZONES, cire->ire_ihandle, - NULL, match_flags); + NULL, match_flags, ipst); if (ire != NULL) return (ire); /* @@ -2362,7 +2378,7 @@ ire_ihandle_lookup_offlink_v6(ire_t *cire, ire_t *pire) gw_addr = pire->ire_gateway_addr_v6; mutex_exit(&pire->ire_lock); ire = ire_ftable_lookup_v6(&gw_addr, 0, 0, IRE_OFFSUBNET, - pire->ire_ipif, NULL, ALL_ZONES, 0, NULL, match_flags); + pire->ire_ipif, NULL, ALL_ZONES, 0, NULL, match_flags, ipst); if (ire == NULL) return (NULL); /* @@ -2378,7 +2394,7 @@ ire_ihandle_lookup_offlink_v6(ire_t *cire, ire_t *pire) match_flags |= MATCH_IRE_IHANDLE; ire = ire_ftable_lookup_v6(&gw_addr, 0, 0, IRE_INTERFACE, gw_ipif, NULL, ALL_ZONES, cire->ire_ihandle, - NULL, match_flags); + NULL, match_flags, ipst); return (ire); } @@ -2398,23 +2414,24 @@ ire_t * ipif_to_ire_v6(const ipif_t *ipif) { ire_t *ire; + ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; ASSERT(ipif->ipif_isv6); if (ipif->ipif_ire_type == IRE_LOOPBACK) { ire = ire_ctable_lookup_v6(&ipif->ipif_v6lcl_addr, NULL, IRE_LOOPBACK, ipif, ALL_ZONES, NULL, - (MATCH_IRE_TYPE | MATCH_IRE_IPIF)); + (MATCH_IRE_TYPE | MATCH_IRE_IPIF), ipst); } else if (ipif->ipif_flags & IPIF_POINTOPOINT) { /* In this case we need to lookup destination address. */ ire = ire_ftable_lookup_v6(&ipif->ipif_v6pp_dst_addr, &ipv6_all_ones, NULL, IRE_INTERFACE, ipif, NULL, ALL_ZONES, 0, NULL, (MATCH_IRE_TYPE | MATCH_IRE_IPIF | - MATCH_IRE_MASK)); + MATCH_IRE_MASK), ipst); } else { ire = ire_ftable_lookup_v6(&ipif->ipif_v6subnet, &ipif->ipif_v6net_mask, NULL, IRE_INTERFACE, ipif, NULL, ALL_ZONES, 0, NULL, (MATCH_IRE_TYPE | MATCH_IRE_IPIF | - MATCH_IRE_MASK)); + MATCH_IRE_MASK), ipst); } return (ire); } @@ -2425,7 +2442,8 @@ ipif_to_ire_v6(const ipif_t *ipif) * This only works in the global zone. */ boolean_t -ire_multirt_need_resolve_v6(const in6_addr_t *v6dstp, const ts_label_t *tsl) +ire_multirt_need_resolve_v6(const in6_addr_t *v6dstp, const ts_label_t *tsl, + ip_stack_t *ipst) { ire_t *first_fire; ire_t *first_cire; @@ -2439,7 +2457,7 @@ ire_multirt_need_resolve_v6(const in6_addr_t *v6dstp, const ts_label_t *tsl) /* Retrieve the first IRE_HOST that matches the destination */ first_fire = ire_ftable_lookup_v6(v6dstp, &ipv6_all_ones, 0, IRE_HOST, NULL, NULL, ALL_ZONES, 0, tsl, MATCH_IRE_MASK | MATCH_IRE_TYPE | - MATCH_IRE_SECATTR); + MATCH_IRE_SECATTR, ipst); /* No route at all */ if (first_fire == NULL) { @@ -2450,7 +2468,7 @@ ire_multirt_need_resolve_v6(const in6_addr_t *v6dstp, const ts_label_t *tsl) ASSERT(firb); /* Retrieve the first IRE_CACHE ire for that destination. */ - first_cire = ire_cache_lookup_v6(v6dstp, GLOBAL_ZONEID, tsl); + first_cire = ire_cache_lookup_v6(v6dstp, GLOBAL_ZONEID, tsl, ipst); /* No resolved route. */ if (first_cire == NULL) { @@ -2491,7 +2509,7 @@ ire_multirt_need_resolve_v6(const in6_addr_t *v6dstp, const ts_label_t *tsl) /* At least one route is unresolved; search for a resolvable route. 
*/ if (unres_cnt > 0) resolvable = ire_multirt_lookup_v6(&first_cire, &first_fire, - MULTIRT_USESTAMP|MULTIRT_CACHEGW, tsl); + MULTIRT_USESTAMP|MULTIRT_CACHEGW, tsl, ipst); if (first_fire) ire_refrele(first_fire); @@ -2512,7 +2530,7 @@ ire_multirt_need_resolve_v6(const in6_addr_t *v6dstp, const ts_label_t *tsl) */ boolean_t ire_multirt_lookup_v6(ire_t **ire_arg, ire_t **fire_arg, uint32_t flags, - const ts_label_t *tsl) + const ts_label_t *tsl, ip_stack_t *ipst) { clock_t delta; ire_t *best_fire = NULL; @@ -2558,7 +2576,7 @@ ire_multirt_lookup_v6(ire_t **ire_arg, ire_t **fire_arg, uint32_t flags, * if we don't find one, no route for that dest is * resolved yet. */ - first_cire = ire_cache_lookup_v6(&v6dst, GLOBAL_ZONEID, tsl); + first_cire = ire_cache_lookup_v6(&v6dst, GLOBAL_ZONEID, tsl, ipst); if (first_cire) { cirb = first_cire->ire_bucket; } @@ -2660,7 +2678,7 @@ ire_multirt_lookup_v6(ire_t **ire_arg, ire_t **fire_arg, uint32_t flags, */ gw_ire = ire_route_lookup_v6(&v6gw, 0, 0, 0, NULL, NULL, ALL_ZONES, tsl, MATCH_IRE_RECURSIVE | - MATCH_IRE_SECATTR); + MATCH_IRE_SECATTR, ipst); ip2dbg(("ire_multirt_lookup_v6: looked up gw_ire %p\n", (void *)gw_ire)); @@ -2707,8 +2725,9 @@ ire_multirt_lookup_v6(ire_t **ire_arg, ire_t **fire_arg, uint32_t flags, delta = TICK_TO_MSEC(delta); res = (boolean_t) - ((delta > ip_multirt_resolution_interval) || - (!(flags & MULTIRT_USESTAMP))); + ((delta > ipst-> + ips_ip_multirt_resolution_interval) || + (!(flags & MULTIRT_USESTAMP))); ip2dbg(("ire_multirt_lookup_v6: fire %p, delta %lu, " "res %d\n", @@ -2796,7 +2815,7 @@ ire_multirt_lookup_v6(ire_t **ire_arg, ire_t **fire_arg, uint32_t flags, gw_ire = ire_ftable_lookup_v6(&v6gw, 0, 0, IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, tsl, MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE | - MATCH_IRE_SECATTR); + MATCH_IRE_SECATTR, ipst); /* No resolver for the gateway; we skip this ire. */ if (gw_ire == NULL) { @@ -2869,7 +2888,8 @@ ire_multirt_lookup_v6(ire_t **ire_arg, ire_t **fire_arg, uint32_t flags, delta = TICK_TO_MSEC(delta); res = (boolean_t) - ((delta > ip_multirt_resolution_interval) || + ((delta > ipst-> + ips_ip_multirt_resolution_interval) || (!(flags & MULTIRT_USESTAMP))); ip3dbg(("ire_multirt_lookup_v6: fire %p, delta %lx, " @@ -2952,9 +2972,10 @@ ipif_lookup_multi_ire_v6(ipif_t *ipif, const in6_addr_t *v6dstp) irb_t *irb; in6_addr_t v6gw; int match_flags = MATCH_IRE_TYPE | MATCH_IRE_ILL; + ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, NULL, NULL, ALL_ZONES, 0, - NULL, MATCH_IRE_DEFAULT); + NULL, MATCH_IRE_DEFAULT, ipst); if (ire == NULL) return (NULL); @@ -2980,7 +3001,7 @@ ipif_lookup_multi_ire_v6(ipif_t *ipif, const in6_addr_t *v6dstp) mutex_exit(&ire->ire_lock); gw_ire = ire_ftable_lookup_v6(&v6gw, 0, 0, IRE_INTERFACE, ipif, NULL, ALL_ZONES, 0, - NULL, match_flags); + NULL, match_flags, ipst); if (gw_ire != NULL) { if (save_ire != NULL) { diff --git a/usr/src/uts/common/inet/ip/ip6_rts.c b/usr/src/uts/common/inet/ip/ip6_rts.c index 0fa31914f6..7d2ddd5c04 100644 --- a/usr/src/uts/common/inet/ip/ip6_rts.c +++ b/usr/src/uts/common/inet/ip/ip6_rts.c @@ -1,5 +1,5 @@ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -199,7 +199,7 @@ void ip_rts_change_v6(int type, const in6_addr_t *dst_addr, const in6_addr_t *gw_addr, const in6_addr_t *net_mask, const in6_addr_t *source, const in6_addr_t *author, - int flags, int error, int rtm_addrs) + int flags, int error, int rtm_addrs, ip_stack_t *ipst) { rt_msghdr_t *rtm; mblk_t *mp; @@ -216,5 +216,5 @@ ip_rts_change_v6(int type, const in6_addr_t *dst_addr, rtm->rtm_errno = error; rtm->rtm_flags |= RTF_DONE; rtm->rtm_addrs = rtm_addrs; - rts_queue_input(mp, NULL, AF_INET6); + rts_queue_input(mp, NULL, AF_INET6, ipst); } diff --git a/usr/src/uts/common/inet/ip/ip_ftable.c b/usr/src/uts/common/inet/ip/ip_ftable.c index 0e34f0040c..3adce39f19 100644 --- a/usr/src/uts/common/inet/ip/ip_ftable.c +++ b/usr/src/uts/common/inet/ip/ip_ftable.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -94,9 +94,9 @@ typedef struct ire_ftable_args_s { ire_t *ift_best_ire; } ire_ftable_args_t; -struct radix_node_head *ip_ftable; -static ire_t *route_to_dst(const struct sockaddr *, zoneid_t); -static ire_t *ire_round_robin(irb_t *, zoneid_t, ire_ftable_args_t *); +static ire_t *route_to_dst(const struct sockaddr *, zoneid_t, ip_stack_t *); +static ire_t *ire_round_robin(irb_t *, zoneid_t, ire_ftable_args_t *, + ip_stack_t *); static void ire_del_host_redir(ire_t *, char *); static boolean_t ire_find_best_route(struct radix_node *, void *); @@ -133,7 +133,7 @@ static boolean_t ire_find_best_route(struct radix_node *, void *); ire_t * ire_ftable_lookup(ipaddr_t addr, ipaddr_t mask, ipaddr_t gateway, int type, const ipif_t *ipif, ire_t **pire, zoneid_t zoneid, - uint32_t ihandle, const ts_label_t *tsl, int flags) + uint32_t ihandle, const ts_label_t *tsl, int flags, ip_stack_t *ipst) { ire_t *ire = NULL; ipaddr_t gw_addr; @@ -193,11 +193,11 @@ ire_ftable_lookup(ipaddr_t addr, ipaddr_t mask, ipaddr_t gateway, * rn_match_args.Before dropping the global tree lock, ensure * that the radix node can't be deleted by incrementing ire_refcnt. */ - RADIX_NODE_HEAD_RLOCK(ip_ftable); - rt = (struct rt_entry *)ip_ftable->rnh_matchaddr_args(&rdst, ip_ftable, - ire_find_best_route, &margs); + RADIX_NODE_HEAD_RLOCK(ipst->ips_ip_ftable); + rt = (struct rt_entry *)ipst->ips_ip_ftable->rnh_matchaddr_args(&rdst, + ipst->ips_ip_ftable, ire_find_best_route, &margs); ire = margs.ift_best_ire; - RADIX_NODE_HEAD_UNLOCK(ip_ftable); + RADIX_NODE_HEAD_UNLOCK(ipst->ips_ip_ftable); if (rt == NULL) { return (NULL); @@ -247,7 +247,8 @@ found_default_ire: MATCH_IRE_DEFAULT)) { ire_t *next_ire; - next_ire = ire_round_robin(ire->ire_bucket, zoneid, &margs); + next_ire = ire_round_robin(ire->ire_bucket, zoneid, &margs, + ipst); IRE_REFRELE(ire); if (next_ire != NULL) { ire = next_ire; @@ -310,7 +311,7 @@ found_ire_held: match_flags |= MATCH_IRE_ILL_GROUP; ire = ire_route_lookup(ire->ire_gateway_addr, 0, 0, 0, - ire->ire_ipif, NULL, zoneid, tsl, match_flags); + ire->ire_ipif, NULL, zoneid, tsl, match_flags, ipst); DTRACE_PROBE2(ftable__route__lookup1, (ire_t *), ire, (ire_t *), save_ire); if (ire == NULL || @@ -361,7 +362,7 @@ found_ire_held: ire = ire_route_lookup(gw_addr, 0, 0, (found_incomplete? 
IRE_INTERFACE : (IRE_CACHETABLE | IRE_INTERFACE)), - gw_ipif, NULL, zoneid, tsl, match_flags); + gw_ipif, NULL, zoneid, tsl, match_flags, ipst); DTRACE_PROBE2(ftable__route__lookup2, (ire_t *), ire, (ire_t *), save_ire); if (ire == NULL || @@ -420,11 +421,12 @@ ipif_lookup_multi_ire(ipif_t *ipif, ipaddr_t group) irb_t *irb; ipaddr_t gw_addr; int match_flags = MATCH_IRE_TYPE | MATCH_IRE_ILL; + ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; ASSERT(CLASSD(group)); ire = ire_ftable_lookup(group, 0, 0, 0, NULL, NULL, ALL_ZONES, 0, - NULL, MATCH_IRE_DEFAULT); + NULL, MATCH_IRE_DEFAULT, ipst); if (ire == NULL) return (NULL); @@ -447,7 +449,7 @@ ipif_lookup_multi_ire(ipif_t *ipif, ipaddr_t group) case IRE_HOST: gw_addr = ire->ire_gateway_addr; gw_ire = ire_ftable_lookup(gw_addr, 0, 0, IRE_INTERFACE, - ipif, NULL, ALL_ZONES, 0, NULL, match_flags); + ipif, NULL, ALL_ZONES, 0, NULL, match_flags, ipst); if (gw_ire != NULL) { if (save_ire != NULL) { @@ -494,7 +496,7 @@ ipif_lookup_multi_ire(ipif_t *ipif, ipaddr_t group) * Supports IP_BOUND_IF by following the ipif/ill when recursing. */ ire_t * -ire_lookup_multi(ipaddr_t group, zoneid_t zoneid) +ire_lookup_multi(ipaddr_t group, zoneid_t zoneid, ip_stack_t *ipst) { ire_t *ire; ipif_t *ipif = NULL; @@ -502,7 +504,7 @@ ire_lookup_multi(ipaddr_t group, zoneid_t zoneid) ipaddr_t gw_addr; ire = ire_ftable_lookup(group, 0, 0, 0, NULL, NULL, zoneid, - 0, NULL, MATCH_IRE_DEFAULT); + 0, NULL, MATCH_IRE_DEFAULT, ipst); /* We search a resolvable ire in case of multirouting. */ if ((ire != NULL) && (ire->ire_flags & RTF_MULTIRT)) { @@ -512,7 +514,8 @@ ire_lookup_multi(ipaddr_t group, zoneid_t zoneid) * may be changed here. In that case, ire_multirt_lookup() * IRE_REFRELE the original ire and change it. */ - (void) ire_multirt_lookup(&cire, &ire, MULTIRT_CACHEGW, NULL); + (void) ire_multirt_lookup(&cire, &ire, MULTIRT_CACHEGW, + NULL, ipst); if (cire != NULL) ire_refrele(cire); } @@ -539,7 +542,7 @@ ire_lookup_multi(ipaddr_t group, zoneid_t zoneid) ire_refrele(ire); ire = ire_ftable_lookup(gw_addr, 0, 0, IRE_INTERFACE, ipif, NULL, zoneid, 0, - NULL, match_flags); + NULL, match_flags, ipst); return (ire); case IRE_IF_NORESOLVER: case IRE_IF_RESOLVER: @@ -568,15 +571,15 @@ ire_del_host_redir(ire_t *ire, char *gateway) * when a default gateway is going away. 
*/ void -ire_delete_host_redirects(ipaddr_t gateway) +ire_delete_host_redirects(ipaddr_t gateway, ip_stack_t *ipst) { struct rtfuncarg rtfarg; (void) memset(&rtfarg, 0, sizeof (rtfarg)); rtfarg.rt_func = ire_del_host_redir; rtfarg.rt_arg = (void *)&gateway; - (void) ip_ftable->rnh_walktree_mt(ip_ftable, rtfunc, &rtfarg, - irb_refhold_rn, irb_refrele_rn); + (void) ipst->ips_ip_ftable->rnh_walktree_mt(ipst->ips_ip_ftable, + rtfunc, &rtfarg, irb_refhold_rn, irb_refrele_rn); } struct ihandle_arg { @@ -620,8 +623,10 @@ ire_ihandle_lookup_onlink(ire_t *cire) ire_t *ire; int match_flags; struct ihandle_arg ih; + ip_stack_t *ipst; ASSERT(cire != NULL); + ipst = cire->ire_ipst; /* * We don't need to specify the zoneid to ire_ftable_lookup() below @@ -635,7 +640,7 @@ ire_ihandle_lookup_onlink(ire_t *cire) */ ire = ire_ftable_lookup(cire->ire_addr, cire->ire_cmask, 0, IRE_INTERFACE, NULL, NULL, ALL_ZONES, cire->ire_ihandle, - NULL, match_flags); + NULL, match_flags, ipst); if (ire != NULL) return (ire); /* @@ -665,8 +670,8 @@ ire_ihandle_lookup_onlink(ire_t *cire) */ (void) memset(&ih, 0, sizeof (ih)); ih.ihandle = cire->ire_ihandle; - (void) ip_ftable->rnh_walktree_mt(ip_ftable, ire_ihandle_onlink_match, - &ih, irb_refhold_rn, irb_refrele_rn); + (void) ipst->ips_ip_ftable->rnh_walktree_mt(ipst->ips_ip_ftable, + ire_ihandle_onlink_match, &ih, irb_refhold_rn, irb_refrele_rn); return (ih.ire); } @@ -700,6 +705,7 @@ ire_forward_src_ipif(ipaddr_t dst, ire_t *sire, ire_t *ire, ill_t *dst_ill, int zoneid, ushort_t *marks) { ipif_t *src_ipif; + ip_stack_t *ipst = dst_ill->ill_ipst; /* * Pick the best source address from dst_ill. @@ -736,7 +742,7 @@ ire_forward_src_ipif(ipaddr_t dst, ire_t *sire, ire_t *ire, ill_t *dst_ill, * address still exists. */ src_ipif = ipif_lookup_addr(sire->ire_src_addr, NULL, - zoneid, NULL, NULL, NULL, NULL); + zoneid, NULL, NULL, NULL, NULL, ipst); return (src_ipif); } *marks |= IRE_MARK_USESRC_CHECK; @@ -756,9 +762,6 @@ ire_forward_src_ipif(ipaddr_t dst, ire_t *sire, ire_t *ire, ill_t *dst_ill, return (src_ipif); } -/* Added to root cause a bug - should be removed later */ -ire_t *ire_gw_cache = NULL; - /* * This function is called by ip_rput_noire() and ip_fast_forward() * to resolve the route of incoming packet that needs to be forwarded. @@ -786,7 +789,7 @@ ire_t *ire_gw_cache = NULL; ire_t * ire_forward(ipaddr_t dst, boolean_t *check_multirt, ire_t *supplied_ire, - ire_t *supplied_sire, const struct ts_label_s *tsl) + ire_t *supplied_sire, const struct ts_label_s *tsl, ip_stack_t *ipst) { ipaddr_t gw = 0; ire_t *ire = NULL; @@ -811,10 +814,10 @@ ire_forward(ipaddr_t dst, boolean_t *check_multirt, ire_t *supplied_ire, ire = ire_ftable_lookup(dst, 0, 0, 0, NULL, &sire, zoneid, 0, tsl, MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | - MATCH_IRE_RJ_BHOLE | MATCH_IRE_PARENT|MATCH_IRE_SECATTR); + MATCH_IRE_RJ_BHOLE | MATCH_IRE_PARENT|MATCH_IRE_SECATTR, ipst); if (ire == NULL) { - ip_rts_change(RTM_MISS, dst, 0, 0, 0, 0, 0, 0, RTA_DST); + ip_rts_change(RTM_MISS, dst, 0, 0, 0, 0, 0, 0, RTA_DST, ipst); goto icmp_err_ret; } @@ -1020,7 +1023,8 @@ create_irecache: 0, &(ire->ire_uinfo), NULL, - gcgrp); + gcgrp, + ipst); ip1dbg(("incomplete ire_cache 0x%p\n", (void *)ire)); if (ire != NULL) { gcgrp = NULL; /* reference now held by IRE */ @@ -1072,7 +1076,8 @@ icmp_err_ret: } /* - * Obtain the rt_entry and rt_irb for the route to be added to the ip_ftable. + * Obtain the rt_entry and rt_irb for the route to be added to + * the ips_ip_ftable. 
* First attempt to add a node to the radix tree via rn_addroute. If the * route already exists, return the bucket for the existing route. * @@ -1088,8 +1093,9 @@ ire_get_bucket(ire_t *ire) struct rt_entry *rt; struct rt_sockaddr rmask, rdst; irb_t *irb = NULL; + ip_stack_t *ipst = ire->ire_ipst; - ASSERT(ip_ftable != NULL); + ASSERT(ipst->ips_ip_ftable != NULL); /* first try to see if route exists (based on rtalloc1) */ (void) memset(&rdst, 0, sizeof (rdst)); @@ -1111,18 +1117,20 @@ ire_get_bucket(ire_t *ire) rt->rt_dst = rdst; irb = &rt->rt_irb; irb->irb_marks |= IRB_MARK_FTABLE; /* dynamically allocated/freed */ + irb->irb_ipst = ipst; rw_init(&irb->irb_lock, NULL, RW_DEFAULT, NULL); - RADIX_NODE_HEAD_WLOCK(ip_ftable); - rn = ip_ftable->rnh_addaddr(&rt->rt_dst, &rmask, ip_ftable, - (struct radix_node *)rt); + RADIX_NODE_HEAD_WLOCK(ipst->ips_ip_ftable); + rn = ipst->ips_ip_ftable->rnh_addaddr(&rt->rt_dst, &rmask, + ipst->ips_ip_ftable, (struct radix_node *)rt); if (rn == NULL) { - RADIX_NODE_HEAD_UNLOCK(ip_ftable); + RADIX_NODE_HEAD_UNLOCK(ipst->ips_ip_ftable); Free(rt, rt_entry_cache); rt = NULL; irb = NULL; - RADIX_NODE_HEAD_RLOCK(ip_ftable); - if ((rn = ip_ftable->rnh_lookup(&rdst, &rmask, ip_ftable)) != - NULL && ((rn->rn_flags & RNF_ROOT) == 0)) { + RADIX_NODE_HEAD_RLOCK(ipst->ips_ip_ftable); + rn = ipst->ips_ip_ftable->rnh_lookup(&rdst, &rmask, + ipst->ips_ip_ftable); + if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) { /* found a non-root match */ rt = (struct rt_entry *)rn; } @@ -1131,7 +1139,7 @@ ire_get_bucket(ire_t *ire) irb = &rt->rt_irb; IRB_REFHOLD(irb); } - RADIX_NODE_HEAD_UNLOCK(ip_ftable); + RADIX_NODE_HEAD_UNLOCK(ipst->ips_ip_ftable); return (irb); } @@ -1151,18 +1159,32 @@ ifindex_lookup(const struct sockaddr *ipaddr, zoneid_t zoneid) uint_t ifindex = 0; ire_t *ire; ill_t *ill; + netstack_t *ns; + ip_stack_t *ipst; + + if (zoneid == ALL_ZONES) + ns = netstack_find_by_zoneid(GLOBAL_ZONEID); + else + ns = netstack_find_by_zoneid(zoneid); + ASSERT(ns != NULL); - /* zoneid is a placeholder for future routing table per-zone project */ - ASSERT(zoneid == ALL_ZONES); + /* + * For exclusive stacks we set the zoneid to zero + * since IP uses the global zoneid in the exclusive stacks. 
+ */ + if (ns->netstack_stackid != GLOBAL_NETSTACKID) + zoneid = GLOBAL_ZONEID; + ipst = ns->netstack_ip; ASSERT(ipaddr->sa_family == AF_INET || ipaddr->sa_family == AF_INET6); - if ((ire = route_to_dst(ipaddr, zoneid)) != NULL) { + if ((ire = route_to_dst(ipaddr, zoneid, ipst)) != NULL) { ill = ire_to_ill(ire); if (ill != NULL) ifindex = ill->ill_phyint->phyint_ifindex; ire_refrele(ire); } + netstack_rele(ns); return (ifindex); } @@ -1171,7 +1193,7 @@ ifindex_lookup(const struct sockaddr *ipaddr, zoneid_t zoneid) * it tries to match the the route to the corresponding ipif for the ifindex */ static ire_t * -route_to_dst(const struct sockaddr *dst_addr, zoneid_t zoneid) +route_to_dst(const struct sockaddr *dst_addr, zoneid_t zoneid, ip_stack_t *ipst) { ire_t *ire = NULL; int match_flags; @@ -1184,11 +1206,11 @@ route_to_dst(const struct sockaddr *dst_addr, zoneid_t zoneid) if (dst_addr->sa_family == AF_INET) { ire = ire_route_lookup( ((struct sockaddr_in *)dst_addr)->sin_addr.s_addr, - 0, 0, 0, NULL, NULL, zoneid, NULL, match_flags); + 0, 0, 0, NULL, NULL, zoneid, NULL, match_flags, ipst); } else { ire = ire_route_lookup_v6( &((struct sockaddr_in6 *)dst_addr)->sin6_addr, - 0, 0, 0, NULL, NULL, zoneid, NULL, match_flags); + 0, 0, 0, NULL, NULL, zoneid, NULL, match_flags, ipst); } return (ire); } @@ -1226,9 +1248,25 @@ ipfil_sendpkt(const struct sockaddr *dst_addr, mblk_t *mp, uint_t ifindex, int value; int match_flags; ipaddr_t dst; + netstack_t *ns; + ip_stack_t *ipst; ASSERT(mp != NULL); + if (zoneid == ALL_ZONES) + ns = netstack_find_by_zoneid(GLOBAL_ZONEID); + else + ns = netstack_find_by_zoneid(zoneid); + ASSERT(ns != NULL); + + /* + * For exclusive stacks we set the zoneid to zero + * since IP uses the global zoneid in the exclusive stacks. + */ + if (ns->netstack_stackid != GLOBAL_NETSTACKID) + zoneid = GLOBAL_ZONEID; + ipst = ns->netstack_ip; + ASSERT(dst_addr->sa_family == AF_INET || dst_addr->sa_family == AF_INET6); @@ -1262,7 +1300,7 @@ ipfil_sendpkt(const struct sockaddr *dst_addr, mblk_t *mp, uint_t ifindex, MATCH_IRE_RECURSIVE | MATCH_IRE_RJ_BHOLE); ire = ire_route_lookup(dst, 0, 0, 0, NULL, &sire, zoneid, MBLK_GETLABEL(mp), - match_flags); + match_flags, ipst); } else { ipif_t *supplied_ipif; ill_t *ill; @@ -1274,7 +1312,7 @@ ipfil_sendpkt(const struct sockaddr *dst_addr, mblk_t *mp, uint_t ifindex, */ ill = ill_lookup_on_ifindex(ifindex, B_FALSE, - NULL, NULL, NULL, NULL); + NULL, NULL, NULL, NULL, ipst); if (ill != NULL) { supplied_ipif = ipif_get_next_ipif(NULL, ill); } else { @@ -1290,7 +1328,7 @@ ipfil_sendpkt(const struct sockaddr *dst_addr, mblk_t *mp, uint_t ifindex, MATCH_IRE_SECATTR); ire = ire_route_lookup(dst, 0, 0, 0, supplied_ipif, - &sire, zoneid, MBLK_GETLABEL(mp), match_flags); + &sire, zoneid, MBLK_GETLABEL(mp), match_flags, ipst); ipif_refrele(supplied_ipif); ill_refrele(ill); } @@ -1349,7 +1387,7 @@ ipfil_sendpkt(const struct sockaddr *dst_addr, mblk_t *mp, uint_t ifindex, * to the ire cache table */ ire_cache = ire_forward(dst, &check_multirt, ire, sire, - MBLK_GETLABEL(mp)); + MBLK_GETLABEL(mp), ipst); if (ire_cache == NULL) { ip1dbg(("ipfil_sendpkt: failed to create the" " ire cache entry \n")); @@ -1390,20 +1428,23 @@ ipfil_sendpkt(const struct sockaddr *dst_addr, mblk_t *mp, uint_t ifindex, value = ECOMM; break; case LOOKUP_IN_PROGRESS: + netstack_rele(ns); return (EINPROGRESS); case SEND_PASSED: + netstack_rele(ns); return (0); } discard: if (dst_addr->sa_family == AF_INET) { - BUMP_MIB(&ip_mib, ipIfStatsOutDiscards); + BUMP_MIB(&ipst->ips_ip_mib, 
ipIfStatsOutDiscards); } else { - BUMP_MIB(&ip6_mib, ipIfStatsOutDiscards); + BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); } if (ire != NULL) ire_refrele(ire); if (sire != NULL) ire_refrele(sire); + netstack_rele(ns); return (value); } @@ -1601,7 +1642,8 @@ irb_refrele_ftable(irb_t *irb) * the first IRE_INTERFACE route found (if any). */ ire_t * -ire_round_robin(irb_t *irb_ptr, zoneid_t zoneid, ire_ftable_args_t *margs) +ire_round_robin(irb_t *irb_ptr, zoneid_t zoneid, ire_ftable_args_t *margs, + ip_stack_t *ipst) { ire_t *ire_origin; ire_t *ire, *maybe_ire = NULL; @@ -1676,7 +1718,7 @@ ire_round_robin(irb_t *irb_ptr, zoneid_t zoneid, ire_ftable_args_t *margs) } rire = ire_route_lookup(ire->ire_gateway_addr, 0, 0, 0, ire->ire_ipif, NULL, zoneid, margs->ift_tsl, - match_flags); + match_flags, ipst); if (rire != NULL) { ire_refrele(rire); IRE_REFHOLD(ire); diff --git a/usr/src/uts/common/inet/ip/ip_if.c b/usr/src/uts/common/inet/ip/ip_if.c index 7868efa93d..a7aac93eda 100644 --- a/usr/src/uts/common/inet/ip/ip_if.c +++ b/usr/src/uts/common/inet/ip/ip_if.c @@ -43,6 +43,8 @@ #include <sys/kstat.h> #include <sys/debug.h> #include <sys/zone.h> +#include <sys/sunldi.h> +#include <sys/file.h> #include <sys/kmem.h> #include <sys/systm.h> @@ -144,7 +146,7 @@ static int ip_sioctl_subnet_tail(ipif_t *ipif, in6_addr_t, in6_addr_t, queue_t *q, mblk_t *mp, boolean_t need_up); static int ip_sioctl_arp_common(ill_t *ill, queue_t *q, mblk_t *mp, sin_t *sin, boolean_t x_arp_ioctl, boolean_t if_arp_ioctl); -static ipaddr_t ip_subnet_mask(ipaddr_t addr, ipif_t **); +static ipaddr_t ip_subnet_mask(ipaddr_t addr, ipif_t **, ip_stack_t *); static void ip_wput_ioctl(queue_t *q, mblk_t *mp); static void ipsq_flush(ill_t *ill); static void ipsq_clean_all(ill_t *ill); @@ -171,7 +173,7 @@ static int ipif_set_values_tail(ill_t *ill, ipif_t *ipif, mblk_t *mp, queue_t *q); static ipif_t *ipif_lookup_on_name(char *name, size_t namelen, boolean_t do_alloc, boolean_t *exists, boolean_t isv6, zoneid_t zoneid, - queue_t *q, mblk_t *mp, ipsq_func_t func, int *error); + queue_t *q, mblk_t *mp, ipsq_func_t func, int *error, ip_stack_t *); static int ipif_up(ipif_t *ipif, queue_t *q, mblk_t *mp); static void ipif_update_other_ipifs(ipif_t *old_ipif, ill_group_t *illgrp); @@ -212,8 +214,8 @@ static boolean_t ip_ib_v4mapinfo(uint_t, uint8_t *, uint8_t *, uint32_t *, static void ipif_save_ire(ipif_t *, ire_t *); static void ipif_remove_ire(ipif_t *, ire_t *); -static void ip_cgtp_bcast_add(ire_t *, ire_t *); -static void ip_cgtp_bcast_delete(ire_t *); +static void ip_cgtp_bcast_add(ire_t *, ire_t *, ip_stack_t *); +static void ip_cgtp_bcast_delete(ire_t *, ip_stack_t *); /* * Per-ill IPsec capabilities management. 
@@ -240,8 +242,8 @@ static void ill_capability_lso_ack(ill_t *, mblk_t *, dl_capability_sub_t *); static void ill_capability_lso_reset(ill_t *, mblk_t **); static void ill_capability_dls_ack(ill_t *, mblk_t *, dl_capability_sub_t *); static mac_resource_handle_t ill_ring_add(void *, mac_resource_t *); -static void ill_capability_dls_reset(ill_t *, mblk_t **); -static void ill_capability_dls_disable(ill_t *); +static void ill_capability_dls_reset(ill_t *, mblk_t **); +static void ill_capability_dls_disable(ill_t *); static void illgrp_cache_delete(ire_t *, char *); static void illgrp_delete(ill_t *ill); @@ -305,19 +307,6 @@ struct ill_ipsec_capab_s { }; /* - * List of AH and ESP IPsec acceleration capable ills - */ -typedef struct ipsec_capab_ill_s { - uint_t ill_index; - boolean_t ill_isv6; - struct ipsec_capab_ill_s *next; -} ipsec_capab_ill_t; - -static ipsec_capab_ill_t *ipsec_capab_ills_ah; -static ipsec_capab_ill_t *ipsec_capab_ills_esp; -krwlock_t ipsec_capab_ills_lock; - -/* * The field values are larger than strictly necessary for simple * AR_ENTRY_ADDs but the padding lets us accomodate the socket ioctls. */ @@ -519,50 +508,17 @@ static ill_t ill_null; /* Empty ILL for init. */ char ipif_loopback_name[] = "lo0"; static char *ipv4_forward_suffix = ":ip_forwarding"; static char *ipv6_forward_suffix = ":ip6_forwarding"; -static kstat_t *loopback_ksp = NULL; static sin6_t sin6_null; /* Zero address for quick clears */ static sin_t sin_null; /* Zero address for quick clears */ -static uint_t ill_index = 1; /* Used to assign interface indicies */ -/* When set search for unused index */ -static boolean_t ill_index_wrap = B_FALSE; + /* When set search for unused ipif_seqid */ static ipif_t ipif_zero; -uint_t ipif_src_random; - -/* - * For details on the protection offered by these locks please refer - * to the notes under the Synchronization section at the start of ip.c - */ -krwlock_t ill_g_lock; /* The global ill_g_lock */ -kmutex_t ip_addr_avail_lock; /* Address availability check lock */ -ipsq_t *ipsq_g_head; /* List of all ipsq's on the system */ - -krwlock_t ill_g_usesrc_lock; /* Protects usesrc related fields */ - -/* - * illgrp_head/ifgrp_head is protected by IP's perimeter. - */ -static ill_group_t *illgrp_head_v4; /* Head of IPv4 ill groups */ -ill_group_t *illgrp_head_v6; /* Head of IPv6 ill groups */ - -ill_g_head_t ill_g_heads[MAX_G_HEADS]; /* ILL List Head */ /* * ppa arena is created after these many * interfaces have been plumbed. */ -uint_t ill_no_arena = 12; - -#pragma align CACHE_ALIGN_SIZE(phyint_g_list) -static phyint_list_t phyint_g_list; /* start of phyint list */ - -/* - * Reflects value of FAILBACK variable in IPMP config file - * /etc/default/mpathd. Default value is B_TRUE. - * Set to B_FALSE if user disabled failback by configuring "FAILBACK=no" - * in.mpathd uses SIOCSIPMPFAILBACK ioctl to pass this information to kernel. 
- */ -static boolean_t ipmp_enable_failback = B_TRUE; +uint_t ill_no_arena = 12; /* Setable in /etc/system */ /* * Enable soft rings if ip_squeue_soft_ring or ip_squeue_fanout @@ -575,10 +531,11 @@ static boolean_t ipmp_enable_failback = B_TRUE; #define ILL_CAPAB_DLS (ILL_CAPAB_SOFT_RING | ILL_CAPAB_POLL) static uint_t -ipif_rand(void) +ipif_rand(ip_stack_t *ipst) { - ipif_src_random = ipif_src_random * 1103515245 + 12345; - return ((ipif_src_random >> 16) & 0x7fff); + ipst->ips_ipif_src_random = ipst->ips_ipif_src_random * 1103515245 + + 12345; + return ((ipst->ips_ipif_src_random >> 16) & 0x7fff); } /* @@ -743,6 +700,7 @@ ill_delete(ill_t *ill) { ipif_t *ipif; ill_t *prev_ill; + ip_stack_t *ipst = ill->ill_ipst; /* * ill_delete may be forcibly entering the ipsq. The previous @@ -800,7 +758,7 @@ ill_delete(ill_t *ill) * If an address on this ILL is being used as a source address then * clear out the pointers in other ILLs that point to this ILL. */ - rw_enter(&ill_g_usesrc_lock, RW_WRITER); + rw_enter(&ipst->ips_ill_g_usesrc_lock, RW_WRITER); if (ill->ill_usesrc_grp_next != NULL) { if (ill->ill_usesrc_ifindex == 0) { /* usesrc ILL ? */ ill_disband_usesrc_group(ill); @@ -810,7 +768,7 @@ ill_delete(ill_t *ill) ill->ill_usesrc_grp_next; } } - rw_exit(&ill_g_usesrc_lock); + rw_exit(&ipst->ips_ill_g_usesrc_lock); } static void @@ -857,6 +815,7 @@ ill_delete_tail(ill_t *ill) { mblk_t **mpp; ipif_t *ipif; + ip_stack_t *ipst = ill->ill_ipst; for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { ipif_non_duplicate(ipif); @@ -976,10 +935,10 @@ ill_delete_tail(ill_t *ill) */ (void) ill_glist_delete(ill); - rw_enter(&ip_g_nd_lock, RW_WRITER); + rw_enter(&ipst->ips_ip_g_nd_lock, RW_WRITER); if (ill->ill_ndd_name != NULL) - nd_unload(&ip_g_nd, ill->ill_ndd_name); - rw_exit(&ip_g_nd_lock); + nd_unload(&ipst->ips_ip_g_nd, ill->ill_ndd_name); + rw_exit(&ipst->ips_ip_g_nd_lock); if (ill->ill_frag_ptr != NULL) { @@ -1012,28 +971,38 @@ ill_delete_tail(ill_t *ill) } while (mpp++ != &ill->ill_last_mp_to_free); ill_free_mib(ill); + /* Drop refcnt here */ + netstack_rele(ill->ill_ipst->ips_netstack); + ill->ill_ipst = NULL; + ILL_TRACE_CLEANUP(ill); } static void ill_free_mib(ill_t *ill) { + ip_stack_t *ipst = ill->ill_ipst; + /* * MIB statistics must not be lost, so when an interface * goes away the counter values will be added to the global * MIBs. */ if (ill->ill_ip_mib != NULL) { - if (ill->ill_isv6) - ip_mib2_add_ip_stats(&ip6_mib, ill->ill_ip_mib); - else - ip_mib2_add_ip_stats(&ip_mib, ill->ill_ip_mib); + if (ill->ill_isv6) { + ip_mib2_add_ip_stats(&ipst->ips_ip6_mib, + ill->ill_ip_mib); + } else { + ip_mib2_add_ip_stats(&ipst->ips_ip_mib, + ill->ill_ip_mib); + } kmem_free(ill->ill_ip_mib, sizeof (*ill->ill_ip_mib)); ill->ill_ip_mib = NULL; } if (ill->ill_icmp6_mib != NULL) { - ip_mib2_add_icmp6_stats(&icmp6_mib, ill->ill_icmp6_mib); + ip_mib2_add_icmp6_stats(&ipst->ips_icmp6_mib, + ill->ill_icmp6_mib); kmem_free(ill->ill_icmp6_mib, sizeof (*ill->ill_icmp6_mib)); ill->ill_icmp6_mib = NULL; } @@ -1663,37 +1632,39 @@ ill_down_start(queue_t *q, mblk_t *mp) static void ill_down(ill_t *ill) { + ip_stack_t *ipst = ill->ill_ipst; + /* Blow off any IREs dependent on this ILL. 
*/ - ire_walk(ill_downi, (char *)ill); + ire_walk(ill_downi, (char *)ill, ipst); - mutex_enter(&ire_mrtun_lock); - if (ire_mrtun_count != 0) { - mutex_exit(&ire_mrtun_lock); + mutex_enter(&ipst->ips_ire_mrtun_lock); + if (ipst->ips_ire_mrtun_count != 0) { + mutex_exit(&ipst->ips_ire_mrtun_lock); ire_walk_ill_mrtun(0, 0, ill_downi_mrtun_srcif, - (char *)ill, NULL); + (char *)ill, NULL, ipst); } else { - mutex_exit(&ire_mrtun_lock); + mutex_exit(&ipst->ips_ire_mrtun_lock); } /* * If any interface based forwarding table exists * Blow off the ires there dependent on this ill */ - mutex_enter(&ire_srcif_table_lock); - if (ire_srcif_table_count > 0) { - mutex_exit(&ire_srcif_table_lock); - ire_walk_srcif_table_v4(ill_downi_mrtun_srcif, (char *)ill); + mutex_enter(&ipst->ips_ire_srcif_table_lock); + if (ipst->ips_ire_srcif_table_count > 0) { + mutex_exit(&ipst->ips_ire_srcif_table_lock); + ire_walk_srcif_table_v4(ill_downi_mrtun_srcif, (char *)ill, + ipst); } else { - mutex_exit(&ire_srcif_table_lock); + mutex_exit(&ipst->ips_ire_srcif_table_lock); } /* Remove any conn_*_ill depending on this ill */ - ipcl_walk(conn_cleanup_ill, (caddr_t)ill); + ipcl_walk(conn_cleanup_ill, (caddr_t)ill, ipst); if (ill->ill_group != NULL) { illgrp_delete(ill); } - } static void @@ -2797,13 +2768,13 @@ ill_ring_add(void *arg, mac_resource_t *mrp) uint_t normal_pkt_cnt = mrfp->mrf_normal_pkt_count; - bzero(rx_ring, sizeof (ill_rx_ring_t)); + bzero(rx_ring, sizeof (ill_rx_ring_t)); - rx_ring->rr_blank = mrfp->mrf_blank; - rx_ring->rr_handle = mrfp->mrf_arg; - rx_ring->rr_ill = ill; - rx_ring->rr_normal_blank_time = normal_blank_time; - rx_ring->rr_normal_pkt_cnt = normal_pkt_cnt; + rx_ring->rr_blank = mrfp->mrf_blank; + rx_ring->rr_handle = mrfp->mrf_arg; + rx_ring->rr_ill = ill; + rx_ring->rr_normal_blank_time = normal_blank_time; + rx_ring->rr_normal_pkt_cnt = normal_pkt_cnt; rx_ring->rr_max_blank_time = normal_blank_time * rr_max_blank_ratio; @@ -2814,12 +2785,12 @@ ill_ring_add(void *arg, mac_resource_t *mrp) rx_ring->rr_min_pkt_cnt = normal_pkt_cnt * rr_min_pkt_cnt_ratio; - rx_ring->rr_ring_state = ILL_RING_INUSE; - mutex_exit(&ill->ill_lock); + rx_ring->rr_ring_state = ILL_RING_INUSE; + mutex_exit(&ill->ill_lock); DTRACE_PROBE2(ill__ring__add, (void *), ill, (int), ip_rx_index); - return ((mac_resource_handle_t)rx_ring); + return ((mac_resource_handle_t)rx_ring); } } @@ -2844,6 +2815,7 @@ ill_capability_dls_init(ill_t *ill) ill_dls_capab_t *ill_dls = ill->ill_dls_capab; conn_t *connp; size_t sz; + ip_stack_t *ipst = ill->ill_ipst; if (ill->ill_capabilities & ILL_CAPAB_SOFT_RING) { if (ill_dls == NULL) { @@ -2877,7 +2849,8 @@ ill_capability_dls_init(ill_t *ill) return (B_TRUE); } - if ((connp = ipcl_conn_create(IPCL_TCPCONN, KM_NOSLEEP)) == NULL) + if ((connp = ipcl_conn_create(IPCL_TCPCONN, KM_NOSLEEP, + ipst->ips_netstack)) == NULL) return (B_FALSE); sz = sizeof (ill_dls_capab_t); @@ -3729,6 +3702,7 @@ ill_frag_timeout(ill_t *ill, time_t dead_interval) mblk_t *send_icmp_head; mblk_t *send_icmp_head_v6; zoneid_t zoneid; + ip_stack_t *ipst = ill->ill_ipst; ipfb = ill->ill_frag_hash_tbl; if (ipfb == NULL) @@ -3818,13 +3792,13 @@ ill_frag_timeout(ill_t *ill, time_t dead_interval) else ip6h = (ip6_t *)mp->b_rptr; zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, - ill); + ill, ipst); if (zoneid == ALL_ZONES) { freemsg(mp); } else { icmp_time_exceeded_v6(ill->ill_wq, mp, ICMP_REASSEMBLY_TIME_EXCEEDED, B_FALSE, - B_FALSE, zoneid); + B_FALSE, zoneid, ipst); } } while (send_icmp_head != NULL) { @@ -3839,12 +3813,13 
@@ ill_frag_timeout(ill_t *ill, time_t dead_interval) else dst = ((ipha_t *)mp->b_rptr)->ipha_dst; - zoneid = ipif_lookup_addr_zoneid(dst, ill); + zoneid = ipif_lookup_addr_zoneid(dst, ill, ipst); if (zoneid == ALL_ZONES) { freemsg(mp); } else { icmp_time_exceeded(ill->ill_wq, mp, - ICMP_REASSEMBLY_TIME_EXCEEDED, zoneid); + ICMP_REASSEMBLY_TIME_EXCEEDED, zoneid, + ipst); } } } @@ -4007,6 +3982,7 @@ nd_ill_forward_set(queue_t *q, mblk_t *mp, char *valuestr, caddr_t cp, { long value; int retval; + ip_stack_t *ipst = CONNQ_TO_IPST(q); cmn_err(CE_WARN, ND_FORWARD_WARNING); @@ -4015,9 +3991,9 @@ nd_ill_forward_set(queue_t *q, mblk_t *mp, char *valuestr, caddr_t cp, return (EINVAL); } - rw_enter(&ill_g_lock, RW_READER); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); retval = ill_forward_set(q, mp, (value != 0), cp); - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); return (retval); } @@ -4033,8 +4009,9 @@ ill_forward_set(queue_t *q, mblk_t *mp, boolean_t enable, caddr_t cp) { ill_t *ill = (ill_t *)cp; ill_group_t *illgrp; + ip_stack_t *ipst = ill->ill_ipst; - ASSERT(IAM_WRITER_ILL(ill) || RW_READ_HELD(&ill_g_lock)); + ASSERT(IAM_WRITER_ILL(ill) || RW_READ_HELD(&ipst->ips_ill_g_lock)); if ((enable && (ill->ill_flags & ILLF_ROUTER)) || (!enable && !(ill->ill_flags & ILLF_ROUTER)) || @@ -4121,6 +4098,7 @@ static int ill_set_ndd_name(ill_t *ill) { char *suffix; + ip_stack_t *ipst = ill->ill_ipst; ASSERT(IAM_WRITER_ILL(ill)); @@ -4143,8 +4121,8 @@ ill_set_ndd_name(ill_t *ill) * Modifying the nd table thru nd_load/nd_unload requires * the writer lock. */ - rw_enter(&ip_g_nd_lock, RW_WRITER); - if (!nd_load(&ip_g_nd, ill->ill_ndd_name, nd_ill_forward_get, + rw_enter(&ipst->ips_ip_g_nd_lock, RW_WRITER); + if (!nd_load(&ipst->ips_ip_g_nd, ill->ill_ndd_name, nd_ill_forward_get, nd_ill_forward_set, (caddr_t)ill)) { /* * If the nd_load failed, it only meant that it could not @@ -4153,11 +4131,11 @@ ill_set_ndd_name(ill_t *ill) * this interface is at the mercy of the global ip_forwarding * variable. */ - rw_exit(&ip_g_nd_lock); + rw_exit(&ipst->ips_ip_g_nd_lock); ill->ill_ndd_name = NULL; return (ENOMEM); } - rw_exit(&ip_g_nd_lock); + rw_exit(&ipst->ips_ip_g_nd_lock); return (0); } @@ -4174,13 +4152,14 @@ ill_set_ndd_name(ill_t *ill) * necessary under the ill lock. 
*/ ill_t * -ill_first(int start_list, int end_list, ill_walk_context_t *ctx) +ill_first(int start_list, int end_list, ill_walk_context_t *ctx, + ip_stack_t *ipst) { ill_if_t *ifp; ill_t *ill; avl_tree_t *avl_tree; - ASSERT(RW_LOCK_HELD(&ill_g_lock)); + ASSERT(RW_LOCK_HELD(&ipst->ips_ill_g_lock)); ASSERT(end_list <= MAX_G_HEADS && start_list >= 0); /* @@ -4195,9 +4174,9 @@ ill_first(int start_list, int end_list, ill_walk_context_t *ctx) } while (ctx->ctx_current_list <= ctx->ctx_last_list) { - ifp = IP_VX_ILL_G_LIST(ctx->ctx_current_list); + ifp = IP_VX_ILL_G_LIST(ctx->ctx_current_list, ipst); if (ifp != (ill_if_t *) - &IP_VX_ILL_G_LIST(ctx->ctx_current_list)) { + &IP_VX_ILL_G_LIST(ctx->ctx_current_list, ipst)) { avl_tree = &ifp->illif_avl_by_ppa; ill = avl_first(avl_tree); /* @@ -4227,11 +4206,10 @@ ill_next(ill_walk_context_t *ctx, ill_t *lastill) { ill_if_t *ifp; ill_t *ill; + ip_stack_t *ipst = lastill->ill_ipst; - - ASSERT(RW_LOCK_HELD(&ill_g_lock)); ASSERT(lastill->ill_ifptr != (ill_if_t *) - &IP_VX_ILL_G_LIST(ctx->ctx_current_list)); + &IP_VX_ILL_G_LIST(ctx->ctx_current_list, ipst)); if ((ill = avl_walk(&lastill->ill_ifptr->illif_avl_by_ppa, lastill, AVL_AFTER)) != NULL) { return (ill); @@ -4241,10 +4219,11 @@ ill_next(ill_walk_context_t *ctx, ill_t *lastill) ifp = lastill->ill_ifptr->illif_next; /* make sure not at end of circular list */ - while (ifp == (ill_if_t *)&IP_VX_ILL_G_LIST(ctx->ctx_current_list)) { + while (ifp == + (ill_if_t *)&IP_VX_ILL_G_LIST(ctx->ctx_current_list, ipst)) { if (++ctx->ctx_current_list > ctx->ctx_last_list) return (NULL); - ifp = IP_VX_ILL_G_LIST(ctx->ctx_current_list); + ifp = IP_VX_ILL_G_LIST(ctx->ctx_current_list, ipst); } return (avl_first(&ifp->illif_avl_by_ppa)); @@ -4288,7 +4267,7 @@ ill_get_ppa_ptr(char *name) */ static ill_t * ill_find_by_name(char *name, boolean_t isv6, queue_t *q, mblk_t *mp, - ipsq_func_t func, int *error) + ipsq_func_t func, int *error, ip_stack_t *ipst) { char *ppa_ptr = NULL; int len; @@ -4319,9 +4298,9 @@ ill_find_by_name(char *name, boolean_t isv6, queue_t *q, mblk_t *mp, ppa = stoi(&ppa_ptr); - ifp = IP_VX_ILL_G_LIST(list); + ifp = IP_VX_ILL_G_LIST(list, ipst); - while (ifp != (ill_if_t *)&IP_VX_ILL_G_LIST(list)) { + while (ifp != (ill_if_t *)&IP_VX_ILL_G_LIST(list, ipst)) { /* * match is done on len - 1 as the name is not null * terminated it contains ppa in addition to the interface @@ -4336,7 +4315,7 @@ ill_find_by_name(char *name, boolean_t isv6, queue_t *q, mblk_t *mp, } - if (ifp == (ill_if_t *)&IP_VX_ILL_G_LIST(list)) { + if (ifp == (ill_if_t *)&IP_VX_ILL_G_LIST(list, ipst)) { /* * Even the interface type does not exist. */ @@ -4406,8 +4385,6 @@ ill_compare_ppa(const void *ppa_ptr, const void *ill_ptr) static void ill_delete_interface_type(ill_if_t *interface) { - ASSERT(RW_WRITE_HELD(&ill_g_lock)); - ASSERT(interface != NULL); ASSERT(avl_numnodes(&interface->illif_avl_by_ppa) == 0); @@ -4432,11 +4409,12 @@ ill_glist_delete(ill_t *ill) char *nicname; size_t nicnamelen; hook_nic_event_t *info; + ip_stack_t *ipst; if (ill == NULL) return; - - rw_enter(&ill_g_lock, RW_WRITER); + ipst = ill->ill_ipst; + rw_enter(&ipst->ips_ill_g_lock, RW_WRITER); if (ill->ill_name != NULL) { nicname = kmem_alloc(ill->ill_name_length, KM_NOSLEEP); @@ -4513,7 +4491,8 @@ ill_glist_delete(ill_t *ill) info->hne_event = NE_UNPLUMB; info->hne_data = nicname; info->hne_datalen = nicnamelen; - info->hne_family = ill->ill_isv6 ? ipv6 : ipv4; + info->hne_family = ill->ill_isv6 ? 
+ ipst->ips_ipv6_net_data : ipst->ips_ipv4_net_data; } else { ip2dbg(("ill_glist_delete: could not attach UNPLUMB nic event " "information for %s (ENOMEM)\n", ill->ill_name)); @@ -4524,8 +4503,7 @@ ill_glist_delete(ill_t *ill) ill->ill_nic_event_info = info; ill_phyint_free(ill); - - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); } /* @@ -4638,8 +4616,9 @@ ill_glist_insert(ill_t *ill, char *name, boolean_t isv6) int name_length; int index; boolean_t check_length = B_FALSE; + ip_stack_t *ipst = ill->ill_ipst; - ASSERT(RW_WRITE_HELD(&ill_g_lock)); + ASSERT(RW_WRITE_HELD(&ipst->ips_ill_g_lock)); name_length = mi_strlen(name) + 1; @@ -4648,11 +4627,11 @@ ill_glist_insert(ill_t *ill, char *name, boolean_t isv6) else index = IP_V4_G_HEAD; - ill_interface = IP_VX_ILL_G_LIST(index); + ill_interface = IP_VX_ILL_G_LIST(index, ipst); /* * Search for interface type based on name */ - while (ill_interface != (ill_if_t *)&IP_VX_ILL_G_LIST(index)) { + while (ill_interface != (ill_if_t *)&IP_VX_ILL_G_LIST(index, ipst)) { if ((ill_interface->illif_name_len == name_length) && (strcmp(ill_interface->illif_name, name) == 0)) { break; @@ -4663,7 +4642,7 @@ ill_glist_insert(ill_t *ill, char *name, boolean_t isv6) /* * Interface type not found, create one. */ - if (ill_interface == (ill_if_t *)&IP_VX_ILL_G_LIST(index)) { + if (ill_interface == (ill_if_t *)&IP_VX_ILL_G_LIST(index, ipst)) { ill_g_head_t ghead; @@ -4689,7 +4668,7 @@ ill_glist_insert(ill_t *ill, char *name, boolean_t isv6) * link the structure in the back to maintain order * of configuration for ifconfig output. */ - ghead = ill_g_heads[index]; + ghead = ipst->ips_ill_g_heads[index]; insque(ill_interface, ghead.ill_g_list_tail); } @@ -4769,6 +4748,7 @@ ipsq_init(ill_t *ill) ipsq->ipsq_refs = 1; ipsq->ipsq_writer = curthread; ipsq->ipsq_reentry_cnt = 1; + ipsq->ipsq_ipst = ill->ill_ipst; /* No netstack_hold */ #ifdef ILL_DEBUG ipsq->ipsq_depth = getpcstack((pc_t *)ipsq->ipsq_stack, IP_STACK_DEPTH); #endif @@ -4944,12 +4924,32 @@ ill_xarp_info(struct sockaddr_dl *sdl, ill_t *ill) static int loopback_kstat_update(kstat_t *ksp, int rw) { - kstat_named_t *kn = KSTAT_NAMED_PTR(ksp); + kstat_named_t *kn; + netstackid_t stackid; + netstack_t *ns; + ip_stack_t *ipst; + + if (ksp == NULL || ksp->ks_data == NULL) + return (EIO); if (rw == KSTAT_WRITE) return (EACCES); - kn[0].value.ui32 = loopback_packets; - kn[1].value.ui32 = loopback_packets; + + kn = KSTAT_NAMED_PTR(ksp); + stackid = (zoneid_t)(uintptr_t)ksp->ks_private; + + ns = netstack_find_by_stackid(stackid); + if (ns == NULL) + return (-1); + + ipst = ns->netstack_ip; + if (ipst == NULL) { + netstack_rele(ns); + return (-1); + } + kn[0].value.ui32 = ipst->ips_loopback_packets; + kn[1].value.ui32 = ipst->ips_loopback_packets; + netstack_rele(ns); return (0); } @@ -4958,16 +4958,16 @@ loopback_kstat_update(kstat_t *ksp, int rw) * Has ifindex been plumbed already. */ static boolean_t -phyint_exists(uint_t index) +phyint_exists(uint_t index, ip_stack_t *ipst) { phyint_t *phyi; - ASSERT(RW_LOCK_HELD(&ill_g_lock)); + ASSERT(RW_LOCK_HELD(&ipst->ips_ill_g_lock)); /* * Indexes are stored in the phyint - a common structure * to both IPv4 and IPv6. */ - phyi = avl_find(&phyint_g_list.phyint_list_avl_by_index, + phyi = avl_find(&ipst->ips_phyint_g_list->phyint_list_avl_by_index, (void *) &index, NULL); return (phyi != NULL); } @@ -4976,16 +4976,16 @@ phyint_exists(uint_t index) * Assign a unique interface index for the phyint. 
*/ static boolean_t -phyint_assign_ifindex(phyint_t *phyi) +phyint_assign_ifindex(phyint_t *phyi, ip_stack_t *ipst) { uint_t starting_index; ASSERT(phyi->phyint_ifindex == 0); - if (!ill_index_wrap) { - phyi->phyint_ifindex = ill_index++; - if (ill_index == 0) { + if (!ipst->ips_ill_index_wrap) { + phyi->phyint_ifindex = ipst->ips_ill_index++; + if (ipst->ips_ill_index == 0) { /* Reached the uint_t limit Next time wrap */ - ill_index_wrap = B_TRUE; + ipst->ips_ill_index_wrap = B_TRUE; } return (B_TRUE); } @@ -4995,11 +4995,12 @@ phyint_assign_ifindex(phyint_t *phyi) * at this point and don't want to call any function that attempts * to get the lock again. */ - starting_index = ill_index++; - for (; ill_index != starting_index; ill_index++) { - if (ill_index != 0 && !phyint_exists(ill_index)) { + starting_index = ipst->ips_ill_index++; + for (; ipst->ips_ill_index != starting_index; ipst->ips_ill_index++) { + if (ipst->ips_ill_index != 0 && + !phyint_exists(ipst->ips_ill_index, ipst)) { /* found unused index - use it */ - phyi->phyint_ifindex = ill_index; + phyi->phyint_ifindex = ipst->ips_ill_index; return (B_TRUE); } } @@ -5020,7 +5021,8 @@ phyint_assign_ifindex(phyint_t *phyi) */ ill_t * ill_lookup_on_name(char *name, boolean_t do_alloc, boolean_t isv6, - queue_t *q, mblk_t *mp, ipsq_func_t func, int *error, boolean_t *did_alloc) + queue_t *q, mblk_t *mp, ipsq_func_t func, int *error, boolean_t *did_alloc, + ip_stack_t *ipst) { ill_t *ill; ipif_t *ipif; @@ -5030,9 +5032,9 @@ ill_lookup_on_name(char *name, boolean_t do_alloc, boolean_t isv6, isloopback = mi_strcmp(name, ipif_loopback_name) == 0; - rw_enter(&ill_g_lock, RW_READER); - ill = ill_find_by_name(name, isv6, q, mp, func, error); - rw_exit(&ill_g_lock); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); + ill = ill_find_by_name(name, isv6, q, mp, func, error, ipst); + rw_exit(&ipst->ips_ill_g_lock); if (ill != NULL || (error != NULL && *error == EINPROGRESS)) return (ill); @@ -5043,11 +5045,11 @@ ill_lookup_on_name(char *name, boolean_t do_alloc, boolean_t isv6, if (!isloopback || !do_alloc) return (NULL); - rw_enter(&ill_g_lock, RW_WRITER); + rw_enter(&ipst->ips_ill_g_lock, RW_WRITER); - ill = ill_find_by_name(name, isv6, q, mp, func, error); + ill = ill_find_by_name(name, isv6, q, mp, func, error, ipst); if (ill != NULL || (error != NULL && *error == EINPROGRESS)) { - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); return (ill); } @@ -5059,6 +5061,14 @@ ill_lookup_on_name(char *name, boolean_t do_alloc, boolean_t isv6, *ill = ill_null; mutex_init(&ill->ill_lock, NULL, MUTEX_DEFAULT, NULL); + ill->ill_ipst = ipst; + netstack_hold(ipst->ips_netstack); + /* + * For exclusive stacks we set the zoneid to zero + * to make IP operate as if in the global zone. 
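phyint_assign_ifindex() in the hunk above moves the interface-index counter into the ip_stack_t (ips_ill_index / ips_ill_index_wrap): before the counter wraps an index is a plain post-increment, and after a wrap every candidate is probed with phyint_exists() so long-released indexes can be reused. Below is a condensed, self-contained model of the same allocate-then-probe logic; it assumes the counter is seeded at 1 (so 0, which means failure, is never handed out) and deliberately uses a tiny index space with an in_use bitmap so the wrap path is easy to exercise. The names are illustrative, not the kernel's.

#include <stdbool.h>

#define	MODEL_MAX_INDEX	64	/* tiny space so a wrap is easy to hit */

typedef struct ipstack_model {
	unsigned int	ill_index;	/* next index to try; start at 1 */
	bool		ill_index_wrap;	/* counter has wrapped at least once */
	bool		in_use[MODEL_MAX_INDEX];
} ipstack_model_t;

/* Stand-in for phyint_exists(): is this index already plumbed here? */
static bool
index_in_use(const ipstack_model_t *ist, unsigned int index)
{
	return (index < MODEL_MAX_INDEX && ist->in_use[index]);
}

/*
 * Hand out a unique non-zero interface index for this stack instance.
 * Before the counter wraps the allocation is a plain post-increment; once
 * it wraps, each candidate is probed so freed indexes can be reused.
 * Returns 0 when the index space is exhausted.
 */
unsigned int
assign_ifindex(ipstack_model_t *ist)
{
	unsigned int starting_index;

	if (!ist->ill_index_wrap) {
		unsigned int index = ist->ill_index++;

		if (ist->ill_index >= MODEL_MAX_INDEX) {
			ist->ill_index = 0;
			ist->ill_index_wrap = true;	/* probe from now on */
		}
		ist->in_use[index] = true;
		return (index);
	}

	/* Wrapped: scan at most one full cycle for an unused non-zero index. */
	starting_index = ist->ill_index;
	do {
		unsigned int index = ist->ill_index;

		ist->ill_index = (ist->ill_index + 1) % MODEL_MAX_INDEX;
		if (index != 0 && !index_in_use(ist, index)) {
			ist->in_use[index] = true;
			return (index);
		}
	} while (ist->ill_index != starting_index);

	return (0);	/* every index is taken */
}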
+ */ + ill->ill_zoneid = GLOBAL_ZONEID; + ill->ill_phyint = (phyint_t *)mi_zalloc(sizeof (phyint_t)); if (ill->ill_phyint == NULL) goto done; @@ -5176,23 +5186,29 @@ ill_lookup_on_name(char *name, boolean_t do_alloc, boolean_t isv6, ill->ill_phyint->phyint_flags |= PHYI_LOOPBACK | PHYI_VIRTUAL; mutex_exit(&ill->ill_phyint->phyint_lock); - if (loopback_ksp == NULL) { + if (ipst->ips_loopback_ksp == NULL) { /* Export loopback interface statistics */ - loopback_ksp = kstat_create("lo", 0, ipif_loopback_name, "net", - KSTAT_TYPE_NAMED, 2, 0); - if (loopback_ksp != NULL) { - loopback_ksp->ks_update = loopback_kstat_update; - kn = KSTAT_NAMED_PTR(loopback_ksp); + ipst->ips_loopback_ksp = kstat_create_netstack("lo", 0, + ipif_loopback_name, "net", + KSTAT_TYPE_NAMED, 2, 0, + ipst->ips_netstack->netstack_stackid); + if (ipst->ips_loopback_ksp != NULL) { + ipst->ips_loopback_ksp->ks_update = + loopback_kstat_update; + kn = KSTAT_NAMED_PTR(ipst->ips_loopback_ksp); kstat_named_init(&kn[0], "ipackets", KSTAT_DATA_UINT32); kstat_named_init(&kn[1], "opackets", KSTAT_DATA_UINT32); - kstat_install(loopback_ksp); + ipst->ips_loopback_ksp->ks_private = + (void *)(uintptr_t)ipst->ips_netstack-> + netstack_stackid; + kstat_install(ipst->ips_loopback_ksp); } } if (error != NULL) *error = 0; *did_alloc = B_TRUE; - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); return (ill); done: if (ill != NULL) { @@ -5200,25 +5216,50 @@ done: ipsq_t *ipsq; ipsq = ill->ill_phyint->phyint_ipsq; - if (ipsq != NULL) + if (ipsq != NULL) { + ipsq->ipsq_ipst = NULL; kmem_free(ipsq, sizeof (ipsq_t)); + } mi_free(ill->ill_phyint); } ill_free_mib(ill); + if (ill->ill_ipst != NULL) + netstack_rele(ill->ill_ipst->ips_netstack); mi_free(ill); } - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); if (error != NULL) *error = ENOMEM; return (NULL); } /* + * For IPP calls - use the ip_stack_t for global stack. + */ +ill_t * +ill_lookup_on_ifindex_global_instance(uint_t index, boolean_t isv6, + queue_t *q, mblk_t *mp, ipsq_func_t func, int *err) +{ + ip_stack_t *ipst; + ill_t *ill; + + ipst = netstack_find_by_stackid(GLOBAL_NETSTACKID)->netstack_ip; + if (ipst == NULL) { + cmn_err(CE_WARN, "No ip_stack_t for zoneid zero!\n"); + return (NULL); + } + + ill = ill_lookup_on_ifindex(index, isv6, q, mp, func, err, ipst); + netstack_rele(ipst->ips_netstack); + return (ill); +} + +/* * Return a pointer to the ill which matches the index and IP version type. */ ill_t * ill_lookup_on_ifindex(uint_t index, boolean_t isv6, queue_t *q, mblk_t *mp, - ipsq_func_t func, int *err) + ipsq_func_t func, int *err, ip_stack_t *ipst) { ill_t *ill; ipsq_t *ipsq; @@ -5234,8 +5275,8 @@ ill_lookup_on_ifindex(uint_t index, boolean_t isv6, queue_t *q, mblk_t *mp, * Indexes are stored in the phyint - a common structure * to both IPv4 and IPv6. */ - rw_enter(&ill_g_lock, RW_READER); - phyi = avl_find(&phyint_g_list.phyint_list_avl_by_index, + rw_enter(&ipst->ips_ill_g_lock, RW_READER); + phyi = avl_find(&ipst->ips_phyint_g_list->phyint_list_avl_by_index, (void *) &index, NULL); if (phyi != NULL) { ill = isv6 ? 
phyi->phyint_illv6: phyi->phyint_illv4; @@ -5250,12 +5291,12 @@ ill_lookup_on_ifindex(uint_t index, boolean_t isv6, queue_t *q, mblk_t *mp, ill_refhold_locked(ill); mutex_exit(&ill->ill_lock); RELEASE_CONN_LOCK(q); - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); return (ill); } else if (ILL_CAN_WAIT(ill, q)) { ipsq = ill->ill_phyint->phyint_ipsq; mutex_enter(&ipsq->ipsq_lock); - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); mutex_exit(&ill->ill_lock); ipsq_enq(ipsq, q, mp, func, NEW_OP, ill); mutex_exit(&ipsq->ipsq_lock); @@ -5267,7 +5308,7 @@ ill_lookup_on_ifindex(uint_t index, boolean_t isv6, queue_t *q, mblk_t *mp, mutex_exit(&ill->ill_lock); } } - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); if (err != NULL) *err = ENXIO; return (NULL); @@ -5278,24 +5319,25 @@ ill_lookup_on_ifindex(uint_t index, boolean_t isv6, queue_t *q, mblk_t *mp, * If there is no next ifindex for the given protocol, return 0. */ uint_t -ill_get_next_ifindex(uint_t index, boolean_t isv6) +ill_get_next_ifindex(uint_t index, boolean_t isv6, ip_stack_t *ipst) { phyint_t *phyi; phyint_t *phyi_initial; uint_t ifindex; - rw_enter(&ill_g_lock, RW_READER); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); if (index == 0) { - phyi = avl_first(&phyint_g_list.phyint_list_avl_by_index); + phyi = avl_first( + &ipst->ips_phyint_g_list->phyint_list_avl_by_index); } else { phyi = phyi_initial = avl_find( - &phyint_g_list.phyint_list_avl_by_index, + &ipst->ips_phyint_g_list->phyint_list_avl_by_index, (void *) &index, NULL); } for (; phyi != NULL; - phyi = avl_walk(&phyint_g_list.phyint_list_avl_by_index, + phyi = avl_walk(&ipst->ips_phyint_g_list->phyint_list_avl_by_index, phyi, AVL_AFTER)) { /* * If we're not returning the first interface in the tree @@ -5317,7 +5359,7 @@ ill_get_next_ifindex(uint_t index, boolean_t isv6) } } - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); if (phyi != NULL) ifindex = phyi->phyint_ifindex; @@ -5333,23 +5375,23 @@ ill_get_next_ifindex(uint_t index, boolean_t isv6) * If there is no next ifindex for the interface, return 0. 
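ill_get_next_ifindex() above enumerates interface indexes from the per-stack phyint_list_avl_by_index tree under the reader lock: index 0 means start at the first entry, otherwise the walk resumes after the given index, entries without an ill of the requested IP version are skipped, and 0 is returned when nothing further matches. The fragment below mimics that contract over a sorted static array standing in for the AVL tree; the data is made up.

#include <stdbool.h>
#include <stddef.h>

/* Model of one physical interface: its index and which ills it carries. */
typedef struct phyint_model {
	unsigned int	index;
	bool		has_v4;
	bool		has_v6;
} phyint_model_t;

/* Stand-in for the per-stack AVL tree, kept sorted by index. */
static const phyint_model_t phyints[] = {
	{ 1, true,  false },
	{ 2, true,  true  },
	{ 5, false, true  },
};

/*
 * Return the next interface index after 'index' that carries an interface
 * of the requested family (index == 0 asks for the first one).  Returns 0
 * when there is no further match, mirroring ill_get_next_ifindex().
 */
unsigned int
get_next_ifindex(unsigned int index, bool isv6)
{
	size_t i;

	for (i = 0; i < sizeof (phyints) / sizeof (phyints[0]); i++) {
		if (index != 0 && phyints[i].index <= index)
			continue;	/* not past the cursor yet */
		if (isv6 ? phyints[i].has_v6 : phyints[i].has_v4)
			return (phyints[i].index);
	}
	return (0);
}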
*/ uint_t -ill_get_ifindex_by_name(char *name) +ill_get_ifindex_by_name(char *name, ip_stack_t *ipst) { phyint_t *phyi; avl_index_t where = 0; uint_t ifindex; - rw_enter(&ill_g_lock, RW_READER); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); - if ((phyi = avl_find(&phyint_g_list.phyint_list_avl_by_name, + if ((phyi = avl_find(&ipst->ips_phyint_g_list->phyint_list_avl_by_name, name, &where)) == NULL) { - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); return (0); } ifindex = phyi->phyint_ifindex; - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); return (ifindex); } @@ -5459,6 +5501,9 @@ ip_ill_report(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *ioc_cr) { ill_t *ill; ill_walk_context_t ctx; + ip_stack_t *ipst; + + ipst = CONNQ_TO_IPST(q); (void) mi_mpprintf(mp, "ILL " MI_COL_HDRPAD_STR @@ -5470,8 +5515,8 @@ ip_ill_report(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *ioc_cr) "upcnt mxfrg err name"); /* 12345 12345 123 xxxxxxxx */ - rw_enter(&ill_g_lock, RW_READER); - ill = ILL_START_WALK_ALL(&ctx); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); + ill = ILL_START_WALK_ALL(&ctx, ipst); for (; ill != NULL; ill = ill_next(&ctx, ill)) { (void) mi_mpprintf(mp, MI_COL_PTRFMT_STR MI_COL_PTRFMT_STR MI_COL_PTRFMT_STR @@ -5480,7 +5525,7 @@ ip_ill_report(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *ioc_cr) ill->ill_ipif_up_count, ill->ill_max_frag, ill->ill_error, ill->ill_name); } - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); return (0); } @@ -5507,6 +5552,7 @@ ip_ipif_report(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *ioc_cr) uint64_t flags; zoneid_t zoneid; ill_walk_context_t ctx; + ip_stack_t *ipst = CONNQ_TO_IPST(q); (void) mi_mpprintf(mp, "IPIF metric mtu in/out/forward name zone flags...\n" @@ -5520,8 +5566,8 @@ ip_ipif_report(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *ioc_cr) ASSERT(q->q_next == NULL); zoneid = Q_TO_CONN(q)->conn_zoneid; /* IP is a driver */ - rw_enter(&ill_g_lock, RW_READER); - ill = ILL_START_WALK_ALL(&ctx); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); + ill = ILL_START_WALK_ALL(&ctx, ipst); for (; ill != NULL; ill = ill_next(&ctx, ill)) { for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { @@ -5567,7 +5613,7 @@ ip_ipif_report(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *ioc_cr) buf6, sizeof (buf6))); } } - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); return (0); } @@ -5798,12 +5844,12 @@ ip_addr_ok_v4(ipaddr_t addr, ipaddr_t subnet_mask) * Returns held ipif */ ipif_t * -ipif_lookup_group(ipaddr_t group, zoneid_t zoneid) +ipif_lookup_group(ipaddr_t group, zoneid_t zoneid, ip_stack_t *ipst) { ire_t *ire; ipif_t *ipif; - ire = ire_lookup_multi(group, zoneid); + ire = ire_lookup_multi(group, zoneid, ipst); if (ire == NULL) return (NULL); ipif = ire->ire_ipif; @@ -5818,7 +5864,7 @@ ipif_lookup_group(ipaddr_t group, zoneid_t zoneid) */ ipif_t * ipif_lookup_interface(ipaddr_t if_addr, ipaddr_t dst, queue_t *q, mblk_t *mp, - ipsq_func_t func, int *error) + ipsq_func_t func, int *error, ip_stack_t *ipst) { ipif_t *ipif; ill_t *ill; @@ -5834,8 +5880,8 @@ ipif_lookup_interface(ipaddr_t if_addr, ipaddr_t dst, queue_t *q, mblk_t *mp, * This is done to avoid returning non-point-to-point * ipif instead of unnumbered point-to-point ipif. 
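ip_ill_report() and ip_ipif_report() above now obtain their ip_stack_t with CONNQ_TO_IPST(q) and start the walk with ILL_START_WALK_ALL(&ctx, ipst), so a report only ever shows the interfaces of the instance the request arrived on. The sketch below captures just that shape: resolve the instance from the request and iterate only its list under a reader lock. The structures are models, not the kernel's.

#include <pthread.h>
#include <stdio.h>

typedef struct ill_model {
	const char		*ill_name;
	struct ill_model	*ill_next;
} ill_model_t;

typedef struct ipstack_model {
	pthread_rwlock_t	ips_ill_g_lock;	/* protects the ill list */
	ill_model_t		*ips_ill_head;	/* this instance's interfaces */
} ipstack_model_t;

typedef struct request {
	ipstack_model_t	*rq_ipst;	/* CONNQ_TO_IPST() analogue */
} request_t;

/*
 * Report the interfaces of the instance the request arrived on.  Nothing
 * here touches a file-scope global: the caller's instance is passed in,
 * just as the reworked ip_ill_report() receives its ip_stack_t.
 */
void
ill_report(request_t *rq)
{
	ipstack_model_t *ipst = rq->rq_ipst;
	ill_model_t *ill;

	pthread_rwlock_rdlock(&ipst->ips_ill_g_lock);
	for (ill = ipst->ips_ill_head; ill != NULL; ill = ill->ill_next)
		printf("%s\n", ill->ill_name);
	pthread_rwlock_unlock(&ipst->ips_ill_g_lock);
}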
*/ - rw_enter(&ill_g_lock, RW_READER); - ill = ILL_START_WALK_V4(&ctx); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); + ill = ILL_START_WALK_V4(&ctx, ipst); for (; ill != NULL; ill = ill_next(&ctx, ill)) { GRAB_CONN_LOCK(q); mutex_enter(&ill->ill_lock); @@ -5853,13 +5899,13 @@ ipif_lookup_interface(ipaddr_t if_addr, ipaddr_t dst, queue_t *q, mblk_t *mp, ipif_refhold_locked(ipif); mutex_exit(&ill->ill_lock); RELEASE_CONN_LOCK(q); - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); return (ipif); } else if (IPIF_CAN_WAIT(ipif, q)) { ipsq = ill->ill_phyint->phyint_ipsq; mutex_enter(&ipsq->ipsq_lock); mutex_exit(&ill->ill_lock); - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); ipsq_enq(ipsq, q, mp, func, NEW_OP, ill); mutex_exit(&ipsq->ipsq_lock); @@ -5872,10 +5918,11 @@ ipif_lookup_interface(ipaddr_t if_addr, ipaddr_t dst, queue_t *q, mblk_t *mp, mutex_exit(&ill->ill_lock); RELEASE_CONN_LOCK(q); } - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); /* lookup the ipif based on interface address */ - ipif = ipif_lookup_addr(if_addr, NULL, ALL_ZONES, q, mp, func, error); + ipif = ipif_lookup_addr(if_addr, NULL, ALL_ZONES, q, mp, func, error, + ipst); ASSERT(ipif == NULL || !ipif->ipif_isv6); return (ipif); } @@ -5889,7 +5936,7 @@ ipif_lookup_interface(ipaddr_t if_addr, ipaddr_t dst, queue_t *q, mblk_t *mp, */ ipif_t * ipif_lookup_addr(ipaddr_t addr, ill_t *match_ill, zoneid_t zoneid, queue_t *q, - mblk_t *mp, ipsq_func_t func, int *error) + mblk_t *mp, ipsq_func_t func, int *error, ip_stack_t *ipst) { ipif_t *ipif; ill_t *ill; @@ -5900,13 +5947,13 @@ ipif_lookup_addr(ipaddr_t addr, ill_t *match_ill, zoneid_t zoneid, queue_t *q, if (error != NULL) *error = 0; - rw_enter(&ill_g_lock, RW_READER); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); /* * Repeat twice, first based on local addresses and * next time for pointopoint. */ repeat: - ill = ILL_START_WALK_V4(&ctx); + ill = ILL_START_WALK_V4(&ctx, ipst); for (; ill != NULL; ill = ill_next(&ctx, ill)) { if (match_ill != NULL && ill != match_ill) { continue; @@ -5932,13 +5979,13 @@ repeat: ipif_refhold_locked(ipif); mutex_exit(&ill->ill_lock); RELEASE_CONN_LOCK(q); - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); return (ipif); } else if (IPIF_CAN_WAIT(ipif, q)) { ipsq = ill->ill_phyint->phyint_ipsq; mutex_enter(&ipsq->ipsq_lock); mutex_exit(&ill->ill_lock); - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); ipsq_enq(ipsq, q, mp, func, NEW_OP, ill); mutex_exit(&ipsq->ipsq_lock); @@ -5954,7 +6001,7 @@ repeat: /* If we already did the ptp case, then we are done */ if (ptp) { - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); if (error != NULL) *error = ENXIO; return (NULL); @@ -5972,7 +6019,7 @@ repeat: * Return the zoneid for the ipif which matches. ALL_ZONES if no match. */ zoneid_t -ipif_lookup_addr_zoneid(ipaddr_t addr, ill_t *match_ill) +ipif_lookup_addr_zoneid(ipaddr_t addr, ill_t *match_ill, ip_stack_t *ipst) { zoneid_t zoneid; ipif_t *ipif; @@ -5980,13 +6027,13 @@ ipif_lookup_addr_zoneid(ipaddr_t addr, ill_t *match_ill) boolean_t ptp = B_FALSE; ill_walk_context_t ctx; - rw_enter(&ill_g_lock, RW_READER); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); /* * Repeat twice, first based on local addresses and * next time for pointopoint. 
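ipif_lookup_addr() above (and ipif_lookup_addr_zoneid() right after it) keep their two-pass structure: the first pass over the V4 walk matches local interface addresses, and only if that finds nothing is the walk repeated matching point-to-point destination addresses. Here is the same ordering decision reduced to a stand-alone function; ipif_model_t and its fields are invented for the example.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

typedef uint32_t ipaddr_model_t;

typedef struct ipif_model {
	ipaddr_model_t	lcl_addr;	/* local address */
	ipaddr_model_t	pp_dst_addr;	/* peer address, if point-to-point */
	bool		pointopoint;
} ipif_model_t;

/*
 * Find an interface for 'addr': prefer an exact local-address match and
 * fall back to a point-to-point peer-address match only if no local
 * address matched anywhere.  This mirrors the repeat:/ptp structure of
 * ipif_lookup_addr() without the locking and zone checks.
 */
const ipif_model_t *
lookup_addr(const ipif_model_t *ipifs, size_t n, ipaddr_model_t addr)
{
	bool ptp = false;
	size_t i;

repeat:
	for (i = 0; i < n; i++) {
		if (!ptp && ipifs[i].lcl_addr == addr)
			return (&ipifs[i]);
		if (ptp && ipifs[i].pointopoint &&
		    ipifs[i].pp_dst_addr == addr)
			return (&ipifs[i]);
	}
	if (!ptp) {
		ptp = true;
		goto repeat;	/* second pass: point-to-point peers */
	}
	return (NULL);
}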
*/ repeat: - ill = ILL_START_WALK_V4(&ctx); + ill = ILL_START_WALK_V4(&ctx, ipst); for (; ill != NULL; ill = ill_next(&ctx, ill)) { if (match_ill != NULL && ill != match_ill) { continue; @@ -6002,7 +6049,7 @@ repeat: !(ipif->ipif_state_flags & IPIF_CONDEMNED)) { zoneid = ipif->ipif_zoneid; mutex_exit(&ill->ill_lock); - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); /* * If ipif_zoneid was ALL_ZONES then we have * a trusted extensions shared IP address. @@ -6018,7 +6065,7 @@ repeat: /* If we already did the ptp case, then we are done */ if (ptp) { - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); return (ALL_ZONES); } ptp = B_TRUE; @@ -6038,6 +6085,7 @@ ipif_lookup_remote(ill_t *ill, ipaddr_t addr, zoneid_t zoneid) { ipif_t *ipif; ire_t *ire; + ip_stack_t *ipst = ill->ill_ipst; ASSERT(!ill->ill_isv6); @@ -6072,7 +6120,7 @@ ipif_lookup_remote(ill_t *ill, ipaddr_t addr, zoneid_t zoneid) } mutex_exit(&ill->ill_lock); ire = ire_route_lookup(addr, 0, 0, 0, NULL, NULL, zoneid, - NULL, MATCH_IRE_RECURSIVE); + NULL, MATCH_IRE_RECURSIVE, ipst); if (ire != NULL) { /* * The callers of this function wants to know the @@ -6525,14 +6573,14 @@ ill_thread_exit(ill_t *ill, void *dummy) #ifdef ILL_DEBUG void -ip_thread_exit(void) +ip_thread_exit(ip_stack_t *ipst) { ill_t *ill; ipif_t *ipif; ill_walk_context_t ctx; - rw_enter(&ill_g_lock, RW_READER); - ill = ILL_START_WALK_ALL(&ctx); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); + ill = ILL_START_WALK_ALL(&ctx, ipst); for (; ill != NULL; ill = ill_next(&ctx, ill)) { for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { @@ -6540,11 +6588,11 @@ ip_thread_exit(void) } ill_thread_exit(ill, NULL); } - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); - ire_walk(ire_thread_exit, NULL); - ndp_walk_common(&ndp4, NULL, nce_thread_exit, NULL, B_FALSE); - ndp_walk_common(&ndp6, NULL, nce_thread_exit, NULL, B_FALSE); + ire_walk(ire_thread_exit, NULL, ipst); + ndp_walk_common(ipst->ips_ndp4, NULL, nce_thread_exit, NULL, B_FALSE); + ndp_walk_common(ipst->ips_ndp6, NULL, nce_thread_exit, NULL, B_FALSE); } /* @@ -6681,7 +6729,7 @@ int ip_rt_add(ipaddr_t dst_addr, ipaddr_t mask, ipaddr_t gw_addr, ipaddr_t src_addr, int flags, ipif_t *ipif_arg, ipif_t *src_ipif, ire_t **ire_arg, boolean_t ioctl_msg, queue_t *q, mblk_t *mp, - ipsq_func_t func, struct rtsa_s *sp) + ipsq_func_t func, struct rtsa_s *sp, ip_stack_t *ipst) { ire_t *ire; ire_t *gw_ire = NULL; @@ -6719,7 +6767,7 @@ ip_rt_add(ipaddr_t dst_addr, ipaddr_t mask, ipaddr_t gw_addr, */ if (gw_addr != 0) { ipif = ipif_lookup_interface(gw_addr, dst_addr, q, mp, func, - &error); + &error, ipst); if (ipif != NULL) { if (IS_VNI(ipif->ipif_ill)) { ipif_refrele(ipif); @@ -6752,7 +6800,7 @@ ip_rt_add(ipaddr_t dst_addr, ipaddr_t mask, ipaddr_t gw_addr, if (gw_addr == INADDR_LOOPBACK && dst_addr == INADDR_LOOPBACK && mask == IP_HOST_MASK) { ire = ire_ctable_lookup(dst_addr, 0, IRE_LOOPBACK, ipif, - ALL_ZONES, NULL, match_flags); + ALL_ZONES, NULL, match_flags, ipst); if (ire != NULL) { ire_refrele(ire); if (ipif_refheld) @@ -6784,7 +6832,8 @@ ip_rt_add(ipaddr_t dst_addr, ipaddr_t mask, ipaddr_t gw_addr, RTF_PRIVATE : 0, &ire_uinfo_null, NULL, - NULL); + NULL, + ipst); if (ire == NULL) { if (ipif_refheld) @@ -6918,7 +6967,7 @@ ip_rt_add(ipaddr_t dst_addr, ipaddr_t mask, ipaddr_t gw_addr, } else { ire = ire_ftable_lookup(dst_addr, mask, 0, IRE_INTERFACE, ipif, NULL, ALL_ZONES, 0, - NULL, match_flags); + NULL, match_flags, ipst); } if (ire != NULL) { ire_refrele(ire); @@ -6966,7 +7015,8 @@ 
ip_rt_add(ipaddr_t dst_addr, ipaddr_t mask, ipaddr_t gw_addr, flags, &ire_uinfo_null, NULL, - NULL); + NULL, + ipst); if (ire == NULL) { if (ipif_refheld) ipif_refrele(ipif); @@ -7024,7 +7074,7 @@ ip_rt_add(ipaddr_t dst_addr, ipaddr_t mask, ipaddr_t gw_addr, if (ipif_arg != NULL) match_flags |= MATCH_IRE_ILL; gw_ire = ire_ftable_lookup(gw_addr, 0, 0, IRE_INTERFACE, ipif_arg, NULL, - ALL_ZONES, 0, NULL, match_flags); + ALL_ZONES, 0, NULL, match_flags, ipst); if (gw_ire == NULL) return (ENETUNREACH); @@ -7046,7 +7096,7 @@ ip_rt_add(ipaddr_t dst_addr, ipaddr_t mask, ipaddr_t gw_addr, /* check for a duplicate entry */ ire = ire_ftable_lookup(dst_addr, mask, gw_addr, type, ipif_arg, NULL, ALL_ZONES, 0, NULL, - match_flags | MATCH_IRE_MASK | MATCH_IRE_GW); + match_flags | MATCH_IRE_MASK | MATCH_IRE_GW, ipst); if (ire != NULL) { ire_refrele(gw_ire); ire_refrele(ire); @@ -7107,7 +7157,9 @@ ip_rt_add(ipaddr_t dst_addr, ipaddr_t mask, ipaddr_t gw_addr, flags, &gw_ire->ire_uinfo, /* Inherit ULP info from gw */ gc, /* security attribute */ - NULL); + NULL, + ipst); + /* * The ire holds a reference to the 'gc' and the 'gc' holds a * reference to the 'gcgrp'. We can now release the extra reference @@ -7151,13 +7203,14 @@ ip_rt_add(ipaddr_t dst_addr, ipaddr_t mask, ipaddr_t gw_addr, * or a multicast. */ ire_t *ire_dst = ire_ctable_lookup(ire->ire_addr, 0, - IRE_BROADCAST, NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE); + IRE_BROADCAST, NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE, ipst); if (ire_dst != NULL) { - ip_cgtp_bcast_add(ire, ire_dst); + ip_cgtp_bcast_add(ire, ire_dst, ipst); ire_refrele(ire_dst); goto save_ire; } - if ((ip_cgtp_filter_ops != NULL) && !CLASSD(ire->ire_addr)) { + if ((ip_cgtp_filter_ops != NULL) && !CLASSD(ire->ire_addr) && + ipst->ips_netstack->netstack_stackid == GLOBAL_NETSTACKID) { int res = ip_cgtp_filter_ops->cfo_add_dest_v4( ire->ire_addr, ire->ire_gateway_addr, @@ -7179,7 +7232,7 @@ ip_rt_add(ipaddr_t dst_addr, ipaddr_t mask, ipaddr_t gw_addr, */ if (gc != NULL) { ASSERT(gcgrp != NULL); - ire_clookup_delete_cache_gw(gw_addr, ALL_ZONES); + ire_clookup_delete_cache_gw(gw_addr, ALL_ZONES, ipst); } save_ire: @@ -7205,7 +7258,7 @@ save_ire: ipif_save_ire(ipif, ire); } if (ioctl_msg) - ip_rts_rtmsg(RTM_OLDADD, ire, 0); + ip_rts_rtmsg(RTM_OLDADD, ire, 0, ipst); if (ire_arg != NULL) { /* * Store the ire that was successfully added into where ire_arg @@ -7234,7 +7287,8 @@ save_ire: int ip_rt_delete(ipaddr_t dst_addr, ipaddr_t mask, ipaddr_t gw_addr, uint_t rtm_addrs, int flags, ipif_t *ipif_arg, ipif_t *src_ipif, - boolean_t ioctl_msg, queue_t *q, mblk_t *mp, ipsq_func_t func) + boolean_t ioctl_msg, queue_t *q, mblk_t *mp, ipsq_func_t func, + ip_stack_t *ipst) { ire_t *ire = NULL; ipif_t *ipif; @@ -7287,7 +7341,7 @@ ip_rt_delete(ipaddr_t dst_addr, ipaddr_t mask, ipaddr_t gw_addr, if (src_ipif != NULL) { if (ipif_arg == NULL && gw_addr != 0) { ipif_arg = ipif_lookup_interface(gw_addr, dst_addr, - q, mp, func, &err); + q, mp, func, &err, ipst); if (ipif_arg != NULL) ipif_refheld = B_TRUE; } @@ -7298,7 +7352,7 @@ ip_rt_delete(ipaddr_t dst_addr, ipaddr_t mask, ipaddr_t gw_addr, ipif = ipif_arg; } else { ipif = ipif_lookup_interface(gw_addr, dst_addr, - q, mp, func, &err); + q, mp, func, &err, ipst); if (ipif != NULL) ipif_refheld = B_TRUE; else if (err == EINPROGRESS) @@ -7324,12 +7378,12 @@ ip_rt_delete(ipaddr_t dst_addr, ipaddr_t mask, ipaddr_t gw_addr, if (ipif->ipif_ire_type == IRE_LOOPBACK) { ire = ire_ctable_lookup(dst_addr, 0, IRE_LOOPBACK, ipif, ALL_ZONES, NULL, - match_flags); + 
match_flags, ipst); } if (ire == NULL) { ire = ire_ftable_lookup(dst_addr, mask, 0, IRE_INTERFACE, ipif, NULL, ALL_ZONES, 0, - NULL, match_flags); + NULL, match_flags, ipst); } } } @@ -7365,7 +7419,8 @@ ip_rt_delete(ipaddr_t dst_addr, ipaddr_t mask, ipaddr_t gw_addr, else type = IRE_PREFIX; ire = ire_ftable_lookup(dst_addr, mask, gw_addr, type, - ipif_arg, NULL, ALL_ZONES, 0, NULL, match_flags); + ipif_arg, NULL, ALL_ZONES, 0, NULL, match_flags, + ipst); } } @@ -7383,11 +7438,12 @@ ip_rt_delete(ipaddr_t dst_addr, ipaddr_t mask, ipaddr_t gw_addr, * Packets coming from that address will no longer be * filtered to remove duplicates. */ - if (ip_cgtp_filter_ops != NULL) { - err = ip_cgtp_filter_ops->cfo_del_dest_v4(ire->ire_addr, - ire->ire_gateway_addr); + if (ip_cgtp_filter_ops != NULL && + ipst->ips_netstack->netstack_stackid == GLOBAL_NETSTACKID) { + err = ip_cgtp_filter_ops->cfo_del_dest_v4( + ire->ire_addr, ire->ire_gateway_addr); } - ip_cgtp_bcast_delete(ire); + ip_cgtp_bcast_delete(ire, ipst); } ipif = ire->ire_ipif; @@ -7400,7 +7456,7 @@ ip_rt_delete(ipaddr_t dst_addr, ipaddr_t mask, ipaddr_t gw_addr, ipif_remove_ire(ipif, ire); } if (ioctl_msg) - ip_rts_rtmsg(RTM_OLDDEL, ire, 0); + ip_rts_rtmsg(RTM_OLDDEL, ire, 0, ipst); ire_delete(ire); ire_refrele(ire); return (err); @@ -7421,6 +7477,10 @@ ip_siocaddrt(ipif_t *dummy_ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, mblk_t *mp1; struct rtentry *rt; ipif_t *ipif = NULL; + ip_stack_t *ipst; + + ASSERT(q->q_next == NULL); + ipst = CONNQ_TO_IPST(q); ip1dbg(("ip_siocaddrt:")); /* Existence of mp1 verified in ip_wput_nondata */ @@ -7444,11 +7504,11 @@ ip_siocaddrt(ipif_t *dummy_ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, * Note that ip_subnet_mask returns a zero mask in the case of * default (an all-zeroes address). */ - mask = ip_subnet_mask(dst_addr, &ipif); + mask = ip_subnet_mask(dst_addr, &ipif, ipst); } error = ip_rt_add(dst_addr, mask, gw_addr, 0, rt->rt_flags, NULL, NULL, - NULL, B_TRUE, q, mp, ip_process_ioctl, NULL); + NULL, B_TRUE, q, mp, ip_process_ioctl, NULL, ipst); if (ipif != NULL) ipif_refrele(ipif); return (error); @@ -7469,6 +7529,10 @@ ip_siocdelrt(ipif_t *dummy_ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, mblk_t *mp1; struct rtentry *rt; ipif_t *ipif = NULL; + ip_stack_t *ipst; + + ASSERT(q->q_next == NULL); + ipst = CONNQ_TO_IPST(q); ip1dbg(("ip_siocdelrt:")); /* Existence of mp1 verified in ip_wput_nondata */ @@ -7492,12 +7556,12 @@ ip_siocdelrt(ipif_t *dummy_ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, * Note that ip_subnet_mask returns a zero mask in the case of * default (an all-zeroes address). */ - mask = ip_subnet_mask(dst_addr, &ipif); + mask = ip_subnet_mask(dst_addr, &ipif, ipst); } error = ip_rt_delete(dst_addr, mask, gw_addr, RTA_DST | RTA_GATEWAY | RTA_NETMASK, rt->rt_flags, NULL, NULL, - B_TRUE, q, mp, ip_process_ioctl); + B_TRUE, q, mp, ip_process_ioctl, ipst); if (ipif != NULL) ipif_refrele(ipif); return (error); @@ -7798,6 +7862,7 @@ ipsq_exit(ipsq_t *ipsq, boolean_t start_igmp_timer, boolean_t start_mld_timer) size_t ill_list_size = 0; int cnt = 0; boolean_t need_ipsq_free = B_FALSE; + ip_stack_t *ipst = ipsq->ipsq_ipst; ASSERT(IAM_WRITER_IPSQ(ipsq)); mutex_enter(&ipsq->ipsq_lock); @@ -7840,7 +7905,8 @@ again: * If we need to call ill_split_ipsq and change <ill-ipsq> we need * to grab ill_g_lock as writer. */ - rw_enter(&ill_g_lock, ipsq->ipsq_split ? RW_WRITER : RW_READER); + rw_enter(&ipst->ips_ill_g_lock, + ipsq->ipsq_split ? 
RW_WRITER : RW_READER); /* ipsq_refs can't change while ill_g_lock is held as reader */ if (ipsq->ipsq_refs != 0) { @@ -7863,7 +7929,7 @@ again: /* oops, some message has landed up, we can't get out */ if (ill_list != NULL) ill_unlock_ills(ill_list, cnt); - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); if (ill_list != NULL) kmem_free(ill_list, ill_list_size); ill_list = NULL; @@ -7913,7 +7979,7 @@ again: ill_unlock_ills(ill_list, cnt); if (ipsq->ipsq_refs == 0) need_ipsq_free = B_TRUE; - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); if (ill_list != 0) kmem_free(ill_list, ill_list_size); @@ -7941,23 +8007,23 @@ again: * all others pass B_TRUE. */ if (start_igmp_timer) { - mutex_enter(&igmp_timer_lock); - next = igmp_deferred_next; - igmp_deferred_next = INFINITY; - mutex_exit(&igmp_timer_lock); + mutex_enter(&ipst->ips_igmp_timer_lock); + next = ipst->ips_igmp_deferred_next; + ipst->ips_igmp_deferred_next = INFINITY; + mutex_exit(&ipst->ips_igmp_timer_lock); if (next != INFINITY) - igmp_start_timers(next); + igmp_start_timers(next, ipst); } if (start_mld_timer) { - mutex_enter(&mld_timer_lock); - next = mld_deferred_next; - mld_deferred_next = INFINITY; - mutex_exit(&mld_timer_lock); + mutex_enter(&ipst->ips_mld_timer_lock); + next = ipst->ips_mld_deferred_next; + ipst->ips_mld_deferred_next = INFINITY; + mutex_exit(&ipst->ips_mld_timer_lock); if (next != INFINITY) - mld_start_timers(next); + mld_start_timers(next, ipst); } } @@ -8178,8 +8244,10 @@ ip_sioctl_slifoindex(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, struct lifreq *lifr = (struct lifreq *)ifreq; boolean_t isv6; conn_t *connp; + ip_stack_t *ipst; connp = Q_TO_CONN(q); + ipst = connp->conn_netstack->netstack_ip; isv6 = connp->conn_af_isv6; /* * Set original index. @@ -8205,7 +8273,7 @@ ip_sioctl_slifoindex(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, * physical interface. 
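In ipsq_exit() above the deferred IGMP/MLD timer handling becomes per stack: while a thread holds the serialization queue, timer requests are only recorded in ips_igmp_deferred_next / ips_mld_deferred_next, and on the way out the pending deadline is swapped for INFINITY under the per-stack timer lock and the timer is finally armed. The user-level model below shows that record-while-busy, arm-on-exit idea; the mutex, the UINT_MAX sentinel and start_timers() are stand-ins.

#include <limits.h>
#include <pthread.h>

#define	DEFER_INFINITY	UINT_MAX	/* "nothing pending" sentinel */

typedef struct timer_state {
	pthread_mutex_t	lock;
	unsigned int	deferred_next;	/* earliest pending deadline, in ms */
} timer_state_t;

/* Stub for igmp_start_timers()/mld_start_timers(); a real timer goes here. */
static void
start_timers(unsigned int delay_ms)
{
	(void) delay_ms;
}

/* Instead of arming the timer while serialized, just remember the deadline. */
void
defer_timer(timer_state_t *ts, unsigned int delay_ms)
{
	pthread_mutex_lock(&ts->lock);
	if (delay_ms < ts->deferred_next)
		ts->deferred_next = delay_ms;	/* keep the earliest request */
	pthread_mutex_unlock(&ts->lock);
}

/* ipsq_exit() analogue: pick up whatever was deferred and arm it now. */
void
flush_deferred_timer(timer_state_t *ts)
{
	unsigned int next;

	pthread_mutex_lock(&ts->lock);
	next = ts->deferred_next;
	ts->deferred_next = DEFER_INFINITY;
	pthread_mutex_unlock(&ts->lock);

	if (next != DEFER_INFINITY)
		start_timers(next);
}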
*/ ill = ill_lookup_on_ifindex(lifr->lifr_index, isv6, NULL, NULL, - NULL, NULL); + NULL, NULL, ipst); if (ill == NULL) return (ENXIO); ill_refrele(ill); @@ -8267,6 +8335,7 @@ ip_extract_tunreq(queue_t *q, mblk_t *mp, ipif_t **ipifp, ipsq_func_t func) mblk_t *mp1; int error; conn_t *connp; + ip_stack_t *ipst; /* Existence verified in ip_wput_nondata */ mp1 = mp->b_cont->b_cont; @@ -8280,11 +8349,12 @@ ip_extract_tunreq(queue_t *q, mblk_t *mp, ipif_t **ipifp, ipsq_func_t func) connp = Q_TO_CONN(q); isv6 = connp->conn_af_isv6; + ipst = connp->conn_netstack->netstack_ip; /* Disallows implicit create */ ipif = ipif_lookup_on_name(ta->ifta_lifr_name, mi_strlen(ta->ifta_lifr_name), B_FALSE, &exists, isv6, - connp->conn_zoneid, CONNP_TO_WQ(connp), mp, func, &error); + connp->conn_zoneid, CONNP_TO_WQ(connp), mp, func, &error, ipst); if (ipif == NULL) return (error); @@ -8348,12 +8418,14 @@ ip_extract_lifreq_cmn(queue_t *q, mblk_t *mp, int cmd_type, int flags, int err; mblk_t *mp1; zoneid_t zoneid; + ip_stack_t *ipst; if (q->q_next != NULL) { ill = (ill_t *)q->q_ptr; isv6 = ill->ill_isv6; connp = NULL; zoneid = ALL_ZONES; + ipst = ill->ill_ipst; } else { ill = NULL; connp = Q_TO_CONN(q); @@ -8363,6 +8435,7 @@ ip_extract_lifreq_cmn(queue_t *q, mblk_t *mp, int cmd_type, int flags, /* global zone can access ipifs in all zones */ zoneid = ALL_ZONES; } + ipst = connp->conn_netstack->netstack_ip; } /* Has been checked in ip_wput_nondata */ @@ -8405,7 +8478,6 @@ ip_extract_lifreq_cmn(queue_t *q, mblk_t *mp, int cmd_type, int flags, ci->ci_lifr = lifr; } - if (iocp->ioc_cmd == SIOCSLIFNAME) { /* * The ioctl will be failed if the ioctl comes down @@ -8423,7 +8495,8 @@ ip_extract_lifreq_cmn(queue_t *q, mblk_t *mp, int cmd_type, int flags, } else { ipif = ipif_lookup_on_name(name, mi_strlen(name), B_FALSE, &exists, isv6, zoneid, - (connp == NULL) ? q : CONNP_TO_WQ(connp), mp, func, &err); + (connp == NULL) ? q : CONNP_TO_WQ(connp), mp, func, &err, + ipst); if (ipif == NULL) { if (err == EINPROGRESS) return (err); @@ -8438,7 +8511,7 @@ ip_extract_lifreq_cmn(queue_t *q, mblk_t *mp, int cmd_type, int flags, ipif = ipif_lookup_on_name(name, mi_strlen(name), B_FALSE, &exists, !isv6, zoneid, (connp == NULL) ? q : - CONNP_TO_WQ(connp), mp, func, &err); + CONNP_TO_WQ(connp), mp, func, &err, ipst); if (err == EINPROGRESS) return (err); } @@ -8484,15 +8557,15 @@ ip_extract_lifreq_cmn(queue_t *q, mblk_t *mp, int cmd_type, int flags, * Return the total number of ipifs. */ static uint_t -ip_get_numifs(zoneid_t zoneid) +ip_get_numifs(zoneid_t zoneid, ip_stack_t *ipst) { uint_t numifs = 0; ill_t *ill; ill_walk_context_t ctx; ipif_t *ipif; - rw_enter(&ill_g_lock, RW_READER); - ill = ILL_START_WALK_V4(&ctx); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); + ill = ILL_START_WALK_V4(&ctx, ipst); while (ill != NULL) { for (ipif = ill->ill_ipif; ipif != NULL; @@ -8503,7 +8576,7 @@ ip_get_numifs(zoneid_t zoneid) } ill = ill_next(&ctx, ill); } - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); return (numifs); } @@ -8511,7 +8584,7 @@ ip_get_numifs(zoneid_t zoneid) * Return the total number of ipifs. 
*/ static uint_t -ip_get_numlifs(int family, int lifn_flags, zoneid_t zoneid) +ip_get_numlifs(int family, int lifn_flags, zoneid_t zoneid, ip_stack_t *ipst) { uint_t numifs = 0; ill_t *ill; @@ -8520,13 +8593,13 @@ ip_get_numlifs(int family, int lifn_flags, zoneid_t zoneid) ip1dbg(("ip_get_numlifs(%d %u %d)\n", family, lifn_flags, (int)zoneid)); - rw_enter(&ill_g_lock, RW_READER); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); if (family == AF_INET) - ill = ILL_START_WALK_V4(&ctx); + ill = ILL_START_WALK_V4(&ctx, ipst); else if (family == AF_INET6) - ill = ILL_START_WALK_V6(&ctx); + ill = ILL_START_WALK_V6(&ctx, ipst); else - ill = ILL_START_WALK_ALL(&ctx); + ill = ILL_START_WALK_ALL(&ctx, ipst); for (; ill != NULL; ill = ill_next(&ctx, ill)) { for (ipif = ill->ill_ipif; ipif != NULL; @@ -8555,7 +8628,7 @@ ip_get_numlifs(int family, int lifn_flags, zoneid_t zoneid) numifs++; } } - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); return (numifs); } @@ -8564,20 +8637,21 @@ ip_get_lifsrcofnum(ill_t *ill) { uint_t numifs = 0; ill_t *ill_head = ill; + ip_stack_t *ipst = ill->ill_ipst; /* * ill_g_usesrc_lock protects ill_usesrc_grp_next, for example, some * other thread may be trying to relink the ILLs in this usesrc group * and adjusting the ill_usesrc_grp_next pointers */ - rw_enter(&ill_g_usesrc_lock, RW_READER); + rw_enter(&ipst->ips_ill_g_usesrc_lock, RW_READER); if ((ill->ill_usesrc_ifindex == 0) && (ill->ill_usesrc_grp_next != NULL)) { for (; (ill != NULL) && (ill->ill_usesrc_grp_next != ill_head); ill = ill->ill_usesrc_grp_next) numifs++; } - rw_exit(&ill_g_usesrc_lock); + rw_exit(&ipst->ips_ill_g_usesrc_lock); return (numifs); } @@ -8589,13 +8663,15 @@ ip_sioctl_get_ifnum(ipif_t *dummy_ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, ip_ioctl_cmd_t *ipip, void *ifreq) { int *nump; + conn_t *connp = Q_TO_CONN(q); ASSERT(q->q_next == NULL); /* not a valid ioctl for ip as a module */ /* Existence of b_cont->b_cont checked in ip_wput_nondata */ nump = (int *)mp->b_cont->b_cont->b_rptr; - *nump = ip_get_numifs(Q_TO_CONN(q)->conn_zoneid); + *nump = ip_get_numifs(connp->conn_zoneid, + connp->conn_netstack->netstack_ip); ip1dbg(("ip_sioctl_get_ifnum numifs %d", *nump)); return (0); } @@ -8608,6 +8684,7 @@ ip_sioctl_get_lifnum(ipif_t *dummy_ipif, sin_t *dummy_sin, { struct lifnum *lifn; mblk_t *mp1; + conn_t *connp = Q_TO_CONN(q); ASSERT(q->q_next == NULL); /* not a valid ioctl for ip as a module */ @@ -8625,7 +8702,7 @@ ip_sioctl_get_lifnum(ipif_t *dummy_ipif, sin_t *dummy_sin, } lifn->lifn_count = ip_get_numlifs(lifn->lifn_family, lifn->lifn_flags, - Q_TO_CONN(q)->conn_zoneid); + connp->conn_zoneid, connp->conn_netstack->netstack_ip); ip1dbg(("ip_sioctl_get_lifnum numifs %d", lifn->lifn_count)); return (0); } @@ -8645,6 +8722,7 @@ ip_sioctl_get_ifconf(ipif_t *dummy_ipif, sin_t *dummy_sin, queue_t *q, struct sockaddr_in *sin; int32_t ifclen; zoneid_t zoneid; + ip_stack_t *ipst = CONNQ_TO_IPST(q); ASSERT(q->q_next == NULL); /* not valid ioctls for ip as a module */ @@ -8700,7 +8778,7 @@ ip_sioctl_get_ifconf(ipif_t *dummy_ipif, sin_t *dummy_sin, queue_t *q, * number of interfaces for a device, so we don't need * to count them here... 
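ip_get_numlifs() above picks the walk (V4 only, V6 only, or all) from the requested family and counts only the logical interfaces the caller's zone may see; SIOCGLIFNUM and SIOCGLIFCONF then size their replies from that count. The snippet below isolates the two filters, family plus an ALL_ZONES-style wildcard; the constants and lif_model_t records are illustrative only.

#include <stddef.h>

#define	MODEL_AF_INET	2
#define	MODEL_AF_INET6	26
#define	MODEL_AF_ANY	0		/* count both families */
#define	MODEL_ALL_ZONES	(-1)		/* caller may see every zone */

typedef struct lif_model {
	int	family;			/* MODEL_AF_INET or MODEL_AF_INET6 */
	int	zoneid;			/* owning zone */
} lif_model_t;

/*
 * Count the logical interfaces matching the requested family that the
 * caller's zone is allowed to see, in the spirit of ip_get_numlifs().
 */
size_t
count_lifs(const lif_model_t *lifs, size_t n, int family, int zoneid)
{
	size_t count = 0;
	size_t i;

	for (i = 0; i < n; i++) {
		if (family != MODEL_AF_ANY && lifs[i].family != family)
			continue;
		if (zoneid != MODEL_ALL_ZONES && lifs[i].zoneid != zoneid)
			continue;
		count++;
	}
	return (count);
}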
*/ - numifs = ip_get_numifs(zoneid); + numifs = ip_get_numifs(zoneid, ipst); ifclen = STRUCT_FGET(ifc, ifc_len); ifc_bufsize = numifs * sizeof (struct ifreq); @@ -8730,8 +8808,8 @@ ip_sioctl_get_ifconf(ipif_t *dummy_ipif, sin_t *dummy_sin, queue_t *q, ifr = (struct ifreq *)mp1->b_rptr; - rw_enter(&ill_g_lock, RW_READER); - ill = ILL_START_WALK_V4(&ctx); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); + ill = ILL_START_WALK_V4(&ctx, ipst); for (; ill != NULL; ill = ill_next(&ctx, ill)) { for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { @@ -8741,7 +8819,7 @@ ip_sioctl_get_ifconf(ipif_t *dummy_ipif, sin_t *dummy_sin, queue_t *q, if ((uchar_t *)&ifr[1] > mp1->b_wptr) { if (iocp->ioc_cmd == O_SIOCGIFCONF) { /* old behaviour */ - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); return (EINVAL); } else { goto if_copydone; @@ -8758,7 +8836,7 @@ ip_sioctl_get_ifconf(ipif_t *dummy_ipif, sin_t *dummy_sin, queue_t *q, } } if_copydone: - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); mp1->b_wptr = (uchar_t *)ifr; if (STRUCT_BUF(ifc) != NULL) { @@ -8790,8 +8868,10 @@ ip_sioctl_get_lifsrcof(ipif_t *dummy_ipif, sin_t *dummy_sin, queue_t *q, boolean_t isv6 = B_FALSE; struct sockaddr_in *sin; struct sockaddr_in6 *sin6; - STRUCT_HANDLE(lifsrcof, lifs); + ip_stack_t *ipst; + + ipst = CONNQ_TO_IPST(q); ASSERT(q->q_next == NULL); @@ -8813,7 +8893,7 @@ ip_sioctl_get_lifsrcof(ipif_t *dummy_ipif, sin_t *dummy_sin, queue_t *q, ifindex = STRUCT_FGET(lifs, lifs_ifindex); isv6 = (Q_TO_CONN(q))->conn_af_isv6; ipif = ipif_lookup_on_ifindex(ifindex, isv6, zoneid, q, mp, - ip_process_ioctl, &err); + ip_process_ioctl, &err, ipst); if (ipif == NULL) { ip1dbg(("ip_sioctl_get_lifsrcof: no ipif for ifindex %d\n", ifindex)); @@ -8850,8 +8930,8 @@ ip_sioctl_get_lifsrcof(ipif_t *dummy_ipif, sin_t *dummy_sin, queue_t *q, orig_ipif = ipif; /* ill_g_usesrc_lock protects ill_usesrc_grp_next */ - rw_enter(&ill_g_usesrc_lock, RW_READER); - rw_enter(&ill_g_lock, RW_READER); + rw_enter(&ipst->ips_ill_g_usesrc_lock, RW_READER); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); ill = ill->ill_usesrc_grp_next; /* start from next ill */ for (; (ill != NULL) && (ill != ill_head); @@ -8880,8 +8960,8 @@ ip_sioctl_get_lifsrcof(ipif_t *dummy_ipif, sin_t *dummy_sin, queue_t *q, } lifr++; } - rw_exit(&ill_g_usesrc_lock); - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_usesrc_lock); + rw_exit(&ipst->ips_ill_g_lock); ipif_refrele(orig_ipif); mp1->b_wptr = (uchar_t *)lifr; STRUCT_FSET(lifs, lifs_len, (int)((uchar_t *)lifr - mp1->b_rptr)); @@ -8910,6 +8990,7 @@ ip_sioctl_get_lifconf(ipif_t *dummy_ipif, sin_t *dummy_sin, queue_t *q, int32_t lifclen; zoneid_t zoneid; STRUCT_HANDLE(lifconf, lifc); + ip_stack_t *ipst = CONNQ_TO_IPST(q); ip1dbg(("ip_sioctl_get_lifconf")); @@ -8979,7 +9060,7 @@ ip_sioctl_get_lifconf(ipif_t *dummy_ipif, sin_t *dummy_sin, queue_t *q, * If lifc_len is smaller than what is needed, return * EINVAL. 
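ip_sioctl_get_ifconf() above sizes its reply as numifs * sizeof (struct ifreq) from the count returned by ip_get_numifs(), and while filling the reply it still bound-checks the write pointer before each record, failing with EINVAL for the old O_SIOCGIFCONF behaviour when the caller's buffer is too small. The sketch below isolates that size-then-check discipline; the entry type, error handling and signature are simplified stand-ins.

#include <errno.h>
#include <stdbool.h>
#include <stddef.h>
#include <string.h>

typedef struct ifreq_model {
	char	name[16];
} ifreq_model_t;

/*
 * Fill 'buf' (avail_len bytes) with interface records.  With 'strict' set
 * (the old O_SIOCGIFCONF behaviour) an undersized buffer is an error;
 * otherwise the copy simply stops at the last record that fits.  Either
 * way every write is bound-checked against the end of the buffer.
 */
int
fill_ifconf(const char *const *names, size_t numifs, bool strict,
    void *buf, size_t avail_len, size_t *used_len)
{
	ifreq_model_t *out = buf;
	char *end = (char *)buf + avail_len;
	size_t i;

	if (strict && numifs * sizeof (ifreq_model_t) > avail_len)
		return (EINVAL);

	for (i = 0; i < numifs; i++) {
		if ((char *)(out + 1) > end)
			break;		/* never write past the buffer */
		(void) strncpy(out->name, names[i], sizeof (out->name) - 1);
		out->name[sizeof (out->name) - 1] = '\0';
		out++;
	}
	*used_len = (size_t)((char *)out - (char *)buf);
	return (0);
}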
*/ - numlifs = ip_get_numlifs(family, flags, zoneid); + numlifs = ip_get_numlifs(family, flags, zoneid, ipst); lifc_bufsize = numlifs * sizeof (struct lifreq); lifclen = STRUCT_FGET(lifc, lifc_len); if (lifc_bufsize > lifclen) { @@ -8999,8 +9080,8 @@ ip_sioctl_get_lifconf(ipif_t *dummy_ipif, sin_t *dummy_sin, queue_t *q, lifr = (struct lifreq *)mp1->b_rptr; - rw_enter(&ill_g_lock, RW_READER); - ill = ill_first(list, list, &ctx); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); + ill = ill_first(list, list, &ctx, ipst); for (; ill != NULL; ill = ill_next(&ctx, ill)) { for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { @@ -9029,7 +9110,7 @@ ip_sioctl_get_lifconf(ipif_t *dummy_ipif, sin_t *dummy_sin, queue_t *q, if ((uchar_t *)&lifr[1] > mp1->b_wptr) { if (iocp->ioc_cmd == O_SIOCGLIFCONF) { - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); return (EINVAL); } else { goto lif_copydone; @@ -9062,7 +9143,7 @@ ip_sioctl_get_lifconf(ipif_t *dummy_ipif, sin_t *dummy_sin, queue_t *q, } } lif_copydone: - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); mp1->b_wptr = (uchar_t *)lifr; if (STRUCT_BUF(lifc) != NULL) { @@ -9077,8 +9158,15 @@ int ip_sioctl_set_ipmpfailback(ipif_t *dummy_ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, ip_ioctl_cmd_t *ipip, void *ifreq) { + ip_stack_t *ipst; + + if (q->q_next == NULL) + ipst = CONNQ_TO_IPST(q); + else + ipst = ILLQ_TO_IPST(q); + /* Existence of b_cont->b_cont checked in ip_wput_nondata */ - ipmp_enable_failback = *(int *)mp->b_cont->b_cont->b_rptr; + ipst->ips_ipmp_enable_failback = *(int *)mp->b_cont->b_cont->b_rptr; return (0); } @@ -9089,6 +9177,12 @@ ip_sioctl_ip6addrpolicy(queue_t *q, mblk_t *mp) size_t table_size; mblk_t *data_mp; struct iocblk *iocp = (struct iocblk *)mp->b_rptr; + ip_stack_t *ipst; + + if (q->q_next == NULL) + ipst = CONNQ_TO_IPST(q); + else + ipst = ILLQ_TO_IPST(q); /* These two ioctls are I_STR only */ if (iocp->ioc_count == TRANSPARENT) { @@ -9125,7 +9219,7 @@ ip_sioctl_ip6addrpolicy(queue_t *q, mblk_t *mp) switch (iocp->ioc_cmd) { case SIOCGIP6ADDRPOLICY: - iocp->ioc_rval = ip6_asp_get(table, table_size); + iocp->ioc_rval = ip6_asp_get(table, table_size, ipst); if (iocp->ioc_rval == -1) iocp->ioc_error = EINVAL; #if defined(_SYSCALL32_IMPL) && _LONG_LONG_ALIGNMENT_32 == 4 @@ -9168,7 +9262,7 @@ ip_sioctl_ip6addrpolicy(queue_t *q, mblk_t *mp) * and just recompile everything that depends on it.) */ #endif - ip6_asp_replace(mp, table, table_size, B_FALSE, + ip6_asp_replace(mp, table, table_size, B_FALSE, ipst, iocp->ioc_flag & IOC_MODELS); return; } @@ -9193,6 +9287,7 @@ ip_sioctl_dstinfo(queue_t *q, mblk_t *mp) ipif_t *src_ipif, *ire_ipif; struct iocblk *iocp = (struct iocblk *)mp->b_rptr; zoneid_t zoneid; + ip_stack_t *ipst = CONNQ_TO_IPST(q); ASSERT(q->q_next == NULL); /* this ioctl not allowed if ip is module */ zoneid = Q_TO_CONN(q)->conn_zoneid; @@ -9233,16 +9328,16 @@ ip_sioctl_dstinfo(queue_t *q, mblk_t *mp) * and ipif_select_source[_v6]() do not. 
*/ dir->dir_dscope = ip_addr_scope_v6(daddr); - dlabel = ip6_asp_lookup(daddr, &dir->dir_precedence); + dlabel = ip6_asp_lookup(daddr, &dir->dir_precedence, ipst); isipv4 = IN6_IS_ADDR_V4MAPPED(daddr); if (isipv4) { IN6_V4MAPPED_TO_IPADDR(daddr, v4daddr); ire = ire_ftable_lookup(v4daddr, NULL, NULL, - 0, NULL, NULL, zoneid, 0, NULL, match_ire); + 0, NULL, NULL, zoneid, 0, NULL, match_ire, ipst); } else { ire = ire_ftable_lookup_v6(daddr, NULL, NULL, - 0, NULL, NULL, zoneid, 0, NULL, match_ire); + 0, NULL, NULL, zoneid, 0, NULL, match_ire, ipst); } if (ire == NULL) { dir->dir_dreachable = 0; @@ -9276,7 +9371,7 @@ ip_sioctl_dstinfo(queue_t *q, mblk_t *mp) *saddr = src_ipif->ipif_v6lcl_addr; dir->dir_sscope = ip_addr_scope_v6(saddr); - slabel = ip6_asp_lookup(saddr, NULL); + slabel = ip6_asp_lookup(saddr, NULL, ipst); dir->dir_labelmatch = ip6_asp_labelcmp(dlabel, slabel); dir->dir_sdeprecated = (src_ipif->ipif_flags & IPIF_DEPRECATED) ? 1 : 0; @@ -9311,11 +9406,13 @@ ip_sioctl_tmyaddr(ipif_t *dummy_ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, ire_t *ire; mblk_t *mp1; zoneid_t zoneid; + ip_stack_t *ipst; ip1dbg(("ip_sioctl_tmyaddr")); ASSERT(q->q_next == NULL); /* this ioctl not allowed if ip is module */ zoneid = Q_TO_CONN(q)->conn_zoneid; + ipst = CONNQ_TO_IPST(q); /* Existence verified in ip_wput_nondata */ mp1 = mp->b_cont->b_cont; @@ -9332,14 +9429,14 @@ ip_sioctl_tmyaddr(ipif_t *dummy_ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, v4_addr); ire = ire_ctable_lookup(v4_addr, 0, IRE_LOCAL|IRE_LOOPBACK, NULL, zoneid, - NULL, MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY); + NULL, MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY, ipst); } else { in6_addr_t v6addr; v6addr = sin6->sin6_addr; ire = ire_ctable_lookup_v6(&v6addr, 0, IRE_LOCAL|IRE_LOOPBACK, NULL, zoneid, - NULL, MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY); + NULL, MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY, ipst); } break; } @@ -9349,7 +9446,7 @@ ip_sioctl_tmyaddr(ipif_t *dummy_ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, v4addr = sin->sin_addr.s_addr; ire = ire_ctable_lookup(v4addr, 0, IRE_LOCAL|IRE_LOOPBACK, NULL, zoneid, - NULL, MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY); + NULL, MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY, ipst); break; } default: @@ -9388,11 +9485,13 @@ ip_sioctl_tonlink(ipif_t *dummy_ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, mblk_t *mp1; ire_t *ire = NULL; zoneid_t zoneid; + ip_stack_t *ipst; ip1dbg(("ip_sioctl_tonlink")); ASSERT(q->q_next == NULL); /* this ioctl not allowed if ip is module */ zoneid = Q_TO_CONN(q)->conn_zoneid; + ipst = CONNQ_TO_IPST(q); /* Existence verified in ip_wput_nondata */ mp1 = mp->b_cont->b_cont; @@ -9416,7 +9515,7 @@ ip_sioctl_tonlink(ipif_t *dummy_ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, if (!CLASSD(v4_addr)) { ire = ire_route_lookup(v4_addr, 0, 0, 0, NULL, NULL, zoneid, NULL, - MATCH_IRE_GW); + MATCH_IRE_GW, ipst); } } else { in6_addr_t v6addr; @@ -9427,7 +9526,7 @@ ip_sioctl_tonlink(ipif_t *dummy_ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, if (!IN6_IS_ADDR_MULTICAST(&v6addr)) { ire = ire_route_lookup_v6(&v6addr, 0, &v6gw, 0, NULL, NULL, zoneid, - NULL, MATCH_IRE_GW); + NULL, MATCH_IRE_GW, ipst); } } break; @@ -9439,7 +9538,7 @@ ip_sioctl_tonlink(ipif_t *dummy_ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, if (!CLASSD(v4addr)) { ire = ire_route_lookup(v4addr, 0, 0, 0, NULL, NULL, zoneid, NULL, - MATCH_IRE_GW); + MATCH_IRE_GW, ipst); } break; } @@ -9539,9 +9638,11 @@ ip_sioctl_arp_common(ill_t *ill, queue_t *q, mblk_t *mp, sin_t *sin, boolean_t success; int flags, alength; char *lladdr; + ip_stack_t 
*ipst; ASSERT(!(q->q_flag & QREADR) && q->q_next == NULL); connp = Q_TO_CONN(q); + ipst = connp->conn_netstack->netstack_ip; iocp = (struct iocblk *)mp->b_rptr; /* @@ -9671,12 +9772,13 @@ ip_sioctl_arp_common(ill_t *ill, queue_t *q, mblk_t *mp, sin_t *sin, * pick up any change from arp. */ if (!if_arp_ioctl) { - (void) ip_ire_clookup_and_delete(ipaddr, NULL); + (void) ip_ire_clookup_and_delete(ipaddr, NULL, ipst); break; } else { ipif_t *ipif = ipif_get_next_ipif(NULL, ill); if (ipif != NULL) { - (void) ip_ire_clookup_and_delete(ipaddr, ipif); + (void) ip_ire_clookup_and_delete(ipaddr, ipif, + ipst); ipif_refrele(ipif); } break; @@ -9747,11 +9849,13 @@ ip_sioctl_xarp(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, ill_t *ill = NULL; struct sockaddr_in *sin; boolean_t if_arp_ioctl = B_FALSE; + ip_stack_t *ipst; /* ioctl comes down on an conn */ ASSERT(!(q->q_flag & QREADR) && q->q_next == NULL); connp = Q_TO_CONN(q); isv6 = connp->conn_af_isv6; + ipst = connp->conn_netstack->netstack_ip; /* Existance verified in ip_wput_nondata */ mp1 = mp->b_cont->b_cont; @@ -9780,7 +9884,7 @@ ip_sioctl_xarp(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, *cptr = '\0'; ill = ill_lookup_on_name(xar->xarp_ha.sdl_data, B_FALSE, isv6, CONNP_TO_WQ(connp), mp, ip_process_ioctl, - &err, NULL); + &err, NULL, ipst); *cptr = cval; if (ill == NULL) return (err); @@ -9796,7 +9900,8 @@ ip_sioctl_xarp(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, * as an extended BSD ioctl. The kernel uses the IP address * to figure out the network interface. */ - ire = ire_cache_lookup(sin->sin_addr.s_addr, ALL_ZONES, NULL); + ire = ire_cache_lookup(sin->sin_addr.s_addr, ALL_ZONES, NULL, + ipst); if ((ire == NULL) || (ire->ire_type == IRE_LOOPBACK) || ((ill = ire_to_ill(ire)) == NULL) || (ill->ill_net_type != IRE_IF_RESOLVER)) { @@ -9804,7 +9909,7 @@ ip_sioctl_xarp(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, ire_refrele(ire); ire = ire_ftable_lookup(sin->sin_addr.s_addr, 0, 0, IRE_IF_RESOLVER, NULL, NULL, ALL_ZONES, 0, - NULL, MATCH_IRE_TYPE); + NULL, MATCH_IRE_TYPE, ipst); if ((ire == NULL) || ((ill = ire_to_ill(ire)) == NULL)) { if (ire != NULL) @@ -9851,10 +9956,12 @@ ip_sioctl_arp(ipif_t *dummy_ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, int err; conn_t *connp; ill_t *ill; + ip_stack_t *ipst; /* ioctl comes down on an conn */ ASSERT(!(q->q_flag & QREADR) && q->q_next == NULL); connp = Q_TO_CONN(q); + ipst = CONNQ_TO_IPST(q); isv6 = connp->conn_af_isv6; if (isv6) return (ENXIO); @@ -9881,14 +9988,14 @@ ip_sioctl_arp(ipif_t *dummy_ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, * be rare enough since IRE cache entries have a longer * life expectancy than ARP cache entries. 
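In ip_sioctl_xarp() and ip_sioctl_arp() above, the interface for an ARP ioctl is found from the destination address: first through ire_cache_lookup(), and if that yields nothing usable (no entry, or a loopback entry) through a slower ire_ftable_lookup() for an IRE_IF_RESOLVER route, both now against the caller's ip_stack_t. Below is a self-contained caricature of that cache-then-table fallback; the two tables and the resolve function are invented for the example.

#include <stddef.h>
#include <stdint.h>

typedef uint32_t ipaddr_model_t;

typedef struct route_model {
	ipaddr_model_t	addr;
	const char	*ill_name;	/* resolving interface */
} route_model_t;

/* Fast path: recently used routes ("IRE cache" stand-in). */
static const route_model_t cache[] = {
	{ 0x0a000001, "net0" },
};

/* Slow path: configured interface routes ("forwarding table" stand-in). */
static const route_model_t ftable[] = {
	{ 0x0a000001, "net0" },
	{ 0x0a000002, "net1" },
};

static const char *
scan(const route_model_t *tbl, size_t n, ipaddr_model_t addr)
{
	size_t i;

	for (i = 0; i < n; i++) {
		if (tbl[i].addr == addr)
			return (tbl[i].ill_name);
	}
	return (NULL);
}

/*
 * Resolve the interface an ARP request should be issued on: try the cache
 * first, fall back to the forwarding table, give up with NULL otherwise.
 */
const char *
resolve_arp_ill(ipaddr_model_t addr)
{
	const char *name;

	name = scan(cache, sizeof (cache) / sizeof (cache[0]), addr);
	if (name != NULL)
		return (name);
	return (scan(ftable, sizeof (ftable) / sizeof (ftable[0]), addr));
}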
*/ - ire = ire_cache_lookup(sin->sin_addr.s_addr, ALL_ZONES, NULL); + ire = ire_cache_lookup(sin->sin_addr.s_addr, ALL_ZONES, NULL, ipst); if ((ire == NULL) || (ire->ire_type == IRE_LOOPBACK) || ((ill = ire_to_ill(ire)) == NULL)) { if (ire != NULL) ire_refrele(ire); ire = ire_ftable_lookup(sin->sin_addr.s_addr, 0, 0, IRE_IF_RESOLVER, NULL, NULL, ALL_ZONES, 0, - NULL, MATCH_IRE_TYPE); + NULL, MATCH_IRE_TYPE, ipst); if ((ire == NULL) || ((ill = ire_to_ill(ire)) == NULL)) { if (ire != NULL) ire_refrele(ire); @@ -9929,6 +10036,12 @@ ip_sioctl_plink(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg) boolean_t entered_ipsq = B_FALSE; boolean_t islink; queue_t *dwq = NULL; + ip_stack_t *ipst; + + if (CONN_Q(q)) + ipst = CONNQ_TO_IPST(q); + else + ipst = ILLQ_TO_IPST(q); ASSERT(iocp->ioc_cmd == I_PLINK || iocp->ioc_cmd == I_PUNLINK || iocp->ioc_cmd == I_LINK || iocp->ioc_cmd == I_UNLINK); @@ -10054,7 +10167,7 @@ ip_sioctl_plink(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg) * stream. */ ill = ill_lookup_on_name(ipmxp->ipmx_name, B_FALSE, B_FALSE, - q, mp, ip_sioctl_plink, &err, NULL); + q, mp, ip_sioctl_plink, &err, NULL, ipst); if (ill == NULL) { if (err == EINPROGRESS) { return; @@ -10268,6 +10381,12 @@ ip_sioctl_copyin_setup(queue_t *q, mblk_t *mp) struct iocblk *iocp = (struct iocblk *)mp->b_rptr; ip_ioctl_cmd_t *ipip; cred_t *cr; + ip_stack_t *ipst; + + if (CONN_Q(q)) + ipst = CONNQ_TO_IPST(q); + else + ipst = ILLQ_TO_IPST(q); ipip = ip_sioctl_lookup(iocp->ioc_cmd); if (ipip == NULL) { @@ -10290,7 +10409,7 @@ ip_sioctl_copyin_setup(queue_t *q, mblk_t *mp) * come back. */ if ((iocp->ioc_cmd == SIOCGDSTINFO || - iocp->ioc_cmd == SIOCGIP6ADDRPOLICY) && !ip6_asp_can_lookup()) { + iocp->ioc_cmd == SIOCGIP6ADDRPOLICY) && !ip6_asp_can_lookup(ipst)) { ip6_asp_pending_op(q, mp, ip_sioctl_copyin_resume); return; } @@ -10333,9 +10452,9 @@ ip_sioctl_copyin_setup(queue_t *q, mblk_t *mp) /* Make sure normal users don't send down privileged ioctls */ if ((ipip->ipi_flags & IPI_PRIV) && - (cr != NULL) && secpolicy_net_config(cr, B_TRUE) != 0) { + (cr != NULL) && secpolicy_ip_config(cr, B_TRUE) != 0) { /* We checked the privilege earlier but log it here */ - miocnak(q, mp, 0, secpolicy_net_config(cr, B_FALSE)); + miocnak(q, mp, 0, secpolicy_ip_config(cr, B_FALSE)); return; } @@ -10377,7 +10496,7 @@ ip_sioctl_copyin_setup(queue_t *q, mblk_t *mp) return; case SIOCGIP6ADDRPOLICY: ip_sioctl_ip6addrpolicy(q, mp); - ip6_asp_table_refrele(); + ip6_asp_table_refrele(ipst); return; case SIOCSIP6ADDRPOLICY: @@ -10386,7 +10505,7 @@ ip_sioctl_copyin_setup(queue_t *q, mblk_t *mp) case SIOCGDSTINFO: ip_sioctl_dstinfo(q, mp); - ip6_asp_table_refrele(); + ip6_asp_table_refrele(ipst); return; case I_PLINK: @@ -10414,9 +10533,9 @@ ip_sioctl_copyin_setup(queue_t *q, mblk_t *mp) * Modifying the nd table thru nd_load/nd_unload requires * the writer lock. */ - rw_enter(&ip_g_nd_lock, RW_READER); - if (nd_getset(q, ip_g_nd, mp)) { - rw_exit(&ip_g_nd_lock); + rw_enter(&ipst->ips_ip_g_nd_lock, RW_READER); + if (nd_getset(q, ipst->ips_ip_g_nd, mp)) { + rw_exit(&ipst->ips_ip_g_nd_lock); if (iocp->ioc_error) iocp->ioc_count = 0; @@ -10424,7 +10543,7 @@ ip_sioctl_copyin_setup(queue_t *q, mblk_t *mp) qreply(q, mp); return; } - rw_exit(&ip_g_nd_lock); + rw_exit(&ipst->ips_ip_g_nd_lock); /* * We don't understand this subioctl of ND_GET / ND_SET. 
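ip_sioctl_plink() and ip_sioctl_copyin_setup() above (like ip_sioctl_set_ipmpfailback() and ip_sioctl_ip6addrpolicy() earlier) all begin the same way: if the queue is a conn queue the ip_stack_t comes from CONNQ_TO_IPST(q), otherwise from ILLQ_TO_IPST(q) through the ill the module instance is bound to. The toy version below models that decision; queue_model_t is a stand-in, not the STREAMS queue_t.

#include <stddef.h>

typedef struct ipstack_model ipstack_model_t;

typedef struct conn_model {
	ipstack_model_t	*conn_ipst;	/* instance the socket was opened in */
} conn_model_t;

typedef struct ill_model {
	ipstack_model_t	*ill_ipst;	/* instance the interface belongs to */
} ill_model_t;

/* A request can arrive on a socket (conn) or on a plumbed module (ill). */
typedef struct queue_model {
	conn_model_t	*q_conn;	/* non-NULL for driver queues */
	ill_model_t	*q_ill;		/* non-NULL for module queues */
} queue_model_t;

/*
 * Analogue of the CONN_Q(q) ? CONNQ_TO_IPST(q) : ILLQ_TO_IPST(q) pattern:
 * every ioctl handler resolves the per-instance state from whatever hangs
 * off the queue it was invoked on, never from a global.
 */
ipstack_model_t *
queue_to_ipst(const queue_model_t *q)
{
	if (q->q_conn != NULL)
		return (q->q_conn->conn_ipst);
	if (q->q_ill != NULL)
		return (q->q_ill->ill_ipst);
	return (NULL);
}

The point of the pattern is that no ioctl path consults file-scope state any more: every request carries enough context to name its own stack instance.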
* Maybe intended for some driver / module below us @@ -10477,9 +10596,11 @@ ip_sioctl_iocack(queue_t *q, mblk_t *mp) sin_t *sin; ipaddr_t addr; int err; + ip_stack_t *ipst; ill = q->q_ptr; ASSERT(ill != NULL); + ipst = ill->ill_ipst; /* * We should get back from ARP a packet chain that looks like: @@ -10565,10 +10686,10 @@ ip_sioctl_iocack(queue_t *q, mblk_t *mp) ipsqill = ill; ire = ire_ctable_lookup(addr, 0, IRE_CACHE, ipsqill->ill_ipif, ALL_ZONES, - NULL, MATCH_IRE_TYPE | MATCH_IRE_ILL); + NULL, MATCH_IRE_TYPE | MATCH_IRE_ILL, ipst); } else { ire = ire_ctable_lookup(addr, 0, IRE_CACHE, - NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE); + NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE, ipst); if (ire != NULL) ipsqill = ire_to_ill(ire); } @@ -10651,7 +10772,7 @@ ip_sioctl_iocack(queue_t *q, mblk_t *mp) */ ipintf = ill->ill_ipif; } - if (ip_ire_clookup_and_delete(addr, ipintf)) { + if (ip_ire_clookup_and_delete(addr, ipintf, ipst)) { /* * The address in "addr" may be an entry for a * router. If that's true, then any off-net @@ -10664,7 +10785,7 @@ ip_sioctl_iocack(queue_t *q, mblk_t *mp) ire_delete_cache_gw, (char *)&addr, ill); else ire_walk_v4(ire_delete_cache_gw, (char *)&addr, - ALL_ZONES); + ALL_ZONES, ipst); iocp->ioc_error = 0; } } @@ -10755,7 +10876,9 @@ ip_sioctl_addif(ipif_t *dummy_ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, conn_t *connp; zoneid_t zoneid; int orig_ifindex = 0; + ip_stack_t *ipst = CONNQ_TO_IPST(q); + ASSERT(q->q_next == NULL); ip1dbg(("ip_sioctl_addif\n")); /* Existence of mp1 has been checked in ip_wput_nondata */ mp1 = mp->b_cont->b_cont; @@ -10783,8 +10906,8 @@ ip_sioctl_addif(ipif_t *dummy_ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, * can't be any other writer thread. So can pass null below * for the last 4 args to ipif_lookup_name. */ - ipif = ipif_lookup_on_name(lifr->lifr_name, namelen, - B_TRUE, &exists, isv6, zoneid, NULL, NULL, NULL, NULL); + ipif = ipif_lookup_on_name(lifr->lifr_name, namelen, B_TRUE, + &exists, isv6, zoneid, NULL, NULL, NULL, NULL, ipst); /* Prevent any further action */ if (ipif == NULL) { return (ENOBUFS); @@ -10823,7 +10946,7 @@ ip_sioctl_addif(ipif_t *dummy_ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, } } ill = ill_lookup_on_name(name, B_FALSE, isv6, - CONNP_TO_WQ(connp), mp, ip_process_ioctl, &err, NULL); + CONNP_TO_WQ(connp), mp, ip_process_ioctl, &err, NULL, ipst); if (found_sep) *cp = IPIF_SEPARATOR_CHAR; if (ill == NULL) @@ -10862,10 +10985,12 @@ ip_sioctl_addif(ipif_t *dummy_ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, * plumbed when we're adding an IPv4 (resp. IPv6) ipif. * Otherwise we create the ipif on the failed interface. 
*/ - rw_enter(&ill_g_lock, RW_READER); - phyi = avl_first(&phyint_g_list.phyint_list_avl_by_index); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); + phyi = avl_first(&ipst->ips_phyint_g_list-> + phyint_list_avl_by_index); for (; phyi != NULL; - phyi = avl_walk(&phyint_g_list.phyint_list_avl_by_index, + phyi = avl_walk(&ipst->ips_phyint_g_list-> + phyint_list_avl_by_index, phyi, AVL_AFTER)) { if (phyi->phyint_groupname_len == 0) continue; @@ -10878,7 +11003,7 @@ ip_sioctl_addif(ipif_t *dummy_ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, break; } } - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); if (phyi != NULL) { orig_ifindex = ill->ill_phyint->phyint_ifindex; @@ -10958,7 +11083,11 @@ ip_sioctl_removeif(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, conn_t *connp; ill_t *ill = ipif->ipif_ill; boolean_t success; + ip_stack_t *ipst; + + ipst = CONNQ_TO_IPST(q); + ASSERT(q->q_next == NULL); ip1dbg(("ip_sioctl_remove_if(%s:%u %p)\n", ipif->ipif_ill->ill_name, ipif->ipif_id, (void *)ipif)); ASSERT(IAM_WRITER_IPIF(ipif)); @@ -11033,14 +11162,15 @@ ip_sioctl_removeif(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, sin6 = (sin6_t *)sin; /* We are a writer, so we should be able to lookup */ ipif = ipif_lookup_addr_v6(&sin6->sin6_addr, - ill, ALL_ZONES, NULL, NULL, NULL, NULL); + ill, ALL_ZONES, NULL, NULL, NULL, NULL, ipst); if (ipif == NULL) { /* * Maybe the address in on another interface in * the same IPMP group? We check this below. */ ipif = ipif_lookup_addr_v6(&sin6->sin6_addr, - NULL, ALL_ZONES, NULL, NULL, NULL, NULL); + NULL, ALL_ZONES, NULL, NULL, NULL, NULL, + ipst); } } else { ipaddr_t addr; @@ -11051,14 +11181,14 @@ ip_sioctl_removeif(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, addr = sin->sin_addr.s_addr; /* We are a writer, so we should be able to lookup */ ipif = ipif_lookup_addr(addr, ill, ALL_ZONES, NULL, - NULL, NULL, NULL); + NULL, NULL, NULL, ipst); if (ipif == NULL) { /* * Maybe the address in on another interface in * the same IPMP group? We check this below. */ ipif = ipif_lookup_addr(addr, NULL, ALL_ZONES, - NULL, NULL, NULL, NULL); + NULL, NULL, NULL, NULL, ipst); } } if (ipif == NULL) { @@ -11367,11 +11497,14 @@ ip_sioctl_addr_tail(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, info = kmem_alloc(sizeof (hook_nic_event_t), KM_NOSLEEP); if (info != NULL) { + ip_stack_t *ipst = ill->ill_ipst; + info->hne_nic = ipif->ipif_ill->ill_phyint->phyint_ifindex; info->hne_lif = MAP_IPIF_ID(ipif->ipif_id); info->hne_event = NE_ADDRESS_CHANGE; - info->hne_family = ipif->ipif_isv6 ? ipv6 : ipv4; + info->hne_family = ipif->ipif_isv6 ? 
+ ipst->ips_ipv6_net_data : ipst->ips_ipv4_net_data; info->hne_data = kmem_alloc(sinlen, KM_NOSLEEP); if (info->hne_data != NULL) { info->hne_datalen = sinlen; @@ -11391,7 +11524,7 @@ ip_sioctl_addr_tail(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, ipif->ipif_ill->ill_nic_event_info = info; } - mutex_exit(&ipif->ipif_ill->ill_lock); + mutex_exit(&ill->ill_lock); if (need_up) { /* @@ -11831,6 +11964,7 @@ ip_sioctl_flags(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, struct lifreq *lifr; boolean_t set_linklocal = B_FALSE; boolean_t zero_source = B_FALSE; + ip_stack_t *ipst; ip1dbg(("ip_sioctl_flags(%s:%u %p)\n", ipif->ipif_ill->ill_name, ipif->ipif_id, (void *)ipif)); @@ -11839,6 +11973,7 @@ ip_sioctl_flags(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, ill = ipif->ipif_ill; phyi = ill->ill_phyint; + ipst = ill->ill_ipst; if (ipip->ipi_cmd_type == IF_CMD) { ifr = (struct ifreq *)if_req; @@ -11957,13 +12092,15 @@ ip_sioctl_flags(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, * PHYI_FAILED and PHYI_INACTIVE are exclusive */ if ((turn_on & PHYI_FAILED) && - ((intf_flags & PHYI_STANDBY) || !ipmp_enable_failback)) { + ((intf_flags & PHYI_STANDBY) || + !ipst->ips_ipmp_enable_failback)) { /* Reset PHYI_INACTIVE when PHYI_FAILED is being set */ phyi->phyint_flags &= ~PHYI_INACTIVE; } if ((turn_off & PHYI_FAILED) && ((intf_flags & PHYI_STANDBY) || - (!ipmp_enable_failback && ill_is_inactive(ill)))) { + (!ipst->ips_ipmp_enable_failback && + ill_is_inactive(ill)))) { phyint_inactive(phyi); } @@ -11981,7 +12118,7 @@ ip_sioctl_flags(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, phyint_inactive(phyi); } if (turn_off & PHYI_STANDBY) { - if (ipmp_enable_failback) { + if (ipst->ips_ipmp_enable_failback) { /* * Reset PHYI_INACTIVE. */ @@ -12282,6 +12419,9 @@ ip_sioctl_flags_restart(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, return (err); } +/* + * Can operate on either a module or a driver queue. + */ /* ARGSUSED */ int ip_sioctl_get_flags(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, @@ -12331,6 +12471,7 @@ ip_sioctl_mtu(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, struct ifreq *ifr; struct lifreq *lifr; ire_t *ire; + ip_stack_t *ipst; ip1dbg(("ip_sioctl_mtu(%s:%u %p)\n", ipif->ipif_ill->ill_name, ipif->ipif_id, (void *)ipif)); @@ -12374,11 +12515,14 @@ ip_sioctl_mtu(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, ire->ire_max_frag = ipif->ipif_mtu; ire_refrele(ire); } + ipst = ipif->ipif_ill->ill_ipst; if (ipif->ipif_flags & IPIF_UP) { if (ipif->ipif_isv6) - ire_walk_v6(ipif_mtu_change, (char *)ipif, ALL_ZONES); + ire_walk_v6(ipif_mtu_change, (char *)ipif, ALL_ZONES, + ipst); else - ire_walk_v4(ipif_mtu_change, (char *)ipif, ALL_ZONES); + ire_walk_v4(ipif_mtu_change, (char *)ipif, ALL_ZONES, + ipst); } /* Update the MTU in SCTP's list */ sctp_update_ipif(ipif, SCTP_IPIF_UPDATE); @@ -12414,6 +12558,7 @@ ip_sioctl_brdaddr(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, { ipaddr_t addr; ire_t *ire; + ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; ip1dbg(("ip_sioctl_brdaddr(%s:%u)\n", ipif->ipif_ill->ill_name, ipif->ipif_id)); @@ -12441,7 +12586,7 @@ ip_sioctl_brdaddr(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, */ ire = ire_ctable_lookup(addr, 0, IRE_BROADCAST, ipif, ALL_ZONES, NULL, - (MATCH_IRE_ILL | MATCH_IRE_TYPE)); + (MATCH_IRE_ILL | MATCH_IRE_TYPE), ipst); if (ire == NULL) { return (EINVAL); } else { @@ -13195,7 +13340,7 @@ ip_sioctl_get_lnkinfo(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, * experimental. 
*/ static ipaddr_t -ip_subnet_mask(ipaddr_t addr, ipif_t **ipifp) +ip_subnet_mask(ipaddr_t addr, ipif_t **ipifp, ip_stack_t *ipst) { ipaddr_t net_mask; ill_t *ill; @@ -13211,8 +13356,8 @@ ip_subnet_mask(ipaddr_t addr, ipif_t **ipifp) /* Let's check to see if this is maybe a local subnet route. */ /* this function only applies to IPv4 interfaces */ - rw_enter(&ill_g_lock, RW_READER); - ill = ILL_START_WALK_V4(&ctx); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); + ill = ILL_START_WALK_V4(&ctx, ipst); for (; ill != NULL; ill = ill_next(&ctx, ill)) { mutex_enter(&ill->ill_lock); for (ipif = ill->ill_ipif; ipif != NULL; @@ -13241,7 +13386,7 @@ ip_subnet_mask(ipaddr_t addr, ipif_t **ipifp) */ ipif_refhold_locked(ipif); mutex_exit(&ill->ill_lock); - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); if (fallback_ipif != NULL) ipif_refrele(fallback_ipif); *ipifp = ipif; @@ -13250,7 +13395,7 @@ ip_subnet_mask(ipaddr_t addr, ipif_t **ipifp) } mutex_exit(&ill->ill_lock); } - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); *ipifp = fallback_ipif; return ((fallback_ipif != NULL) ? @@ -13357,8 +13502,6 @@ ipif_lookup_seqid(ill_t *ill, uint_t seqid) return (NULL); } -uint64_t ipif_g_seqid; - /* * Assign a unique id for the ipif. This is used later when we send * IRES to ARP for resolution where we initialize ire_ipif_seqid @@ -13369,7 +13512,9 @@ uint64_t ipif_g_seqid; static void ipif_assign_seqid(ipif_t *ipif) { - ipif->ipif_seqid = atomic_add_64_nv(&ipif_g_seqid, 1); + ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; + + ipif->ipif_seqid = atomic_add_64_nv(&ipst->ips_ipif_g_seqid, 1); } /* @@ -13385,12 +13530,14 @@ ipif_insert(ipif_t *ipif, boolean_t acquire_g_lock, boolean_t acquire_ill_lock) ipif_t *tipif; ipif_t **tipifp; int id; + ip_stack_t *ipst; ASSERT(ipif->ipif_ill->ill_net_type == IRE_LOOPBACK || IAM_WRITER_IPIF(ipif)); ill = ipif->ipif_ill; ASSERT(ill != NULL); + ipst = ill->ill_ipst; /* * In the case of lo0:0 we already hold the ill_g_lock. @@ -13398,7 +13545,7 @@ ipif_insert(ipif_t *ipif, boolean_t acquire_g_lock, boolean_t acquire_ill_lock) * ipif_insert. Another such caller is ipif_move. 
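ipif_assign_seqid above drops the global ipif_g_seqid and bumps a counter kept in the owning stack instead, still atomically. A small sketch of the same idea using C11 atomics; the field name is illustrative.

#include <stdatomic.h>
#include <stdint.h>

typedef struct ip_stack {
	atomic_uint_fast64_t ips_ipif_g_seqid;	/* per-stack, not global */
} ip_stack_t;

/* Returns a sequence id that is unique within this stack instance. */
static uint64_t
ipif_assign_seqid_sketch(ip_stack_t *ipst)
{
	/* fetch_add returns the old value; +1 mirrors "return new value". */
	return (atomic_fetch_add(&ipst->ips_ipif_g_seqid, 1) + 1);
}

Sequence ids therefore only need to be unique within one stack, which is all the ARP resolution path that consumes them requires.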
*/ if (acquire_g_lock) - rw_enter(&ill_g_lock, RW_WRITER); + rw_enter(&ipst->ips_ill_g_lock, RW_WRITER); if (acquire_ill_lock) mutex_enter(&ill->ill_lock); id = ipif->ipif_id; @@ -13413,15 +13560,15 @@ ipif_insert(ipif_t *ipif, boolean_t acquire_g_lock, boolean_t acquire_ill_lock) tipifp = &(tipif->ipif_next); } /* limit number of logical interfaces */ - if (id >= ip_addrs_per_if) { + if (id >= ipst->ips_ip_addrs_per_if) { if (acquire_ill_lock) mutex_exit(&ill->ill_lock); if (acquire_g_lock) - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); return (-1); } ipif->ipif_id = id; /* assign new id */ - } else if (id < ip_addrs_per_if) { + } else if (id < ipst->ips_ip_addrs_per_if) { /* we have a real id; insert ipif in the right place */ while ((tipif = *tipifp) != NULL) { ASSERT(tipif->ipif_id != id); @@ -13433,7 +13580,7 @@ ipif_insert(ipif_t *ipif, boolean_t acquire_g_lock, boolean_t acquire_ill_lock) if (acquire_ill_lock) mutex_exit(&ill->ill_lock); if (acquire_g_lock) - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); return (-1); } @@ -13444,7 +13591,7 @@ ipif_insert(ipif_t *ipif, boolean_t acquire_g_lock, boolean_t acquire_ill_lock) if (acquire_ill_lock) mutex_exit(&ill->ill_lock); if (acquire_g_lock) - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); return (0); } @@ -13479,7 +13626,11 @@ ipif_allocate(ill_t *ill, int id, uint_t ire_type, boolean_t initialize) ipif->ipif_ill = ill; ipif->ipif_id = id; /* could be -1 */ - ipif->ipif_zoneid = GLOBAL_ZONEID; + /* + * Inherit the zoneid from the ill; for the shared stack instance + * this is always the global zone + */ + ipif->ipif_zoneid = ill->ill_zoneid; mutex_init(&ipif->ipif_saved_ire_lock, NULL, MUTEX_DEFAULT, NULL); @@ -14208,11 +14359,11 @@ ill_signal_ipsq_ills(ipsq_t *ipsq, boolean_t caller_holds_lock) } static ipsq_t * -ipsq_create(char *groupname) +ipsq_create(char *groupname, ip_stack_t *ipst) { ipsq_t *ipsq; - ASSERT(RW_WRITE_HELD(&ill_g_lock)); + ASSERT(RW_WRITE_HELD(&ipst->ips_ill_g_lock)); ipsq = kmem_zalloc(sizeof (ipsq_t), KM_NOSLEEP); if (ipsq == NULL) { return (NULL); @@ -14225,8 +14376,9 @@ ipsq_create(char *groupname) mutex_init(&ipsq->ipsq_lock, NULL, MUTEX_DEFAULT, NULL); ipsq->ipsq_flags |= IPSQ_GROUP; - ipsq->ipsq_next = ipsq_g_head; - ipsq_g_head = ipsq; + ipsq->ipsq_next = ipst->ips_ipsq_g_head; + ipst->ips_ipsq_g_head = ipsq; + ipsq->ipsq_ipst = ipst; /* No netstack_hold */ return (ipsq); } @@ -14256,19 +14408,22 @@ ipsq_create(char *groupname) * natural state. */ static ipsq_t * -ip_ipsq_lookup(char *groupname, boolean_t create, ipsq_t *exclude_ipsq) +ip_ipsq_lookup(char *groupname, boolean_t create, ipsq_t *exclude_ipsq, + ip_stack_t *ipst) { ipsq_t *ipsq; int group_len; phyint_t *phyint; - ASSERT(RW_LOCK_HELD(&ill_g_lock)); + ASSERT(RW_LOCK_HELD(&ipst->ips_ill_g_lock)); group_len = strlen(groupname); ASSERT(group_len != 0); group_len++; - for (ipsq = ipsq_g_head; ipsq != NULL; ipsq = ipsq->ipsq_next) { + for (ipsq = ipst->ips_ipsq_g_head; + ipsq != NULL; + ipsq = ipsq->ipsq_next) { /* * When an ipsq is being split, and ill_split_ipsq * calls this function, we exclude it from being considered. @@ -14291,7 +14446,7 @@ ip_ipsq_lookup(char *groupname, boolean_t create, ipsq_t *exclude_ipsq) * part of 1 ipsq and is not found in any other * ipsq. 
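ipsq_create now records ipsq_ipst in the new serialization queue and links it onto the stack's own ips_ipsq_g_head rather than a global list; the "No netstack_hold" note means the back-pointer is borrowed, not reference-counted, because an ipsq cannot outlive its stack. A simplified sketch of that ownership pattern, with invented types and a heap allocator standing in for kmem:

#include <stdio.h>
#include <stdlib.h>

struct ip_stack;

typedef struct ipsq {
	struct ipsq	*ipsq_next;
	struct ip_stack	*ipsq_ipst;	/* back-pointer, no reference held */
	char		 ipsq_name[32];
} ipsq_t;

typedef struct ip_stack {
	ipsq_t		*ips_ipsq_g_head;	/* per-stack list head */
} ip_stack_t;

/* Caller is assumed to hold the stack's ill_g_lock as writer (not shown). */
static ipsq_t *
ipsq_create_sketch(const char *groupname, ip_stack_t *ipst)
{
	ipsq_t *ipsq = calloc(1, sizeof (*ipsq));

	if (ipsq == NULL)
		return (NULL);
	(void) snprintf(ipsq->ipsq_name, sizeof (ipsq->ipsq_name), "%s",
	    groupname);
	ipsq->ipsq_next = ipst->ips_ipsq_g_head;	/* link onto this stack */
	ipst->ips_ipsq_g_head = ipsq;
	ipsq->ipsq_ipst = ipst;		/* borrowed: freed before the stack is */
	return (ipsq);
}

Correspondingly, ipsq_delete in the diff clears ipsq_ipst before freeing, so a stale back-pointer can never be followed.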
*/ - ASSERT(ip_ipsq_lookup(groupname, B_FALSE, ipsq) == + ASSERT(ip_ipsq_lookup(groupname, B_FALSE, ipsq, ipst) == NULL); return (ipsq); } @@ -14313,15 +14468,15 @@ ip_ipsq_lookup(char *groupname, boolean_t create, ipsq_t *exclude_ipsq) * part of 1 ipsq and is not found in any other * ipsq. */ - ASSERT(ip_ipsq_lookup(groupname, B_FALSE, ipsq) - == NULL); + ASSERT(ip_ipsq_lookup(groupname, B_FALSE, ipsq, + ipst) == NULL); return (ipsq); } phyint = phyint->phyint_ipsq_next; } } if (create) - ipsq = ipsq_create(groupname); + ipsq = ipsq_create(groupname, ipst); return (ipsq); } @@ -14330,6 +14485,7 @@ ipsq_delete(ipsq_t *ipsq) { ipsq_t *nipsq; ipsq_t *pipsq = NULL; + ip_stack_t *ipst = ipsq->ipsq_ipst; /* * We don't hold the ipsq lock, but we are sure no new @@ -14345,17 +14501,19 @@ ipsq_delete(ipsq_t *ipsq) /* * This is not the ipsq of an IPMP group. */ + ipsq->ipsq_ipst = NULL; kmem_free(ipsq, sizeof (ipsq_t)); return; } - rw_enter(&ill_g_lock, RW_WRITER); + rw_enter(&ipst->ips_ill_g_lock, RW_WRITER); /* * Locate the ipsq before we can remove it from * the singly linked list of ipsq's. */ - for (nipsq = ipsq_g_head; nipsq != NULL; nipsq = nipsq->ipsq_next) { + for (nipsq = ipst->ips_ipsq_g_head; nipsq != NULL; + nipsq = nipsq->ipsq_next) { if (nipsq == ipsq) { break; } @@ -14368,17 +14526,16 @@ ipsq_delete(ipsq_t *ipsq) if (pipsq != NULL) pipsq->ipsq_next = ipsq->ipsq_next; else - ipsq_g_head = ipsq->ipsq_next; + ipst->ips_ipsq_g_head = ipsq->ipsq_next; + ipsq->ipsq_ipst = NULL; kmem_free(ipsq, sizeof (ipsq_t)); - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); } static void ill_move_to_new_ipsq(ipsq_t *old_ipsq, ipsq_t *new_ipsq, mblk_t *current_mp, queue_t *q) - { - ASSERT(MUTEX_HELD(&new_ipsq->ipsq_lock)); ASSERT(old_ipsq->ipsq_mphead == NULL && old_ipsq->ipsq_mptail == NULL); ASSERT(old_ipsq->ipsq_pending_ipif == NULL); @@ -14608,13 +14765,13 @@ ill_down_ipifs(ill_t *ill, mblk_t *mp, int index, boolean_t chk_nofailover) } } -#define IPSQ_INC_REF(ipsq) { \ - ASSERT(RW_WRITE_HELD(&ill_g_lock)); \ +#define IPSQ_INC_REF(ipsq, ipst) { \ + ASSERT(RW_WRITE_HELD(&ipst->ips_ill_g_lock)); \ (ipsq)->ipsq_refs++; \ } -#define IPSQ_DEC_REF(ipsq) { \ - ASSERT(RW_WRITE_HELD(&ill_g_lock)); \ +#define IPSQ_DEC_REF(ipsq, ipst) { \ + ASSERT(RW_WRITE_HELD(&ipst->ips_ill_g_lock)); \ (ipsq)->ipsq_refs--; \ if ((ipsq)->ipsq_refs == 0) \ (ipsq)->ipsq_name[0] = '\0'; \ @@ -14625,7 +14782,7 @@ ill_down_ipifs(ill_t *ill, mblk_t *mp, int index, boolean_t chk_nofailover) * new_ipsq. */ static void -ill_merge_ipsq(ipsq_t *cur_ipsq, ipsq_t *new_ipsq) +ill_merge_ipsq(ipsq_t *cur_ipsq, ipsq_t *new_ipsq, ip_stack_t *ipst) { phyint_t *phyint; phyint_t *next_phyint; @@ -14635,16 +14792,16 @@ ill_merge_ipsq(ipsq_t *cur_ipsq, ipsq_t *new_ipsq) * writer and the ill_lock of the ill in question. Also the dest * ipsq can't vanish while we hold the ill_g_lock as writer. 
*/ - ASSERT(RW_WRITE_HELD(&ill_g_lock)); + ASSERT(RW_WRITE_HELD(&ipst->ips_ill_g_lock)); phyint = cur_ipsq->ipsq_phyint_list; cur_ipsq->ipsq_phyint_list = NULL; while (phyint != NULL) { next_phyint = phyint->phyint_ipsq_next; - IPSQ_DEC_REF(cur_ipsq); + IPSQ_DEC_REF(cur_ipsq, ipst); phyint->phyint_ipsq_next = new_ipsq->ipsq_phyint_list; new_ipsq->ipsq_phyint_list = phyint; - IPSQ_INC_REF(new_ipsq); + IPSQ_INC_REF(new_ipsq, ipst); phyint->phyint_ipsq = new_ipsq; phyint = next_phyint; } @@ -14655,7 +14812,8 @@ ill_merge_ipsq(ipsq_t *cur_ipsq, ipsq_t *new_ipsq) #define SPLIT_FAILED 2 int -ill_split_to_grp_ipsq(phyint_t *phyint, ipsq_t *cur_ipsq, boolean_t need_retry) +ill_split_to_grp_ipsq(phyint_t *phyint, ipsq_t *cur_ipsq, boolean_t need_retry, + ip_stack_t *ipst) { ipsq_t *newipsq = NULL; @@ -14663,7 +14821,7 @@ ill_split_to_grp_ipsq(phyint_t *phyint, ipsq_t *cur_ipsq, boolean_t need_retry) * Assertions denote pre-requisites for changing the ipsq of * a phyint */ - ASSERT(RW_WRITE_HELD(&ill_g_lock)); + ASSERT(RW_WRITE_HELD(&ipst->ips_ill_g_lock)); /* * <ill-phyint> assocs can't change while ill_g_lock * is held as writer. See ill_phyint_reinit() @@ -14684,17 +14842,17 @@ ill_split_to_grp_ipsq(phyint_t *phyint, ipsq_t *cur_ipsq, boolean_t need_retry) * to be in the same ipsq even in the event of mem alloc fails. */ newipsq = ip_ipsq_lookup(phyint->phyint_groupname, !need_retry, - cur_ipsq); + cur_ipsq, ipst); if (newipsq == NULL) { /* Memory allocation failure */ return (SPLIT_FAILED); } else { /* ipsq_refs protected by ill_g_lock (writer) */ - IPSQ_DEC_REF(cur_ipsq); + IPSQ_DEC_REF(cur_ipsq, ipst); phyint->phyint_ipsq = newipsq; phyint->phyint_ipsq_next = newipsq->ipsq_phyint_list; newipsq->ipsq_phyint_list = phyint; - IPSQ_INC_REF(newipsq); + IPSQ_INC_REF(newipsq, ipst); return (SPLIT_SUCCESS); } } @@ -14706,11 +14864,11 @@ ill_split_to_grp_ipsq(phyint_t *phyint, ipsq_t *cur_ipsq, boolean_t need_retry) * to do this split */ static int -ill_split_to_own_ipsq(phyint_t *phyint, ipsq_t *cur_ipsq) +ill_split_to_own_ipsq(phyint_t *phyint, ipsq_t *cur_ipsq, ip_stack_t *ipst) { ipsq_t *newipsq; - ASSERT(RW_WRITE_HELD(&ill_g_lock)); + ASSERT(RW_WRITE_HELD(&ipst->ips_ill_g_lock)); /* * <ill-phyint> assocs can't change while ill_g_lock * is held as writer. See ill_phyint_reinit() @@ -14731,7 +14889,7 @@ ill_split_to_own_ipsq(phyint_t *phyint, ipsq_t *cur_ipsq) } /* ipsq_ref is protected by ill_g_lock (writer) */ - IPSQ_DEC_REF(cur_ipsq); + IPSQ_DEC_REF(cur_ipsq, ipst); /* * This is a new ipsq that is unknown to the world. @@ -14760,6 +14918,7 @@ ill_split_ipsq(ipsq_t *cur_ipsq) phyint_t *next_phyint; int error; boolean_t need_retry = B_FALSE; + ip_stack_t *ipst = cur_ipsq->ipsq_ipst; phyint = cur_ipsq->ipsq_phyint_list; cur_ipsq->ipsq_phyint_list = NULL; @@ -14771,10 +14930,10 @@ ill_split_ipsq(ipsq_t *cur_ipsq) * to return without creating an ipsq. 
*/ if (phyint->phyint_groupname == NULL) { - error = ill_split_to_own_ipsq(phyint, cur_ipsq); + error = ill_split_to_own_ipsq(phyint, cur_ipsq, ipst); } else { error = ill_split_to_grp_ipsq(phyint, cur_ipsq, - need_retry); + need_retry, ipst); } switch (error) { @@ -14812,12 +14971,13 @@ ill_lock_ipsq_ills(ipsq_t *ipsq, ill_t **list, int list_max) { int cnt = 0; phyint_t *phyint; + ip_stack_t *ipst = ipsq->ipsq_ipst; /* * The caller holds ill_g_lock to ensure that the ill memberships * of the ipsq don't change */ - ASSERT(RW_LOCK_HELD(&ill_g_lock)); + ASSERT(RW_LOCK_HELD(&ipst->ips_ill_g_lock)); phyint = ipsq->ipsq_phyint_list; while (phyint != NULL) { @@ -14905,7 +15065,9 @@ ill_merge_groups(ill_t *from_ill, ill_t *to_ill, char *groupname, mblk_t *mp, int cnt; size_t ill_list_size; boolean_t became_writer_on_new_sq = B_FALSE; + ip_stack_t *ipst = from_ill->ill_ipst; + ASSERT(to_ill == NULL || ipst == to_ill->ill_ipst); /* Exactly 1 of 'to_ill' and groupname can be specified. */ ASSERT((to_ill != NULL) ^ (groupname != NULL)); @@ -14914,11 +15076,11 @@ ill_merge_groups(ill_t *from_ill, ill_t *to_ill, char *groupname, mblk_t *mp, * change the <ill-ipsq> assoc of an ill. Need to hold the * ipsq_lock to prevent new messages from landing on an ipsq. */ - rw_enter(&ill_g_lock, RW_WRITER); + rw_enter(&ipst->ips_ill_g_lock, RW_WRITER); old_ipsq = from_ill->ill_phyint->phyint_ipsq; if (groupname != NULL) - new_ipsq = ip_ipsq_lookup(groupname, B_TRUE, NULL); + new_ipsq = ip_ipsq_lookup(groupname, B_TRUE, NULL, ipst); else { new_ipsq = to_ill->ill_phyint->phyint_ipsq; } @@ -14929,7 +15091,7 @@ ill_merge_groups(ill_t *from_ill, ill_t *to_ill, char *groupname, mblk_t *mp, * both groups are on the same ipsq. */ if (old_ipsq == new_ipsq) { - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); return (0); } @@ -14937,7 +15099,7 @@ ill_merge_groups(ill_t *from_ill, ill_t *to_ill, char *groupname, mblk_t *mp, ill_list_size = cnt * sizeof (ill_t *); ill_list = kmem_zalloc(ill_list_size, KM_NOSLEEP); if (ill_list == NULL) { - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); return (ENOMEM); } cnt = ill_lock_ipsq_ills(old_ipsq, ill_list, cnt); @@ -14965,7 +15127,7 @@ ill_merge_groups(ill_t *from_ill, ill_t *to_ill, char *groupname, mblk_t *mp, * 'new_ipsq' has been looked up, and it can't change its <ill-ipsq> * assocs. till we release the ill_g_lock, and hence it can't vanish. 
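ill_merge_groups above begins by asserting that the source and destination ills hang off the same ip_stack_t; once several stacks coexist in one kernel, an accidental cross-stack merge would corrupt both instances' group lists. A small sketch of that guard with simplified types:

#include <assert.h>
#include <stddef.h>

typedef struct ip_stack ip_stack_t;

typedef struct ill {
	ip_stack_t *ill_ipst;	/* owning stack instance */
} ill_t;

static int
merge_groups_sketch(ill_t *from_ill, ill_t *to_ill)
{
	ip_stack_t *ipst = from_ill->ill_ipst;

	/* Objects belonging to different stacks must never be mixed. */
	assert(to_ill == NULL || ipst == to_ill->ill_ipst);

	/* ... take ipst's ill_g_lock as writer and merge as before ... */
	return (0);
}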
*/ - ill_merge_ipsq(old_ipsq, new_ipsq); + ill_merge_ipsq(old_ipsq, new_ipsq, ipst); /* * Mark the new ipsq as needing a split since it is currently @@ -14977,7 +15139,7 @@ ill_merge_groups(ill_t *from_ill, ill_t *to_ill, char *groupname, mblk_t *mp, /* Now release all the locks */ mutex_exit(&new_ipsq->ipsq_lock); ill_unlock_ills(ill_list, cnt); - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); kmem_free(ill_list, ill_list_size); @@ -15014,6 +15176,7 @@ ill_bcast_delete_and_add(ill_t *ill, ipaddr_t addr) { ire_t *ire, *nire, *nire_next, *ire_head = NULL; ire_t **ire_ptpn = &ire_head; + ip_stack_t *ipst = ill->ill_ipst; /* * The loopback and non-loopback IREs are inserted in the order in which @@ -15022,7 +15185,7 @@ ill_bcast_delete_and_add(ill_t *ill, ipaddr_t addr) */ for (;;) { ire = ire_ctable_lookup(addr, 0, IRE_BROADCAST, ill->ill_ipif, - ALL_ZONES, NULL, MATCH_IRE_TYPE | MATCH_IRE_ILL); + ALL_ZONES, NULL, MATCH_IRE_TYPE | MATCH_IRE_ILL, ipst); if (ire == NULL) break; @@ -15069,7 +15232,8 @@ ill_bcast_delete_and_add(ill_t *ill, ipaddr_t addr) ire->ire_flags, &ire->ire_uinfo, NULL, - NULL) == NULL) { + NULL, + ipst) == NULL) { cmn_err(CE_PANIC, "ire_init() failed"); } ire_delete(ire); @@ -15172,11 +15336,12 @@ ill_clear_bcast_mark(ill_t *ill, ipaddr_t addr) { ire_t *ire; irb_t *irb; + ip_stack_t *ipst = ill->ill_ipst; ASSERT(ill->ill_group == NULL); ire = ire_ctable_lookup(addr, 0, IRE_BROADCAST, ill->ill_ipif, - ALL_ZONES, NULL, MATCH_IRE_TYPE | MATCH_IRE_ILL); + ALL_ZONES, NULL, MATCH_IRE_TYPE | MATCH_IRE_ILL, ipst); if (ire != NULL) { /* @@ -15209,7 +15374,7 @@ ill_clear_bcast_mark(ill_t *ill, ipaddr_t addr) * a different ire with the same address for receiving. */ static void -ill_mark_bcast(ill_group_t *illgrp, ipaddr_t addr) +ill_mark_bcast(ill_group_t *illgrp, ipaddr_t addr, ip_stack_t *ipst) { irb_t *irb; ire_t *ire; @@ -15228,7 +15393,7 @@ ill_mark_bcast(ill_group_t *illgrp, ipaddr_t addr) boolean_t fallback = B_FALSE; ire = ire_ctable_lookup(addr, 0, IRE_BROADCAST, NULL, ALL_ZONES, - NULL, MATCH_IRE_TYPE); + NULL, MATCH_IRE_TYPE, ipst); /* * We may not be able to find some ires if a previous * ire_create failed. 
This happens when an ipif goes @@ -15379,7 +15544,8 @@ redo: clear_ire->ire_flags, &clear_ire->ire_uinfo, NULL, - NULL) == NULL) + NULL, + ipst) == NULL) cmn_err(CE_PANIC, "ire_init() failed"); if (clear_ire->ire_stq == NULL) { ire_t *ire_next = clear_ire->ire_next; @@ -15420,7 +15586,8 @@ redo: clear_ire_stq->ire_flags, &clear_ire_stq->ire_uinfo, NULL, - NULL) == NULL) + NULL, + ipst) == NULL) cmn_err(CE_PANIC, "ire_init() failed"); } } @@ -15456,7 +15623,7 @@ redo: membar_producer(); *irep = new_lb_ire; new_lb_ire_used = B_TRUE; - BUMP_IRE_STATS(ire_stats_v4, ire_stats_inserted); + BUMP_IRE_STATS(ipst->ips_ire_stats_v4, ire_stats_inserted); new_lb_ire->ire_bucket->irb_ire_cnt++; new_lb_ire->ire_ipif->ipif_ire_cnt++; @@ -15473,7 +15640,8 @@ redo: membar_producer(); *irep = new_nlb_ire; new_nlb_ire_used = B_TRUE; - BUMP_IRE_STATS(ire_stats_v4, ire_stats_inserted); + BUMP_IRE_STATS(ipst->ips_ire_stats_v4, + ire_stats_inserted); new_nlb_ire->ire_bucket->irb_ire_cnt++; new_nlb_ire->ire_ipif->ipif_ire_cnt++; ((ill_t *)new_nlb_ire->ire_stq->q_ptr)->ill_ire_cnt++; @@ -15502,6 +15670,7 @@ ipif_renominate_bcast(ipif_t *ipif) ipaddr_t subnet_netmask; ipaddr_t addr; ill_group_t *illgrp; + ip_stack_t *ipst = ill->ill_ipst; illgrp = ill->ill_group; /* @@ -15516,8 +15685,8 @@ ipif_renominate_bcast(ipif_t *ipif) if (ipif->ipif_subnet == 0) return; - ill_mark_bcast(illgrp, 0); - ill_mark_bcast(illgrp, INADDR_BROADCAST); + ill_mark_bcast(illgrp, 0, ipst); + ill_mark_bcast(illgrp, INADDR_BROADCAST, ipst); if ((ipif->ipif_lcl_addr != INADDR_ANY) && !(ipif->ipif_flags & IPIF_NOLOCAL)) { @@ -15526,17 +15695,17 @@ ipif_renominate_bcast(ipif_t *ipif) net_mask = htonl(IN_CLASSA_NET); } addr = net_mask & ipif->ipif_subnet; - ill_mark_bcast(illgrp, addr); + ill_mark_bcast(illgrp, addr, ipst); net_addr = ~net_mask | addr; - ill_mark_bcast(illgrp, net_addr); + ill_mark_bcast(illgrp, net_addr, ipst); subnet_netmask = ipif->ipif_net_mask; addr = ipif->ipif_subnet; - ill_mark_bcast(illgrp, addr); + ill_mark_bcast(illgrp, addr, ipst); subnet_addr = ~subnet_netmask | addr; - ill_mark_bcast(illgrp, subnet_addr); + ill_mark_bcast(illgrp, subnet_addr, ipst); } /* @@ -15571,6 +15740,7 @@ ill_nominate_bcast_rcv(ill_group_t *illgrp) ipaddr_t net_mask = 0; ipaddr_t subnet_netmask; ipaddr_t addr; + ip_stack_t *ipst; /* * When the last memeber is leaving, there is nothing to @@ -15583,6 +15753,7 @@ ill_nominate_bcast_rcv(ill_group_t *illgrp) ill = illgrp->illgrp_ill; ASSERT(!ill->ill_isv6); + ipst = ill->ill_ipst; /* * We assume that ires with same address and belonging to the * same group, has been grouped together. Nominating a *single* @@ -15625,8 +15796,8 @@ ill_nominate_bcast_rcv(ill_group_t *illgrp) * the first ire in the bucket for receiving and disables the * others. 
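The BUMP_IRE_STATS calls above now name ipst->ips_ire_stats_v4 instead of a global ire_stats_v4, so IRE insertions are accounted against the stack that owns the route table. A user-space sketch of the same counter layout; field and macro shapes are illustrative only.

#include <stdint.h>

typedef struct ire_stats {
	uint64_t ire_stats_inserted;
	uint64_t ire_stats_deleted;
} ire_stats_t;

typedef struct ip_stack {
	ire_stats_t ips_ire_stats_v4;	/* one set of counters per stack */
	ire_stats_t ips_ire_stats_v6;
} ip_stack_t;

#define	BUMP_IRE_STATS(stats, field)	((stats).field++)

static void
record_insert_sketch(ip_stack_t *ipst)
{
	/* Counts only this stack's IREs, not every stack's. */
	BUMP_IRE_STATS(ipst->ips_ire_stats_v4, ire_stats_inserted);
}

Per-stack counters keep observability tools inside a zone from seeing (or skewing) another zone's routing statistics.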
*/ - ill_mark_bcast(illgrp, 0); - ill_mark_bcast(illgrp, INADDR_BROADCAST); + ill_mark_bcast(illgrp, 0, ipst); + ill_mark_bcast(illgrp, INADDR_BROADCAST, ipst); for (; ill != NULL; ill = ill->ill_group_next) { for (ipif = ill->ill_ipif; ipif != NULL; @@ -15644,9 +15815,9 @@ ill_nominate_bcast_rcv(ill_group_t *illgrp) } addr = net_mask & ipif->ipif_subnet; if (prev_net_addr == 0 || prev_net_addr != addr) { - ill_mark_bcast(illgrp, addr); + ill_mark_bcast(illgrp, addr, ipst); net_addr = ~net_mask | addr; - ill_mark_bcast(illgrp, net_addr); + ill_mark_bcast(illgrp, net_addr, ipst); } prev_net_addr = addr; @@ -15654,9 +15825,9 @@ ill_nominate_bcast_rcv(ill_group_t *illgrp) addr = ipif->ipif_subnet; if (prev_subnet_addr == 0 || prev_subnet_addr != addr) { - ill_mark_bcast(illgrp, addr); + ill_mark_bcast(illgrp, addr, ipst); subnet_addr = ~subnet_netmask | addr; - ill_mark_bcast(illgrp, subnet_addr); + ill_mark_bcast(illgrp, subnet_addr, ipst); } prev_subnet_addr = addr; } @@ -15801,6 +15972,7 @@ ill_handoff_responsibility(ill_t *ill, ill_group_t *illgrp) ipaddr_t net_mask = 0; ipaddr_t subnet_netmask; ipaddr_t addr; + ip_stack_t *ipst = ill->ill_ipst; ASSERT(ill->ill_group == NULL); /* @@ -15905,7 +16077,7 @@ ill_handoff_responsibility(ill_t *ill, ill_group_t *illgrp) * the ire reference, we clean up the extra reference to the ill held in * ire->ire_stq. */ - ipcl_walk(conn_cleanup_stale_ire, NULL); + ipcl_walk(conn_cleanup_stale_ire, NULL, ipst); /* * Re-do source address selection for all the members in the @@ -15932,12 +16104,13 @@ illgrp_delete(ill_t *ill) ill_group_t *illgrp; ill_group_t *tmpg; ill_t *tmp_ill; + ip_stack_t *ipst = ill->ill_ipst; /* * Reset illgrp_ill_schednext if it was pointing at us. * We need to do this before we set ill_group to NULL. 
*/ - rw_enter(&ill_g_lock, RW_WRITER); + rw_enter(&ipst->ips_ill_g_lock, RW_WRITER); mutex_enter(&ill->ill_lock); illgrp_reset_schednext(ill); @@ -15960,7 +16133,7 @@ illgrp_delete(ill_t *ill) illgrp->illgrp_ill_count--; mutex_exit(&ill->ill_lock); - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); /* * As this ill is leaving the group, we need to hand off @@ -15970,16 +16143,16 @@ illgrp_delete(ill_t *ill) ill_handoff_responsibility(ill, illgrp); - rw_enter(&ill_g_lock, RW_WRITER); + rw_enter(&ipst->ips_ill_g_lock, RW_WRITER); if (illgrp->illgrp_ill_count == 0) { ASSERT(illgrp->illgrp_ill == NULL); if (ill->ill_isv6) { - if (illgrp == illgrp_head_v6) { - illgrp_head_v6 = illgrp->illgrp_next; + if (illgrp == ipst->ips_illgrp_head_v6) { + ipst->ips_illgrp_head_v6 = illgrp->illgrp_next; } else { - tmpg = illgrp_head_v6; + tmpg = ipst->ips_illgrp_head_v6; while (tmpg->illgrp_next != illgrp) { tmpg = tmpg->illgrp_next; ASSERT(tmpg != NULL); @@ -15987,10 +16160,10 @@ illgrp_delete(ill_t *ill) tmpg->illgrp_next = illgrp->illgrp_next; } } else { - if (illgrp == illgrp_head_v4) { - illgrp_head_v4 = illgrp->illgrp_next; + if (illgrp == ipst->ips_illgrp_head_v4) { + ipst->ips_illgrp_head_v4 = illgrp->illgrp_next; } else { - tmpg = illgrp_head_v4; + tmpg = ipst->ips_illgrp_head_v4; while (tmpg->illgrp_next != illgrp) { tmpg = tmpg->illgrp_next; ASSERT(tmpg != NULL); @@ -16001,7 +16174,7 @@ illgrp_delete(ill_t *ill) mutex_destroy(&illgrp->illgrp_lock); mi_free(illgrp); } - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); /* * Even though the ill is out of the group its not necessary @@ -16065,10 +16238,11 @@ illgrp_insert(ill_group_t **illgrp_head, ill_t *ill, char *groupname, ill_group_t *illgrp; ill_t *prev_ill; phyint_t *phyi; + ip_stack_t *ipst = ill->ill_ipst; ASSERT(ill->ill_group == NULL); - rw_enter(&ill_g_lock, RW_WRITER); + rw_enter(&ipst->ips_ill_g_lock, RW_WRITER); mutex_enter(&ill->ill_lock); if (groupname != NULL) { @@ -16154,7 +16328,7 @@ illgrp_insert(ill_group_t **illgrp_head, ill_t *ill, char *groupname, ill->ill_flags |= (illgrp->illgrp_ill->ill_flags & ILLF_ROUTER); } mutex_exit(&ill->ill_lock); - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); /* * 1) When ipif_up_done() calls this function, ipif_up_count @@ -16208,18 +16382,18 @@ illgrp_insert(ill_group_t **illgrp_head, ill_t *ill, char *groupname, * Needs work: called only from ip_sioctl_groupname */ static phyint_t * -phyint_lookup_group(char *groupname) +phyint_lookup_group(char *groupname, ip_stack_t *ipst) { phyint_t *phyi; - ASSERT(RW_LOCK_HELD(&ill_g_lock)); + ASSERT(RW_LOCK_HELD(&ipst->ips_ill_g_lock)); /* * Group names are stored in the phyint - a common structure * to both IPv4 and IPv6. 
*/ - phyi = avl_first(&phyint_g_list.phyint_list_avl_by_index); + phyi = avl_first(&ipst->ips_phyint_g_list->phyint_list_avl_by_index); for (; phyi != NULL; - phyi = avl_walk(&phyint_g_list.phyint_list_avl_by_index, + phyi = avl_walk(&ipst->ips_phyint_g_list->phyint_list_avl_by_index, phyi, AVL_AFTER)) { if (phyi->phyint_groupname_len == 0) continue; @@ -16285,6 +16459,7 @@ ip_sioctl_groupname(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, mblk_t *mp1; char *groupname; ipsq_t *ipsq; + ip_stack_t *ipst = ill->ill_ipst; ASSERT(IAM_WRITER_IPIF(ipif)); @@ -16389,7 +16564,7 @@ ip_sioctl_groupname(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, mutex_exit(&ill_v6->ill_lock); } - rw_enter(&ill_g_lock, RW_WRITER); + rw_enter(&ipst->ips_ill_g_lock, RW_WRITER); GRAB_ILL_LOCKS(ill_v4, ill_v6); mutex_enter(&phyi->phyint_lock); ASSERT(phyi->phyint_groupname != NULL); @@ -16398,7 +16573,7 @@ ip_sioctl_groupname(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, phyi->phyint_groupname_len = 0; mutex_exit(&phyi->phyint_lock); RELEASE_ILL_LOCKS(ill_v4, ill_v6); - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); err = ill_up_ipifs(ill, q, mp); /* @@ -16419,7 +16594,7 @@ ip_sioctl_groupname(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, } } - rw_enter(&ill_g_lock, RW_READER); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); /* * Merge ipsq for the group's. * This check is here as multiple groups/ills might be @@ -16427,9 +16602,9 @@ ip_sioctl_groupname(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, * If we have to merege than the operation is restarted * on the new ipsq. */ - ipsq = ip_ipsq_lookup(groupname, B_FALSE, NULL); + ipsq = ip_ipsq_lookup(groupname, B_FALSE, NULL, ipst); if (phyi->phyint_ipsq != ipsq) { - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); err = ill_merge_groups(ill, NULL, groupname, mp, q); goto done; } @@ -16448,7 +16623,7 @@ ip_sioctl_groupname(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, * packets across the group because of potential link-level * header differences. */ - phyi_tmp = phyint_lookup_group(groupname); + phyi_tmp = phyint_lookup_group(groupname, ipst); if (phyi_tmp != NULL) { if ((ill_v4 != NULL && phyi_tmp->phyint_illv4 != NULL) && @@ -16459,7 +16634,7 @@ ip_sioctl_groupname(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, mutex_enter(&phyi->phyint_ipsq->ipsq_lock); phyi->phyint_ipsq->ipsq_split = B_TRUE; mutex_exit(&phyi->phyint_ipsq->ipsq_lock); - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); return (EINVAL); } if ((ill_v6 != NULL && @@ -16471,12 +16646,12 @@ ip_sioctl_groupname(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, mutex_enter(&phyi->phyint_ipsq->ipsq_lock); phyi->phyint_ipsq->ipsq_split = B_TRUE; mutex_exit(&phyi->phyint_ipsq->ipsq_lock); - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); return (EINVAL); } } - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); /* * bring down all v4 ipifs. 
@@ -16529,7 +16704,7 @@ ip_sioctl_groupname(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, if (tmp == NULL) return (ENOMEM); - rw_enter(&ill_g_lock, RW_WRITER); + rw_enter(&ipst->ips_ill_g_lock, RW_WRITER); GRAB_ILL_LOCKS(ill_v4, ill_v6); mutex_enter(&phyi->phyint_lock); if (phyi->phyint_groupname_len != 0) { @@ -16545,7 +16720,7 @@ ip_sioctl_groupname(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, phyi->phyint_groupname_len = namelen + 1; mutex_exit(&phyi->phyint_lock); RELEASE_ILL_LOCKS(ill_v4, ill_v6); - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); err = ill_up_ipifs(ill, q, mp); } @@ -16683,12 +16858,13 @@ static void conn_move_ill(ill_t *from_ill, ill_t *to_ill, int ifindex) { conn_move_t connm; + ip_stack_t *ipst = from_ill->ill_ipst; connm.cm_from_ill = from_ill; connm.cm_to_ill = to_ill; connm.cm_ifindex = ifindex; - ipcl_walk(conn_move, (caddr_t)&connm); + ipcl_walk(conn_move, (caddr_t)&connm, ipst); } /* @@ -16848,10 +17024,11 @@ ilm_move_v6(ill_t *from_ill, ill_t *to_ill, int ifindex) int count; char buf[INET6_ADDRSTRLEN]; in6_addr_t ipv6_snm = ipv6_solicited_node_mcast; + ip_stack_t *ipst = from_ill->ill_ipst; ASSERT(MUTEX_HELD(&to_ill->ill_lock)); ASSERT(MUTEX_HELD(&from_ill->ill_lock)); - ASSERT(RW_WRITE_HELD(&ill_g_lock)); + ASSERT(RW_WRITE_HELD(&ipst->ips_ill_g_lock)); if (ifindex == 0) { /* @@ -17193,10 +17370,11 @@ ilm_move_v4(ill_t *from_ill, ill_t *to_ill, ipif_t *ipif) ilm_t *ilm_next; ilm_t *new_ilm; ilm_t **ilmp; + ip_stack_t *ipst = from_ill->ill_ipst; ASSERT(MUTEX_HELD(&to_ill->ill_lock)); ASSERT(MUTEX_HELD(&from_ill->ill_lock)); - ASSERT(RW_WRITE_HELD(&ill_g_lock)); + ASSERT(RW_WRITE_HELD(&ipst->ips_ill_g_lock)); ilmp = &from_ill->ill_ilm; for (ilm = from_ill->ill_ilm; ilm != NULL; ilm = ilm_next) { @@ -17328,6 +17506,7 @@ ipif_get_id(ill_t *ill, uint_t id) uint_t unit; ipif_t *tipif; boolean_t found = B_FALSE; + ip_stack_t *ipst = ill->ill_ipst; /* * During failback, we want to go back to the same id @@ -17360,7 +17539,7 @@ ipif_get_id(ill_t *ill, uint_t id) if (!found) return (id); } - for (unit = 0; unit <= ip_addrs_per_if; unit++) { + for (unit = 0; unit <= ipst->ips_ip_addrs_per_if; unit++) { found = B_FALSE; for (tipif = ill->ill_ipif; tipif != NULL; tipif = tipif->ipif_next) { @@ -17390,6 +17569,7 @@ ipif_move(ipif_t *ipif, ill_t *to_ill, queue_t *q, mblk_t *mp, boolean_t failback_cmd; boolean_t remove_ipif; int rc; + ip_stack_t *ipst; ASSERT(IAM_WRITER_ILL(to_ill)); ASSERT(IAM_WRITER_IPIF(ipif)); @@ -17399,10 +17579,11 @@ ipif_move(ipif_t *ipif, ill_t *to_ill, queue_t *q, mblk_t *mp, remove_ipif = B_FALSE; from_ill = ipif->ipif_ill; + ipst = from_ill->ill_ipst; ASSERT(MUTEX_HELD(&to_ill->ill_lock)); ASSERT(MUTEX_HELD(&from_ill->ill_lock)); - ASSERT(RW_WRITE_HELD(&ill_g_lock)); + ASSERT(RW_WRITE_HELD(&ipst->ips_ill_g_lock)); /* * Don't move LINK LOCAL addresses as they are tied to @@ -17481,7 +17662,7 @@ ipif_move(ipif_t *ipif, ill_t *to_ill, queue_t *q, mblk_t *mp, if (to_ipif->ipif_id == MAX_ADDRS_PER_IF) to_ipif->ipif_id = 0; - if (unit == ip_addrs_per_if) { + if (unit == ipst->ips_ip_addrs_per_if) { ipif->ipif_was_up = B_FALSE; IPIF_UNMARK_MOVING(ipif); return (EINVAL); @@ -17705,6 +17886,7 @@ ill_move(ill_t *from_ill, ill_t *to_ill, queue_t *q, mblk_t *mp) ipif_t *rep_ipif_ptr = NULL; ipif_t *from_ipif = NULL; boolean_t check_rep_if = B_FALSE; + ip_stack_t *ipst = from_ill->ill_ipst; iocp = (struct iocblk *)mp->b_rptr; if (iocp->ioc_cmd == SIOCLIFFAILOVER) { @@ -17766,7 +17948,7 @@ ill_move(ill_t *from_ill, ill_t *to_ill, queue_t *q, 
mblk_t *mp) RELEASE_ILL_LOCKS(from_ill, to_ill); ASSERT(!MUTEX_HELD(&to_ill->ill_lock)); - rw_enter(&ill_g_lock, RW_WRITER); + rw_enter(&ipst->ips_ill_g_lock, RW_WRITER); GRAB_ILL_LOCKS(from_ill, to_ill); err = ipif_move_all(from_ill, to_ill, q, mp, ifindex, &rep_ipif_ptr); @@ -17775,7 +17957,7 @@ ill_move(ill_t *from_ill, ill_t *to_ill, queue_t *q, mblk_t *mp) ilm_move_v6(from_ill, to_ill, ifindex); RELEASE_ILL_LOCKS(from_ill, to_ill); - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); /* * send rts messages and multicast messages. @@ -17813,6 +17995,13 @@ ip_extract_move_args(queue_t *q, mblk_t *mp, ill_t **ill_from_v4, boolean_t exists; sin_t *sin; int err = 0; + ip_stack_t *ipst; + + if (CONN_Q(q)) + ipst = CONNQ_TO_IPST(q); + else + ipst = ILLQ_TO_IPST(q); + if ((mp1 = mp->b_cont) == NULL) return (EPROTO); @@ -17837,10 +18026,10 @@ ip_extract_move_args(queue_t *q, mblk_t *mp, ill_t **ill_from_v4, */ ipif_v4 = ipif_lookup_on_name(lifr->lifr_name, mi_strlen(lifr->lifr_name), B_FALSE, &exists, B_FALSE, - ALL_ZONES, NULL, NULL, NULL, NULL); + ALL_ZONES, NULL, NULL, NULL, NULL, ipst); ipif_v6 = ipif_lookup_on_name(lifr->lifr_name, mi_strlen(lifr->lifr_name), B_FALSE, &exists, B_TRUE, - ALL_ZONES, NULL, NULL, NULL, NULL); + ALL_ZONES, NULL, NULL, NULL, NULL, ipst); if (ipif_v4 == NULL && ipif_v6 == NULL) return (ENXIO); @@ -17870,7 +18059,7 @@ ip_extract_move_args(queue_t *q, mblk_t *mp, ill_t **ill_from_v4, err = 0; dst_index = lifr->lifr_movetoindex; *ill_to_v4 = ill_lookup_on_ifindex(dst_index, B_FALSE, - q, mp, ip_process_ioctl, &err); + q, mp, ip_process_ioctl, &err, ipst); if (err != 0) { /* * There could be only v6. @@ -17881,7 +18070,7 @@ ip_extract_move_args(queue_t *q, mblk_t *mp, ill_t **ill_from_v4, } *ill_to_v6 = ill_lookup_on_ifindex(dst_index, B_TRUE, - q, mp, ip_process_ioctl, &err); + q, mp, ip_process_ioctl, &err, ipst); if (err != 0) { if (err != ENXIO) goto done; @@ -18205,12 +18394,15 @@ ill_dl_down(ill_t *ill) info = kmem_alloc(sizeof (hook_nic_event_t), KM_NOSLEEP); if (info != NULL) { + ip_stack_t *ipst = ill->ill_ipst; + info->hne_nic = ill->ill_phyint->phyint_ifindex; info->hne_lif = 0; info->hne_event = NE_DOWN; info->hne_data = NULL; info->hne_datalen = 0; - info->hne_family = ill->ill_isv6 ? ipv6 : ipv4; + info->hne_family = ill->ill_isv6 ? + ipst->ips_ipv6_net_data : ipst->ips_ipv4_net_data; } else ip2dbg(("ill_dl_down: could not attach DOWN nic event " "information for %s (ENOMEM)\n", ill->ill_name)); @@ -18518,6 +18710,7 @@ ipif_down(ipif_t *ipif, queue_t *q, mblk_t *mp) conn_t *connp; boolean_t success; boolean_t ipif_was_up = B_FALSE; + ip_stack_t *ipst = ill->ill_ipst; ASSERT(IAM_WRITER_IPIF(ipif)); @@ -18553,7 +18746,7 @@ ipif_down(ipif_t *ipif, queue_t *q, mblk_t *mp) int err; err = ip_srcid_remove(&ipif->ipif_v6lcl_addr, - ipif->ipif_zoneid); + ipif->ipif_zoneid, ipst); if (err != 0) { ip0dbg(("ipif_down: srcid_remove %d\n", err)); } @@ -18575,30 +18768,34 @@ ipif_down(ipif_t *ipif, queue_t *q, mblk_t *mp) * because we have already marked down here i.e cleared * IPIF_UP. 
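ip_extract_move_args above (and ip_sioctl_slifzone_tail later in the diff) picks the stack off whichever kind of queue the ioctl arrived on: CONNQ_TO_IPST() when CONN_Q(q) says it is a conn/driver queue, ILLQ_TO_IPST() when it is an ill/module queue. A simplified sketch of that dispatch, with invented stand-ins for the STREAMS types:

#include <stdbool.h>
#include <stddef.h>

typedef struct ip_stack ip_stack_t;

/* Stand-ins for the two things a queue's q_ptr can reference. */
typedef struct conn { ip_stack_t *conn_ipst; } conn_t;
typedef struct ill  { ip_stack_t *ill_ipst;  } ill_t;

typedef struct queue {
	bool	 q_is_conn;	/* models the CONN_Q(q) test */
	void	*q_ptr;
} queue_t;

static ip_stack_t *
queue_to_ipst_sketch(queue_t *q)
{
	if (q->q_is_conn)
		return (((conn_t *)q->q_ptr)->conn_ipst);  /* CONNQ_TO_IPST */
	return (((ill_t *)q->q_ptr)->ill_ipst);            /* ILLQ_TO_IPST */
}

Either way the ioctl ends up operating on exactly the stack instance that the requesting zone is plumbed into.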
*/ - if (ipif->ipif_isv6) - ire_walk_v6(ipif_down_delete_ire, (char *)ipif, ALL_ZONES); - else - ire_walk_v4(ipif_down_delete_ire, (char *)ipif, ALL_ZONES); + if (ipif->ipif_isv6) { + ire_walk_v6(ipif_down_delete_ire, (char *)ipif, ALL_ZONES, + ipst); + } else { + ire_walk_v4(ipif_down_delete_ire, (char *)ipif, ALL_ZONES, + ipst); + } /* * Need to add these also to be saved and restored when the * ipif is brought down and up */ - mutex_enter(&ire_mrtun_lock); - if (ire_mrtun_count != 0) { - mutex_exit(&ire_mrtun_lock); + mutex_enter(&ipst->ips_ire_mrtun_lock); + if (ipst->ips_ire_mrtun_count != 0) { + mutex_exit(&ipst->ips_ire_mrtun_lock); ire_walk_ill_mrtun(0, 0, ipif_down_delete_ire, - (char *)ipif, NULL); + (char *)ipif, NULL, ipst); } else { - mutex_exit(&ire_mrtun_lock); + mutex_exit(&ipst->ips_ire_mrtun_lock); } - mutex_enter(&ire_srcif_table_lock); - if (ire_srcif_table_count > 0) { - mutex_exit(&ire_srcif_table_lock); - ire_walk_srcif_table_v4(ipif_down_delete_ire, (char *)ipif); + mutex_enter(&ipst->ips_ire_srcif_table_lock); + if (ipst->ips_ire_srcif_table_count > 0) { + mutex_exit(&ipst->ips_ire_srcif_table_lock); + ire_walk_srcif_table_v4(ipif_down_delete_ire, (char *)ipif, + ipst); } else { - mutex_exit(&ire_srcif_table_lock); + mutex_exit(&ipst->ips_ire_srcif_table_lock); } /* @@ -18608,7 +18805,7 @@ ipif_down(ipif_t *ipif, queue_t *q, mblk_t *mp) * conn. The caching is done after making sure that the ire is not yet * condemned. Also documented in the block comment above ip_output */ - ipcl_walk(conn_cleanup_stale_ire, NULL); + ipcl_walk(conn_cleanup_stale_ire, NULL, ipst); /* Also, delete the ires cached in SCTP */ sctp_ire_cache_flush(ipif); @@ -18974,6 +19171,8 @@ illgrp_cache_delete(ire_t *ire, char *ill_arg) static void ipif_free(ipif_t *ipif) { + ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; + ASSERT(IAM_WRITER_IPIF(ipif)); if (ipif->ipif_recovery_id != 0) @@ -19005,10 +19204,10 @@ ipif_free(ipif_t *ipif) (void) untimeout(ipif->ipif_recovery_id); ipif->ipif_recovery_id = 0; - rw_enter(&ill_g_lock, RW_WRITER); + rw_enter(&ipst->ips_ill_g_lock, RW_WRITER); /* Remove pointers to this ill in the multicast routing tables */ reset_mrt_vif_ipif(ipif); - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); } /* @@ -19020,6 +19219,7 @@ ipif_free_tail(ipif_t *ipif) { mblk_t *mp; ipif_t **ipifp; + ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; /* * Free state for addition IRE_IF_[NO]RESOLVER ire's. @@ -19035,7 +19235,7 @@ ipif_free_tail(ipif_t *ipif) * inserting or removing an ipif from the linked list * of ipifs hanging off the ill. */ - rw_enter(&ill_g_lock, RW_WRITER); + rw_enter(&ipst->ips_ill_g_lock, RW_WRITER); /* * Remove all multicast memberships on the interface now. * This removes IPv4 multicast memberships joined within @@ -19069,7 +19269,7 @@ ipif_free_tail(ipif_t *ipif) } mutex_exit(&ipif->ipif_ill->ill_lock); - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); mutex_destroy(&ipif->ipif_saved_ire_lock); @@ -19120,7 +19320,7 @@ ipif_get_name(const ipif_t *ipif, char *buf, int len) static ipif_t * ipif_lookup_on_name(char *name, size_t namelen, boolean_t do_alloc, boolean_t *exists, boolean_t isv6, zoneid_t zoneid, queue_t *q, - mblk_t *mp, ipsq_func_t func, int *error) + mblk_t *mp, ipsq_func_t func, int *error, ip_stack_t *ipst) { char *cp; char *endp; @@ -19185,7 +19385,7 @@ ipif_lookup_on_name(char *name, size_t namelen, boolean_t do_alloc, * ill_lookup_on_name will clear it. 
*/ ill = ill_lookup_on_name(name, do_alloc, isv6, - q, mp, func, error, &did_alloc); + q, mp, func, error, &did_alloc, ipst); if (cp != endp) *cp = IPIF_SEPARATOR_CHAR; if (ill == NULL) @@ -19299,10 +19499,11 @@ ipif_mask_reply(ipif_t *ipif) icmph_t *icmph; ipha_t *ipha; mblk_t *mp; + ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; #define REPLY_LEN (sizeof (icmp_ipha) + sizeof (icmph_t) + IP_ADDR_LEN) - if (!ip_respond_to_address_mask_broadcast) + if (!ipst->ips_ip_respond_to_address_mask_broadcast) return; /* ICMP mask reply is IPv4 only */ @@ -19318,7 +19519,7 @@ ipif_mask_reply(ipif_t *ipif) ipha = (ipha_t *)mp->b_rptr; bzero(ipha, REPLY_LEN); *ipha = icmp_ipha; - ipha->ipha_ttl = ip_broadcast_ttl; + ipha->ipha_ttl = ipst->ips_ip_broadcast_ttl; ipha->ipha_src = ipif->ipif_src_addr; ipha->ipha_dst = ipif->ipif_brd_addr; ipha->ipha_length = htons(REPLY_LEN); @@ -19513,6 +19714,7 @@ ipif_recover_ire(ipif_t *ipif) mblk_t *mp; ire_t **ipif_saved_irep; ire_t **irep; + ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; ip1dbg(("ipif_recover_ire(%s:%u)", ipif->ipif_ill->ill_name, ipif->ipif_id)); @@ -19620,7 +19822,8 @@ ipif_recover_ire(ipif_t *ipif) ifrt->ifrt_flags, &ifrt->ifrt_iulp_info, NULL, - NULL); + NULL, + ipst); if (ire == NULL) { mutex_exit(&ipif->ipif_saved_ire_lock); @@ -19724,10 +19927,11 @@ ip_addr_availability_check(ipif_t *new_ipif) ill_t *ill; ipif_t *ipif; ill_walk_context_t ctx; + ip_stack_t *ipst = new_ipif->ipif_ill->ill_ipst; ASSERT(IAM_WRITER_IPIF(new_ipif)); - ASSERT(MUTEX_HELD(&ip_addr_avail_lock)); - ASSERT(RW_READ_HELD(&ill_g_lock)); + ASSERT(MUTEX_HELD(&ipst->ips_ip_addr_avail_lock)); + ASSERT(RW_READ_HELD(&ipst->ips_ill_g_lock)); new_ipif->ipif_flags &= ~IPIF_UNNUMBERED; if (IN6_IS_ADDR_UNSPECIFIED(&new_ipif->ipif_v6lcl_addr) || @@ -19737,9 +19941,9 @@ ip_addr_availability_check(ipif_t *new_ipif) our_v6addr = new_ipif->ipif_v6lcl_addr; if (new_ipif->ipif_isv6) - ill = ILL_START_WALK_V6(&ctx); + ill = ILL_START_WALK_V6(&ctx, ipst); else - ill = ILL_START_WALK_V4(&ctx); + ill = ILL_START_WALK_V4(&ctx, ipst); for (; ill != NULL; ill = ill_next(&ctx, ill)) { for (ipif = ill->ill_ipif; ipif != NULL; @@ -20019,6 +20223,7 @@ ipif_up_done(ipif_t *ipif) boolean_t src_ipif_held = B_FALSE; boolean_t ire_added = B_FALSE; boolean_t loopback = B_FALSE; + ip_stack_t *ipst = ill->ill_ipst; ip1dbg(("ipif_up_done(%s:%u)\n", ipif->ipif_ill->ill_name, ipif->ipif_id)); @@ -20111,7 +20316,7 @@ ipif_up_done(ipif_t *ipif) /* Register the source address for __sin6_src_id */ err = ip_srcid_insert(&ipif->ipif_v6lcl_addr, - ipif->ipif_zoneid); + ipif->ipif_zoneid, ipst); if (err != 0) { ip0dbg(("ipif_up_done: srcid_insert %d\n", err)); return (err); @@ -20143,7 +20348,8 @@ ipif_up_done(ipif_t *ipif) RTF_PRIVATE : 0, &ire_uinfo_null, NULL, - NULL); + NULL, + ipst); } else { ip1dbg(( "ipif_up_done: not creating IRE %d for 0x%x: flags 0x%x\n", @@ -20208,7 +20414,8 @@ ipif_up_done(ipif_t *ipif) (ipif->ipif_flags & IPIF_PRIVATE) ? RTF_PRIVATE: 0, &ire_uinfo_null, NULL, - NULL); + NULL, + ipst); } /* @@ -20287,14 +20494,14 @@ ipif_up_done(ipif_t *ipif) * just to make sure no new ills or new ipifs are being added * to the system while we are checking the uniqueness of addresses. */ - rw_enter(&ill_g_lock, RW_READER); - mutex_enter(&ip_addr_avail_lock); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); + mutex_enter(&ipst->ips_ip_addr_avail_lock); /* Mark it up, and increment counters. 
*/ ipif->ipif_flags |= IPIF_UP; ill->ill_ipif_up_count++; err = ip_addr_availability_check(ipif); - mutex_exit(&ip_addr_avail_lock); - rw_exit(&ill_g_lock); + mutex_exit(&ipst->ips_ip_addr_avail_lock); + rw_exit(&ipst->ips_ill_g_lock); if (err != 0) { /* @@ -20354,7 +20561,7 @@ ipif_up_done(ipif_t *ipif) ASSERT(phyi->phyint_groupname != NULL); if (ill->ill_ipif_up_count == 1) { ASSERT(ill->ill_group == NULL); - err = illgrp_insert(&illgrp_head_v4, ill, + err = illgrp_insert(&ipst->ips_illgrp_head_v4, ill, phyi->phyint_groupname, NULL, B_TRUE); if (err != 0) { ip1dbg(("ipif_up_done: illgrp allocation " @@ -20390,7 +20597,7 @@ ipif_up_done(ipif_t *ipif) ire = ire_ctable_lookup(ipif->ipif_brd_addr, 0, IRE_BROADCAST, ipif, ALL_ZONES, - NULL, (MATCH_IRE_TYPE | MATCH_IRE_ILL)); + NULL, (MATCH_IRE_TYPE | MATCH_IRE_ILL), ipst); if (ire == NULL) { /* @@ -20497,7 +20704,7 @@ bad: ire_refrele(*irep); } } - (void) ip_srcid_remove(&ipif->ipif_v6lcl_addr, ipif->ipif_zoneid); + (void) ip_srcid_remove(&ipif->ipif_v6lcl_addr, ipif->ipif_zoneid, ipst); if (ipif_saved_irep != NULL) { kmem_free(ipif_saved_irep, @@ -20638,6 +20845,7 @@ illgrp_scheduler(ill_t *ill) int illcnt; int i; uint64_t flags; + ip_stack_t *ipst = ill->ill_ipst; /* * We don't use a lock to check for the ill_group. If this ill @@ -20654,9 +20862,9 @@ illgrp_scheduler(ill_t *ill) * a set of stable ills. No ill can be added or deleted or change * group while we hold the reader lock. */ - rw_enter(&ill_g_lock, RW_READER); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); if ((illgrp = ill->ill_group) == NULL) { - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); ill_refhold(ill); return (ill); } @@ -20688,7 +20896,7 @@ illgrp_scheduler(ill_t *ill) retill = illgrp->illgrp_ill; } mutex_exit(&illgrp->illgrp_lock); - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); return (i == illcnt ? NULL : retill); } @@ -20705,6 +20913,7 @@ ipif_usesrc_avail(ill_t *ill, zoneid_t zoneid) ipif_t *ipif = NULL; ill_t *uill; boolean_t isv6; + ip_stack_t *ipst = ill->ill_ipst; ASSERT(ill != NULL); @@ -20712,7 +20921,7 @@ ipif_usesrc_avail(ill_t *ill, zoneid_t zoneid) ifindex = ill->ill_usesrc_ifindex; if (ifindex != 0) { uill = ill_lookup_on_ifindex(ifindex, isv6, NULL, NULL, NULL, - NULL); + NULL, ipst); if (uill == NULL) return (NULL); mutex_enter(&uill->ill_lock); @@ -20763,10 +20972,11 @@ ipif_select_source(ill_t *ill, ipaddr_t dst, zoneid_t zoneid) boolean_t specific_found; ill_t *till, *usill = NULL; tsol_tpc_t *src_rhtp, *dst_rhtp; + ip_stack_t *ipst = ill->ill_ipst; if (ill->ill_usesrc_ifindex != 0) { - usill = ill_lookup_on_ifindex(ill->ill_usesrc_ifindex, B_FALSE, - NULL, NULL, NULL, NULL); + usill = ill_lookup_on_ifindex(ill->ill_usesrc_ifindex, + B_FALSE, NULL, NULL, NULL, NULL, ipst); if (usill != NULL) ill = usill; /* Select source from usesrc ILL */ else @@ -20798,7 +21008,7 @@ ipif_select_source(ill_t *ill, ipaddr_t dst, zoneid_t zoneid) * we retry. Inside the loop we still need to check for CONDEMNED, * but not under a lock. 
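ipif_select_source above now calls ipif_rand(ipst), so rotation among equally good source addresses draws on per-stack state (seeded in ipif_init from gethrtime, as visible further down) rather than one global seed. A sketch of a per-stack helper of that sort; the constants and seeding are illustrative, not the actual implementation.

#include <stdint.h>
#include <time.h>

typedef struct ip_stack {
	uint32_t ips_ipif_src_random;	/* per-stack rotation state */
} ip_stack_t;

static void
ipif_rand_seed_sketch(ip_stack_t *ipst)
{
	/* Any per-stack seed will do; independence matters more than quality. */
	ipst->ips_ipif_src_random = (uint32_t)time(NULL) | 1;
}

static uint32_t
ipif_rand_sketch(ip_stack_t *ipst)
{
	/* Small LCG; each stack rotates through candidates on its own. */
	ipst->ips_ipif_src_random =
	    ipst->ips_ipif_src_random * 1103515245u + 12345u;
	return (ipst->ips_ipif_src_random >> 16);
}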
*/ - rw_enter(&ill_g_lock, RW_READER); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); retry: till = ill; @@ -20966,7 +21176,7 @@ retry: } else { if (wrapped) index = MAX_IPIF_SELECT_SOURCE; - ipif = ipif_arr[ipif_rand() % index]; + ipif = ipif_arr[ipif_rand(ipst) % index]; ASSERT(ipif != NULL); } @@ -20980,7 +21190,7 @@ retry: mutex_exit(&ipif->ipif_ill->ill_lock); } - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); if (usill != NULL) ill_refrele(usill); if (dst_rhtp != NULL) @@ -21027,6 +21237,7 @@ ipif_recreate_interface_routes(ipif_t *old_ipif, ipif_t *ipif) ipif_t *nipif; ill_t *ill; boolean_t need_rele = B_FALSE; + ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; ASSERT(old_ipif == NULL || IAM_WRITER_IPIF(old_ipif)); ASSERT(IAM_WRITER_IPIF(ipif)); @@ -21120,7 +21331,8 @@ ipif_recreate_interface_routes(ipif_t *old_ipif, ipif_t *ipif) 0, &ire_uinfo_null, NULL, - NULL); + NULL, + ipst); if (ire != NULL) { ire_t *ret_ire; @@ -21213,6 +21425,10 @@ if_unitsel_restart(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, } +/* + * Can operate on either a module or a driver queue. + * Returns an error if not a module queue. + */ /* ARGSUSED */ int if_unitsel(ipif_t *dummy_ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, @@ -21223,7 +21439,7 @@ if_unitsel(ipif_t *dummy_ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, char interf_name[LIFNAMSIZ]; uint_t ppa = *(uint_t *)mp->b_cont->b_cont->b_rptr; - if (!q->q_next) { + if (q->q_next == NULL) { ip1dbg(( "if_unitsel: IF_UNITSEL: no q_next\n")); return (EINVAL); @@ -21281,7 +21497,6 @@ ipif_check_bcast_ires(ipif_t *test_ipif) ire_t *ire_array[12]; ire_t **irep = &ire_array[0]; ire_t **irep1; - ipaddr_t net_addr, subnet_addr, net_mask, subnet_mask; ipaddr_t test_net_addr, test_subnet_addr; ipaddr_t test_net_mask, test_subnet_mask; @@ -21297,6 +21512,7 @@ ipif_check_bcast_ires(ipif_t *test_ipif) ipif_t *backup_ipif_allzeros = (ipif_t *)NULL; ipif_t *backup_ipif_allones = (ipif_t *)NULL; uint64_t check_flags = IPIF_DEPRECATED | IPIF_NOLOCAL | IPIF_ANYCAST; + ip_stack_t *ipst = test_ipif->ipif_ill->ill_ipst; ASSERT(!test_ipif->ipif_isv6); ASSERT(IAM_WRITER_IPIF(test_ipif)); @@ -21310,10 +21526,12 @@ ipif_check_bcast_ires(ipif_t *test_ipif) return; test_allzero_ire = ire_ctable_lookup(0, 0, IRE_BROADCAST, - test_ipif, ALL_ZONES, NULL, (MATCH_IRE_TYPE | MATCH_IRE_IPIF)); + test_ipif, ALL_ZONES, NULL, (MATCH_IRE_TYPE | MATCH_IRE_IPIF), + ipst); test_allone_ire = ire_ctable_lookup(INADDR_BROADCAST, 0, IRE_BROADCAST, - test_ipif, ALL_ZONES, NULL, (MATCH_IRE_TYPE | MATCH_IRE_IPIF)); + test_ipif, ALL_ZONES, NULL, (MATCH_IRE_TYPE | MATCH_IRE_IPIF), + ipst); test_net_mask = ip_net_mask(test_ipif->ipif_subnet); test_subnet_mask = test_ipif->ipif_net_mask; @@ -21329,14 +21547,16 @@ ipif_check_bcast_ires(ipif_t *test_ipif) */ test_net_addr = test_net_mask & test_ipif->ipif_subnet; test_net_ire = ire_ctable_lookup(test_net_addr, 0, IRE_BROADCAST, - test_ipif, ALL_ZONES, NULL, (MATCH_IRE_TYPE | MATCH_IRE_IPIF)); + test_ipif, ALL_ZONES, NULL, (MATCH_IRE_TYPE | MATCH_IRE_IPIF), + ipst); /* * Check if there is a subnet broadcast IRE associated with this ipif */ test_subnet_addr = test_subnet_mask & test_ipif->ipif_subnet; test_subnet_ire = ire_ctable_lookup(test_subnet_addr, 0, IRE_BROADCAST, - test_ipif, ALL_ZONES, NULL, (MATCH_IRE_TYPE | MATCH_IRE_IPIF)); + test_ipif, ALL_ZONES, NULL, (MATCH_IRE_TYPE | MATCH_IRE_IPIF), + ipst); /* * No broadcast ire's associated with this ipif. 
@@ -21569,6 +21789,9 @@ bad: * since ipif_lookup_on_name uses the _isv6 flags when matching. * Returns EINPROGRESS when mp has been consumed by queueing it on * ill_pending_mp and the ioctl will complete in ip_rput. + * + * Can operate on either a module or a driver queue. + * Returns an error if not a module queue. */ /* ARGSUSED */ int @@ -21581,7 +21804,12 @@ ip_sioctl_slifname(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, ASSERT(ipif != NULL); ip1dbg(("ip_sioctl_slifname %s\n", lifr->lifr_name)); - ASSERT(q->q_next != NULL); + + if (q->q_next == NULL) { + ip1dbg(( + "if_sioctl_slifname: SIOCSLIFNAME: no q_next\n")); + return (EINVAL); + } ill = (ill_t *)q->q_ptr; /* @@ -21692,7 +21920,7 @@ ip_sioctl_slifname_restart(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, */ ipif_t * ipif_lookup_on_ifindex(uint_t index, boolean_t isv6, zoneid_t zoneid, - queue_t *q, mblk_t *mp, ipsq_func_t func, int *err) + queue_t *q, mblk_t *mp, ipsq_func_t func, int *err, ip_stack_t *ipst) { ill_t *ill; ipsq_t *ipsq; @@ -21710,13 +21938,13 @@ ipif_lookup_on_ifindex(uint_t index, boolean_t isv6, zoneid_t zoneid, * to both IPv4 and IPv6. */ - rw_enter(&ill_g_lock, RW_READER); - phyi = avl_find(&phyint_g_list.phyint_list_avl_by_index, + rw_enter(&ipst->ips_ill_g_lock, RW_READER); + phyi = avl_find(&ipst->ips_phyint_g_list->phyint_list_avl_by_index, (void *) &index, NULL); if (phyi != NULL) { ill = isv6 ? phyi->phyint_illv6 : phyi->phyint_illv4; if (ill == NULL) { - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); if (err != NULL) *err = ENXIO; return (NULL); @@ -21733,14 +21961,14 @@ ipif_lookup_on_ifindex(uint_t index, boolean_t isv6, zoneid_t zoneid, ipif_refhold_locked(ipif); mutex_exit(&ill->ill_lock); RELEASE_CONN_LOCK(q); - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); return (ipif); } } } else if (ILL_CAN_WAIT(ill, q)) { ipsq = ill->ill_phyint->phyint_ipsq; mutex_enter(&ipsq->ipsq_lock); - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); mutex_exit(&ill->ill_lock); ipsq_enq(ipsq, q, mp, func, NEW_OP, ill); mutex_exit(&ipsq->ipsq_lock); @@ -21751,7 +21979,7 @@ ipif_lookup_on_ifindex(uint_t index, boolean_t isv6, zoneid_t zoneid, mutex_exit(&ill->ill_lock); RELEASE_CONN_LOCK(q); } - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); if (err != NULL) *err = ENXIO; return (NULL); @@ -21814,12 +22042,13 @@ ip_change_ifindex(ill_t *ill_orig, conn_change_t *connc) uint_t new_ifindex; ilm_t *ilm; ill_walk_context_t ctx; + ip_stack_t *ipst = ill_orig->ill_ipst; old_ifindex = connc->cc_old_ifindex; new_ifindex = connc->cc_new_ifindex; - rw_enter(&ill_g_lock, RW_READER); - ill = ILL_START_WALK_ALL(&ctx); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); + ill = ILL_START_WALK_ALL(&ctx, ipst); for (; ill != NULL; ill = ill_next(&ctx, ill)) { if ((ill_orig->ill_net_type != ill->ill_net_type) || (ill_orig->ill_type != ill->ill_type)) { @@ -21835,7 +22064,7 @@ ip_change_ifindex(ill_t *ill_orig, conn_change_t *connc) ilm->ilm_orig_ifindex = new_ifindex; } } - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); } /* @@ -21858,6 +22087,7 @@ ip_sioctl_slifindex(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, uint_t index; ill_t *ill_v4; ill_t *ill_v6; + ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; if (ipip->ipi_cmd_type == IF_CMD) index = ifr->ifr_index; @@ -21897,8 +22127,10 @@ ip_sioctl_slifindex(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, * Use ill_lookup_on_ifindex to determine if the * new index is unused and if so allow the change. 
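The ip_sioctl_slifname hunk above replaces ASSERT(q->q_next != NULL) with a check that returns EINVAL, matching the new header comments ("Can operate on either a module or a driver queue. Returns an error if not a module queue."); with ioctls now reachable from non-global zones and multiple stacks, a misdirected request should fail cleanly instead of tripping an assertion. A sketch of that defensive shape with a simplified queue type:

#include <errno.h>
#include <stddef.h>

typedef struct queue {
	struct queue *q_next;	/* non-NULL only on a module queue */
} queue_t;

static int
slifname_sketch(queue_t *q)
{
	if (q->q_next == NULL) {
		/* Driver queue: this ioctl only makes sense on a module. */
		return (EINVAL);
	}
	/* ... proceed with the interface name change ... */
	return (0);
}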
*/ - ill_v6 = ill_lookup_on_ifindex(index, B_TRUE, NULL, NULL, NULL, NULL); - ill_v4 = ill_lookup_on_ifindex(index, B_FALSE, NULL, NULL, NULL, NULL); + ill_v6 = ill_lookup_on_ifindex(index, B_TRUE, NULL, NULL, NULL, NULL, + ipst); + ill_v4 = ill_lookup_on_ifindex(index, B_FALSE, NULL, NULL, NULL, NULL, + ipst); if (ill_v6 != NULL || ill_v4 != NULL) { if (ill_v4 != NULL) ill_refrele(ill_v4); @@ -21923,7 +22155,7 @@ ip_sioctl_slifindex(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, connc.cc_old_ifindex = old_index; connc.cc_new_ifindex = index; ip_change_ifindex(ill, &connc); - ipcl_walk(conn_change_ifindex, (caddr_t)&connc); + ipcl_walk(conn_change_ifindex, (caddr_t)&connc, ipst); /* Send the routing sockets message */ ip_rts_ifmsg(ipif); @@ -22034,10 +22266,24 @@ ip_sioctl_slifzone_tail(ipif_t *ipif, zoneid_t zoneid, queue_t *q, mblk_t *mp, boolean_t need_up) { int err = 0; + ip_stack_t *ipst; ip1dbg(("ip_sioctl_zoneid_tail(%s:%u %p)\n", ipif->ipif_ill->ill_name, ipif->ipif_id, (void *)ipif)); + if (CONN_Q(q)) + ipst = CONNQ_TO_IPST(q); + else + ipst = ILLQ_TO_IPST(q); + + /* + * For exclusive stacks we don't allow a different zoneid than + * global. + */ + if (ipst->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID && + zoneid != GLOBAL_ZONEID) + return (EINVAL); + /* Set the new zone id. */ ipif->ipif_zoneid = zoneid; @@ -22153,7 +22399,9 @@ static void ill_disband_usesrc_group(ill_t *uill) { ill_t *next_ill, *tmp_ill; - ASSERT(RW_WRITE_HELD(&ill_g_usesrc_lock)); + ip_stack_t *ipst = uill->ill_ipst; + + ASSERT(RW_WRITE_HELD(&ipst->ips_ill_g_usesrc_lock)); next_ill = uill->ill_usesrc_grp_next; do { @@ -22174,9 +22422,10 @@ int ill_relink_usesrc_ills(ill_t *ucill, ill_t *uill, uint_t ifindex) { ill_t *ill, *tmp_ill; + ip_stack_t *ipst = ucill->ill_ipst; ASSERT((ucill != NULL) && (ucill->ill_usesrc_grp_next != NULL) && - (uill != NULL) && RW_WRITE_HELD(&ill_g_usesrc_lock)); + (uill != NULL) && RW_WRITE_HELD(&ipst->ips_ill_g_usesrc_lock)); /* * Check if the usesrc client ILL passed in is not already @@ -22232,6 +22481,7 @@ ip_sioctl_slifusesrc(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, uint_t ifindex; phyint_t *us_phyint, *us_cli_phyint; ipsq_t *ipsq = NULL; + ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; ASSERT(IAM_WRITER_IPIF(ipif)); ASSERT(q->q_next == NULL); @@ -22264,7 +22514,7 @@ ip_sioctl_slifusesrc(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, } usesrc_ill = ill_lookup_on_ifindex(ifindex, isv6, q, mp, - ip_process_ioctl, &err); + ip_process_ioctl, &err, ipst); if (usesrc_ill == NULL) { return (err); @@ -22337,23 +22587,23 @@ ip_sioctl_slifusesrc(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, if (ipif->ipif_isv6) ire_walk_v6(ipif_delete_cache_ire, (char *)usesrc_cli_ill, - ALL_ZONES); + ALL_ZONES, ipst); else ire_walk_v4(ipif_delete_cache_ire, (char *)usesrc_cli_ill, - ALL_ZONES); + ALL_ZONES, ipst); /* * ill_g_usesrc_lock global lock protects the ill_usesrc_grp_next * and the ill_usesrc_ifindex fields */ - rw_enter(&ill_g_usesrc_lock, RW_WRITER); + rw_enter(&ipst->ips_ill_g_usesrc_lock, RW_WRITER); if (reset_flg) { ret = ill_relink_usesrc_ills(usesrc_cli_ill, usesrc_ill, 0); if (ret != 0) { err = EINVAL; } - rw_exit(&ill_g_usesrc_lock); + rw_exit(&ipst->ips_ill_g_usesrc_lock); goto done; } @@ -22383,7 +22633,7 @@ ip_sioctl_slifusesrc(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, if (ret != 0) err = EINVAL; } - rw_exit(&ill_g_usesrc_lock); + rw_exit(&ipst->ips_ill_g_usesrc_lock); done: if (ill_flag_changed) { @@ -22455,10 +22705,11 @@ ill_phyint_free(ill_t *ill) 
phyint_t *phyi; phyint_t *next_phyint; ipsq_t *cur_ipsq; + ip_stack_t *ipst = ill->ill_ipst; ASSERT(ill->ill_phyint != NULL); - ASSERT(RW_WRITE_HELD(&ill_g_lock)); + ASSERT(RW_WRITE_HELD(&ipst->ips_ill_g_lock)); phyi = ill->ill_phyint; ill->ill_phyint = NULL; /* @@ -22487,9 +22738,9 @@ ill_phyint_free(ill_t *ill) * Make sure this phyint was put in the list. */ if (phyi->phyint_ifindex > 0) { - avl_remove(&phyint_g_list.phyint_list_avl_by_index, + avl_remove(&ipst->ips_phyint_g_list->phyint_list_avl_by_index, phyi); - avl_remove(&phyint_g_list.phyint_list_avl_by_name, + avl_remove(&ipst->ips_phyint_g_list->phyint_list_avl_by_name, phyi); } /* @@ -22510,7 +22761,7 @@ ill_phyint_free(ill_t *ill) } ASSERT(next_phyint != NULL); } - IPSQ_DEC_REF(cur_ipsq); + IPSQ_DEC_REF(cur_ipsq, ipst); if (phyi->phyint_groupname_len != 0) { ASSERT(phyi->phyint_groupname != NULL); @@ -22536,8 +22787,9 @@ ill_phyint_reinit(ill_t *ill) avl_index_t where = 0; ill_t *ill_other = NULL; ipsq_t *ipsq; + ip_stack_t *ipst = ill->ill_ipst; - ASSERT(RW_WRITE_HELD(&ill_g_lock)); + ASSERT(RW_WRITE_HELD(&ipst->ips_ill_g_lock)); phyi_old = ill->ill_phyint; ASSERT(isv6 || (phyi_old->phyint_illv4 == ill && @@ -22546,7 +22798,7 @@ ill_phyint_reinit(ill_t *ill) phyi_old->phyint_illv4 == NULL)); ASSERT(phyi_old->phyint_ifindex == 0); - phyi = avl_find(&phyint_g_list.phyint_list_avl_by_name, + phyi = avl_find(&ipst->ips_phyint_g_list->phyint_list_avl_by_name, ill->ill_name, &where); /* @@ -22600,7 +22852,7 @@ ill_phyint_reinit(ill_t *ill) * ipsq_exit */ ipsq = phyi_old->phyint_ipsq; - IPSQ_DEC_REF(ipsq); + IPSQ_DEC_REF(ipsq, ipst); ASSERT(ipsq->ipsq_refs == 0); /* Get the singleton phyint out of the ipsq list */ ASSERT(phyi_old->phyint_ipsq_next == NULL); @@ -22618,15 +22870,16 @@ ill_phyint_reinit(ill_t *ill) phyi = phyi_old; mutex_enter(&phyi->phyint_lock); /* XXX We need a recovery strategy here. */ - if (!phyint_assign_ifindex(phyi)) + if (!phyint_assign_ifindex(phyi, ipst)) cmn_err(CE_PANIC, "phyint_assign_ifindex() failed"); - avl_insert(&phyint_g_list.phyint_list_avl_by_name, + avl_insert(&ipst->ips_phyint_g_list->phyint_list_avl_by_name, (void *)phyi, where); - (void) avl_find(&phyint_g_list.phyint_list_avl_by_index, + (void) avl_find(&ipst->ips_phyint_g_list-> + phyint_list_avl_by_index, &phyi->phyint_ifindex, &where); - avl_insert(&phyint_g_list.phyint_list_avl_by_index, + avl_insert(&ipst->ips_phyint_g_list->phyint_list_avl_by_index, (void *)phyi, where); } @@ -22684,7 +22937,8 @@ ill_phyint_reinit(ill_t *ill) info->hne_nic = ill->ill_phyint->phyint_ifindex; info->hne_lif = 0; info->hne_event = NE_PLUMB; - info->hne_family = ill->ill_isv6 ? ipv6 : ipv4; + info->hne_family = ill->ill_isv6 ? + ipst->ips_ipv6_net_data : ipst->ips_ipv4_net_data; info->hne_data = kmem_alloc(ill->ill_name_length, KM_NOSLEEP); if (info->hne_data != NULL) { @@ -22748,12 +23002,11 @@ ip_ifname_notify(ill_t *ill, queue_t *q) putnext(q, mp1); } -static boolean_t ip_trash_timer_started = B_FALSE; - static int ipif_set_values_tail(ill_t *ill, ipif_t *ipif, mblk_t *mp, queue_t *q) { int err; + ip_stack_t *ipst = ill->ill_ipst; /* Set the obsolete NDD per-interface forwarding name. */ err = ill_set_ndd_name(ill); @@ -22775,33 +23028,35 @@ ipif_set_values_tail(ill_t *ill, ipif_t *ipif, mblk_t *mp, queue_t *q) * If there is no IRE expiration timer running, get one started. * igmp and mld timers will be triggered by the first multicast */ - if (!ip_trash_timer_started) { + if (ipst->ips_ip_ire_expire_id == 0) { /* * acquire the lock and check again. 
*/ - mutex_enter(&ip_trash_timer_lock); - if (!ip_trash_timer_started) { - ip_ire_expire_id = timeout(ip_trash_timer_expire, NULL, - MSEC_TO_TICK(ip_timer_interval)); - ip_trash_timer_started = B_TRUE; + mutex_enter(&ipst->ips_ip_trash_timer_lock); + if (ipst->ips_ip_ire_expire_id == 0) { + ipst->ips_ip_ire_expire_id = timeout( + ip_trash_timer_expire, ipst, + MSEC_TO_TICK(ipst->ips_ip_timer_interval)); } - mutex_exit(&ip_trash_timer_lock); + mutex_exit(&ipst->ips_ip_trash_timer_lock); } if (ill->ill_isv6) { - mutex_enter(&mld_slowtimeout_lock); - if (mld_slowtimeout_id == 0) { - mld_slowtimeout_id = timeout(mld_slowtimo, NULL, + mutex_enter(&ipst->ips_mld_slowtimeout_lock); + if (ipst->ips_mld_slowtimeout_id == 0) { + ipst->ips_mld_slowtimeout_id = timeout(mld_slowtimo, + (void *)ipst, MSEC_TO_TICK(MCAST_SLOWTIMO_INTERVAL)); } - mutex_exit(&mld_slowtimeout_lock); + mutex_exit(&ipst->ips_mld_slowtimeout_lock); } else { - mutex_enter(&igmp_slowtimeout_lock); - if (igmp_slowtimeout_id == 0) { - igmp_slowtimeout_id = timeout(igmp_slowtimo, NULL, + mutex_enter(&ipst->ips_igmp_slowtimeout_lock); + if (ipst->ips_igmp_slowtimeout_id == 0) { + ipst->ips_igmp_slowtimeout_id = timeout(igmp_slowtimo, + (void *)ipst, MSEC_TO_TICK(MCAST_SLOWTIMO_INTERVAL)); } - mutex_exit(&igmp_slowtimeout_lock); + mutex_exit(&ipst->ips_igmp_slowtimeout_lock); } return (err); @@ -22827,13 +23082,16 @@ ipif_set_values(queue_t *q, mblk_t *mp, char *interf_name, uint_t *new_ppa_ptr) char *old_ptr; char old_char; int error; + ip_stack_t *ipst; ip1dbg(("ipif_set_values: interface %s\n", interf_name)); ASSERT(q->q_next != NULL); ASSERT(interf_name != NULL); ill = (ill_t *)q->q_ptr; + ipst = ill->ill_ipst; + ASSERT(ill->ill_ipst != NULL); ASSERT(ill->ill_name[0] == '\0'); ASSERT(IAM_WRITER_ILL(ill)); ASSERT((mi_strlen(interf_name) + 1) <= LIFNAMSIZ); @@ -22929,7 +23187,7 @@ ipif_set_values(queue_t *q, mblk_t *mp, char *interf_name, uint_t *new_ppa_ptr) * Set the ILLF_ROUTER flag according to the global * IPv6 forwarding policy. */ - if (ipv6_forward != 0) + if (ipst->ips_ipv6_forward != 0) ill->ill_flags |= ILLF_ROUTER; } else if (ill->ill_flags & ILLF_IPV4) { ill->ill_isv6 = B_FALSE; @@ -22943,7 +23201,7 @@ ipif_set_values(queue_t *q, mblk_t *mp, char *interf_name, uint_t *new_ppa_ptr) * Set the ILLF_ROUTER flag according to the global * IPv4 forwarding policy. */ - if (ip_g_forward != 0) + if (ipst->ips_ip_g_forward != 0) ill->ill_flags |= ILLF_ROUTER; } @@ -22971,7 +23229,7 @@ ipif_set_values(queue_t *q, mblk_t *mp, char *interf_name, uint_t *new_ppa_ptr) ill->ill_ifname_pending_err = 0; ill_refhold(ill); - rw_enter(&ill_g_lock, RW_WRITER); + rw_enter(&ipst->ips_ill_g_lock, RW_WRITER); if ((error = ill_glist_insert(ill, interf_name, (ill->ill_flags & ILLF_IPV6) == ILLF_IPV6)) > 0) { ill->ill_ppa = UINT_MAX; @@ -22980,7 +23238,7 @@ ipif_set_values(queue_t *q, mblk_t *mp, char *interf_name, uint_t *new_ppa_ptr) * undo null termination done above. 
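ipif_set_values_tail above drops the global ip_trash_timer_started flag: the per-stack timeout id (ips_ip_ire_expire_id) itself says whether the expiry timer is running, the check is repeated under ips_ip_trash_timer_lock, and ipst is passed as the callback argument so the expiry routine knows which stack to scrub; the mld/igmp slow timers get the same treatment. A user-space analogue of that start-once pattern using pthreads; the real code uses the kernel timeout() facility and, as in the hunk, also peeks at the id without the lock before bothering to take it.

#include <pthread.h>
#include <stdbool.h>

typedef struct ip_stack {
	pthread_mutex_t	ips_ip_trash_timer_lock;
	bool		ips_ire_expire_started;	/* stands in for the timeout id */
	pthread_t	ips_ire_expire_thread;
} ip_stack_t;

static void *
ire_expire_sketch(void *arg)
{
	ip_stack_t *ipst = arg;	/* the callback sees exactly one stack */
	(void) ipst;		/* ... age out this stack's IREs ... */
	return (NULL);
}

static void
start_expire_once_sketch(ip_stack_t *ipst)
{
	pthread_mutex_lock(&ipst->ips_ip_trash_timer_lock);
	if (!ipst->ips_ire_expire_started) {	/* check under the lock */
		(void) pthread_create(&ipst->ips_ire_expire_thread, NULL,
		    ire_expire_sketch, ipst);
		ipst->ips_ire_expire_started = true;
	}
	pthread_mutex_unlock(&ipst->ips_ip_trash_timer_lock);
}

Passing the stack pointer as the timer argument is what lets one ip_trash_timer_expire routine serve every instance without consulting globals.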
*/ ppa_ptr[0] = old_char; - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); ill_refrele(ill); return (error); } @@ -23015,7 +23273,7 @@ ipif_set_values(queue_t *q, mblk_t *mp, char *interf_name, uint_t *new_ppa_ptr) ipsq = ipsq_try_enter(NULL, ill, q, mp, ip_reprocess_ioctl, NEW_OP, B_TRUE); - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); ill_refrele(ill); if (ipsq == NULL) return (EINPROGRESS); @@ -23040,10 +23298,8 @@ ipif_set_values(queue_t *q, mblk_t *mp, char *interf_name, uint_t *new_ppa_ptr) } -extern void (*ip_cleanup_func)(void); - void -ipif_init(void) +ipif_init(ip_stack_t *ipst) { hrtime_t hrt; int i; @@ -23054,23 +23310,24 @@ ipif_init(void) * source address everytime, this need not be really random. */ hrt = gethrtime(); - ipif_src_random = ((hrt >> 32) & 0xffffffff) * (hrt & 0xffffffff); + ipst->ips_ipif_src_random = + ((hrt >> 32) & 0xffffffff) * (hrt & 0xffffffff); for (i = 0; i < MAX_G_HEADS; i++) { - ill_g_heads[i].ill_g_list_head = (ill_if_t *)&ill_g_heads[i]; - ill_g_heads[i].ill_g_list_tail = (ill_if_t *)&ill_g_heads[i]; + ipst->ips_ill_g_heads[i].ill_g_list_head = + (ill_if_t *)&ipst->ips_ill_g_heads[i]; + ipst->ips_ill_g_heads[i].ill_g_list_tail = + (ill_if_t *)&ipst->ips_ill_g_heads[i]; } - avl_create(&phyint_g_list.phyint_list_avl_by_index, + avl_create(&ipst->ips_phyint_g_list->phyint_list_avl_by_index, ill_phyint_compare_index, sizeof (phyint_t), offsetof(struct phyint, phyint_avl_by_index)); - avl_create(&phyint_g_list.phyint_list_avl_by_name, + avl_create(&ipst->ips_phyint_g_list->phyint_list_avl_by_name, ill_phyint_compare_name, sizeof (phyint_t), offsetof(struct phyint, phyint_avl_by_name)); - - ip_cleanup_func = ip_thread_exit; } /* @@ -23083,7 +23340,8 @@ ipif_init(void) */ int ip_mrtun_rt_add(ipaddr_t in_src_addr, int flags, ipif_t *ipif_arg, - ipif_t *src_ipif, ire_t **ire_arg, queue_t *q, mblk_t *mp, ipsq_func_t func) + ipif_t *src_ipif, ire_t **ire_arg, queue_t *q, mblk_t *mp, ipsq_func_t func, + ip_stack_t *ipst) { ire_t *ire; ire_t *save_ire; @@ -23167,7 +23425,8 @@ ip_mrtun_rt_add(ipaddr_t in_src_addr, int flags, ipif_t *ipif_arg, flags, &ire_uinfo_null, NULL, - NULL); + NULL, + ipst); if (ire == NULL) { freeb(dlureq_mp); @@ -23244,7 +23503,7 @@ ip_mrtun_rt_delete(ipaddr_t in_src_addr, ipif_t *src_ipif) * It is used only by SO_DONTROUTE at the moment. 
*/ ipif_t * -ipif_lookup_onlink_addr(ipaddr_t addr, zoneid_t zoneid) +ipif_lookup_onlink_addr(ipaddr_t addr, zoneid_t zoneid, ip_stack_t *ipst) { ipif_t *ipif, *best_ipif; ill_t *ill; @@ -23253,8 +23512,8 @@ ipif_lookup_onlink_addr(ipaddr_t addr, zoneid_t zoneid) ASSERT(zoneid != ALL_ZONES); best_ipif = NULL; - rw_enter(&ill_g_lock, RW_READER); - ill = ILL_START_WALK_V4(&ctx); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); + ill = ILL_START_WALK_V4(&ctx, ipst); for (; ill != NULL; ill = ill_next(&ctx, ill)) { mutex_enter(&ill->ill_lock); for (ipif = ill->ill_ipif; ipif != NULL; @@ -23272,7 +23531,7 @@ ipif_lookup_onlink_addr(ipaddr_t addr, zoneid_t zoneid) if (ipif->ipif_pp_dst_addr == addr) { ipif_refhold_locked(ipif); mutex_exit(&ill->ill_lock); - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); if (best_ipif != NULL) ipif_refrele(best_ipif); return (ipif); @@ -23295,18 +23554,19 @@ ipif_lookup_onlink_addr(ipaddr_t addr, zoneid_t zoneid) (!(best_ipif->ipif_flags & IPIF_UP))))) { ipif_refhold_locked(ipif); mutex_exit(&ill->ill_lock); - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); if (best_ipif != NULL) ipif_refrele(best_ipif); best_ipif = ipif; - rw_enter(&ill_g_lock, RW_READER); + rw_enter(&ipst->ips_ill_g_lock, + RW_READER); mutex_enter(&ill->ill_lock); } } } mutex_exit(&ill->ill_lock); } - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); return (best_ipif); } @@ -23381,7 +23641,7 @@ ipif_remove_ire(ipif_t *ipif, ire_t *ire) * at ifconfig time. */ static void -ip_cgtp_bcast_add(ire_t *ire, ire_t *ire_dst) +ip_cgtp_bcast_add(ire_t *ire, ire_t *ire_dst, ip_stack_t *ipst) { ire_t *ire_prim; @@ -23389,7 +23649,7 @@ ip_cgtp_bcast_add(ire_t *ire, ire_t *ire_dst) ASSERT(ire_dst != NULL); ire_prim = ire_ctable_lookup(ire->ire_gateway_addr, 0, - IRE_BROADCAST, NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE); + IRE_BROADCAST, NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE, ipst); if (ire_prim != NULL) { /* * We are in the special case of broadcasts for @@ -23429,7 +23689,8 @@ ip_cgtp_bcast_add(ire_t *ire, ire_t *ire_dst) ire->ire_flags, &ire_uinfo_null, NULL, - NULL); + NULL, + ipst); if (bcast_ire != NULL) { @@ -23454,18 +23715,18 @@ ip_cgtp_bcast_add(ire_t *ire, ire_t *ire_dst) * Remove the broadcast ire */ static void -ip_cgtp_bcast_delete(ire_t *ire) +ip_cgtp_bcast_delete(ire_t *ire, ip_stack_t *ipst) { ire_t *ire_dst; ASSERT(ire != NULL); ire_dst = ire_ctable_lookup(ire->ire_addr, 0, IRE_BROADCAST, - NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE); + NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE, ipst); if (ire_dst != NULL) { ire_t *ire_prim; ire_prim = ire_ctable_lookup(ire->ire_gateway_addr, 0, - IRE_BROADCAST, NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE); + IRE_BROADCAST, NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE, ipst); if (ire_prim != NULL) { ipif_t *ipif_prim; ire_t *bcast_ire; @@ -23483,7 +23744,7 @@ ip_cgtp_bcast_delete(ire_t *ire) ipif_prim, ALL_ZONES, NULL, MATCH_IRE_TYPE | MATCH_IRE_GW | MATCH_IRE_IPIF | - MATCH_IRE_MASK); + MATCH_IRE_MASK, ipst); if (bcast_ire != NULL) { ip2dbg(("ip_cgtp_filter_bcast_delete: " @@ -23604,16 +23865,17 @@ ill_ipsec_capab_resize_algparm(ill_ipsec_capab_t *capab, int algid) */ boolean_t ipsec_capab_match(ill_t *ill, uint_t ill_index, boolean_t ill_isv6, - ipsa_t *sa) + ipsa_t *sa, netstack_t *ns) { boolean_t sa_isv6; uint_t algid; struct ill_ipsec_capab_s *cpp; boolean_t need_refrele = B_FALSE; + ip_stack_t *ipst = ns->netstack_ip; if (ill == NULL) { ill = ill_lookup_on_ifindex(ill_index, ill_isv6, NULL, - NULL, NULL, NULL); + NULL, NULL, NULL, ipst); if (ill == NULL) { 
ip0dbg(("ipsec_capab_match: ill doesn't exist\n")); return (B_FALSE); @@ -23694,6 +23956,7 @@ ill_ipsec_capab_add(ill_t *ill, uint_t dl_cap, boolean_t sadb_resync) ipsec_capab_ill_t **ills, *cur_ill, *new_ill; uint_t sa_type; uint_t ipproto; + ip_stack_t *ipst = ill->ill_ipst; ASSERT((dl_cap == DL_CAPAB_IPSEC_AH) || (dl_cap == DL_CAPAB_IPSEC_ESP)); @@ -23701,17 +23964,17 @@ ill_ipsec_capab_add(ill_t *ill, uint_t dl_cap, boolean_t sadb_resync) switch (dl_cap) { case DL_CAPAB_IPSEC_AH: sa_type = SADB_SATYPE_AH; - ills = &ipsec_capab_ills_ah; + ills = &ipst->ips_ipsec_capab_ills_ah; ipproto = IPPROTO_AH; break; case DL_CAPAB_IPSEC_ESP: sa_type = SADB_SATYPE_ESP; - ills = &ipsec_capab_ills_esp; + ills = &ipst->ips_ipsec_capab_ills_esp; ipproto = IPPROTO_ESP; break; } - rw_enter(&ipsec_capab_ills_lock, RW_WRITER); + rw_enter(&ipst->ips_ipsec_capab_ills_lock, RW_WRITER); /* * Add ill index to list of hardware accelerators. If @@ -23726,7 +23989,7 @@ ill_ipsec_capab_add(ill_t *ill, uint_t dl_cap, boolean_t sadb_resync) /* if this is a new entry for this ill */ new_ill = kmem_zalloc(sizeof (ipsec_capab_ill_t), KM_NOSLEEP); if (new_ill == NULL) { - rw_exit(&ipsec_capab_ills_lock); + rw_exit(&ipst->ips_ipsec_capab_ills_lock); return; } @@ -23736,13 +23999,13 @@ ill_ipsec_capab_add(ill_t *ill, uint_t dl_cap, boolean_t sadb_resync) *ills = new_ill; } else if (!sadb_resync) { /* not resync'ing SADB and an entry exists for this ill */ - rw_exit(&ipsec_capab_ills_lock); + rw_exit(&ipst->ips_ipsec_capab_ills_lock); return; } - rw_exit(&ipsec_capab_ills_lock); + rw_exit(&ipst->ips_ipsec_capab_ills_lock); - if (ipcl_proto_fanout_v6[ipproto].connf_head != NULL) + if (ipst->ips_ipcl_proto_fanout_v6[ipproto].connf_head != NULL) /* * IPsec module for protocol loaded, initiate dump * of the SADB to this ill. @@ -23757,14 +24020,15 @@ static void ill_ipsec_capab_delete(ill_t *ill, uint_t dl_cap) { ipsec_capab_ill_t **ills, *cur_ill, *prev_ill; + ip_stack_t *ipst = ill->ill_ipst; ASSERT(dl_cap == DL_CAPAB_IPSEC_AH || dl_cap == DL_CAPAB_IPSEC_ESP); - ills = (dl_cap == DL_CAPAB_IPSEC_AH) ? &ipsec_capab_ills_ah : - &ipsec_capab_ills_esp; + ills = (dl_cap == DL_CAPAB_IPSEC_AH) ? &ipst->ips_ipsec_capab_ills_ah : + &ipst->ips_ipsec_capab_ills_esp; - rw_enter(&ipsec_capab_ills_lock, RW_WRITER); + rw_enter(&ipst->ips_ipsec_capab_ills_lock, RW_WRITER); prev_ill = NULL; for (cur_ill = *ills; cur_ill != NULL && (cur_ill->ill_index != @@ -23773,7 +24037,7 @@ ill_ipsec_capab_delete(ill_t *ill, uint_t dl_cap) ; if (cur_ill == NULL) { /* entry not found */ - rw_exit(&ipsec_capab_ills_lock); + rw_exit(&ipst->ips_ipsec_capab_ills_lock); return; } if (prev_ill == NULL) { @@ -23783,7 +24047,7 @@ ill_ipsec_capab_delete(ill_t *ill, uint_t dl_cap) prev_ill->next = cur_ill->next; } kmem_free(cur_ill, sizeof (ipsec_capab_ill_t)); - rw_exit(&ipsec_capab_ills_lock); + rw_exit(&ipst->ips_ipsec_capab_ills_lock); } @@ -23808,20 +24072,22 @@ ill_ipsec_capab_send_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *arg) * We free the mblk and, if sa is non-null, release the held referece. */ void -ill_ipsec_capab_send_all(uint_t sa_type, mblk_t *mp, ipsa_t *sa) +ill_ipsec_capab_send_all(uint_t sa_type, mblk_t *mp, ipsa_t *sa, + netstack_t *ns) { ipsec_capab_ill_t *ici, *cur_ici; ill_t *ill; mblk_t *nmp, *mp_ship_list = NULL, *next_mp; + ip_stack_t *ipst = ns->netstack_ip; - ici = (sa_type == SADB_SATYPE_AH) ? ipsec_capab_ills_ah : - ipsec_capab_ills_esp; + ici = (sa_type == SADB_SATYPE_AH) ? 
ipst->ips_ipsec_capab_ills_ah : + ipst->ips_ipsec_capab_ills_esp; - rw_enter(&ipsec_capab_ills_lock, RW_READER); + rw_enter(&ipst->ips_ipsec_capab_ills_lock, RW_READER); for (cur_ici = ici; cur_ici != NULL; cur_ici = cur_ici->next) { ill = ill_lookup_on_ifindex(cur_ici->ill_index, - cur_ici->ill_isv6, NULL, NULL, NULL, NULL); + cur_ici->ill_isv6, NULL, NULL, NULL, NULL, ipst); /* * Handle the case where the ill goes away while the SADB is @@ -23838,7 +24104,7 @@ ill_ipsec_capab_send_all(uint_t sa_type, mblk_t *mp, ipsa_t *sa) * sending SA to ill. */ if (!ipsec_capab_match(ill, cur_ici->ill_index, - cur_ici->ill_isv6, sa)) { + cur_ici->ill_isv6, sa, ipst->ips_netstack)) { ill_refrele(ill); continue; } @@ -23869,7 +24135,7 @@ ill_ipsec_capab_send_all(uint_t sa_type, mblk_t *mp, ipsa_t *sa) nmp->b_prev = (mblk_t *)ill; } - rw_exit(&ipsec_capab_ills_lock); + rw_exit(&ipst->ips_ipsec_capab_ills_lock); nmp = mp_ship_list; while (nmp != NULL) { @@ -24143,6 +24409,7 @@ boolean_t ipif_lookup_zoneid_group(ill_t *ill, zoneid_t zoneid, int flags, ipif_t **ipifp) { ill_t *illg; + ip_stack_t *ipst = ill->ill_ipst; /* * We look at the passed-in ill first without grabbing ill_g_lock. @@ -24150,10 +24417,10 @@ ipif_lookup_zoneid_group(ill_t *ill, zoneid_t zoneid, int flags, ipif_t **ipifp) if (ipif_lookup_zoneid(ill, zoneid, flags, ipifp)) { return (B_TRUE); } - rw_enter(&ill_g_lock, RW_READER); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); if (ill->ill_group == NULL) { /* ill not in a group */ - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); return (B_FALSE); } @@ -24173,7 +24440,7 @@ ipif_lookup_zoneid_group(ill_t *ill, zoneid_t zoneid, int flags, ipif_t **ipifp) break; } } while ((illg = illg->ill_group_next) != NULL); - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); return (illg != NULL); } @@ -24198,12 +24465,14 @@ ill_is_probeonly(ill_t *ill) * an ill_refrele(). */ ipif_t * -ipif_getby_indexes(uint_t ifindex, uint_t lifidx, boolean_t isv6) +ipif_getby_indexes(uint_t ifindex, uint_t lifidx, boolean_t isv6, + ip_stack_t *ipst) { ipif_t *ipif; ill_t *ill; - ill = ill_lookup_on_ifindex(ifindex, isv6, NULL, NULL, NULL, NULL); + ill = ill_lookup_on_ifindex(ifindex, isv6, NULL, NULL, NULL, NULL, + ipst); if (ill == NULL) return (NULL); @@ -24238,9 +24507,11 @@ ipif_getby_indexes(uint_t ifindex, uint_t lifidx, boolean_t isv6) void ill_fastpath_flush(ill_t *ill) { + ip_stack_t *ipst = ill->ill_ipst; + nce_fastpath_list_dispatch(ill, NULL, NULL); - ndp_walk_common((ill->ill_isv6 ? &ndp6 : &ndp4), ill, - (pfi_t)ndp_fastpath_flush, NULL, B_TRUE); + ndp_walk_common((ill->ill_isv6 ? ipst->ips_ndp6 : ipst->ips_ndp4), + ill, (pfi_t)ndp_fastpath_flush, NULL, B_TRUE); } /* @@ -24362,3 +24633,110 @@ ill_set_ndmp(ill_t *ill, mblk_t *ndmp, uint_t addroff, uint_t addrlen) ill->ill_nd_lla_mp = ndmp; ill->ill_nd_lla_len = addrlen; } + + + +major_t IP_MAJ; +#define IP "ip" + +#define UDP6DEV "/devices/pseudo/udp6@0:udp6" +#define UDPDEV "/devices/pseudo/udp@0:udp" + +/* + * Issue REMOVEIF ioctls to have the loopback interfaces + * go away. Other interfaces are either I_LINKed or I_PLINKed; + * the former going away when the user-level processes in the zone + * are killed * and the latter are cleaned up by the stream head + * str_stack_shutdown callback that undoes all I_PLINKs. 
+ */ +void +ip_loopback_cleanup(ip_stack_t *ipst) +{ + int error; + ldi_handle_t lh = NULL; + ldi_ident_t li = NULL; + int rval; + cred_t *cr; + struct strioctl iocb; + struct lifreq lifreq; + + IP_MAJ = ddi_name_to_major(IP); + +#ifdef NS_DEBUG + (void) printf("ip_loopback_cleanup() stackid %d\n", + ipst->ips_netstack->netstack_stackid); +#endif + + bzero(&lifreq, sizeof (lifreq)); + (void) strcpy(lifreq.lifr_name, ipif_loopback_name); + + error = ldi_ident_from_major(IP_MAJ, &li); + if (error) { +#ifdef DEBUG + printf("ip_loopback_cleanup: lyr ident get failed error %d\n", + error); +#endif + return; + } + + cr = zone_get_kcred(netstackid_to_zoneid( + ipst->ips_netstack->netstack_stackid)); + ASSERT(cr != NULL); + error = ldi_open_by_name(UDP6DEV, FREAD|FWRITE, cr, &lh, li); + if (error) { +#ifdef DEBUG + printf("ip_loopback_cleanup: open of UDP6DEV failed error %d\n", + error); +#endif + goto out; + } + iocb.ic_cmd = SIOCLIFREMOVEIF; + iocb.ic_timout = 15; + iocb.ic_len = sizeof (lifreq); + iocb.ic_dp = (char *)&lifreq; + + error = ldi_ioctl(lh, I_STR, (intptr_t)&iocb, FKIOCTL, cr, &rval); + /* LINTED - statement has no consequent */ + if (error) { +#ifdef NS_DEBUG + printf("ip_loopback_cleanup: ioctl SIOCLIFREMOVEIF failed on " + "UDP6 error %d\n", error); +#endif + } + (void) ldi_close(lh, FREAD|FWRITE, cr); + lh = NULL; + + error = ldi_open_by_name(UDPDEV, FREAD|FWRITE, cr, &lh, li); + if (error) { +#ifdef NS_DEBUG + printf("ip_loopback_cleanup: open of UDPDEV failed error %d\n", + error); +#endif + goto out; + } + + iocb.ic_cmd = SIOCLIFREMOVEIF; + iocb.ic_timout = 15; + iocb.ic_len = sizeof (lifreq); + iocb.ic_dp = (char *)&lifreq; + + error = ldi_ioctl(lh, I_STR, (intptr_t)&iocb, FKIOCTL, cr, &rval); + /* LINTED - statement has no consequent */ + if (error) { +#ifdef NS_DEBUG + printf("ip_loopback_cleanup: ioctl SIOCLIFREMOVEIF failed on " + "UDP error %d\n", error); +#endif + } + (void) ldi_close(lh, FREAD|FWRITE, cr); + lh = NULL; + +out: + /* Close layered handles */ + if (lh) + (void) ldi_close(lh, FREAD|FWRITE, cr); + if (li) + ldi_ident_release(li); + + crfree(cr); +} diff --git a/usr/src/uts/common/inet/ip/ip_ire.c b/usr/src/uts/common/inet/ip/ip_ire.c index 31e51a6b51..2e0985d11c 100644 --- a/usr/src/uts/common/inet/ip/ip_ire.c +++ b/usr/src/uts/common/inet/ip/ip_ire.c @@ -69,6 +69,8 @@ #include <inet/tcp.h> #include <inet/ipclassifier.h> #include <sys/zone.h> +#include <sys/cpuvar.h> + #include <sys/tsol/label.h> #include <sys/tsol/tnet.h> @@ -146,11 +148,11 @@ struct kmem_cache *rt_entry_cache; * Cache table (ip_cache_table/ip_cache_table_v6) is a pointer to an * array of irb_t structure and forwarding table (ip_forwarding_table/ * ip_forwarding_table_v6) is an array of pointers to array of irb_t - * structure. ip_forwarding_table[_v6] is allocated dynamically in - * ire_add_v4/v6. ire_ft_init_lock is used to serialize multiple threads + * structure. ip_forwarding_table_v6 is allocated dynamically in + * ire_add_v6. ire_ft_init_lock is used to serialize multiple threads * initializing the same bucket. Once a bucket is initialized, it is never - * de-alloacted. This assumption enables us to access ip_forwarding_table[i] - * or ip_forwarding_table_v6[i] without any locks. + * de-alloacted. This assumption enables us to access + * ip_forwarding_table_v6[i] without any locks. 
* * Each irb_t - ire bucket structure has a lock to protect * a bucket and the ires residing in the bucket have a back pointer to @@ -231,26 +233,6 @@ struct kmem_cache *rt_entry_cache; * comment in ire_walk_ill_match(). */ -/* This is dynamically allocated in ip_ire_init */ -irb_t *ip_cache_table; -/* This is dynamically allocated in ire_add_mrtun */ -irb_t *ip_mrtun_table; - -uint32_t ire_handle = 1; -/* - * ire_ft_init_lock is used while initializing ip_forwarding_table - * dynamically in ire_add. - */ -kmutex_t ire_ft_init_lock; -kmutex_t ire_mrtun_lock; /* Protects creation of table and it's count */ -kmutex_t ire_srcif_table_lock; /* Same as above */ -/* - * The following counts are used to determine whether a walk is - * needed through the reverse tunnel table or through ills - */ -kmutex_t ire_handle_lock; /* Protects ire_handle */ -uint_t ire_mrtun_count; /* Number of ires in reverse tun table */ - /* * A per-interface routing table is created ( if not present) * when the first entry is added to this special routing table. @@ -269,11 +251,10 @@ uint_t ire_mrtun_count; /* Number of ires in reverse tun table */ * of ires in that table. */ -uint_t ire_srcif_table_count; /* Number of ires in all srcif tables */ - /* * The minimum size of IRE cache table. It will be recalcuated in * ip_ire_init(). + * Setable in /etc/system */ uint32_t ip_cache_table_size = IP_CACHE_TABLE_SIZE; uint32_t ip6_cache_table_size = IP6_CACHE_TABLE_SIZE; @@ -281,15 +262,13 @@ uint32_t ip6_cache_table_size = IP6_CACHE_TABLE_SIZE; /* * The size of the forwarding table. We will make sure that it is a * power of 2 in ip_ire_init(). + * Setable in /etc/system */ uint32_t ip6_ftable_hash_size = IP6_FTABLE_HASH_SIZE; struct kmem_cache *ire_cache; static ire_t ire_null; -ire_stats_t ire_stats_v4; /* IPv4 ire statistics */ -ire_stats_t ire_stats_v6; /* IPv6 ire statistics */ - /* * The threshold number of IRE in a bucket when the IREs are * cleaned up. This threshold is calculated later in ip_open() @@ -342,7 +321,7 @@ ire_stats_t ire_stats_v6; /* IPv6 ire statistics */ * different. In future, when we have more experience, we * may want to change this behavior. */ -uint32_t ip_ire_max_bucket_cnt = 10; +uint32_t ip_ire_max_bucket_cnt = 10; /* Setable in /etc/system */ uint32_t ip6_ire_max_bucket_cnt = 10; /* @@ -351,7 +330,7 @@ uint32_t ip6_ire_max_bucket_cnt = 10; * performance of some apps as the temporary IREs are removed too * often. */ -uint32_t ip_ire_min_bucket_cnt = 3; +uint32_t ip_ire_min_bucket_cnt = 3; /* /etc/system - not used */ uint32_t ip6_ire_min_bucket_cnt = 3; /* @@ -359,9 +338,9 @@ uint32_t ip6_ire_min_bucket_cnt = 3; * memory. This is a shift factor, so 6 means the ratio 1 to 64. This * value can be changed in /etc/system. 6 is a reasonable number. */ -uint32_t ip_ire_mem_ratio = 6; +uint32_t ip_ire_mem_ratio = 6; /* /etc/system */ /* The shift factor for CPU speed to calculate the max IRE bucket length. 
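The comments above mark ip_cache_table_size, ip6_cache_table_size, ip6_ftable_hash_size, ip_ire_max_bucket_cnt, ip_ire_mem_ratio and ip_ire_cpu_ratio as overridable from /etc/system. Assuming these symbols live in the ip module (the diff does not show the module name), an override would take the usual set module:variable form; the values below are purely illustrative:

	* /etc/system fragment -- illustrative values, assuming the ip module
	set ip:ip_cache_table_size = 8192
	set ip:ip_ire_max_bucket_cnt = 20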
*/ -uint32_t ip_ire_cpu_ratio = 7; +uint32_t ip_ire_cpu_ratio = 7; /* /etc/system */ typedef struct nce_clookup_s { ipaddr_t ncecl_addr; @@ -376,6 +355,7 @@ typedef struct nce_clookup_s { */ #define IP_MAX_CACHE_TABLE_SIZE 4096 +/* Setable in /etc/system */ static uint32_t ip_max_cache_table_size = IP_MAX_CACHE_TABLE_SIZE; static uint32_t ip6_max_cache_table_size = IP_MAX_CACHE_TABLE_SIZE; @@ -392,9 +372,9 @@ static ire_t *ire_update_srcif_v4(ire_t *ire); static void ire_delete_v4(ire_t *ire); static void ire_report_ctable(ire_t *ire, char *mp); static void ire_report_mrtun_table(ire_t *ire, char *mp); -static void ire_report_srcif_table(ire_t *ire, char *mp); +static void ire_report_srcif_table(ire_t *ire, char *mp, ip_stack_t *ipst); static void ire_walk_ipvers(pfv_t func, void *arg, uchar_t vers, - zoneid_t zoneid); + zoneid_t zoneid, ip_stack_t *); static void ire_walk_ill_ipvers(uint_t match_flags, uint_t ire_type, pfv_t func, void *arg, uchar_t vers, ill_t *ill); static void ire_cache_cleanup(irb_t *irb, uint32_t threshold, int cnt); @@ -470,6 +450,7 @@ ip_ire_advise(queue_t *q, mblk_t *mp, cred_t *ioc_cr) in6_addr_t v6addr; irb_t *irb; zoneid_t zoneid; + ip_stack_t *ipst = CONNQ_TO_IPST(q); ASSERT(q->q_next == NULL); zoneid = Q_TO_CONN(q)->conn_zoneid; @@ -478,7 +459,7 @@ ip_ire_advise(queue_t *q, mblk_t *mp, cred_t *ioc_cr) * Check privilege using the ioctl credential; if it is NULL * then this is a kernel message and therefor privileged. */ - if (ioc_cr != NULL && secpolicy_net_config(ioc_cr, B_FALSE) != 0) + if (ioc_cr != NULL && secpolicy_ip_config(ioc_cr, B_FALSE) != 0) return (EPERM); ipic = (ipic_t *)mp->b_rptr; @@ -493,14 +474,14 @@ ip_ire_advise(queue_t *q, mblk_t *mp, cred_t *ioc_cr) /* Extract the destination address. */ addr = *(ipaddr_t *)addr_ucp; /* Find the corresponding IRE. */ - ire = ire_cache_lookup(addr, zoneid, NULL); + ire = ire_cache_lookup(addr, zoneid, NULL, ipst); break; } case IPV6_ADDR_LEN: { /* Extract the destination address. */ v6addr = *(in6_addr_t *)addr_ucp; /* Find the corresponding IRE. */ - ire = ire_cache_lookup_v6(&v6addr, zoneid, NULL); + ire = ire_cache_lookup_v6(&v6addr, zoneid, NULL, ipst); break; } default: @@ -596,15 +577,17 @@ ip_ire_delete(queue_t *q, mblk_t *mp, cred_t *ioc_cr) ire_t *gire = NULL; ill_t *ill; mblk_t *arp_mp; + ip_stack_t *ipst; ASSERT(q->q_next == NULL); zoneid = Q_TO_CONN(q)->conn_zoneid; + ipst = CONNQ_TO_IPST(q); /* * Check privilege using the ioctl credential; if it is NULL * then this is a kernel message and therefor privileged. */ - if (ioc_cr != NULL && secpolicy_net_config(ioc_cr, B_FALSE) != 0) + if (ioc_cr != NULL && secpolicy_ip_config(ioc_cr, B_FALSE) != 0) return (EPERM); ipid = (ipid_t *)mp->b_rptr; @@ -638,7 +621,7 @@ ip_ire_delete(queue_t *q, mblk_t *mp, cred_t *ioc_cr) bcopy(addr_ucp, &addr, IP_ADDR_LEN); /* Try to find the CACHED IRE. */ - ire = ire_cache_lookup(addr, zoneid, NULL); + ire = ire_cache_lookup(addr, zoneid, NULL, ipst); /* Nail it. */ if (ire) { @@ -654,7 +637,7 @@ ip_ire_delete(queue_t *q, mblk_t *mp, cred_t *ioc_cr) * that are too eager in sending delete messages. 
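The advise/delete ioctl paths above pull the per-stack state off the conn queue with CONNQ_TO_IPST; the report handlers later in this file fall back to ILLQ_TO_IPST when the queue is not conn-backed. A tiny sketch of that choice, assuming only the macros shown in this diff; queue_to_ipst is an invented helper name:

	/* Illustrative only: pick the ip_stack_t that owns a given queue. */
	static ip_stack_t *
	queue_to_ipst(queue_t *q)
	{
		if (CONN_Q(q))
			return (CONNQ_TO_IPST(q));	/* upper, conn-backed queue */
		return (ILLQ_TO_IPST(q));		/* lower, ill read queue */
	}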
*/ if (gethrestime_sec() < - ire->ire_create_time + ip_ignore_delete_time) { + ire->ire_create_time + ipst->ips_ip_ignore_delete_time) { ire_refrele(ire); return (EINVAL); } @@ -679,7 +662,8 @@ ip_ire_delete(queue_t *q, mblk_t *mp, cred_t *ioc_cr) gire = ire_ftable_lookup(ire->ire_addr, ire->ire_cmask, 0, 0, - ire->ire_ipif, NULL, zoneid, 0, NULL, match_flags); + ire->ire_ipif, NULL, zoneid, 0, NULL, match_flags, + ipst); ip3dbg(("ire_ftable_lookup() returned gire %p\n", (void *)gire)); @@ -720,7 +704,7 @@ done: /* report the bad route to routing sockets */ ip_rts_change(RTM_LOSING, ire->ire_addr, ire->ire_gateway_addr, ire->ire_mask, ire->ire_src_addr, 0, 0, 0, - (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_IFA)); + (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_IFA), ipst); routing_sock_info = B_TRUE; /* @@ -742,7 +726,7 @@ done: * remove it if present. */ ire = ire_route_lookup(addr, 0, 0, IRE_HOST, NULL, NULL, - ALL_ZONES, NULL, MATCH_IRE_TYPE); + ALL_ZONES, NULL, MATCH_IRE_TYPE, ipst); /* Nail it. */ if (ire != NULL) { @@ -751,7 +735,8 @@ done: ip_rts_change(RTM_LOSING, ire->ire_addr, ire->ire_gateway_addr, ire->ire_mask, ire->ire_src_addr, 0, 0, 0, - (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_IFA)); + (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_IFA), + ipst); } ire_delete(ire); } @@ -770,6 +755,12 @@ int ip_ire_report(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *ioc_cr) { zoneid_t zoneid; + ip_stack_t *ipst; + + if (CONN_Q(q)) + ipst = CONNQ_TO_IPST(q); + else + ipst = ILLQ_TO_IPST(q); (void) mi_mpprintf(mp, "IRE " MI_COL_HDRPAD_STR @@ -805,8 +796,8 @@ ip_ire_report(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *ioc_cr) if (zoneid == GLOBAL_ZONEID) zoneid = ALL_ZONES; - ire_walk_v4(ire_report_ftable, mp->b_cont, zoneid); - ire_walk_v4(ire_report_ctable, mp->b_cont, zoneid); + ire_walk_v4(ire_report_ftable, mp->b_cont, zoneid, ipst); + ire_walk_v4(ire_report_ctable, mp->b_cont, zoneid, ipst); return (0); } @@ -871,6 +862,13 @@ ire_report_ctable(ire_t *ire, char *mp) int ip_ire_report_mrtun(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *ioc_cr) { + ip_stack_t *ipst; + + if (CONN_Q(q)) + ipst = CONNQ_TO_IPST(q); + else + ipst = ILLQ_TO_IPST(q); + (void) mi_mpprintf(mp, "IRE " MI_COL_HDRPAD_STR /* 01234567[89ABCDEF] */ @@ -885,7 +883,8 @@ ip_ire_report_mrtun(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *ioc_cr) "ref "); /* 123 */ - ire_walk_ill_mrtun(0, 0, ire_report_mrtun_table, mp, NULL); + ire_walk_ill_mrtun(0, 0, ire_report_mrtun_table, mp, NULL, + ipst); return (0); } @@ -916,6 +915,12 @@ ire_report_mrtun_table(ire_t *ire, char *mp) int ip_ire_report_srcif(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *ioc_cr) { + ip_stack_t *ipst; + + if (CONN_Q(q)) + ipst = CONNQ_TO_IPST(q); + else + ipst = ILLQ_TO_IPST(q); /* Report all interface based ires */ @@ -937,13 +942,14 @@ ip_ire_report_srcif(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *ioc_cr) "type " /* ABCDEFGH */ "in/out/forward"); - ire_walk_srcif_table_v4(ire_report_srcif_table, mp); + ire_walk_srcif_table_v4(ire_report_srcif_table, mp, ipst); return (0); } /* Reports the interface table ires */ +/* ARGSUSED2 */ static void -ire_report_srcif_table(ire_t *ire, char *mp) +ire_report_srcif_table(ire_t *ire, char *mp, ip_stack_t *ipst) { char buf1[INET_ADDRSTRLEN]; char buf2[INET_ADDRSTRLEN]; @@ -986,6 +992,9 @@ ip_ire_req(queue_t *q, mblk_t *mp) mblk_t *mp1; ire_t *sire = NULL; zoneid_t zoneid = Q_TO_CONN(q)->conn_zoneid; + ip_stack_t *ipst = CONNQ_TO_IPST(q); + + ASSERT(q->q_next == NULL); if ((mp->b_wptr - mp->b_rptr) < sizeof (ire_t) || 
!OK_32PTR(mp->b_rptr)) { @@ -999,12 +1008,12 @@ ip_ire_req(queue_t *q, mblk_t *mp) if (inire->ire_ipversion == IPV6_VERSION) { ire = ire_route_lookup_v6(&inire->ire_addr_v6, 0, 0, 0, NULL, &sire, zoneid, NULL, - (MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT)); + (MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT), ipst); } else { ASSERT(inire->ire_ipversion == IPV4_VERSION); ire = ire_route_lookup(inire->ire_addr, 0, 0, 0, NULL, &sire, zoneid, NULL, - (MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT)); + (MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT), ipst); } /* @@ -1035,7 +1044,8 @@ ip_ire_req(queue_t *q, mblk_t *mp) inire->ire_ipsec_overhead = conn_ipsec_length(Q_TO_CONN(q)); /* Pass the latest setting of the ip_path_mtu_discovery */ - inire->ire_frag_flag |= (ip_path_mtu_discovery) ? IPH_DF : 0; + inire->ire_frag_flag |= + (ipst->ips_ip_path_mtu_discovery) ? IPH_DF : 0; } if (ire != NULL) ire_refrele(ire); @@ -1067,6 +1077,7 @@ ire_send(queue_t *q, mblk_t *pkt, ire_t *ire) uint_t ifindex; ill_t *ill; zoneid_t zoneid = ire->ire_zoneid; + ip_stack_t *ipst = ire->ire_ipst; ASSERT(ire->ire_ipversion == IPV4_VERSION); ASSERT(!(ire->ire_type & IRE_LOCAL)); /* Has different ire_zoneid */ @@ -1091,7 +1102,7 @@ ire_send(queue_t *q, mblk_t *pkt, ire_t *ire) */ ifindex = (uint_t)(uintptr_t)pkt->b_prev; ill = ill_lookup_on_ifindex(ifindex, B_FALSE, - NULL, NULL, NULL, NULL); + NULL, NULL, NULL, NULL, ipst); if (ill == NULL) { pkt->b_prev = NULL; pkt->b_next = NULL; @@ -1239,6 +1250,7 @@ ire_send_v6(queue_t *q, mblk_t *pkt, ire_t *ire) boolean_t secure; uint_t ifindex; zoneid_t zoneid = ire->ire_zoneid; + ip_stack_t *ipst = ire->ire_ipst; ASSERT(ire->ire_ipversion == IPV6_VERSION); ASSERT(!(ire->ire_type & IRE_LOCAL)); /* Has different ire_zoneid */ @@ -1267,7 +1279,7 @@ ire_send_v6(queue_t *q, mblk_t *pkt, ire_t *ire) */ ifindex = (uint_t)(uintptr_t)pkt->b_prev; ill = ill_lookup_on_ifindex(ifindex, B_TRUE, - NULL, NULL, NULL, NULL); + NULL, NULL, NULL, NULL, ipst); if (ill == NULL) { pkt->b_prev = NULL; pkt->b_next = NULL; @@ -1434,9 +1446,15 @@ ire_cache_cleanup(irb_t *irb, uint32_t threshold, int cnt) if (irb->irb_ire_cnt - irb->irb_tmp_ire_cnt > threshold) { for (ire = irb->irb_ire; ire != NULL && cnt > 0; ire = ire->ire_next) { - if (ire->ire_marks & IRE_MARK_CONDEMNED || - ire->ire_gateway_addr == 0) { + if (ire->ire_marks & IRE_MARK_CONDEMNED) continue; + if (ire->ire_ipversion == IPV4_VERSION) { + if (ire->ire_gateway_addr == 0) + continue; + } else { + if (IN6_IS_ADDR_UNSPECIFIED( + &ire->ire_gateway_addr_v6)) + continue; } if ((ire->ire_type == IRE_CACHE) && (lbolt - ire->ire_last_used_time > cut_off) && @@ -1478,6 +1496,7 @@ ire_add_then_send(queue_t *q, ire_t *ire, mblk_t *mp) ire_t *dst_ire; ipha_t *ipha; ip6_t *ip6h; + ip_stack_t *ipst = ire->ire_ipst; if (mp != NULL) { /* @@ -1499,7 +1518,7 @@ ire_add_then_send(queue_t *q, ire_t *ire, mblk_t *mp) mp = first_mp; dst_ire = ire_cache_lookup(ipha->ipha_dst, - ire->ire_zoneid, MBLK_GETLABEL(mp)); + ire->ire_zoneid, MBLK_GETLABEL(mp), ipst); } else { ASSERT(ire->ire_ipversion == IPV6_VERSION); /* @@ -1514,7 +1533,7 @@ ire_add_then_send(queue_t *q, ire_t *ire, mblk_t *mp) save_mp = mp; mp = first_mp; dst_ire = ire_cache_lookup_v6(&ip6h->ip6_dst, - ire->ire_zoneid, MBLK_GETLABEL(mp)); + ire->ire_zoneid, MBLK_GETLABEL(mp), ipst); } if (dst_ire != NULL) { if (dst_ire->ire_flags & RTF_MULTIRT) { @@ -1610,11 +1629,11 @@ ire_add_then_send(queue_t *q, ire_t *ire, mblk_t *mp) */ ip_newroute(q, mp, ipha->ipha_dst, 0, (CONN_Q(q) ? 
Q_TO_CONN(q) : NULL), - ire->ire_zoneid); + ire->ire_zoneid, ipst); } else { ASSERT(ire->ire_ipversion == IPV6_VERSION); ip_newroute_v6(q, mp, &ip6h->ip6_dst, NULL, - NULL, ire->ire_zoneid); + NULL, ire->ire_zoneid, ipst); } } @@ -1649,7 +1668,8 @@ ire_init(ire_t *ire, uchar_t *addr, uchar_t *mask, uchar_t *src_addr, uchar_t *gateway, uchar_t *in_src_addr, uint_t *max_fragp, mblk_t *fp_mp, queue_t *rfq, queue_t *stq, ushort_t type, mblk_t *dlureq_mp, ipif_t *ipif, ill_t *in_ill, ipaddr_t cmask, uint32_t phandle, uint32_t ihandle, - uint32_t flags, const iulp_t *ulp_info, tsol_gc_t *gc, tsol_gcgrp_t *gcgrp) + uint32_t flags, const iulp_t *ulp_info, tsol_gc_t *gc, tsol_gcgrp_t *gcgrp, + ip_stack_t *ipst) { /* * Reject IRE security attribute creation/initialization @@ -1695,7 +1715,7 @@ ire_init(ire_t *ire, uchar_t *addr, uchar_t *mask, uchar_t *src_addr, return (NULL); } - BUMP_IRE_STATS(ire_stats_v4, ire_stats_alloced); + BUMP_IRE_STATS(ipst->ips_ire_stats_v4, ire_stats_alloced); if (addr != NULL) bcopy(addr, &ire->ire_addr, IP_ADDR_LEN); @@ -1718,7 +1738,7 @@ ire_init(ire_t *ire, uchar_t *addr, uchar_t *mask, uchar_t *src_addr, /* ire_init_common will free the mblks upon encountering any failure */ if (!ire_init_common(ire, max_fragp, fp_mp, rfq, stq, type, dlureq_mp, ipif, in_ill, phandle, ihandle, flags, IPV4_VERSION, ulp_info, - gc, gcgrp)) + gc, gcgrp, ipst)) return (NULL); return (ire); @@ -1734,7 +1754,8 @@ ire_create_mp(uchar_t *addr, uchar_t *mask, uchar_t *src_addr, uchar_t *gateway, uchar_t *in_src_addr, uint_t max_frag, mblk_t *fp_mp, queue_t *rfq, queue_t *stq, ushort_t type, mblk_t *dlureq_mp, ipif_t *ipif, ill_t *in_ill, ipaddr_t cmask, uint32_t phandle, uint32_t ihandle, uint32_t flags, - const iulp_t *ulp_info, tsol_gc_t *gc, tsol_gcgrp_t *gcgrp) + const iulp_t *ulp_info, tsol_gc_t *gc, tsol_gcgrp_t *gcgrp, + ip_stack_t *ipst) { ire_t *ire, *buf; ire_t *ret_ire; @@ -1779,11 +1800,13 @@ ire_create_mp(uchar_t *addr, uchar_t *mask, uchar_t *src_addr, uchar_t *gateway, ret_ire = ire_init(ire, addr, mask, src_addr, gateway, in_src_addr, NULL, fp_mp, rfq, stq, type, dlureq_mp, ipif, in_ill, cmask, - phandle, ihandle, flags, ulp_info, gc, gcgrp); + phandle, ihandle, flags, ulp_info, gc, gcgrp, ipst); ill = (ill_t *)(stq->q_ptr); if (ret_ire == NULL) { + /* ire_freemblk needs these set */ ire->ire_stq_ifindex = ill->ill_phyint->phyint_ifindex; + ire->ire_ipst = ipst; freeb(ire->ire_mp); return (NULL); } @@ -1810,7 +1833,8 @@ ire_create(uchar_t *addr, uchar_t *mask, uchar_t *src_addr, uchar_t *gateway, uchar_t *in_src_addr, uint_t *max_fragp, mblk_t *fp_mp, queue_t *rfq, queue_t *stq, ushort_t type, mblk_t *dlureq_mp, ipif_t *ipif, ill_t *in_ill, ipaddr_t cmask, uint32_t phandle, uint32_t ihandle, uint32_t flags, - const iulp_t *ulp_info, tsol_gc_t *gc, tsol_gcgrp_t *gcgrp) + const iulp_t *ulp_info, tsol_gc_t *gc, tsol_gcgrp_t *gcgrp, + ip_stack_t *ipst) { ire_t *ire; ire_t *ret_ire; @@ -1824,7 +1848,7 @@ ire_create(uchar_t *addr, uchar_t *mask, uchar_t *src_addr, uchar_t *gateway, ret_ire = ire_init(ire, addr, mask, src_addr, gateway, in_src_addr, max_fragp, fp_mp, rfq, stq, type, dlureq_mp, ipif, in_ill, cmask, - phandle, ihandle, flags, ulp_info, gc, gcgrp); + phandle, ihandle, flags, ulp_info, gc, gcgrp, ipst); if (ret_ire == NULL) { kmem_cache_free(ire_cache, ire); @@ -1843,10 +1867,11 @@ ire_init_common(ire_t *ire, uint_t *max_fragp, mblk_t *fp_mp, queue_t *rfq, queue_t *stq, ushort_t type, mblk_t *dlureq_mp, ipif_t *ipif, ill_t *in_ill, uint32_t phandle, uint32_t ihandle, 
uint32_t flags, uchar_t ipversion, - const iulp_t *ulp_info, tsol_gc_t *gc, tsol_gcgrp_t *gcgrp) + const iulp_t *ulp_info, tsol_gc_t *gc, tsol_gcgrp_t *gcgrp, + ip_stack_t *ipst) { ire->ire_max_fragp = max_fragp; - ire->ire_frag_flag |= (ip_path_mtu_discovery) ? IPH_DF : 0; + ire->ire_frag_flag |= (ipst->ips_ip_path_mtu_discovery) ? IPH_DF : 0; ASSERT(fp_mp == NULL || fp_mp->b_datap->db_type == M_DATA); #ifdef DEBUG @@ -1910,13 +1935,13 @@ ire_init_common(ire_t *ire, uint_t *max_fragp, mblk_t *fp_mp, * called as a writer. */ if (ire->ire_type & IRE_OFFSUBNET) { - mutex_enter(&ire_handle_lock); - ire->ire_phandle = (uint32_t)ire_handle++; - mutex_exit(&ire_handle_lock); + mutex_enter(&ipst->ips_ire_handle_lock); + ire->ire_phandle = (uint32_t)ipst->ips_ire_handle++; + mutex_exit(&ipst->ips_ire_handle_lock); } else if (ire->ire_type & IRE_INTERFACE) { - mutex_enter(&ire_handle_lock); - ire->ire_ihandle = (uint32_t)ire_handle++; - mutex_exit(&ire_handle_lock); + mutex_enter(&ipst->ips_ire_handle_lock); + ire->ire_ihandle = (uint32_t)ipst->ips_ire_handle++; + mutex_exit(&ipst->ips_ire_handle_lock); } else if (ire->ire_type == IRE_CACHE) { ire->ire_phandle = phandle; ire->ire_ihandle = ihandle; @@ -1949,6 +1974,7 @@ ire_init_common(ire_t *ire, uint_t *max_fragp, mblk_t *fp_mp, freemsg(fp_mp); } ire->ire_refcnt = 1; + ire->ire_ipst = ipst; /* No netstack_hold */ #ifdef IRE_DEBUG bzero(ire->ire_trace, sizeof (th_trace_t *) * IP_TR_HASH_MAX); @@ -1979,6 +2005,7 @@ ire_check_and_create_bcast(ipif_t *ipif, ipaddr_t addr, ire_t **irep, { ire_t *ire; uint64_t check_flags = IPIF_DEPRECATED | IPIF_NOLOCAL | IPIF_ANYCAST; + ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; /* * No broadcast IREs for the LOOPBACK interface @@ -1990,7 +2017,7 @@ ire_check_and_create_bcast(ipif_t *ipif, ipaddr_t addr, ire_t **irep, /* If this would be a duplicate, don't bother. */ if ((ire = ire_ctable_lookup(addr, 0, IRE_BROADCAST, ipif, - ipif->ipif_zoneid, NULL, match_flags)) != NULL) { + ipif->ipif_zoneid, NULL, match_flags, ipst)) != NULL) { /* * We look for non-deprecated (and non-anycast, non-nolocal) * ipifs as the best choice. 
ipifs with check_flags matching @@ -2030,6 +2057,8 @@ uint_t ip_loopback_mtu = IP_LOOPBACK_MTU; ire_t ** ire_create_bcast(ipif_t *ipif, ipaddr_t addr, ire_t **irep) { + ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; + *irep++ = ire_create( (uchar_t *)&addr, /* dest addr */ (uchar_t *)&ip_g_all_ones, /* mask */ @@ -2050,7 +2079,8 @@ ire_create_bcast(ipif_t *ipif, ipaddr_t addr, ire_t **irep) 0, &ire_uinfo_null, NULL, - NULL); + NULL, + ipst); *irep++ = ire_create( (uchar_t *)&addr, /* dest address */ @@ -2072,7 +2102,8 @@ ire_create_bcast(ipif_t *ipif, ipaddr_t addr, ire_t **irep) 0, &ire_uinfo_null, NULL, - NULL); + NULL, + ipst); return (irep); } @@ -2089,16 +2120,18 @@ ire_create_bcast(ipif_t *ipif, ipaddr_t addr, ire_t **irep) void ire_expire(ire_t *ire, char *arg) { - int flush_flags = (int)(uintptr_t)arg; - ill_t *stq_ill; + ire_expire_arg_t *ieap = (ire_expire_arg_t *)(uintptr_t)arg; + ill_t *stq_ill; + int flush_flags = ieap->iea_flush_flag; + ip_stack_t *ipst = ieap->iea_ipst; if ((flush_flags & FLUSH_REDIRECT_TIME) && (ire->ire_flags & RTF_DYNAMIC)) { /* Make sure we delete the corresponding IRE_CACHE */ ip1dbg(("ire_expire: all redirects\n")); - ip_rts_rtmsg(RTM_DELETE, ire, 0); + ip_rts_rtmsg(RTM_DELETE, ire, 0, ipst); ire_delete(ire); - atomic_dec_32(&ip_redirect_cnt); + atomic_dec_32(&ipst->ips_ip_redirect_cnt); return; } if (ire->ire_type != IRE_CACHE) @@ -2110,13 +2143,13 @@ ire_expire(ire_t *ire, char *arg) * Verify that create time is more than * ip_ire_arp_interval milliseconds ago. */ - if (NCE_EXPIRED(ire->ire_nce)) { + if (NCE_EXPIRED(ire->ire_nce, ipst)) { ire_delete(ire); return; } } - if (ip_path_mtu_discovery && (flush_flags & FLUSH_MTU_TIME) && + if (ipst->ips_ip_path_mtu_discovery && (flush_flags & FLUSH_MTU_TIME) && (ire->ire_ipif != NULL)) { /* Increase pmtu if it is less than the interface mtu */ mutex_enter(&ire->ire_lock); @@ -2143,15 +2176,15 @@ ire_expire(ire_t *ire, char *arg) * Preference for IRE_LOCAL entries. */ ire_t * -ire_lookup_local(zoneid_t zoneid) +ire_lookup_local(zoneid_t zoneid, ip_stack_t *ipst) { ire_t *ire; irb_t *irb; ire_t *maybe = NULL; int i; - for (i = 0; i < ip_cache_table_size; i++) { - irb = &ip_cache_table[i]; + for (i = 0; i < ipst->ips_ip_cache_table_size; i++) { + irb = &ipst->ips_ip_cache_table[i]; if (irb->irb_ire == NULL) continue; rw_enter(&irb->irb_lock, RW_READER); @@ -2243,42 +2276,46 @@ ire_to_ill(const ire_t *ire) /* Arrange to call the specified function for every IRE in the world. */ void -ire_walk(pfv_t func, void *arg) +ire_walk(pfv_t func, void *arg, ip_stack_t *ipst) { - ire_walk_ipvers(func, arg, 0, ALL_ZONES); + ire_walk_ipvers(func, arg, 0, ALL_ZONES, ipst); } void -ire_walk_v4(pfv_t func, void *arg, zoneid_t zoneid) +ire_walk_v4(pfv_t func, void *arg, zoneid_t zoneid, ip_stack_t *ipst) { - ire_walk_ipvers(func, arg, IPV4_VERSION, zoneid); + ire_walk_ipvers(func, arg, IPV4_VERSION, zoneid, ipst); } void -ire_walk_v6(pfv_t func, void *arg, zoneid_t zoneid) +ire_walk_v6(pfv_t func, void *arg, zoneid_t zoneid, ip_stack_t *ipst) { - ire_walk_ipvers(func, arg, IPV6_VERSION, zoneid); + ire_walk_ipvers(func, arg, IPV6_VERSION, zoneid, ipst); } /* * Walk a particular version. version == 0 means both v4 and v6. 
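The walker entry points above (ire_walk, ire_walk_v4, ire_walk_v6) now take an explicit ip_stack_t, so a caller names which stack's tables to traverse instead of implicitly walking a single global table. A sketch of a caller in the style of this file; count_cache_ire and cache_ire_count are invented names, and the snippet assumes the usual inet/ip.h definitions rather than being a standalone program:

	/*
	 * Illustrative only: count the IRE_CACHE entries in one stack's
	 * IPv4 tables.
	 */
	static void
	count_cache_ire(ire_t *ire, char *arg)
	{
		uint_t *cntp = (uint_t *)arg;

		if (ire->ire_type == IRE_CACHE)
			(*cntp)++;
	}

	static uint_t
	cache_ire_count(ip_stack_t *ipst)
	{
		uint_t cnt = 0;

		/* ALL_ZONES: visit entries no matter which zone owns them. */
		ire_walk_v4((pfv_t)count_cache_ire, (void *)&cnt, ALL_ZONES, ipst);
		return (cnt);
	}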
*/ static void -ire_walk_ipvers(pfv_t func, void *arg, uchar_t vers, zoneid_t zoneid) +ire_walk_ipvers(pfv_t func, void *arg, uchar_t vers, zoneid_t zoneid, + ip_stack_t *ipst) { if (vers != IPV6_VERSION) { /* * ip_forwarding_table variable doesn't matter for IPv4 since - * ire_walk_ill_tables directly calls with the ip_ftable global + * ire_walk_ill_tables uses ips_ip_ftable for IPv4. */ ire_walk_ill_tables(0, 0, func, arg, IP_MASK_TABLE_SIZE, 0, NULL, - ip_cache_table_size, ip_cache_table, NULL, zoneid); + ipst->ips_ip_cache_table_size, ipst->ips_ip_cache_table, + NULL, zoneid, ipst); } if (vers != IPV4_VERSION) { ire_walk_ill_tables(0, 0, func, arg, IP6_MASK_TABLE_SIZE, - ip6_ftable_hash_size, ip_forwarding_table_v6, - ip6_cache_table_size, ip_cache_table_v6, NULL, zoneid); + ipst->ips_ip6_ftable_hash_size, + ipst->ips_ip_forwarding_table_v6, + ipst->ips_ip6_cache_table_size, + ipst->ips_ip_cache_table_v6, NULL, zoneid, ipst); } } @@ -2316,23 +2353,26 @@ static void ire_walk_ill_ipvers(uint_t match_flags, uint_t ire_type, pfv_t func, void *arg, uchar_t vers, ill_t *ill) { + ip_stack_t *ipst = ill->ill_ipst; + if (vers != IPV6_VERSION) { ire_walk_ill_tables(match_flags, ire_type, func, arg, IP_MASK_TABLE_SIZE, 0, - NULL, ip_cache_table_size, - ip_cache_table, ill, ALL_ZONES); + NULL, ipst->ips_ip_cache_table_size, + ipst->ips_ip_cache_table, ill, ALL_ZONES, ipst); } if (vers != IPV4_VERSION) { ire_walk_ill_tables(match_flags, ire_type, func, arg, - IP6_MASK_TABLE_SIZE, ip6_ftable_hash_size, - ip_forwarding_table_v6, ip6_cache_table_size, - ip_cache_table_v6, ill, ALL_ZONES); + IP6_MASK_TABLE_SIZE, ipst->ips_ip6_ftable_hash_size, + ipst->ips_ip_forwarding_table_v6, + ipst->ips_ip6_cache_table_size, + ipst->ips_ip_cache_table_v6, ill, ALL_ZONES, ipst); } } boolean_t ire_walk_ill_match(uint_t match_flags, uint_t ire_type, ire_t *ire, - ill_t *ill, zoneid_t zoneid) + ill_t *ill, zoneid_t zoneid, ip_stack_t *ipst) { ill_t *ire_stq_ill = NULL; ill_t *ire_ipif_ill = NULL; @@ -2442,7 +2482,7 @@ ire_walk_ill_match(uint_t match_flags, uint_t ire_type, ire_t *ire, if (ire->ire_ipversion == IPV4_VERSION) { rire = ire_route_lookup(ire->ire_gateway_addr, 0, 0, IRE_INTERFACE, ire->ire_ipif, NULL, - zoneid, NULL, ire_match_flags); + zoneid, NULL, ire_match_flags, ipst); } else { ASSERT(ire->ire_ipversion == IPV6_VERSION); mutex_enter(&ire->ire_lock); @@ -2450,7 +2490,7 @@ ire_walk_ill_match(uint_t match_flags, uint_t ire_type, ire_t *ire, mutex_exit(&ire->ire_lock); rire = ire_route_lookup_v6(&gw_addr_v6, NULL, NULL, IRE_INTERFACE, ire->ire_ipif, - NULL, zoneid, NULL, ire_match_flags); + NULL, zoneid, NULL, ire_match_flags, ipst); } if (rire == NULL) { return (B_FALSE); @@ -2491,7 +2531,7 @@ rtfunc(struct radix_node *rn, void *arg) (rtf->rt_zoneid != ALL_ZONES)) { ret = ire_walk_ill_match(rtf->rt_match_flags, rtf->rt_ire_type, ire, - rtf->rt_ill, rtf->rt_zoneid); + rtf->rt_ill, rtf->rt_zoneid, rtf->rt_ipst); } else ret = B_TRUE; if (ret) @@ -2506,7 +2546,8 @@ rtfunc(struct radix_node *rn, void *arg) void ire_walk_ill_tables(uint_t match_flags, uint_t ire_type, pfv_t func, void *arg, size_t ftbl_sz, size_t htbl_sz, irb_t **ipftbl, - size_t ctbl_sz, irb_t *ipctbl, ill_t *ill, zoneid_t zoneid) + size_t ctbl_sz, irb_t *ipctbl, ill_t *ill, zoneid_t zoneid, + ip_stack_t *ipst) { irb_t *irb_ptr; irb_t *irb; @@ -2526,7 +2567,7 @@ ire_walk_ill_tables(uint_t match_flags, uint_t ire_type, pfv_t func, if (!(match_flags & MATCH_IRE_TYPE) || ((ire_type & IRE_FORWARDTABLE) != 0)) { /* knobs such that routine is called 
only for v6 case */ - if (ipftbl == ip_forwarding_table_v6) { + if (ipftbl == ipst->ips_ip_forwarding_table_v6) { for (i = (ftbl_sz - 1); i >= 0; i--) { if ((irb_ptr = ipftbl[i]) == NULL) continue; @@ -2546,7 +2587,7 @@ ire_walk_ill_tables(uint_t match_flags, uint_t ire_type, pfv_t func, ire_walk_ill_match( match_flags, ire_type, ire, ill, - zoneid); + zoneid, ipst); } if (ret) (*func)(ire, arg); @@ -2564,8 +2605,10 @@ ire_walk_ill_tables(uint_t match_flags, uint_t ire_type, pfv_t func, rtfarg.rt_ire_type = ire_type; rtfarg.rt_ill = ill; rtfarg.rt_zoneid = zoneid; - (void) ip_ftable->rnh_walktree_mt(ip_ftable, rtfunc, - &rtfarg, irb_refhold_rn, irb_refrele_rn); + rtfarg.rt_ipst = ipst; /* No netstack_hold */ + (void) ipst->ips_ip_ftable->rnh_walktree_mt( + ipst->ips_ip_ftable, + rtfunc, &rtfarg, irb_refhold_rn, irb_refrele_rn); } } @@ -2588,7 +2631,7 @@ ire_walk_ill_tables(uint_t match_flags, uint_t ire_type, pfv_t func, } else { ret = ire_walk_ill_match( match_flags, ire_type, - ire, ill, zoneid); + ire, ill, zoneid, ipst); } if (ret) (*func)(ire, arg); @@ -2605,7 +2648,7 @@ ire_walk_ill_tables(uint_t match_flags, uint_t ire_type, pfv_t func, * down/deleted or the 'ipv4_ire_srcif_status' report is printed. */ void -ire_walk_srcif_table_v4(pfv_t func, void *arg) +ire_walk_srcif_table_v4(pfv_t func, void *arg, ip_stack_t *ipst) { irb_t *irb; ire_t *ire; @@ -2619,18 +2662,18 @@ ire_walk_srcif_table_v4(pfv_t func, void *arg) * table. Check if any ire in any of the ill's ill_srcif_table * is pointing to this ill. */ - mutex_enter(&ire_srcif_table_lock); - if (ire_srcif_table_count == 0) { - mutex_exit(&ire_srcif_table_lock); + mutex_enter(&ipst->ips_ire_srcif_table_lock); + if (ipst->ips_ire_srcif_table_count == 0) { + mutex_exit(&ipst->ips_ire_srcif_table_lock); return; } - mutex_exit(&ire_srcif_table_lock); + mutex_exit(&ipst->ips_ire_srcif_table_lock); #ifdef DEBUG /* Keep accounting of all interface based table ires */ total_count = 0; - rw_enter(&ill_g_lock, RW_READER); - ill = ILL_START_WALK_V4(&ctx); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); + ill = ILL_START_WALK_V4(&ctx, ipst); while (ill != NULL) { mutex_enter(&ill->ill_lock); total_count += ill->ill_srcif_refcnt; @@ -2638,17 +2681,17 @@ ire_walk_srcif_table_v4(pfv_t func, void *arg) mutex_exit(&ill->ill_lock); ill = next_ill; } - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); /* Hold lock here to make sure ire_srcif_table_count is stable */ - mutex_enter(&ire_srcif_table_lock); - i = ire_srcif_table_count; - mutex_exit(&ire_srcif_table_lock); + mutex_enter(&ipst->ips_ire_srcif_table_lock); + i = ipst->ips_ire_srcif_table_count; + mutex_exit(&ipst->ips_ire_srcif_table_lock); ip1dbg(("ire_walk_srcif_v4: ire_srcif_table_count %d " "total ill_srcif_refcnt %d\n", i, total_count)); #endif - rw_enter(&ill_g_lock, RW_READER); - ill = ILL_START_WALK_V4(&ctx); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); + ill = ILL_START_WALK_V4(&ctx, ipst); while (ill != NULL) { mutex_enter(&ill->ill_lock); if ((ill->ill_srcif_refcnt == 0) || !ILL_CAN_LOOKUP(ill)) { @@ -2659,7 +2702,7 @@ ire_walk_srcif_table_v4(pfv_t func, void *arg) } ill_refhold_locked(ill); mutex_exit(&ill->ill_lock); - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); if (ill->ill_srcif_table != NULL) { for (i = 0; i < IP_SRCIF_TABLE_SIZE; i++) { irb = &(ill->ill_srcif_table[i]); @@ -2673,12 +2716,12 @@ ire_walk_srcif_table_v4(pfv_t func, void *arg) IRB_REFRELE(irb); } } - rw_enter(&ill_g_lock, RW_READER); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); next_ill = 
ill_next(&ctx, ill); ill_refrele(ill); ill = next_ill; } - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); } /* @@ -2706,13 +2749,14 @@ void ire_atomic_end(irb_t *irb_ptr, ire_t *ire) { ill_t *ill_list[NUM_ILLS]; + ip_stack_t *ipst = ire->ire_ipst; ill_list[0] = ire->ire_stq != NULL ? ire->ire_stq->q_ptr : NULL; ill_list[1] = ire->ire_ipif != NULL ? ire->ire_ipif->ipif_ill : NULL; ill_list[2] = ire->ire_in_ill; ill_unlock_ills(ill_list, NUM_ILLS); rw_exit(&irb_ptr->irb_lock); - rw_exit(&ill_g_usesrc_lock); + rw_exit(&ipst->ips_ill_g_usesrc_lock); } /* @@ -2735,6 +2779,7 @@ ire_atomic_start(irb_t *irb_ptr, ire_t *ire, queue_t *q, mblk_t *mp, int cnt = NUM_ILLS; int error = 0; ill_t *ill = NULL; + ip_stack_t *ipst = ire->ire_ipst; ill_list[0] = stq_ill = ire->ire_stq != NULL ? ire->ire_stq->q_ptr : NULL; @@ -2744,7 +2789,7 @@ ire_atomic_start(irb_t *irb_ptr, ire_t *ire, queue_t *q, mblk_t *mp, ASSERT((q != NULL && mp != NULL && func != NULL) || (q == NULL && mp == NULL && func == NULL)); - rw_enter(&ill_g_usesrc_lock, RW_READER); + rw_enter(&ipst->ips_ill_g_usesrc_lock, RW_READER); GRAB_CONN_LOCK(q); rw_enter(&irb_ptr->irb_lock, RW_WRITER); ill_lock_ills(ill_list, cnt); @@ -2864,6 +2909,7 @@ ire_add(ire_t **irep, queue_t *q, mblk_t *mp, ipsq_func_t func, boolean_t ire_is_mblk = B_FALSE; tsol_gcgrp_t *gcgrp = NULL; tsol_gcgrp_addr_t ga; + ip_stack_t *ipst = ire->ire_ipst; ASSERT(ire->ire_type != IRE_MIPRTUN); @@ -2922,8 +2968,8 @@ ire_add(ire_t **irep, queue_t *q, mblk_t *mp, ipsq_func_t func, if (stq_ill != NULL && ire->ire_type == IRE_CACHE && stq_ill->ill_net_type == IRE_IF_RESOLVER) { - rw_enter(&ill_g_lock, RW_READER); - ill = ILL_START_WALK_ALL(&ctx); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); + ill = ILL_START_WALK_ALL(&ctx, ipst); for (; ill != NULL; ill = ill_next(&ctx, ill)) { mutex_enter(&ill->ill_lock); if (ill->ill_state_flags & ILL_CONDEMNED) { @@ -2954,7 +3000,7 @@ ire_add(ire_t **irep, queue_t *q, mblk_t *mp, ipsq_func_t func, } mutex_exit(&ill->ill_lock); } - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); if (ipif == NULL || (ipif->ipif_isv6 && !IN6_ARE_ADDR_EQUAL(&ire->ire_src_addr_v6, @@ -3064,6 +3110,7 @@ ire_add_v4(ire_t **ire_p, queue_t *q, mblk_t *mp, ipsq_func_t func, int error; boolean_t need_refrele = B_FALSE; nce_t *nce; + ip_stack_t *ipst = ire->ire_ipst; if (ire->ire_ipif != NULL) ASSERT(!MUTEX_HELD(&ire->ire_ipif->ipif_ill->ill_lock)); @@ -3216,8 +3263,8 @@ ire_add_v4(ire_t **ire_p, queue_t *q, mblk_t *mp, ipsq_func_t func, return (EINVAL); } } else { - irb_ptr = &(ip_cache_table[IRE_ADDR_HASH(ire->ire_addr, - ip_cache_table_size)]); + irb_ptr = &(ipst->ips_ip_cache_table[IRE_ADDR_HASH( + ire->ire_addr, ipst->ips_ip_cache_table_size)]); } /* @@ -3226,12 +3273,12 @@ ire_add_v4(ire_t **ire_p, queue_t *q, mblk_t *mp, ipsq_func_t func, * * If ipif or ill is changing ire_atomic_start() may queue the * request and return EINPROGRESS. - * To avoid lock order problems, get the ndp4.ndp_g_lock. + * To avoid lock order problems, get the ndp4->ndp_g_lock. */ - mutex_enter(&ndp4.ndp_g_lock); + mutex_enter(&ipst->ips_ndp4->ndp_g_lock); error = ire_atomic_start(irb_ptr, ire, q, mp, func); if (error != 0) { - mutex_exit(&ndp4.ndp_g_lock); + mutex_exit(&ipst->ips_ndp4->ndp_g_lock); /* * We don't know whether it is a valid ipif or not. * So, set it to NULL. 
This assumes that the ire has not added @@ -3319,7 +3366,7 @@ ire_add_v4(ire_t **ire_p, queue_t *q, mblk_t *mp, ipsq_func_t func, (void *)ire1, (void *)ire)); IRE_REFHOLD(ire1); ire_atomic_end(irb_ptr, ire); - mutex_exit(&ndp4.ndp_g_lock); + mutex_exit(&ipst->ips_ndp4->ndp_g_lock); ire_delete(ire); if (pire != NULL) { /* @@ -3358,7 +3405,7 @@ ire_add_v4(ire_t **ire_p, queue_t *q, mblk_t *mp, ipsq_func_t func, if (nce != NULL) mutex_exit(&nce->nce_lock); ire_atomic_end(irb_ptr, ire); - mutex_exit(&ndp4.ndp_g_lock); + mutex_exit(&ipst->ips_ndp4->ndp_g_lock); if (nce != NULL) NCE_REFRELE(nce); DTRACE_PROBE1(ire__no__nce, ire_t *, ire); @@ -3511,7 +3558,7 @@ ire_add_v4(ire_t **ire_p, queue_t *q, mblk_t *mp, ipsq_func_t func, * up the reference count on this yet. */ IRE_REFHOLD_LOCKED(ire); - BUMP_IRE_STATS(ire_stats_v4, ire_stats_inserted); + BUMP_IRE_STATS(ipst->ips_ire_stats_v4, ire_stats_inserted); irb_ptr->irb_ire_cnt++; if (irb_ptr->irb_marks & IRB_MARK_FTABLE) @@ -3531,7 +3578,7 @@ ire_add_v4(ire_t **ire_p, queue_t *q, mblk_t *mp, ipsq_func_t func, } ire_atomic_end(irb_ptr, ire); - mutex_exit(&ndp4.ndp_g_lock); + mutex_exit(&ipst->ips_ndp4->ndp_g_lock); if (pire != NULL) { /* Assert that it is not removed from the list yet */ @@ -3551,7 +3598,7 @@ ire_add_v4(ire_t **ire_p, queue_t *q, mblk_t *mp, ipsq_func_t func, if (ire->ire_mask == IP_HOST_MASK) { ire_t *lire; lire = ire_ctable_lookup(ire->ire_addr, NULL, IRE_CACHE, - NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE); + NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE, ipst); if (lire != NULL) { ire_refrele(lire); ire_flush_cache_v4(ire, IRE_FLUSH_ADD); @@ -3587,6 +3634,7 @@ void ire_cleanup(ire_t *ire) { ire_t *ire_next; + ip_stack_t *ipst = ire->ire_ipst; ASSERT(ire != NULL); @@ -3594,11 +3642,13 @@ ire_cleanup(ire_t *ire) ire_next = ire->ire_next; if (ire->ire_ipversion == IPV4_VERSION) { ire_delete_v4(ire); - BUMP_IRE_STATS(ire_stats_v4, ire_stats_deleted); + BUMP_IRE_STATS(ipst->ips_ire_stats_v4, + ire_stats_deleted); } else { ASSERT(ire->ire_ipversion == IPV6_VERSION); ire_delete_v6(ire); - BUMP_IRE_STATS(ire_stats_v6, ire_stats_deleted); + BUMP_IRE_STATS(ipst->ips_ire_stats_v6, + ire_stats_deleted); } /* * Now it's really out of the list. Before doing the @@ -3630,6 +3680,8 @@ ire_unlink(irb_t *irb) ASSERT(irb->irb_ire != NULL); for (ire = irb->irb_ire; ire != NULL; ire = ire1) { + ip_stack_t *ipst = ire->ire_ipst; + ire1 = ire->ire_next; if (ire->ire_marks & IRE_MARK_CONDEMNED) { ptpn = ire->ire_ptpn; @@ -3646,8 +3698,10 @@ ire_unlink(irb_t *irb) * the lock. */ if (ire->ire_ipversion == IPV6_VERSION) { - ASSERT(ipv6_ire_default_count != 0); - ipv6_ire_default_count--; + ASSERT(ipst-> + ips_ipv6_ire_default_count != + 0); + ipst->ips_ipv6_ire_default_count--; } } /* @@ -3683,7 +3737,7 @@ ire_unlink(irb_t *irb) * ip_if->ipif_ill also needs to be matched. */ boolean_t -ip_ire_clookup_and_delete(ipaddr_t addr, ipif_t *ipif) +ip_ire_clookup_and_delete(ipaddr_t addr, ipif_t *ipif, ip_stack_t *ipst) { ill_t *ill; nce_t *nce; @@ -3710,7 +3764,7 @@ ip_ire_clookup_and_delete(ipaddr_t addr, ipif_t *ipif) cl.ncecl_addr = addr; cl.ncecl_found = B_FALSE; - ndp_walk_common(&ndp4, NULL, + ndp_walk_common(ipst->ips_ndp4, NULL, (pfi_t)ip_nce_clookup_and_delete, (uchar_t *)&cl, B_TRUE); /* @@ -3748,25 +3802,28 @@ irb_inactive(irb_t *irb) { struct rt_entry *rt; struct radix_node *rn; + ip_stack_t *ipst = irb->irb_ipst; + + ASSERT(irb->irb_ipst != NULL); rt = IRB2RT(irb); rn = (struct radix_node *)rt; /* first remove it from the radix tree. 
*/ - RADIX_NODE_HEAD_WLOCK(ip_ftable); + RADIX_NODE_HEAD_WLOCK(ipst->ips_ip_ftable); rw_enter(&irb->irb_lock, RW_WRITER); if (irb->irb_refcnt == 1 && irb->irb_nire == 0) { - rn = ip_ftable->rnh_deladdr(rn->rn_key, rn->rn_mask, - ip_ftable); + rn = ipst->ips_ip_ftable->rnh_deladdr(rn->rn_key, rn->rn_mask, + ipst->ips_ip_ftable); DTRACE_PROBE1(irb__free, rt_t *, rt); ASSERT((void *)rn == (void *)rt); Free(rt, rt_entry_cache); /* irb_lock is freed */ - RADIX_NODE_HEAD_UNLOCK(ip_ftable); + RADIX_NODE_HEAD_UNLOCK(ipst->ips_ip_ftable); return (B_TRUE); } rw_exit(&irb->irb_lock); - RADIX_NODE_HEAD_UNLOCK(ip_ftable); + RADIX_NODE_HEAD_UNLOCK(ipst->ips_ip_ftable); return (B_FALSE); } @@ -3779,6 +3836,7 @@ ire_delete(ire_t *ire) ire_t *ire1; ire_t **ptpn; irb_t *irb; + ip_stack_t *ipst = ire->ire_ipst; if ((irb = ire->ire_bucket) == NULL) { /* @@ -3848,9 +3906,9 @@ ire_delete(ire_t *ire) ire->ire_ptpn = NULL; ire->ire_next = NULL; if (ire->ire_ipversion == IPV6_VERSION) { - BUMP_IRE_STATS(ire_stats_v6, ire_stats_deleted); + BUMP_IRE_STATS(ipst->ips_ire_stats_v6, ire_stats_deleted); } else { - BUMP_IRE_STATS(ire_stats_v4, ire_stats_deleted); + BUMP_IRE_STATS(ipst->ips_ire_stats_v4, ire_stats_deleted); } /* * ip_wput/ip_wput_v6 checks this flag to see whether @@ -3863,8 +3921,8 @@ ire_delete(ire_t *ire) * accounting before we drop the lock. */ if (ire->ire_ipversion == IPV6_VERSION) { - ASSERT(ipv6_ire_default_count != 0); - ipv6_ire_default_count--; + ASSERT(ipst->ips_ipv6_ire_default_count != 0); + ipst->ips_ipv6_ire_default_count--; } } irb->irb_ire_cnt--; @@ -3896,6 +3954,8 @@ ire_delete(ire_t *ire) static void ire_delete_v4(ire_t *ire) { + ip_stack_t *ipst = ire->ire_ipst; + ASSERT(ire->ire_refcnt >= 1); ASSERT(ire->ire_ipversion == IPV4_VERSION); @@ -3907,7 +3967,7 @@ ire_delete_v4(ire_t *ire) * delete all the host redirects pointing at that * gateway. */ - ire_delete_host_redirects(ire->ire_gateway_addr); + ire_delete_host_redirects(ire->ire_gateway_addr, ipst); } } @@ -3925,6 +3985,7 @@ ire_inactive(ire_t *ire) ipif_t *ipif; boolean_t need_wakeup = B_FALSE; irb_t *irb; + ip_stack_t *ipst = ire->ire_ipst; ASSERT(ire->ire_refcnt == 0); ASSERT(ire->ire_ptpn == NULL); @@ -3938,7 +3999,7 @@ ire_inactive(ire_t *ire) if (ire->ire_mp != NULL) { ASSERT(ire->ire_bucket == NULL); mutex_destroy(&ire->ire_lock); - BUMP_IRE_STATS(ire_stats_v4, ire_stats_freed); + BUMP_IRE_STATS(ipst->ips_ire_stats_v4, ire_stats_freed); if (ire->ire_nce != NULL) NCE_REFRELE_NOTR(ire->ire_nce); freeb(ire->ire_mp); @@ -4040,9 +4101,9 @@ ire_inactive(ire_t *ire) * mipagent is deleting reverse tunnel * route for a particular mobile node. 
*/ - mutex_enter(&ire_mrtun_lock); - ire_mrtun_count--; - mutex_exit(&ire_mrtun_lock); + mutex_enter(&ipst->ips_ire_mrtun_lock); + ipst->ips_ire_mrtun_count--; + mutex_exit(&ipst->ips_ire_mrtun_lock); ASSERT(in_ill->ill_mrtun_refcnt != 0); in_ill->ill_mrtun_refcnt--; if (in_ill->ill_mrtun_refcnt == 0) { @@ -4052,9 +4113,9 @@ ire_inactive(ire_t *ire) mutex_exit(&in_ill->ill_lock); } } else { - mutex_enter(&ire_srcif_table_lock); - ire_srcif_table_count--; - mutex_exit(&ire_srcif_table_lock); + mutex_enter(&ipst->ips_ire_srcif_table_lock); + ipst->ips_ire_srcif_table_count--; + mutex_exit(&ipst->ips_ire_srcif_table_lock); ASSERT(in_ill->ill_srcif_refcnt != 0); in_ill->ill_srcif_refcnt--; if (in_ill->ill_srcif_refcnt == 0) { @@ -4096,9 +4157,9 @@ end: #endif mutex_destroy(&ire->ire_lock); if (ire->ire_ipversion == IPV6_VERSION) { - BUMP_IRE_STATS(ire_stats_v6, ire_stats_freed); + BUMP_IRE_STATS(ipst->ips_ire_stats_v6, ire_stats_freed); } else { - BUMP_IRE_STATS(ire_stats_v4, ire_stats_freed); + BUMP_IRE_STATS(ipst->ips_ire_stats_v4, ire_stats_freed); } ASSERT(ire->ire_mp == NULL); /* Has been allocated out of the cache */ @@ -4153,6 +4214,7 @@ ire_flush_cache_v4(ire_t *ire, int flag) int i; ire_t *cire; irb_t *irb; + ip_stack_t *ipst = ire->ire_ipst; if (ire->ire_type & IRE_CACHE) return; @@ -4169,8 +4231,8 @@ ire_flush_cache_v4(ire_t *ire, int flag) * This selective flush is due to the addition of * new IRE. */ - for (i = 0; i < ip_cache_table_size; i++) { - irb = &ip_cache_table[i]; + for (i = 0; i < ipst->ips_ip_cache_table_size; i++) { + irb = &ipst->ips_ip_cache_table[i]; if ((cire = irb->irb_ire) == NULL) continue; IRB_REFHOLD(irb); @@ -4240,8 +4302,8 @@ ire_flush_cache_v4(ire_t *ire, int flag) * handle in the IRE as this IRE is * being deleted/changed. */ - for (i = 0; i < ip_cache_table_size; i++) { - irb = &ip_cache_table[i]; + for (i = 0; i < ipst->ips_ip_cache_table_size; i++) { + irb = &ipst->ips_ip_cache_table[i]; if ((cire = irb->irb_ire) == NULL) continue; IRB_REFHOLD(irb); @@ -4443,7 +4505,7 @@ ire_match_args(ire_t *ire, ipaddr_t addr, ipaddr_t mask, ipaddr_t gateway, ire_t * ire_route_lookup(ipaddr_t addr, ipaddr_t mask, ipaddr_t gateway, int type, const ipif_t *ipif, ire_t **pire, zoneid_t zoneid, - const ts_label_t *tsl, int flags) + const ts_label_t *tsl, int flags, ip_stack_t *ipst) { ire_t *ire = NULL; @@ -4465,13 +4527,13 @@ ire_route_lookup(ipaddr_t addr, ipaddr_t mask, ipaddr_t gateway, */ if ((flags & MATCH_IRE_TYPE) == 0 || (type & IRE_CACHETABLE) != 0) { ire = ire_ctable_lookup(addr, gateway, type, ipif, zoneid, - tsl, flags); + tsl, flags, ipst); if (ire != NULL) return (ire); } if ((flags & MATCH_IRE_TYPE) == 0 || (type & IRE_FORWARDTABLE) != 0) { ire = ire_ftable_lookup(addr, mask, gateway, type, ipif, pire, - zoneid, 0, tsl, flags); + zoneid, 0, tsl, flags, ipst); } return (ire); } @@ -4483,12 +4545,13 @@ ire_route_lookup(ipaddr_t addr, ipaddr_t mask, ipaddr_t gateway, * be created on demand by ip_newroute. 
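A recurring shape in these lookup hunks: every ire_*_lookup now ends in an ip_stack_t argument, and callers derive it from whatever stack-aware object they already hold (ire_ipst, ill_ipst, an ipif's ill, or a conn queue). A condensed illustration assembled from calls shown in this diff; ire_lookup_loopback is an invented wrapper name, and the body mirrors the IRE_LOOPBACK branch of ipif_to_ire a little further down:

	/*
	 * Illustrative only: derive the owning stack from the ipif, then do
	 * a per-stack cache-table lookup.
	 */
	static ire_t *
	ire_lookup_loopback(ipif_t *ipif)
	{
		ip_stack_t *ipst = ipif->ipif_ill->ill_ipst;	/* owning stack */

		return (ire_ctable_lookup(ipif->ipif_lcl_addr, 0, IRE_LOOPBACK,
		    ipif, ALL_ZONES, NULL, (MATCH_IRE_TYPE | MATCH_IRE_IPIF),
		    ipst));
	}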
*/ void -ire_clookup_delete_cache_gw(ipaddr_t addr, zoneid_t zoneid) +ire_clookup_delete_cache_gw(ipaddr_t addr, zoneid_t zoneid, ip_stack_t *ipst) { irb_t *irb; ire_t *ire; - irb = &ip_cache_table[IRE_ADDR_HASH(addr, ip_cache_table_size)]; + irb = &ipst->ips_ip_cache_table[IRE_ADDR_HASH(addr, + ipst->ips_ip_cache_table_size)]; IRB_REFHOLD(irb); for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { if (ire->ire_marks & IRE_MARK_CONDEMNED) @@ -4503,7 +4566,7 @@ ire_clookup_delete_cache_gw(ipaddr_t addr, zoneid_t zoneid) } IRB_REFRELE(irb); - ire_walk_v4(ire_delete_cache_gw, &addr, zoneid); + ire_walk_v4(ire_delete_cache_gw, &addr, zoneid, ipst); } /* @@ -4514,7 +4577,7 @@ ire_clookup_delete_cache_gw(ipaddr_t addr, zoneid_t zoneid) */ ire_t * ire_ctable_lookup(ipaddr_t addr, ipaddr_t gateway, int type, const ipif_t *ipif, - zoneid_t zoneid, const ts_label_t *tsl, int flags) + zoneid_t zoneid, const ts_label_t *tsl, int flags, ip_stack_t *ipst) { irb_t *irb_ptr; ire_t *ire; @@ -4527,7 +4590,8 @@ ire_ctable_lookup(ipaddr_t addr, ipaddr_t gateway, int type, const ipif_t *ipif, (ipif == NULL)) return (NULL); - irb_ptr = &ip_cache_table[IRE_ADDR_HASH(addr, ip_cache_table_size)]; + irb_ptr = &ipst->ips_ip_cache_table[IRE_ADDR_HASH(addr, + ipst->ips_ip_cache_table_size)]; rw_enter(&irb_ptr->irb_lock, RW_READER); for (ire = irb_ptr->irb_ire; ire != NULL; ire = ire->ire_next) { if (ire->ire_marks & IRE_MARK_CONDEMNED) @@ -4584,7 +4648,7 @@ ire_local_same_ill_group(ire_t *ire_local, ire_t *xmit_ire) */ boolean_t ire_local_ok_across_zones(ire_t *ire_local, zoneid_t zoneid, void *addr, - const ts_label_t *tsl) + const ts_label_t *tsl, ip_stack_t *ipst) { ire_t *alt_ire; boolean_t rval; @@ -4593,12 +4657,12 @@ ire_local_ok_across_zones(ire_t *ire_local, zoneid_t zoneid, void *addr, alt_ire = ire_ftable_lookup(*((ipaddr_t *)addr), 0, 0, 0, NULL, NULL, zoneid, 0, tsl, MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | - MATCH_IRE_RJ_BHOLE); + MATCH_IRE_RJ_BHOLE, ipst); } else { alt_ire = ire_ftable_lookup_v6((in6_addr_t *)addr, NULL, NULL, 0, NULL, NULL, zoneid, 0, tsl, MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | - MATCH_IRE_RJ_BHOLE); + MATCH_IRE_RJ_BHOLE, ipst); } if (alt_ire == NULL) @@ -4633,12 +4697,14 @@ ire_local_ok_across_zones(ire_t *ire_local, zoneid_t zoneid, void *addr, * ip_restrict_interzone_loopback is turned off. 
*/ ire_t * -ire_cache_lookup(ipaddr_t addr, zoneid_t zoneid, const ts_label_t *tsl) +ire_cache_lookup(ipaddr_t addr, zoneid_t zoneid, const ts_label_t *tsl, + ip_stack_t *ipst) { irb_t *irb_ptr; ire_t *ire; - irb_ptr = &ip_cache_table[IRE_ADDR_HASH(addr, ip_cache_table_size)]; + irb_ptr = &ipst->ips_ip_cache_table[IRE_ADDR_HASH(addr, + ipst->ips_ip_cache_table_size)]; rw_enter(&irb_ptr->irb_lock, RW_READER); for (ire = irb_ptr->irb_ire; ire != NULL; ire = ire->ire_next) { if (ire->ire_marks & (IRE_MARK_CONDEMNED | @@ -4665,9 +4731,9 @@ ire_cache_lookup(ipaddr_t addr, zoneid_t zoneid, const ts_label_t *tsl) } if (ire->ire_type == IRE_LOCAL) { - if (ip_restrict_interzone_loopback && + if (ipst->ips_ip_restrict_interzone_loopback && !ire_local_ok_across_zones(ire, zoneid, - &addr, tsl)) + &addr, tsl, ipst)) continue; IRE_REFHOLD(ire); @@ -4696,6 +4762,7 @@ ire_ihandle_lookup_offlink(ire_t *cire, ire_t *pire) int match_flags; ipaddr_t gw_addr; ipif_t *gw_ipif; + ip_stack_t *ipst = cire->ire_ipst; ASSERT(cire != NULL && pire != NULL); @@ -4718,7 +4785,7 @@ ire_ihandle_lookup_offlink(ire_t *cire, ire_t *pire) */ ire = ire_ftable_lookup(cire->ire_addr, cire->ire_cmask, 0, IRE_INTERFACE, pire->ire_ipif, NULL, ALL_ZONES, cire->ire_ihandle, - NULL, match_flags); + NULL, match_flags, ipst); if (ire != NULL) return (ire); /* @@ -4749,7 +4816,7 @@ ire_ihandle_lookup_offlink(ire_t *cire, ire_t *pire) if (pire->ire_ipif != NULL) match_flags |= MATCH_IRE_ILL_GROUP; ire = ire_ftable_lookup(pire->ire_gateway_addr, 0, 0, IRE_OFFSUBNET, - pire->ire_ipif, NULL, ALL_ZONES, 0, NULL, match_flags); + pire->ire_ipif, NULL, ALL_ZONES, 0, NULL, match_flags, ipst); if (ire == NULL) return (NULL); /* @@ -4762,7 +4829,8 @@ ire_ihandle_lookup_offlink(ire_t *cire, ire_t *pire) match_flags |= MATCH_IRE_IHANDLE; ire = ire_ftable_lookup(gw_addr, 0, 0, IRE_INTERFACE, - gw_ipif, NULL, ALL_ZONES, cire->ire_ihandle, NULL, match_flags); + gw_ipif, NULL, ALL_ZONES, cire->ire_ihandle, NULL, match_flags, + ipst); return (ire); } @@ -4777,13 +4845,15 @@ ire_mrtun_lookup(ipaddr_t srcaddr, ill_t *ill) { irb_t *irb_ptr; ire_t *ire; + ip_stack_t *ipst = ill->ill_ipst; ASSERT(ill != NULL); ASSERT(!(ill->ill_isv6)); - if (ip_mrtun_table == NULL) + if (ipst->ips_ip_mrtun_table == NULL) return (NULL); - irb_ptr = &ip_mrtun_table[IRE_ADDR_HASH(srcaddr, IP_MRTUN_TABLE_SIZE)]; + irb_ptr = &ipst->ips_ip_mrtun_table[IRE_ADDR_HASH(srcaddr, + IP_MRTUN_TABLE_SIZE)]; rw_enter(&irb_ptr->irb_lock, RW_READER); for (ire = irb_ptr->irb_ire; ire != NULL; ire = ire->ire_next) { if (ire->ire_marks & IRE_MARK_CONDEMNED) @@ -4814,21 +4884,23 @@ ire_t * ipif_to_ire(const ipif_t *ipif) { ire_t *ire; + ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; ASSERT(!ipif->ipif_isv6); if (ipif->ipif_ire_type == IRE_LOOPBACK) { ire = ire_ctable_lookup(ipif->ipif_lcl_addr, 0, IRE_LOOPBACK, - ipif, ALL_ZONES, NULL, (MATCH_IRE_TYPE | MATCH_IRE_IPIF)); + ipif, ALL_ZONES, NULL, (MATCH_IRE_TYPE | MATCH_IRE_IPIF), + ipst); } else if (ipif->ipif_flags & IPIF_POINTOPOINT) { /* In this case we need to lookup destination address. 
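ire_cache_lookup() above captures the fast path of the new per-stack cache: hash the destination into ips_ip_cache_table, walk the bucket chain under that bucket's reader lock, skip condemned or hidden entries, and take a hold before the lock is dropped. A stand-alone sketch of that per-bucket rwlock shape (the table size, hash, and field names here are illustrative, not the kernel's):

#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define	TBL_SIZE	256				/* illustrative; power of two */
#define	ADDR_HASH(a)	(((a) ^ ((a) >> 8) ^ ((a) >> 16)) & (TBL_SIZE - 1))

typedef struct entry {
	uint32_t	e_addr;
	atomic_uint	e_refcnt;
	struct entry	*e_next;
} entry_t;

typedef struct bucket {				/* plays the role of irb_t */
	pthread_rwlock_t b_lock;
	entry_t		*b_head;
} bucket_t;

static bucket_t cache[TBL_SIZE];

static void
cache_init(void)
{
	int i;

	for (i = 0; i < TBL_SIZE; i++)
		(void) pthread_rwlock_init(&cache[i].b_lock, NULL);
}

/*
 * Hash to a bucket, walk the chain as a reader, and take a hold on the
 * match before dropping the lock -- the shape of ire_cache_lookup() plus
 * IRE_REFHOLD, minus the condemned/hidden/zone checks.
 */
static entry_t *
cache_lookup(uint32_t addr)
{
	bucket_t *b = &cache[ADDR_HASH(addr)];
	entry_t *e;

	(void) pthread_rwlock_rdlock(&b->b_lock);
	for (e = b->b_head; e != NULL; e = e->e_next) {
		if (e->e_addr == addr) {
			atomic_fetch_add(&e->e_refcnt, 1);
			break;
		}
	}
	(void) pthread_rwlock_unlock(&b->b_lock);
	return (e);
}

int
main(void)
{
	cache_init();
	printf("lookup -> %p\n", (void *)cache_lookup(0x0a000001));
	return (0);
}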
*/ ire = ire_ftable_lookup(ipif->ipif_pp_dst_addr, IP_HOST_MASK, 0, IRE_INTERFACE, ipif, NULL, ALL_ZONES, 0, NULL, - (MATCH_IRE_TYPE | MATCH_IRE_IPIF | MATCH_IRE_MASK)); + (MATCH_IRE_TYPE | MATCH_IRE_IPIF | MATCH_IRE_MASK), ipst); } else { ire = ire_ftable_lookup(ipif->ipif_subnet, ipif->ipif_net_mask, 0, IRE_INTERFACE, ipif, NULL, ALL_ZONES, 0, NULL, (MATCH_IRE_TYPE | MATCH_IRE_IPIF | - MATCH_IRE_MASK)); + MATCH_IRE_MASK), ipst); } return (ire); } @@ -4885,13 +4957,15 @@ ire_cache_reclaim(ire_t *ire, char *arg) { ire_cache_reclaim_t *icr = (ire_cache_reclaim_t *)arg; uint_t rand; + ip_stack_t *ipst = icr->icr_ipst; if (ire->ire_type != IRE_CACHE) return; if (ire->ire_ipversion == IPV6_VERSION) { rand = (uint_t)lbolt + - IRE_ADDR_HASH_V6(ire->ire_addr_v6, ip6_cache_table_size); + IRE_ADDR_HASH_V6(ire->ire_addr_v6, + ipst->ips_ip6_cache_table_size); mutex_enter(&ire->ire_lock); if (IN6_IS_ADDR_UNSPECIFIED(&ire->ire_gateway_addr_v6)) { mutex_exit(&ire->ire_lock); @@ -4905,7 +4979,7 @@ ire_cache_reclaim(ire_t *ire, char *arg) mutex_exit(&ire->ire_lock); } else { rand = (uint_t)lbolt + - IRE_ADDR_HASH(ire->ire_addr, ip_cache_table_size); + IRE_ADDR_HASH(ire->ire_addr, ipst->ips_ip_cache_table_size); if (ire->ire_gateway_addr == 0) { if (icr->icr_onlink != 0 && (rand/icr->icr_onlink)*icr->icr_onlink == rand) { @@ -4960,114 +5034,204 @@ power2_roundup(uint32_t *value) *value = (1 << i); } +/* Global init for all zones */ void -ip_ire_init() +ip_ire_g_init() { - int i; - - mutex_init(&ire_ft_init_lock, NULL, MUTEX_DEFAULT, 0); - mutex_init(&ire_handle_lock, NULL, MUTEX_DEFAULT, NULL); - mutex_init(&ire_mrtun_lock, NULL, MUTEX_DEFAULT, NULL); - mutex_init(&ire_srcif_table_lock, NULL, MUTEX_DEFAULT, NULL); - mutex_init(&ndp4.ndp_g_lock, NULL, MUTEX_DEFAULT, NULL); + /* + * Create ire caches, ire_reclaim() + * will give IRE_CACHE back to system when needed. + * This needs to be done here before anything else, since + * ire_add() expects the cache to be created. + */ + ire_cache = kmem_cache_create("ire_cache", + sizeof (ire_t), 0, ip_ire_constructor, + ip_ire_destructor, ip_trash_ire_reclaim, NULL, NULL, 0); - rn_init(); - (void) rn_inithead((void **)&ip_ftable, 32); rt_entry_cache = kmem_cache_create("rt_entry", sizeof (struct rt_entry), 0, NULL, NULL, NULL, NULL, NULL, 0); + /* + * Have radix code setup kmem caches etc. + */ + rn_init(); +} + +void +ip_ire_init(ip_stack_t *ipst) +{ + int i; + uint32_t mem_cnt; + uint32_t cpu_cnt; + uint32_t min_cnt; + pgcnt_t mem_avail; + + /* + * ip_ire_max_bucket_cnt is sized below based on the memory + * size and the cpu speed of the machine. This is upper + * bounded by the compile time value of ip_ire_max_bucket_cnt + * and is lower bounded by the compile time value of + * ip_ire_min_bucket_cnt. Similar logic applies to + * ip6_ire_max_bucket_cnt. + * + * We calculate this for each IP Instances in order to use + * the kmem_avail and ip_ire_{min,max}_bucket_cnt that are + * in effect when the zone is booted. 
+ */ + mem_avail = kmem_avail(); + mem_cnt = (mem_avail >> ip_ire_mem_ratio) / + ip_cache_table_size / sizeof (ire_t); + cpu_cnt = CPU->cpu_type_info.pi_clock >> ip_ire_cpu_ratio; + + min_cnt = MIN(cpu_cnt, mem_cnt); + if (min_cnt < ip_ire_min_bucket_cnt) + min_cnt = ip_ire_min_bucket_cnt; + if (ip_ire_max_bucket_cnt > min_cnt) { + ip_ire_max_bucket_cnt = min_cnt; + } + + mem_cnt = (mem_avail >> ip_ire_mem_ratio) / + ip6_cache_table_size / sizeof (ire_t); + min_cnt = MIN(cpu_cnt, mem_cnt); + if (min_cnt < ip6_ire_min_bucket_cnt) + min_cnt = ip6_ire_min_bucket_cnt; + if (ip6_ire_max_bucket_cnt > min_cnt) { + ip6_ire_max_bucket_cnt = min_cnt; + } + + mutex_init(&ipst->ips_ire_ft_init_lock, NULL, MUTEX_DEFAULT, 0); + mutex_init(&ipst->ips_ire_handle_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&ipst->ips_ire_mrtun_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&ipst->ips_ire_srcif_table_lock, NULL, MUTEX_DEFAULT, NULL); + + (void) rn_inithead((void **)&ipst->ips_ip_ftable, 32); + + /* Calculate the IPv4 cache table size. */ - ip_cache_table_size = MAX(ip_cache_table_size, - ((kmem_avail() >> ip_ire_mem_ratio) / sizeof (ire_t) / + ipst->ips_ip_cache_table_size = MAX(ip_cache_table_size, + ((mem_avail >> ip_ire_mem_ratio) / sizeof (ire_t) / ip_ire_max_bucket_cnt)); - if (ip_cache_table_size > ip_max_cache_table_size) - ip_cache_table_size = ip_max_cache_table_size; + if (ipst->ips_ip_cache_table_size > ip_max_cache_table_size) + ipst->ips_ip_cache_table_size = ip_max_cache_table_size; /* * Make sure that the table size is always a power of 2. The * hash macro IRE_ADDR_HASH() depends on that. */ - power2_roundup(&ip_cache_table_size); + power2_roundup(&ipst->ips_ip_cache_table_size); - ip_cache_table = (irb_t *)kmem_zalloc(ip_cache_table_size * + ipst->ips_ip_cache_table = kmem_zalloc(ipst->ips_ip_cache_table_size * sizeof (irb_t), KM_SLEEP); - for (i = 0; i < ip_cache_table_size; i++) { - rw_init(&ip_cache_table[i].irb_lock, NULL, + for (i = 0; i < ipst->ips_ip_cache_table_size; i++) { + rw_init(&ipst->ips_ip_cache_table[i].irb_lock, NULL, RW_DEFAULT, NULL); } /* Calculate the IPv6 cache table size. */ - ip6_cache_table_size = MAX(ip6_cache_table_size, - ((kmem_avail() >> ip_ire_mem_ratio) / sizeof (ire_t) / + ipst->ips_ip6_cache_table_size = MAX(ip6_cache_table_size, + ((mem_avail >> ip_ire_mem_ratio) / sizeof (ire_t) / ip6_ire_max_bucket_cnt)); - if (ip6_cache_table_size > ip6_max_cache_table_size) - ip6_cache_table_size = ip6_max_cache_table_size; + if (ipst->ips_ip6_cache_table_size > ip6_max_cache_table_size) + ipst->ips_ip6_cache_table_size = ip6_max_cache_table_size; /* * Make sure that the table size is always a power of 2. The * hash macro IRE_ADDR_HASH_V6() depends on that. */ - power2_roundup(&ip6_cache_table_size); + power2_roundup(&ipst->ips_ip6_cache_table_size); - ip_cache_table_v6 = (irb_t *)kmem_zalloc(ip6_cache_table_size * - sizeof (irb_t), KM_SLEEP); + ipst->ips_ip_cache_table_v6 = kmem_zalloc( + ipst->ips_ip6_cache_table_size * sizeof (irb_t), KM_SLEEP); - for (i = 0; i < ip6_cache_table_size; i++) { - rw_init(&ip_cache_table_v6[i].irb_lock, NULL, + for (i = 0; i < ipst->ips_ip6_cache_table_size; i++) { + rw_init(&ipst->ips_ip_cache_table_v6[i].irb_lock, NULL, RW_DEFAULT, NULL); } - /* - * Create ire caches, ire_reclaim() - * will give IRE_CACHE back to system when needed. - * This needs to be done here before anything else, since - * ire_add() expects the cache to be created. 
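The rewritten ip_ire_init() sizes its bucket limits from the memory and CPU capacity seen when the instance boots, takes the smaller of the two estimates, clamps it against the compile-time tunables, and rounds every table size up to a power of two so IRE_ADDR_HASH() can mask rather than divide. A user-land sketch of the same arithmetic; the shifts, floor, and ceiling below are made-up stand-ins for ip_ire_mem_ratio, ip_ire_cpu_ratio, and the ip_ire_{min,max}_bucket_cnt tunables:

#include <stdint.h>
#include <stdio.h>

/* Round *value up to the next power of two (same logic as power2_roundup). */
static void
power2_roundup(uint32_t *value)
{
	int i;

	for (i = 1; i < 31; i++) {
		if (*value <= (1U << i))
			break;
	}
	*value = (1U << i);
}

/*
 * Derive a per-instance bucket limit: the smaller of a memory-based and a
 * CPU-based estimate, kept between a floor and a ceiling.  sizeof (uint64_t)
 * stands in for sizeof (ire_t).
 */
static uint32_t
bucket_limit(uint64_t mem_avail, uint64_t cpu_hz, uint32_t table_size)
{
	const uint32_t floor_cnt = 10, ceiling_cnt = 10000;
	uint32_t mem_cnt, cpu_cnt, cnt;

	mem_cnt = (uint32_t)((mem_avail >> 6) / table_size / sizeof (uint64_t));
	cpu_cnt = (uint32_t)(cpu_hz >> 24);
	cnt = (mem_cnt < cpu_cnt) ? mem_cnt : cpu_cnt;
	if (cnt < floor_cnt)
		cnt = floor_cnt;
	if (cnt > ceiling_cnt)
		cnt = ceiling_cnt;
	return (cnt);
}

int
main(void)
{
	uint32_t tbl_size = 1000;		/* pretend ip_cache_table_size */

	power2_roundup(&tbl_size);		/* -> 1024, maskable by the hash */
	printf("table size %u, bucket limit %u\n", tbl_size,
	    bucket_limit(8ULL << 30, 2200000000ULL, tbl_size));
	return (0);
}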
- */ - ire_cache = kmem_cache_create("ire_cache", - sizeof (ire_t), 0, ip_ire_constructor, - ip_ire_destructor, ip_trash_ire_reclaim, NULL, NULL, 0); /* * Initialize ip_mrtun_table to NULL now, it will be * populated by ip_rt_add if reverse tunnel is created */ - ip_mrtun_table = NULL; + ipst->ips_ip_mrtun_table = NULL; /* * Make sure that the forwarding table size is a power of 2. * The IRE*_ADDR_HASH() macroes depend on that. */ - power2_roundup(&ip6_ftable_hash_size); + ipst->ips_ip6_ftable_hash_size = ip6_ftable_hash_size; + power2_roundup(&ipst->ips_ip6_ftable_hash_size); + + ipst->ips_ire_handle = 1; } void -ip_ire_fini() +ip_ire_g_fini(void) +{ + kmem_cache_destroy(ire_cache); + kmem_cache_destroy(rt_entry_cache); + + rn_fini(); +} + +void +ip_ire_fini(ip_stack_t *ipst) { int i; - mutex_destroy(&ire_ft_init_lock); - mutex_destroy(&ire_handle_lock); - mutex_destroy(&ndp4.ndp_g_lock); + /* + * Delete all IREs - assumes that the ill/ipifs have + * been removed so what remains are just the ftable and IRE_CACHE. + */ + ire_walk_ill_mrtun(0, 0, ire_delete, NULL, NULL, ipst); + ire_walk(ire_delete, NULL, ipst); - rn_fini(); - RADIX_NODE_HEAD_DESTROY(ip_ftable); - kmem_cache_destroy(rt_entry_cache); + rn_freehead(ipst->ips_ip_ftable); + ipst->ips_ip_ftable = NULL; + + mutex_destroy(&ipst->ips_ire_ft_init_lock); + mutex_destroy(&ipst->ips_ire_handle_lock); + mutex_destroy(&ipst->ips_ire_mrtun_lock); + mutex_destroy(&ipst->ips_ire_srcif_table_lock); - for (i = 0; i < ip_cache_table_size; i++) { - rw_destroy(&ip_cache_table[i].irb_lock); + for (i = 0; i < ipst->ips_ip_cache_table_size; i++) { + ASSERT(ipst->ips_ip_cache_table[i].irb_ire == NULL); + rw_destroy(&ipst->ips_ip_cache_table[i].irb_lock); } - kmem_free(ip_cache_table, ip_cache_table_size * sizeof (irb_t)); + kmem_free(ipst->ips_ip_cache_table, + ipst->ips_ip_cache_table_size * sizeof (irb_t)); + ipst->ips_ip_cache_table = NULL; - for (i = 0; i < ip6_cache_table_size; i++) { - rw_destroy(&ip_cache_table_v6[i].irb_lock); + for (i = 0; i < ipst->ips_ip6_cache_table_size; i++) { + ASSERT(ipst->ips_ip_cache_table_v6[i].irb_ire == NULL); + rw_destroy(&ipst->ips_ip_cache_table_v6[i].irb_lock); } - kmem_free(ip_cache_table_v6, ip6_cache_table_size * sizeof (irb_t)); + kmem_free(ipst->ips_ip_cache_table_v6, + ipst->ips_ip6_cache_table_size * sizeof (irb_t)); + ipst->ips_ip_cache_table_v6 = NULL; - if (ip_mrtun_table != NULL) { + if (ipst->ips_ip_mrtun_table != NULL) { for (i = 0; i < IP_MRTUN_TABLE_SIZE; i++) { - rw_destroy(&ip_mrtun_table[i].irb_lock); + ASSERT(ipst->ips_ip_mrtun_table[i].irb_ire == NULL); + rw_destroy(&ipst->ips_ip_mrtun_table[i].irb_lock); } - kmem_free(ip_mrtun_table, IP_MRTUN_TABLE_SIZE * sizeof (irb_t)); + kmem_free(ipst->ips_ip_mrtun_table, + IP_MRTUN_TABLE_SIZE * sizeof (irb_t)); + ipst->ips_ip_mrtun_table = NULL; + } + + for (i = 0; i < IP6_MASK_TABLE_SIZE; i++) { + irb_t *ptr; + int j; + + if ((ptr = ipst->ips_ip_forwarding_table_v6[i]) == NULL) + continue; + + for (j = 0; j < ipst->ips_ip6_ftable_hash_size; j++) { + ASSERT(ptr[j].irb_ire == NULL); + rw_destroy(&ptr[j].irb_lock); + } + mi_free(ptr); + ipst->ips_ip_forwarding_table_v6[i] = NULL; } - kmem_cache_destroy(ire_cache); } int @@ -5076,46 +5240,45 @@ ire_add_mrtun(ire_t **ire_p, queue_t *q, mblk_t *mp, ipsq_func_t func) ire_t *ire1; irb_t *irb_ptr; ire_t **irep; - ire_t *ire; + ire_t *ire = *ire_p; int i; uint_t max_frag; ill_t *stq_ill; int error; + ip_stack_t *ipst = ire->ire_ipst; - ire = *ire_p; ASSERT(ire->ire_ipversion == IPV4_VERSION); /* Is 
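The init/fini work is now split: ip_ire_g_init() and ip_ire_g_fini() own the truly global pieces (the kmem caches and the radix code), while ip_ire_init() and ip_ire_fini() build and tear down the per-instance tables, locks, and counters hanging off the ip_stack_t, so destroying one instance never touches another. A toy model of that split under hypothetical names:

#include <stdlib.h>
#include <pthread.h>

/*
 * Hypothetical per-instance state: fields that were file-scope globals
 * before the change now hang off one structure per network stack.
 */
typedef struct stack_state {
	pthread_mutex_t	ss_handle_lock;
	unsigned int	ss_next_handle;
	void		*ss_cache_table;
	size_t		ss_cache_bytes;
} stack_state_t;

static int module_caches_ready;		/* stands in for ire_cache et al. */

/* Done once per module load, like ip_ire_g_init(). */
void
module_g_init(void)
{
	module_caches_ready = 1;
}

/* Done once per instance, like ip_ire_init(ipst). */
stack_state_t *
stack_init(size_t cache_bytes)
{
	stack_state_t *ss = calloc(1, sizeof (*ss));

	if (ss == NULL)
		return (NULL);
	if ((ss->ss_cache_table = calloc(1, cache_bytes)) == NULL) {
		free(ss);
		return (NULL);
	}
	(void) pthread_mutex_init(&ss->ss_handle_lock, NULL);
	ss->ss_next_handle = 1;
	ss->ss_cache_bytes = cache_bytes;
	return (ss);
}

/* Symmetric teardown, like ip_ire_fini(ipst): free only this instance. */
void
stack_fini(stack_state_t *ss)
{
	(void) pthread_mutex_destroy(&ss->ss_handle_lock);
	free(ss->ss_cache_table);
	free(ss);
}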
ip_mrtun_table empty ? */ - if (ip_mrtun_table == NULL) { + if (ipst->ips_ip_mrtun_table == NULL) { /* create the mrtun table */ - mutex_enter(&ire_mrtun_lock); - if (ip_mrtun_table == NULL) { - ip_mrtun_table = - (irb_t *)kmem_zalloc(IP_MRTUN_TABLE_SIZE * - sizeof (irb_t), KM_NOSLEEP); + mutex_enter(&ipst->ips_ire_mrtun_lock); + if (ipst->ips_ip_mrtun_table == NULL) { + ipst->ips_ip_mrtun_table = kmem_zalloc( + IP_MRTUN_TABLE_SIZE * sizeof (irb_t), KM_NOSLEEP); - if (ip_mrtun_table == NULL) { + if (ipst->ips_ip_mrtun_table == NULL) { ip2dbg(("ire_add_mrtun: allocation failure\n")); - mutex_exit(&ire_mrtun_lock); + mutex_exit(&ipst->ips_ire_mrtun_lock); ire_refrele(ire); *ire_p = NULL; return (ENOMEM); } for (i = 0; i < IP_MRTUN_TABLE_SIZE; i++) { - rw_init(&ip_mrtun_table[i].irb_lock, NULL, + rw_init(&ipst->ips_ip_mrtun_table[i].irb_lock, NULL, RW_DEFAULT, NULL); } ip2dbg(("ire_add_mrtun: mrtun table is created\n")); } /* some other thread got it and created the table */ - mutex_exit(&ire_mrtun_lock); + mutex_exit(&ipst->ips_ire_mrtun_lock); } /* * Check for duplicate in the bucket and insert in the table */ - irb_ptr = &(ip_mrtun_table[IRE_ADDR_HASH(ire->ire_in_src_addr, + irb_ptr = &(ipst->ips_ip_mrtun_table[IRE_ADDR_HASH(ire->ire_in_src_addr, IP_MRTUN_TABLE_SIZE)]); /* @@ -5187,9 +5350,9 @@ ire_add_mrtun(ire_t **ire_p, queue_t *q, mblk_t *mp, ipsq_func_t func) * Protect ire_mrtun_count and ill_mrtun_refcnt from * another thread trying to add ire in the table */ - mutex_enter(&ire_mrtun_lock); - ire_mrtun_count++; - mutex_exit(&ire_mrtun_lock); + mutex_enter(&ipst->ips_ire_mrtun_lock); + ipst->ips_ire_mrtun_count++; + mutex_exit(&ipst->ips_ire_mrtun_lock); /* * ill_mrtun_refcnt is protected by the ill_lock held via * ire_atomic_start @@ -5217,7 +5380,7 @@ ire_add_mrtun(ire_t **ire_p, queue_t *q, mblk_t *mp, ipsq_func_t func) void ire_walk_ill_mrtun(uint_t match_flags, uint_t ire_type, pfv_t func, void *arg, - ill_t *ill) + ill_t *ill, ip_stack_t *ipst) { irb_t *irb; ire_t *ire; @@ -5228,17 +5391,17 @@ ire_walk_ill_mrtun(uint_t match_flags, uint_t ire_type, pfv_t func, void *arg, MATCH_IRE_ILL_GROUP))) || (ill != NULL)); ASSERT(match_flags == 0 || ire_type == IRE_MIPRTUN); - mutex_enter(&ire_mrtun_lock); - if (ire_mrtun_count == 0) { - mutex_exit(&ire_mrtun_lock); + mutex_enter(&ipst->ips_ire_mrtun_lock); + if (ipst->ips_ire_mrtun_count == 0) { + mutex_exit(&ipst->ips_ire_mrtun_lock); return; } - mutex_exit(&ire_mrtun_lock); + mutex_exit(&ipst->ips_ire_mrtun_lock); ip2dbg(("ire_walk_ill_mrtun:walking the reverse tunnel table \n")); for (i = 0; i < IP_MRTUN_TABLE_SIZE; i++) { - irb = &(ip_mrtun_table[i]); + irb = &(ipst->ips_ip_mrtun_table[i]); if (irb->irb_ire == NULL) continue; IRB_REFHOLD(irb); @@ -5248,7 +5411,7 @@ ire_walk_ill_mrtun(uint_t match_flags, uint_t ire_type, pfv_t func, void *arg, if (match_flags != 0) { ret = ire_walk_ill_match( match_flags, ire_type, - ire, ill, ALL_ZONES); + ire, ill, ALL_ZONES, ipst); } if (match_flags == 0 || ret) (*func)(ire, arg); @@ -5337,8 +5500,10 @@ ire_add_srcif_v4(ire_t **ire_p, queue_t *q, mblk_t *mp, ipsq_func_t func) ill_t *stq_ill; uint_t max_frag; int error = 0; + ip_stack_t *ipst; ire = *ire_p; + ipst = ire->ire_ipst; ASSERT(ire->ire_in_ill != NULL); ASSERT(ire->ire_ipversion == IPV4_VERSION); ASSERT(ire->ire_type == IRE_IF_NORESOLVER || @@ -5374,9 +5539,8 @@ ire_add_srcif_v4(ire_t **ire_p, queue_t *q, mblk_t *mp, ipsq_func_t func) mutex_enter(&ire->ire_in_ill->ill_lock); if (ire->ire_in_ill->ill_srcif_table == NULL) { /* create the 
incoming interface based table */ - ire->ire_in_ill->ill_srcif_table = - (irb_t *)kmem_zalloc(IP_SRCIF_TABLE_SIZE * - sizeof (irb_t), KM_NOSLEEP); + ire->ire_in_ill->ill_srcif_table = kmem_zalloc( + IP_SRCIF_TABLE_SIZE * sizeof (irb_t), KM_NOSLEEP); if (ire->ire_in_ill->ill_srcif_table == NULL) { ip1dbg(("ire_add_srcif_v4: Allocation fail\n")); mutex_exit(&ire->ire_in_ill->ill_lock); @@ -5467,9 +5631,9 @@ ire_add_srcif_v4(ire_t **ire_p, queue_t *q, mblk_t *mp, ipsq_func_t func) * so ill_srcif_refcnt is already protected. */ ire->ire_in_ill->ill_srcif_refcnt++; - mutex_enter(&ire_srcif_table_lock); - ire_srcif_table_count++; - mutex_exit(&ire_srcif_table_lock); + mutex_enter(&ipst->ips_ire_srcif_table_lock); + ipst->ips_ire_srcif_table_count++; + mutex_exit(&ipst->ips_ire_srcif_table_lock); irb_ptr->irb_ire_cnt++; if (ire->ire_ipif != NULL) { ire->ire_ipif->ipif_ire_cnt++; @@ -5494,7 +5658,6 @@ ire_add_srcif_v4(ire_t **ire_p, queue_t *q, mblk_t *mp, ipsq_func_t func) * information for the ire. The passed ire is returned to the caller as it * is the ire which is created as mblk. */ - static ire_t * ire_update_srcif_v4(ire_t *ire) { @@ -5573,7 +5736,7 @@ ire_update_srcif_v4(ire_t *ire) * This only works in the global zone. */ boolean_t -ire_multirt_need_resolve(ipaddr_t dst, const ts_label_t *tsl) +ire_multirt_need_resolve(ipaddr_t dst, const ts_label_t *tsl, ip_stack_t *ipst) { ire_t *first_fire; ire_t *first_cire; @@ -5587,7 +5750,7 @@ ire_multirt_need_resolve(ipaddr_t dst, const ts_label_t *tsl) /* Retrieve the first IRE_HOST that matches the destination */ first_fire = ire_ftable_lookup(dst, IP_HOST_MASK, 0, IRE_HOST, NULL, NULL, ALL_ZONES, 0, tsl, - MATCH_IRE_MASK | MATCH_IRE_TYPE | MATCH_IRE_SECATTR); + MATCH_IRE_MASK | MATCH_IRE_TYPE | MATCH_IRE_SECATTR, ipst); /* No route at all */ if (first_fire == NULL) { @@ -5598,7 +5761,7 @@ ire_multirt_need_resolve(ipaddr_t dst, const ts_label_t *tsl) ASSERT(firb != NULL); /* Retrieve the first IRE_CACHE ire for that destination. */ - first_cire = ire_cache_lookup(dst, GLOBAL_ZONEID, tsl); + first_cire = ire_cache_lookup(dst, GLOBAL_ZONEID, tsl, ipst); /* No resolved route. */ if (first_cire == NULL) { @@ -5643,7 +5806,7 @@ ire_multirt_need_resolve(ipaddr_t dst, const ts_label_t *tsl) /* At least one route is unresolved; search for a resolvable route. */ if (unres_cnt > 0) resolvable = ire_multirt_lookup(&first_cire, &first_fire, - MULTIRT_USESTAMP | MULTIRT_CACHEGW, tsl); + MULTIRT_USESTAMP | MULTIRT_CACHEGW, tsl, ipst); if (first_fire != NULL) ire_refrele(first_fire); @@ -5707,7 +5870,7 @@ ire_multirt_need_resolve(ipaddr_t dst, const ts_label_t *tsl) */ boolean_t ire_multirt_lookup(ire_t **ire_arg, ire_t **fire_arg, uint32_t flags, - const ts_label_t *tsl) + const ts_label_t *tsl, ip_stack_t *ipst) { clock_t delta; ire_t *best_fire = NULL; @@ -5749,7 +5912,7 @@ ire_multirt_lookup(ire_t **ire_arg, ire_t **fire_arg, uint32_t flags, * if we don't find one, no route for that dest is * resolved yet. 
*/ - first_cire = ire_cache_lookup(dst, GLOBAL_ZONEID, tsl); + first_cire = ire_cache_lookup(dst, GLOBAL_ZONEID, tsl, ipst); if (first_cire != NULL) { cirb = first_cire->ire_bucket; } @@ -5844,7 +6007,7 @@ ire_multirt_lookup(ire_t **ire_arg, ire_t **fire_arg, uint32_t flags, */ gw_ire = ire_route_lookup(gw, 0, 0, 0, NULL, NULL, ALL_ZONES, tsl, - MATCH_IRE_RECURSIVE | MATCH_IRE_SECATTR); + MATCH_IRE_RECURSIVE | MATCH_IRE_SECATTR, ipst); ip2dbg(("ire_multirt_lookup: looked up gw_ire %p\n", (void *)gw_ire)); @@ -5893,8 +6056,8 @@ ire_multirt_lookup(ire_t **ire_arg, ire_t **fire_arg, uint32_t flags, delta = lbolt - fire->ire_last_used_time; delta = TICK_TO_MSEC(delta); - res = (boolean_t) - ((delta > ip_multirt_resolution_interval) || + res = (boolean_t)((delta > + ipst->ips_ip_multirt_resolution_interval) || (!(flags & MULTIRT_USESTAMP))); ip2dbg(("ire_multirt_lookup: fire %p, delta %lu, " @@ -5983,7 +6146,7 @@ ire_multirt_lookup(ire_t **ire_arg, ire_t **fire_arg, uint32_t flags, gw_ire = ire_ftable_lookup(gw, 0, 0, IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, tsl, MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE | - MATCH_IRE_SECATTR); + MATCH_IRE_SECATTR, ipst); /* No resolver for the gateway; we skip this ire. */ if (gw_ire == NULL) { @@ -6056,9 +6219,9 @@ ire_multirt_lookup(ire_t **ire_arg, ire_t **fire_arg, uint32_t flags, delta = lbolt - fire->ire_last_used_time; delta = TICK_TO_MSEC(delta); - res = (boolean_t) - ((delta > ip_multirt_resolution_interval) || - (!(flags & MULTIRT_USESTAMP))); + res = (boolean_t)((delta > + ipst->ips_ip_multirt_resolution_interval) || + (!(flags & MULTIRT_USESTAMP))); ip3dbg(("ire_multirt_lookup: fire %p, delta %lx, " "flags %04x, res %d\n", @@ -6340,6 +6503,7 @@ ire_arpresolve(ire_t *in_ire, ill_t *dst_ill) size_t bufsize; frtn_t *frtnp; ill_t *ill; + ip_stack_t *ipst = dst_ill->ill_ipst; /* * Construct message chain for the resolver @@ -6405,6 +6569,8 @@ ire_arpresolve(ire_t *in_ire, ill_t *dst_ill) ill = ire_to_ill(ire); ire->ire_stq_ifindex = ill->ill_phyint->phyint_ifindex; ire->ire_zoneid = in_ire->ire_zoneid; + ire->ire_ipst = ipst; + /* * ire_freemblk will be called when ire_mp is freed, both for * successful and failed arp resolution. IRE_MARK_UNCACHED will be set @@ -6460,6 +6626,7 @@ ire_freemblk(ire_t *ire_mp) { nce_t *nce = NULL; ill_t *ill; + ip_stack_t *ipst; ASSERT(ire_mp != NULL); @@ -6479,12 +6646,16 @@ ire_freemblk(ire_t *ire_mp) */ ASSERT(ire_mp->ire_stq != NULL); ASSERT(ire_mp->ire_stq_ifindex != 0); + ASSERT(ire_mp->ire_ipst != NULL); + + ipst = ire_mp->ire_ipst; + /* * Get any nce's corresponding to this ire_mp. We first have to * make sure that the ill is still around. */ - ill = ill_lookup_on_ifindex(ire_mp->ire_stq_ifindex, B_FALSE, - NULL, NULL, NULL, NULL); + ill = ill_lookup_on_ifindex(ire_mp->ire_stq_ifindex, + B_FALSE, NULL, NULL, NULL, NULL, ipst); if (ill == NULL || (ire_mp->ire_stq != ill->ill_wq) || (ill->ill_state_flags & ILL_CONDEMNED)) { /* @@ -6560,6 +6731,7 @@ ire_nce_init(ire_t *ire, mblk_t *fp_mp, mblk_t *res_mp) nce_t *arpce = NULL; ill_t *ire_ill; uint16_t nce_state, nce_flags; + ip_stack_t *ipst; if (ire->ire_stq == NULL) { if (res_mp) @@ -6594,6 +6766,7 @@ ire_nce_init(ire_t *ire, mblk_t *fp_mp, mblk_t *res_mp) * for the outgoing interface, which we get from the ire_stq. 
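The MULTIRT_USESTAMP logic above rations resolution attempts: a multirt route is only re-resolved if it has gone unused for longer than ips_ip_multirt_resolution_interval, or if the caller explicitly ignores timestamps. The same age test in isolation, with an illustrative clock source, flag value, and interval:

#include <stdbool.h>
#include <stdint.h>
#include <time.h>

#define	USE_STAMP		0x01	/* stands in for MULTIRT_USESTAMP */
#define	RESOLUTION_INTERVAL_MS	60000	/* illustrative stand-in for the tunable */

static uint64_t
now_ms(void)
{
	struct timespec ts;

	(void) clock_gettime(CLOCK_MONOTONIC, &ts);
	return ((uint64_t)ts.tv_sec * 1000 + (uint64_t)ts.tv_nsec / 1000000);
}

/*
 * A route is worth another resolution attempt only if it has gone unused
 * longer than the interval, or if the caller asked to ignore timestamps.
 */
bool
multirt_needs_resolve(uint64_t last_used_ms, uint32_t flags)
{
	uint64_t delta = now_ms() - last_used_ms;

	return (delta > RESOLUTION_INTERVAL_MS || !(flags & USE_STAMP));
}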
*/ ire_ill = ire_to_ill(ire); + ipst = ire_ill->ill_ipst; /* * if we are creating an nce for the first time, and this is @@ -6680,7 +6853,7 @@ ire_nce_init(ire_t *ire, mblk_t *fp_mp, mblk_t *res_mp) */ NCE_REFHOLD_TO_REFHOLD_NOTR(ire->ire_nce); } else { - if (NCE_EXPIRED(arpce)) + if (NCE_EXPIRED(arpce, ipst)) arpce = nce_reinit(arpce); if (arpce != NULL) { /* diff --git a/usr/src/uts/common/inet/ip/ip_mroute.c b/usr/src/uts/common/inet/ip/ip_mroute.c index 91fe418366..eeb08607ae 100644 --- a/usr/src/uts/common/inet/ip/ip_mroute.c +++ b/usr/src/uts/common/inet/ip/ip_mroute.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. + * Copyright 2007 Sun Microsystems, Inc. * All rights reserved. Use is subject to license terms. */ /* Copyright (c) 1990 Mentat Inc. */ @@ -137,17 +137,6 @@ * causing the stats be be approximate, not exact. */ -/* - * Globals - * All but ip_g_mrouter and ip_mrtproto could be static, - * except for netstat or debugging purposes. - */ -queue_t *ip_g_mrouter = NULL; -static kmutex_t ip_g_mrouter_mutex; - -int ip_mrtproto = IGMP_DVMRP; /* for netstat only */ -struct mrtstat mrtstat; /* Stats for netstat */ - #define NO_VIF MAXVIFS /* from mrouted, no route for src */ /* @@ -173,44 +162,33 @@ struct mrtstat mrtstat; /* Stats for netstat */ #define MFCHASH(a, g) MFCHASHMOD(((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \ ((g) >> 20) ^ ((g) >> 10) ^ (g)) -/* - * mfctable: - * Includes all mfcs, including waiting upcalls. - * Multiple mfcs per bucket. - */ -static struct mfcb mfctable[MFCTBLSIZ]; /* kernel routing table */ - -/* - * Define the token bucket filter structures. - * tbftable -> each vif has one of these for storing info. - */ -struct tbf tbftable[MAXVIFS]; #define TBF_REPROCESS (hz / 100) /* 100x /second */ /* Identify PIM packet that came on a Register interface */ #define PIM_REGISTER_MARKER 0xffffffff /* Function declarations */ -static int add_mfc(struct mfcctl *); -static int add_vif(struct vifctl *, queue_t *, mblk_t *); -static int del_mfc(struct mfcctl *); -static int del_vif(vifi_t *, queue_t *, mblk_t *); +static int add_mfc(struct mfcctl *, ip_stack_t *); +static int add_vif(struct vifctl *, queue_t *, mblk_t *, ip_stack_t *); +static int del_mfc(struct mfcctl *, ip_stack_t *); +static int del_vif(vifi_t *, queue_t *, mblk_t *, ip_stack_t *); static void del_vifp(struct vif *); static void encap_send(ipha_t *, mblk_t *, struct vif *, ipaddr_t); static void expire_upcalls(void *); -static void fill_route(struct mfc *, struct mfcctl *); -static int get_assert(uchar_t *); -static int get_lsg_cnt(struct sioc_lsg_req *); -static int get_sg_cnt(struct sioc_sg_req *); +static void fill_route(struct mfc *, struct mfcctl *, ip_stack_t *); +static void free_queue(struct mfc *); +static int get_assert(uchar_t *, ip_stack_t *); +static int get_lsg_cnt(struct sioc_lsg_req *, ip_stack_t *); +static int get_sg_cnt(struct sioc_sg_req *, ip_stack_t *); static int get_version(uchar_t *); -static int get_vif_cnt(struct sioc_vif_req *); +static int get_vif_cnt(struct sioc_vif_req *, ip_stack_t *); static int ip_mdq(mblk_t *, ipha_t *, ill_t *, ipaddr_t, struct mfc *); -static int ip_mrouter_init(queue_t *, uchar_t *, int); +static int ip_mrouter_init(queue_t *, uchar_t *, int, ip_stack_t *); static void phyint_send(ipha_t *, mblk_t *, struct vif *, ipaddr_t); -static int register_mforward(queue_t *, mblk_t *); +static int register_mforward(queue_t *, mblk_t *, ill_t *); static void register_send(ipha_t *, mblk_t *, struct vif *, ipaddr_t); -static int 
set_assert(int *); +static int set_assert(int *, ip_stack_t *); /* * Token Bucket Filter functions @@ -225,7 +203,7 @@ static void tbf_send_packet(struct vif *, mblk_t *); static void tbf_update_tokens(struct vif *); static void release_mfc(struct mfcb *); -static boolean_t is_mrouter_off(void); +static boolean_t is_mrouter_off(ip_stack_t *); /* * Encapsulation packets */ @@ -244,38 +222,6 @@ static ipha_t multicast_encap_iphdr = { }; /* - * Private variables. - */ -static int saved_ip_g_forward = -1; - -/* - * numvifs is only a hint about the max interface being used. - */ -static vifi_t numvifs = 0; -static kmutex_t numvifs_mutex; - -static struct vif viftable[MAXVIFS+1]; /* Index needs to accomodate */ -/* the value of NO_VIF, which */ -/* is MAXVIFS. */ - -/* - * One-back cache used to locate a tunnel's vif, - * given a datagram's src ip address. - */ -static ipaddr_t last_encap_src; -static struct vif *last_encap_vif; -static kmutex_t last_encap_lock; /* Protects the above */ - -/* - * Whether or not special PIM assert processing is enabled. - */ -/* - * reg_vif_num is protected by numvifs_mutex - */ -static vifi_t reg_vif_num = ALL_VIFS; /* Index to Register vif */ -static int pim_assert; - -/* * Rate limit for assert notification messages, in nsec. */ #define ASSERT_MSG_TIME 3000000000 @@ -386,12 +332,14 @@ int ip_mrouter_set(int cmd, queue_t *q, int checkonly, uchar_t *data, int datalen, mblk_t *first_mp) { - mutex_enter(&ip_g_mrouter_mutex); - if (cmd != MRT_INIT && q != ip_g_mrouter) { - mutex_exit(&ip_g_mrouter_mutex); + ip_stack_t *ipst = CONNQ_TO_IPST(q); + + mutex_enter(&ipst->ips_ip_g_mrouter_mutex); + if (cmd != MRT_INIT && q != ipst->ips_ip_g_mrouter) { + mutex_exit(&ipst->ips_ip_g_mrouter_mutex); return (EACCES); } - mutex_exit(&ip_g_mrouter_mutex); + mutex_exit(&ipst->ips_ip_g_mrouter_mutex); if (checkonly) { /* @@ -419,18 +367,19 @@ ip_mrouter_set(int cmd, queue_t *q, int checkonly, uchar_t *data, * turned off. 
*/ if (cmd != MRT_INIT && cmd != MRT_DONE) { - if (is_mrouter_off()) + if (is_mrouter_off(ipst)) return (EINVAL); } switch (cmd) { - case MRT_INIT: return (ip_mrouter_init(q, data, datalen)); - case MRT_DONE: return (ip_mrouter_done(first_mp)); - case MRT_ADD_VIF: return (add_vif((struct vifctl *)data, q, first_mp)); - case MRT_DEL_VIF: return (del_vif((vifi_t *)data, q, first_mp)); - case MRT_ADD_MFC: return (add_mfc((struct mfcctl *)data)); - case MRT_DEL_MFC: return (del_mfc((struct mfcctl *)data)); - case MRT_ASSERT: return (set_assert((int *)data)); + case MRT_INIT: return (ip_mrouter_init(q, data, datalen, ipst)); + case MRT_DONE: return (ip_mrouter_done(first_mp, ipst)); + case MRT_ADD_VIF: return (add_vif((struct vifctl *)data, q, first_mp, + ipst)); + case MRT_DEL_VIF: return (del_vif((vifi_t *)data, q, first_mp, ipst)); + case MRT_ADD_MFC: return (add_mfc((struct mfcctl *)data, ipst)); + case MRT_DEL_MFC: return (del_mfc((struct mfcctl *)data, ipst)); + case MRT_ASSERT: return (set_assert((int *)data, ipst)); default: return (EOPNOTSUPP); } } @@ -441,12 +390,14 @@ ip_mrouter_set(int cmd, queue_t *q, int checkonly, uchar_t *data, int ip_mrouter_get(int cmd, queue_t *q, uchar_t *data) { - if (q != ip_g_mrouter) + ip_stack_t *ipst = CONNQ_TO_IPST(q); + + if (q != ipst->ips_ip_g_mrouter) return (EACCES); switch (cmd) { case MRT_VERSION: return (get_version((uchar_t *)data)); - case MRT_ASSERT: return (get_assert((uchar_t *)data)); + case MRT_ASSERT: return (get_assert((uchar_t *)data, ipst)); default: return (EOPNOTSUPP); } } @@ -462,17 +413,18 @@ mrt_ioctl(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, { mblk_t *mp1; struct iocblk *iocp = (struct iocblk *)mp->b_rptr; + ip_stack_t *ipst = CONNQ_TO_IPST(q); /* Existence verified in ip_wput_nondata */ mp1 = mp->b_cont->b_cont; switch (iocp->ioc_cmd) { case (SIOCGETVIFCNT): - return (get_vif_cnt((struct sioc_vif_req *)mp1->b_rptr)); + return (get_vif_cnt((struct sioc_vif_req *)mp1->b_rptr, ipst)); case (SIOCGETSGCNT): - return (get_sg_cnt((struct sioc_sg_req *)mp1->b_rptr)); + return (get_sg_cnt((struct sioc_sg_req *)mp1->b_rptr, ipst)); case (SIOCGETLSGCNT): - return (get_lsg_cnt((struct sioc_lsg_req *)mp1->b_rptr)); + return (get_lsg_cnt((struct sioc_lsg_req *)mp1->b_rptr, ipst)); default: return (EINVAL); } @@ -482,12 +434,12 @@ mrt_ioctl(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, * Returns the packet, byte, rpf-failure count for the source, group provided. */ static int -get_sg_cnt(struct sioc_sg_req *req) +get_sg_cnt(struct sioc_sg_req *req, ip_stack_t *ipst) { struct mfc *rt; struct mfcb *mfcbp; - mfcbp = &mfctable[MFCHASH(req->src.s_addr, req->grp.s_addr)]; + mfcbp = &ipst->ips_mfcs[MFCHASH(req->src.s_addr, req->grp.s_addr)]; MFCB_REFHOLD(mfcbp); MFCFIND(mfcbp, req->src.s_addr, req->grp.s_addr, rt); @@ -510,7 +462,7 @@ get_sg_cnt(struct sioc_sg_req *req) */ /* ARGSUSED XXX until implemented */ static int -get_lsg_cnt(struct sioc_lsg_req *req) +get_lsg_cnt(struct sioc_lsg_req *req, ip_stack_t *ipst) { /* XXX TODO SIOCGETLSGCNT */ return (ENXIO); @@ -520,20 +472,20 @@ get_lsg_cnt(struct sioc_lsg_req *req) * Returns the input and output packet and byte counts on the vif provided. */ static int -get_vif_cnt(struct sioc_vif_req *req) +get_vif_cnt(struct sioc_vif_req *req, ip_stack_t *ipst) { vifi_t vifi = req->vifi; - if (vifi >= numvifs) + if (vifi >= ipst->ips_numvifs) return (EINVAL); /* * No locks here, an approximation is fine. 
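get_sg_cnt() and the forwarding path both find an (S,G) entry by hashing source and group into ips_mfcs with MFCHASH. The macro body is taken from the patch; the sketch below only assumes a power-of-two MFCTBLSIZ and a masking MFCHASHMOD so the hash can be exercised on its own:

#include <stdint.h>
#include <stdio.h>

/*
 * MFCHASH comes from the patch; MFCTBLSIZ and MFCHASHMOD are assumed here
 * (a power-of-two table with a simple mask) purely for illustration.
 */
#define	MFCTBLSIZ	256
#define	MFCHASHMOD(h)	((h) & (MFCTBLSIZ - 1))
#define	MFCHASH(a, g)	MFCHASHMOD(((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \
			((g) >> 20) ^ ((g) >> 10) ^ (g))

int
main(void)
{
	uint32_t src = 0xc0a80a01;	/* 192.168.10.1, host byte order */
	uint32_t grp = 0xe0000116;	/* 224.0.1.22 */

	printf("(S,G) hashes to bucket %u of %d\n",
	    (unsigned int)MFCHASH(src, grp), MFCTBLSIZ);
	return (0);
}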
*/ - req->icount = viftable[vifi].v_pkt_in; - req->ocount = viftable[vifi].v_pkt_out; - req->ibytes = viftable[vifi].v_bytes_in; - req->obytes = viftable[vifi].v_bytes_out; + req->icount = ipst->ips_vifs[vifi].v_pkt_in; + req->ocount = ipst->ips_vifs[vifi].v_pkt_out; + req->ibytes = ipst->ips_vifs[vifi].v_bytes_in; + req->obytes = ipst->ips_vifs[vifi].v_bytes_out; return (0); } @@ -552,12 +504,12 @@ get_version(uchar_t *data) * Set PIM assert processing global. */ static int -set_assert(int *i) +set_assert(int *i, ip_stack_t *ipst) { if ((*i != 1) && (*i != 0)) return (EINVAL); - pim_assert = *i; + ipst->ips_pim_assert = *i; return (0); } @@ -566,11 +518,11 @@ set_assert(int *i) * Get PIM assert processing global. */ static int -get_assert(uchar_t *data) +get_assert(uchar_t *data, ip_stack_t *ipst) { int *i = (int *)data; - *i = pim_assert; + *i = ipst->ips_pim_assert; return (0); } @@ -579,7 +531,7 @@ get_assert(uchar_t *data) * Enable multicast routing. */ static int -ip_mrouter_init(queue_t *q, uchar_t *data, int datalen) +ip_mrouter_init(queue_t *q, uchar_t *data, int datalen, ip_stack_t *ipst) { conn_t *connp = Q_TO_CONN(q); int *v; @@ -591,83 +543,103 @@ ip_mrouter_init(queue_t *q, uchar_t *data, int datalen) if (*v != 1) return (ENOPROTOOPT); - mutex_enter(&ip_g_mrouter_mutex); - if (ip_g_mrouter != NULL) { - mutex_exit(&ip_g_mrouter_mutex); + mutex_enter(&ipst->ips_ip_g_mrouter_mutex); + if (ipst->ips_ip_g_mrouter != NULL) { + mutex_exit(&ipst->ips_ip_g_mrouter_mutex); return (EADDRINUSE); } - ip_g_mrouter = q; + ipst->ips_ip_g_mrouter = q; connp->conn_multi_router = 1; - - mutex_init(&last_encap_lock, NULL, MUTEX_DEFAULT, NULL); - - mrtstat.mrts_vifctlSize = sizeof (struct vifctl); - mrtstat.mrts_mfcctlSize = sizeof (struct mfcctl); - - pim_assert = 0; - /* In order for tunnels to work we have to turn ip_g_forward on */ - if (!WE_ARE_FORWARDING) { - if (ip_mrtdebug > 1) { + if (!WE_ARE_FORWARDING(ipst)) { + if (ipst->ips_ip_mrtdebug > 1) { (void) mi_strlog(q, 1, SL_TRACE, "ip_mrouter_init: turning on forwarding"); } - saved_ip_g_forward = ip_g_forward; - ip_g_forward = IP_FORWARD_ALWAYS; + ipst->ips_saved_ip_g_forward = ipst->ips_ip_g_forward; + ipst->ips_ip_g_forward = IP_FORWARD_ALWAYS; } - mutex_exit(&ip_g_mrouter_mutex); + mutex_exit(&ipst->ips_ip_g_mrouter_mutex); return (0); } +void +ip_mrouter_stack_init(ip_stack_t *ipst) +{ + mutex_init(&ipst->ips_ip_g_mrouter_mutex, NULL, MUTEX_DEFAULT, NULL); + + ipst->ips_vifs = kmem_zalloc(sizeof (struct vif) * (MAXVIFS+1), + KM_SLEEP); + ipst->ips_mrtstat = kmem_zalloc(sizeof (struct mrtstat), KM_SLEEP); + /* + * mfctable: + * Includes all mfcs, including waiting upcalls. + * Multiple mfcs per bucket. + */ + ipst->ips_mfcs = kmem_zalloc(sizeof (struct mfcb) * MFCTBLSIZ, + KM_SLEEP); + /* + * Define the token bucket filter structures. + * tbftable -> each vif has one of these for storing info. + */ + ipst->ips_tbfs = kmem_zalloc(sizeof (struct tbf) * MAXVIFS, KM_SLEEP); + + mutex_init(&ipst->ips_last_encap_lock, NULL, MUTEX_DEFAULT, NULL); + + ipst->ips_mrtstat->mrts_vifctlSize = sizeof (struct vifctl); + ipst->ips_mrtstat->mrts_mfcctlSize = sizeof (struct mfcctl); +} + /* * Disable multicast routing. * Didn't use global timeout_val (BSD version), instead check the mfctable. 
*/ int -ip_mrouter_done(mblk_t *mp) +ip_mrouter_done(mblk_t *mp, ip_stack_t *ipst) { conn_t *connp; vifi_t vifi; struct mfc *mfc_rt; int i; - mutex_enter(&ip_g_mrouter_mutex); - if (ip_g_mrouter == NULL) { - mutex_exit(&ip_g_mrouter_mutex); + mutex_enter(&ipst->ips_ip_g_mrouter_mutex); + if (ipst->ips_ip_g_mrouter == NULL) { + mutex_exit(&ipst->ips_ip_g_mrouter_mutex); return (EINVAL); } - connp = Q_TO_CONN(ip_g_mrouter); + connp = Q_TO_CONN(ipst->ips_ip_g_mrouter); - if (saved_ip_g_forward != -1) { - if (ip_mrtdebug > 1) { - (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, + if (ipst->ips_saved_ip_g_forward != -1) { + if (ipst->ips_ip_mrtdebug > 1) { + (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, "ip_mrouter_done: turning off forwarding"); } - ip_g_forward = saved_ip_g_forward; - saved_ip_g_forward = -1; + ipst->ips_ip_g_forward = ipst->ips_saved_ip_g_forward; + ipst->ips_saved_ip_g_forward = -1; } /* * Always clear cache when vifs change. - * No need to get last_encap_lock since we are running as a writer. + * No need to get ipst->ips_last_encap_lock since we are running as + * a writer. */ - mutex_enter(&last_encap_lock); - last_encap_src = 0; - last_encap_vif = NULL; - mutex_exit(&last_encap_lock); + mutex_enter(&ipst->ips_last_encap_lock); + ipst->ips_last_encap_src = 0; + ipst->ips_last_encap_vif = NULL; + mutex_exit(&ipst->ips_last_encap_lock); connp->conn_multi_router = 0; - mutex_exit(&ip_g_mrouter_mutex); + mutex_exit(&ipst->ips_ip_g_mrouter_mutex); /* * For each phyint in use, * disable promiscuous reception of all IP multicasts. */ for (vifi = 0; vifi < MAXVIFS; vifi++) { - struct vif *vifp = viftable + vifi; + struct vif *vifp = ipst->ips_vifs + vifi; mutex_enter(&vifp->v_lock); /* @@ -699,7 +671,7 @@ ip_mrouter_done(mblk_t *mp) ipsq = ill->ill_phyint->phyint_ipsq; } else { ipsq = ipsq_try_enter(ipif, NULL, - ip_g_mrouter, mp, + ipst->ips_ip_g_mrouter, mp, ip_restart_optmgmt, NEW_OP, B_TRUE); if (ipsq == NULL) { mutex_exit(&(vifp)->v_lock); @@ -733,11 +705,11 @@ ip_mrouter_done(mblk_t *mp) } } - mutex_enter(&numvifs_mutex); - numvifs = 0; - pim_assert = 0; - reg_vif_num = ALL_VIFS; - mutex_exit(&numvifs_mutex); + mutex_enter(&ipst->ips_numvifs_mutex); + ipst->ips_numvifs = 0; + ipst->ips_pim_assert = 0; + ipst->ips_reg_vif_num = ALL_VIFS; + mutex_exit(&ipst->ips_numvifs_mutex); /* * Free upcall msgs. @@ -745,11 +717,11 @@ ip_mrouter_done(mblk_t *mp) * timeouts remaining on mfcs. 
*/ for (i = 0; i < MFCTBLSIZ; i++) { - mutex_enter(&mfctable[i].mfcb_lock); - mfctable[i].mfcb_refcnt++; - mfctable[i].mfcb_marks |= MFCB_MARK_CONDEMNED; - mutex_exit(&mfctable[i].mfcb_lock); - mfc_rt = mfctable[i].mfcb_mfc; + mutex_enter(&ipst->ips_mfcs[i].mfcb_lock); + ipst->ips_mfcs[i].mfcb_refcnt++; + ipst->ips_mfcs[i].mfcb_marks |= MFCB_MARK_CONDEMNED; + mutex_exit(&ipst->ips_mfcs[i].mfcb_lock); + mfc_rt = ipst->ips_mfcs[i].mfcb_mfc; while (mfc_rt) { /* Free upcalls */ mutex_enter(&mfc_rt->mfc_mutex); @@ -782,32 +754,64 @@ ip_mrouter_done(mblk_t *mp) mutex_exit(&mfc_rt->mfc_mutex); mfc_rt = mfc_rt->mfc_next; } - MFCB_REFRELE(&mfctable[i]); + MFCB_REFRELE(&ipst->ips_mfcs[i]); } - mutex_enter(&ip_g_mrouter_mutex); - ip_g_mrouter = NULL; - mutex_exit(&ip_g_mrouter_mutex); + mutex_enter(&ipst->ips_ip_g_mrouter_mutex); + ipst->ips_ip_g_mrouter = NULL; + mutex_exit(&ipst->ips_ip_g_mrouter_mutex); return (0); } +void +ip_mrouter_stack_destroy(ip_stack_t *ipst) +{ + struct mfcb *mfcbp; + struct mfc *rt; + int i; + + for (i = 0; i < MFCTBLSIZ; i++) { + mfcbp = &ipst->ips_mfcs[i]; + + while ((rt = mfcbp->mfcb_mfc) != NULL) { + (void) printf("ip_mrouter_stack_destroy: free for %d\n", + i); + + mfcbp->mfcb_mfc = rt->mfc_next; + free_queue(rt); + mi_free(rt); + } + } + kmem_free(ipst->ips_vifs, sizeof (struct vif) * (MAXVIFS+1)); + ipst->ips_vifs = NULL; + kmem_free(ipst->ips_mrtstat, sizeof (struct mrtstat)); + ipst->ips_mrtstat = NULL; + kmem_free(ipst->ips_mfcs, sizeof (struct mfcb) * MFCTBLSIZ); + ipst->ips_mfcs = NULL; + kmem_free(ipst->ips_tbfs, sizeof (struct tbf) * MAXVIFS); + ipst->ips_tbfs = NULL; + + mutex_destroy(&ipst->ips_last_encap_lock); + mutex_destroy(&ipst->ips_ip_g_mrouter_mutex); +} + static boolean_t -is_mrouter_off(void) +is_mrouter_off(ip_stack_t *ipst) { conn_t *connp; - mutex_enter(&ip_g_mrouter_mutex); - if (ip_g_mrouter == NULL) { - mutex_exit(&ip_g_mrouter_mutex); + mutex_enter(&ipst->ips_ip_g_mrouter_mutex); + if (ipst->ips_ip_g_mrouter == NULL) { + mutex_exit(&ipst->ips_ip_g_mrouter_mutex); return (B_TRUE); } - connp = Q_TO_CONN(ip_g_mrouter); + connp = Q_TO_CONN(ipst->ips_ip_g_mrouter); if (connp->conn_multi_router == 0) { - mutex_exit(&ip_g_mrouter_mutex); + mutex_exit(&ipst->ips_ip_g_mrouter_mutex); return (B_TRUE); } - mutex_exit(&ip_g_mrouter_mutex); + mutex_exit(&ipst->ips_ip_g_mrouter_mutex); return (B_FALSE); } @@ -846,12 +850,12 @@ lock_good_vif(struct vif *vifp) * Add a vif to the vif table. 
*/ static int -add_vif(struct vifctl *vifcp, queue_t *q, mblk_t *first_mp) +add_vif(struct vifctl *vifcp, queue_t *q, mblk_t *first_mp, ip_stack_t *ipst) { - struct vif *vifp = viftable + vifcp->vifc_vifi; + struct vif *vifp = ipst->ips_vifs + vifcp->vifc_vifi; ipif_t *ipif; int error; - struct tbf *v_tbf = tbftable + vifcp->vifc_vifi; + struct tbf *v_tbf = ipst->ips_tbfs + vifcp->vifc_vifi; conn_t *connp = Q_TO_CONN(q); ipsq_t *ipsq; @@ -860,7 +864,7 @@ add_vif(struct vifctl *vifcp, queue_t *q, mblk_t *first_mp) if (vifcp->vifc_vifi >= MAXVIFS) return (EINVAL); - if (is_mrouter_off()) + if (is_mrouter_off(ipst)) return (EINVAL); mutex_enter(&vifp->v_lock); @@ -894,7 +898,7 @@ add_vif(struct vifctl *vifcp, queue_t *q, mblk_t *first_mp) /* Find the interface with the local address */ ipif = ipif_lookup_addr((ipaddr_t)vifcp->vifc_lcl_addr.s_addr, NULL, connp->conn_zoneid, CONNP_TO_WQ(connp), first_mp, - ip_restart_optmgmt, &error); + ip_restart_optmgmt, &error, ipst); if (ipif == NULL) { VIF_REFRELE(vifp); if (error == EINPROGRESS) @@ -915,8 +919,8 @@ add_vif(struct vifctl *vifcp, queue_t *q, mblk_t *first_mp) return (EINPROGRESS); } - if (ip_mrtdebug > 1) { - (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, + if (ipst->ips_ip_mrtdebug > 1) { + (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, "add_vif: src 0x%x enter", vifcp->vifc_lcl_addr.s_addr); } @@ -928,10 +932,10 @@ add_vif(struct vifctl *vifcp, queue_t *q, mblk_t *first_mp) * No need to get last_encap_lock, since we are running as a writer. */ - mutex_enter(&last_encap_lock); - last_encap_src = 0; - last_encap_vif = NULL; - mutex_exit(&last_encap_lock); + mutex_enter(&ipst->ips_last_encap_lock); + ipst->ips_last_encap_src = 0; + ipst->ips_last_encap_vif = NULL; + mutex_exit(&ipst->ips_last_encap_lock); if (vifcp->vifc_flags & VIFF_TUNNEL) { if ((vifcp->vifc_flags & VIFF_SRCRT) != 0) { @@ -953,12 +957,12 @@ add_vif(struct vifctl *vifcp, queue_t *q, mblk_t *first_mp) * ip_optmgmt_writer(), a lock is not necessary to * protect reg_vif_num. */ - mutex_enter(&numvifs_mutex); - if (reg_vif_num == ALL_VIFS) { - reg_vif_num = vifcp->vifc_vifi; - mutex_exit(&numvifs_mutex); + mutex_enter(&ipst->ips_numvifs_mutex); + if (ipst->ips_reg_vif_num == ALL_VIFS) { + ipst->ips_reg_vif_num = vifcp->vifc_vifi; + mutex_exit(&ipst->ips_numvifs_mutex); } else { - mutex_exit(&numvifs_mutex); + mutex_exit(&ipst->ips_numvifs_mutex); VIF_REFRELE_LOCKED(vifp); ipif_refrele(ipif); ipsq_exit(ipsq, B_TRUE, B_TRUE); @@ -971,9 +975,9 @@ add_vif(struct vifctl *vifcp, queue_t *q, mblk_t *first_mp) VIF_REFRELE_LOCKED(vifp); ipif_refrele(ipif); if (vifcp->vifc_flags & VIFF_REGISTER) { - mutex_enter(&numvifs_mutex); - reg_vif_num = ALL_VIFS; - mutex_exit(&numvifs_mutex); + mutex_enter(&ipst->ips_numvifs_mutex); + ipst->ips_reg_vif_num = ALL_VIFS; + mutex_exit(&ipst->ips_numvifs_mutex); } ipsq_exit(ipsq, B_TRUE, B_TRUE); return (EOPNOTSUPP); @@ -987,14 +991,14 @@ add_vif(struct vifctl *vifcp, queue_t *q, mblk_t *first_mp) * since we released the lock lets make sure that * ip_mrouter_done() has not been called. 
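add_vif(), like ip_mrouter_done() and del_vifp(), clears ips_last_encap_src and ips_last_encap_vif whenever the vif table changes, because that pair is a one-back cache memoizing only the most recent tunnel source-to-vif match and must be invalidated on any vif update. A minimal mutex-protected version of the same idea, with hypothetical names:

#include <pthread.h>
#include <stdint.h>
#include <stddef.h>

struct vif;					/* opaque for the sketch */

static pthread_mutex_t	last_encap_lock = PTHREAD_MUTEX_INITIALIZER;
static uint32_t		last_encap_src;
static struct vif	*last_encap_vif;

/* Remember the most recent src -> vif mapping. */
void
encap_cache_store(uint32_t src, struct vif *vifp)
{
	(void) pthread_mutex_lock(&last_encap_lock);
	last_encap_src = src;
	last_encap_vif = vifp;
	(void) pthread_mutex_unlock(&last_encap_lock);
}

/* Return the cached vif for src, or NULL on a miss. */
struct vif *
encap_cache_lookup(uint32_t src)
{
	struct vif *vifp = NULL;

	(void) pthread_mutex_lock(&last_encap_lock);
	if (src != 0 && src == last_encap_src)
		vifp = last_encap_vif;
	(void) pthread_mutex_unlock(&last_encap_lock);
	return (vifp);
}

/* Any change to the vif table invalidates the cache. */
void
encap_cache_flush(void)
{
	(void) pthread_mutex_lock(&last_encap_lock);
	last_encap_src = 0;
	last_encap_vif = NULL;
	(void) pthread_mutex_unlock(&last_encap_lock);
}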
*/ - if (error != 0 || is_mrouter_off()) { + if (error != 0 || is_mrouter_off(ipst)) { if (error == 0) (void) ip_delmulti(INADDR_ANY, ipif, B_TRUE, B_TRUE); if (vifcp->vifc_flags & VIFF_REGISTER) { - mutex_enter(&numvifs_mutex); - reg_vif_num = ALL_VIFS; - mutex_exit(&numvifs_mutex); + mutex_enter(&ipst->ips_numvifs_mutex); + ipst->ips_reg_vif_num = ALL_VIFS; + mutex_exit(&ipst->ips_numvifs_mutex); } VIF_REFRELE_LOCKED(vifp); ipif_refrele(ipif); @@ -1026,13 +1030,13 @@ add_vif(struct vifctl *vifcp, queue_t *q, mblk_t *first_mp) mutex_init(&vifp->v_tbf->tbf_lock, NULL, MUTEX_DEFAULT, NULL); /* Adjust numvifs up, if the vifi is higher than numvifs */ - mutex_enter(&numvifs_mutex); - if (numvifs <= vifcp->vifc_vifi) - numvifs = vifcp->vifc_vifi + 1; - mutex_exit(&numvifs_mutex); + mutex_enter(&ipst->ips_numvifs_mutex); + if (ipst->ips_numvifs <= vifcp->vifc_vifi) + ipst->ips_numvifs = vifcp->vifc_vifi + 1; + mutex_exit(&ipst->ips_numvifs_mutex); - if (ip_mrtdebug > 1) { - (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, + if (ipst->ips_ip_mrtdebug > 1) { + (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, "add_vif: #%d, lcladdr %x, %s %x, thresh %x, rate %d", vifcp->vifc_vifi, ntohl(vifcp->vifc_lcl_addr.s_addr), @@ -1055,7 +1059,7 @@ del_vifp(struct vif *vifp) struct tbf *t = vifp->v_tbf; mblk_t *mp0; vifi_t vifi; - + ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; ASSERT(vifp->v_marks & VIF_MARK_CONDEMNED); ASSERT(t != NULL); @@ -1066,8 +1070,8 @@ del_vifp(struct vif *vifp) ASSERT(vifp->v_ipif != NULL); ipif_refrele(vifp->v_ipif); - if (ip_mrtdebug > 1) { - (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, + if (ipst->ips_ip_mrtdebug > 1) { + (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, "del_vif: src 0x%x\n", vifp->v_lcl_addr.s_addr); } @@ -1093,36 +1097,36 @@ del_vifp(struct vif *vifp) * Always clear cache when vifs change. * No need to get last_encap_lock since we are running as a writer. */ - mutex_enter(&last_encap_lock); - if (vifp == last_encap_vif) { - last_encap_vif = NULL; - last_encap_src = 0; + mutex_enter(&ipst->ips_last_encap_lock); + if (vifp == ipst->ips_last_encap_vif) { + ipst->ips_last_encap_vif = NULL; + ipst->ips_last_encap_src = 0; } - mutex_exit(&last_encap_lock); + mutex_exit(&ipst->ips_last_encap_lock); mutex_destroy(&t->tbf_lock); bzero(vifp->v_tbf, sizeof (*(vifp->v_tbf))); /* Adjust numvifs down */ - mutex_enter(&numvifs_mutex); - for (vifi = numvifs; vifi != 0; vifi--) /* vifi is unsigned */ - if (viftable[vifi - 1].v_lcl_addr.s_addr != 0) + mutex_enter(&ipst->ips_numvifs_mutex); + for (vifi = ipst->ips_numvifs; vifi != 0; vifi--) /* vifi is unsigned */ + if (ipst->ips_vifs[vifi - 1].v_lcl_addr.s_addr != 0) break; - numvifs = vifi; - mutex_exit(&numvifs_mutex); + ipst->ips_numvifs = vifi; + mutex_exit(&ipst->ips_numvifs_mutex); bzero(vifp, sizeof (*vifp)); } static int -del_vif(vifi_t *vifip, queue_t *q, mblk_t *first_mp) +del_vif(vifi_t *vifip, queue_t *q, mblk_t *first_mp, ip_stack_t *ipst) { - struct vif *vifp = viftable + *vifip; + struct vif *vifp = ipst->ips_vifs + *vifip; conn_t *connp; ipsq_t *ipsq; - if (*vifip >= numvifs) + if (*vifip >= ipst->ips_numvifs) return (EINVAL); @@ -1212,7 +1216,7 @@ del_vif(vifi_t *vifip, queue_t *q, mblk_t *first_mp) * Add an mfc entry. 
*/ static int -add_mfc(struct mfcctl *mfccp) +add_mfc(struct mfcctl *mfccp, ip_stack_t *ipst) { struct mfc *rt; struct rtdetq *rte; @@ -1236,17 +1240,17 @@ add_mfc(struct mfcctl *mfccp) } if ((mfccp->mfcc_parent != NO_VIF) && - (viftable[mfccp->mfcc_parent].v_ipif == NULL)) { + (ipst->ips_vifs[mfccp->mfcc_parent].v_ipif == NULL)) { ip0dbg(("ADD_MFC: NULL ipif for parent vif %d\n", (int)mfccp->mfcc_parent)); return (EINVAL); } - if (is_mrouter_off()) { + if (is_mrouter_off(ipst)) { return (EINVAL); } - mfcbp = &mfctable[MFCHASH(mfccp->mfcc_origin.s_addr, + mfcbp = &ipst->ips_mfcs[MFCHASH(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr)]; MFCB_REFHOLD(mfcbp); MFCFIND(mfcbp, mfccp->mfcc_origin.s_addr, @@ -1254,8 +1258,8 @@ add_mfc(struct mfcctl *mfccp) /* If an entry already exists, just update the fields */ if (rt) { - if (ip_mrtdebug > 1) { - (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, + if (ipst->ips_ip_mrtdebug > 1) { + (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, "add_mfc: update o %x grp %x parent %x", ntohl(mfccp->mfcc_origin.s_addr), ntohl(mfccp->mfcc_mcastgrp.s_addr), @@ -1264,10 +1268,10 @@ add_mfc(struct mfcctl *mfccp) mutex_enter(&rt->mfc_mutex); rt->mfc_parent = mfccp->mfcc_parent; - mutex_enter(&numvifs_mutex); - for (i = 0; i < (int)numvifs; i++) + mutex_enter(&ipst->ips_numvifs_mutex); + for (i = 0; i < (int)ipst->ips_numvifs; i++) rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; - mutex_exit(&numvifs_mutex); + mutex_exit(&ipst->ips_numvifs_mutex); mutex_exit(&rt->mfc_mutex); MFCB_REFRELE(mfcbp); @@ -1291,14 +1295,15 @@ add_mfc(struct mfcctl *mfccp) ntohl(mfccp->mfcc_mcastgrp.s_addr), mfccp->mfcc_parent); - if (ip_mrtdebug > 1) { - (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, + if (ipst->ips_ip_mrtdebug > 1) { + (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, + SL_TRACE, "add_mfc: o %x g %x p %x", ntohl(mfccp->mfcc_origin.s_addr), ntohl(mfccp->mfcc_mcastgrp.s_addr), mfccp->mfcc_parent); } - fill_route(rt, mfccp); + fill_route(rt, mfccp, ipst); /* * Prevent cleanup of cache entry. @@ -1351,14 +1356,14 @@ add_mfc(struct mfcctl *mfccp) */ if (nstl == 0) { mutex_enter(&(mfcbp->mfcb_lock)); - if (ip_mrtdebug > 1) { - (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, + if (ipst->ips_ip_mrtdebug > 1) { + (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, "add_mfc: no upcall o %x g %x p %x", ntohl(mfccp->mfcc_origin.s_addr), ntohl(mfccp->mfcc_mcastgrp.s_addr), mfccp->mfcc_parent); } - if (is_mrouter_off()) { + if (is_mrouter_off(ipst)) { mutex_exit(&mfcbp->mfcb_lock); MFCB_REFRELE(mfcbp); return (EINVAL); @@ -1372,7 +1377,7 @@ add_mfc(struct mfcctl *mfccp) (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) && (!(rt->mfc_marks & MFCB_MARK_CONDEMNED))) { - fill_route(rt, mfccp); + fill_route(rt, mfccp, ipst); mutex_exit(&rt->mfc_mutex); break; } @@ -1391,7 +1396,7 @@ add_mfc(struct mfcctl *mfccp) /* Insert new entry at head of hash chain */ mutex_enter(&rt->mfc_mutex); - fill_route(rt, mfccp); + fill_route(rt, mfccp, ipst); /* Link into table */ rt->mfc_next = mfcbp->mfcb_mfc; @@ -1409,18 +1414,18 @@ add_mfc(struct mfcctl *mfccp) * Fills in mfc structure from mrouted mfcctl. 
*/ static void -fill_route(struct mfc *rt, struct mfcctl *mfccp) +fill_route(struct mfc *rt, struct mfcctl *mfccp, ip_stack_t *ipst) { int i; rt->mfc_origin = mfccp->mfcc_origin; rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; rt->mfc_parent = mfccp->mfcc_parent; - mutex_enter(&numvifs_mutex); - for (i = 0; i < (int)numvifs; i++) { + mutex_enter(&ipst->ips_numvifs_mutex); + for (i = 0; i < (int)ipst->ips_numvifs; i++) { rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; } - mutex_exit(&numvifs_mutex); + mutex_exit(&ipst->ips_numvifs_mutex); /* Initialize pkt counters per src-grp */ rt->mfc_pkt_cnt = 0; rt->mfc_byte_cnt = 0; @@ -1484,7 +1489,7 @@ release_mfc(struct mfcb *mfcbp) * Delete an mfc entry. */ static int -del_mfc(struct mfcctl *mfccp) +del_mfc(struct mfcctl *mfccp, ip_stack_t *ipst) { struct in_addr origin; struct in_addr mcastgrp; @@ -1495,17 +1500,17 @@ del_mfc(struct mfcctl *mfccp) mcastgrp = mfccp->mfcc_mcastgrp; hash = MFCHASH(origin.s_addr, mcastgrp.s_addr); - if (ip_mrtdebug > 1) { - (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, + if (ipst->ips_ip_mrtdebug > 1) { + (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, "del_mfc: o %x g %x", ntohl(origin.s_addr), ntohl(mcastgrp.s_addr)); } - MFCB_REFHOLD(&mfctable[hash]); + MFCB_REFHOLD(&ipst->ips_mfcs[hash]); /* Find mfc in mfctable, finds only entries without upcalls */ - for (rt = mfctable[hash].mfcb_mfc; rt; rt = rt->mfc_next) { + for (rt = ipst->ips_mfcs[hash].mfcb_mfc; rt; rt = rt->mfc_next) { mutex_enter(&rt->mfc_mutex); if (origin.s_addr == rt->mfc_origin.s_addr && mcastgrp.s_addr == rt->mfc_mcastgrp.s_addr && @@ -1520,7 +1525,7 @@ del_mfc(struct mfcctl *mfccp) * or rt not in mfctable. */ if (rt == NULL) { - MFCB_REFRELE(&mfctable[hash]); + MFCB_REFRELE(&ipst->ips_mfcs[hash]); return (EADDRNOTAVAIL); } @@ -1528,7 +1533,7 @@ del_mfc(struct mfcctl *mfccp) /* * no need to hold lock as we have a reference. */ - mfctable[hash].mfcb_marks |= MFCB_MARK_CONDEMNED; + ipst->ips_mfcs[hash].mfcb_marks |= MFCB_MARK_CONDEMNED; /* error checking */ if (rt->mfc_timeout_id != 0) { ip0dbg(("del_mfc: TIMEOUT NOT 0, rte not null")); @@ -1551,7 +1556,7 @@ del_mfc(struct mfcctl *mfccp) rt->mfc_marks |= MFCB_MARK_CONDEMNED; mutex_exit(&rt->mfc_mutex); - MFCB_REFRELE(&mfctable[hash]); + MFCB_REFRELE(&ipst->ips_mfcs[hash]); return (0); } @@ -1585,9 +1590,10 @@ ip_mforward(ill_t *ill, ipha_t *ipha, mblk_t *mp) vifi_t vifi; boolean_t pim_reg_packet = B_FALSE; struct mfcb *mfcbp; + ip_stack_t *ipst = ill->ill_ipst; - if (ip_mrtdebug > 1) { - (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, + if (ipst->ips_ip_mrtdebug > 1) { + (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, "ip_mforward: RECV ipha_src %x, ipha_dst %x, ill %s", ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst), ill->ill_name); @@ -1605,8 +1611,8 @@ ip_mforward(ill_t *ill, ipha_t *ipha, mblk_t *mp) */ if (CLASSD(dst) && (ipha->ipha_ttl <= 1 || (ipaddr_t)ntohl(dst) <= INADDR_MAX_LOCAL_GROUP)) { - if (ip_mrtdebug > 1) { - (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, + if (ipst->ips_ip_mrtdebug > 1) { + (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, "ip_mforward: not forwarded ttl %d," " dst 0x%x ill %s", ipha->ipha_ttl, ntohl(dst), ill->ill_name); @@ -1625,13 +1631,15 @@ ip_mforward(ill_t *ill, ipha_t *ipha, mblk_t *mp) * encode information in mp->b_prev. 
*/ mp->b_prev = NULL; - if (ip_mrtdebug > 1) { + if (ipst->ips_ip_mrtdebug > 1) { if (tunnel_src != 0) { - (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, + (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, + SL_TRACE, "ip_mforward: ill %s arrived via ENCAP TUN", ill->ill_name); } else if (pim_reg_packet) { - (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, + (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, + SL_TRACE, "ip_mforward: ill %s arrived via" " REGISTER VIF", ill->ill_name); @@ -1641,8 +1649,8 @@ ip_mforward(ill_t *ill, ipha_t *ipha, mblk_t *mp) (uint_t)(IP_SIMPLE_HDR_LENGTH + TUNNEL_LEN) >> 2 || ((uchar_t *)(ipha + 1))[1] != IPOPT_LSRR) { /* Packet arrived via a physical interface. */ - if (ip_mrtdebug > 1) { - (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, + if (ipst->ips_ip_mrtdebug > 1) { + (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, "ip_mforward: ill %s arrived via PHYINT", ill->ill_name); } @@ -1661,7 +1669,7 @@ ip_mforward(ill_t *ill, ipha_t *ipha, mblk_t *mp) return (-1); } - mrtstat.mrts_fwd_in++; + ipst->ips_mrtstat->mrts_fwd_in++; src = ipha->ipha_src; /* Find route in cache, return NULL if not there or upcalls q'ed. */ @@ -1673,21 +1681,23 @@ ip_mforward(ill_t *ill, ipha_t *ipha, mblk_t *mp) * guard against the rt being deleted, so release lock after reading. */ - if (is_mrouter_off()) + if (is_mrouter_off(ipst)) return (-1); - mfcbp = &mfctable[MFCHASH(src, dst)]; + mfcbp = &ipst->ips_mfcs[MFCHASH(src, dst)]; MFCB_REFHOLD(mfcbp); MFCFIND(mfcbp, src, dst, rt); /* Entry exists, so forward if necessary */ if (rt != NULL) { int ret = 0; - mrtstat.mrts_mfc_hits++; + ipst->ips_mrtstat->mrts_mfc_hits++; if (pim_reg_packet) { - ASSERT(reg_vif_num != ALL_VIFS); + ASSERT(ipst->ips_reg_vif_num != ALL_VIFS); ret = ip_mdq(mp, ipha, - viftable[reg_vif_num].v_ipif->ipif_ill, 0, rt); + ipst->ips_vifs[ipst->ips_reg_vif_num]. + v_ipif->ipif_ill, + 0, rt); } else { ret = ip_mdq(mp, ipha, ill, tunnel_src, rt); } @@ -1712,13 +1722,13 @@ ip_mforward(ill_t *ill, ipha_t *ipha, mblk_t *mp) uint_t hash; int npkts; boolean_t new_mfc = B_FALSE; - mrtstat.mrts_mfc_misses++; + ipst->ips_mrtstat->mrts_mfc_misses++; /* BSD uses mrts_no_route++ */ - if (ip_mrtdebug > 1) { - (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, + if (ipst->ips_ip_mrtdebug > 1) { + (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, "ip_mforward: no rte ill %s src %x g %x misses %d", ill->ill_name, ntohl(src), ntohl(dst), - (int)mrtstat.mrts_mfc_misses); + (int)ipst->ips_mrtstat->mrts_mfc_misses); } /* * The order of the following code differs from the BSD code. @@ -1729,23 +1739,24 @@ ip_mforward(ill_t *ill, ipha_t *ipha, mblk_t *mp) /* Lock mfctable. */ hash = MFCHASH(src, dst); - mutex_enter(&(mfctable[hash].mfcb_lock)); + mutex_enter(&(ipst->ips_mfcs[hash].mfcb_lock)); /* * If we are turning off mrouted return an error */ - if (is_mrouter_off()) { + if (is_mrouter_off(ipst)) { mutex_exit(&mfcbp->mfcb_lock); MFCB_REFRELE(mfcbp); return (-1); } /* Is there an upcall waiting for this packet? 
 */
- for (mfc_rt = mfctable[hash].mfcb_mfc; mfc_rt;
+ for (mfc_rt = ipst->ips_mfcs[hash].mfcb_mfc; mfc_rt;
mfc_rt = mfc_rt->mfc_next) {
mutex_enter(&mfc_rt->mfc_mutex);
- if (ip_mrtdebug > 1) {
- (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE,
+ if (ipst->ips_ip_mrtdebug > 1) {
+ (void) mi_strlog(ipst->ips_ip_g_mrouter, 1,
+ SL_TRACE,
"ip_mforward: MFCTAB hash %d o 0x%x"
" g 0x%x\n", hash,
ntohl(mfc_rt->mfc_origin.s_addr),
@@ -1764,7 +1775,7 @@ ip_mforward(ill_t *ill, ipha_t *ipha, mblk_t *mp)
if (mfc_rt == NULL) {
mfc_rt = (struct mfc *)mi_zalloc(sizeof (struct mfc));
if (mfc_rt == NULL) {
- mrtstat.mrts_fwd_drop++;
+ ipst->ips_mrtstat->mrts_fwd_drop++;
ip1dbg(("ip_mforward: out of memory "
"for mfc, mfc_rt\n"));
goto error_return;
@@ -1774,7 +1785,7 @@ ip_mforward(ill_t *ill, ipha_t *ipha, mblk_t *mp)
/* TODO could copy header and dup rest */
mp_copy = copymsg(mp);
if (mp_copy == NULL) {
- mrtstat.mrts_fwd_drop++;
+ ipst->ips_mrtstat->mrts_fwd_drop++;
ip1dbg(("ip_mforward: out of memory for "
"mblk, mp_copy\n"));
goto error_return;
@@ -1785,7 +1796,7 @@ ip_mforward(ill_t *ill, ipha_t *ipha, mblk_t *mp)
/* Add this packet into rtdetq */
rte = (struct rtdetq *)mi_zalloc(sizeof (struct rtdetq));
if (rte == NULL) {
- mrtstat.mrts_fwd_drop++;
+ ipst->ips_mrtstat->mrts_fwd_drop++;
mutex_exit(&mfc_rt->mfc_mutex);
ip1dbg(("ip_mforward: out of memory for"
" rtdetq, rte\n"));
@@ -1794,15 +1805,17 @@ ip_mforward(ill_t *ill, ipha_t *ipha, mblk_t *mp)
mp0 = copymsg(mp);
if (mp0 == NULL) {
- mrtstat.mrts_fwd_drop++;
+ ipst->ips_mrtstat->mrts_fwd_drop++;
ip1dbg(("ip_mforward: out of memory for mblk, mp0\n"));
mutex_exit(&mfc_rt->mfc_mutex);
goto error_return;
}
rte->mp = mp0;
if (pim_reg_packet) {
- ASSERT(reg_vif_num != ALL_VIFS);
- rte->ill = viftable[reg_vif_num].v_ipif->ipif_ill;
+ ASSERT(ipst->ips_reg_vif_num != ALL_VIFS);
+ rte->ill =
+ ipst->ips_vifs[ipst->ips_reg_vif_num].
+ v_ipif->ipif_ill; } else { rte->ill = ill; } @@ -1816,12 +1829,12 @@ ip_mforward(ill_t *ill, ipha_t *ipha, mblk_t *mp) for (rte_m = mfc_rt->mfc_rte, npkts = 0; rte_m; rte_m = rte_m->rte_next) npkts++; - if (ip_mrtdebug > 1) { - (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, + if (ipst->ips_ip_mrtdebug > 1) { + (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, "ip_mforward: upcalls %d\n", npkts); } if (npkts > MAX_UPQ) { - mrtstat.mrts_upq_ovflw++; + ipst->ips_mrtstat->mrts_upq_ovflw++; mutex_exit(&mfc_rt->mfc_mutex); goto error_return; } @@ -1836,22 +1849,23 @@ ip_mforward(ill_t *ill, ipha_t *ipha, mblk_t *mp) mfc_rt->mfc_origin.s_addr = src; mfc_rt->mfc_mcastgrp.s_addr = dst; - mutex_enter(&numvifs_mutex); - for (i = 0; i < (int)numvifs; i++) + mutex_enter(&ipst->ips_numvifs_mutex); + for (i = 0; i < (int)ipst->ips_numvifs; i++) mfc_rt->mfc_ttls[i] = 0; - mutex_exit(&numvifs_mutex); + mutex_exit(&ipst->ips_numvifs_mutex); mfc_rt->mfc_parent = ALL_VIFS; /* Link into table */ - if (ip_mrtdebug > 1) { - (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, + if (ipst->ips_ip_mrtdebug > 1) { + (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, + SL_TRACE, "ip_mforward: NEW MFCTAB hash %d o 0x%x " "g 0x%x\n", hash, ntohl(mfc_rt->mfc_origin.s_addr), ntohl(mfc_rt->mfc_mcastgrp.s_addr)); } - mfc_rt->mfc_next = mfctable[hash].mfcb_mfc; - mfctable[hash].mfcb_mfc = mfc_rt; + mfc_rt->mfc_next = ipst->ips_mfcs[hash].mfcb_mfc; + ipst->ips_mfcs[hash].mfcb_mfc = mfc_rt; mfc_rt->mfc_rte = NULL; } @@ -1878,38 +1892,40 @@ ip_mforward(ill_t *ill, ipha_t *ipha, mblk_t *mp) im = (struct igmpmsg *)mp_copy->b_rptr; im->im_msgtype = IGMPMSG_NOCACHE; im->im_mbz = 0; - mutex_enter(&numvifs_mutex); + mutex_enter(&ipst->ips_numvifs_mutex); if (pim_reg_packet) { - im->im_vif = (uchar_t)reg_vif_num; - mutex_exit(&numvifs_mutex); + im->im_vif = (uchar_t)ipst->ips_reg_vif_num; + mutex_exit(&ipst->ips_numvifs_mutex); } else { /* * XXX do we need to hold locks here ? */ - for (vifi = 0; vifi < numvifs; vifi++) { - if (viftable[vifi].v_ipif == NULL) + for (vifi = 0; + vifi < ipst->ips_numvifs; + vifi++) { + if (ipst->ips_vifs[vifi].v_ipif == NULL) continue; - if (viftable[vifi].v_ipif->ipif_ill == - ill) { + if (ipst->ips_vifs[vifi]. 
+ v_ipif->ipif_ill == ill) { im->im_vif = (uchar_t)vifi; break; } } - mutex_exit(&numvifs_mutex); - ASSERT(vifi < numvifs); + mutex_exit(&ipst->ips_numvifs_mutex); + ASSERT(vifi < ipst->ips_numvifs); } - mrtstat.mrts_upcalls++; + ipst->ips_mrtstat->mrts_upcalls++; /* Timer to discard upcalls if mrouted is too slow */ mfc_rt->mfc_timeout_id = timeout(expire_upcalls, mfc_rt, EXPIRE_TIMEOUT * UPCALL_EXPIRE); mutex_exit(&mfc_rt->mfc_mutex); - mutex_exit(&(mfctable[hash].mfcb_lock)); - putnext(RD(ip_g_mrouter), mp_copy); + mutex_exit(&(ipst->ips_mfcs[hash].mfcb_lock)); + putnext(RD(ipst->ips_ip_g_mrouter), mp_copy); } else { mutex_exit(&mfc_rt->mfc_mutex); - mutex_exit(&(mfctable[hash].mfcb_lock)); + mutex_exit(&(ipst->ips_mfcs[hash].mfcb_lock)); freemsg(mp_copy); } @@ -1919,7 +1935,7 @@ ip_mforward(ill_t *ill, ipha_t *ipha, mblk_t *mp) else return (0); error_return: - mutex_exit(&(mfctable[hash].mfcb_lock)); + mutex_exit(&(ipst->ips_mfcs[hash].mfcb_lock)); MFCB_REFRELE(mfcbp); if (mfc_rt != NULL && (new_mfc == B_TRUE)) mi_free((char *)mfc_rt); @@ -1943,15 +1959,22 @@ expire_upcalls(void *arg) struct mfc *mfc_rt = arg; uint_t hash; struct mfc *prev_mfc, *mfc0; + ip_stack_t *ipst; + + if (mfc_rt->mfc_rte == NULL || mfc_rt->mfc_rte->ill != NULL) { + cmn_err(CE_WARN, "expire_upcalls: no ILL\n"); + return; + } + ipst = mfc_rt->mfc_rte->ill->ill_ipst; hash = MFCHASH(mfc_rt->mfc_origin.s_addr, mfc_rt->mfc_mcastgrp.s_addr); - if (ip_mrtdebug > 1) { - (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, + if (ipst->ips_ip_mrtdebug > 1) { + (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, "expire_upcalls: hash %d s %x g %x", hash, ntohl(mfc_rt->mfc_origin.s_addr), ntohl(mfc_rt->mfc_mcastgrp.s_addr)); } - MFCB_REFHOLD(&mfctable[hash]); + MFCB_REFHOLD(&ipst->ips_mfcs[hash]); mutex_enter(&mfc_rt->mfc_mutex); /* * if timeout has been set to zero, than the @@ -1959,11 +1982,11 @@ expire_upcalls(void *arg) */ if (mfc_rt->mfc_timeout_id == 0) goto done; - mrtstat.mrts_cache_cleanups++; + ipst->ips_mrtstat->mrts_cache_cleanups++; mfc_rt->mfc_timeout_id = 0; /* Determine entry to be cleaned up in cache table. 
*/ - for (prev_mfc = mfc0 = mfctable[hash].mfcb_mfc; mfc0; + for (prev_mfc = mfc0 = ipst->ips_mfcs[hash].mfcb_mfc; mfc0; prev_mfc = mfc0, mfc0 = mfc0->mfc_next) if (mfc0 == mfc_rt) break; @@ -1975,7 +1998,7 @@ expire_upcalls(void *arg) /* * Delete the entry from the cache */ - mfctable[hash].mfcb_marks |= MFCB_MARK_CONDEMNED; + ipst->ips_mfcs[hash].mfcb_marks |= MFCB_MARK_CONDEMNED; mfc_rt->mfc_marks |= MFCB_MARK_CONDEMNED; /* @@ -1984,7 +2007,7 @@ expire_upcalls(void *arg) */ done: mutex_exit(&mfc_rt->mfc_mutex); - MFCB_REFRELE(&mfctable[hash]); + MFCB_REFRELE(&ipst->ips_mfcs[hash]); } /* @@ -1999,9 +2022,10 @@ ip_mdq(mblk_t *mp, ipha_t *ipha, ill_t *ill, ipaddr_t tunnel_src, ipaddr_t dst = ipha->ipha_dst; size_t plen = msgdsize(mp); vifi_t num_of_vifs; + ip_stack_t *ipst = ill->ill_ipst; - if (ip_mrtdebug > 1) { - (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, + if (ipst->ips_ip_mrtdebug > 1) { + (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, "ip_mdq: SEND src %x, ipha_dst %x, ill %s", ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst), ill->ill_name); @@ -2030,14 +2054,14 @@ ip_mdq(mblk_t *mp, ipha_t *ipha, ill_t *ill, ipaddr_t tunnel_src, if (vifi == NO_VIF) { ip1dbg(("ip_mdq: no route for origin ill %s, vifi is NO_VIF\n", ill->ill_name)); - if (ip_mrtdebug > 1) { - (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, + if (ipst->ips_ip_mrtdebug > 1) { + (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, "ip_mdq: vifi is NO_VIF ill = %s", ill->ill_name); } return (-1); /* drop pkt */ } - if (!lock_good_vif(&viftable[vifi])) + if (!lock_good_vif(&ipst->ips_vifs[vifi])) return (-1); /* * The MFC entries are not cleaned up when an ipif goes @@ -2045,14 +2069,14 @@ ip_mdq(mblk_t *mp, ipha_t *ipha, ill_t *ill, ipaddr_t tunnel_src, * an ipif that has been closed. Note: reset_mrt_vif_ipif * sets the v_ipif to NULL when the ipif disappears. */ - ASSERT(viftable[vifi].v_ipif != NULL); + ASSERT(ipst->ips_vifs[vifi].v_ipif != NULL); - if (vifi >= numvifs) { + if (vifi >= ipst->ips_numvifs) { cmn_err(CE_WARN, "ip_mdq: illegal vifi %d numvifs " "%d ill %s viftable ill %s\n", - (int)vifi, (int)numvifs, ill->ill_name, - viftable[vifi].v_ipif->ipif_ill->ill_name); - unlock_good_vif(&viftable[vifi]); + (int)vifi, (int)ipst->ips_numvifs, ill->ill_name, + ipst->ips_vifs[vifi].v_ipif->ipif_ill->ill_name); + unlock_good_vif(&ipst->ips_vifs[vifi]); return (-1); } /* @@ -2060,23 +2084,24 @@ ip_mdq(mblk_t *mp, ipha_t *ipha, ill_t *ill, ipaddr_t tunnel_src, * origin. But do match on the groups as we nominate only one * ill in the group for receiving allmulti packets. 
*/ - if ((viftable[vifi].v_ipif->ipif_ill != ill && + if ((ipst->ips_vifs[vifi].v_ipif->ipif_ill != ill && (ill->ill_group == NULL || - viftable[vifi].v_ipif->ipif_ill->ill_group != ill->ill_group)) || - (viftable[vifi].v_rmt_addr.s_addr != tunnel_src)) { + ipst->ips_vifs[vifi].v_ipif->ipif_ill->ill_group != + ill->ill_group)) || + (ipst->ips_vifs[vifi].v_rmt_addr.s_addr != tunnel_src)) { /* Came in the wrong interface */ ip1dbg(("ip_mdq: arrived wrong if, vifi %d " "numvifs %d ill %s viftable ill %s\n", - (int)vifi, (int)numvifs, ill->ill_name, - viftable[vifi].v_ipif->ipif_ill->ill_name)); - if (ip_mrtdebug > 1) { - (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, + (int)vifi, (int)ipst->ips_numvifs, ill->ill_name, + ipst->ips_vifs[vifi].v_ipif->ipif_ill->ill_name)); + if (ipst->ips_ip_mrtdebug > 1) { + (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, "ip_mdq: arrived wrong if, vifi %d ill " "%s viftable ill %s\n", (int)vifi, ill->ill_name, - viftable[vifi].v_ipif->ipif_ill->ill_name); + ipst->ips_vifs[vifi].v_ipif->ipif_ill->ill_name); } - mrtstat.mrts_wrong_if++; + ipst->ips_mrtstat->mrts_wrong_if++; rt->mfc_wrong_if++; /* @@ -2087,19 +2112,19 @@ ip_mdq(mblk_t *mp, ipha_t *ipha, ill_t *ill, ipaddr_t tunnel_src, * We use the first ipif on the list, since it's all we have. * Chances are the ipif_flags are the same for ipifs on the ill. */ - if (pim_assert && rt->mfc_ttls[vifi] > 0 && + if (ipst->ips_pim_assert && rt->mfc_ttls[vifi] > 0 && (ill->ill_ipif->ipif_flags & IPIF_BROADCAST) && - !(viftable[vifi].v_flags & VIFF_TUNNEL)) { + !(ipst->ips_vifs[vifi].v_flags & VIFF_TUNNEL)) { mblk_t *mp_copy; struct igmpmsg *im; /* TODO could copy header and dup rest */ mp_copy = copymsg(mp); if (mp_copy == NULL) { - mrtstat.mrts_fwd_drop++; + ipst->ips_mrtstat->mrts_fwd_drop++; ip1dbg(("ip_mdq: out of memory " "for mblk, mp_copy\n")); - unlock_good_vif(&viftable[vifi]); + unlock_good_vif(&ipst->ips_vifs[vifi]); return (-1); } @@ -2107,9 +2132,9 @@ ip_mdq(mblk_t *mp, ipha_t *ipha, ill_t *ill, ipaddr_t tunnel_src, im->im_msgtype = IGMPMSG_WRONGVIF; im->im_mbz = 0; im->im_vif = (ushort_t)vifi; - putnext(RD(ip_g_mrouter), mp_copy); + putnext(RD(ipst->ips_ip_g_mrouter), mp_copy); } - unlock_good_vif(&viftable[vifi]); + unlock_good_vif(&ipst->ips_vifs[vifi]); if (tunnel_src != 0) return (1); else @@ -2118,18 +2143,18 @@ ip_mdq(mblk_t *mp, ipha_t *ipha, ill_t *ill, ipaddr_t tunnel_src, /* * If I sourced this packet, it counts as output, else it was input. */ - if (ipha->ipha_src == viftable[vifi].v_lcl_addr.s_addr) { - viftable[vifi].v_pkt_out++; - viftable[vifi].v_bytes_out += plen; + if (ipha->ipha_src == ipst->ips_vifs[vifi].v_lcl_addr.s_addr) { + ipst->ips_vifs[vifi].v_pkt_out++; + ipst->ips_vifs[vifi].v_bytes_out += plen; } else { - viftable[vifi].v_pkt_in++; - viftable[vifi].v_bytes_in += plen; + ipst->ips_vifs[vifi].v_pkt_in++; + ipst->ips_vifs[vifi].v_bytes_in += plen; } mutex_enter(&rt->mfc_mutex); rt->mfc_pkt_cnt++; rt->mfc_byte_cnt += plen; mutex_exit(&rt->mfc_mutex); - unlock_good_vif(&viftable[vifi]); + unlock_good_vif(&ipst->ips_vifs[vifi]); /* * For each vif, decide if a copy of the packet should be forwarded. * Forward if: @@ -2138,10 +2163,12 @@ ip_mdq(mblk_t *mp, ipha_t *ipha, ill_t *ill, ipaddr_t tunnel_src, * A non-zero mfc_ttl indicates that the vif is part of * the output set for the mfc entry. 
*/ - mutex_enter(&numvifs_mutex); - num_of_vifs = numvifs; - mutex_exit(&numvifs_mutex); - for (vifp = viftable, vifi = 0; vifi < num_of_vifs; vifp++, vifi++) { + mutex_enter(&ipst->ips_numvifs_mutex); + num_of_vifs = ipst->ips_numvifs; + mutex_exit(&ipst->ips_numvifs_mutex); + for (vifp = ipst->ips_vifs, vifi = 0; + vifi < num_of_vifs; + vifp++, vifi++) { if (!lock_good_vif(vifp)) continue; if ((rt->mfc_ttls[vifi] > 0) && @@ -2154,7 +2181,7 @@ ip_mdq(mblk_t *mp, ipha_t *ipha, ill_t *ill, ipaddr_t tunnel_src, vifp->v_pkt_out++; vifp->v_bytes_out += plen; MC_SEND(ipha, mp, vifp, dst); - mrtstat.mrts_fwd_out++; + ipst->ips_mrtstat->mrts_fwd_out++; } unlock_good_vif(vifp); } @@ -2173,19 +2200,20 @@ static void phyint_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst) { mblk_t *mp_copy; + ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; /* Make a new reference to the packet */ mp_copy = copymsg(mp); /* TODO could copy header and dup rest */ if (mp_copy == NULL) { - mrtstat.mrts_fwd_drop++; + ipst->ips_mrtstat->mrts_fwd_drop++; ip1dbg(("phyint_send: out of memory for mblk, mp_copy\n")); return; } if (vifp->v_rate_limit <= 0) tbf_send_packet(vifp, mp_copy); else { - if (ip_mrtdebug > 1) { - (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, + if (ipst->ips_ip_mrtdebug > 1) { + (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, "phyint_send: tbf_contr rate %d " "vifp 0x%p mp 0x%p dst 0x%x", vifp->v_rate_limit, (void *)vifp, (void *)mp, dst); @@ -2205,9 +2233,10 @@ register_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst) struct igmpmsg *im; mblk_t *mp_copy; ipha_t *ipha_copy; + ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; - if (ip_mrtdebug > 1) { - (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, + if (ipst->ips_ip_mrtdebug > 1) { + (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, "register_send: src %x, dst %x\n", ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst)); } @@ -2219,9 +2248,9 @@ register_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst) */ mp_copy = allocb(sizeof (struct igmpmsg) + sizeof (ipha_t), BPRI_MED); if (mp_copy == NULL) { - ++mrtstat.mrts_pim_nomemory; - if (ip_mrtdebug > 3) { - (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, + ++ipst->ips_mrtstat->mrts_pim_nomemory; + if (ipst->ips_ip_mrtdebug > 3) { + (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, "register_send: allocb failure."); } return; @@ -2236,9 +2265,9 @@ register_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst) * Chain packet to new mblk_t. 
*/ if ((mp_copy->b_cont = copymsg(mp)) == NULL) { - ++mrtstat.mrts_pim_nomemory; - if (ip_mrtdebug > 3) { - (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, + ++ipst->ips_mrtstat->mrts_pim_nomemory; + if (ipst->ips_ip_mrtdebug > 3) { + (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, "register_send: copymsg failure."); } freeb(mp_copy); @@ -2270,16 +2299,16 @@ register_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst) */ im->im_mbz = 0; - ++mrtstat.mrts_upcalls; - if (!canputnext(RD(ip_g_mrouter))) { - ++mrtstat.mrts_pim_regsend_drops; - if (ip_mrtdebug > 3) { - (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, + ++ipst->ips_mrtstat->mrts_upcalls; + if (!canputnext(RD(ipst->ips_ip_g_mrouter))) { + ++ipst->ips_mrtstat->mrts_pim_regsend_drops; + if (ipst->ips_ip_mrtdebug > 3) { + (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, "register_send: register upcall failure."); } freemsg(mp_copy); } else { - putnext(RD(ip_g_mrouter), mp_copy); + putnext(RD(ipst->ips_ip_g_mrouter), mp_copy); } } @@ -2313,23 +2342,24 @@ pim_validate_cksum(mblk_t *mp, ipha_t *ip, struct pim *pimp) /* * int - * pim_input(queue_t *, mblk_t *) - Process PIM protocol packets. + * pim_input(queue_t *, mblk_t *, ill_t *ill) - Process PIM protocol packets. * IP Protocol 103. Register messages are decapsulated and sent * onto multicast forwarding. */ int -pim_input(queue_t *q, mblk_t *mp) +pim_input(queue_t *q, mblk_t *mp, ill_t *ill) { ipha_t *eip, *ip; int iplen, pimlen, iphlen; struct pim *pimp; /* pointer to a pim struct */ uint32_t *reghdr; + ip_stack_t *ipst = ill->ill_ipst; /* * Pullup the msg for PIM protocol processing. */ if (pullupmsg(mp, -1) == 0) { - ++mrtstat.mrts_pim_nomemory; + ++ipst->ips_mrtstat->mrts_pim_nomemory; freemsg(mp); return (-1); } @@ -2343,9 +2373,9 @@ pim_input(queue_t *q, mblk_t *mp) * Validate lengths */ if (pimlen < PIM_MINLEN) { - ++mrtstat.mrts_pim_malformed; - if (ip_mrtdebug > 1) { - (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, + ++ipst->ips_mrtstat->mrts_pim_malformed; + if (ipst->ips_ip_mrtdebug > 1) { + (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, "pim_input: length not at least minlen"); } freemsg(mp); @@ -2361,9 +2391,9 @@ pim_input(queue_t *q, mblk_t *mp) * Check the version number. */ if (pimp->pim_vers != PIM_VERSION) { - ++mrtstat.mrts_pim_badversion; - if (ip_mrtdebug > 1) { - (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, + ++ipst->ips_mrtstat->mrts_pim_badversion; + if (ipst->ips_ip_mrtdebug > 1) { + (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, "pim_input: unknown version of PIM"); } freemsg(mp); @@ -2374,9 +2404,9 @@ pim_input(queue_t *q, mblk_t *mp) * Validate the checksum */ if (!pim_validate_cksum(mp, ip, pimp)) { - ++mrtstat.mrts_pim_rcv_badcsum; - if (ip_mrtdebug > 1) { - (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, + ++ipst->ips_mrtstat->mrts_pim_rcv_badcsum; + if (ipst->ips_ip_mrtdebug > 1) { + (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, "pim_input: invalid checksum"); } freemsg(mp); @@ -2393,16 +2423,16 @@ pim_input(queue_t *q, mblk_t *mp) * check if the inner packet is destined to mcast group */ if (!CLASSD(eip->ipha_dst)) { - ++mrtstat.mrts_pim_badregisters; - if (ip_mrtdebug > 1) { - (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, + ++ipst->ips_mrtstat->mrts_pim_badregisters; + if (ipst->ips_ip_mrtdebug > 1) { + (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, "pim_input: Inner pkt not mcast .. 
!"); } freemsg(mp); return (-1); } - if (ip_mrtdebug > 1) { - (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, + if (ipst->ips_ip_mrtdebug > 1) { + (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, "register from %x, to %x, len %d", ntohl(eip->ipha_src), ntohl(eip->ipha_dst), @@ -2417,7 +2447,7 @@ pim_input(queue_t *q, mblk_t *mp) /* Copy the message */ if ((mp_copy = copymsg(mp)) == NULL) { - ++mrtstat.mrts_pim_nomemory; + ++ipst->ips_mrtstat->mrts_pim_nomemory; freemsg(mp); return (-1); } @@ -2428,7 +2458,7 @@ pim_input(queue_t *q, mblk_t *mp) */ mp_copy->b_rptr += iphlen + sizeof (pim_t) + sizeof (*reghdr); - if (register_mforward(q, mp_copy) != 0) { + if (register_mforward(q, mp_copy, ill) != 0) { freemsg(mp); return (-1); } @@ -2449,15 +2479,17 @@ pim_input(queue_t *q, mblk_t *mp) */ /* ARGSUSED */ static int -register_mforward(queue_t *q, mblk_t *mp) +register_mforward(queue_t *q, mblk_t *mp, ill_t *ill) { - ASSERT(reg_vif_num <= numvifs); + ip_stack_t *ipst = ill->ill_ipst; + + ASSERT(ipst->ips_reg_vif_num <= ipst->ips_numvifs); - if (ip_mrtdebug > 3) { + if (ipst->ips_ip_mrtdebug > 3) { ipha_t *ipha; ipha = (ipha_t *)mp->b_rptr; - (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, + (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, "register_mforward: src %x, dst %x\n", ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst)); } @@ -2474,7 +2506,7 @@ register_mforward(queue_t *q, mblk_t *mp) * then this will need re-examination. */ mp->b_prev = (mblk_t *)PIM_REGISTER_MARKER; - ++mrtstat.mrts_pim_regforwards; + ++ipst->ips_mrtstat->mrts_pim_regforwards; ip_rput(q, mp); return (0); } @@ -2490,10 +2522,12 @@ encap_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst) mblk_t *mp_copy; ipha_t *ipha_copy; size_t len; + ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; - if (ip_mrtdebug > 1) { - (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, - "encap_send: vif %ld enter", (ptrdiff_t)(vifp - viftable)); + if (ipst->ips_ip_mrtdebug > 1) { + (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, + "encap_send: vif %ld enter", + (ptrdiff_t)(vifp - ipst->ips_vifs)); } len = ntohs(ipha->ipha_length); @@ -2530,8 +2564,8 @@ encap_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst) ipha->ipha_hdr_checksum = 0; ipha->ipha_hdr_checksum = ip_csum_hdr(ipha); - if (ip_mrtdebug > 1) { - (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, + if (ipst->ips_ip_mrtdebug > 1) { + (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, "encap_send: group 0x%x", ntohl(ipha->ipha_dst)); } if (vifp->v_rate_limit <= 0) @@ -2547,13 +2581,14 @@ encap_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst) * IPPROTO_ENCAP and a local destination address. 
*/ void -ip_mroute_decap(queue_t *q, mblk_t *mp) +ip_mroute_decap(queue_t *q, mblk_t *mp, ill_t *ill) { ipha_t *ipha = (ipha_t *)mp->b_rptr; ipha_t *ipha_encap; int hlen = IPH_HDR_LENGTH(ipha); ipaddr_t src; struct vif *vifp; + ip_stack_t *ipst = ill->ill_ipst; /* * Dump the packet if it's not to a multicast destination or if @@ -2564,32 +2599,32 @@ ip_mroute_decap(queue_t *q, mblk_t *mp) */ ipha_encap = (ipha_t *)((char *)ipha + hlen); if (!CLASSD(ipha_encap->ipha_dst)) { - mrtstat.mrts_bad_tunnel++; + ipst->ips_mrtstat->mrts_bad_tunnel++; ip1dbg(("ip_mroute_decap: bad tunnel\n")); freemsg(mp); return; } src = (ipaddr_t)ipha->ipha_src; - mutex_enter(&last_encap_lock); - if (src != last_encap_src) { + mutex_enter(&ipst->ips_last_encap_lock); + if (src != ipst->ips_last_encap_src) { struct vif *vife; - vifp = viftable; - vife = vifp + numvifs; - last_encap_src = src; - last_encap_vif = 0; + vifp = ipst->ips_vifs; + vife = vifp + ipst->ips_numvifs; + ipst->ips_last_encap_src = src; + ipst->ips_last_encap_vif = 0; for (; vifp < vife; ++vifp) { if (!lock_good_vif(vifp)) continue; if (vifp->v_rmt_addr.s_addr == src) { if (vifp->v_flags & VIFF_TUNNEL) - last_encap_vif = vifp; - if (ip_mrtdebug > 1) { - (void) mi_strlog(ip_g_mrouter, + ipst->ips_last_encap_vif = vifp; + if (ipst->ips_ip_mrtdebug > 1) { + (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, "ip_mroute_decap: good tun " "vif %ld with %x", - (ptrdiff_t)(vifp - viftable), + (ptrdiff_t)(vifp - ipst->ips_vifs), ntohl(src)); } unlock_good_vif(vifp); @@ -2598,15 +2633,15 @@ ip_mroute_decap(queue_t *q, mblk_t *mp) unlock_good_vif(vifp); } } - if ((vifp = last_encap_vif) == 0) { - mutex_exit(&last_encap_lock); - mrtstat.mrts_bad_tunnel++; + if ((vifp = ipst->ips_last_encap_vif) == 0) { + mutex_exit(&ipst->ips_last_encap_lock); + ipst->ips_mrtstat->mrts_bad_tunnel++; freemsg(mp); ip1dbg(("ip_mroute_decap: vif %ld no tunnel with %x\n", - (ptrdiff_t)(vifp - viftable), ntohl(src))); + (ptrdiff_t)(vifp - ipst->ips_vifs), ntohl(src))); return; } - mutex_exit(&last_encap_lock); + mutex_exit(&ipst->ips_last_encap_lock); /* * Need to pass in the tunnel source to ip_mforward (so that it can @@ -2629,17 +2664,18 @@ reset_mrt_vif_ipif(ipif_t *ipif) { vifi_t vifi, tmp_vifi; vifi_t num_of_vifs; + ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; /* Can't check vifi >= 0 since vifi_t is unsigned! 
*/ - mutex_enter(&numvifs_mutex); - num_of_vifs = numvifs; - mutex_exit(&numvifs_mutex); + mutex_enter(&ipst->ips_numvifs_mutex); + num_of_vifs = ipst->ips_numvifs; + mutex_exit(&ipst->ips_numvifs_mutex); for (vifi = num_of_vifs; vifi != 0; vifi--) { tmp_vifi = vifi - 1; - if (viftable[tmp_vifi].v_ipif == ipif) { - (void) del_vif(&tmp_vifi, NULL, NULL); + if (ipst->ips_vifs[tmp_vifi].v_ipif == ipif) { + (void) del_vif(&tmp_vifi, NULL, NULL, ipst); } } } @@ -2651,24 +2687,26 @@ reset_mrt_ill(ill_t *ill) struct mfc *rt; struct rtdetq *rte; int i; + ip_stack_t *ipst = ill->ill_ipst; for (i = 0; i < MFCTBLSIZ; i++) { - MFCB_REFHOLD(&mfctable[i]); - if ((rt = mfctable[i].mfcb_mfc) != NULL) { - if (ip_mrtdebug > 1) { - (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, + MFCB_REFHOLD(&ipst->ips_mfcs[i]); + if ((rt = ipst->ips_mfcs[i].mfcb_mfc) != NULL) { + if (ipst->ips_ip_mrtdebug > 1) { + (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, + SL_TRACE, "reset_mrt_ill: mfctable [%d]", i); } while (rt != NULL) { mutex_enter(&rt->mfc_mutex); while ((rte = rt->mfc_rte) != NULL) { if (rte->ill == ill) { - if (ip_mrtdebug > 1) { - (void) mi_strlog( - ip_g_mrouter, - 1, SL_TRACE, - "reset_mrt_ill: " - "ill 0x%p", ill); + if (ipst->ips_ip_mrtdebug > 1) { + (void) mi_strlog( + ipst->ips_ip_g_mrouter, + 1, SL_TRACE, + "reset_mrt_ill: " + "ill 0x%p", ill); } rt->mfc_rte = rte->rte_next; freemsg(rte->mp); @@ -2679,7 +2717,7 @@ reset_mrt_ill(ill_t *ill) rt = rt->mfc_next; } } - MFCB_REFRELE(&mfctable[i]); + MFCB_REFRELE(&ipst->ips_mfcs[i]); } } @@ -2693,17 +2731,18 @@ tbf_control(struct vif *vifp, mblk_t *mp, ipha_t *ipha) size_t p_len = msgdsize(mp); struct tbf *t = vifp->v_tbf; timeout_id_t id = 0; + ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; /* Drop if packet is too large */ if (p_len > MAX_BKT_SIZE) { - mrtstat.mrts_pkt2large++; + ipst->ips_mrtstat->mrts_pkt2large++; freemsg(mp); return; } - if (ip_mrtdebug > 1) { - (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, + if (ipst->ips_ip_mrtdebug > 1) { + (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, "tbf_ctrl: SEND vif %ld, qlen %d, ipha_dst 0x%x", - (ptrdiff_t)(vifp - viftable), t->tbf_q_len, + (ptrdiff_t)(vifp - ipst->ips_vifs), t->tbf_q_len, ntohl(ipha->ipha_dst)); } @@ -2715,10 +2754,10 @@ tbf_control(struct vif *vifp, mblk_t *mp, ipha_t *ipha) * If there are enough tokens, * and the queue is empty, send this packet out. 
*/ - if (ip_mrtdebug > 1) { - (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, + if (ipst->ips_ip_mrtdebug > 1) { + (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, "tbf_control: vif %ld, TOKENS %d, pkt len %lu, qlen %d", - (ptrdiff_t)(vifp - viftable), t->tbf_n_tok, p_len, + (ptrdiff_t)(vifp - ipst->ips_vifs), t->tbf_n_tok, p_len, t->tbf_q_len); } /* No packets are queued */ @@ -2750,7 +2789,7 @@ tbf_control(struct vif *vifp, mblk_t *mp, ipha_t *ipha) freemsg(mp); ip1dbg(("tbf_ctl: couldn't pullup udp hdr, " "vif %ld src 0x%x dst 0x%x\n", - (ptrdiff_t)(vifp - viftable), + (ptrdiff_t)(vifp - ipst->ips_vifs), ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst))); mutex_exit(&vifp->v_tbf->tbf_lock); @@ -2764,7 +2803,7 @@ tbf_control(struct vif *vifp, mblk_t *mp, ipha_t *ipha) * try to selectively dq, or queue and process */ if (!tbf_dq_sel(vifp, ipha)) { - mrtstat.mrts_q_overflow++; + ipst->ips_mrtstat->mrts_q_overflow++; freemsg(mp); } else { tbf_queue(vifp, mp); @@ -2788,10 +2827,11 @@ static void tbf_queue(struct vif *vifp, mblk_t *mp) { struct tbf *t = vifp->v_tbf; + ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; - if (ip_mrtdebug > 1) { - (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, - "tbf_queue: vif %ld", (ptrdiff_t)(vifp - viftable)); + if (ipst->ips_ip_mrtdebug > 1) { + (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, + "tbf_queue: vif %ld", (ptrdiff_t)(vifp - ipst->ips_vifs)); } ASSERT(MUTEX_HELD(&t->tbf_lock)); @@ -2822,11 +2862,12 @@ tbf_process_q(struct vif *vifp) mblk_t *mp; struct tbf *t = vifp->v_tbf; size_t len; + ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; - if (ip_mrtdebug > 1) { - (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, + if (ipst->ips_ip_mrtdebug > 1) { + (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, "tbf_process_q 1: vif %ld qlen = %d", - (ptrdiff_t)(vifp - viftable), t->tbf_q_len); + (ptrdiff_t)(vifp - ipst->ips_vifs), t->tbf_q_len); } /* @@ -2866,6 +2907,7 @@ static void tbf_reprocess_q(void *arg) { struct vif *vifp = arg; + ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; mutex_enter(&vifp->v_tbf->tbf_lock); vifp->v_timeout_id = 0; @@ -2879,10 +2921,10 @@ tbf_reprocess_q(void *arg) } mutex_exit(&vifp->v_tbf->tbf_lock); - if (ip_mrtdebug > 1) { - (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, + if (ipst->ips_ip_mrtdebug > 1) { + (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, "tbf_reprcess_q: vif %ld timeout id = %p", - (ptrdiff_t)(vifp - viftable), vifp->v_timeout_id); + (ptrdiff_t)(vifp - ipst->ips_vifs), vifp->v_timeout_id); } } @@ -2899,11 +2941,12 @@ tbf_dq_sel(struct vif *vifp, ipha_t *ipha) struct tbf *t = vifp->v_tbf; mblk_t **np; mblk_t *last, *mp; + ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; - if (ip_mrtdebug > 1) { - (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, + if (ipst->ips_ip_mrtdebug > 1) { + (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, "dq_sel: vif %ld dst 0x%x", - (ptrdiff_t)(vifp - viftable), ntohl(ipha->ipha_dst)); + (ptrdiff_t)(vifp - ipst->ips_vifs), ntohl(ipha->ipha_dst)); } ASSERT(MUTEX_HELD(&t->tbf_lock)); @@ -2926,7 +2969,7 @@ tbf_dq_sel(struct vif *vifp, ipha_t *ipha) if (--t->tbf_q_len == 0) { t->tbf_t = NULL; } - mrtstat.mrts_drop_sel++; + ipst->ips_mrtstat->mrts_drop_sel++; return (1); } np = &mp->b_next; @@ -2940,13 +2983,14 @@ static void tbf_send_packet(struct vif *vifp, mblk_t *mp) { ipif_t *ipif; + ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; /* If encap tunnel options */ if (vifp->v_flags & VIFF_TUNNEL) { - if (ip_mrtdebug > 1) { - (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, + 
if (ipst->ips_ip_mrtdebug > 1) { + (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, "tbf_send_pkt: ENCAP tunnel vif %ld", - (ptrdiff_t)(vifp - viftable)); + (ptrdiff_t)(vifp - ipst->ips_vifs)); } /* @@ -2978,14 +3022,15 @@ tbf_send_packet(struct vif *vifp, mblk_t *mp) ire_t *ire; mutex_exit(&ipif->ipif_ill->ill_lock); - if (ip_mrtdebug > 1) { - (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, + if (ipst->ips_ip_mrtdebug > 1) { + (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, + SL_TRACE, "tbf_send_pkt: loopback vif %ld", - (ptrdiff_t)(vifp - viftable)); + (ptrdiff_t)(vifp - ipst->ips_vifs)); } mp_loop = copymsg(mp); ire = ire_ctable_lookup(~0, 0, IRE_BROADCAST, NULL, - ALL_ZONES, NULL, MATCH_IRE_TYPE); + ALL_ZONES, NULL, MATCH_IRE_TYPE, ipst); if (mp_loop != NULL && ire != NULL) { IP_RPUT_LOCAL(ipif->ipif_rq, mp_loop, @@ -2993,20 +3038,21 @@ tbf_send_packet(struct vif *vifp, mblk_t *mp) ire, (ill_t *)ipif->ipif_rq->q_ptr); } else { /* Either copymsg failed or no ire */ - (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, + (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, + SL_TRACE, "tbf_send_pkt: mp_loop 0x%p, ire 0x%p " "vif %ld\n", mp_loop, ire, - (ptrdiff_t)(vifp - viftable)); + (ptrdiff_t)(vifp - ipst->ips_vifs)); } if (ire != NULL) ire_refrele(ire); } else { mutex_exit(&ipif->ipif_ill->ill_lock); } - if (ip_mrtdebug > 1) { - (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, + if (ipst->ips_ip_mrtdebug > 1) { + (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, "tbf_send_pkt: phyint forward vif %ld dst = 0x%x", - (ptrdiff_t)(vifp - viftable), ntohl(dst)); + (ptrdiff_t)(vifp - ipst->ips_vifs), ntohl(dst)); } ip_rput_forward_multicast(dst, mp, ipif); } @@ -3022,6 +3068,7 @@ tbf_update_tokens(struct vif *vifp) timespec_t tp; hrtime_t tm; struct tbf *t = vifp->v_tbf; + ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; ASSERT(MUTEX_HELD(&t->tbf_lock)); @@ -3044,10 +3091,10 @@ tbf_update_tokens(struct vif *vifp) if (t->tbf_n_tok > MAX_BKT_SIZE) t->tbf_n_tok = MAX_BKT_SIZE; - if (ip_mrtdebug > 1) { - (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, + if (ipst->ips_ip_mrtdebug > 1) { + (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, "tbf_update_tok: tm %lld tok %d vif %ld", - tm, t->tbf_n_tok, (ptrdiff_t)(vifp - viftable)); + tm, t->tbf_n_tok, (ptrdiff_t)(vifp - ipst->ips_vifs)); } } @@ -3062,6 +3109,7 @@ static int priority(struct vif *vifp, ipha_t *ipha) { int prio; + ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; /* Temporary hack; may add general packet classifier some day */ @@ -3092,8 +3140,8 @@ priority(struct vif *vifp, ipha_t *ipha) prio = 50; break; } - if (ip_mrtdebug > 1) { - (void) mi_strlog(ip_g_mrouter, 1, SL_TRACE, + if (ipst->ips_ip_mrtdebug > 1) { + (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, "priority: port %x prio %d\n", ntohs(udp->uh_dport), prio); } @@ -3112,13 +3160,14 @@ priority(struct vif *vifp, ipha_t *ipha) * Produces data for netstat -M. 
*/ int -ip_mroute_stats(mblk_t *mp) +ip_mroute_stats(mblk_t *mp, ip_stack_t *ipst) { - mrtstat.mrts_vifctlSize = sizeof (struct vifctl); - mrtstat.mrts_mfcctlSize = sizeof (struct mfcctl); - if (!snmp_append_data(mp, (char *)&mrtstat, sizeof (mrtstat))) { + ipst->ips_mrtstat->mrts_vifctlSize = sizeof (struct vifctl); + ipst->ips_mrtstat->mrts_mfcctlSize = sizeof (struct mfcctl); + if (!snmp_append_data(mp, (char *)ipst->ips_mrtstat, + sizeof (struct mrtstat))) { ip0dbg(("ip_mroute_stats: failed %ld bytes\n", - (size_t)sizeof (mrtstat))); + (size_t)sizeof (struct mrtstat))); return (0); } return (1); @@ -3128,26 +3177,26 @@ ip_mroute_stats(mblk_t *mp) * Sends info for SNMP's MIB. */ int -ip_mroute_vif(mblk_t *mp) +ip_mroute_vif(mblk_t *mp, ip_stack_t *ipst) { struct vifctl vi; vifi_t vifi; - mutex_enter(&numvifs_mutex); - for (vifi = 0; vifi < numvifs; vifi++) { - if (viftable[vifi].v_lcl_addr.s_addr == 0) + mutex_enter(&ipst->ips_numvifs_mutex); + for (vifi = 0; vifi < ipst->ips_numvifs; vifi++) { + if (ipst->ips_vifs[vifi].v_lcl_addr.s_addr == 0) continue; /* * No locks here, an approximation is fine. */ vi.vifc_vifi = vifi; - vi.vifc_flags = viftable[vifi].v_flags; - vi.vifc_threshold = viftable[vifi].v_threshold; - vi.vifc_rate_limit = viftable[vifi].v_rate_limit; - vi.vifc_lcl_addr = viftable[vifi].v_lcl_addr; - vi.vifc_rmt_addr = viftable[vifi].v_rmt_addr; - vi.vifc_pkt_in = viftable[vifi].v_pkt_in; - vi.vifc_pkt_out = viftable[vifi].v_pkt_out; + vi.vifc_flags = ipst->ips_vifs[vifi].v_flags; + vi.vifc_threshold = ipst->ips_vifs[vifi].v_threshold; + vi.vifc_rate_limit = ipst->ips_vifs[vifi].v_rate_limit; + vi.vifc_lcl_addr = ipst->ips_vifs[vifi].v_lcl_addr; + vi.vifc_rmt_addr = ipst->ips_vifs[vifi].v_rmt_addr; + vi.vifc_pkt_in = ipst->ips_vifs[vifi].v_pkt_in; + vi.vifc_pkt_out = ipst->ips_vifs[vifi].v_pkt_out; if (!snmp_append_data(mp, (char *)&vi, sizeof (vi))) { ip0dbg(("ip_mroute_vif: failed %ld bytes\n", @@ -3155,7 +3204,7 @@ ip_mroute_vif(mblk_t *mp) return (0); } } - mutex_exit(&numvifs_mutex); + mutex_exit(&ipst->ips_numvifs_mutex); return (1); } @@ -3163,7 +3212,7 @@ ip_mroute_vif(mblk_t *mp) * Called by ip_snmp_get to send up multicast routing table. */ int -ip_mroute_mrt(mblk_t *mp) +ip_mroute_mrt(mblk_t *mp, ip_stack_t *ipst) { int i, j; struct mfc *rt; @@ -3172,13 +3221,13 @@ ip_mroute_mrt(mblk_t *mp) /* * Make sure multicast has not been turned off. 
*/ - if (is_mrouter_off()) + if (is_mrouter_off(ipst)) return (1); /* Loop over all hash buckets and their chains */ for (i = 0; i < MFCTBLSIZ; i++) { - MFCB_REFHOLD(&mfctable[i]); - for (rt = mfctable[i].mfcb_mfc; rt; rt = rt->mfc_next) { + MFCB_REFHOLD(&ipst->ips_mfcs[i]); + for (rt = ipst->ips_mfcs[i].mfcb_mfc; rt; rt = rt->mfc_next) { mutex_enter(&rt->mfc_mutex); if (rt->mfc_rte != NULL || (rt->mfc_marks & MFCB_MARK_CONDEMNED)) { @@ -3189,23 +3238,23 @@ ip_mroute_mrt(mblk_t *mp) mfcc.mfcc_mcastgrp = rt->mfc_mcastgrp; mfcc.mfcc_parent = rt->mfc_parent; mfcc.mfcc_pkt_cnt = rt->mfc_pkt_cnt; - mutex_enter(&numvifs_mutex); - for (j = 0; j < (int)numvifs; j++) + mutex_enter(&ipst->ips_numvifs_mutex); + for (j = 0; j < (int)ipst->ips_numvifs; j++) mfcc.mfcc_ttls[j] = rt->mfc_ttls[j]; - for (j = (int)numvifs; j < MAXVIFS; j++) + for (j = (int)ipst->ips_numvifs; j < MAXVIFS; j++) mfcc.mfcc_ttls[j] = 0; - mutex_exit(&numvifs_mutex); + mutex_exit(&ipst->ips_numvifs_mutex); mutex_exit(&rt->mfc_mutex); if (!snmp_append_data(mp, (char *)&mfcc, sizeof (mfcc))) { - MFCB_REFRELE(&mfctable[i]); + MFCB_REFRELE(&ipst->ips_mfcs[i]); ip0dbg(("ip_mroute_mrt: failed %ld bytes\n", (size_t)sizeof (mfcc))); return (0); } } - MFCB_REFRELE(&mfctable[i]); + MFCB_REFRELE(&ipst->ips_mfcs[i]); } return (1); } diff --git a/usr/src/uts/common/inet/ip/ip_multi.c b/usr/src/uts/common/inet/ip/ip_multi.c index 598ff0303f..a4e5ee8149 100644 --- a/usr/src/uts/common/inet/ip/ip_multi.c +++ b/usr/src/uts/common/inet/ip/ip_multi.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* Copyright (c) 1990 Mentat Inc. */ @@ -321,6 +321,7 @@ static void ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, slist_t *flist) { ilm_fbld_t fbld; + ip_stack_t *ipst = ilm->ilm_ipst; fbld.fbld_ilm = ilm; fbld.fbld_in_cnt = fbld.fbld_ex_cnt = 0; @@ -328,7 +329,7 @@ ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, slist_t *flist) fbld.fbld_in_overflow = B_FALSE; /* first, construct our master include and exclude lists */ - ipcl_walk(ilm_bld_flists, (caddr_t)&fbld); + ipcl_walk(ilm_bld_flists, (caddr_t)&fbld, ipst); /* now use those master lists to generate the interface filter */ @@ -1183,6 +1184,7 @@ ip_multicast_loopback(queue_t *q, ill_t *ill, mblk_t *mp_orig, int fanout_flags, mblk_t *mp; mblk_t *ipsec_mp; ipha_t *iph; + ip_stack_t *ipst = ill->ill_ipst; if (DB_TYPE(mp_orig) == M_DATA && ((ipha_t *)mp_orig->b_rptr)->ipha_protocol == IPPROTO_UDP) { @@ -1208,7 +1210,7 @@ ip_multicast_loopback(queue_t *q, ill_t *ill, mblk_t *mp_orig, int fanout_flags, mp = NULL; } } else { - mp = ip_copymsg(mp_orig); + mp = ip_copymsg(mp_orig); /* No refcnt on ipsec_out netstack */ } if (mp == NULL) @@ -1226,8 +1228,9 @@ ip_multicast_loopback(queue_t *q, ill_t *ill, mblk_t *mp_orig, int fanout_flags, ill_t *, NULL, ill_t *, ill, ipha_t *, iph, mblk_t *, ipsec_mp); - FW_HOOKS(ip4_loopback_out_event, ipv4firewall_loopback_out, NULL, ill, - iph, ipsec_mp, mp); + FW_HOOKS(ipst->ips_ip4_loopback_out_event, + ipst->ips_ipv4firewall_loopback_out, + NULL, ill, iph, ipsec_mp, mp, ipst); DTRACE_PROBE1(ip4__loopback__out__end, mblk_t *, ipsec_mp); @@ -1726,6 +1729,9 @@ ilm_add_v6(ipif_t *ipif, const in6_addr_t *v6group, ilg_stat_t ilgstat, ilm->ilm_ipif = ipif; ilm->ilm_ill = NULL; } + ASSERT(ill->ill_ipst); + ilm->ilm_ipst = ill->ill_ipst; /* No netstack_hold */ + /* * After this if ilm moves to a new ill, we don't 
change * the ilm_orig_ifindex. Thus, if ill_index != ilm_orig_ifindex, @@ -1793,6 +1799,7 @@ ilm_walker_cleanup(ill_t *ill) FREE_SLIST(ilm->ilm_pendsrcs); FREE_SLIST(ilm->ilm_rtx.rtx_allow); FREE_SLIST(ilm->ilm_rtx.rtx_block); + ilm->ilm_ipst = NULL; mi_free((char *)ilm); } else { ilmp = &(*ilmp)->ilm_next; @@ -1842,6 +1849,7 @@ ilm_delete(ilm_t *ilm) FREE_SLIST(ilm->ilm_pendsrcs); FREE_SLIST(ilm->ilm_rtx.rtx_allow); FREE_SLIST(ilm->ilm_rtx.rtx_block); + ilm->ilm_ipst = NULL; mi_free((char *)ilm); } @@ -1874,6 +1882,7 @@ ip_opt_check(conn_t *connp, ipaddr_t group, ipaddr_t src, ipaddr_t ifaddr, ipif_t *ipif; int err = 0; zoneid_t zoneid; + ip_stack_t *ipst = connp->conn_netstack->netstack_ip; if (!CLASSD(group) || CLASSD(src)) { return (EINVAL); @@ -1885,14 +1894,14 @@ ip_opt_check(conn_t *connp, ipaddr_t group, ipaddr_t src, ipaddr_t ifaddr, ASSERT(!(ifaddr != INADDR_ANY && ifindexp != NULL && *ifindexp != 0)); if (ifaddr != INADDR_ANY) { ipif = ipif_lookup_addr(ifaddr, NULL, zoneid, - CONNP_TO_WQ(connp), first_mp, func, &err); + CONNP_TO_WQ(connp), first_mp, func, &err, ipst); if (err != 0 && err != EINPROGRESS) err = EADDRNOTAVAIL; } else if (ifindexp != NULL && *ifindexp != 0) { ipif = ipif_lookup_on_ifindex(*ifindexp, B_FALSE, zoneid, - CONNP_TO_WQ(connp), first_mp, func, &err); + CONNP_TO_WQ(connp), first_mp, func, &err, ipst); } else { - ipif = ipif_lookup_group(group, zoneid); + ipif = ipif_lookup_group(group, zoneid, ipst); if (ipif == NULL) return (EADDRNOTAVAIL); } @@ -1920,6 +1929,7 @@ ip_opt_check_v6(conn_t *connp, const in6_addr_t *v6group, ipaddr_t *v4group, int err; zoneid_t zoneid = connp->conn_zoneid; queue_t *wq = CONNP_TO_WQ(connp); + ip_stack_t *ipst = connp->conn_netstack->netstack_ip; src_unspec = IN6_IS_ADDR_UNSPECIFIED(v6src); @@ -1949,15 +1959,15 @@ ip_opt_check_v6(conn_t *connp, const in6_addr_t *v6group, ipaddr_t *v4group, if (ifindex == 0) { if (*isv6) - ill = ill_lookup_group_v6(v6group, zoneid); + ill = ill_lookup_group_v6(v6group, zoneid, ipst); else - ipif = ipif_lookup_group(*v4group, zoneid); + ipif = ipif_lookup_group(*v4group, zoneid, ipst); if (ill == NULL && ipif == NULL) return (EADDRNOTAVAIL); } else { if (*isv6) { ill = ill_lookup_on_ifindex(ifindex, B_TRUE, - wq, first_mp, func, &err); + wq, first_mp, func, &err, ipst); if (ill != NULL && !ipif_lookup_zoneid(ill, zoneid, 0, NULL)) { ill_refrele(ill); @@ -1966,7 +1976,7 @@ ip_opt_check_v6(conn_t *connp, const in6_addr_t *v6group, ipaddr_t *v4group, } } else { ipif = ipif_lookup_on_ifindex(ifindex, B_FALSE, - zoneid, wq, first_mp, func, &err); + zoneid, wq, first_mp, func, &err, ipst); } if (ill == NULL && ipif == NULL) return (err); @@ -2584,9 +2594,11 @@ ip_extract_msfilter(queue_t *q, mblk_t *mp, ipif_t **ipifpp, ipsq_func_t func) in6_addr_t v6grp; uint32_t index; zoneid_t zoneid; + ip_stack_t *ipst; connp = Q_TO_CONN(q); zoneid = connp->conn_zoneid; + ipst = connp->conn_netstack->netstack_ip; /* don't allow multicast operations on a tcp conn */ if (IPCL_IS_TCP(connp)) @@ -2601,12 +2613,12 @@ ip_extract_msfilter(queue_t *q, mblk_t *mp, ipif_t **ipifpp, ipsq_func_t func) v4addr = imsf->imsf_interface.s_addr; v4grp = imsf->imsf_multiaddr.s_addr; if (v4addr == INADDR_ANY) { - ipif = ipif_lookup_group(v4grp, zoneid); + ipif = ipif_lookup_group(v4grp, zoneid, ipst); if (ipif == NULL) err = EADDRNOTAVAIL; } else { ipif = ipif_lookup_addr(v4addr, NULL, zoneid, q, mp, - func, &err); + func, &err, ipst); } } else { boolean_t isv6 = B_FALSE; @@ -2628,15 +2640,17 @@ ip_extract_msfilter(queue_t *q, mblk_t 
*mp, ipif_t **ipifpp, ipsq_func_t func) return (EAFNOSUPPORT); } if (index == 0) { - if (isv6) - ipif = ipif_lookup_group_v6(&v6grp, zoneid); - else - ipif = ipif_lookup_group(v4grp, zoneid); + if (isv6) { + ipif = ipif_lookup_group_v6(&v6grp, zoneid, + ipst); + } else { + ipif = ipif_lookup_group(v4grp, zoneid, ipst); + } if (ipif == NULL) err = EADDRNOTAVAIL; } else { ipif = ipif_lookup_on_ifindex(index, isv6, zoneid, - q, mp, func, &err); + q, mp, func, &err, ipst); } } @@ -4010,7 +4024,9 @@ void reset_conn_ipif(ipif) ipif_t *ipif; { - ipcl_walk(conn_delete_ipif, (caddr_t)ipif); + ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; + + ipcl_walk(conn_delete_ipif, (caddr_t)ipif, ipst); } /* @@ -4021,7 +4037,9 @@ reset_conn_ipif(ipif) void reset_conn_ill(ill_t *ill) { - ipcl_walk(conn_delete_ill, (caddr_t)ill); + ip_stack_t *ipst = ill->ill_ipst; + + ipcl_walk(conn_delete_ill, (caddr_t)ill, ipst); } #ifdef DEBUG @@ -4037,9 +4055,10 @@ ilm_walk_ill(ill_t *ill) ill_t *till; ilm_t *ilm; ill_walk_context_t ctx; + ip_stack_t *ipst = ill->ill_ipst; - rw_enter(&ill_g_lock, RW_READER); - till = ILL_START_WALK_ALL(&ctx); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); + till = ILL_START_WALK_ALL(&ctx, ipst); for (; till != NULL; till = ill_next(&ctx, till)) { for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { if (ilm->ilm_ill == ill) { @@ -4047,7 +4066,7 @@ ilm_walk_ill(ill_t *ill) } } } - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); return (cnt); } @@ -4062,8 +4081,9 @@ ilm_walk_ipif(ipif_t *ipif) ill_t *till; ilm_t *ilm; ill_walk_context_t ctx; + ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; - till = ILL_START_WALK_ALL(&ctx); + till = ILL_START_WALK_ALL(&ctx, ipst); for (; till != NULL; till = ill_next(&ctx, till)) { for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { if (ilm->ilm_ipif == ipif) { diff --git a/usr/src/uts/common/inet/ip/ip_ndp.c b/usr/src/uts/common/inet/ip/ip_ndp.c index e68c4119d5..90c878b16e 100644 --- a/usr/src/uts/common/inet/ip/ip_ndp.c +++ b/usr/src/uts/common/inet/ip/ip_ndp.c @@ -57,6 +57,7 @@ #include <inet/nd.h> #include <inet/ip.h> #include <inet/ip_impl.h> +#include <inet/ipclassifier.h> #include <inet/ip_if.h> #include <inet/ip_ire.h> #include <inet/ip_rts.h> @@ -117,13 +118,12 @@ static int ndp_add_v4(ill_t *, uchar_t *, const in_addr_t *, void nce_trace_inactive(nce_t *); #endif -ndp_g_t ndp4, ndp6; +#define NCE_HASH_PTR_V4(ipst, addr) \ + (&((ipst)->ips_ndp4->nce_hash_tbl[IRE_ADDR_HASH(addr, NCE_TABLE_SIZE)])) -#define NCE_HASH_PTR_V4(addr) \ - (&(ndp4.nce_hash_tbl[IRE_ADDR_HASH(addr, NCE_TABLE_SIZE)])) - -#define NCE_HASH_PTR_V6(addr) \ - (&(ndp6.nce_hash_tbl[NCE_ADDR_HASH_V6(addr, NCE_TABLE_SIZE)])) +#define NCE_HASH_PTR_V6(ipst, addr) \ + (&((ipst)->ips_ndp6->nce_hash_tbl[NCE_ADDR_HASH_V6(addr, \ + NCE_TABLE_SIZE)])) /* * Compute default flags to use for an advertisement of this nce's address. @@ -165,7 +165,7 @@ ndp_add(ill_t *ill, uchar_t *hw_addr, const void *addr, /* * NDP Cache Entry creation routine. * Mapped entries will never do NUD . - * This routine must always be called with ndp6.ndp_g_lock held. + * This routine must always be called with ndp6->ndp_g_lock held. * Prior to return, nce_refcnt is incremented. 
*/ static int @@ -181,8 +181,9 @@ ndp_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, nce_t **ncep; int err; boolean_t dropped = B_FALSE; + ip_stack_t *ipst = ill->ill_ipst; - ASSERT(MUTEX_HELD(&ndp6.ndp_g_lock)); + ASSERT(MUTEX_HELD(&ipst->ips_ndp6->ndp_g_lock)); ASSERT(ill != NULL && ill->ill_isv6); if (IN6_IS_ADDR_UNSPECIFIED(addr)) { ip0dbg(("ndp_add: no addr\n")); @@ -256,9 +257,9 @@ ndp_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, ASSERT(IN6_IS_ADDR_MULTICAST(addr)); ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_mask)); ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); - ncep = &ndp6.nce_mask_entries; + ncep = &ipst->ips_ndp6->nce_mask_entries; } else { - ncep = ((nce_t **)NCE_HASH_PTR_V6(*addr)); + ncep = ((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); } #ifdef NCE_DEBUG @@ -290,7 +291,7 @@ ndp_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, err = 0; if ((flags & NCE_F_PERMANENT) && state == ND_PROBE) { mutex_enter(&nce->nce_lock); - mutex_exit(&ndp6.ndp_g_lock); + mutex_exit(&ipst->ips_ndp6->ndp_g_lock); nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; mutex_exit(&nce->nce_lock); dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, B_FALSE, @@ -301,7 +302,7 @@ ndp_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, mutex_exit(&nce->nce_lock); } NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill)); - mutex_enter(&ndp6.ndp_g_lock); + mutex_enter(&ipst->ips_ndp6->ndp_g_lock); err = EINPROGRESS; } else if (flags & NCE_F_UNSOL_ADV) { /* @@ -310,8 +311,8 @@ ndp_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, * are done in ndp_timer. */ mutex_enter(&nce->nce_lock); - mutex_exit(&ndp6.ndp_g_lock); - nce->nce_unsolicit_count = ip_ndp_unsolicit_count - 1; + mutex_exit(&ipst->ips_ndp6->ndp_g_lock); + nce->nce_unsolicit_count = ipst->ips_ip_ndp_unsolicit_count - 1; mutex_exit(&nce->nce_lock); dropped = nce_xmit(ill, ND_NEIGHBOR_ADVERT, @@ -325,10 +326,10 @@ ndp_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, nce->nce_unsolicit_count++; if (nce->nce_unsolicit_count != 0) { nce->nce_timeout_id = timeout(ndp_timer, nce, - MSEC_TO_TICK(ip_ndp_unsolicit_interval)); + MSEC_TO_TICK(ipst->ips_ip_ndp_unsolicit_interval)); } mutex_exit(&nce->nce_lock); - mutex_enter(&ndp6.ndp_g_lock); + mutex_enter(&ipst->ips_ndp6->ndp_g_lock); } /* * If the hw_addr is NULL, typically for ND_INCOMPLETE nces, then @@ -372,10 +373,13 @@ ndp_lookup_then_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, { int err = 0; nce_t *nce; + ip_stack_t *ipst = ill->ill_ipst; ASSERT(ill != NULL && ill->ill_isv6); - mutex_enter(&ndp6.ndp_g_lock); - nce = *((nce_t **)NCE_HASH_PTR_V6(*addr)); /* head of v6 hash table */ + mutex_enter(&ipst->ips_ndp6->ndp_g_lock); + + /* Get head of v6 hash table */ + nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); nce = nce_lookup_addr(ill, addr, nce); if (nce == NULL) { err = ndp_add(ill, @@ -393,7 +397,7 @@ ndp_lookup_then_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, *newnce = nce; err = EEXIST; } - mutex_exit(&ndp6.ndp_g_lock); + mutex_exit(&ipst->ips_ndp6->ndp_g_lock); return (err); } @@ -446,7 +450,13 @@ ndp_delete(nce_t *nce) nce_t **ptpn; nce_t *nce1; int ipversion = nce->nce_ipversion; - ndp_g_t *ndp = (ipversion == IPV4_VERSION ? 
&ndp4 : &ndp6); + ndp_g_t *ndp; + ip_stack_t *ipst = nce->nce_ill->ill_ipst; + + if (ipversion == IPV4_VERSION) + ndp = ipst->ips_ndp4; + else + ndp = ipst->ips_ndp6; /* Serialize deletes */ mutex_enter(&nce->nce_lock); @@ -697,17 +707,23 @@ nce_t * ndp_lookup_v6(ill_t *ill, const in6_addr_t *addr, boolean_t caller_holds_lock) { nce_t *nce; + ip_stack_t *ipst; + + ASSERT(ill != NULL); + ipst = ill->ill_ipst; ASSERT(ill != NULL && ill->ill_isv6); if (!caller_holds_lock) { - mutex_enter(&ndp6.ndp_g_lock); + mutex_enter(&ipst->ips_ndp6->ndp_g_lock); } - nce = *((nce_t **)NCE_HASH_PTR_V6(*addr)); /* head of v6 hash table */ + + /* Get head of v6 hash table */ + nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); nce = nce_lookup_addr(ill, addr, nce); if (nce == NULL) nce = nce_lookup_mapping(ill, addr); if (!caller_holds_lock) - mutex_exit(&ndp6.ndp_g_lock); + mutex_exit(&ipst->ips_ndp6->ndp_g_lock); return (nce); } /* @@ -722,15 +738,18 @@ ndp_lookup_v4(ill_t *ill, const in_addr_t *addr, boolean_t caller_holds_lock) { nce_t *nce; in6_addr_t addr6; + ip_stack_t *ipst = ill->ill_ipst; if (!caller_holds_lock) { - mutex_enter(&ndp4.ndp_g_lock); + mutex_enter(&ipst->ips_ndp4->ndp_g_lock); } - nce = *((nce_t **)NCE_HASH_PTR_V4(*addr)); /* head of v6 hash table */ + + /* Get head of v4 hash table */ + nce = *((nce_t **)NCE_HASH_PTR_V4(ipst, *addr)); IN6_IPADDR_TO_V4MAPPED(*addr, &addr6); nce = nce_lookup_addr(ill, &addr6, nce); if (!caller_holds_lock) - mutex_exit(&ndp4.ndp_g_lock); + mutex_exit(&ipst->ips_ndp4->ndp_g_lock); return (nce); } @@ -744,7 +763,13 @@ ndp_lookup_v4(ill_t *ill, const in_addr_t *addr, boolean_t caller_holds_lock) static nce_t * nce_lookup_addr(ill_t *ill, const in6_addr_t *addr, nce_t *nce) { - ndp_g_t *ndp = (ill->ill_isv6 ? &ndp6 : &ndp4); + ndp_g_t *ndp; + ip_stack_t *ipst = ill->ill_ipst; + + if (ill->ill_isv6) + ndp = ipst->ips_ndp6; + else + ndp = ipst->ips_ndp4; ASSERT(ill != NULL); ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); @@ -776,12 +801,13 @@ static nce_t * nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr) { nce_t *nce; + ip_stack_t *ipst = ill->ill_ipst; ASSERT(ill != NULL && ill->ill_isv6); - ASSERT(MUTEX_HELD(&ndp6.ndp_g_lock)); + ASSERT(MUTEX_HELD(&ipst->ips_ndp6->ndp_g_lock)); if (!IN6_IS_ADDR_MULTICAST(addr)) return (NULL); - nce = ndp6.nce_mask_entries; + nce = ipst->ips_ndp6->nce_mask_entries; for (; nce != NULL; nce = nce->nce_next) if (nce->nce_ill == ill && (V6_MASK_EQ(*addr, nce->nce_mask, nce->nce_addr))) { @@ -808,6 +834,7 @@ ndp_process(nce_t *nce, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv) mblk_t *mp; boolean_t ll_updated = B_FALSE; boolean_t ll_changed; + ip_stack_t *ipst = ill->ill_ipst; ASSERT(nce->nce_ipversion == IPV6_VERSION); /* @@ -854,7 +881,7 @@ ndp_process(nce_t *nce, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv) ifindex = (uint_t)(uintptr_t)data_mp->b_prev; inbound_ill = ill_lookup_on_ifindex(ifindex, - B_TRUE, NULL, NULL, NULL, NULL); + B_TRUE, NULL, NULL, NULL, NULL, ipst); if (inbound_ill == NULL) { data_mp->b_prev = NULL; freemsg(mp); @@ -934,9 +961,9 @@ ndp_process(nce_t *nce, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv) &ipv6_all_zeros, &nce->nce_addr, IRE_DEFAULT, nce->nce_ill->ill_ipif, NULL, ALL_ZONES, 0, NULL, MATCH_IRE_ILL | MATCH_IRE_TYPE | MATCH_IRE_GW | - MATCH_IRE_DEFAULT); + MATCH_IRE_DEFAULT, ipst); if (ire != NULL) { - ip_rts_rtmsg(RTM_DELETE, ire, 0); + ip_rts_rtmsg(RTM_DELETE, ire, 0, ipst); ire_delete(ire); ire_refrele(ire); } @@ -1025,11 +1052,15 @@ ndp_walk_common(ndp_g_t *ndp, ill_t *ill, pfi_t 
pfi, void *arg1, } } +/* + * Walk everything. + * Note that ill can be NULL hence can't derive the ipst from it. + */ void -ndp_walk(ill_t *ill, pfi_t pfi, void *arg1) +ndp_walk(ill_t *ill, pfi_t pfi, void *arg1, ip_stack_t *ipst) { - ndp_walk_common(&ndp4, ill, pfi, arg1, B_TRUE); - ndp_walk_common(&ndp6, ill, pfi, arg1, B_TRUE); + ndp_walk_common(ipst->ips_ndp4, ill, pfi, arg1, B_TRUE); + ndp_walk_common(ipst->ips_ndp6, ill, pfi, arg1, B_TRUE); } /* @@ -1048,6 +1079,7 @@ ndp_resolver(ill_t *ill, const in6_addr_t *dst, mblk_t *mp, zoneid_t zoneid) int err = 0; uint32_t ms; mblk_t *mp_nce = NULL; + ip_stack_t *ipst = ill->ill_ipst; ASSERT(ill != NULL); ASSERT(ill->ill_isv6); @@ -1082,25 +1114,25 @@ ndp_resolver(ill_t *ill, const in6_addr_t *dst, mblk_t *mp, zoneid_t zoneid) NCE_REFRELE(nce); return (0); } - rw_enter(&ill_g_lock, RW_READER); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); mutex_enter(&nce->nce_lock); if (nce->nce_state != ND_INCOMPLETE) { mutex_exit(&nce->nce_lock); - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); NCE_REFRELE(nce); return (0); } - mp_nce = ip_prepend_zoneid(mp, zoneid); + mp_nce = ip_prepend_zoneid(mp, zoneid, ipst); if (mp_nce == NULL) { /* The caller will free mp */ mutex_exit(&nce->nce_lock); - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); ndp_delete(nce); NCE_REFRELE(nce); return (ENOMEM); } ms = nce_solicit(nce, mp_nce); - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); if (ms == 0) { /* The caller will free mp */ if (mp_nce != mp) @@ -1118,7 +1150,7 @@ ndp_resolver(ill_t *ill, const in6_addr_t *dst, mblk_t *mp, zoneid_t zoneid) /* Resolution in progress just queue the packet */ mutex_enter(&nce->nce_lock); if (nce->nce_state == ND_INCOMPLETE) { - mp_nce = ip_prepend_zoneid(mp, zoneid); + mp_nce = ip_prepend_zoneid(mp, zoneid, ipst); if (mp_nce == NULL) { err = ENOMEM; } else { @@ -1210,16 +1242,17 @@ nce_set_multicast(ill_t *ill, const in6_addr_t *dst) nce_t *nce; uchar_t *hw_addr = NULL; int err = 0; + ip_stack_t *ipst = ill->ill_ipst; ASSERT(ill != NULL); ASSERT(ill->ill_isv6); ASSERT(!(IN6_IS_ADDR_UNSPECIFIED(dst))); - mutex_enter(&ndp6.ndp_g_lock); - nce = *((nce_t **)NCE_HASH_PTR_V6(*dst)); + mutex_enter(&ipst->ips_ndp6->ndp_g_lock); + nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *dst)); nce = nce_lookup_addr(ill, dst, nce); if (nce != NULL) { - mutex_exit(&ndp6.ndp_g_lock); + mutex_exit(&ipst->ips_ndp6->ndp_g_lock); NCE_REFRELE(nce); return (0); } @@ -1227,7 +1260,7 @@ nce_set_multicast(ill_t *ill, const in6_addr_t *dst) mnce = nce_lookup_mapping(ill, dst); if (mnce == NULL) { /* Something broken for the interface. 
*/ - mutex_exit(&ndp6.ndp_g_lock); + mutex_exit(&ipst->ips_ndp6->ndp_g_lock); return (ESRCH); } ASSERT(mnce->nce_flags & NCE_F_MAPPING); @@ -1239,7 +1272,7 @@ nce_set_multicast(ill_t *ill, const in6_addr_t *dst) */ hw_addr = kmem_alloc(ill->ill_nd_lla_len, KM_NOSLEEP); if (hw_addr == NULL) { - mutex_exit(&ndp6.ndp_g_lock); + mutex_exit(&ipst->ips_ndp6->ndp_g_lock); NCE_REFRELE(mnce); return (ENOMEM); } @@ -1261,7 +1294,7 @@ nce_set_multicast(ill_t *ill, const in6_addr_t *dst) &nce, NULL, NULL); - mutex_exit(&ndp6.ndp_g_lock); + mutex_exit(&ipst->ips_ndp6->ndp_g_lock); if (hw_addr != NULL) kmem_free(hw_addr, ill->ill_nd_lla_len); if (err != 0) { @@ -1322,6 +1355,7 @@ ndp_mcastreq(ill_t *ill, const in6_addr_t *addr, uint32_t hw_addr_len, { nce_t *nce; uchar_t *hw_addr; + ip_stack_t *ipst = ill->ill_ipst; ASSERT(ill != NULL && ill->ill_isv6); ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); @@ -1330,14 +1364,14 @@ ndp_mcastreq(ill_t *ill, const in6_addr_t *addr, uint32_t hw_addr_len, freemsg(mp); return (EINVAL); } - mutex_enter(&ndp6.ndp_g_lock); + mutex_enter(&ipst->ips_ndp6->ndp_g_lock); nce = nce_lookup_mapping(ill, addr); if (nce == NULL) { - mutex_exit(&ndp6.ndp_g_lock); + mutex_exit(&ipst->ips_ndp6->ndp_g_lock); freemsg(mp); return (ESRCH); } - mutex_exit(&ndp6.ndp_g_lock); + mutex_exit(&ipst->ips_ndp6->ndp_g_lock); /* * Update dl_addr_length and dl_addr_offset for primitives that * have physical addresses as opposed to full saps @@ -1384,8 +1418,9 @@ nce_solicit(nce_t *nce, mblk_t *mp) ipif_t *ipif; ip6i_t *ip6i; boolean_t dropped = B_FALSE; + ip_stack_t *ipst = nce->nce_ill->ill_ipst; - ASSERT(RW_READ_HELD(&ill_g_lock)); + ASSERT(RW_READ_HELD(&ipst->ips_ill_g_lock)); ASSERT(MUTEX_HELD(&nce->nce_lock)); ill = nce->nce_ill; ASSERT(ill != NULL); @@ -1459,10 +1494,10 @@ nce_solicit(nce_t *nce, mblk_t *mp) src_ill = NULL; nce->nce_rcnt--; mutex_exit(&nce->nce_lock); - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, src_ill, B_TRUE, &src, &dst, 0); - rw_enter(&ill_g_lock, RW_READER); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); mutex_enter(&nce->nce_lock); if (dropped) nce->nce_rcnt++; @@ -1564,6 +1599,7 @@ ndp_do_recovery(ipif_t *ipif) { ill_t *ill = ipif->ipif_ill; mblk_t *mp; + ip_stack_t *ipst = ill->ill_ipst; mp = allocb(sizeof (ipif->ipif_v6lcl_addr), BPRI_MED); if (mp == NULL) { @@ -1572,7 +1608,7 @@ ndp_do_recovery(ipif_t *ipif) !(ipif->ipif_state_flags & (IPIF_MOVING | IPIF_CONDEMNED))) { ipif->ipif_recovery_id = timeout(ipif6_dup_recovery, - ipif, MSEC_TO_TICK(ip_dup_recovery)); + ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery)); } mutex_exit(&ill->ill_lock); } else { @@ -1679,6 +1715,7 @@ ip_ndp_excl(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) nd_neighbor_solicit_t *ns; mblk_t *dl_mp = NULL; uchar_t *haddr; + ip_stack_t *ipst = ill->ill_ipst; if (DB_TYPE(mp) != M_DATA) { dl_mp = mp; @@ -1732,9 +1769,9 @@ ip_ndp_excl(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) ill->ill_net_type == IRE_IF_RESOLVER && !(ipif->ipif_state_flags & (IPIF_MOVING | IPIF_CONDEMNED)) && - ip_dup_recovery > 0) { + ipst->ips_ip_dup_recovery > 0) { ipif->ipif_recovery_id = timeout(ipif6_dup_recovery, - ipif, MSEC_TO_TICK(ip_dup_recovery)); + ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery)); } mutex_exit(&ill->ill_lock); } @@ -1781,18 +1818,19 @@ ip_ndp_conflict(ill_t *ill, mblk_t *mp, mblk_t *dl_mp, nce_t *nce) uint32_t now; uint_t maxdefense; uint_t defs; + ip_stack_t *ipst = ill->ill_ipst; ipif = ipif_lookup_addr_v6(&nce->nce_addr, ill, 
ALL_ZONES, NULL, NULL, - NULL, NULL); + NULL, NULL, ipst); if (ipif == NULL) return; /* * First, figure out if this address is disposable. */ if (ipif->ipif_flags & (IPIF_DHCPRUNNING | IPIF_TEMPORARY)) - maxdefense = ip_max_temp_defend; + maxdefense = ipst->ips_ip_max_temp_defend; else - maxdefense = ip_max_defend; + maxdefense = ipst->ips_ip_max_defend; /* * Now figure out how many times we've defended ourselves. Ignore @@ -1801,7 +1839,7 @@ ip_ndp_conflict(ill_t *ill, mblk_t *mp, mblk_t *dl_mp, nce_t *nce) now = gethrestime_sec(); mutex_enter(&nce->nce_lock); if ((defs = nce->nce_defense_count) > 0 && - now - nce->nce_defense_time > ip_defend_interval) { + now - nce->nce_defense_time > ipst->ips_ip_defend_interval) { nce->nce_defense_count = defs = 0; } nce->nce_defense_count++; @@ -2055,6 +2093,7 @@ ndp_input_advert(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) nd_opt_hdr_t *opt = NULL; int len; mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; + ip_stack_t *ipst = ill->ill_ipst; ip6h = (ip6_t *)mp->b_rptr; icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); @@ -2100,7 +2139,7 @@ ndp_input_advert(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) * If this interface is part of the group look at all the * ills in the group. */ - rw_enter(&ill_g_lock, RW_READER); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); if (ill->ill_group != NULL) ill = ill->ill_group->illgrp_ill; @@ -2114,7 +2153,7 @@ ndp_input_advert(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) mutex_exit(&ill->ill_lock); dst_nce = ndp_lookup_v6(ill, &target, B_FALSE); /* We have to drop the lock since ndp_process calls put* */ - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); if (dst_nce != NULL) { if ((dst_nce->nce_flags & NCE_F_PERMANENT) && dst_nce->nce_state == ND_PROBE) { @@ -2153,10 +2192,10 @@ ndp_input_advert(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) } NCE_REFRELE(dst_nce); } - rw_enter(&ill_g_lock, RW_READER); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); ill_refrele(ill); } - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); } /* @@ -2431,9 +2470,16 @@ nce_make_mapping(nce_t *nce, uchar_t *addrpos, uchar_t *addr) int ndp_report(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *ioc_cr) { + ip_stack_t *ipst; + + if (CONN_Q(q)) + ipst = CONNQ_TO_IPST(q); + else + ipst = ILLQ_TO_IPST(q); + (void) mi_mpprintf(mp, "ifname hardware addr flags" " proto addr/mask"); - ndp_walk(NULL, (pfi_t)nce_report1, (uchar_t *)mp); + ndp_walk(NULL, (pfi_t)nce_report1, (uchar_t *)mp, ipst); return (0); } @@ -2572,6 +2618,7 @@ ndp_timer(void *arg) char addrbuf[INET6_ADDRSTRLEN]; mblk_t *mp; boolean_t dropped = B_FALSE; + ip_stack_t *ipst = ill->ill_ipst; /* * The timer has to be cancelled by ndp_delete before doing the final @@ -2585,7 +2632,7 @@ ndp_timer(void *arg) * Grab the ill_g_lock now itself to avoid lock order problems. 
* nce_solicit needs ill_g_lock to be able to traverse ills */ - rw_enter(&ill_g_lock, RW_READER); + rw_enter(&ipst->ips_ill_g_lock, RW_READER); mutex_enter(&nce->nce_lock); NCE_REFHOLD_LOCKED(nce); nce->nce_timeout_id = 0; @@ -2595,7 +2642,7 @@ ndp_timer(void *arg) */ switch (nce->nce_state) { case ND_DELAY: - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); nce->nce_state = ND_PROBE; mutex_exit(&nce->nce_lock); (void) nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, B_FALSE, @@ -2610,7 +2657,7 @@ ndp_timer(void *arg) return; case ND_PROBE: /* must be retransmit timer */ - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); nce->nce_pcnt--; ASSERT(nce->nce_pcnt < ND_MAX_UNICAST_SOLICIT && nce->nce_pcnt >= -1); @@ -2664,7 +2711,7 @@ ndp_timer(void *arg) nce->nce_state = ND_REACHABLE; mutex_exit(&nce->nce_lock); ipif = ipif_lookup_addr_v6(&nce->nce_addr, ill, - ALL_ZONES, NULL, NULL, NULL, NULL); + ALL_ZONES, NULL, NULL, NULL, NULL, ipst); if (ipif != NULL) { if (ipif->ipif_was_dup) { char ibuf[LIFNAMSIZ + 10]; @@ -2703,9 +2750,10 @@ ndp_timer(void *arg) if (dropped) { nce->nce_unsolicit_count = 1; NDP_RESTART_TIMER(nce, - ip_ndp_unsolicit_interval); - } else if (ip_ndp_defense_interval != 0) { - NDP_RESTART_TIMER(nce, ip_ndp_defense_interval); + ipst->ips_ip_ndp_unsolicit_interval); + } else if (ipst->ips_ip_ndp_defense_interval != 0) { + NDP_RESTART_TIMER(nce, + ipst->ips_ip_ndp_defense_interval); } } else { /* @@ -2762,7 +2810,7 @@ ndp_timer(void *arg) } if (nce->nce_qd_mp != NULL) { ms = nce_solicit(nce, NULL); - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); if (ms == 0) { if (nce->nce_state != ND_REACHABLE) { mutex_exit(&nce->nce_lock); @@ -2779,15 +2827,15 @@ ndp_timer(void *arg) return; } mutex_exit(&nce->nce_lock); - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); NCE_REFRELE(nce); break; case ND_REACHABLE : - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); if (((nce->nce_flags & NCE_F_UNSOL_ADV) && nce->nce_unsolicit_count != 0) || ((nce->nce_flags & NCE_F_PERMANENT) && - ip_ndp_defense_interval != 0)) { + ipst->ips_ip_ndp_defense_interval != 0)) { if (nce->nce_unsolicit_count > 0) nce->nce_unsolicit_count--; mutex_exit(&nce->nce_lock); @@ -2805,10 +2853,10 @@ ndp_timer(void *arg) } if (nce->nce_unsolicit_count != 0) { NDP_RESTART_TIMER(nce, - ip_ndp_unsolicit_interval); + ipst->ips_ip_ndp_unsolicit_interval); } else { NDP_RESTART_TIMER(nce, - ip_ndp_defense_interval); + ipst->ips_ip_ndp_defense_interval); } } else { mutex_exit(&nce->nce_lock); @@ -2816,7 +2864,7 @@ ndp_timer(void *arg) NCE_REFRELE(nce); break; default: - rw_exit(&ill_g_lock); + rw_exit(&ipst->ips_ill_g_lock); mutex_exit(&nce->nce_lock); NCE_REFRELE(nce); break; @@ -3048,6 +3096,7 @@ nce_resolv_failed(nce_t *nce) char buf[INET6_ADDRSTRLEN]; ip6_t *ip6h; zoneid_t zoneid = GLOBAL_ZONEID; + ip_stack_t *ipst = nce->nce_ill->ill_ipst; ip1dbg(("nce_resolv_failed: dst %s\n", inet_ntop(AF_INET6, (char *)&nce->nce_addr, buf, sizeof (buf)))); @@ -3086,9 +3135,9 @@ nce_resolv_failed(nce_t *nce) * Ignore failure since icmp_unreachable_v6 will silently * drop packets with an unspecified source address. 
*/ - (void) ip_hdr_complete_v6((ip6_t *)mp->b_rptr, zoneid); + (void) ip_hdr_complete_v6((ip6_t *)mp->b_rptr, zoneid, ipst); icmp_unreachable_v6(nce->nce_ill->ill_wq, first_mp, - ICMP6_DST_UNREACH_ADDR, B_FALSE, B_FALSE, zoneid); + ICMP6_DST_UNREACH_ADDR, B_FALSE, B_FALSE, zoneid, ipst); mp = nxt_mp; } } @@ -3108,6 +3157,7 @@ ndp_sioc_update(ill_t *ill, lif_nd_req_t *lnr) uint16_t new_flags = 0; uint16_t old_flags = 0; int inflags = lnr->lnr_flags; + ip_stack_t *ipst = ill->ill_ipst; ASSERT(ill->ill_isv6); if ((lnr->lnr_state_create != ND_REACHABLE) && @@ -3117,9 +3167,9 @@ ndp_sioc_update(ill_t *ill, lif_nd_req_t *lnr) sin6 = (sin6_t *)&lnr->lnr_addr; addr = &sin6->sin6_addr; - mutex_enter(&ndp6.ndp_g_lock); + mutex_enter(&ipst->ips_ndp6->ndp_g_lock); /* We know it can not be mapping so just look in the hash table */ - nce = *((nce_t **)NCE_HASH_PTR_V6(*addr)); + nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); nce = nce_lookup_addr(ill, addr, nce); if (nce != NULL) new_flags = nce->nce_flags; @@ -3132,7 +3182,7 @@ ndp_sioc_update(ill_t *ill, lif_nd_req_t *lnr) new_flags &= ~NCE_F_ISROUTER; break; case (NDF_ISROUTER_OFF|NDF_ISROUTER_ON): - mutex_exit(&ndp6.ndp_g_lock); + mutex_exit(&ipst->ips_ndp6->ndp_g_lock); if (nce != NULL) NCE_REFRELE(nce); return (EINVAL); @@ -3146,7 +3196,7 @@ ndp_sioc_update(ill_t *ill, lif_nd_req_t *lnr) new_flags &= ~NCE_F_ANYCAST; break; case (NDF_ANYCAST_OFF|NDF_ANYCAST_ON): - mutex_exit(&ndp6.ndp_g_lock); + mutex_exit(&ipst->ips_ndp6->ndp_g_lock); if (nce != NULL) NCE_REFRELE(nce); return (EINVAL); @@ -3160,7 +3210,7 @@ ndp_sioc_update(ill_t *ill, lif_nd_req_t *lnr) new_flags &= ~NCE_F_PROXY; break; case (NDF_PROXY_OFF|NDF_PROXY_ON): - mutex_exit(&ndp6.ndp_g_lock); + mutex_exit(&ipst->ips_ndp6->ndp_g_lock); if (nce != NULL) NCE_REFRELE(nce); return (EINVAL); @@ -3179,7 +3229,7 @@ ndp_sioc_update(ill_t *ill, lif_nd_req_t *lnr) NULL, NULL); if (err != 0) { - mutex_exit(&ndp6.ndp_g_lock); + mutex_exit(&ipst->ips_ndp6->ndp_g_lock); ip1dbg(("ndp_sioc_update: Can't create NCE %d\n", err)); return (err); } @@ -3191,12 +3241,12 @@ ndp_sioc_update(ill_t *ill, lif_nd_req_t *lnr) * XXX Just delete the entry, but we need to add too. */ nce->nce_flags &= ~NCE_F_ISROUTER; - mutex_exit(&ndp6.ndp_g_lock); + mutex_exit(&ipst->ips_ndp6->ndp_g_lock); ndp_delete(nce); NCE_REFRELE(nce); return (0); } - mutex_exit(&ndp6.ndp_g_lock); + mutex_exit(&ipst->ips_ndp6->ndp_g_lock); mutex_enter(&nce->nce_lock); nce->nce_flags = new_flags; @@ -3661,6 +3711,7 @@ arp_resolv_failed(nce_t *nce) char buf[INET6_ADDRSTRLEN]; zoneid_t zoneid = GLOBAL_ZONEID; struct in_addr ipv4addr; + ip_stack_t *ipst = nce->nce_ill->ill_ipst; IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &ipv4addr); ip3dbg(("arp_resolv_failed: dst %s\n", @@ -3680,10 +3731,10 @@ arp_resolv_failed(nce_t *nce) * Send icmp unreachable messages * to the hosts. 
*/ - (void) ip_hdr_complete((ipha_t *)mp->b_rptr, zoneid); + (void) ip_hdr_complete((ipha_t *)mp->b_rptr, zoneid, ipst); ip3dbg(("arp_resolv_failed: Calling icmp_unreachable\n")); icmp_unreachable(nce->nce_ill->ill_wq, first_mp, - ICMP_HOST_UNREACHABLE, zoneid); + ICMP_HOST_UNREACHABLE, zoneid, ipst); mp = nxt_mp; } } @@ -3697,9 +3748,10 @@ ndp_lookup_then_add_v4(ill_t *ill, uchar_t *hw_addr, const in_addr_t *addr, int err = 0; nce_t *nce; in6_addr_t addr6; + ip_stack_t *ipst = ill->ill_ipst; - mutex_enter(&ndp4.ndp_g_lock); - nce = *((nce_t **)NCE_HASH_PTR_V4(*addr)); + mutex_enter(&ipst->ips_ndp4->ndp_g_lock); + nce = *((nce_t **)NCE_HASH_PTR_V4(ipst, *addr)); IN6_IPADDR_TO_V4MAPPED(*addr, &addr6); nce = nce_lookup_addr(ill, &addr6, nce); if (nce == NULL) { @@ -3718,14 +3770,14 @@ ndp_lookup_then_add_v4(ill_t *ill, uchar_t *hw_addr, const in_addr_t *addr, *newnce = nce; err = EEXIST; } - mutex_exit(&ndp4.ndp_g_lock); + mutex_exit(&ipst->ips_ndp4->ndp_g_lock); return (err); } /* * NDP Cache Entry creation routine for IPv4. * Mapped entries are handled in arp. - * This routine must always be called with ndp4.ndp_g_lock held. + * This routine must always be called with ndp4->ndp_g_lock held. * Prior to return, nce_refcnt is incremented. */ static int @@ -3739,8 +3791,9 @@ ndp_add_v4(ill_t *ill, uchar_t *hw_addr, const in_addr_t *addr, mblk_t *mp; mblk_t *template; nce_t **ncep; + ip_stack_t *ipst = ill->ill_ipst; - ASSERT(MUTEX_HELD(&ndp4.ndp_g_lock)); + ASSERT(MUTEX_HELD(&ipst->ips_ndp4->ndp_g_lock)); ASSERT(ill != NULL); if ((flags & ~NCE_EXTERNAL_FLAGS_MASK)) { return (EINVAL); @@ -3805,7 +3858,7 @@ ndp_add_v4(ill_t *ill, uchar_t *hw_addr, const in_addr_t *addr, /* This one is for nce getting created */ nce->nce_refcnt = 1; mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); - ncep = ((nce_t **)NCE_HASH_PTR_V4(*addr)); + ncep = ((nce_t **)NCE_HASH_PTR_V4(ipst, *addr)); #ifdef NCE_DEBUG bzero(nce->nce_trace, sizeof (th_trace_t *) * IP_TR_HASH_MAX); @@ -3862,6 +3915,7 @@ nce_reinit(nce_t *nce) { nce_t *newnce = NULL; in_addr_t nce_addr, nce_mask; + ip_stack_t *ipst = nce->nce_ill->ill_ipst; IN6_V4MAPPED_TO_IPADDR(&nce->nce_addr, nce_addr); IN6_V4MAPPED_TO_IPADDR(&nce->nce_mask, nce_mask); @@ -3873,10 +3927,10 @@ nce_reinit(nce_t *nce) /* * create a new nce with the same addr and mask. */ - mutex_enter(&ndp4.ndp_g_lock); + mutex_enter(&ipst->ips_ndp4->ndp_g_lock); (void) ndp_add_v4(nce->nce_ill, NULL, &nce_addr, &nce_mask, NULL, 0, 0, ND_INITIAL, &newnce, NULL, NULL); - mutex_exit(&ndp4.ndp_g_lock); + mutex_exit(&ipst->ips_ndp4->ndp_g_lock); /* * refrele the old nce. */ @@ -3941,16 +3995,17 @@ nce_delete_hw_changed(nce_t *nce, void *arg) * so that it can continue to look for hardware changes on that address. */ boolean_t -ndp_lookup_ipaddr(in_addr_t addr) +ndp_lookup_ipaddr(in_addr_t addr, netstack_t *ns) { nce_t *nce; struct in_addr nceaddr; + ip_stack_t *ipst = ns->netstack_ip; if (addr == INADDR_ANY) return (B_FALSE); - mutex_enter(&ndp4.ndp_g_lock); - nce = *(nce_t **)NCE_HASH_PTR_V4(addr); + mutex_enter(&ipst->ips_ndp4->ndp_g_lock); + nce = *(nce_t **)NCE_HASH_PTR_V4(ipst, addr); for (; nce != NULL; nce = nce->nce_next) { /* Note that only v4 mapped entries are in the table. 
*/ IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &nceaddr); @@ -3961,6 +4016,6 @@ ndp_lookup_ipaddr(in_addr_t addr) break; } } - mutex_exit(&ndp4.ndp_g_lock); + mutex_exit(&ipst->ips_ndp4->ndp_g_lock); return (nce != NULL); } diff --git a/usr/src/uts/common/inet/ip/ip_netinfo.c b/usr/src/uts/common/inet/ip/ip_netinfo.c index de7efcddc7..c8d3c65029 100644 --- a/usr/src/uts/common/inet/ip/ip_netinfo.c +++ b/usr/src/uts/common/inet/ip/ip_netinfo.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -53,42 +53,49 @@ /* * IPv4 netinfo entry point declarations. */ -static int ip_getifname(phy_if_t, char *, const size_t); -static int ip_getmtu(phy_if_t, lif_if_t); -static int ip_getpmtuenabled(void); +static int ip_getifname(phy_if_t, char *, const size_t, + netstack_t *); +static int ip_getmtu(phy_if_t, lif_if_t, netstack_t *); +static int ip_getpmtuenabled(netstack_t *); static int ip_getlifaddr(phy_if_t, lif_if_t, size_t, - net_ifaddr_t [], void *); -static phy_if_t ip_phygetnext(phy_if_t); -static phy_if_t ip_phylookup(const char *); -static lif_if_t ip_lifgetnext(phy_if_t, lif_if_t); -static int ip_inject(inject_t, net_inject_t *); -static phy_if_t ip_routeto(struct sockaddr *); + net_ifaddr_t [], void *, netstack_t *); +static phy_if_t ip_phygetnext(phy_if_t, netstack_t *); +static phy_if_t ip_phylookup(const char *, netstack_t *); +static lif_if_t ip_lifgetnext(phy_if_t, lif_if_t, netstack_t *); +static int ip_inject(inject_t, net_inject_t *, netstack_t *); +static phy_if_t ip_routeto(struct sockaddr *, netstack_t *); static int ip_ispartialchecksum(mblk_t *); static int ip_isvalidchecksum(mblk_t *); -static int ipv6_getifname(phy_if_t, char *, const size_t); -static int ipv6_getmtu(phy_if_t, lif_if_t); +static int ipv6_getifname(phy_if_t, char *, const size_t, + netstack_t *); +static int ipv6_getmtu(phy_if_t, lif_if_t, netstack_t *); static int ipv6_getlifaddr(phy_if_t, lif_if_t, size_t, - net_ifaddr_t [], void *); -static phy_if_t ipv6_phygetnext(phy_if_t); -static phy_if_t ipv6_phylookup(const char *); -static lif_if_t ipv6_lifgetnext(phy_if_t, lif_if_t); -static int ipv6_inject(inject_t, net_inject_t *); -static phy_if_t ipv6_routeto(struct sockaddr *); + net_ifaddr_t [], void *, netstack_t *); +static phy_if_t ipv6_phygetnext(phy_if_t, netstack_t *); +static phy_if_t ipv6_phylookup(const char *, netstack_t *); +static lif_if_t ipv6_lifgetnext(phy_if_t, lif_if_t, netstack_t *); +static int ipv6_inject(inject_t, net_inject_t *, netstack_t *); +static phy_if_t ipv6_routeto(struct sockaddr *, netstack_t *); static int ipv6_isvalidchecksum(mblk_t *); /* Netinfo private functions */ static int ip_getifname_impl(phy_if_t, char *, - const size_t, boolean_t); -static int ip_getmtu_impl(phy_if_t, lif_if_t, boolean_t); -static phy_if_t ip_phylookup_impl(const char *, boolean_t); -static lif_if_t ip_lifgetnext_impl(phy_if_t, lif_if_t, boolean_t); -static int ip_inject_impl(inject_t, net_inject_t *, boolean_t); + const size_t, boolean_t, ip_stack_t *); +static int ip_getmtu_impl(phy_if_t, lif_if_t, boolean_t, + ip_stack_t *); +static phy_if_t ip_phylookup_impl(const char *, boolean_t, + ip_stack_t *ipst); +static lif_if_t ip_lifgetnext_impl(phy_if_t, lif_if_t, boolean_t, + ip_stack_t *ipst); +static int ip_inject_impl(inject_t, net_inject_t *, boolean_t, + ip_stack_t *); static int ip_getifaddr_type(sa_family_t, ipif_t *, lif_if_t, void *); -static 
phy_if_t ip_routeto_impl(struct sockaddr *); +static phy_if_t ip_routeto_impl(struct sockaddr *, ip_stack_t *); static int ip_getlifaddr_impl(sa_family_t, phy_if_t, lif_if_t, - size_t, net_ifaddr_t [], struct sockaddr *); + size_t, net_ifaddr_t [], struct sockaddr *, + ip_stack_t *); static void ip_ni_queue_in_func(void *); static void ip_ni_queue_out_func(void *); static void ip_ni_queue_func_impl(injection_t *, boolean_t); @@ -136,54 +143,12 @@ static ddi_taskq_t *eventq_queue_in = NULL; static ddi_taskq_t *eventq_queue_out = NULL; ddi_taskq_t *eventq_queue_nic = NULL; -static hook_family_t ipv4root; -static hook_family_t ipv6root; - /* - * Hooks for firewalling - */ -hook_event_t ip4_physical_in_event; -hook_event_t ip4_physical_out_event; -hook_event_t ip4_forwarding_event; -hook_event_t ip4_loopback_in_event; -hook_event_t ip4_loopback_out_event; -hook_event_t ip4_nic_events; -hook_event_t ip6_physical_in_event; -hook_event_t ip6_physical_out_event; -hook_event_t ip6_forwarding_event; -hook_event_t ip6_loopback_in_event; -hook_event_t ip6_loopback_out_event; -hook_event_t ip6_nic_events; - -hook_event_token_t ipv4firewall_physical_in; -hook_event_token_t ipv4firewall_physical_out; -hook_event_token_t ipv4firewall_forwarding; -hook_event_token_t ipv4firewall_loopback_in; -hook_event_token_t ipv4firewall_loopback_out; -hook_event_token_t ipv4nicevents; -hook_event_token_t ipv6firewall_physical_in; -hook_event_token_t ipv6firewall_physical_out; -hook_event_token_t ipv6firewall_forwarding; -hook_event_token_t ipv6firewall_loopback_in; -hook_event_token_t ipv6firewall_loopback_out; -hook_event_token_t ipv6nicevents; - -net_data_t ipv4 = NULL; -net_data_t ipv6 = NULL; - - -/* - * Register IPv4 and IPv6 netinfo functions and initialize queues for inject. + * Initialize queues for inject. */ void -ip_net_init() +ip_net_g_init() { - ipv4 = net_register(&ipv4info); - ASSERT(ipv4 != NULL); - - ipv6 = net_register(&ipv6info); - ASSERT(ipv6 != NULL); - if (eventq_queue_out == NULL) { eventq_queue_out = ddi_taskq_create(NULL, "IP_INJECT_QUEUE_OUT", 1, TASKQ_DEFAULTPRI, 0); @@ -213,10 +178,10 @@ ip_net_init() } /* - * Unregister IPv4 and IPv6 functions and inject queues + * Destroy inject queues */ void -ip_net_destroy() +ip_net_g_destroy() { if (eventq_queue_nic != NULL) { ddi_taskq_destroy(eventq_queue_nic); @@ -232,15 +197,37 @@ ip_net_destroy() ddi_taskq_destroy(eventq_queue_out); eventq_queue_out = NULL; } +} + +/* + * Register IPv4 and IPv6 netinfo functions and initialize queues for inject. 
+ */ +void +ip_net_init(ip_stack_t *ipst, netstack_t *ns) +{ + + ipst->ips_ipv4_net_data = net_register_impl(&ipv4info, ns); + ASSERT(ipst->ips_ipv4_net_data != NULL); + + ipst->ips_ipv6_net_data = net_register_impl(&ipv6info, ns); + ASSERT(ipst->ips_ipv6_net_data != NULL); +} + - if (ipv4 != NULL) { - if (net_unregister(ipv4) == 0) - ipv4 = NULL; +/* + * Unregister IPv4 and IPv6 functions and inject queues + */ +void +ip_net_destroy(ip_stack_t *ipst) +{ + if (ipst->ips_ipv4_net_data != NULL) { + if (net_unregister(ipst->ips_ipv4_net_data) == 0) + ipst->ips_ipv4_net_data = NULL; } - if (ipv6 != NULL) { - if (net_unregister(ipv6) == 0) - ipv6 = NULL; + if (ipst->ips_ipv6_net_data != NULL) { + if (net_unregister(ipst->ips_ipv6_net_data) == 0) + ipst->ips_ipv6_net_data = NULL; } } @@ -248,213 +235,235 @@ ip_net_destroy() * Initialize IPv4 hooks family the event */ void -ipv4_hook_init() +ipv4_hook_init(ip_stack_t *ipst) { - HOOK_FAMILY_INIT(&ipv4root, Hn_IPV4); - if (net_register_family(ipv4, &ipv4root) != 0) { + HOOK_FAMILY_INIT(&ipst->ips_ipv4root, Hn_IPV4); + if (net_register_family(ipst->ips_ipv4_net_data, &ipst->ips_ipv4root) + != 0) { cmn_err(CE_NOTE, "ipv4_hook_init: " "net_register_family failed for ipv4"); } - HOOK_EVENT_INIT(&ip4_physical_in_event, NH_PHYSICAL_IN); - ipv4firewall_physical_in = net_register_event(ipv4, - &ip4_physical_in_event); - if (ipv4firewall_physical_in == NULL) { + HOOK_EVENT_INIT(&ipst->ips_ip4_physical_in_event, NH_PHYSICAL_IN); + ipst->ips_ipv4firewall_physical_in = net_register_event( + ipst->ips_ipv4_net_data, &ipst->ips_ip4_physical_in_event); + if (ipst->ips_ipv4firewall_physical_in == NULL) { cmn_err(CE_NOTE, "ipv4_hook_init: " "net_register_event failed for ipv4/physical_in"); } - HOOK_EVENT_INIT(&ip4_physical_out_event, NH_PHYSICAL_OUT); - ipv4firewall_physical_out = net_register_event(ipv4, - &ip4_physical_out_event); - if (ipv4firewall_physical_out == NULL) { + HOOK_EVENT_INIT(&ipst->ips_ip4_physical_out_event, NH_PHYSICAL_OUT); + ipst->ips_ipv4firewall_physical_out = net_register_event( + ipst->ips_ipv4_net_data, &ipst->ips_ip4_physical_out_event); + if (ipst->ips_ipv4firewall_physical_out == NULL) { cmn_err(CE_NOTE, "ipv4_hook_init: " "net_register_event failed for ipv4/physical_out"); } - HOOK_EVENT_INIT(&ip4_forwarding_event, NH_FORWARDING); - ipv4firewall_forwarding = net_register_event(ipv4, - &ip4_forwarding_event); - if (ipv4firewall_forwarding == NULL) { + HOOK_EVENT_INIT(&ipst->ips_ip4_forwarding_event, NH_FORWARDING); + ipst->ips_ipv4firewall_forwarding = net_register_event( + ipst->ips_ipv4_net_data, &ipst->ips_ip4_forwarding_event); + if (ipst->ips_ipv4firewall_forwarding == NULL) { cmn_err(CE_NOTE, "ipv4_hook_init: " "net_register_event failed for ipv4/forwarding"); } - HOOK_EVENT_INIT(&ip4_loopback_in_event, NH_LOOPBACK_IN); - ipv4firewall_loopback_in = net_register_event(ipv4, - &ip4_loopback_in_event); - if (ipv4firewall_loopback_in == NULL) { + HOOK_EVENT_INIT(&ipst->ips_ip4_loopback_in_event, NH_LOOPBACK_IN); + ipst->ips_ipv4firewall_loopback_in = net_register_event( + ipst->ips_ipv4_net_data, &ipst->ips_ip4_loopback_in_event); + if (ipst->ips_ipv4firewall_loopback_in == NULL) { cmn_err(CE_NOTE, "ipv4_hook_init: " "net_register_event failed for ipv4/loopback_in"); } - HOOK_EVENT_INIT(&ip4_loopback_out_event, NH_LOOPBACK_OUT); - ipv4firewall_loopback_out = net_register_event(ipv4, - &ip4_loopback_out_event); - if (ipv4firewall_loopback_out == NULL) { + HOOK_EVENT_INIT(&ipst->ips_ip4_loopback_out_event, NH_LOOPBACK_OUT); + 
ipst->ips_ipv4firewall_loopback_out = net_register_event( + ipst->ips_ipv4_net_data, &ipst->ips_ip4_loopback_out_event); + if (ipst->ips_ipv4firewall_loopback_out == NULL) { cmn_err(CE_NOTE, "ipv4_hook_init: " "net_register_event failed for ipv4/loopback_out"); } - HOOK_EVENT_INIT(&ip4_nic_events, NH_NIC_EVENTS); - ip4_nic_events.he_flags = HOOK_RDONLY; - ipv4nicevents = net_register_event(ipv4, &ip4_nic_events); - if (ipv4nicevents == NULL) { + HOOK_EVENT_INIT(&ipst->ips_ip4_nic_events, NH_NIC_EVENTS); + ipst->ips_ip4_nic_events.he_flags = HOOK_RDONLY; + ipst->ips_ipv4nicevents = net_register_event( + ipst->ips_ipv4_net_data, &ipst->ips_ip4_nic_events); + if (ipst->ips_ipv4nicevents == NULL) { cmn_err(CE_NOTE, "ipv4_hook_init: " "net_register_event failed for ipv4/nic_events"); } } void -ipv4_hook_destroy() +ipv4_hook_destroy(ip_stack_t *ipst) { - if (ipv4firewall_forwarding != NULL) { - if (net_unregister_event(ipv4, &ip4_forwarding_event) == 0) - ipv4firewall_forwarding = NULL; + if (ipst->ips_ipv4firewall_forwarding != NULL) { + if (net_unregister_event(ipst->ips_ipv4_net_data, + &ipst->ips_ip4_forwarding_event) == 0) + ipst->ips_ipv4firewall_forwarding = NULL; } - if (ipv4firewall_physical_in != NULL) { - if (net_unregister_event(ipv4, &ip4_physical_in_event) == 0) - ipv4firewall_physical_in = NULL; + if (ipst->ips_ipv4firewall_physical_in != NULL) { + if (net_unregister_event(ipst->ips_ipv4_net_data, + &ipst->ips_ip4_physical_in_event) == 0) + ipst->ips_ipv4firewall_physical_in = NULL; } - if (ipv4firewall_physical_out != NULL) { - if (net_unregister_event(ipv4, &ip4_physical_out_event) == 0) - ipv4firewall_physical_out = NULL; + if (ipst->ips_ipv4firewall_physical_out != NULL) { + if (net_unregister_event(ipst->ips_ipv4_net_data, + &ipst->ips_ip4_physical_out_event) == 0) + ipst->ips_ipv4firewall_physical_out = NULL; } - if (ipv4firewall_loopback_in != NULL) { - if (net_unregister_event(ipv4, &ip4_loopback_in_event) == 0) - ipv4firewall_loopback_in = NULL; + if (ipst->ips_ipv4firewall_loopback_in != NULL) { + if (net_unregister_event(ipst->ips_ipv4_net_data, + &ipst->ips_ip4_loopback_in_event) == 0) + ipst->ips_ipv4firewall_loopback_in = NULL; } - if (ipv4firewall_loopback_out != NULL) { - if (net_unregister_event(ipv4, &ip4_loopback_out_event) == 0) - ipv4firewall_loopback_out = NULL; + if (ipst->ips_ipv4firewall_loopback_out != NULL) { + if (net_unregister_event(ipst->ips_ipv4_net_data, + &ipst->ips_ip4_loopback_out_event) == 0) + ipst->ips_ipv4firewall_loopback_out = NULL; } - if (ipv4nicevents != NULL) { - if (net_unregister_event(ipv4, &ip4_nic_events) == 0) - ipv4nicevents = NULL; + if (ipst->ips_ipv4nicevents != NULL) { + if (net_unregister_event(ipst->ips_ipv4_net_data, + &ipst->ips_ip4_nic_events) == 0) + ipst->ips_ipv4nicevents = NULL; } - (void) net_unregister_family(ipv4, &ipv4root); + (void) net_unregister_family(ipst->ips_ipv4_net_data, + &ipst->ips_ipv4root); } /* * Initialize IPv6 hooks family and event */ void -ipv6_hook_init() +ipv6_hook_init(ip_stack_t *ipst) { - HOOK_FAMILY_INIT(&ipv6root, Hn_IPV6); - if (net_register_family(ipv6, &ipv6root) != 0) { + HOOK_FAMILY_INIT(&ipst->ips_ipv6root, Hn_IPV6); + if (net_register_family(ipst->ips_ipv6_net_data, &ipst->ips_ipv6root) + != 0) { cmn_err(CE_NOTE, "ipv6_hook_init: " "net_register_family failed for ipv6"); } - HOOK_EVENT_INIT(&ip6_physical_in_event, NH_PHYSICAL_IN); - ipv6firewall_physical_in = net_register_event(ipv6, - &ip6_physical_in_event); - if (ipv6firewall_physical_in == NULL) { + 
HOOK_EVENT_INIT(&ipst->ips_ip6_physical_in_event, NH_PHYSICAL_IN); + ipst->ips_ipv6firewall_physical_in = net_register_event( + ipst->ips_ipv6_net_data, &ipst->ips_ip6_physical_in_event); + if (ipst->ips_ipv6firewall_physical_in == NULL) { cmn_err(CE_NOTE, "ipv6_hook_init: " "net_register_event failed for ipv6/physical_in"); } - HOOK_EVENT_INIT(&ip6_physical_out_event, NH_PHYSICAL_OUT); - ipv6firewall_physical_out = net_register_event(ipv6, - &ip6_physical_out_event); - if (ipv6firewall_physical_out == NULL) { + HOOK_EVENT_INIT(&ipst->ips_ip6_physical_out_event, NH_PHYSICAL_OUT); + ipst->ips_ipv6firewall_physical_out = net_register_event( + ipst->ips_ipv6_net_data, &ipst->ips_ip6_physical_out_event); + if (ipst->ips_ipv6firewall_physical_out == NULL) { cmn_err(CE_NOTE, "ipv6_hook_init: " "net_register_event failed for ipv6/physical_out"); } - HOOK_EVENT_INIT(&ip6_forwarding_event, NH_FORWARDING); - ipv6firewall_forwarding = net_register_event(ipv6, - &ip6_forwarding_event); - if (ipv6firewall_forwarding == NULL) { + HOOK_EVENT_INIT(&ipst->ips_ip6_forwarding_event, NH_FORWARDING); + ipst->ips_ipv6firewall_forwarding = net_register_event( + ipst->ips_ipv6_net_data, &ipst->ips_ip6_forwarding_event); + if (ipst->ips_ipv6firewall_forwarding == NULL) { cmn_err(CE_NOTE, "ipv6_hook_init: " "net_register_event failed for ipv6/forwarding"); } - HOOK_EVENT_INIT(&ip6_loopback_in_event, NH_LOOPBACK_IN); - ipv6firewall_loopback_in = net_register_event(ipv6, - &ip6_loopback_in_event); - if (ipv6firewall_loopback_in == NULL) { + HOOK_EVENT_INIT(&ipst->ips_ip6_loopback_in_event, NH_LOOPBACK_IN); + ipst->ips_ipv6firewall_loopback_in = net_register_event( + ipst->ips_ipv6_net_data, &ipst->ips_ip6_loopback_in_event); + if (ipst->ips_ipv6firewall_loopback_in == NULL) { cmn_err(CE_NOTE, "ipv6_hook_init: " "net_register_event failed for ipv6/loopback_in"); } - HOOK_EVENT_INIT(&ip6_loopback_out_event, NH_LOOPBACK_OUT); - ipv6firewall_loopback_out = net_register_event(ipv6, - &ip6_loopback_out_event); - if (ipv6firewall_loopback_out == NULL) { + HOOK_EVENT_INIT(&ipst->ips_ip6_loopback_out_event, NH_LOOPBACK_OUT); + ipst->ips_ipv6firewall_loopback_out = net_register_event( + ipst->ips_ipv6_net_data, &ipst->ips_ip6_loopback_out_event); + if (ipst->ips_ipv6firewall_loopback_out == NULL) { cmn_err(CE_NOTE, "ipv6_hook_init: " "net_register_event failed for ipv6/loopback_out"); } - HOOK_EVENT_INIT(&ip6_nic_events, NH_NIC_EVENTS); - ip6_nic_events.he_flags = HOOK_RDONLY; - ipv6nicevents = net_register_event(ipv6, &ip6_nic_events); - if (ipv6nicevents == NULL) { + HOOK_EVENT_INIT(&ipst->ips_ip6_nic_events, NH_NIC_EVENTS); + ipst->ips_ip6_nic_events.he_flags = HOOK_RDONLY; + ipst->ips_ipv6nicevents = net_register_event( + ipst->ips_ipv6_net_data, &ipst->ips_ip6_nic_events); + if (ipst->ips_ipv6nicevents == NULL) { cmn_err(CE_NOTE, "ipv6_hook_init: " "net_register_event failed for ipv6/nic_events"); } } void -ipv6_hook_destroy() +ipv6_hook_destroy(ip_stack_t *ipst) { - if (ipv6firewall_forwarding != NULL) { - if (net_unregister_event(ipv6, &ip6_forwarding_event) == 0) - ipv6firewall_forwarding = NULL; + if (ipst->ips_ipv6firewall_forwarding != NULL) { + if (net_unregister_event(ipst->ips_ipv6_net_data, + &ipst->ips_ip6_forwarding_event) == 0) + ipst->ips_ipv6firewall_forwarding = NULL; } - if (ipv6firewall_physical_in != NULL) { - if (net_unregister_event(ipv6, &ip6_physical_in_event) == 0) - ipv6firewall_physical_in = NULL; + if (ipst->ips_ipv6firewall_physical_in != NULL) { + if 
(net_unregister_event(ipst->ips_ipv6_net_data, + &ipst->ips_ip6_physical_in_event) == 0) + ipst->ips_ipv6firewall_physical_in = NULL; } - if (ipv6firewall_physical_out != NULL) { - if (net_unregister_event(ipv6, &ip6_physical_out_event) == 0) - ipv6firewall_physical_out = NULL; + if (ipst->ips_ipv6firewall_physical_out != NULL) { + if (net_unregister_event(ipst->ips_ipv6_net_data, + &ipst->ips_ip6_physical_out_event) == 0) + ipst->ips_ipv6firewall_physical_out = NULL; } - if (ipv6firewall_loopback_in != NULL) { - if (net_unregister_event(ipv6, &ip6_loopback_in_event) == 0) - ipv6firewall_loopback_in = NULL; + if (ipst->ips_ipv6firewall_loopback_in != NULL) { + if (net_unregister_event(ipst->ips_ipv6_net_data, + &ipst->ips_ip6_loopback_in_event) == 0) + ipst->ips_ipv6firewall_loopback_in = NULL; } - if (ipv6firewall_loopback_out != NULL) { - if (net_unregister_event(ipv6, &ip6_loopback_out_event) == 0) - ipv6firewall_loopback_out = NULL; + if (ipst->ips_ipv6firewall_loopback_out != NULL) { + if (net_unregister_event(ipst->ips_ipv6_net_data, + &ipst->ips_ip6_loopback_out_event) == 0) + ipst->ips_ipv6firewall_loopback_out = NULL; } - if (ipv6nicevents != NULL) { - if (net_unregister_event(ipv6, &ip6_nic_events) == 0) - ipv6nicevents = NULL; + if (ipst->ips_ipv6nicevents != NULL) { + if (net_unregister_event(ipst->ips_ipv6_net_data, + &ipst->ips_ip6_nic_events) == 0) + ipst->ips_ipv6nicevents = NULL; } - (void) net_unregister_family(ipv6, &ipv6root); + (void) net_unregister_family(ipst->ips_ipv6_net_data, + &ipst->ips_ipv6root); } /* * Determine the name of an IPv4 interface */ static int -ip_getifname(phy_if_t phy_ifdata, char *buffer, const size_t buflen) +ip_getifname(phy_if_t phy_ifdata, char *buffer, const size_t buflen, + netstack_t *ns) { - return (ip_getifname_impl(phy_ifdata, buffer, buflen, B_FALSE)); + return (ip_getifname_impl(phy_ifdata, buffer, buflen, B_FALSE, + ns->netstack_ip)); } /* * Determine the name of an IPv6 interface */ static int -ipv6_getifname(phy_if_t phy_ifdata, char *buffer, const size_t buflen) +ipv6_getifname(phy_if_t phy_ifdata, char *buffer, const size_t buflen, + netstack_t *ns) { - return (ip_getifname_impl(phy_ifdata, buffer, buflen, B_TRUE)); + return (ip_getifname_impl(phy_ifdata, buffer, buflen, B_TRUE, + ns->netstack_ip)); } /* @@ -463,14 +472,14 @@ ipv6_getifname(phy_if_t phy_ifdata, char *buffer, const size_t buflen) /* ARGSUSED */ static int ip_getifname_impl(phy_if_t phy_ifdata, - char *buffer, const size_t buflen, boolean_t isv6) + char *buffer, const size_t buflen, boolean_t isv6, ip_stack_t *ipst) { ill_t *ill; ASSERT(buffer != NULL); ill = ill_lookup_on_ifindex((uint_t)phy_ifdata, isv6, NULL, NULL, - NULL, NULL); + NULL, NULL, ipst); if (ill == NULL) return (1); @@ -489,18 +498,20 @@ ip_getifname_impl(phy_if_t phy_ifdata, * Determine the MTU of an IPv4 network interface */ static int -ip_getmtu(phy_if_t phy_ifdata, lif_if_t ifdata) +ip_getmtu(phy_if_t phy_ifdata, lif_if_t ifdata, netstack_t *ns) { - return (ip_getmtu_impl(phy_ifdata, ifdata, B_FALSE)); + ASSERT(ns != NULL); + return (ip_getmtu_impl(phy_ifdata, ifdata, B_FALSE, ns->netstack_ip)); } /* * Determine the MTU of an IPv6 network interface */ static int -ipv6_getmtu(phy_if_t phy_ifdata, lif_if_t ifdata) +ipv6_getmtu(phy_if_t phy_ifdata, lif_if_t ifdata, netstack_t *ns) { - return (ip_getmtu_impl(phy_ifdata, ifdata, B_TRUE)); + ASSERT(ns != NULL); + return (ip_getmtu_impl(phy_ifdata, ifdata, B_TRUE, ns->netstack_ip)); } /* @@ -508,7 +519,8 @@ ipv6_getmtu(phy_if_t phy_ifdata, lif_if_t 
ifdata) */ /* ARGSUSED */ static int -ip_getmtu_impl(phy_if_t phy_ifdata, lif_if_t ifdata, boolean_t isv6) +ip_getmtu_impl(phy_if_t phy_ifdata, lif_if_t ifdata, boolean_t isv6, + ip_stack_t *ipst) { lif_if_t ipifid; ipif_t *ipif; @@ -516,7 +528,8 @@ ip_getmtu_impl(phy_if_t phy_ifdata, lif_if_t ifdata, boolean_t isv6) ipifid = UNMAP_IPIF_ID(ifdata); - ipif = ipif_getby_indexes((uint_t)phy_ifdata, (uint_t)ipifid, isv6); + ipif = ipif_getby_indexes((uint_t)phy_ifdata, (uint_t)ipifid, + isv6, ipst); if (ipif == NULL) return (0); @@ -527,7 +540,7 @@ ip_getmtu_impl(phy_if_t phy_ifdata, lif_if_t ifdata, boolean_t isv6) ill_t *ill; if ((ill = ill_lookup_on_ifindex((uint_t)phy_ifdata, isv6, - NULL, NULL, NULL, NULL)) == NULL) { + NULL, NULL, NULL, NULL, ipst)) == NULL) { return (0); } mtu = ill->ill_max_frag; @@ -541,46 +554,50 @@ ip_getmtu_impl(phy_if_t phy_ifdata, lif_if_t ifdata, boolean_t isv6) * Determine if path MTU discovery is enabled for IP */ static int -ip_getpmtuenabled(void) +ip_getpmtuenabled(netstack_t *ns) { - return (ip_path_mtu_discovery); + ASSERT(ns != NULL); + return ((ns->netstack_ip)->ips_ip_path_mtu_discovery); } /* * Get next interface from the current list of IPv4 physical network interfaces */ static phy_if_t -ip_phygetnext(phy_if_t phy_ifdata) +ip_phygetnext(phy_if_t phy_ifdata, netstack_t *ns) { - return (ill_get_next_ifindex(phy_ifdata, B_FALSE)); + ASSERT(ns != NULL); + return (ill_get_next_ifindex(phy_ifdata, B_FALSE, ns->netstack_ip)); } /* * Get next interface from the current list of IPv6 physical network interfaces */ static phy_if_t -ipv6_phygetnext(phy_if_t phy_ifdata) +ipv6_phygetnext(phy_if_t phy_ifdata, netstack_t *ns) { - return (ill_get_next_ifindex(phy_ifdata, B_TRUE)); + ASSERT(ns != NULL); + return (ill_get_next_ifindex(phy_ifdata, B_TRUE, ns->netstack_ip)); } /* * Determine if a network interface name exists for IPv4 */ static phy_if_t -ip_phylookup(const char *name) +ip_phylookup(const char *name, netstack_t *ns) { - return (ip_phylookup_impl(name, B_FALSE)); - + ASSERT(ns != NULL); + return (ip_phylookup_impl(name, B_FALSE, ns->netstack_ip)); } /* * Determine if a network interface name exists for IPv6 */ static phy_if_t -ipv6_phylookup(const char *name) +ipv6_phylookup(const char *name, netstack_t *ns) { - return (ip_phylookup_impl(name, B_TRUE)); + ASSERT(ns != NULL); + return (ip_phylookup_impl(name, B_TRUE, ns->netstack_ip)); } /* @@ -589,13 +606,13 @@ ipv6_phylookup(const char *name) * because it does not match on the address family in addition to the name. 
*/ static phy_if_t -ip_phylookup_impl(const char *name, boolean_t isv6) +ip_phylookup_impl(const char *name, boolean_t isv6, ip_stack_t *ipst) { phy_if_t phy; ill_t *ill; ill = ill_lookup_on_name((char *)name, B_FALSE, isv6, NULL, NULL, - NULL, NULL, NULL); + NULL, NULL, NULL, ipst); if (ill == NULL) return (0); @@ -611,18 +628,22 @@ ip_phylookup_impl(const char *name, boolean_t isv6) * Get next interface from the current list of IPv4 logical network interfaces */ static lif_if_t -ip_lifgetnext(phy_if_t phy_ifdata, lif_if_t ifdata) +ip_lifgetnext(phy_if_t phy_ifdata, lif_if_t ifdata, netstack_t *ns) { - return (ip_lifgetnext_impl(phy_ifdata, ifdata, B_FALSE)); + ASSERT(ns != NULL); + return (ip_lifgetnext_impl(phy_ifdata, ifdata, B_FALSE, + ns->netstack_ip)); } /* * Get next interface from the current list of IPv6 logical network interfaces */ static lif_if_t -ipv6_lifgetnext(phy_if_t phy_ifdata, lif_if_t ifdata) +ipv6_lifgetnext(phy_if_t phy_ifdata, lif_if_t ifdata, netstack_t *ns) { - return (ip_lifgetnext_impl(phy_ifdata, ifdata, B_TRUE)); + ASSERT(ns != NULL); + return (ip_lifgetnext_impl(phy_ifdata, ifdata, B_TRUE, + ns->netstack_ip)); } /* @@ -630,14 +651,16 @@ ipv6_lifgetnext(phy_if_t phy_ifdata, lif_if_t ifdata) * logical network interfaces */ static lif_if_t -ip_lifgetnext_impl(phy_if_t phy_ifdata, lif_if_t ifdata, boolean_t isv6) +ip_lifgetnext_impl(phy_if_t phy_ifdata, lif_if_t ifdata, boolean_t isv6, + ip_stack_t *ipst) { lif_if_t newidx, oldidx; boolean_t nextok; ipif_t *ipif; ill_t *ill; - ill = ill_lookup_on_ifindex(phy_ifdata, isv6, NULL, NULL, NULL, NULL); + ill = ill_lookup_on_ifindex(phy_ifdata, isv6, NULL, NULL, + NULL, NULL, ipst); if (ill == NULL) return (0); @@ -688,9 +711,10 @@ ip_lifgetnext_impl(phy_if_t phy_ifdata, lif_if_t ifdata, boolean_t isv6) * Inject an IPv4 packet to or from an interface */ static int -ip_inject(inject_t style, net_inject_t *packet) +ip_inject(inject_t style, net_inject_t *packet, netstack_t *ns) { - return (ip_inject_impl(style, packet, B_FALSE)); + ASSERT(ns != NULL); + return (ip_inject_impl(style, packet, B_FALSE, ns->netstack_ip)); } @@ -698,9 +722,10 @@ ip_inject(inject_t style, net_inject_t *packet) * Inject an IPv6 packet to or from an interface */ static int -ipv6_inject(inject_t style, net_inject_t *packet) +ipv6_inject(inject_t style, net_inject_t *packet, netstack_t *ns) { - return (ip_inject_impl(style, packet, B_TRUE)); + ASSERT(ns != NULL); + return (ip_inject_impl(style, packet, B_TRUE, ns->netstack_ip)); } /* @@ -711,11 +736,12 @@ ipv6_inject(inject_t style, net_inject_t *packet) * 1: other errors */ static int -ip_inject_impl(inject_t style, net_inject_t *packet, boolean_t isv6) +ip_inject_impl(inject_t style, net_inject_t *packet, boolean_t isv6, + ip_stack_t *ipst) { struct sockaddr_in6 *sin6; ddi_taskq_t *tq = NULL; - void (* func)(void*); + void (* func)(void *); injection_t *inject; ip6_t *ip6h; ire_t *ire; @@ -775,7 +801,8 @@ ip_inject_impl(inject_t style, net_inject_t *packet, boolean_t isv6) * Currently this function only supports IPv4. 
*/ switch (ipfil_sendpkt(sock, mp, packet->ni_physical, - ALL_ZONES)) { + netstackid_to_zoneid( + ipst->ips_netstack->netstack_stackid))) { case 0 : case EINPROGRESS: return (0); @@ -795,7 +822,8 @@ ip_inject_impl(inject_t style, net_inject_t *packet, boolean_t isv6) ire = ire_route_lookup_v6(&sin6->sin6_addr, 0, 0, 0, NULL, NULL, ALL_ZONES, NULL, - MATCH_IRE_DSTONLY|MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE); + MATCH_IRE_DSTONLY|MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE, + ipst); if (ire == NULL) { ip2dbg(("ip_inject: ire_cache_lookup failed\n")); @@ -823,7 +851,7 @@ ip_inject_impl(inject_t style, net_inject_t *packet, boolean_t isv6) ire->ire_nce->nce_fp_mp == NULL && ire->ire_nce->nce_res_mp == NULL) { ip_newroute_v6(ire->ire_stq, mp, - &sin6->sin6_addr, NULL, NULL, ALL_ZONES); + &sin6->sin6_addr, NULL, NULL, ALL_ZONES, ipst); ire_refrele(ire); return (0); @@ -860,6 +888,7 @@ ip_inject_impl(inject_t style, net_inject_t *packet, boolean_t isv6) } if (tq) { + inject->inj_ptr = ipst; if (ddi_taskq_dispatch(tq, func, (void *)inject, DDI_SLEEP) == DDI_FAILURE) { ip2dbg(("ip_inject: ddi_taskq_dispatch failed\n")); @@ -878,26 +907,28 @@ ip_inject_impl(inject_t style, net_inject_t *packet, boolean_t isv6) * Find the interface used for traffic to a given IPv4 address */ static phy_if_t -ip_routeto(struct sockaddr *address) +ip_routeto(struct sockaddr *address, netstack_t *ns) { ASSERT(address != NULL); + ASSERT(ns != NULL); if (address->sa_family != AF_INET) return (0); - return (ip_routeto_impl(address)); + return (ip_routeto_impl(address, ns->netstack_ip)); } /* * Find the interface used for traffic to a given IPv6 address */ static phy_if_t -ipv6_routeto(struct sockaddr *address) +ipv6_routeto(struct sockaddr *address, netstack_t *ns) { ASSERT(address != NULL); + ASSERT(ns != NULL); if (address->sa_family != AF_INET6) return (0); - return (ip_routeto_impl(address)); + return (ip_routeto_impl(address, ns->netstack_ip)); } @@ -905,7 +936,7 @@ ipv6_routeto(struct sockaddr *address) * Find the interface used for traffic to an address */ static phy_if_t -ip_routeto_impl(struct sockaddr *address) +ip_routeto_impl(struct sockaddr *address, ip_stack_t *ipst) { ire_t *ire; ill_t *ill; @@ -915,12 +946,14 @@ ip_routeto_impl(struct sockaddr *address) struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)address; ire = ire_route_lookup_v6(&sin6->sin6_addr, NULL, 0, 0, NULL, NULL, ALL_ZONES, NULL, - MATCH_IRE_DSTONLY|MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE); + MATCH_IRE_DSTONLY|MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE, + ipst); } else { struct sockaddr_in *sin = (struct sockaddr_in *)address; ire = ire_route_lookup(sin->sin_addr.s_addr, 0, 0, 0, NULL, NULL, ALL_ZONES, NULL, - MATCH_IRE_DSTONLY|MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE); + MATCH_IRE_DSTONLY|MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE, + ipst); } if (ire == NULL) @@ -1035,10 +1068,11 @@ ipv6_isvalidchecksum(mblk_t *mp) */ static int ip_getlifaddr(phy_if_t phy_ifdata, lif_if_t ifdata, size_t nelem, - net_ifaddr_t type[], void *storage) + net_ifaddr_t type[], void *storage, netstack_t *ns) { + ASSERT(ns != NULL); return (ip_getlifaddr_impl(AF_INET, phy_ifdata, ifdata, - nelem, type, storage)); + nelem, type, storage, ns->netstack_ip)); } /* @@ -1046,10 +1080,11 @@ ip_getlifaddr(phy_if_t phy_ifdata, lif_if_t ifdata, size_t nelem, */ static int ipv6_getlifaddr(phy_if_t phy_ifdata, lif_if_t ifdata, size_t nelem, - net_ifaddr_t type[], void *storage) + net_ifaddr_t type[], void *storage, netstack_t *ns) { + ASSERT(ns != NULL); return (ip_getlifaddr_impl(AF_INET6, 
phy_ifdata, ifdata, - nelem, type, storage)); + nelem, type, storage, ns->netstack_ip)); } /* @@ -1059,7 +1094,7 @@ ipv6_getlifaddr(phy_if_t phy_ifdata, lif_if_t ifdata, size_t nelem, static int ip_getlifaddr_impl(sa_family_t family, phy_if_t phy_ifdata, lif_if_t ifdata, size_t nelem, net_ifaddr_t type[], - struct sockaddr *storage) + struct sockaddr *storage, ip_stack_t *ipst) { struct sockaddr_in6 *sin6; struct sockaddr_in *sin; @@ -1074,7 +1109,7 @@ ip_getlifaddr_impl(sa_family_t family, phy_if_t phy_ifdata, if (family == AF_INET) { if ((ipif = ipif_getby_indexes((uint_t)phy_ifdata, - (uint_t)ipifid, B_FALSE)) == NULL) + (uint_t)ipifid, B_FALSE, ipst)) == NULL) return (1); sin = (struct sockaddr_in *)storage; @@ -1089,7 +1124,7 @@ ip_getlifaddr_impl(sa_family_t family, phy_if_t phy_ifdata, } } else { if ((ipif = ipif_getby_indexes((uint_t)phy_ifdata, - (uint_t)ipifid, B_TRUE)) == NULL) + (uint_t)ipifid, B_TRUE, ipst)) == NULL) return (1); sin6 = (struct sockaddr_in6 *)storage; @@ -1195,13 +1230,14 @@ ip_ni_queue_func_impl(injection_t *inject, boolean_t out) net_inject_t *packet; conn_t *conn; ill_t *ill; + ip_stack_t *ipst = (ip_stack_t *)inject->inj_ptr; ASSERT(inject != NULL); packet = &inject->inj_data; ASSERT(packet->ni_packet != NULL); if ((ill = ill_lookup_on_ifindex((uint_t)packet->ni_physical, - B_FALSE, NULL, NULL, NULL, NULL)) == NULL) { + B_FALSE, NULL, NULL, NULL, NULL, ipst)) == NULL) { kmem_free(inject, sizeof (*inject)); return; } @@ -1223,7 +1259,7 @@ ip_ni_queue_func_impl(injection_t *inject, boolean_t out) * be a TCP connection backing the packet and more than * likely, non-TCP packets will go here too. */ - conn = ipcl_conn_create(IPCL_IPCCONN, KM_NOSLEEP); + conn = ipcl_conn_create(IPCL_IPCCONN, KM_NOSLEEP, ipst->ips_netstack); if (conn != NULL) { if (inject->inj_isv6) { conn->conn_flags |= IPCL_ISV6; @@ -1255,9 +1291,12 @@ ip_ne_queue_func(void *arg) { hook_event_int_t *hr; hook_nic_event_t *info = (hook_nic_event_t *)arg; + netstack_t *ns = info->hne_family->netd_netstack; + ip_stack_t *ipst = ns->netstack_ip; - hr = (info->hne_family == ipv6) ? ipv6nicevents : ipv4nicevents; - (void) hook_run(hr, (hook_data_t)info); + hr = (info->hne_family == ipst->ips_ipv6_net_data) ? + ipst->ips_ipv6nicevents : ipst->ips_ipv4nicevents; + (void) hook_run(hr, (hook_data_t)info, ns); if (info->hne_data != NULL) kmem_free(info->hne_data, info->hne_datalen); diff --git a/usr/src/uts/common/inet/ip/ip_opt_data.c b/usr/src/uts/common/inet/ip/ip_opt_data.c index 2462004b91..08ff30ffae 100644 --- a/usr/src/uts/common/inet/ip/ip_opt_data.c +++ b/usr/src/uts/common/inet/ip/ip_opt_data.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -131,7 +131,7 @@ opdes_t ip_opt_arr[] = { { IP_SEC_OPT, IPPROTO_IP, OA_RW, OA_RW, OP_NP, (OP_PASSNEXT|OP_NODEFAULT), sizeof (ipsec_req_t), -1 /* not initialized */ }, -{ IP_NEXTHOP, IPPROTO_IP, OA_RW, OA_RW, OP_CONFIG, OP_PASSNEXT, +{ IP_NEXTHOP, IPPROTO_IP, OA_R, OA_RW, OP_CONFIG, OP_PASSNEXT, sizeof (in_addr_t), -1 /* not initialized */ }, { MRT_INIT, IPPROTO_IP, 0, OA_X, OP_CONFIG, diff --git a/usr/src/uts/common/inet/ip/ip_rts.c b/usr/src/uts/common/inet/ip/ip_rts.c index 14fa62f6a4..90187e442c 100644 --- a/usr/src/uts/common/inet/ip/ip_rts.c +++ b/usr/src/uts/common/inet/ip/ip_rts.c @@ -1,5 +1,5 @@ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -105,14 +105,14 @@ static void ip_rts_request_retry(ipsq_t *, queue_t *q, mblk_t *mp, void *); * */ void -rts_queue_input(mblk_t *mp, queue_t *q, sa_family_t af) +rts_queue_input(mblk_t *mp, queue_t *q, sa_family_t af, ip_stack_t *ipst) { mblk_t *mp1; int checkqfull; conn_t *connp, *next_connp; - mutex_enter(&rts_clients.connf_lock); - connp = rts_clients.connf_head; + mutex_enter(&ipst->ips_rts_clients->connf_lock); + connp = ipst->ips_rts_clients->connf_head; while (connp != NULL) { /* @@ -136,12 +136,17 @@ rts_queue_input(mblk_t *mp, queue_t *q, sa_family_t af) connp = connp->conn_next; continue; } - checkqfull = B_FALSE; + /* + * Just because it is the same queue doesn't mean it + * will promptly read its acks. Have to avoid using + * all of kernel memory. + */ + checkqfull = B_TRUE; } else { checkqfull = B_TRUE; } CONN_INC_REF(connp); - mutex_exit(&rts_clients.connf_lock); + mutex_exit(&ipst->ips_rts_clients->connf_lock); if (!checkqfull || canputnext(CONNP_TO_RQ(connp))) { mp1 = dupmsg(mp); if (mp1 == NULL) @@ -150,13 +155,13 @@ rts_queue_input(mblk_t *mp, queue_t *q, sa_family_t af) putnext(CONNP_TO_RQ(connp), mp1); } - mutex_enter(&rts_clients.connf_lock); + mutex_enter(&ipst->ips_rts_clients->connf_lock); /* Follow the next pointer before releasing the conn. */ next_connp = connp->conn_next; CONN_DEC_REF(connp); connp = next_connp; } - mutex_exit(&rts_clients.connf_lock); + mutex_exit(&ipst->ips_rts_clients->connf_lock); freemsg(mp); } @@ -167,7 +172,7 @@ rts_queue_input(mblk_t *mp, queue_t *q, sa_family_t af) * - when ire_expire deletes a stale redirect */ void -ip_rts_rtmsg(int type, ire_t *ire, int error) +ip_rts_rtmsg(int type, ire_t *ire, int error, ip_stack_t *ipst) { mblk_t *mp; rt_msghdr_t *rtm; @@ -215,7 +220,7 @@ ip_rts_rtmsg(int type, ire_t *ire, int error) rtm->rtm_errno = error; else rtm->rtm_flags |= RTF_DONE; - rts_queue_input(mp, NULL, af); + rts_queue_input(mp, NULL, af, ipst); } /* ARGSUSED */ @@ -281,12 +286,14 @@ ip_rts_request(queue_t *q, mblk_t *mp, cred_t *ioc_cr) tsol_gc_t *gc = NULL; ts_label_t *tsl = NULL; zoneid_t zoneid; + ip_stack_t *ipst; ip1dbg(("ip_rts_request: mp is %x\n", DB_TYPE(mp))); ASSERT(CONN_Q(q)); connp = Q_TO_CONN(q); zoneid = connp->conn_zoneid; + ipst = connp->conn_netstack->netstack_ip; ASSERT(mp->b_cont != NULL); /* ioc_mp holds mp */ @@ -314,7 +321,7 @@ ip_rts_request(queue_t *q, mblk_t *mp, cred_t *ioc_cr) */ connp->conn_loopback = 1; - ipcl_hash_insert_wildcard(&rts_clients, connp); + ipcl_hash_insert_wildcard(ipst->ips_rts_clients, connp); goto done; } @@ -350,7 +357,7 @@ ip_rts_request(queue_t *q, mblk_t *mp, cred_t *ioc_cr) if (rtm->rtm_type != RTM_GET && rtm->rtm_type != RTM_RESOLVE && (ioc_cr == NULL || - secpolicy_net_config(ioc_cr, B_FALSE) != 0)) { + secpolicy_ip_config(ioc_cr, B_FALSE) != 0)) { error = EPERM; goto done; } @@ -442,7 +449,8 @@ ip_rts_request(queue_t *q, mblk_t *mp, cred_t *ioc_cr) * If ILL_CHANGING the request is queued in the ipsq. */ ill = ill_lookup_on_ifindex(index, af == AF_INET6, - CONNP_TO_WQ(connp), ioc_mp, ip_rts_request_retry, &error); + CONNP_TO_WQ(connp), ioc_mp, ip_rts_request_retry, &error, + ipst); if (ill == NULL) { if (error != EINPROGRESS) error = EINVAL; @@ -468,7 +476,8 @@ ip_rts_request(queue_t *q, mblk_t *mp, cred_t *ioc_cr) /* If ILL_CHANGING the request is queued in the ipsq. 
*/ ill = ill_lookup_on_ifindex(src_index, B_FALSE, - CONNP_TO_WQ(connp), ioc_mp, ip_rts_request_retry, &error); + CONNP_TO_WQ(connp), ioc_mp, ip_rts_request_retry, &error, + ipst); if (ill == NULL) { if (error != EINPROGRESS) error = EINVAL; @@ -539,7 +548,7 @@ ip_rts_request(queue_t *q, mblk_t *mp, cred_t *ioc_cr) error = ip_mrtun_rt_add(src_addr, rtm->rtm_flags, ipif, src_ipif, &ire, CONNP_TO_WQ(connp), - ioc_mp, ip_rts_request_retry); + ioc_mp, ip_rts_request_retry, ipst); break; } /* @@ -557,7 +566,7 @@ ip_rts_request(queue_t *q, mblk_t *mp, cred_t *ioc_cr) */ tmp_ipif = ipif_lookup_addr(src_addr, NULL, ALL_ZONES, CONNP_TO_WQ(connp), ioc_mp, - ip_rts_request_retry, &error); + ip_rts_request_retry, &error, ipst); if (tmp_ipif == NULL) { if (error != EINPROGRESS) error = EADDRNOTAVAIL; @@ -583,7 +592,7 @@ ip_rts_request(queue_t *q, mblk_t *mp, cred_t *ioc_cr) error = ip_rt_add(dst_addr, net_mask, gw_addr, src_addr, rtm->rtm_flags, ipif, src_ipif, &ire, B_FALSE, CONNP_TO_WQ(connp), ioc_mp, ip_rts_request_retry, - rtsap); + rtsap, ipst); if (ipif != NULL) ASSERT(!MUTEX_HELD(&ipif->ipif_ill->ill_lock)); break; @@ -614,7 +623,7 @@ ip_rts_request(queue_t *q, mblk_t *mp, cred_t *ioc_cr) */ tmp_ipif = ipif_lookup_addr_v6(&src_addr_v6, NULL, ALL_ZONES, CONNP_TO_WQ(connp), ioc_mp, - ip_rts_request_retry, &error); + ip_rts_request_retry, &error, ipst); if (tmp_ipif == NULL) { if (error != EINPROGRESS) error = EADDRNOTAVAIL; @@ -631,7 +640,7 @@ ip_rts_request(queue_t *q, mblk_t *mp, cred_t *ioc_cr) error = ip_rt_add_v6(&dst_addr_v6, &net_mask_v6, &gw_addr_v6, &src_addr_v6, rtm->rtm_flags, ipif, &ire, CONNP_TO_WQ(connp), ioc_mp, - ip_rts_request_retry, rtsap); + ip_rts_request_retry, rtsap, ipst); break; } /* @@ -645,7 +654,7 @@ ip_rts_request(queue_t *q, mblk_t *mp, cred_t *ioc_cr) error = ip_rt_add_v6(&dst_addr_v6, &net_mask_v6, &gw_addr_v6, NULL, rtm->rtm_flags, ipif, &ire, CONNP_TO_WQ(connp), ioc_mp, - ip_rts_request_retry, rtsap); + ip_rts_request_retry, rtsap, ipst); if (ipif != NULL) ASSERT(!MUTEX_HELD(&ipif->ipif_ill->ill_lock)); break; @@ -684,12 +693,13 @@ ip_rts_request(queue_t *q, mblk_t *mp, cred_t *ioc_cr) error = ip_rt_delete(dst_addr, net_mask, gw_addr, found_addrs, rtm->rtm_flags, ipif, src_ipif, B_FALSE, CONNP_TO_WQ(connp), ioc_mp, - ip_rts_request_retry); + ip_rts_request_retry, ipst); break; case AF_INET6: error = ip_rt_delete_v6(&dst_addr_v6, &net_mask_v6, &gw_addr_v6, found_addrs, rtm->rtm_flags, ipif, - CONNP_TO_WQ(connp), ioc_mp, ip_rts_request_retry); + CONNP_TO_WQ(connp), ioc_mp, ip_rts_request_retry, + ipst); break; } break; @@ -773,7 +783,7 @@ ip_rts_request(queue_t *q, mblk_t *mp, cred_t *ioc_cr) if (net_mask == IP_HOST_MASK) { ire = ire_ctable_lookup(dst_addr, gw_addr, IRE_LOCAL | IRE_LOOPBACK, NULL, zoneid, - tsl, match_flags_local); + tsl, match_flags_local, ipst); /* * If we found an IRE_LOCAL, make sure * it is one that would be used by this @@ -781,9 +791,9 @@ ip_rts_request(queue_t *q, mblk_t *mp, cred_t *ioc_cr) */ if (ire != NULL && ire->ire_type == IRE_LOCAL && - ip_restrict_interzone_loopback && + ipst->ips_ip_restrict_interzone_loopback && !ire_local_ok_across_zones(ire, - zoneid, &dst_addr, tsl)) { + zoneid, &dst_addr, tsl, ipst)) { ire_refrele(ire); ire = NULL; } @@ -791,14 +801,14 @@ ip_rts_request(queue_t *q, mblk_t *mp, cred_t *ioc_cr) if (ire == NULL) { ire = ire_ftable_lookup(dst_addr, net_mask, gw_addr, 0, ipif, &sire, zoneid, 0, - tsl, match_flags); + tsl, match_flags, ipst); } break; case AF_INET6: if (IN6_ARE_ADDR_EQUAL(&net_mask_v6, 
&ipv6_all_ones)) { ire = ire_ctable_lookup_v6(&dst_addr_v6, &gw_addr_v6, IRE_LOCAL | IRE_LOOPBACK, NULL, - zoneid, tsl, match_flags_local); + zoneid, tsl, match_flags_local, ipst); /* * If we found an IRE_LOCAL, make sure * it is one that would be used by this @@ -806,9 +816,9 @@ ip_rts_request(queue_t *q, mblk_t *mp, cred_t *ioc_cr) */ if (ire != NULL && ire->ire_type == IRE_LOCAL && - ip_restrict_interzone_loopback && + ipst->ips_ip_restrict_interzone_loopback && !ire_local_ok_across_zones(ire, - zoneid, (void *)&dst_addr_v6, tsl)) { + zoneid, (void *)&dst_addr_v6, tsl, ipst)) { ire_refrele(ire); ire = NULL; } @@ -816,7 +826,7 @@ ip_rts_request(queue_t *q, mblk_t *mp, cred_t *ioc_cr) if (ire == NULL) { ire = ire_ftable_lookup_v6(&dst_addr_v6, &net_mask_v6, &gw_addr_v6, 0, ipif, &sire, - zoneid, 0, tsl, match_flags); + zoneid, 0, tsl, match_flags, ipst); } break; } @@ -905,7 +915,7 @@ ip_rts_request(queue_t *q, mblk_t *mp, cred_t *ioc_cr) src_addr, NULL, ALL_ZONES, CONNP_TO_WQ(connp), ioc_mp, ip_rts_request_retry, - &error); + &error, ipst); if (tmp_ipif == NULL) { error = (error == EINPROGRESS) ? @@ -981,7 +991,7 @@ ip_rts_request(queue_t *q, mblk_t *mp, cred_t *ioc_cr) ALL_ZONES, CONNP_TO_WQ(connp), ioc_mp, ip_rts_request_retry, - &error); + &error, ipst); if (tmp_ipif == NULL) { mutex_exit( &ire->ire_lock); @@ -1046,10 +1056,10 @@ ip_rts_request(queue_t *q, mblk_t *mp, cred_t *ioc_cr) IN6_V4MAPPED_TO_IPADDR(&ga.ga_addr, ga_addr4); if (af == AF_INET) { ire_clookup_delete_cache_gw( - ga_addr4, ALL_ZONES); + ga_addr4, ALL_ZONES, ipst); } else { ire_clookup_delete_cache_gw_v6( - &ga.ga_addr, ALL_ZONES); + &ga.ga_addr, ALL_ZONES, ipst); } } rts_setmetrics(ire, rtm->rtm_inits, &rtm->rtm_rmx); @@ -1091,7 +1101,7 @@ done: /* OK ACK already set up by caller except this */ ip2dbg(("ip_rts_request: OK ACK\n")); } - rts_queue_input(mp, q, af); + rts_queue_input(mp, q, af, ipst); } iocp->ioc_error = error; ioc_mp->b_datap->db_type = M_IOCACK; @@ -1769,7 +1779,8 @@ rts_data_msg_size(int rtm_addrs, sa_family_t af, uint_t sacnt) */ void ip_rts_change(int type, ipaddr_t dst_addr, ipaddr_t gw_addr, ipaddr_t net_mask, - ipaddr_t source, ipaddr_t author, int flags, int error, int rtm_addrs) + ipaddr_t source, ipaddr_t author, int flags, int error, int rtm_addrs, + ip_stack_t *ipst) { rt_msghdr_t *rtm; mblk_t *mp; @@ -1786,7 +1797,7 @@ ip_rts_change(int type, ipaddr_t dst_addr, ipaddr_t gw_addr, ipaddr_t net_mask, rtm->rtm_errno = error; rtm->rtm_flags |= RTF_DONE; rtm->rtm_addrs = rtm_addrs; - rts_queue_input(mp, NULL, AF_INET); + rts_queue_input(mp, NULL, AF_INET, ipst); } /* @@ -1800,6 +1811,7 @@ ip_rts_ifmsg(const ipif_t *ipif) if_msghdr_t *ifm; mblk_t *mp; sa_family_t af; + ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; /* * This message should be generated only @@ -1830,7 +1842,7 @@ ip_rts_ifmsg(const ipif_t *ipif) ipif->ipif_ill->ill_phyint->phyint_flags; rts_getifdata(&ifm->ifm_data, ipif); ifm->ifm_addrs = RTA_IFP; - rts_queue_input(mp, NULL, af); + rts_queue_input(mp, NULL, af, ipst); } /* @@ -1848,6 +1860,7 @@ ip_rts_newaddrmsg(int cmd, int error, const ipif_t *ipif) ifa_msghdr_t *ifam; rt_msghdr_t *rtm; sa_family_t af; + ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; if (ipif->ipif_isv6) af = AF_INET6; @@ -1887,7 +1900,7 @@ ip_rts_newaddrmsg(int cmd, int error, const ipif_t *ipif) ifam->ifam_metric = ipif->ipif_metric; ifam->ifam_flags = ((cmd == RTM_ADD) ? 
RTF_UP : 0); ifam->ifam_addrs = rtm_addrs; - rts_queue_input(mp, NULL, af); + rts_queue_input(mp, NULL, af, ipst); } if ((cmd == RTM_ADD && pass == 2) || (cmd == RTM_DELETE && pass == 1)) { @@ -1917,7 +1930,7 @@ ip_rts_newaddrmsg(int cmd, int error, const ipif_t *ipif) if (error == 0) rtm->rtm_flags |= RTF_DONE; rtm->rtm_addrs = rtm_addrs; - rts_queue_input(mp, NULL, af); + rts_queue_input(mp, NULL, af, ipst); } } } diff --git a/usr/src/uts/common/inet/ip/ip_sadb.c b/usr/src/uts/common/inet/ip/ip_sadb.c index 6a4f1cffb9..c28cd31074 100644 --- a/usr/src/uts/common/inet/ip/ip_sadb.c +++ b/usr/src/uts/common/inet/ip/ip_sadb.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -35,57 +35,16 @@ #include <inet/mib2.h> #include <inet/ip.h> #include <inet/ip6.h> -#include <inet/ipdrop.h> #include <net/pfkeyv2.h> #include <inet/ipsec_info.h> #include <inet/sadb.h> #include <inet/ipsec_impl.h> +#include <inet/ipdrop.h> #include <inet/ipsecesp.h> #include <inet/ipsecah.h> #include <sys/kstat.h> -/* stats */ -static kstat_t *ipsec_ksp; -ipsec_kstats_t *ipsec_kstats; - -/* The IPsec SADBs for AH and ESP */ -sadbp_t ah_sadb, esp_sadb; - -/* Packet dropper for IP IPsec processing failures */ -extern ipdropper_t ip_dropper; - -void -ipsec_kstat_init(void) -{ - ipsec_ksp = kstat_create("ip", 0, "ipsec_stat", "net", - KSTAT_TYPE_NAMED, sizeof (*ipsec_kstats) / sizeof (kstat_named_t), - KSTAT_FLAG_PERSISTENT); - - ASSERT(ipsec_ksp != NULL); - - ipsec_kstats = ipsec_ksp->ks_data; - -#define KI(x) kstat_named_init(&ipsec_kstats->x, #x, KSTAT_DATA_UINT64) - KI(esp_stat_in_requests); - KI(esp_stat_in_discards); - KI(esp_stat_lookup_failure); - KI(ah_stat_in_requests); - KI(ah_stat_in_discards); - KI(ah_stat_lookup_failure); - KI(sadb_acquire_maxpackets); - KI(sadb_acquire_qhiwater); -#undef KI - - kstat_install(ipsec_ksp); -} - -void -ipsec_kstat_destroy(void) -{ - kstat_delete(ipsec_ksp); -} - /* * Returns B_TRUE if the identities in the SA match the identities * in the "latch" structure. 
@@ -589,17 +548,25 @@ ipsec_outbound_sa(mblk_t *mp, uint_t proto) sadbp_t *sadbp; sadb_t *sp; sa_family_t af; + netstack_t *ns; data_mp = mp->b_cont; io = (ipsec_out_t *)mp->b_rptr; + ns = io->ipsec_out_ns; if (proto == IPPROTO_ESP) { + ipsecesp_stack_t *espstack; + + espstack = ns->netstack_ipsecesp; sa = &io->ipsec_out_esp_sa; - sadbp = &esp_sadb; + sadbp = &espstack->esp_sadb; } else { + ipsecah_stack_t *ahstack; + ASSERT(proto == IPPROTO_AH); + ahstack = ns->netstack_ipsecah; sa = &io->ipsec_out_ah_sa; - sadbp = &ah_sadb; + sadbp = &ahstack->ah_sadb; } ASSERT(*sa == NULL); @@ -659,7 +626,7 @@ ipsec_outbound_sa(mblk_t *mp, uint_t proto) */ ah_t * -ipsec_inbound_ah_sa(mblk_t *mp) +ipsec_inbound_ah_sa(mblk_t *mp, netstack_t *ns) { mblk_t *ipsec_in; ipha_t *ipha; @@ -674,8 +641,10 @@ ipsec_inbound_ah_sa(mblk_t *mp) int pullup_len; sadb_t *sp; sa_family_t af; + ipsec_stack_t *ipss = ns->netstack_ipsec; + ipsecah_stack_t *ahstack = ns->netstack_ipsecah; - IP_AH_BUMP_STAT(in_requests); + IP_AH_BUMP_STAT(ipss, in_requests); ASSERT(mp->b_datap->db_type == M_CTL); @@ -705,12 +674,13 @@ ipsec_inbound_ah_sa(mblk_t *mp) pullup_len = ah_offset + sizeof (ah_t); if (mp->b_rptr + pullup_len > mp->b_wptr) { if (!pullupmsg(mp, pullup_len)) { - ipsec_rl_strlog(ip_mod_info.mi_idnum, 0, 0, + ipsec_rl_strlog(ns, ip_mod_info.mi_idnum, 0, 0, SL_WARN | SL_ERROR, "ipsec_inbound_ah_sa: Small AH header\n"); - IP_AH_BUMP_STAT(in_discards); + IP_AH_BUMP_STAT(ipss, in_discards); ip_drop_packet(ipsec_in, B_TRUE, NULL, NULL, - &ipdrops_ah_bad_length, &ip_dropper); + DROPPER(ipss, ipds_ah_bad_length), + &ipss->ipsec_dropper); return (NULL); } if (isv6) @@ -724,12 +694,12 @@ ipsec_inbound_ah_sa(mblk_t *mp) if (isv6) { src_ptr = (uint32_t *)&ip6h->ip6_src; dst_ptr = (uint32_t *)&ip6h->ip6_dst; - sp = &ah_sadb.s_v6; + sp = &ahstack->ah_sadb.s_v6; af = AF_INET6; } else { src_ptr = (uint32_t *)&ipha->ipha_src; dst_ptr = (uint32_t *)&ipha->ipha_dst; - sp = &ah_sadb.s_v4; + sp = &ahstack->ah_sadb.s_v4; af = AF_INET; } @@ -739,13 +709,13 @@ ipsec_inbound_ah_sa(mblk_t *mp) mutex_exit(&hptr->isaf_lock); if (assoc == NULL || assoc->ipsa_state == IPSA_STATE_DEAD) { - IP_AH_BUMP_STAT(lookup_failure); - IP_AH_BUMP_STAT(in_discards); + IP_AH_BUMP_STAT(ipss, lookup_failure); + IP_AH_BUMP_STAT(ipss, in_discards); ipsecah_in_assocfailure(ipsec_in, 0, SL_ERROR | SL_CONSOLE | SL_WARN, "ipsec_inbound_ah_sa: No association found for " "spi 0x%x, dst addr %s\n", - ah->ah_spi, dst_ptr, af); + ah->ah_spi, dst_ptr, af, ahstack); if (assoc != NULL) { IPSA_REFRELE(assoc); } @@ -754,7 +724,7 @@ ipsec_inbound_ah_sa(mblk_t *mp) if (assoc->ipsa_state == IPSA_STATE_LARVAL) { /* Not fully baked; swap the packet under a rock until then */ - sadb_set_lpkt(assoc, ipsec_in); + sadb_set_lpkt(assoc, ipsec_in, ns); IPSA_REFRELE(assoc); return (NULL); } @@ -774,7 +744,7 @@ ipsec_inbound_ah_sa(mblk_t *mp) } esph_t * -ipsec_inbound_esp_sa(mblk_t *ipsec_in_mp) +ipsec_inbound_esp_sa(mblk_t *ipsec_in_mp, netstack_t *ns) { mblk_t *data_mp, *placeholder; uint32_t *src_ptr, *dst_ptr; @@ -788,8 +758,10 @@ ipsec_inbound_esp_sa(mblk_t *ipsec_in_mp) sa_family_t af; boolean_t isv6; sadb_t *sp; + ipsec_stack_t *ipss = ns->netstack_ipsec; + ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; - IP_ESP_BUMP_STAT(in_requests); + IP_ESP_BUMP_STAT(ipss, in_requests); ASSERT(ipsec_in_mp->b_datap->db_type == M_CTL); /* We have IPSEC_IN already! 
*/ @@ -821,13 +793,14 @@ ipsec_inbound_esp_sa(mblk_t *ipsec_in_mp) : ntohs(ipha->ipha_length))) { placeholder = msgpullup(data_mp, -1); if (placeholder == NULL) { - IP_ESP_BUMP_STAT(in_discards); + IP_ESP_BUMP_STAT(ipss, in_discards); /* * TODO: Extract inbound interface from the IPSEC_IN * message's ii->ipsec_in_rill_index. */ ip_drop_packet(ipsec_in_mp, B_TRUE, NULL, NULL, - &ipdrops_esp_nomem, &ip_dropper); + DROPPER(ipss, ipds_esp_nomem), + &ipss->ipsec_dropper); return (NULL); } else { /* Reset packet with new pulled up mblk. */ @@ -852,7 +825,7 @@ ipsec_inbound_esp_sa(mblk_t *ipsec_in_mp) preamble = sizeof (ip6_t); } - sp = &esp_sadb.s_v6; + sp = &espstack->esp_sadb.s_v6; af = AF_INET6; } else { ipha = (ipha_t *)data_mp->b_rptr; @@ -860,7 +833,7 @@ ipsec_inbound_esp_sa(mblk_t *ipsec_in_mp) dst_ptr = (uint32_t *)&ipha->ipha_dst; preamble = IPH_HDR_LENGTH(ipha); - sp = &esp_sadb.s_v4; + sp = &espstack->esp_sadb.s_v4; af = AF_INET; } @@ -875,13 +848,13 @@ ipsec_inbound_esp_sa(mblk_t *ipsec_in_mp) if (ipsa == NULL || ipsa->ipsa_state == IPSA_STATE_DEAD) { /* This is a loggable error! AUDIT ME! */ - IP_ESP_BUMP_STAT(lookup_failure); - IP_ESP_BUMP_STAT(in_discards); + IP_ESP_BUMP_STAT(ipss, lookup_failure); + IP_ESP_BUMP_STAT(ipss, in_discards); ipsecesp_in_assocfailure(ipsec_in_mp, 0, SL_ERROR | SL_CONSOLE | SL_WARN, "ipsec_inbound_esp_sa: No association found for " "spi 0x%x, dst addr %s\n", - esph->esph_spi, dst_ptr, af); + esph->esph_spi, dst_ptr, af, espstack); if (ipsa != NULL) { IPSA_REFRELE(ipsa); } @@ -890,7 +863,7 @@ ipsec_inbound_esp_sa(mblk_t *ipsec_in_mp) if (ipsa->ipsa_state == IPSA_STATE_LARVAL) { /* Not fully baked; swap the packet under a rock until then */ - sadb_set_lpkt(ipsa, ipsec_in_mp); + sadb_set_lpkt(ipsa, ipsec_in_mp, ns); IPSA_REFRELE(ipsa); return (NULL); } diff --git a/usr/src/uts/common/inet/ip/ip_srcid.c b/usr/src/uts/common/inet/ip/ip_srcid.c index 25bcf01e04..f153479292 100644 --- a/usr/src/uts/common/inet/ip/ip_srcid.c +++ b/usr/src/uts/common/inet/ip/ip_srcid.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -121,28 +120,18 @@ struct srcid_map { }; typedef struct srcid_map srcid_map_t; -static uint_t srcid_nextid(void); +static uint_t srcid_nextid(ip_stack_t *); static srcid_map_t **srcid_lookup_addr(const in6_addr_t *addr, - zoneid_t zoneid); -static srcid_map_t **srcid_lookup_id(uint_t id); + zoneid_t zoneid, ip_stack_t *); +static srcid_map_t **srcid_lookup_id(uint_t id, ip_stack_t *); /* - * ID used to assign next free one. - * Increases by one. Once it wraps we search for an unused ID. - */ -static uint_t ip_src_id = 1; -static boolean_t srcid_wrapped = B_FALSE; - -static srcid_map_t *srcid_head; -krwlock_t srcid_lock; - -/* * Insert/add a new address to the map. * Returns zero if ok; otherwise errno (e.g. for memory allocation failure). 
*/ int -ip_srcid_insert(const in6_addr_t *addr, zoneid_t zoneid) +ip_srcid_insert(const in6_addr_t *addr, zoneid_t zoneid, ip_stack_t *ipst) { srcid_map_t **smpp; #ifdef DEBUG @@ -152,28 +141,28 @@ ip_srcid_insert(const in6_addr_t *addr, zoneid_t zoneid) inet_ntop(AF_INET6, addr, abuf, sizeof (abuf)), zoneid)); #endif - rw_enter(&srcid_lock, RW_WRITER); - smpp = srcid_lookup_addr(addr, zoneid); + rw_enter(&ipst->ips_srcid_lock, RW_WRITER); + smpp = srcid_lookup_addr(addr, zoneid, ipst); if (*smpp != NULL) { /* Already present - increment refcount */ (*smpp)->sm_refcnt++; ASSERT((*smpp)->sm_refcnt != 0); /* wraparound */ - rw_exit(&srcid_lock); + rw_exit(&ipst->ips_srcid_lock); return (0); } /* Insert new */ *smpp = kmem_alloc(sizeof (srcid_map_t), KM_NOSLEEP); if (*smpp == NULL) { - rw_exit(&srcid_lock); + rw_exit(&ipst->ips_srcid_lock); return (ENOMEM); } (*smpp)->sm_next = NULL; (*smpp)->sm_addr = *addr; - (*smpp)->sm_srcid = srcid_nextid(); + (*smpp)->sm_srcid = srcid_nextid(ipst); (*smpp)->sm_refcnt = 1; (*smpp)->sm_zoneid = zoneid; - rw_exit(&srcid_lock); + rw_exit(&ipst->ips_srcid_lock); return (0); } @@ -182,7 +171,7 @@ ip_srcid_insert(const in6_addr_t *addr, zoneid_t zoneid) * Returns zero if ok; otherwise errno (e.g. for nonexistent address). */ int -ip_srcid_remove(const in6_addr_t *addr, zoneid_t zoneid) +ip_srcid_remove(const in6_addr_t *addr, zoneid_t zoneid, ip_stack_t *ipst) { srcid_map_t **smpp; srcid_map_t *smp; @@ -193,12 +182,12 @@ ip_srcid_remove(const in6_addr_t *addr, zoneid_t zoneid) inet_ntop(AF_INET6, addr, abuf, sizeof (abuf)), zoneid)); #endif - rw_enter(&srcid_lock, RW_WRITER); - smpp = srcid_lookup_addr(addr, zoneid); + rw_enter(&ipst->ips_srcid_lock, RW_WRITER); + smpp = srcid_lookup_addr(addr, zoneid, ipst); smp = *smpp; if (smp == NULL) { /* Not preset */ - rw_exit(&srcid_lock); + rw_exit(&ipst->ips_srcid_lock); return (ENOENT); } @@ -206,12 +195,12 @@ ip_srcid_remove(const in6_addr_t *addr, zoneid_t zoneid) ASSERT(smp->sm_refcnt != 0); smp->sm_refcnt--; if (smp->sm_refcnt != 0) { - rw_exit(&srcid_lock); + rw_exit(&ipst->ips_srcid_lock); return (0); } /* Remove entry */ *smpp = smp->sm_next; - rw_exit(&srcid_lock); + rw_exit(&ipst->ips_srcid_lock); smp->sm_next = NULL; kmem_free(smp, sizeof (srcid_map_t)); return (0); @@ -222,14 +211,16 @@ ip_srcid_remove(const in6_addr_t *addr, zoneid_t zoneid) * If the address is unknown return the unknown id (zero). */ uint_t -ip_srcid_find_addr(const in6_addr_t *addr, zoneid_t zoneid) +ip_srcid_find_addr(const in6_addr_t *addr, zoneid_t zoneid, + netstack_t *ns) { srcid_map_t **smpp; srcid_map_t *smp; uint_t id; + ip_stack_t *ipst = ns->netstack_ip; - rw_enter(&srcid_lock, RW_READER); - smpp = srcid_lookup_addr(addr, zoneid); + rw_enter(&ipst->ips_srcid_lock, RW_READER); + smpp = srcid_lookup_addr(addr, zoneid, ipst); smp = *smpp; if (smp == NULL) { char abuf[INET6_ADDRSTRLEN]; @@ -242,7 +233,7 @@ ip_srcid_find_addr(const in6_addr_t *addr, zoneid_t zoneid) ASSERT(smp->sm_refcnt != 0); id = smp->sm_srcid; } - rw_exit(&srcid_lock); + rw_exit(&ipst->ips_srcid_lock); return (id); } @@ -251,13 +242,15 @@ ip_srcid_find_addr(const in6_addr_t *addr, zoneid_t zoneid) * If the id is unknown return the unspecified address. 
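The ip_srcid.c hunks follow the conversion pattern used throughout this patch: file-scope globals (srcid_head, srcid_lock, ip_src_id, srcid_wrapped) become ips_-prefixed members of ip_stack_t, and every function either takes an ip_stack_t * directly or derives one from a netstack_t * via ns->netstack_ip. What follows is a small user-space model of that pattern under invented, simplified types; it only illustrates why two stack instances can then coexist without sharing state.

#include <errno.h>
#include <pthread.h>
#include <stdlib.h>

typedef struct srcid_map {
	struct srcid_map *sm_next;
	unsigned int	  sm_srcid;
	unsigned int	  sm_refcnt;
} srcid_map_t;

/* Formerly file-scope globals, now grouped per stack instance. */
typedef struct ip_stack {
	pthread_rwlock_t  ips_srcid_lock;
	srcid_map_t	 *ips_srcid_head;
	unsigned int	  ips_ip_src_id;
} ip_stack_t;

ip_stack_t *
ip_stack_create_model(void)
{
	ip_stack_t *ipst = calloc(1, sizeof (*ipst));

	if (ipst != NULL) {
		pthread_rwlock_init(&ipst->ips_srcid_lock, NULL);
		ipst->ips_ip_src_id = 1;
	}
	return (ipst);
}

/* Every consumer now names the stack instance it operates on. */
int
ip_srcid_insert_model(ip_stack_t *ipst)
{
	srcid_map_t *smp = calloc(1, sizeof (*smp));

	if (smp == NULL)
		return (ENOMEM);
	pthread_rwlock_wrlock(&ipst->ips_srcid_lock);
	smp->sm_srcid = ipst->ips_ip_src_id++;
	smp->sm_refcnt = 1;
	smp->sm_next = ipst->ips_srcid_head;
	ipst->ips_srcid_head = smp;
	pthread_rwlock_unlock(&ipst->ips_srcid_lock);
	return (0);
}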
*/ void -ip_srcid_find_id(uint_t id, in6_addr_t *addr, zoneid_t zoneid) +ip_srcid_find_id(uint_t id, in6_addr_t *addr, zoneid_t zoneid, + netstack_t *ns) { srcid_map_t **smpp; srcid_map_t *smp; + ip_stack_t *ipst = ns->netstack_ip; - rw_enter(&srcid_lock, RW_READER); - smpp = srcid_lookup_id(id); + rw_enter(&ipst->ips_srcid_lock, RW_READER); + smpp = srcid_lookup_id(id, ipst); smp = *smpp; if (smp == NULL || smp->sm_zoneid != zoneid) { /* Not preset */ @@ -267,7 +260,7 @@ ip_srcid_find_id(uint_t id, in6_addr_t *addr, zoneid_t zoneid) ASSERT(smp->sm_refcnt != 0); *addr = smp->sm_addr; } - rw_exit(&srcid_lock); + rw_exit(&ipst->ips_srcid_lock); } /* @@ -280,41 +273,48 @@ ip_srcid_report(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *ioc_cr) srcid_map_t *smp; char abuf[INET6_ADDRSTRLEN]; zoneid_t zoneid; + ip_stack_t *ipst; - zoneid = Q_TO_CONN(q)->conn_zoneid; + if (CONN_Q(q)) { + ipst = CONNQ_TO_IPST(q); + zoneid = Q_TO_CONN(q)->conn_zoneid; + } else { + ipst = ILLQ_TO_IPST(q); + zoneid = ((ill_t *)q->q_ptr)->ill_zoneid; + } (void) mi_mpprintf(mp, "addr " "id zone refcnt"); - rw_enter(&srcid_lock, RW_READER); - for (smp = srcid_head; smp != NULL; smp = smp->sm_next) { + rw_enter(&ipst->ips_srcid_lock, RW_READER); + for (smp = ipst->ips_srcid_head; smp != NULL; smp = smp->sm_next) { if (zoneid != GLOBAL_ZONEID && zoneid != smp->sm_zoneid) continue; (void) mi_mpprintf(mp, "%46s %5u %5d %5u", inet_ntop(AF_INET6, &smp->sm_addr, abuf, sizeof (abuf)), smp->sm_srcid, smp->sm_zoneid, smp->sm_refcnt); } - rw_exit(&srcid_lock); + rw_exit(&ipst->ips_srcid_lock); return (0); } /* Assign the next available ID */ static uint_t -srcid_nextid(void) +srcid_nextid(ip_stack_t *ipst) { uint_t id; srcid_map_t **smpp; - ASSERT(rw_owner(&srcid_lock) == curthread); + ASSERT(rw_owner(&ipst->ips_srcid_lock) == curthread); - if (!srcid_wrapped) { - id = ip_src_id++; - if (ip_src_id == 0) - srcid_wrapped = B_TRUE; + if (!ipst->ips_srcid_wrapped) { + id = ipst->ips_ip_src_id++; + if (ipst->ips_ip_src_id == 0) + ipst->ips_srcid_wrapped = B_TRUE; return (id); } /* Once it wraps we search for an unused ID. */ for (id = 0; id < 0xffffffff; id++) { - smpp = srcid_lookup_id(id); + smpp = srcid_lookup_id(id, ipst); if (*smpp == NULL) return (id); } @@ -329,12 +329,12 @@ srcid_nextid(void) * Otherwise *ptr will be NULL and can be used to insert a new object. */ static srcid_map_t ** -srcid_lookup_addr(const in6_addr_t *addr, zoneid_t zoneid) +srcid_lookup_addr(const in6_addr_t *addr, zoneid_t zoneid, ip_stack_t *ipst) { srcid_map_t **smpp; - ASSERT(RW_LOCK_HELD(&srcid_lock)); - smpp = &srcid_head; + ASSERT(RW_LOCK_HELD(&ipst->ips_srcid_lock)); + smpp = &ipst->ips_srcid_head; while (*smpp != NULL) { if (IN6_ARE_ADDR_EQUAL(&(*smpp)->sm_addr, addr) && zoneid == (*smpp)->sm_zoneid) @@ -351,12 +351,12 @@ srcid_lookup_addr(const in6_addr_t *addr, zoneid_t zoneid) * Otherwise *ptr will be NULL and can be used to insert a new object. 
*/ static srcid_map_t ** -srcid_lookup_id(uint_t id) +srcid_lookup_id(uint_t id, ip_stack_t *ipst) { srcid_map_t **smpp; - ASSERT(RW_LOCK_HELD(&srcid_lock)); - smpp = &srcid_head; + ASSERT(RW_LOCK_HELD(&ipst->ips_srcid_lock)); + smpp = &ipst->ips_srcid_head; while (*smpp != NULL) { if ((*smpp)->sm_srcid == id) return (smpp); diff --git a/usr/src/uts/common/inet/ip/ipclassifier.c b/usr/src/uts/common/inet/ip/ipclassifier.c index 7465e3a4ed..8768bcbe07 100644 --- a/usr/src/uts/common/inet/ip/ipclassifier.c +++ b/usr/src/uts/common/inet/ip/ipclassifier.c @@ -94,8 +94,8 @@ const char ipclassifier_version[] = "@(#)ipclassifier.c %I% %E% SMI"; * Connection Lookup: * ------------------ * - * conn_t *ipcl_classify_v4(mp, protocol, hdr_len, zoneid) - * conn_t *ipcl_classify_v6(mp, protocol, hdr_len, zoneid) + * conn_t *ipcl_classify_v4(mp, protocol, hdr_len, zoneid, ip_stack) + * conn_t *ipcl_classify_v6(mp, protocol, hdr_len, zoneid, ip_stack) * * Finds connection for an incoming IPv4 or IPv6 packet. Returns NULL if * it can't find any associated connection. If the connection is found, its @@ -159,21 +159,24 @@ const char ipclassifier_version[] = "@(#)ipclassifier.c %I% %E% SMI"; * any. In any event, the receiving socket must have SO_MAC_EXEMPT set and the * receiver's label must dominate the sender's default label. * - * conn_t *ipcl_tcp_lookup_reversed_ipv4(ipha_t *, tcph_t *, int); - * conn_t *ipcl_tcp_lookup_reversed_ipv6(ip6_t *, tcpha_t *, int, uint_t); + * conn_t *ipcl_tcp_lookup_reversed_ipv4(ipha_t *, tcph_t *, int, ip_stack); + * conn_t *ipcl_tcp_lookup_reversed_ipv6(ip6_t *, tcpha_t *, int, uint_t, + * ip_stack); * * Lookup routine to find a exact match for {src, dst, local port, * remote port) for TCP connections in ipcl_conn_fanout. The address and * ports are read from the IP and TCP header respectively. * - * conn_t *ipcl_lookup_listener_v4(lport, laddr, protocol); - * conn_t *ipcl_lookup_listener_v6(lport, laddr, protocol, ifindex); + * conn_t *ipcl_lookup_listener_v4(lport, laddr, protocol, + * zoneid, ip_stack); + * conn_t *ipcl_lookup_listener_v6(lport, laddr, protocol, ifindex, + * zoneid, ip_stack); * * Lookup routine to find a listener with the tuple {lport, laddr, * protocol} in the ipcl_bind_fanout table. For IPv6, an additional * parameter interface index is also compared. * - * void ipcl_walk(func, arg) + * void ipcl_walk(func, arg, ip_stack) * * Apply 'func' to every connection available. The 'func' is called as * (*func)(connp, arg). The walk is non-atomic so connections may be @@ -221,7 +224,7 @@ const char ipclassifier_version[] = "@(#)ipclassifier.c %I% %E% SMI"; * Connection Creation/Destruction * ------------------------------- * - * conn_t *ipcl_conn_create(type, sleep) + * conn_t *ipcl_conn_create(type, sleep, netstack_t *) * * Creates a new conn based on the type flag, inserts it into * globalhash table. @@ -262,6 +265,7 @@ const char ipclassifier_version[] = "@(#)ipclassifier.c %I% %E% SMI"; #include <inet/ip_ndp.h> #include <inet/udp_impl.h> #include <inet/sctp_ip.h> +#include <inet/sctp/sctp_impl.h> #include <sys/cpuvar.h> @@ -283,28 +287,14 @@ int ipcl_debug_level = 0; #else #define IPCL_DEBUG_LVL(level, args) {; } #endif -connf_t *ipcl_conn_fanout; -connf_t *ipcl_bind_fanout; -connf_t ipcl_proto_fanout[IPPROTO_MAX + 1]; -connf_t ipcl_proto_fanout_v6[IPPROTO_MAX + 1]; -connf_t *ipcl_udp_fanout; - -/* A separate hash list for raw socket. */ -connf_t *ipcl_raw_fanout; - -connf_t rts_clients; - -/* Old value for compatibility */ +/* Old value for compatibility. 
Setable in /etc/system */ uint_t tcp_conn_hash_size = 0; -/* New value. Zero means choose automatically. */ +/* New value. Zero means choose automatically. Setable in /etc/system */ uint_t ipcl_conn_hash_size = 0; uint_t ipcl_conn_hash_memfactor = 8192; uint_t ipcl_conn_hash_maxsize = 82500; -uint_t ipcl_conn_fanout_size = 0; - - /* bind/udp fanout table size */ uint_t ipcl_bind_fanout_size = 512; uint_t ipcl_udp_fanout_size = 16384; @@ -337,7 +327,6 @@ typedef struct itc_s { #define itc_conn itc_u.itcu_conn struct kmem_cache *ipcl_tcpconn_cache; -struct kmem_cache *ipcl_tcp_cache; struct kmem_cache *ipcl_conn_cache; extern struct kmem_cache *sctp_conn_cache; extern struct kmem_cache *tcp_sack_info_cache; @@ -349,9 +338,6 @@ extern mblk_t *tcp_timermp_alloc(int); static int ipcl_tcpconn_constructor(void *, void *, int); static void ipcl_tcpconn_destructor(void *, void *); -static int conn_g_index; -connf_t *ipcl_globalhash_fanout; - #ifdef IPCL_DEBUG #define INET_NTOA_BUFSIZE 18 @@ -367,15 +353,11 @@ inet_ntoa_r(uint32_t in, char *b) #endif /* - * ipclassifier intialization routine, sets up hash tables and - * conn caches. + * Global (for all stack instances) init routine */ void -ipcl_init(void) +ipcl_g_init(void) { - int i; - int sizes[] = P2Ps(); - ipcl_conn_cache = kmem_cache_create("ipcl_conn_cache", sizeof (conn_t), CACHE_ALIGN_SIZE, NULL, NULL, NULL, NULL, NULL, 0); @@ -384,117 +366,184 @@ ipcl_init(void) sizeof (itc_t), CACHE_ALIGN_SIZE, ipcl_tcpconn_constructor, ipcl_tcpconn_destructor, NULL, NULL, NULL, 0); +} + +/* + * ipclassifier intialization routine, sets up hash tables. + */ +void +ipcl_init(ip_stack_t *ipst) +{ + int i; + int sizes[] = P2Ps(); /* - * Calculate size of conn fanout table. + * Calculate size of conn fanout table from /etc/system settings */ if (ipcl_conn_hash_size != 0) { - ipcl_conn_fanout_size = ipcl_conn_hash_size; + ipst->ips_ipcl_conn_fanout_size = ipcl_conn_hash_size; } else if (tcp_conn_hash_size != 0) { - ipcl_conn_fanout_size = tcp_conn_hash_size; + ipst->ips_ipcl_conn_fanout_size = tcp_conn_hash_size; } else { extern pgcnt_t freemem; - ipcl_conn_fanout_size = + ipst->ips_ipcl_conn_fanout_size = (freemem * PAGESIZE) / ipcl_conn_hash_memfactor; - if (ipcl_conn_fanout_size > ipcl_conn_hash_maxsize) - ipcl_conn_fanout_size = ipcl_conn_hash_maxsize; + if (ipst->ips_ipcl_conn_fanout_size > ipcl_conn_hash_maxsize) { + ipst->ips_ipcl_conn_fanout_size = + ipcl_conn_hash_maxsize; + } } for (i = 9; i < sizeof (sizes) / sizeof (*sizes) - 1; i++) { - if (sizes[i] >= ipcl_conn_fanout_size) { + if (sizes[i] >= ipst->ips_ipcl_conn_fanout_size) { break; } } - if ((ipcl_conn_fanout_size = sizes[i]) == 0) { + if ((ipst->ips_ipcl_conn_fanout_size = sizes[i]) == 0) { /* Out of range, use the 2^16 value */ - ipcl_conn_fanout_size = sizes[16]; + ipst->ips_ipcl_conn_fanout_size = sizes[16]; } - ipcl_conn_fanout = (connf_t *)kmem_zalloc(ipcl_conn_fanout_size * - sizeof (*ipcl_conn_fanout), KM_SLEEP); - for (i = 0; i < ipcl_conn_fanout_size; i++) { - mutex_init(&ipcl_conn_fanout[i].connf_lock, NULL, + /* Take values from /etc/system */ + ipst->ips_ipcl_bind_fanout_size = ipcl_bind_fanout_size; + ipst->ips_ipcl_udp_fanout_size = ipcl_udp_fanout_size; + ipst->ips_ipcl_raw_fanout_size = ipcl_raw_fanout_size; + + ASSERT(ipst->ips_ipcl_conn_fanout == NULL); + + ipst->ips_ipcl_conn_fanout = kmem_zalloc( + ipst->ips_ipcl_conn_fanout_size * sizeof (connf_t), KM_SLEEP); + + for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) { + mutex_init(&ipst->ips_ipcl_conn_fanout[i].connf_lock, 
NULL, MUTEX_DEFAULT, NULL); } - ipcl_bind_fanout = (connf_t *)kmem_zalloc(ipcl_bind_fanout_size * - sizeof (*ipcl_bind_fanout), KM_SLEEP); + ipst->ips_ipcl_bind_fanout = kmem_zalloc( + ipst->ips_ipcl_bind_fanout_size * sizeof (connf_t), KM_SLEEP); - for (i = 0; i < ipcl_bind_fanout_size; i++) { - mutex_init(&ipcl_bind_fanout[i].connf_lock, NULL, + for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) { + mutex_init(&ipst->ips_ipcl_bind_fanout[i].connf_lock, NULL, MUTEX_DEFAULT, NULL); } - for (i = 0; i < A_CNT(ipcl_proto_fanout); i++) { - mutex_init(&ipcl_proto_fanout[i].connf_lock, NULL, + ipst->ips_ipcl_proto_fanout = kmem_zalloc(IPPROTO_MAX * + sizeof (connf_t), KM_SLEEP); + for (i = 0; i < IPPROTO_MAX; i++) { + mutex_init(&ipst->ips_ipcl_proto_fanout[i].connf_lock, NULL, MUTEX_DEFAULT, NULL); } - for (i = 0; i < A_CNT(ipcl_proto_fanout_v6); i++) { - mutex_init(&ipcl_proto_fanout_v6[i].connf_lock, NULL, + + ipst->ips_ipcl_proto_fanout_v6 = kmem_zalloc(IPPROTO_MAX * + sizeof (connf_t), KM_SLEEP); + for (i = 0; i < IPPROTO_MAX; i++) { + mutex_init(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock, NULL, MUTEX_DEFAULT, NULL); } - mutex_init(&rts_clients.connf_lock, NULL, MUTEX_DEFAULT, NULL); - - ipcl_udp_fanout = (connf_t *)kmem_zalloc(ipcl_udp_fanout_size * - sizeof (*ipcl_udp_fanout), KM_SLEEP); + ipst->ips_rts_clients = kmem_zalloc(sizeof (connf_t), KM_SLEEP); + mutex_init(&ipst->ips_rts_clients->connf_lock, + NULL, MUTEX_DEFAULT, NULL); - for (i = 0; i < ipcl_udp_fanout_size; i++) { - mutex_init(&ipcl_udp_fanout[i].connf_lock, NULL, + ipst->ips_ipcl_udp_fanout = kmem_zalloc( + ipst->ips_ipcl_udp_fanout_size * sizeof (connf_t), KM_SLEEP); + for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) { + mutex_init(&ipst->ips_ipcl_udp_fanout[i].connf_lock, NULL, MUTEX_DEFAULT, NULL); } - ipcl_raw_fanout = (connf_t *)kmem_zalloc(ipcl_raw_fanout_size * - sizeof (*ipcl_raw_fanout), KM_SLEEP); - - for (i = 0; i < ipcl_raw_fanout_size; i++) { - mutex_init(&ipcl_raw_fanout[i].connf_lock, NULL, + ipst->ips_ipcl_raw_fanout = kmem_zalloc( + ipst->ips_ipcl_raw_fanout_size * sizeof (connf_t), KM_SLEEP); + for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) { + mutex_init(&ipst->ips_ipcl_raw_fanout[i].connf_lock, NULL, MUTEX_DEFAULT, NULL); } - ipcl_globalhash_fanout = (connf_t *)kmem_zalloc(sizeof (connf_t) * - CONN_G_HASH_SIZE, KM_SLEEP); - + ipst->ips_ipcl_globalhash_fanout = kmem_zalloc( + sizeof (connf_t) * CONN_G_HASH_SIZE, KM_SLEEP); for (i = 0; i < CONN_G_HASH_SIZE; i++) { - mutex_init(&ipcl_globalhash_fanout[i].connf_lock, NULL, - MUTEX_DEFAULT, NULL); + mutex_init(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock, + NULL, MUTEX_DEFAULT, NULL); } } void -ipcl_destroy(void) +ipcl_g_destroy(void) { - int i; kmem_cache_destroy(ipcl_conn_cache); kmem_cache_destroy(ipcl_tcpconn_cache); - for (i = 0; i < ipcl_conn_fanout_size; i++) - mutex_destroy(&ipcl_conn_fanout[i].connf_lock); - kmem_free(ipcl_conn_fanout, ipcl_conn_fanout_size * - sizeof (*ipcl_conn_fanout)); - for (i = 0; i < ipcl_bind_fanout_size; i++) - mutex_destroy(&ipcl_bind_fanout[i].connf_lock); - kmem_free(ipcl_bind_fanout, ipcl_bind_fanout_size * - sizeof (*ipcl_bind_fanout)); - - for (i = 0; i < A_CNT(ipcl_proto_fanout); i++) - mutex_destroy(&ipcl_proto_fanout[i].connf_lock); - for (i = 0; i < A_CNT(ipcl_proto_fanout_v6); i++) - mutex_destroy(&ipcl_proto_fanout_v6[i].connf_lock); - - for (i = 0; i < ipcl_udp_fanout_size; i++) - mutex_destroy(&ipcl_udp_fanout[i].connf_lock); - kmem_free(ipcl_udp_fanout, ipcl_udp_fanout_size * - sizeof 
(*ipcl_udp_fanout)); - - for (i = 0; i < ipcl_raw_fanout_size; i++) - mutex_destroy(&ipcl_raw_fanout[i].connf_lock); - kmem_free(ipcl_raw_fanout, ipcl_raw_fanout_size * - sizeof (*ipcl_raw_fanout)); - - kmem_free(ipcl_globalhash_fanout, sizeof (connf_t) * CONN_G_HASH_SIZE); - mutex_destroy(&rts_clients.connf_lock); +} + +/* + * All user-level and kernel use of the stack must be gone + * by now. + */ +void +ipcl_destroy(ip_stack_t *ipst) +{ + int i; + + for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) { + ASSERT(ipst->ips_ipcl_conn_fanout[i].connf_head == NULL); + mutex_destroy(&ipst->ips_ipcl_conn_fanout[i].connf_lock); + } + kmem_free(ipst->ips_ipcl_conn_fanout, ipst->ips_ipcl_conn_fanout_size * + sizeof (connf_t)); + ipst->ips_ipcl_conn_fanout = NULL; + + for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) { + ASSERT(ipst->ips_ipcl_bind_fanout[i].connf_head == NULL); + mutex_destroy(&ipst->ips_ipcl_bind_fanout[i].connf_lock); + } + kmem_free(ipst->ips_ipcl_bind_fanout, ipst->ips_ipcl_bind_fanout_size * + sizeof (connf_t)); + ipst->ips_ipcl_bind_fanout = NULL; + + for (i = 0; i < IPPROTO_MAX; i++) { + ASSERT(ipst->ips_ipcl_proto_fanout[i].connf_head == NULL); + mutex_destroy(&ipst->ips_ipcl_proto_fanout[i].connf_lock); + } + kmem_free(ipst->ips_ipcl_proto_fanout, IPPROTO_MAX * sizeof (connf_t)); + ipst->ips_ipcl_proto_fanout = NULL; + + for (i = 0; i < IPPROTO_MAX; i++) { + ASSERT(ipst->ips_ipcl_proto_fanout_v6[i].connf_head == NULL); + mutex_destroy(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock); + } + kmem_free(ipst->ips_ipcl_proto_fanout_v6, + IPPROTO_MAX * sizeof (connf_t)); + ipst->ips_ipcl_proto_fanout_v6 = NULL; + + for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) { + ASSERT(ipst->ips_ipcl_udp_fanout[i].connf_head == NULL); + mutex_destroy(&ipst->ips_ipcl_udp_fanout[i].connf_lock); + } + kmem_free(ipst->ips_ipcl_udp_fanout, ipst->ips_ipcl_udp_fanout_size * + sizeof (connf_t)); + ipst->ips_ipcl_udp_fanout = NULL; + + for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) { + ASSERT(ipst->ips_ipcl_raw_fanout[i].connf_head == NULL); + mutex_destroy(&ipst->ips_ipcl_raw_fanout[i].connf_lock); + } + kmem_free(ipst->ips_ipcl_raw_fanout, ipst->ips_ipcl_raw_fanout_size * + sizeof (connf_t)); + ipst->ips_ipcl_raw_fanout = NULL; + + for (i = 0; i < CONN_G_HASH_SIZE; i++) { + ASSERT(ipst->ips_ipcl_globalhash_fanout[i].connf_head == NULL); + mutex_destroy(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock); + } + kmem_free(ipst->ips_ipcl_globalhash_fanout, + sizeof (connf_t) * CONN_G_HASH_SIZE); + ipst->ips_ipcl_globalhash_fanout = NULL; + + ASSERT(ipst->ips_rts_clients->connf_head == NULL); + mutex_destroy(&ipst->ips_rts_clients->connf_lock); + kmem_free(ipst->ips_rts_clients, sizeof (connf_t)); + ipst->ips_rts_clients = NULL; } /* @@ -502,10 +551,11 @@ ipcl_destroy(void) * and inserts it in the global hash table. 
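The ipclassifier changes split initialization into two tiers: ipcl_g_init()/ipcl_g_destroy() handle state shared by every stack instance (the kmem caches), while ipcl_init(ipst)/ipcl_destroy(ipst) allocate and tear down the fanout tables owned by one instance, asserting each bucket is empty before freeing it. Below is a minimal sketch of that two-tier lifecycle, with an invented table size and simplified types; it is a model of the shape of the split, not the kernel routines.

#include <assert.h>
#include <stdlib.h>

typedef struct connf { void *connf_head; } connf_t;

typedef struct ip_stack {
	connf_t		*ips_ipcl_bind_fanout;
	unsigned int	 ips_ipcl_bind_fanout_size;
} ip_stack_t;

static int g_initialized;		/* stands in for the shared kmem caches */

void
ipcl_g_init_model(void)			/* once, at module load */
{
	g_initialized = 1;
}

void
ipcl_init_model(ip_stack_t *ipst)	/* once per stack instance */
{
	ipst->ips_ipcl_bind_fanout_size = 512;	/* illustrative size only */
	ipst->ips_ipcl_bind_fanout = calloc(ipst->ips_ipcl_bind_fanout_size,
	    sizeof (connf_t));
}

void
ipcl_destroy_model(ip_stack_t *ipst)	/* when the instance goes away */
{
	/* All users of the stack must be gone: every bucket must be empty. */
	for (unsigned int i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++)
		assert(ipst->ips_ipcl_bind_fanout[i].connf_head == NULL);
	free(ipst->ips_ipcl_bind_fanout);
	ipst->ips_ipcl_bind_fanout = NULL;
}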
*/ conn_t * -ipcl_conn_create(uint32_t type, int sleep) +ipcl_conn_create(uint32_t type, int sleep, netstack_t *ns) { itc_t *itc; conn_t *connp; + sctp_stack_t *sctps; switch (type) { case IPCL_TCPCONN: @@ -514,6 +564,8 @@ ipcl_conn_create(uint32_t type, int sleep) return (NULL); connp = &itc->itc_conn; connp->conn_ref = 1; + netstack_hold(ns); + connp->conn_netstack = ns; IPCL_DEBUG_LVL(1, ("ipcl_conn_create: connp = %p tcp (%p)", (void *)connp, (void *)connp->conn_tcp)); @@ -523,6 +575,10 @@ ipcl_conn_create(uint32_t type, int sleep) if ((connp = kmem_cache_alloc(sctp_conn_cache, sleep)) == NULL) return (NULL); connp->conn_flags = IPCL_SCTPCONN; + sctps = ns->netstack_sctp; + SCTP_G_Q_REFHOLD(sctps); + netstack_hold(ns); + connp->conn_netstack = ns; break; case IPCL_IPCCONN: connp = kmem_cache_alloc(ipcl_conn_cache, sleep); @@ -533,6 +589,8 @@ ipcl_conn_create(uint32_t type, int sleep) cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); connp->conn_flags = IPCL_IPCCONN; connp->conn_ref = 1; + netstack_hold(ns); + connp->conn_netstack = ns; IPCL_DEBUG_LVL(1, ("ipcl_conn_create: connp = %p\n", (void *)connp)); ipcl_globalhash_insert(connp); @@ -549,6 +607,7 @@ void ipcl_conn_destroy(conn_t *connp) { mblk_t *mp; + netstack_t *ns = connp->conn_netstack; ASSERT(!MUTEX_HELD(&connp->conn_lock)); ASSERT(connp->conn_ref == 0); @@ -569,9 +628,24 @@ ipcl_conn_destroy(conn_t *connp) cv_destroy(&connp->conn_cv); if (connp->conn_flags & IPCL_TCPCONN) { tcp_t *tcp = connp->conn_tcp; + tcp_stack_t *tcps; + + ASSERT(tcp != NULL); + tcps = tcp->tcp_tcps; + if (tcps != NULL) { + if (connp->conn_latch != NULL) { + IPLATCH_REFRELE(connp->conn_latch, ns); + connp->conn_latch = NULL; + } + if (connp->conn_policy != NULL) { + IPPH_REFRELE(connp->conn_policy, ns); + connp->conn_policy = NULL; + } + tcp->tcp_tcps = NULL; + TCPS_REFRELE(tcps); + } mutex_destroy(&connp->conn_lock); - ASSERT(connp->conn_tcp != NULL); tcp_free(tcp); mp = tcp->tcp_timercache; tcp->tcp_cred = NULL; @@ -592,10 +666,9 @@ ipcl_conn_destroy(conn_t *connp) } ASSERT(tcp->tcp_iphc_len == 0); - if (connp->conn_latch != NULL) - IPLATCH_REFRELE(connp->conn_latch); - if (connp->conn_policy != NULL) - IPPH_REFRELE(connp->conn_policy); + ASSERT(connp->conn_latch == NULL); + ASSERT(connp->conn_policy == NULL); + bzero(connp, sizeof (itc_t)); tcp->tcp_timercache = mp; @@ -603,12 +676,22 @@ ipcl_conn_destroy(conn_t *connp) connp->conn_flags = IPCL_TCPCONN; connp->conn_ulp = IPPROTO_TCP; tcp->tcp_connp = connp; + if (ns != NULL) { + ASSERT(tcp->tcp_tcps == NULL); + connp->conn_netstack = NULL; + netstack_rele(ns); + } kmem_cache_free(ipcl_tcpconn_cache, connp); } else if (connp->conn_flags & IPCL_SCTPCONN) { + ASSERT(ns != NULL); sctp_free(connp); } else { ASSERT(connp->conn_udp == NULL); mutex_destroy(&connp->conn_lock); + if (ns != NULL) { + connp->conn_netstack = NULL; + netstack_rele(ns); + } kmem_cache_free(ipcl_conn_cache, connp); } } @@ -808,6 +891,7 @@ void ipcl_proto_insert(conn_t *connp, uint8_t protocol) { connf_t *connfp; + ip_stack_t *ipst = connp->conn_netstack->netstack_ip; ASSERT(connp != NULL); ASSERT(!connp->conn_mac_exempt || protocol == IPPROTO_AH || @@ -816,7 +900,7 @@ ipcl_proto_insert(conn_t *connp, uint8_t protocol) connp->conn_ulp = protocol; /* Insert it in the protocol hash */ - connfp = &ipcl_proto_fanout[protocol]; + connfp = &ipst->ips_ipcl_proto_fanout[protocol]; IPCL_HASH_INSERT_WILDCARD(connfp, connp); } @@ -824,6 +908,7 @@ void ipcl_proto_insert_v6(conn_t *connp, uint8_t protocol) { connf_t *connfp; + ip_stack_t *ipst = 
connp->conn_netstack->netstack_ip; ASSERT(connp != NULL); ASSERT(!connp->conn_mac_exempt || protocol == IPPROTO_AH || @@ -832,7 +917,7 @@ ipcl_proto_insert_v6(conn_t *connp, uint8_t protocol) connp->conn_ulp = protocol; /* Insert it in the Bind Hash */ - connfp = &ipcl_proto_fanout_v6[protocol]; + connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol]; IPCL_HASH_INSERT_WILDCARD(connfp, connp); } @@ -848,8 +933,9 @@ ipcl_sctp_hash_insert(conn_t *connp, in_port_t lport) { connf_t *connfp; conn_t *oconnp; + ip_stack_t *ipst = connp->conn_netstack->netstack_ip; - connfp = &ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport))]; + connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)]; /* Check for existing raw socket already bound to the port. */ mutex_enter(&connfp->connf_lock); @@ -893,12 +979,12 @@ ipcl_sctp_hash_insert(conn_t *connp, in_port_t lport) * Returns true if there's a conflict. */ static boolean_t -check_exempt_conflict_v4(conn_t *connp) +check_exempt_conflict_v4(conn_t *connp, ip_stack_t *ipst) { connf_t *connfp; conn_t *tconn; - connfp = &ipcl_proto_fanout[connp->conn_ulp]; + connfp = &ipst->ips_ipcl_proto_fanout[connp->conn_ulp]; mutex_enter(&connfp->connf_lock); for (tconn = connfp->connf_head; tconn != NULL; tconn = tconn->conn_next) { @@ -921,12 +1007,12 @@ check_exempt_conflict_v4(conn_t *connp) } static boolean_t -check_exempt_conflict_v6(conn_t *connp) +check_exempt_conflict_v6(conn_t *connp, ip_stack_t *ipst) { connf_t *connfp; conn_t *tconn; - connfp = &ipcl_proto_fanout[connp->conn_ulp]; + connfp = &ipst->ips_ipcl_proto_fanout[connp->conn_ulp]; mutex_enter(&connfp->connf_lock); for (tconn = connfp->connf_head; tconn != NULL; tconn = tconn->conn_next) { @@ -959,6 +1045,7 @@ ipcl_bind_insert(conn_t *connp, uint8_t protocol, ipaddr_t src, uint16_t lport) char buf[INET_NTOA_BUFSIZE]; #endif int ret = 0; + ip_stack_t *ipst = connp->conn_netstack->netstack_ip; ASSERT(connp); @@ -971,7 +1058,8 @@ ipcl_bind_insert(conn_t *connp, uint8_t protocol, ipaddr_t src, uint16_t lport) switch (protocol) { default: - if (is_system_labeled() && check_exempt_conflict_v4(connp)) + if (is_system_labeled() && + check_exempt_conflict_v4(connp, ipst)) return (EADDRINUSE); /* FALLTHROUGH */ case IPPROTO_UDP: @@ -979,12 +1067,13 @@ ipcl_bind_insert(conn_t *connp, uint8_t protocol, ipaddr_t src, uint16_t lport) IPCL_DEBUG_LVL(64, ("ipcl_bind_insert: connp %p - udp\n", (void *)connp)); - connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(lport)]; + connfp = &ipst->ips_ipcl_udp_fanout[ + IPCL_UDP_HASH(lport, ipst)]; } else { IPCL_DEBUG_LVL(64, ("ipcl_bind_insert: connp %p - protocol\n", (void *)connp)); - connfp = &ipcl_proto_fanout[protocol]; + connfp = &ipst->ips_ipcl_proto_fanout[protocol]; } if (connp->conn_rem != INADDR_ANY) { @@ -1000,7 +1089,8 @@ ipcl_bind_insert(conn_t *connp, uint8_t protocol, ipaddr_t src, uint16_t lport) /* Insert it in the Bind Hash */ ASSERT(connp->conn_zoneid != ALL_ZONES); - connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)]; + connfp = &ipst->ips_ipcl_bind_fanout[ + IPCL_BIND_HASH(lport, ipst)]; if (connp->conn_src != INADDR_ANY) { IPCL_HASH_INSERT_BOUND(connfp, connp); } else { @@ -1028,6 +1118,7 @@ ipcl_bind_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src, { connf_t *connfp; int ret = 0; + ip_stack_t *ipst = connp->conn_netstack->netstack_ip; ASSERT(connp); @@ -1037,7 +1128,8 @@ ipcl_bind_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src, switch (protocol) { default: - if (is_system_labeled() && check_exempt_conflict_v6(connp)) + if 
(is_system_labeled() && + check_exempt_conflict_v6(connp, ipst)) return (EADDRINUSE); /* FALLTHROUGH */ case IPPROTO_UDP: @@ -1045,12 +1137,13 @@ ipcl_bind_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src, IPCL_DEBUG_LVL(128, ("ipcl_bind_insert_v6: connp %p - udp\n", (void *)connp)); - connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(lport)]; + connfp = &ipst->ips_ipcl_udp_fanout[ + IPCL_UDP_HASH(lport, ipst)]; } else { IPCL_DEBUG_LVL(128, ("ipcl_bind_insert_v6: connp %p - protocol\n", (void *)connp)); - connfp = &ipcl_proto_fanout_v6[protocol]; + connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol]; } if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6)) { @@ -1067,7 +1160,8 @@ ipcl_bind_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src, /* Insert it in the Bind Hash */ ASSERT(connp->conn_zoneid != ALL_ZONES); - connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)]; + connfp = &ipst->ips_ipcl_bind_fanout[ + IPCL_BIND_HASH(lport, ipst)]; if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) { IPCL_HASH_INSERT_BOUND(connfp, connp); } else { @@ -1114,6 +1208,7 @@ ipcl_conn_insert(conn_t *connp, uint8_t protocol, ipaddr_t src, #endif in_port_t lport; int ret = 0; + ip_stack_t *ipst = connp->conn_netstack->netstack_ip; IPCL_DEBUG_LVL(256, ("ipcl_conn_insert: connp %p, src = %s, " "dst = %s, ports = %x, protocol = %x", (void *)connp, @@ -1131,8 +1226,9 @@ ipcl_conn_insert(conn_t *connp, uint8_t protocol, ipaddr_t src, */ IPCL_CONN_INIT(connp, protocol, src, rem, ports); } - connfp = &ipcl_conn_fanout[IPCL_CONN_HASH(connp->conn_rem, - connp->conn_ports)]; + connfp = &ipst->ips_ipcl_conn_fanout[ + IPCL_CONN_HASH(connp->conn_rem, + connp->conn_ports, ipst)]; mutex_enter(&connfp->connf_lock); for (tconnp = connfp->connf_head; tconnp != NULL; tconnp = tconnp->conn_next) { @@ -1178,7 +1274,8 @@ ipcl_conn_insert(conn_t *connp, uint8_t protocol, ipaddr_t src, * level per-transport binding logic. For all others, it's * done here. 
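ipcl_conn_create() and ipcl_conn_destroy() above also show the reference-counting discipline the patch introduces: every conn_t takes a hold on its netstack at creation (netstack_hold()) and releases it on destruction (netstack_rele()), so a stack instance cannot be torn down while connections still point at it. The toy model below captures only that hold/release pairing; the member names and helpers are simplified stand-ins.

#include <assert.h>
#include <stdlib.h>

typedef struct netstack {
	unsigned int netstack_refcnt;
} netstack_t;

typedef struct conn {
	netstack_t *conn_netstack;
} conn_t;

static void netstack_hold_model(netstack_t *ns) { ns->netstack_refcnt++; }

static void
netstack_rele_model(netstack_t *ns)
{
	assert(ns->netstack_refcnt > 0);
	ns->netstack_refcnt--;
}

conn_t *
conn_create_model(netstack_t *ns)
{
	conn_t *connp = calloc(1, sizeof (*connp));

	if (connp != NULL) {
		netstack_hold_model(ns);	/* pin the stack instance */
		connp->conn_netstack = ns;
	}
	return (connp);
}

void
conn_destroy_model(conn_t *connp)
{
	netstack_rele_model(connp->conn_netstack);	/* drop the hold */
	connp->conn_netstack = NULL;
	free(connp);
}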
*/ - if (is_system_labeled() && check_exempt_conflict_v4(connp)) + if (is_system_labeled() && + check_exempt_conflict_v4(connp, ipst)) return (EADDRINUSE); /* FALLTHROUGH */ @@ -1186,9 +1283,10 @@ ipcl_conn_insert(conn_t *connp, uint8_t protocol, ipaddr_t src, up = (uint16_t *)&ports; IPCL_CONN_INIT(connp, protocol, src, rem, ports); if (protocol == IPPROTO_UDP) { - connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(up[1])]; + connfp = &ipst->ips_ipcl_udp_fanout[ + IPCL_UDP_HASH(up[1], ipst)]; } else { - connfp = &ipcl_proto_fanout[protocol]; + connfp = &ipst->ips_ipcl_proto_fanout[protocol]; } if (connp->conn_rem != INADDR_ANY) { @@ -1213,6 +1311,7 @@ ipcl_conn_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src, conn_t *tconnp; in_port_t lport; int ret = 0; + ip_stack_t *ipst = connp->conn_netstack->netstack_ip; switch (protocol) { case IPPROTO_TCP: @@ -1220,8 +1319,9 @@ ipcl_conn_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src, if (!(connp->conn_flags & IPCL_EAGER)) { IPCL_CONN_INIT_V6(connp, protocol, *src, *rem, ports); } - connfp = &ipcl_conn_fanout[IPCL_CONN_HASH_V6(connp->conn_remv6, - connp->conn_ports)]; + connfp = &ipst->ips_ipcl_conn_fanout[ + IPCL_CONN_HASH_V6(connp->conn_remv6, connp->conn_ports, + ipst)]; mutex_enter(&connfp->connf_lock); for (tconnp = connfp->connf_head; tconnp != NULL; tconnp = tconnp->conn_next) { @@ -1255,16 +1355,18 @@ ipcl_conn_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src, break; default: - if (is_system_labeled() && check_exempt_conflict_v6(connp)) + if (is_system_labeled() && + check_exempt_conflict_v6(connp, ipst)) return (EADDRINUSE); /* FALLTHROUGH */ case IPPROTO_UDP: up = (uint16_t *)&ports; IPCL_CONN_INIT_V6(connp, protocol, *src, *rem, ports); if (protocol == IPPROTO_UDP) { - connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(up[1])]; + connfp = &ipst->ips_ipcl_udp_fanout[ + IPCL_UDP_HASH(up[1], ipst)]; } else { - connfp = &ipcl_proto_fanout_v6[protocol]; + connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol]; } if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6)) { @@ -1291,7 +1393,8 @@ ipcl_conn_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src, * zone, then label checks are omitted. */ conn_t * -ipcl_classify_v4(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid) +ipcl_classify_v4(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid, + ip_stack_t *ipst) { ipha_t *ipha; connf_t *connfp, *bind_connfp; @@ -1310,7 +1413,8 @@ ipcl_classify_v4(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid) case IPPROTO_TCP: ports = *(uint32_t *)up; connfp = - &ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_src, ports)]; + &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_src, + ports, ipst)]; mutex_enter(&connfp->connf_lock); for (connp = connfp->connf_head; connp != NULL; connp = connp->conn_next) { @@ -1342,6 +1446,10 @@ ipcl_classify_v4(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid) TSLF_UNLABELED) != 0; shared_addr = (zoneid == ALL_ZONES); if (shared_addr) { + /* + * No need to handle exclusive-stack zones since + * ALL_ZONES only applies to the shared stack. 
+ */ zoneid = tsol_mlp_findzone(protocol, lport); /* * If no shared MLP is found, tsol_mlp_findzone returns @@ -1359,7 +1467,8 @@ ipcl_classify_v4(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid) unlabeled = B_FALSE; } - bind_connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)]; + bind_connfp = + &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)]; mutex_enter(&bind_connfp->connf_lock); for (connp = bind_connfp->connf_head; connp != NULL; connp = connp->conn_next) { @@ -1409,6 +1518,10 @@ ipcl_classify_v4(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid) TSLF_UNLABELED) != 0; shared_addr = (zoneid == ALL_ZONES); if (shared_addr) { + /* + * No need to handle exclusive-stack zones since + * ALL_ZONES only applies to the shared stack. + */ zoneid = tsol_mlp_findzone(protocol, lport); /* * If no shared MLP is found, tsol_mlp_findzone returns @@ -1427,7 +1540,7 @@ ipcl_classify_v4(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid) } fport = up[0]; IPCL_DEBUG_LVL(512, ("ipcl_udp_classify %x %x", lport, fport)); - connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(lport)]; + connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)]; mutex_enter(&connfp->connf_lock); for (connp = connfp->connf_head; connp != NULL; connp = connp->conn_next) { @@ -1467,7 +1580,8 @@ ipcl_classify_v4(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid) } conn_t * -ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid) +ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid, + ip_stack_t *ipst) { ip6_t *ip6h; connf_t *connfp, *bind_connfp; @@ -1489,7 +1603,8 @@ ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid) ports = *(uint32_t *)up; connfp = - &ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_src, ports)]; + &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_src, + ports, ipst)]; mutex_enter(&connfp->connf_lock); for (connp = connfp->connf_head; connp != NULL; connp = connp->conn_next) { @@ -1524,6 +1639,10 @@ ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid) } shared_addr = (zoneid == ALL_ZONES); if (shared_addr) { + /* + * No need to handle exclusive-stack zones since + * ALL_ZONES only applies to the shared stack. + */ zoneid = tsol_mlp_findzone(protocol, lport); /* * If no shared MLP is found, tsol_mlp_findzone returns @@ -1541,7 +1660,8 @@ ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid) unlabeled = B_FALSE; } - bind_connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)]; + bind_connfp = + &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)]; mutex_enter(&bind_connfp->connf_lock); for (connp = bind_connfp->connf_head; connp != NULL; connp = connp->conn_next) { @@ -1592,6 +1712,10 @@ ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid) } shared_addr = (zoneid == ALL_ZONES); if (shared_addr) { + /* + * No need to handle exclusive-stack zones since + * ALL_ZONES only applies to the shared stack. 
+ */ zoneid = tsol_mlp_findzone(protocol, lport); /* * If no shared MLP is found, tsol_mlp_findzone returns @@ -1612,7 +1736,7 @@ ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid) fport = up[0]; IPCL_DEBUG_LVL(512, ("ipcl_udp_classify_v6 %x %x", lport, fport)); - connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(lport)]; + connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)]; mutex_enter(&connfp->connf_lock); for (connp = connfp->connf_head; connp != NULL; connp = connp->conn_next) { @@ -1655,7 +1779,7 @@ ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid) * wrapper around ipcl_classify_(v4,v6) routines. */ conn_t * -ipcl_classify(mblk_t *mp, zoneid_t zoneid) +ipcl_classify(mblk_t *mp, zoneid_t zoneid, ip_stack_t *ipst) { uint16_t hdr_len; ipha_t *ipha; @@ -1669,13 +1793,13 @@ ipcl_classify(mblk_t *mp, zoneid_t zoneid) ipha = (ipha_t *)mp->b_rptr; hdr_len = IPH_HDR_LENGTH(ipha); return (ipcl_classify_v4(mp, ipha->ipha_protocol, hdr_len, - zoneid)); + zoneid, ipst)); case IPV6_VERSION: if (!ip_hdr_length_nexthdr_v6(mp, (ip6_t *)mp->b_rptr, &hdr_len, &nexthdrp)) return (NULL); - return (ipcl_classify_v6(mp, *nexthdrp, hdr_len, zoneid)); + return (ipcl_classify_v6(mp, *nexthdrp, hdr_len, zoneid, ipst)); } return (NULL); @@ -1683,7 +1807,7 @@ ipcl_classify(mblk_t *mp, zoneid_t zoneid) conn_t * ipcl_classify_raw(mblk_t *mp, uint8_t protocol, zoneid_t zoneid, - uint32_t ports, ipha_t *hdr) + uint32_t ports, ipha_t *hdr, ip_stack_t *ipst) { connf_t *connfp; conn_t *connp; @@ -1705,6 +1829,10 @@ ipcl_classify_raw(mblk_t *mp, uint8_t protocol, zoneid_t zoneid, } shared_addr = (zoneid == ALL_ZONES); if (shared_addr) { + /* + * No need to handle exclusive-stack zones since ALL_ZONES + * only applies to the shared stack. + */ zoneid = tsol_mlp_findzone(protocol, lport); /* * If no shared MLP is found, tsol_mlp_findzone returns @@ -1724,7 +1852,7 @@ ipcl_classify_raw(mblk_t *mp, uint8_t protocol, zoneid_t zoneid, af = IPH_HDR_VERSION(hdr); dst = af == IPV4_VERSION ? (const void *)&hdr->ipha_dst : (const void *)&((ip6_t *)hdr)->ip6_dst; - connfp = &ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport))]; + connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)]; mutex_enter(&connfp->connf_lock); for (connp = connfp->connf_head; connp != NULL; @@ -1779,7 +1907,7 @@ ipcl_classify_raw(mblk_t *mp, uint8_t protocol, zoneid_t zoneid, mutex_exit(&connfp->connf_lock); /* Try to look for a wildcard match. */ - connfp = &ipcl_raw_fanout[IPCL_RAW_HASH(0)]; + connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(0, ipst)]; mutex_enter(&connfp->connf_lock); for (connp = connfp->connf_head; connp != NULL; connp = connp->conn_next) { @@ -1859,13 +1987,15 @@ void ipcl_globalhash_insert(conn_t *connp) { int index; + struct connf_s *connfp; + ip_stack_t *ipst = connp->conn_netstack->netstack_ip; /* * No need for atomic here. Approximate even distribution * in the global lists is sufficient. 
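ipcl_globalhash_insert() keeps its round-robin bucket selection, but the rotating counter and the bucket array are now per instance (ips_conn_g_index, ips_ipcl_globalhash_fanout), so each stack maintains its own walkable list of connections for ipcl_walk(). A compact model of that distribution follows, with an invented bucket count and the per-bucket locking omitted for brevity.

#include <stdlib.h>

#define CONN_G_HASH_SIZE_MODEL	32	/* illustrative; must be a power of two */

typedef struct conn { struct conn *conn_g_next; } conn_t;

typedef struct ip_stack {
	unsigned int	 ips_conn_g_index;
	conn_t		*ips_ipcl_globalhash_fanout[CONN_G_HASH_SIZE_MODEL];
} ip_stack_t;

/* Approximate even distribution is enough here, so no atomics are needed. */
void
globalhash_insert_model(ip_stack_t *ipst, conn_t *connp)
{
	unsigned int index;

	ipst->ips_conn_g_index++;
	index = ipst->ips_conn_g_index & (CONN_G_HASH_SIZE_MODEL - 1);
	connp->conn_g_next = ipst->ips_ipcl_globalhash_fanout[index];
	ipst->ips_ipcl_globalhash_fanout[index] = connp;
}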
*/ - conn_g_index++; - index = conn_g_index & (CONN_G_HASH_SIZE - 1); + ipst->ips_conn_g_index++; + index = ipst->ips_conn_g_index & (CONN_G_HASH_SIZE - 1); connp->conn_g_prev = NULL; /* @@ -1874,22 +2004,25 @@ ipcl_globalhash_insert(conn_t *connp) */ connp->conn_state_flags |= CONN_INCIPIENT; + connfp = &ipst->ips_ipcl_globalhash_fanout[index]; /* Insert at the head of the list */ - mutex_enter(&ipcl_globalhash_fanout[index].connf_lock); - connp->conn_g_next = ipcl_globalhash_fanout[index].connf_head; + mutex_enter(&connfp->connf_lock); + connp->conn_g_next = connfp->connf_head; if (connp->conn_g_next != NULL) connp->conn_g_next->conn_g_prev = connp; - ipcl_globalhash_fanout[index].connf_head = connp; + connfp->connf_head = connp; /* The fanout bucket this conn points to */ - connp->conn_g_fanout = &ipcl_globalhash_fanout[index]; + connp->conn_g_fanout = connfp; - mutex_exit(&ipcl_globalhash_fanout[index].connf_lock); + mutex_exit(&connfp->connf_lock); } void ipcl_globalhash_remove(conn_t *connp) { + struct connf_s *connfp; + /* * We were never inserted in the global multi list. * IPCL_NONE variety is never inserted in the global multilist @@ -1898,14 +2031,15 @@ ipcl_globalhash_remove(conn_t *connp) if (connp->conn_g_fanout == NULL) return; - mutex_enter(&connp->conn_g_fanout->connf_lock); + connfp = connp->conn_g_fanout; + mutex_enter(&connfp->connf_lock); if (connp->conn_g_prev != NULL) connp->conn_g_prev->conn_g_next = connp->conn_g_next; else - connp->conn_g_fanout->connf_head = connp->conn_g_next; + connfp->connf_head = connp->conn_g_next; if (connp->conn_g_next != NULL) connp->conn_g_next->conn_g_prev = connp->conn_g_prev; - mutex_exit(&connp->conn_g_fanout->connf_lock); + mutex_exit(&connfp->connf_lock); /* Better to stumble on a null pointer than to corrupt memory */ connp->conn_g_next = NULL; @@ -1926,16 +2060,16 @@ ipcl_globalhash_remove(conn_t *connp) * is created to the struct that is going away. */ void -ipcl_walk(pfv_t func, void *arg) +ipcl_walk(pfv_t func, void *arg, ip_stack_t *ipst) { int i; conn_t *connp; conn_t *prev_connp; for (i = 0; i < CONN_G_HASH_SIZE; i++) { - mutex_enter(&ipcl_globalhash_fanout[i].connf_lock); + mutex_enter(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock); prev_connp = NULL; - connp = ipcl_globalhash_fanout[i].connf_head; + connp = ipst->ips_ipcl_globalhash_fanout[i].connf_head; while (connp != NULL) { mutex_enter(&connp->conn_lock); if (connp->conn_state_flags & @@ -1946,15 +2080,17 @@ ipcl_walk(pfv_t func, void *arg) } CONN_INC_REF_LOCKED(connp); mutex_exit(&connp->conn_lock); - mutex_exit(&ipcl_globalhash_fanout[i].connf_lock); + mutex_exit( + &ipst->ips_ipcl_globalhash_fanout[i].connf_lock); (*func)(connp, arg); if (prev_connp != NULL) CONN_DEC_REF(prev_connp); - mutex_enter(&ipcl_globalhash_fanout[i].connf_lock); + mutex_enter( + &ipst->ips_ipcl_globalhash_fanout[i].connf_lock); prev_connp = connp; connp = connp->conn_g_next; } - mutex_exit(&ipcl_globalhash_fanout[i].connf_lock); + mutex_exit(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock); if (prev_connp != NULL) CONN_DEC_REF(prev_connp); } @@ -1967,7 +2103,8 @@ ipcl_walk(pfv_t func, void *arg) * (peer tcp in ESTABLISHED state). 
*/ conn_t * -ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *connp, ipha_t *ipha, tcph_t *tcph) +ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *connp, ipha_t *ipha, tcph_t *tcph, + ip_stack_t *ipst) { uint32_t ports; uint16_t *pports = (uint16_t *)&ports; @@ -1987,7 +2124,8 @@ ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *connp, ipha_t *ipha, tcph_t *tcph) bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t)); bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t)); - connfp = &ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst, ports)]; + connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst, + ports, ipst)]; mutex_enter(&connfp->connf_lock); for (tconnp = connfp->connf_head; tconnp != NULL; @@ -2015,7 +2153,8 @@ ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *connp, ipha_t *ipha, tcph_t *tcph) * (peer tcp in ESTABLISHED state). */ conn_t * -ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *connp, ip6_t *ip6h, tcph_t *tcph) +ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *connp, ip6_t *ip6h, tcph_t *tcph, + ip_stack_t *ipst) { uint32_t ports; uint16_t *pports = (uint16_t *)&ports; @@ -2038,7 +2177,8 @@ ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *connp, ip6_t *ip6h, tcph_t *tcph) bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t)); bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t)); - connfp = &ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst, ports)]; + connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst, + ports, ipst)]; mutex_enter(&connfp->connf_lock); for (tconnp = connfp->connf_head; tconnp != NULL; @@ -2066,7 +2206,8 @@ ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *connp, ip6_t *ip6h, tcph_t *tcph) * Only checks for connected entries i.e. no INADDR_ANY checks. */ conn_t * -ipcl_tcp_lookup_reversed_ipv4(ipha_t *ipha, tcph_t *tcph, int min_state) +ipcl_tcp_lookup_reversed_ipv4(ipha_t *ipha, tcph_t *tcph, int min_state, + ip_stack_t *ipst) { uint32_t ports; uint16_t *pports; @@ -2077,7 +2218,8 @@ ipcl_tcp_lookup_reversed_ipv4(ipha_t *ipha, tcph_t *tcph, int min_state) bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t)); bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t)); - connfp = &ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst, ports)]; + connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst, + ports, ipst)]; mutex_enter(&connfp->connf_lock); for (tconnp = connfp->connf_head; tconnp != NULL; @@ -2104,7 +2246,7 @@ ipcl_tcp_lookup_reversed_ipv4(ipha_t *ipha, tcph_t *tcph, int min_state) */ conn_t * ipcl_tcp_lookup_reversed_ipv6(ip6_t *ip6h, tcpha_t *tcpha, int min_state, - uint_t ifindex) + uint_t ifindex, ip_stack_t *ipst) { tcp_t *tcp; uint32_t ports; @@ -2116,7 +2258,8 @@ ipcl_tcp_lookup_reversed_ipv6(ip6_t *ip6h, tcpha_t *tcpha, int min_state, pports[0] = tcpha->tha_fport; pports[1] = tcpha->tha_lport; - connfp = &ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst, ports)]; + connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst, + ports, ipst)]; mutex_enter(&connfp->connf_lock); for (tconnp = connfp->connf_head; tconnp != NULL; @@ -2143,7 +2286,8 @@ ipcl_tcp_lookup_reversed_ipv6(ip6_t *ip6h, tcpha_t *tcpha, int min_state, * a listener when changing state. 
*/ conn_t * -ipcl_lookup_listener_v4(uint16_t lport, ipaddr_t laddr, zoneid_t zoneid) +ipcl_lookup_listener_v4(uint16_t lport, ipaddr_t laddr, zoneid_t zoneid, + ip_stack_t *ipst) { connf_t *bind_connfp; conn_t *connp; @@ -2158,7 +2302,7 @@ ipcl_lookup_listener_v4(uint16_t lport, ipaddr_t laddr, zoneid_t zoneid) ASSERT(zoneid != ALL_ZONES); - bind_connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)]; + bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)]; mutex_enter(&bind_connfp->connf_lock); for (connp = bind_connfp->connf_head; connp != NULL; connp = connp->conn_next) { @@ -2181,7 +2325,7 @@ ipcl_lookup_listener_v4(uint16_t lport, ipaddr_t laddr, zoneid_t zoneid) */ conn_t * ipcl_lookup_listener_v6(uint16_t lport, in6_addr_t *laddr, uint_t ifindex, - zoneid_t zoneid) + zoneid_t zoneid, ip_stack_t *ipst) { connf_t *bind_connfp; conn_t *connp = NULL; @@ -2196,7 +2340,7 @@ ipcl_lookup_listener_v6(uint16_t lport, in6_addr_t *laddr, uint_t ifindex, ASSERT(zoneid != ALL_ZONES); - bind_connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)]; + bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)]; mutex_enter(&bind_connfp->connf_lock); for (connp = bind_connfp->connf_head; connp != NULL; connp = connp->conn_next) { diff --git a/usr/src/uts/common/inet/ip/ipdrop.c b/usr/src/uts/common/inet/ip/ipdrop.c index a8e6ff0ecc..73e07a2647 100644 --- a/usr/src/uts/common/inet/ip/ipdrop.c +++ b/usr/src/uts/common/inet/ip/ipdrop.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -36,145 +36,167 @@ #include <inet/ip.h> #include <inet/ip6.h> #include <inet/ipsec_info.h> +#include <inet/ipsec_impl.h> #include <inet/ipdrop.h> /* * Packet drop facility. */ -kstat_t *ip_drop_kstat; -struct ip_dropstats *ip_drop_types; - /* * Initialize drop facility kstats. */ void -ip_drop_init(void) +ip_drop_init(ipsec_stack_t *ipss) { - ip_drop_kstat = kstat_create("ip", 0, "ipdrop", "net", - KSTAT_TYPE_NAMED, sizeof (*ip_drop_types) / sizeof (kstat_named_t), - KSTAT_FLAG_PERSISTENT); + ipss->ipsec_ip_drop_kstat = kstat_create_netstack("ip", 0, "ipdrop", + "net", KSTAT_TYPE_NAMED, + sizeof (struct ip_dropstats) / sizeof (kstat_named_t), + KSTAT_FLAG_PERSISTENT, ipss->ipsec_netstack->netstack_stackid); - if (ip_drop_kstat == NULL) + if (ipss->ipsec_ip_drop_kstat == NULL || + ipss->ipsec_ip_drop_kstat->ks_data == NULL) return; - ip_drop_types = ip_drop_kstat->ks_data; + /* + * Note: here ipss->ipsec_ip_drop_types is initialized, however, + * if the previous kstat_create_netstack failed, it will remain + * NULL. Note this is done for all stack instances, so it *could* + * be NULL. Hence a non-NULL checking is added where + * ipss->ipsec_ip_drop_types is used. This checking is hidden in + * the DROPPER macro. + */ + ipss->ipsec_ip_drop_types = ipss->ipsec_ip_drop_kstat->ks_data; /* TCP IPsec drop statistics. 
*/ - kstat_named_init(&ipdrops_tcp_clear, "tcp_clear", KSTAT_DATA_UINT64); - kstat_named_init(&ipdrops_tcp_secure, "tcp_secure", KSTAT_DATA_UINT64); - kstat_named_init(&ipdrops_tcp_mismatch, "tcp_mismatch", - KSTAT_DATA_UINT64); - kstat_named_init(&ipdrops_tcp_ipsec_alloc, "tcp_ipsec_alloc", - KSTAT_DATA_UINT64); + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_tcp_clear, + "tcp_clear", KSTAT_DATA_UINT64); + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_tcp_secure, + "tcp_secure", KSTAT_DATA_UINT64); + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_tcp_mismatch, + "tcp_mismatch", KSTAT_DATA_UINT64); + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_tcp_ipsec_alloc, + "tcp_ipsec_alloc", KSTAT_DATA_UINT64); /* SADB-specific drop statistics. */ - kstat_named_init(&ipdrops_sadb_inlarval_timeout, + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_sadb_inlarval_timeout, "sadb_inlarval_timeout", KSTAT_DATA_UINT64); - kstat_named_init(&ipdrops_sadb_inlarval_replace, + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_sadb_inlarval_replace, "sadb_inlarval_replace", KSTAT_DATA_UINT64); - kstat_named_init(&ipdrops_sadb_acquire_nomem, + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_sadb_acquire_nomem, "sadb_acquire_nomem", KSTAT_DATA_UINT64); - kstat_named_init(&ipdrops_sadb_acquire_toofull, + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_sadb_acquire_toofull, "sadb_acquire_toofull", KSTAT_DATA_UINT64); - kstat_named_init(&ipdrops_sadb_acquire_timeout, + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_sadb_acquire_timeout, "sadb_acquire_timeout", KSTAT_DATA_UINT64); /* SPD drop statistics. */ - kstat_named_init(&ipdrops_spd_ahesp_diffid, "spd_ahesp_diffid", - KSTAT_DATA_UINT64); - kstat_named_init(&ipdrops_spd_loopback_mismatch, + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_spd_ahesp_diffid, + "spd_ahesp_diffid", KSTAT_DATA_UINT64); + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_spd_loopback_mismatch, "spd_loopback_mismatch", KSTAT_DATA_UINT64); - kstat_named_init(&ipdrops_spd_explicit, "spd_explicit", - KSTAT_DATA_UINT64); - kstat_named_init(&ipdrops_spd_got_secure, "spd_got_secure", - KSTAT_DATA_UINT64); - kstat_named_init(&ipdrops_spd_got_clear, "spd_got_clear", - KSTAT_DATA_UINT64); - kstat_named_init(&ipdrops_spd_bad_ahalg, "spd_bad_ahalg", - KSTAT_DATA_UINT64); - kstat_named_init(&ipdrops_spd_got_ah, "spd_got_ah", KSTAT_DATA_UINT64); - kstat_named_init(&ipdrops_spd_bad_espealg, "spd_bad_espealg", - KSTAT_DATA_UINT64); - kstat_named_init(&ipdrops_spd_bad_espaalg, "spd_bad_espaalg", - KSTAT_DATA_UINT64); - kstat_named_init(&ipdrops_spd_got_esp, "spd_got_esp", - KSTAT_DATA_UINT64); - kstat_named_init(&ipdrops_spd_got_selfencap, "spd_got_selfencap", - KSTAT_DATA_UINT64); - kstat_named_init(&ipdrops_spd_bad_selfencap, "spd_bad_selfencap", - KSTAT_DATA_UINT64); - kstat_named_init(&ipdrops_spd_nomem, "spd_nomem", KSTAT_DATA_UINT64); - kstat_named_init(&ipdrops_spd_ah_badid, "spd_ah_badid", - KSTAT_DATA_UINT64); - kstat_named_init(&ipdrops_spd_esp_badid, "spd_esp_badid", - KSTAT_DATA_UINT64); - kstat_named_init(&ipdrops_spd_ah_innermismatch, + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_spd_explicit, + "spd_explicit", KSTAT_DATA_UINT64); + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_spd_got_secure, + "spd_got_secure", KSTAT_DATA_UINT64); + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_spd_got_clear, + "spd_got_clear", KSTAT_DATA_UINT64); + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_spd_bad_ahalg, + "spd_bad_ahalg", KSTAT_DATA_UINT64); + 
kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_spd_got_ah, + "spd_got_ah", KSTAT_DATA_UINT64); + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_spd_bad_espealg, + "spd_bad_espealg", KSTAT_DATA_UINT64); + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_spd_bad_espaalg, + "spd_bad_espaalg", KSTAT_DATA_UINT64); + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_spd_got_esp, + "spd_got_esp", KSTAT_DATA_UINT64); + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_spd_got_selfencap, + "spd_got_selfencap", KSTAT_DATA_UINT64); + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_spd_bad_selfencap, + "spd_bad_selfencap", KSTAT_DATA_UINT64); + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_spd_nomem, + "spd_nomem", KSTAT_DATA_UINT64); + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_spd_ah_badid, + "spd_ah_badid", KSTAT_DATA_UINT64); + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_spd_ah_innermismatch, "spd_ah_innermismatch", KSTAT_DATA_UINT64); - kstat_named_init(&ipdrops_spd_esp_innermismatch, + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_spd_esp_innermismatch, "spd_esp_innermismatch", KSTAT_DATA_UINT64); - kstat_named_init(&ipdrops_spd_no_policy, "spd_no_policy", - KSTAT_DATA_UINT64); - kstat_named_init(&ipdrops_spd_malformed_packet, "spd_malformed_packet", - KSTAT_DATA_UINT64); - kstat_named_init(&ipdrops_spd_malformed_frag, "spd_malformed_frag", - KSTAT_DATA_UINT64); - kstat_named_init(&ipdrops_spd_overlap_frag, "spd_overlap_frag", - KSTAT_DATA_UINT64); - kstat_named_init(&ipdrops_spd_evil_frag, "spd_evil_frag", - KSTAT_DATA_UINT64); - kstat_named_init(&ipdrops_spd_max_frags, "spd_max_frags", - KSTAT_DATA_UINT64); + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_spd_esp_badid, + "spd_esp_badid", KSTAT_DATA_UINT64); + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_spd_no_policy, + "spd_no_policy", KSTAT_DATA_UINT64); + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_spd_malformed_packet, + "spd_malformed_packet", KSTAT_DATA_UINT64); + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_spd_malformed_frag, + "spd_malformed_frag", KSTAT_DATA_UINT64); + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_spd_overlap_frag, + "spd_overlap_frag", KSTAT_DATA_UINT64); + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_spd_evil_frag, + "spd_evil_frag", KSTAT_DATA_UINT64); + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_spd_max_frags, + "spd_max_frags", KSTAT_DATA_UINT64); /* ESP-specific drop statistics. 
*/ - kstat_named_init(&ipdrops_esp_nomem, "esp_nomem", KSTAT_DATA_UINT64); - kstat_named_init(&ipdrops_esp_no_sa, "esp_no_sa", KSTAT_DATA_UINT64); - kstat_named_init(&ipdrops_esp_early_replay, "esp_early_replay", - KSTAT_DATA_UINT64); - kstat_named_init(&ipdrops_esp_replay, "esp_replay", KSTAT_DATA_UINT64); - kstat_named_init(&ipdrops_esp_bytes_expire, "esp_bytes_expire", - KSTAT_DATA_UINT64); - kstat_named_init(&ipdrops_esp_bad_padlen, "esp_bad_padlen", - KSTAT_DATA_UINT64); - kstat_named_init(&ipdrops_esp_bad_padding, "esp_bad_padding", - KSTAT_DATA_UINT64); - kstat_named_init(&ipdrops_esp_bad_auth, "esp_bad_auth", - KSTAT_DATA_UINT64); - kstat_named_init(&ipdrops_esp_crypto_failed, "esp_crypto_failed", - KSTAT_DATA_UINT64); - kstat_named_init(&ipdrops_esp_icmp, "esp_icmp", KSTAT_DATA_UINT64); + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_esp_nomem, + "esp_nomem", KSTAT_DATA_UINT64); + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_esp_no_sa, + "esp_no_sa", KSTAT_DATA_UINT64); + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_esp_early_replay, + "esp_early_replay", KSTAT_DATA_UINT64); + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_esp_replay, + "esp_replay", KSTAT_DATA_UINT64); + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_esp_bytes_expire, + "esp_bytes_expire", KSTAT_DATA_UINT64); + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_esp_bad_padlen, + "esp_bad_padlen", KSTAT_DATA_UINT64); + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_esp_bad_padding, + "esp_bad_padding", KSTAT_DATA_UINT64); + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_esp_bad_auth, + "esp_bad_auth", KSTAT_DATA_UINT64); + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_esp_crypto_failed, + "esp_crypto_failed", KSTAT_DATA_UINT64); + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_esp_icmp, + "esp_icmp", KSTAT_DATA_UINT64); /* AH-specific drop statistics. 
*/ - kstat_named_init(&ipdrops_ah_nomem, "ah_nomem", KSTAT_DATA_UINT64); - kstat_named_init(&ipdrops_ah_bad_v6_hdrs, "ah_bad_v6_hdrs", - KSTAT_DATA_UINT64); - kstat_named_init(&ipdrops_ah_bad_v4_opts, "ah_bad_v4_opts", - KSTAT_DATA_UINT64); - kstat_named_init(&ipdrops_ah_no_sa, "ah_no_sa", KSTAT_DATA_UINT64); - kstat_named_init(&ipdrops_ah_bad_length, "ah_bad_length", - KSTAT_DATA_UINT64); - kstat_named_init(&ipdrops_ah_bad_auth, "ah_bad_auth", - KSTAT_DATA_UINT64); - kstat_named_init(&ipdrops_ah_crypto_failed, "ah_crypto_failed", - KSTAT_DATA_UINT64); - kstat_named_init(&ipdrops_ah_early_replay, "ah_early_replay", - KSTAT_DATA_UINT64); - kstat_named_init(&ipdrops_ah_replay, "ah_replay", KSTAT_DATA_UINT64); - kstat_named_init(&ipdrops_ah_bytes_expire, "ah_bytes_expire", - KSTAT_DATA_UINT64); + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_ah_nomem, + "ah_nomem", KSTAT_DATA_UINT64); + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_ah_bad_v6_hdrs, + "ah_bad_v6_hdrs", KSTAT_DATA_UINT64); + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_ah_bad_v4_opts, + "ah_bad_v4_opts", KSTAT_DATA_UINT64); + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_ah_no_sa, + "ah_no_sa", KSTAT_DATA_UINT64); + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_ah_bad_length, + "ah_bad_length", KSTAT_DATA_UINT64); + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_ah_bad_auth, + "ah_bad_auth", KSTAT_DATA_UINT64); + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_ah_crypto_failed, + "ah_crypto_failed", KSTAT_DATA_UINT64); + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_ah_early_replay, + "ah_early_replay", KSTAT_DATA_UINT64); + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_ah_replay, + "ah_replay", KSTAT_DATA_UINT64); + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_ah_bytes_expire, + "ah_bytes_expire", KSTAT_DATA_UINT64); /* IP-specific drop statistics. */ - kstat_named_init(&ipdrops_ip_ipsec_not_loaded, "ip_ipsec_not_loaded", - KSTAT_DATA_UINT64); + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_ip_ipsec_not_loaded, + "ip_ipsec_not_loaded", KSTAT_DATA_UINT64); - kstat_install(ip_drop_kstat); + kstat_install(ipss->ipsec_ip_drop_kstat); } void -ip_drop_destroy(void) +ip_drop_destroy(ipsec_stack_t *ipss) { - kstat_delete(ip_drop_kstat); + kstat_delete_netstack(ipss->ipsec_ip_drop_kstat, + ipss->ipsec_netstack->netstack_stackid); + ipss->ipsec_ip_drop_kstat = NULL; + ipss->ipsec_ip_drop_types = NULL; } /* @@ -201,6 +223,12 @@ ip_drop_register(ipdropper_t *ipd, char *name) void ip_drop_unregister(ipdropper_t *ipd) { + if (ipd->ipd_name == NULL) { + cmn_err(CE_WARN, + "ip_drop_unregister: not registered (%p)\n", + (void *)ipd); + return; + } kmem_free(ipd->ipd_name, strlen(ipd->ipd_name) + 1); ipd->ipd_name = NULL; diff --git a/usr/src/uts/common/inet/ip/ipsec_loader.c b/usr/src/uts/common/inet/ip/ipsec_loader.c index 6460028c96..e37da9a631 100644 --- a/usr/src/uts/common/inet/ip/ipsec_loader.c +++ b/usr/src/uts/common/inet/ip/ipsec_loader.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. 
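(The ipdrop.c hunks above all apply one pattern that recurs throughout this commit: counters that used to be file-scope globals move into a per-stack structure, and the kstat is created and deleted against a specific netstack id so each stack instance keeps its own statistics. The fragment below is only a minimal sketch of that pattern for readers unfamiliar with the netstack kstat calls; the struct my_stack_t, the functions my_kstat_init()/my_kstat_fini(), and the kstat name "ipdrop_example" are illustrative inventions, while the kstat_create_netstack()/kstat_delete_netstack() usage and the NULL ks_data guard mirror what the hunks above actually do.)

	#include <sys/kstat.h>
	#include <sys/netstack.h>

	/* Hypothetical per-stack state standing in for former globals. */
	typedef struct my_stack {
		netstack_t	*my_netstack;	/* owning stack */
		kstat_t		*my_ksp;	/* this stack's kstat */
	} my_stack_t;

	static void
	my_kstat_init(my_stack_t *ms, netstackid_t stackid)
	{
		/* Create the kstat for this stack instance only. */
		ms->my_ksp = kstat_create_netstack("ip", 0, "ipdrop_example",
		    "net", KSTAT_TYPE_NAMED, 1, KSTAT_FLAG_PERSISTENT,
		    stackid);

		/* Creation can fail per stack; callers must tolerate that. */
		if (ms->my_ksp == NULL || ms->my_ksp->ks_data == NULL)
			return;

		kstat_named_init((kstat_named_t *)ms->my_ksp->ks_data,
		    "example_counter", KSTAT_DATA_UINT64);
		kstat_install(ms->my_ksp);
	}

	static void
	my_kstat_fini(my_stack_t *ms, netstackid_t stackid)
	{
		if (ms->my_ksp == NULL)
			return;
		/* Tear down with the matching netstack-aware delete. */
		kstat_delete_netstack(ms->my_ksp, stackid);
		ms->my_ksp = NULL;
	}

(Because a given stack's kstat may never have been created, consumers of the per-stack counters check for NULL before dereferencing; in the patch that check is folded into the DROPPER macro.)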
@@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2000-2003 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -35,66 +34,58 @@ #include <sys/proc.h> #include <sys/modctl.h> #include <sys/disp.h> +#include <inet/ip.h> #include <inet/ipsec_impl.h> +#include <inet/optcom.h> +#include <inet/keysock.h> /* - * Loader commands.. + * Loader commands for ipsec_loader_sig */ #define IPSEC_LOADER_EXITNOW -1 #define IPSEC_LOADER_LOADNOW 1 /* - * The following variables are kept because IPsec should be loaded only when - * it is used. - */ -static kt_did_t ipsec_loader_tid; -kmutex_t ipsec_loader_lock; -static int ipsec_loader_sig = IPSEC_LOADER_WAIT; -int ipsec_loader_state = IPSEC_LOADER_WAIT; -static kcondvar_t ipsec_loader_sig_cv; /* For loader_sig conditions. */ - - -/* * NOTE: This function is entered w/o holding any STREAMS perimeters. */ -/* ARGSUSED */ static void -ipsec_loader(void *ignoreme) +ipsec_loader(void *arg) { - extern int keysock_plumb_ipsec(void); callb_cpr_t cprinfo; boolean_t ipsec_failure = B_FALSE; + ipsec_stack_t *ipss = (ipsec_stack_t *)arg; - CALLB_CPR_INIT(&cprinfo, &ipsec_loader_lock, callb_generic_cpr, + CALLB_CPR_INIT(&cprinfo, &ipss->ipsec_loader_lock, callb_generic_cpr, "ipsec_loader"); - mutex_enter(&ipsec_loader_lock); + mutex_enter(&ipss->ipsec_loader_lock); for (;;) { /* * Wait for someone to tell me to continue. */ - while (ipsec_loader_sig == IPSEC_LOADER_WAIT) { + while (ipss->ipsec_loader_sig == IPSEC_LOADER_WAIT) { CALLB_CPR_SAFE_BEGIN(&cprinfo); - cv_wait(&ipsec_loader_sig_cv, &ipsec_loader_lock); - CALLB_CPR_SAFE_END(&cprinfo, &ipsec_loader_lock); + cv_wait(&ipss->ipsec_loader_sig_cv, + &ipss->ipsec_loader_lock); + CALLB_CPR_SAFE_END(&cprinfo, &ipss->ipsec_loader_lock); } /* IPSEC_LOADER_EXITNOW implies signal by _fini(). */ - if (ipsec_loader_sig == IPSEC_LOADER_EXITNOW) { + if (ipss->ipsec_loader_sig == IPSEC_LOADER_EXITNOW) { /* * Let user patch ipsec_loader_tid to * 0 to try again. */ - ipsec_loader_state = IPSEC_LOADER_FAILED; - ipsec_loader_sig = IPSEC_LOADER_WAIT; + ipss->ipsec_loader_state = IPSEC_LOADER_FAILED; + ipss->ipsec_loader_sig = IPSEC_LOADER_WAIT; /* ipsec_loader_lock is held at this point! */ - ASSERT(MUTEX_HELD(&ipsec_loader_lock)); + ASSERT(MUTEX_HELD(&ipss->ipsec_loader_lock)); CALLB_CPR_EXIT(&cprinfo); - ASSERT(!MUTEX_HELD(&ipsec_loader_lock)); + ASSERT(!MUTEX_HELD(&ipss->ipsec_loader_lock)); thread_exit(); } - mutex_exit(&ipsec_loader_lock); + mutex_exit(&ipss->ipsec_loader_lock); /* * Load IPsec, which is done by modloading keysock and calling @@ -110,7 +101,7 @@ ipsec_loader(void *ignoreme) * B_FALSE and try again. */ ipsec_failure = B_TRUE; - } else if (keysock_plumb_ipsec() != 0) { + } else if (keysock_plumb_ipsec(ipss->ipsec_netstack) != 0) { cmn_err(CE_WARN, "IP: Cannot plumb IPsec."); /* * Only this function can set ipsec_failure. 
If the @@ -122,22 +113,22 @@ ipsec_loader(void *ignoreme) ipsec_failure = B_FALSE; } - mutex_enter(&ipsec_loader_lock); + mutex_enter(&ipss->ipsec_loader_lock); if (ipsec_failure) { - if (ipsec_loader_sig == IPSEC_LOADER_LOADNOW) - ipsec_loader_sig = IPSEC_LOADER_WAIT; - ipsec_loader_state = IPSEC_LOADER_FAILED; + if (ipss->ipsec_loader_sig == IPSEC_LOADER_LOADNOW) + ipss->ipsec_loader_sig = IPSEC_LOADER_WAIT; + ipss->ipsec_loader_state = IPSEC_LOADER_FAILED; } else { - ipsec_loader_state = IPSEC_LOADER_SUCCEEDED; + ipss->ipsec_loader_state = IPSEC_LOADER_SUCCEEDED; } - mutex_exit(&ipsec_loader_lock); + mutex_exit(&ipss->ipsec_loader_lock); - ip_ipsec_load_complete(); + ip_ipsec_load_complete(ipss); - mutex_enter(&ipsec_loader_lock); + mutex_enter(&ipss->ipsec_loader_lock); if (!ipsec_failure) { CALLB_CPR_EXIT(&cprinfo); - ASSERT(!MUTEX_HELD(&ipsec_loader_lock)); + ASSERT(!MUTEX_HELD(&ipss->ipsec_loader_lock)); ipsec_register_prov_update(); thread_exit(); } @@ -148,28 +139,28 @@ ipsec_loader(void *ignoreme) * Called from ip_ddi_init() to initialize ipsec loader thread. */ void -ipsec_loader_init(void) +ipsec_loader_init(ipsec_stack_t *ipss) { - mutex_init(&ipsec_loader_lock, NULL, MUTEX_DEFAULT, NULL); - cv_init(&ipsec_loader_sig_cv, NULL, CV_DEFAULT, NULL); + mutex_init(&ipss->ipsec_loader_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&ipss->ipsec_loader_sig_cv, NULL, CV_DEFAULT, NULL); } /* * Called from ip_ddi_destroy() to take down ipsec loader thread. */ void -ipsec_loader_destroy(void) +ipsec_loader_destroy(ipsec_stack_t *ipss) { kt_did_t tid; - mutex_enter(&ipsec_loader_lock); - tid = ipsec_loader_tid; + mutex_enter(&ipss->ipsec_loader_lock); + tid = ipss->ipsec_loader_tid; if (tid != 0) { - ipsec_loader_sig = IPSEC_LOADER_EXITNOW; - cv_signal(&ipsec_loader_sig_cv); - ipsec_loader_tid = 0; + ipss->ipsec_loader_sig = IPSEC_LOADER_EXITNOW; + cv_signal(&ipss->ipsec_loader_sig_cv); + ipss->ipsec_loader_tid = 0; } - mutex_exit(&ipsec_loader_lock); + mutex_exit(&ipss->ipsec_loader_lock); /* * Wait for ipsec_loader() to finish before we destroy @@ -178,28 +169,28 @@ ipsec_loader_destroy(void) if (tid != 0) thread_join(tid); - mutex_destroy(&ipsec_loader_lock); - cv_destroy(&ipsec_loader_sig_cv); + mutex_destroy(&ipss->ipsec_loader_lock); + cv_destroy(&ipss->ipsec_loader_sig_cv); } void -ipsec_loader_start(void) +ipsec_loader_start(ipsec_stack_t *ipss) { kthread_t *tp; - mutex_enter(&ipsec_loader_lock); + mutex_enter(&ipss->ipsec_loader_lock); - if (ipsec_loader_tid == 0) { - tp = thread_create(NULL, 0, ipsec_loader, NULL, 0, &p0, + if (ipss->ipsec_loader_tid == 0) { + tp = thread_create(NULL, 0, ipsec_loader, ipss, 0, &p0, TS_RUN, MAXCLSYSPRI); - ipsec_loader_tid = tp->t_did; + ipss->ipsec_loader_tid = tp->t_did; } /* Else we lost the race, oh well. */ - mutex_exit(&ipsec_loader_lock); + mutex_exit(&ipss->ipsec_loader_lock); } void -ipsec_loader_loadnow() +ipsec_loader_loadnow(ipsec_stack_t *ipss) { /* * It is possible that an algorithm update message was @@ -209,15 +200,15 @@ ipsec_loader_loadnow() * than spdsock, we must trigger the processing of * update messages from the ipsec loader. 
*/ - spdsock_update_pending_algs(); + spdsock_update_pending_algs(ipss->ipsec_netstack); - mutex_enter(&ipsec_loader_lock); - if ((ipsec_loader_state == IPSEC_LOADER_WAIT) && - (ipsec_loader_sig == IPSEC_LOADER_WAIT)) { - ipsec_loader_sig = IPSEC_LOADER_LOADNOW; - cv_signal(&ipsec_loader_sig_cv); + mutex_enter(&ipss->ipsec_loader_lock); + if ((ipss->ipsec_loader_state == IPSEC_LOADER_WAIT) && + (ipss->ipsec_loader_sig == IPSEC_LOADER_WAIT)) { + ipss->ipsec_loader_sig = IPSEC_LOADER_LOADNOW; + cv_signal(&ipss->ipsec_loader_sig_cv); } - mutex_exit(&ipsec_loader_lock); + mutex_exit(&ipss->ipsec_loader_lock); } /* @@ -238,35 +229,35 @@ loader_nop(void *ignoreme) * Returns B_TRUE if it worked, B_FALSE if it didn't. */ boolean_t -ipsec_loader_wait(queue_t *q) +ipsec_loader_wait(queue_t *q, ipsec_stack_t *ipss) { /* * 30ms delay per loop is arbitrary; it takes ~300ms to * load and plumb ipsec on an ultra-1. */ - while (ipsec_loader_state == IPSEC_LOADER_WAIT) { + while (ipss->ipsec_loader_state == IPSEC_LOADER_WAIT) { (void) qtimeout(q, loader_nop, 0, drv_usectohz(30000)); qwait(q); } - return (ipsec_loader_state == IPSEC_LOADER_SUCCEEDED); + return (ipss->ipsec_loader_state == IPSEC_LOADER_SUCCEEDED); } /* * Just check to see if IPsec is loaded (or not). */ boolean_t -ipsec_loaded(void) +ipsec_loaded(ipsec_stack_t *ipss) { - return (ipsec_loader_state == IPSEC_LOADER_SUCCEEDED); + return (ipss->ipsec_loader_state == IPSEC_LOADER_SUCCEEDED); } /* * Check to see if IPsec loading failed. */ boolean_t -ipsec_failed(void) +ipsec_failed(ipsec_stack_t *ipss) { - return (ipsec_loader_state == IPSEC_LOADER_FAILED); + return (ipss->ipsec_loader_state == IPSEC_LOADER_FAILED); } diff --git a/usr/src/uts/common/inet/ip/ipsecah.c b/usr/src/uts/common/inet/ip/ipsecah.c index adcffc7c64..540c6b11dc 100644 --- a/usr/src/uts/common/inet/ip/ipsecah.c +++ b/usr/src/uts/common/inet/ip/ipsecah.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -35,6 +35,7 @@ #include <sys/ddi.h> #include <sys/sunddi.h> #include <sys/kmem.h> +#include <sys/zone.h> #include <sys/sysmacros.h> #include <sys/cmn_err.h> #include <sys/vtrace.h> @@ -69,16 +70,12 @@ #include <sys/kstat.h> #include <sys/strsubr.h> -/* Packet dropper for AH drops. */ -static ipdropper_t ah_dropper; - -static kmutex_t ipsecah_param_lock; /* Protect ipsecah_param_arr[] below. */ /* * Table of ND variables supported by ipsecah. These are loaded into * ipsecah_g_nd in ipsecah_init_nd. * All of these are alterable, within the min/max values given, at run time. 
*/ -static ipsecahparam_t ipsecah_param_arr[] = { +static ipsecahparam_t lcl_param_arr[] = { /* min max value name */ { 0, 3, 0, "ipsecah_debug"}, { 125, 32000, SADB_AGE_INTERVAL_DEFAULT, "ipsecah_age_interval"}, @@ -95,28 +92,26 @@ static ipsecahparam_t ipsecah_param_arr[] = { { 0, 0xffffffffU, 0, "ipsecah_default_hard_usetime"}, { 0, 1, 0, "ipsecah_log_unknown_spi"}, }; -#define ipsecah_debug ipsecah_param_arr[0].ipsecah_param_value -#define ipsecah_age_interval ipsecah_param_arr[1].ipsecah_param_value -#define ipsecah_age_int_max ipsecah_param_arr[1].ipsecah_param_max -#define ipsecah_reap_delay ipsecah_param_arr[2].ipsecah_param_value -#define ipsecah_replay_size ipsecah_param_arr[3].ipsecah_param_value -#define ipsecah_acquire_timeout ipsecah_param_arr[4].ipsecah_param_value -#define ipsecah_larval_timeout ipsecah_param_arr[5].ipsecah_param_value -#define ipsecah_default_soft_bytes ipsecah_param_arr[6].ipsecah_param_value -#define ipsecah_default_hard_bytes ipsecah_param_arr[7].ipsecah_param_value -#define ipsecah_default_soft_addtime ipsecah_param_arr[8].ipsecah_param_value -#define ipsecah_default_hard_addtime ipsecah_param_arr[9].ipsecah_param_value -#define ipsecah_default_soft_usetime ipsecah_param_arr[10].ipsecah_param_value -#define ipsecah_default_hard_usetime ipsecah_param_arr[11].ipsecah_param_value -#define ipsecah_log_unknown_spi ipsecah_param_arr[12].ipsecah_param_value +#define ipsecah_debug ipsecah_params[0].ipsecah_param_value +#define ipsecah_age_interval ipsecah_params[1].ipsecah_param_value +#define ipsecah_age_int_max ipsecah_params[1].ipsecah_param_max +#define ipsecah_reap_delay ipsecah_params[2].ipsecah_param_value +#define ipsecah_replay_size ipsecah_params[3].ipsecah_param_value +#define ipsecah_acquire_timeout ipsecah_params[4].ipsecah_param_value +#define ipsecah_larval_timeout ipsecah_params[5].ipsecah_param_value +#define ipsecah_default_soft_bytes ipsecah_params[6].ipsecah_param_value +#define ipsecah_default_hard_bytes ipsecah_params[7].ipsecah_param_value +#define ipsecah_default_soft_addtime ipsecah_params[8].ipsecah_param_value +#define ipsecah_default_hard_addtime ipsecah_params[9].ipsecah_param_value +#define ipsecah_default_soft_usetime ipsecah_params[10].ipsecah_param_value +#define ipsecah_default_hard_usetime ipsecah_params[11].ipsecah_param_value +#define ipsecah_log_unknown_spi ipsecah_params[12].ipsecah_param_value #define ah0dbg(a) printf a /* NOTE: != 0 instead of > 0 so lint doesn't complain. */ -#define ah1dbg(a) if (ipsecah_debug != 0) printf a -#define ah2dbg(a) if (ipsecah_debug > 1) printf a -#define ah3dbg(a) if (ipsecah_debug > 2) printf a - -static IDP ipsecah_g_nd; +#define ah1dbg(ahstack, a) if (ahstack->ipsecah_debug != 0) printf a +#define ah2dbg(ahstack, a) if (ahstack->ipsecah_debug > 1) printf a +#define ah3dbg(ahstack, a) if (ahstack->ipsecah_debug > 2) printf a /* * XXX This is broken. Padding should be determined dynamically @@ -134,13 +129,14 @@ static IDP ipsecah_g_nd; */ #define AH_MSGSIZE(mp) ((mp)->b_cont != NULL ? 
msgdsize(mp) : MBLKL(mp)) + static ipsec_status_t ah_auth_out_done(mblk_t *); static ipsec_status_t ah_auth_in_done(mblk_t *); static mblk_t *ah_process_ip_options_v4(mblk_t *, ipsa_t *, int *, uint_t, - boolean_t); + boolean_t, ipsecah_stack_t *); static mblk_t *ah_process_ip_options_v6(mblk_t *, ipsa_t *, int *, uint_t, - boolean_t); -static void ah_getspi(mblk_t *, keysock_in_t *); + boolean_t, ipsecah_stack_t *); +static void ah_getspi(mblk_t *, keysock_in_t *, ipsecah_stack_t *); static ipsec_status_t ah_inbound_accelerated(mblk_t *, boolean_t, ipsa_t *, uint32_t); static ipsec_status_t ah_outbound_accelerated_v4(mblk_t *, ipsa_t *); @@ -151,8 +147,15 @@ static int ipsecah_open(queue_t *, dev_t *, int, int, cred_t *); static int ipsecah_close(queue_t *); static void ipsecah_rput(queue_t *, mblk_t *); static void ipsecah_wput(queue_t *, mblk_t *); -static void ah_send_acquire(ipsacq_t *, mblk_t *); -static boolean_t ah_register_out(uint32_t, uint32_t, uint_t); +static void ah_send_acquire(ipsacq_t *, mblk_t *, netstack_t *); +static boolean_t ah_register_out(uint32_t, uint32_t, uint_t, ipsecah_stack_t *); +static void *ipsecah_stack_init(netstackid_t stackid, netstack_t *ns); +static void ipsecah_stack_fini(netstackid_t stackid, void *arg); + +/* Setable in /etc/system */ +uint32_t ah_hash_size = IPSEC_DEFAULT_HASH_SIZE; + +static taskq_t *ah_taskq; static struct module_info info = { 5136, "ipsecah", 0, INFPSZ, 65536, 1024 @@ -172,69 +175,29 @@ struct streamtab ipsecahinfo = { &rinit, &winit, NULL, NULL }; -/* - * Keysock instance of AH. "There can be only one." :) - * Use casptr() on this because I don't set it until KEYSOCK_HELLO comes down. - * Paired up with the ah_pfkey_q is the ah_event, which will age SAs. - */ -static queue_t *ah_pfkey_q; -static timeout_id_t ah_event; -static taskq_t *ah_taskq; - -static mblk_t *ah_ip_unbind; - -/* - * Stats. This may eventually become a full-blown SNMP MIB once that spec - * stabilizes. 
- */ -typedef struct -{ - kstat_named_t ah_stat_num_aalgs; - kstat_named_t ah_stat_good_auth; - kstat_named_t ah_stat_bad_auth; - kstat_named_t ah_stat_replay_failures; - kstat_named_t ah_stat_replay_early_failures; - kstat_named_t ah_stat_keysock_in; - kstat_named_t ah_stat_out_requests; - kstat_named_t ah_stat_acquire_requests; - kstat_named_t ah_stat_bytes_expired; - kstat_named_t ah_stat_out_discards; - kstat_named_t ah_stat_in_accelerated; - kstat_named_t ah_stat_out_accelerated; - kstat_named_t ah_stat_noaccel; - kstat_named_t ah_stat_crypto_sync; - kstat_named_t ah_stat_crypto_async; - kstat_named_t ah_stat_crypto_failures; -} ah_kstats_t; - -#define AH_BUMP_STAT(x) (ah_kstats->ah_stat_ ## x).value.ui64++ -#define AH_DEBUMP_STAT(x) (ah_kstats->ah_stat_ ## x).value.ui64-- - -uint32_t ah_hash_size = IPSEC_DEFAULT_HASH_SIZE; -static kstat_t *ah_ksp; -static ah_kstats_t *ah_kstats; - static int ah_kstat_update(kstat_t *, int); uint64_t ipsacq_maxpackets = IPSACQ_MAXPACKETS; static boolean_t -ah_kstat_init(void) +ah_kstat_init(ipsecah_stack_t *ahstack, netstackid_t stackid) { + ipsec_stack_t *ipss = ahstack->ipsecah_netstack->netstack_ipsec; - ah_ksp = kstat_create("ipsecah", 0, "ah_stat", "net", - KSTAT_TYPE_NAMED, sizeof (*ah_kstats) / sizeof (kstat_named_t), - KSTAT_FLAG_PERSISTENT); + ahstack->ah_ksp = kstat_create_netstack("ipsecah", 0, "ah_stat", "net", + KSTAT_TYPE_NAMED, sizeof (ah_kstats_t) / sizeof (kstat_named_t), + KSTAT_FLAG_PERSISTENT, stackid); - if (ah_ksp == NULL) + if (ahstack->ah_ksp == NULL || ahstack->ah_ksp->ks_data == NULL) return (B_FALSE); - ah_kstats = ah_ksp->ks_data; + ahstack->ah_kstats = ahstack->ah_ksp->ks_data; - ah_ksp->ks_update = ah_kstat_update; + ahstack->ah_ksp->ks_update = ah_kstat_update; + ahstack->ah_ksp->ks_private = (void *)(uintptr_t)stackid; #define K64 KSTAT_DATA_UINT64 -#define KI(x) kstat_named_init(&(ah_kstats->ah_stat_##x), #x, K64) +#define KI(x) kstat_named_init(&(ahstack->ah_kstats->ah_stat_##x), #x, K64) KI(num_aalgs); KI(good_auth); @@ -256,15 +219,18 @@ ah_kstat_init(void) #undef KI #undef K64 - kstat_install(ah_ksp); - IP_ACQUIRE_STAT(maxpackets, ipsacq_maxpackets); + kstat_install(ahstack->ah_ksp); + IP_ACQUIRE_STAT(ipss, maxpackets, ipsacq_maxpackets); return (B_TRUE); } static int ah_kstat_update(kstat_t *kp, int rw) { - ah_kstats_t *ekp; + ah_kstats_t *ekp; + netstackid_t stackid = (netstackid_t)(uintptr_t)kp->ks_private; + netstack_t *ns; + ipsec_stack_t *ipss; if ((kp == NULL) || (kp->ks_data == NULL)) return (EIO); @@ -272,14 +238,21 @@ ah_kstat_update(kstat_t *kp, int rw) if (rw == KSTAT_WRITE) return (EACCES); - ASSERT(kp == ah_ksp); + ns = netstack_find_by_stackid(stackid); + if (ns == NULL) + return (-1); + ipss = ns->netstack_ipsec; + if (ipss == NULL) { + netstack_rele(ns); + return (-1); + } ekp = (ah_kstats_t *)kp->ks_data; - ASSERT(ekp == ah_kstats); - mutex_enter(&alg_lock); - ekp->ah_stat_num_aalgs.value.ui64 = ipsec_nalgs[IPSEC_ALG_AUTH]; - mutex_exit(&alg_lock); + mutex_enter(&ipss->ipsec_alg_lock); + ekp->ah_stat_num_aalgs.value.ui64 = ipss->ipsec_nalgs[IPSEC_ALG_AUTH]; + mutex_exit(&ipss->ipsec_alg_lock); + netstack_rele(ns); return (0); } @@ -288,19 +261,22 @@ ah_kstat_update(kstat_t *kp, int rw) * a time, because I control the one function that does a qtimeout() on * ah_pfkey_q. 
*/ -/* ARGSUSED */ static void -ah_ager(void *ignoreme) +ah_ager(void *arg) { + ipsecah_stack_t *ahstack = (ipsecah_stack_t *)arg; + netstack_t *ns = ahstack->ipsecah_netstack; hrtime_t begin = gethrtime(); - sadb_ager(&ah_sadb.s_v4, ah_pfkey_q, ah_sadb.s_ip_q, - ipsecah_reap_delay); - sadb_ager(&ah_sadb.s_v6, ah_pfkey_q, ah_sadb.s_ip_q, - ipsecah_reap_delay); + sadb_ager(&ahstack->ah_sadb.s_v4, ahstack->ah_pfkey_q, + ahstack->ah_sadb.s_ip_q, ahstack->ipsecah_reap_delay, ns); + sadb_ager(&ahstack->ah_sadb.s_v6, ahstack->ah_pfkey_q, + ahstack->ah_sadb.s_ip_q, ahstack->ipsecah_reap_delay, ns); - ah_event = sadb_retimeout(begin, ah_pfkey_q, ah_ager, - &ipsecah_age_interval, ipsecah_age_int_max, info.mi_idnum); + ahstack->ah_event = sadb_retimeout(begin, ahstack->ah_pfkey_q, + ah_ager, ahstack, + &ahstack->ipsecah_age_interval, ahstack->ipsecah_age_int_max, + info.mi_idnum); } /* @@ -316,10 +292,11 @@ ipsecah_param_get(q, mp, cp, cr) { ipsecahparam_t *ipsecahpa = (ipsecahparam_t *)cp; uint_t value; + ipsecah_stack_t *ahstack = (ipsecah_stack_t *)q->q_ptr; - mutex_enter(&ipsecah_param_lock); + mutex_enter(&ahstack->ipsecah_param_lock); value = ipsecahpa->ipsecah_param_value; - mutex_exit(&ipsecah_param_lock); + mutex_exit(&ahstack->ipsecah_param_lock); (void) mi_mpprintf(mp, "%u", value); return (0); @@ -339,6 +316,7 @@ ipsecah_param_set(q, mp, value, cp, cr) { ulong_t new_value; ipsecahparam_t *ipsecahpa = (ipsecahparam_t *)cp; + ipsecah_stack_t *ahstack = (ipsecah_stack_t *)q->q_ptr; /* * Fail the request if the new value does not lie within the @@ -351,9 +329,9 @@ ipsecah_param_set(q, mp, value, cp, cr) } /* Set the new value */ - mutex_enter(&ipsecah_param_lock); + mutex_enter(&ahstack->ipsecah_param_lock); ipsecahpa->ipsecah_param_value = new_value; - mutex_exit(&ipsecah_param_lock); + mutex_exit(&ahstack->ipsecah_param_lock); return (0); } @@ -362,14 +340,20 @@ ipsecah_param_set(q, mp, value, cp, cr) * lifetime information. */ void -ipsecah_fill_defs(sadb_x_ecomb_t *ecomb) +ipsecah_fill_defs(sadb_x_ecomb_t *ecomb, netstack_t *ns) { - ecomb->sadb_x_ecomb_soft_bytes = ipsecah_default_soft_bytes; - ecomb->sadb_x_ecomb_hard_bytes = ipsecah_default_hard_bytes; - ecomb->sadb_x_ecomb_soft_addtime = ipsecah_default_soft_addtime; - ecomb->sadb_x_ecomb_hard_addtime = ipsecah_default_hard_addtime; - ecomb->sadb_x_ecomb_soft_usetime = ipsecah_default_soft_usetime; - ecomb->sadb_x_ecomb_hard_usetime = ipsecah_default_hard_usetime; + ipsecah_stack_t *ahstack = ns->netstack_ipsecah; + + ecomb->sadb_x_ecomb_soft_bytes = ahstack->ipsecah_default_soft_bytes; + ecomb->sadb_x_ecomb_hard_bytes = ahstack->ipsecah_default_hard_bytes; + ecomb->sadb_x_ecomb_soft_addtime = + ahstack->ipsecah_default_soft_addtime; + ecomb->sadb_x_ecomb_hard_addtime = + ahstack->ipsecah_default_hard_addtime; + ecomb->sadb_x_ecomb_soft_usetime = + ahstack->ipsecah_default_soft_usetime; + ecomb->sadb_x_ecomb_hard_usetime = + ahstack->ipsecah_default_hard_usetime; } /* @@ -378,39 +362,72 @@ ipsecah_fill_defs(sadb_x_ecomb_t *ecomb) boolean_t ipsecah_ddi_init(void) { - int count; - ipsecahparam_t *ahp = ipsecah_param_arr; + ah_taskq = taskq_create("ah_taskq", 1, minclsyspri, + IPSEC_TASKQ_MIN, IPSEC_TASKQ_MAX, 0); + + /* + * We want to be informed each time a stack is created or + * destroyed in the kernel, so we can maintain the + * set of ipsecah_stack_t's. 
+ */ + netstack_register(NS_IPSECAH, ipsecah_stack_init, NULL, + ipsecah_stack_fini); - for (count = A_CNT(ipsecah_param_arr); count-- > 0; ahp++) { + return (B_TRUE); +} + +/* + * Walk through the param array specified registering each element with the + * named dispatch handler. + */ +static boolean_t +ipsecah_param_register(IDP *ndp, ipsecahparam_t *ahp, int cnt) +{ + for (; cnt-- > 0; ahp++) { if (ahp->ipsecah_param_name != NULL && ahp->ipsecah_param_name[0]) { - if (!nd_load(&ipsecah_g_nd, ahp->ipsecah_param_name, + if (!nd_load(ndp, + ahp->ipsecah_param_name, ipsecah_param_get, ipsecah_param_set, (caddr_t)ahp)) { - nd_free(&ipsecah_g_nd); + nd_free(ndp); return (B_FALSE); } } } + return (B_TRUE); +} - if (!ah_kstat_init()) { - nd_free(&ipsecah_g_nd); - return (B_FALSE); - } +/* + * Initialize things for AH for each stack instance + */ +static void * +ipsecah_stack_init(netstackid_t stackid, netstack_t *ns) +{ + ipsecah_stack_t *ahstack; + ipsecahparam_t *ahp; - ah_taskq = taskq_create("ah_taskq", 1, minclsyspri, - IPSEC_TASKQ_MIN, IPSEC_TASKQ_MAX, 0); + ahstack = (ipsecah_stack_t *)kmem_zalloc(sizeof (*ahstack), KM_SLEEP); + ahstack->ipsecah_netstack = ns; - ah_sadb.s_acquire_timeout = &ipsecah_acquire_timeout; - ah_sadb.s_acqfn = ah_send_acquire; + ahp = (ipsecahparam_t *)kmem_alloc(sizeof (lcl_param_arr), KM_SLEEP); + ahstack->ipsecah_params = ahp; + bcopy(lcl_param_arr, ahp, sizeof (lcl_param_arr)); - sadbp_init("AH", &ah_sadb, SADB_SATYPE_AH, ah_hash_size); + (void) ipsecah_param_register(&ahstack->ipsecah_g_nd, ahp, + A_CNT(lcl_param_arr)); - mutex_init(&ipsecah_param_lock, NULL, MUTEX_DEFAULT, 0); + (void) ah_kstat_init(ahstack, stackid); - ip_drop_register(&ah_dropper, "IPsec AH"); + ahstack->ah_sadb.s_acquire_timeout = &ahstack->ipsecah_acquire_timeout; + ahstack->ah_sadb.s_acqfn = ah_send_acquire; + sadbp_init("AH", &ahstack->ah_sadb, SADB_SATYPE_AH, ah_hash_size, + ahstack->ipsecah_netstack); - return (B_TRUE); + mutex_init(&ahstack->ipsecah_param_lock, NULL, MUTEX_DEFAULT, 0); + + ip_drop_register(&ahstack->ah_dropper, "IPsec AH"); + return (ahstack); } /* @@ -419,15 +436,35 @@ ipsecah_ddi_init(void) void ipsecah_ddi_destroy(void) { - ah1dbg(("In ddi_destroy.\n")); - - sadbp_destroy(&ah_sadb); - ip_drop_unregister(&ah_dropper); + netstack_unregister(NS_IPSECAH); taskq_destroy(ah_taskq); - mutex_destroy(&ipsecah_param_lock); - nd_free(&ipsecah_g_nd); +} - kstat_delete(ah_ksp); +/* + * Destroy things for AH for one stack... Never called? 
+ */ +static void +ipsecah_stack_fini(netstackid_t stackid, void *arg) +{ + ipsecah_stack_t *ahstack = (ipsecah_stack_t *)arg; + + if (ahstack->ah_pfkey_q != NULL) { + (void) quntimeout(ahstack->ah_pfkey_q, ahstack->ah_event); + } + ahstack->ah_sadb.s_acqfn = NULL; + ahstack->ah_sadb.s_acquire_timeout = NULL; + sadbp_destroy(&ahstack->ah_sadb, ahstack->ipsecah_netstack); + ip_drop_unregister(&ahstack->ah_dropper); + mutex_destroy(&ahstack->ipsecah_param_lock); + nd_free(&ahstack->ipsecah_g_nd); + + kmem_free(ahstack->ipsecah_params, sizeof (lcl_param_arr)); + ahstack->ipsecah_params = NULL; + kstat_delete_netstack(ahstack->ah_ksp, stackid); + ahstack->ah_ksp = NULL; + ahstack->ah_kstats = NULL; + + kmem_free(ahstack, sizeof (*ahstack)); } /* @@ -437,8 +474,11 @@ ipsecah_ddi_destroy(void) static int ipsecah_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) { - if (secpolicy_net_config(credp, B_FALSE) != 0) { - ah1dbg(("Non-privileged user trying to open ipsecah.\n")); + netstack_t *ns; + ipsecah_stack_t *ahstack; + + if (secpolicy_ip_config(credp, B_FALSE) != 0) { + ah0dbg(("Non-privileged user trying to open ipsecah.\n")); return (EPERM); } @@ -448,6 +488,11 @@ ipsecah_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) if (sflag != MODOPEN) return (EINVAL); + ns = netstack_find_by_cred(credp); + ASSERT(ns != NULL); + ahstack = ns->netstack_ipsecah; + ASSERT(ahstack != NULL); + /* * ASSUMPTIONS (because I'm MT_OCEXCL): * @@ -458,33 +503,36 @@ ipsecah_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) * If these assumptions are wrong, I'm in BIG trouble... */ - q->q_ptr = q; /* just so I know I'm open */ + q->q_ptr = ahstack; + WR(q)->q_ptr = q->q_ptr; - if (ah_sadb.s_ip_q == NULL) { + if (ahstack->ah_sadb.s_ip_q == NULL) { struct T_unbind_req *tur; - ah_sadb.s_ip_q = WR(q); + ahstack->ah_sadb.s_ip_q = WR(q); /* Allocate an unbind... */ - ah_ip_unbind = allocb(sizeof (struct T_unbind_req), BPRI_HI); + ahstack->ah_ip_unbind = allocb(sizeof (struct T_unbind_req), + BPRI_HI); /* * Send down T_BIND_REQ to bind IPPROTO_AH. * Handle the ACK here in AH. */ qprocson(q); - if (ah_ip_unbind == NULL || - !sadb_t_bind_req(ah_sadb.s_ip_q, IPPROTO_AH)) { - if (ah_ip_unbind != NULL) { - freeb(ah_ip_unbind); - ah_ip_unbind = NULL; + if (ahstack->ah_ip_unbind == NULL || + !sadb_t_bind_req(ahstack->ah_sadb.s_ip_q, IPPROTO_AH)) { + if (ahstack->ah_ip_unbind != NULL) { + freeb(ahstack->ah_ip_unbind); + ahstack->ah_ip_unbind = NULL; } q->q_ptr = NULL; qprocsoff(q); + netstack_rele(ahstack->ipsecah_netstack); return (ENOMEM); } - ah_ip_unbind->b_datap->db_type = M_PROTO; - tur = (struct T_unbind_req *)ah_ip_unbind->b_rptr; + ahstack->ah_ip_unbind->b_datap->db_type = M_PROTO; + tur = (struct T_unbind_req *)ahstack->ah_ip_unbind->b_rptr; tur->PRIM_type = T_UNBIND_REQ; } else { qprocson(q); @@ -505,14 +553,17 @@ ipsecah_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) static int ipsecah_close(queue_t *q) { + ipsecah_stack_t *ahstack = (ipsecah_stack_t *)q->q_ptr; + /* * If ah_sadb.s_ip_q is attached to this instance, send a * T_UNBIND_REQ to IP for the instance before doing * a qprocsoff(). 
*/ - if (WR(q) == ah_sadb.s_ip_q && ah_ip_unbind != NULL) { - putnext(WR(q), ah_ip_unbind); - ah_ip_unbind = NULL; + if (WR(q) == ahstack->ah_sadb.s_ip_q && + ahstack->ah_ip_unbind != NULL) { + putnext(WR(q), ahstack->ah_ip_unbind); + ahstack->ah_ip_unbind = NULL; } /* @@ -522,51 +573,56 @@ ipsecah_close(queue_t *q) /* Keysock queue check is safe, because of OCEXCL perimeter. */ - if (q == ah_pfkey_q) { - ah0dbg(("ipsecah_close: Ummm... keysock is closing AH.\n")); - ah_pfkey_q = NULL; + if (q == ahstack->ah_pfkey_q) { + ah1dbg(ahstack, + ("ipsecah_close: Ummm... keysock is closing AH.\n")); + ahstack->ah_pfkey_q = NULL; /* Detach qtimeouts. */ - (void) quntimeout(q, ah_event); + (void) quntimeout(q, ahstack->ah_event); } - if (WR(q) == ah_sadb.s_ip_q) { + if (WR(q) == ahstack->ah_sadb.s_ip_q) { /* * If the ah_sadb.s_ip_q is attached to this instance, find * another. The OCEXCL outer perimeter helps us here. */ - ah_sadb.s_ip_q = NULL; + ahstack->ah_sadb.s_ip_q = NULL; /* * Find a replacement queue for ah_sadb.s_ip_q. */ - if (ah_pfkey_q != NULL && ah_pfkey_q != RD(q)) { + if (ahstack->ah_pfkey_q != NULL && + ahstack->ah_pfkey_q != RD(q)) { /* * See if we can use the pfkey_q. */ - ah_sadb.s_ip_q = WR(ah_pfkey_q); + ahstack->ah_sadb.s_ip_q = WR(ahstack->ah_pfkey_q); } - if (ah_sadb.s_ip_q == NULL || - !sadb_t_bind_req(ah_sadb.s_ip_q, IPPROTO_AH)) { - ah1dbg(("ipsecah: Can't reassign ah_sadb.s_ip_q.\n")); - ah_sadb.s_ip_q = NULL; + if (ahstack->ah_sadb.s_ip_q == NULL || + !sadb_t_bind_req(ahstack->ah_sadb.s_ip_q, IPPROTO_AH)) { + ah1dbg(ahstack, + ("ipsecah: Can't reassign ah_sadb.s_ip_q.\n")); + ahstack->ah_sadb.s_ip_q = NULL; } else { - ah_ip_unbind = allocb(sizeof (struct T_unbind_req), - BPRI_HI); + ahstack->ah_ip_unbind = + allocb(sizeof (struct T_unbind_req), BPRI_HI); - if (ah_ip_unbind != NULL) { + if (ahstack->ah_ip_unbind != NULL) { struct T_unbind_req *tur; - ah_ip_unbind->b_datap->db_type = M_PROTO; + ahstack->ah_ip_unbind->b_datap->db_type = + M_PROTO; tur = (struct T_unbind_req *) - ah_ip_unbind->b_rptr; + ahstack->ah_ip_unbind->b_rptr; tur->PRIM_type = T_UNBIND_REQ; } /* If it's NULL, I can't do much here. */ } } + netstack_rele(ahstack->ipsecah_netstack); return (0); } @@ -577,7 +633,10 @@ ipsecah_close(queue_t *q) static void ipsecah_rput(queue_t *q, mblk_t *mp) { + ipsecah_stack_t *ahstack = (ipsecah_stack_t *)q->q_ptr; + ASSERT(mp->b_datap->db_type != M_CTL); /* No more IRE_DB_REQ. */ + switch (mp->b_datap->db_type) { case M_PROTO: case M_PCPROTO: @@ -585,7 +644,8 @@ ipsecah_rput(queue_t *q, mblk_t *mp) switch (*((t_scalar_t *)mp->b_rptr)) { case T_BIND_ACK: /* We expect this. */ - ah3dbg(("Thank you IP from AH for T_BIND_ACK\n")); + ah3dbg(ahstack, + ("Thank you IP from AH for T_BIND_ACK\n")); break; case T_ERROR_ACK: cmn_err(CE_WARN, @@ -595,13 +655,13 @@ ipsecah_rput(queue_t *q, mblk_t *mp) /* Probably from a (rarely sent) T_UNBIND_REQ. */ break; default: - ah1dbg(("Unknown M_{,PC}PROTO message.\n")); + ah1dbg(ahstack, ("Unknown M_{,PC}PROTO message.\n")); } freemsg(mp); break; default: /* For now, passthru message. */ - ah2dbg(("AH got unknown mblk type %d.\n", + ah2dbg(ahstack, ("AH got unknown mblk type %d.\n", mp->b_datap->db_type)); putnext(q, mp); } @@ -611,7 +671,8 @@ ipsecah_rput(queue_t *q, mblk_t *mp) * Construct an SADB_REGISTER message with the current algorithms. 
*/ static boolean_t -ah_register_out(uint32_t sequence, uint32_t pid, uint_t serial) +ah_register_out(uint32_t sequence, uint32_t pid, uint_t serial, + ipsecah_stack_t *ahstack) { mblk_t *mp; boolean_t rc = B_TRUE; @@ -622,6 +683,7 @@ ah_register_out(uint32_t sequence, uint32_t pid, uint_t serial) uint_t i, numalgs_snap; ipsec_alginfo_t **authalgs; uint_t num_aalgs; + ipsec_stack_t *ipss = ahstack->ipsecah_netstack->netstack_ipsec; /* Allocate the KEYSOCK_OUT. */ mp = sadb_keysock_out(serial); @@ -636,14 +698,14 @@ ah_register_out(uint32_t sequence, uint32_t pid, uint_t serial) * the variable part (i.e. the algorithms) of the message. */ - mutex_enter(&alg_lock); + mutex_enter(&ipss->ipsec_alg_lock); /* * Return only valid algorithms, so the number of algorithms * to send up may be less than the number of algorithm entries * in the table. */ - authalgs = ipsec_alglists[IPSEC_ALG_AUTH]; + authalgs = ipss->ipsec_alglists[IPSEC_ALG_AUTH]; for (num_aalgs = 0, i = 0; i < IPSEC_MAX_ALGS; i++) if (authalgs[i] != NULL && ALG_VALID(authalgs[i])) num_aalgs++; @@ -658,7 +720,7 @@ ah_register_out(uint32_t sequence, uint32_t pid, uint_t serial) } mp->b_cont = allocb(allocsize, BPRI_HI); if (mp->b_cont == NULL) { - mutex_exit(&alg_lock); + mutex_exit(&ipss->ipsec_alg_lock); freemsg(mp); return (B_FALSE); } @@ -672,7 +734,8 @@ ah_register_out(uint32_t sequence, uint32_t pid, uint_t serial) numalgs_snap = 0; for (i = 0; - ((i < IPSEC_MAX_ALGS) && (numalgs_snap < num_aalgs)); i++) { + ((i < IPSEC_MAX_ALGS) && (numalgs_snap < num_aalgs)); + i++) { if (authalgs[i] == NULL || !ALG_VALID(authalgs[i])) continue; @@ -682,7 +745,8 @@ ah_register_out(uint32_t sequence, uint32_t pid, uint_t serial) saalg->sadb_alg_maxbits = authalgs[i]->alg_ef_maxbits; saalg->sadb_x_alg_increment = authalgs[i]->alg_increment; - saalg->sadb_x_alg_defincr = authalgs[i]->alg_ef_default; + saalg->sadb_x_alg_defincr = + authalgs[i]->alg_ef_default; numalgs_snap++; saalg++; } @@ -699,7 +763,7 @@ ah_register_out(uint32_t sequence, uint32_t pid, uint_t serial) #endif /* DEBUG */ } - mutex_exit(&alg_lock); + mutex_exit(&ipss->ipsec_alg_lock); /* Now fill the restof the SADB_REGISTER message. */ @@ -725,8 +789,8 @@ ah_register_out(uint32_t sequence, uint32_t pid, uint_t serial) sasupp->sadb_supported_reserved = 0; } - if (ah_pfkey_q != NULL) - putnext(ah_pfkey_q, mp); + if (ahstack->ah_pfkey_q != NULL) + putnext(ahstack->ah_pfkey_q, mp); else { rc = B_FALSE; freemsg(mp); @@ -741,13 +805,15 @@ ah_register_out(uint32_t sequence, uint32_t pid, uint_t serial) * sent up to the AH listeners. */ void -ipsecah_algs_changed(void) +ipsecah_algs_changed(netstack_t *ns) { + ipsecah_stack_t *ahstack = ns->netstack_ipsecah; + /* * Time to send a PF_KEY SADB_REGISTER message to AH listeners * everywhere. (The function itself checks for NULL ah_pfkey_q.) 
*/ - (void) ah_register_out(0, 0, 0); + (void) ah_register_out(0, 0, 0, ahstack); } /* @@ -761,10 +827,13 @@ inbound_task(void *arg) mblk_t *mp = (mblk_t *)arg; ipsec_in_t *ii = (ipsec_in_t *)mp->b_rptr; int ipsec_rc; + netstack_t *ns = ii->ipsec_in_ns; + ipsecah_stack_t *ahstack = ns->netstack_ipsecah; - ah2dbg(("in AH inbound_task")); + ah2dbg(ahstack, ("in AH inbound_task")); - ah = ipsec_inbound_ah_sa(mp); + ASSERT(ahstack != NULL); + ah = ipsec_inbound_ah_sa(mp, ns); if (ah == NULL) return; ASSERT(ii->ipsec_in_ah_sa != NULL); @@ -781,7 +850,7 @@ inbound_task(void *arg) */ static int ah_add_sa_finish(mblk_t *mp, sadb_msg_t *samsg, keysock_in_t *ksi, - int *diagnostic) + int *diagnostic, ipsecah_stack_t *ahstack) { isaf_t *primary, *secondary, *inbound, *outbound; sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA]; @@ -799,6 +868,8 @@ ah_add_sa_finish(mblk_t *mp, sadb_msg_t *samsg, keysock_in_t *ksi, int rc; sadb_t *sp; int outhash; + netstack_t *ns = ahstack->ipsecah_netstack; + ipsec_stack_t *ipss = ns->netstack_ipsec; /* * Locate the appropriate table(s). @@ -808,12 +879,12 @@ ah_add_sa_finish(mblk_t *mp, sadb_msg_t *samsg, keysock_in_t *ksi, dst6 = (struct sockaddr_in6 *)dst; is_ipv4 = (dst->sin_family == AF_INET); if (is_ipv4) { - sp = &ah_sadb.s_v4; + sp = &ahstack->ah_sadb.s_v4; dstaddr = (uint32_t *)(&dst->sin_addr); outhash = OUTBOUND_HASH_V4(sp, *(ipaddr_t *)dstaddr); } else { ASSERT(dst->sin_family == AF_INET6); - sp = &ah_sadb.s_v6; + sp = &ahstack->ah_sadb.s_v6; dstaddr = (uint32_t *)(&dst6->sin6_addr); outhash = OUTBOUND_HASH_V6(sp, *(in6_addr_t *)dstaddr); } @@ -893,7 +964,7 @@ ah_add_sa_finish(mblk_t *mp, sadb_msg_t *samsg, keysock_in_t *ksi, acq_msgs = acqrec->ipsacq_mp; acqrec->ipsacq_mp = NULL; mutex_exit(&acqrec->ipsacq_lock); - sadb_destroy_acquire(acqrec); + sadb_destroy_acquire(acqrec, ns); } mutex_exit(&acq_bucket->iacqf_lock); } @@ -922,8 +993,9 @@ ah_add_sa_finish(mblk_t *mp, sadb_msg_t *samsg, keysock_in_t *ksi, if (larval != NULL) lpkt = sadb_clear_lpkt(larval); - rc = sadb_common_add(ah_sadb.s_ip_q, ah_pfkey_q, mp, samsg, ksi, - primary, secondary, larval, clone, is_inbound, diagnostic); + rc = sadb_common_add(ahstack->ah_sadb.s_ip_q, ahstack->ah_pfkey_q, mp, + samsg, ksi, primary, secondary, larval, clone, is_inbound, + diagnostic, ns); /* * How much more stack will I create with all of these @@ -937,7 +1009,8 @@ ah_add_sa_finish(mblk_t *mp, sadb_msg_t *samsg, keysock_in_t *ksi, if (rc != 0) { ip_drop_packet(lpkt, B_TRUE, NULL, NULL, - &ipdrops_sadb_inlarval_timeout, &ah_dropper); + DROPPER(ipss, ipds_sadb_inlarval_timeout), + &ahstack->ah_dropper); } while (acq_msgs != NULL) { @@ -948,7 +1021,7 @@ ah_add_sa_finish(mblk_t *mp, sadb_msg_t *samsg, keysock_in_t *ksi, if (rc == 0) { ipsec_out_t *io = (ipsec_out_t *)mp->b_rptr; - ASSERT(ah_sadb.s_ip_q != NULL); + ASSERT(ahstack->ah_sadb.s_ip_q != NULL); if (ipsec_outbound_sa(mp, IPPROTO_AH)) { io->ipsec_out_ah_done = B_TRUE; if (ah_outbound(mp) == IPSEC_STATUS_SUCCESS) { @@ -966,9 +1039,10 @@ ah_add_sa_finish(mblk_t *mp, sadb_msg_t *samsg, keysock_in_t *ksi, continue; } } - AH_BUMP_STAT(out_discards); + AH_BUMP_STAT(ahstack, out_discards); ip_drop_packet(mp, B_FALSE, NULL, NULL, - &ipdrops_sadb_acquire_timeout, &ah_dropper); + DROPPER(ipss, ipds_sadb_acquire_timeout), + &ahstack->ah_dropper); } return (rc); @@ -979,7 +1053,7 @@ ah_add_sa_finish(mblk_t *mp, sadb_msg_t *samsg, keysock_in_t *ksi, * routine eventually. 
*/ static int -ah_add_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic) +ah_add_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic, netstack_t *ns) { sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA]; sadb_address_t *srcext = @@ -998,6 +1072,8 @@ ah_add_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic) sadb_lifetime_t *hard = (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD]; ipsec_alginfo_t *aalg; + ipsecah_stack_t *ahstack = ns->netstack_ipsecah; + ipsec_stack_t *ipss = ns->netstack_ipsec; /* I need certain extensions present for an ADD message. */ if (srcext == NULL) { @@ -1062,11 +1138,12 @@ ah_add_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic) */ /* verify that there is a mapping for the specified algorithm */ - mutex_enter(&alg_lock); - aalg = ipsec_alglists[IPSEC_ALG_AUTH][assoc->sadb_sa_auth]; + mutex_enter(&ipss->ipsec_alg_lock); + aalg = ipss->ipsec_alglists[IPSEC_ALG_AUTH][assoc->sadb_sa_auth]; if (aalg == NULL || !ALG_VALID(aalg)) { - mutex_exit(&alg_lock); - ah1dbg(("Couldn't find auth alg #%d.\n", assoc->sadb_sa_auth)); + mutex_exit(&ipss->ipsec_alg_lock); + ah1dbg(ahstack, ("Couldn't find auth alg #%d.\n", + assoc->sadb_sa_auth)); *diagnostic = SADB_X_DIAGNOSTIC_BAD_AALG; return (EINVAL); } @@ -1074,7 +1151,7 @@ ah_add_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic) /* sanity check key sizes */ if (!ipsec_valid_key_size(key->sadb_key_bits, aalg)) { - mutex_exit(&alg_lock); + mutex_exit(&ipss->ipsec_alg_lock); *diagnostic = SADB_X_DIAGNOSTIC_BAD_AKEYBITS; return (EINVAL); } @@ -1082,14 +1159,14 @@ ah_add_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic) /* check key and fix parity if needed */ if (ipsec_check_key(aalg->alg_mech_type, key, B_TRUE, diagnostic) != 0) { - mutex_exit(&alg_lock); + mutex_exit(&ipss->ipsec_alg_lock); return (EINVAL); } - mutex_exit(&alg_lock); + mutex_exit(&ipss->ipsec_alg_lock); return (ah_add_sa_finish(mp, (sadb_msg_t *)mp->b_cont->b_rptr, ksi, - diagnostic)); + diagnostic, ahstack)); } /* @@ -1098,7 +1175,8 @@ ah_add_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic) * a larval SA, which ends up looking a lot more like an add. */ static int -ah_update_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic) +ah_update_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic, + ipsecah_stack_t *ahstack) { sadb_address_t *dstext = (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST]; @@ -1110,8 +1188,10 @@ ah_update_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic) } sin = (struct sockaddr_in *)(dstext + 1); return (sadb_update_sa(mp, ksi, - (sin->sin_family == AF_INET6) ? &ah_sadb.s_v6 : &ah_sadb.s_v4, - diagnostic, ah_pfkey_q, ah_add_sa)); + (sin->sin_family == AF_INET6) ? &ahstack->ah_sadb.s_v6 : + &ahstack->ah_sadb.s_v4, + diagnostic, ahstack->ah_pfkey_q, ah_add_sa, + ahstack->ipsecah_netstack)); } /* @@ -1119,7 +1199,8 @@ ah_update_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic) * both AH and ESP. Find the association, then unlink it. */ static int -ah_del_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic) +ah_del_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic, + ipsecah_stack_t *ahstack) { sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA]; sadb_address_t *dstext = @@ -1138,11 +1219,13 @@ ah_del_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic) return (EINVAL); } return (sadb_purge_sa(mp, ksi, - (sin->sin_family == AF_INET6) ? &ah_sadb.s_v6 : - &ah_sadb.s_v4, ah_pfkey_q, ah_sadb.s_ip_q)); + (sin->sin_family == AF_INET6) ? 
&ahstack->ah_sadb.s_v6 : + &ahstack->ah_sadb.s_v4, + ahstack->ah_pfkey_q, ahstack->ah_sadb.s_ip_q)); } - return (sadb_del_sa(mp, ksi, &ah_sadb, diagnostic, ah_pfkey_q)); + return (sadb_del_sa(mp, ksi, &ahstack->ah_sadb, diagnostic, + ahstack->ah_pfkey_q)); } /* @@ -1150,7 +1233,7 @@ ah_del_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic) * messages. */ static void -ah_dump(mblk_t *mp, keysock_in_t *ksi) +ah_dump(mblk_t *mp, keysock_in_t *ksi, ipsecah_stack_t *ahstack) { int error; sadb_msg_t *samsg; @@ -1159,24 +1242,27 @@ ah_dump(mblk_t *mp, keysock_in_t *ksi) * Dump each fanout, bailing if error is non-zero. */ - error = sadb_dump(ah_pfkey_q, mp, ksi->ks_in_serial, &ah_sadb.s_v4); + error = sadb_dump(ahstack->ah_pfkey_q, mp, ksi->ks_in_serial, + &ahstack->ah_sadb.s_v4); if (error != 0) goto bail; - error = sadb_dump(ah_pfkey_q, mp, ksi->ks_in_serial, &ah_sadb.s_v6); + error = sadb_dump(ahstack->ah_pfkey_q, mp, ksi->ks_in_serial, + &ahstack->ah_sadb.s_v6); bail: ASSERT(mp->b_cont != NULL); samsg = (sadb_msg_t *)mp->b_cont->b_rptr; samsg->sadb_msg_errno = (uint8_t)error; - sadb_pfkey_echo(ah_pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr, ksi, - NULL); + sadb_pfkey_echo(ahstack->ah_pfkey_q, mp, + (sadb_msg_t *)mp->b_cont->b_rptr, ksi, NULL); } /* * First-cut reality check for an inbound PF_KEY message. */ static boolean_t -ah_pfkey_reality_failures(mblk_t *mp, keysock_in_t *ksi) +ah_pfkey_reality_failures(mblk_t *mp, keysock_in_t *ksi, + ipsecah_stack_t *ahstack) { int diagnostic; @@ -1201,7 +1287,8 @@ ah_pfkey_reality_failures(mblk_t *mp, keysock_in_t *ksi) return (B_FALSE); /* False ==> no failures */ badmsg: - sadb_pfkey_error(ah_pfkey_q, mp, EINVAL, diagnostic, ksi->ks_in_serial); + sadb_pfkey_error(ahstack->ah_pfkey_q, mp, EINVAL, + diagnostic, ksi->ks_in_serial); return (B_TRUE); /* True ==> failures */ } @@ -1217,7 +1304,7 @@ badmsg: * mucking with PF_KEY messages. */ static void -ah_parse_pfkey(mblk_t *mp) +ah_parse_pfkey(mblk_t *mp, ipsecah_stack_t *ahstack) { mblk_t *msg = mp->b_cont; sadb_msg_t *samsg; @@ -1226,6 +1313,7 @@ ah_parse_pfkey(mblk_t *mp) int diagnostic = SADB_X_DIAGNOSTIC_NONE; ASSERT(msg != NULL); + samsg = (sadb_msg_t *)msg->b_rptr; ksi = (keysock_in_t *)mp->b_rptr; @@ -1233,39 +1321,42 @@ ah_parse_pfkey(mblk_t *mp) * If applicable, convert unspecified AF_INET6 to unspecified * AF_INET. */ - if (!sadb_addrfix(ksi, ah_pfkey_q, mp) || - ah_pfkey_reality_failures(mp, ksi)) { + if (!sadb_addrfix(ksi, ahstack->ah_pfkey_q, mp, + ahstack->ipsecah_netstack) || + ah_pfkey_reality_failures(mp, ksi, ahstack)) { return; } switch (samsg->sadb_msg_type) { case SADB_ADD: - error = ah_add_sa(mp, ksi, &diagnostic); + error = ah_add_sa(mp, ksi, &diagnostic, + ahstack->ipsecah_netstack); if (error != 0) { - sadb_pfkey_error(ah_pfkey_q, mp, error, diagnostic, - ksi->ks_in_serial); + sadb_pfkey_error(ahstack->ah_pfkey_q, mp, error, + diagnostic, ksi->ks_in_serial); } /* else ah_add_sa() took care of things. */ break; case SADB_DELETE: - error = ah_del_sa(mp, ksi, &diagnostic); + error = ah_del_sa(mp, ksi, &diagnostic, ahstack); if (error != 0) { - sadb_pfkey_error(ah_pfkey_q, mp, error, diagnostic, - ksi->ks_in_serial); + sadb_pfkey_error(ahstack->ah_pfkey_q, mp, error, + diagnostic, ksi->ks_in_serial); } /* Else ah_del_sa() took care of things. 
*/ break; case SADB_GET: - error = sadb_get_sa(mp, ksi, &ah_sadb, &diagnostic, ah_pfkey_q); + error = sadb_get_sa(mp, ksi, &ahstack->ah_sadb, &diagnostic, + ahstack->ah_pfkey_q); if (error != 0) { - sadb_pfkey_error(ah_pfkey_q, mp, error, diagnostic, - ksi->ks_in_serial); + sadb_pfkey_error(ahstack->ah_pfkey_q, mp, error, + diagnostic, ksi->ks_in_serial); } /* Else sadb_get_sa() took care of things. */ break; case SADB_FLUSH: - sadbp_flush(&ah_sadb); - sadb_pfkey_echo(ah_pfkey_q, mp, samsg, ksi, NULL); + sadbp_flush(&ahstack->ah_sadb, ahstack->ipsecah_netstack); + sadb_pfkey_echo(ahstack->ah_pfkey_q, mp, samsg, ksi, NULL); break; case SADB_REGISTER: /* @@ -1276,7 +1367,7 @@ ah_parse_pfkey(mblk_t *mp) * Keysock takes care of the PF_KEY bookkeeping for this. */ if (ah_register_out(samsg->sadb_msg_seq, samsg->sadb_msg_pid, - ksi->ks_in_serial)) { + ksi->ks_in_serial, ahstack)) { freemsg(mp); } else { /* @@ -1284,8 +1375,8 @@ ah_parse_pfkey(mblk_t *mp) * failure. It will not return B_FALSE because of * lack of ah_pfkey_q if I am in wput(). */ - sadb_pfkey_error(ah_pfkey_q, mp, ENOMEM, diagnostic, - ksi->ks_in_serial); + sadb_pfkey_error(ahstack->ah_pfkey_q, mp, ENOMEM, + diagnostic, ksi->ks_in_serial); } break; case SADB_UPDATE: @@ -1293,10 +1384,10 @@ ah_parse_pfkey(mblk_t *mp) * Find a larval, if not there, find a full one and get * strict. */ - error = ah_update_sa(mp, ksi, &diagnostic); + error = ah_update_sa(mp, ksi, &diagnostic, ahstack); if (error != 0) { - sadb_pfkey_error(ah_pfkey_q, mp, error, diagnostic, - ksi->ks_in_serial); + sadb_pfkey_error(ahstack->ah_pfkey_q, mp, error, + diagnostic, ksi->ks_in_serial); } /* else ah_update_sa() took care of things. */ break; @@ -1304,7 +1395,7 @@ ah_parse_pfkey(mblk_t *mp) /* * Reserve a new larval entry. */ - ah_getspi(mp, ksi); + ah_getspi(mp, ksi, ahstack); break; case SADB_ACQUIRE: /* @@ -1312,23 +1403,24 @@ ah_parse_pfkey(mblk_t *mp) * most likely an error. Inbound ACQUIRE messages should only * have the base header. */ - sadb_in_acquire(samsg, &ah_sadb, ah_pfkey_q); + sadb_in_acquire(samsg, &ahstack->ah_sadb, ahstack->ah_pfkey_q, + ahstack->ipsecah_netstack); freemsg(mp); break; case SADB_DUMP: /* * Dump all entries. */ - ah_dump(mp, ksi); + ah_dump(mp, ksi, ahstack); /* ah_dump will take care of the return message, etc. */ break; case SADB_EXPIRE: /* Should never reach me. */ - sadb_pfkey_error(ah_pfkey_q, mp, EOPNOTSUPP, diagnostic, - ksi->ks_in_serial); + sadb_pfkey_error(ahstack->ah_pfkey_q, mp, EOPNOTSUPP, + diagnostic, ksi->ks_in_serial); break; default: - sadb_pfkey_error(ah_pfkey_q, mp, EINVAL, + sadb_pfkey_error(ahstack->ah_pfkey_q, mp, EINVAL, SADB_X_DIAGNOSTIC_UNKNOWN_MSG, ksi->ks_in_serial); break; } @@ -1339,7 +1431,7 @@ ah_parse_pfkey(mblk_t *mp) * ACQUIRE messages. */ static void -ah_keysock_no_socket(mblk_t *mp) +ah_keysock_no_socket(mblk_t *mp, ipsecah_stack_t *ahstack) { sadb_msg_t *samsg; keysock_out_err_t *kse = (keysock_out_err_t *)mp->b_rptr; @@ -1359,9 +1451,10 @@ ah_keysock_no_socket(mblk_t *mp) samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg)); /* * Use the write-side of the ah_pfkey_q, in case there is - * no ah_sadb.s_ip_q. + * no ahstack->ah_sadb.s_ip_q. 
*/ - sadb_in_acquire(samsg, &ah_sadb, WR(ah_pfkey_q)); + sadb_in_acquire(samsg, &ahstack->ah_sadb, + WR(ahstack->ah_pfkey_q), ahstack->ipsecah_netstack); } freemsg(mp); @@ -1375,8 +1468,9 @@ ipsecah_wput(queue_t *q, mblk_t *mp) { ipsec_info_t *ii; struct iocblk *iocp; + ipsecah_stack_t *ahstack = (ipsecah_stack_t *)q->q_ptr; - ah3dbg(("In ah_wput().\n")); + ah3dbg(ahstack, ("In ah_wput().\n")); /* NOTE: Each case must take care of freeing or passing mp. */ switch (mp->b_datap->db_type) { @@ -1390,22 +1484,23 @@ ipsecah_wput(queue_t *q, mblk_t *mp) switch (ii->ipsec_info_type) { case KEYSOCK_OUT_ERR: - ah1dbg(("Got KEYSOCK_OUT_ERR message.\n")); - ah_keysock_no_socket(mp); + ah1dbg(ahstack, ("Got KEYSOCK_OUT_ERR message.\n")); + ah_keysock_no_socket(mp, ahstack); break; case KEYSOCK_IN: - AH_BUMP_STAT(keysock_in); - ah3dbg(("Got KEYSOCK_IN message.\n")); + AH_BUMP_STAT(ahstack, keysock_in); + ah3dbg(ahstack, ("Got KEYSOCK_IN message.\n")); /* Parse the message. */ - ah_parse_pfkey(mp); + ah_parse_pfkey(mp, ahstack); break; case KEYSOCK_HELLO: - sadb_keysock_hello(&ah_pfkey_q, q, mp, - ah_ager, &ah_event, SADB_SATYPE_AH); + sadb_keysock_hello(&ahstack->ah_pfkey_q, q, mp, + ah_ager, (void *)ahstack, &ahstack->ah_event, + SADB_SATYPE_AH); break; default: - ah1dbg(("Got M_CTL from above of 0x%x.\n", + ah1dbg(ahstack, ("Got M_CTL from above of 0x%x.\n", ii->ipsec_info_type)); freemsg(mp); break; @@ -1416,7 +1511,7 @@ ipsecah_wput(queue_t *q, mblk_t *mp) switch (iocp->ioc_cmd) { case ND_SET: case ND_GET: - if (nd_getset(q, ipsecah_g_nd, mp)) { + if (nd_getset(q, ahstack->ipsecah_g_nd, mp)) { qreply(q, mp); return; } else { @@ -1435,7 +1530,8 @@ ipsecah_wput(queue_t *q, mblk_t *mp) return; } default: - ah3dbg(("Got default message, type %d, passing to IP.\n", + ah3dbg(ahstack, + ("Got default message, type %d, passing to IP.\n", mp->b_datap->db_type)); putnext(q, mp); } @@ -1456,6 +1552,8 @@ ah_set_usetime(ipsa_t *assoc, boolean_t inbound) sadb_t *sp; int outhash; boolean_t isv6; + netstack_t *ns = assoc->ipsa_netstack; + ipsecah_stack_t *ahstack = ns->netstack_ipsecah; /* No peer? No problem! */ if (!assoc->ipsa_haspeer) { @@ -1476,9 +1574,9 @@ ah_set_usetime(ipsa_t *assoc, boolean_t inbound) /* Use address family to select IPv6/IPv4 */ isv6 = (assoc->ipsa_addrfam == AF_INET6); if (isv6) { - sp = &ah_sadb.s_v6; + sp = &ahstack->ah_sadb.s_v6; } else { - sp = &ah_sadb.s_v4; + sp = &ahstack->ah_sadb.s_v4; ASSERT(assoc->ipsa_addrfam == AF_INET); } if (inbound) { @@ -1552,10 +1650,12 @@ ah_age_bytes(ipsa_t *assoc, uint64_t bytes, boolean_t inbound) boolean_t inrc, outrc, isv6; sadb_t *sp; int outhash; + netstack_t *ns = assoc->ipsa_netstack; + ipsecah_stack_t *ahstack = ns->netstack_ipsecah; /* No peer? No problem! */ if (!assoc->ipsa_haspeer) { - return (sadb_age_bytes(ah_pfkey_q, assoc, bytes, + return (sadb_age_bytes(ahstack->ah_pfkey_q, assoc, bytes, B_TRUE)); } @@ -1573,9 +1673,9 @@ ah_age_bytes(ipsa_t *assoc, uint64_t bytes, boolean_t inbound) /* Pick v4/v6 bucket based on addrfam. */ isv6 = (assoc->ipsa_addrfam == AF_INET6); if (isv6) { - sp = &ah_sadb.s_v6; + sp = &ahstack->ah_sadb.s_v6; } else { - sp = &ah_sadb.s_v4; + sp = &ahstack->ah_sadb.s_v4; ASSERT(assoc->ipsa_addrfam == AF_INET); } if (inbound) { @@ -1596,7 +1696,7 @@ ah_age_bytes(ipsa_t *assoc, uint64_t bytes, boolean_t inbound) /* Q: Do we wish to set haspeer == B_FALSE? 
*/ ah0dbg(("ah_age_bytes: " "can't find peer for inbound.\n")); - return (sadb_age_bytes(ah_pfkey_q, inassoc, + return (sadb_age_bytes(ahstack->ah_pfkey_q, inassoc, bytes, B_TRUE)); } } else { @@ -1611,13 +1711,13 @@ ah_age_bytes(ipsa_t *assoc, uint64_t bytes, boolean_t inbound) /* Q: Do we wish to set haspeer == B_FALSE? */ ah0dbg(("ah_age_bytes: " "can't find peer for outbound.\n")); - return (sadb_age_bytes(ah_pfkey_q, outassoc, + return (sadb_age_bytes(ahstack->ah_pfkey_q, outassoc, bytes, B_TRUE)); } } - inrc = sadb_age_bytes(ah_pfkey_q, inassoc, bytes, B_TRUE); - outrc = sadb_age_bytes(ah_pfkey_q, outassoc, bytes, B_FALSE); + inrc = sadb_age_bytes(ahstack->ah_pfkey_q, inassoc, bytes, B_TRUE); + outrc = sadb_age_bytes(ahstack->ah_pfkey_q, outassoc, bytes, B_FALSE); /* * REFRELE any peer SA. @@ -1645,16 +1745,23 @@ ah_insert_prop(sadb_prop_t *prop, ipsacq_t *acqrec, uint_t combs) ipsec_out_t *io; ipsec_action_t *ap; ipsec_prot_t *prot; - io = (ipsec_out_t *)acqrec->ipsacq_mp->b_rptr; + ipsecah_stack_t *ahstack; + netstack_t *ns; + ipsec_stack_t *ipss; - ASSERT(MUTEX_HELD(&alg_lock)); + io = (ipsec_out_t *)acqrec->ipsacq_mp->b_rptr; ASSERT(io->ipsec_out_type == IPSEC_OUT); + ns = io->ipsec_out_ns; + ipss = ns->netstack_ipsec; + ahstack = ns->netstack_ipsecah; + ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock)); + prop->sadb_prop_exttype = SADB_EXT_PROPOSAL; prop->sadb_prop_len = SADB_8TO64(sizeof (sadb_prop_t)); *(uint32_t *)(&prop->sadb_prop_replay) = 0; /* Quick zero-out! */ - prop->sadb_prop_replay = ipsecah_replay_size; + prop->sadb_prop_replay = ahstack->ipsecah_replay_size; /* * Based upon algorithm properties, and what-not, prioritize a @@ -1675,7 +1782,8 @@ ah_insert_prop(sadb_prop_t *prop, ipsacq_t *acqrec, uint_t combs) ASSERT(prot->ipp_auth_alg > 0); - aalg = ipsec_alglists[IPSEC_ALG_AUTH][prot->ipp_auth_alg]; + aalg = ipss->ipsec_alglists[IPSEC_ALG_AUTH] + [prot->ipp_auth_alg]; if (aalg == NULL || !ALG_VALID(aalg)) continue; @@ -1711,12 +1819,18 @@ ah_insert_prop(sadb_prop_t *prop, ipsacq_t *acqrec, uint_t combs) /* * These may want to come from policy rule.. */ - comb->sadb_comb_soft_bytes = ipsecah_default_soft_bytes; - comb->sadb_comb_hard_bytes = ipsecah_default_hard_bytes; - comb->sadb_comb_soft_addtime = ipsecah_default_soft_addtime; - comb->sadb_comb_hard_addtime = ipsecah_default_hard_addtime; - comb->sadb_comb_soft_usetime = ipsecah_default_soft_usetime; - comb->sadb_comb_hard_usetime = ipsecah_default_hard_usetime; + comb->sadb_comb_soft_bytes = + ahstack->ipsecah_default_soft_bytes; + comb->sadb_comb_hard_bytes = + ahstack->ipsecah_default_hard_bytes; + comb->sadb_comb_soft_addtime = + ahstack->ipsecah_default_soft_addtime; + comb->sadb_comb_hard_addtime = + ahstack->ipsecah_default_hard_addtime; + comb->sadb_comb_soft_usetime = + ahstack->ipsecah_default_soft_usetime; + comb->sadb_comb_hard_usetime = + ahstack->ipsecah_default_hard_usetime; prop->sadb_prop_len += SADB_8TO64(sizeof (*comb)); if (--combs == 0) @@ -1729,26 +1843,29 @@ ah_insert_prop(sadb_prop_t *prop, ipsacq_t *acqrec, uint_t combs) * Prepare and actually send the SADB_ACQUIRE message to PF_KEY. 
*/ static void -ah_send_acquire(ipsacq_t *acqrec, mblk_t *extended) +ah_send_acquire(ipsacq_t *acqrec, mblk_t *extended, netstack_t *ns) { uint_t combs; sadb_msg_t *samsg; sadb_prop_t *prop; mblk_t *pfkeymp, *msgmp; + ipsecah_stack_t *ahstack = ns->netstack_ipsecah; + ipsec_stack_t *ipss = ns->netstack_ipsec; - AH_BUMP_STAT(acquire_requests); + AH_BUMP_STAT(ahstack, acquire_requests); - if (ah_pfkey_q == NULL) + if (ahstack->ah_pfkey_q == NULL) return; /* Set up ACQUIRE. */ - pfkeymp = sadb_setup_acquire(acqrec, SADB_SATYPE_AH); + pfkeymp = sadb_setup_acquire(acqrec, SADB_SATYPE_AH, + ns->netstack_ipsec); if (pfkeymp == NULL) { ah0dbg(("sadb_setup_acquire failed.\n")); return; } - ASSERT(MUTEX_HELD(&alg_lock)); - combs = ipsec_nalgs[IPSEC_ALG_AUTH]; + ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock)); + combs = ipss->ipsec_nalgs[IPSEC_ALG_AUTH]; msgmp = pfkeymp->b_cont; samsg = (sadb_msg_t *)(msgmp->b_rptr); @@ -1759,7 +1876,7 @@ ah_send_acquire(ipsacq_t *acqrec, mblk_t *extended) samsg->sadb_msg_len += prop->sadb_prop_len; msgmp->b_wptr += SADB_64TO8(samsg->sadb_msg_len); - mutex_exit(&alg_lock); + mutex_exit(&ipss->ipsec_alg_lock); /* * Must mutex_exit() before sending PF_KEY message up, in @@ -1770,16 +1887,16 @@ ah_send_acquire(ipsacq_t *acqrec, mblk_t *extended) */ mutex_exit(&acqrec->ipsacq_lock); if (extended != NULL) { - putnext(ah_pfkey_q, extended); + putnext(ahstack->ah_pfkey_q, extended); } - putnext(ah_pfkey_q, pfkeymp); + putnext(ahstack->ah_pfkey_q, pfkeymp); } /* * Handle the SADB_GETSPI message. Create a larval SA. */ static void -ah_getspi(mblk_t *mp, keysock_in_t *ksi) +ah_getspi(mblk_t *mp, keysock_in_t *ksi, ipsecah_stack_t *ahstack) { ipsa_t *newbie, *target; isaf_t *outbound, *inbound; @@ -1792,14 +1909,15 @@ ah_getspi(mblk_t *mp, keysock_in_t *ksi) * Randomly generate a proposed SPI value. 
*/ (void) random_get_pseudo_bytes((uint8_t *)&newspi, sizeof (uint32_t)); - newbie = sadb_getspi(ksi, newspi, &diagnostic); + newbie = sadb_getspi(ksi, newspi, &diagnostic, + ahstack->ipsecah_netstack); if (newbie == NULL) { - sadb_pfkey_error(ah_pfkey_q, mp, ENOMEM, diagnostic, + sadb_pfkey_error(ahstack->ah_pfkey_q, mp, ENOMEM, diagnostic, ksi->ks_in_serial); return; } else if (newbie == (ipsa_t *)-1) { - sadb_pfkey_error(ah_pfkey_q, mp, EINVAL, diagnostic, + sadb_pfkey_error(ahstack->ah_pfkey_q, mp, EINVAL, diagnostic, ksi->ks_in_serial); return; } @@ -1811,13 +1929,15 @@ ah_getspi(mblk_t *mp, keysock_in_t *ksi) */ if (newbie->ipsa_addrfam == AF_INET6) { - outbound = OUTBOUND_BUCKET_V6(&ah_sadb.s_v6, + outbound = OUTBOUND_BUCKET_V6(&ahstack->ah_sadb.s_v6, *(uint32_t *)(newbie->ipsa_dstaddr)); - inbound = INBOUND_BUCKET(&ah_sadb.s_v6, newbie->ipsa_spi); + inbound = INBOUND_BUCKET(&ahstack->ah_sadb.s_v6, + newbie->ipsa_spi); } else { - outbound = OUTBOUND_BUCKET_V4(&ah_sadb.s_v4, + outbound = OUTBOUND_BUCKET_V4(&ahstack->ah_sadb.s_v4, *(uint32_t *)(newbie->ipsa_dstaddr)); - inbound = INBOUND_BUCKET(&ah_sadb.s_v4, newbie->ipsa_spi); + inbound = INBOUND_BUCKET(&ahstack->ah_sadb.s_v4, + newbie->ipsa_spi); } mutex_enter(&outbound->isaf_lock); @@ -1854,7 +1974,7 @@ ah_getspi(mblk_t *mp, keysock_in_t *ksi) */ rc = sadb_insertassoc(newbie, inbound); (void) drv_getparm(TIME, &newbie->ipsa_hardexpiretime); - newbie->ipsa_hardexpiretime += ipsecah_larval_timeout; + newbie->ipsa_hardexpiretime += ahstack->ipsecah_larval_timeout; } /* @@ -1866,8 +1986,8 @@ ah_getspi(mblk_t *mp, keysock_in_t *ksi) if (rc != 0) { mutex_exit(&inbound->isaf_lock); IPSA_REFRELE(newbie); - sadb_pfkey_error(ah_pfkey_q, mp, rc, SADB_X_DIAGNOSTIC_NONE, - ksi->ks_in_serial); + sadb_pfkey_error(ahstack->ah_pfkey_q, mp, rc, + SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial); return; } @@ -1895,7 +2015,7 @@ ah_getspi(mblk_t *mp, keysock_in_t *ksi) * Can safely putnext() to ah_pfkey_q, because this is a turnaround * from the ah_pfkey_q. */ - putnext(ah_pfkey_q, mp); + putnext(ahstack->ah_pfkey_q, mp); } /* @@ -1903,7 +2023,7 @@ ah_getspi(mblk_t *mp, keysock_in_t *ksi) * header. 
*/ static ipsec_status_t -ah_icmp_error_v6(mblk_t *ipsec_mp) +ah_icmp_error_v6(mblk_t *ipsec_mp, ipsecah_stack_t *ahstack) { mblk_t *mp; ip6_t *ip6h, *oip6h; @@ -1914,6 +2034,7 @@ ah_icmp_error_v6(mblk_t *ipsec_mp) isaf_t *isaf; ipsa_t *assoc; uint8_t *post_ah_ptr; + ipsec_stack_t *ipss = ahstack->ipsecah_netstack->netstack_ipsec; mp = ipsec_mp->b_cont; ASSERT(mp->b_datap->db_type == M_CTL); @@ -1932,9 +2053,10 @@ ah_icmp_error_v6(mblk_t *ipsec_mp) &nexthdrp) || mp->b_rptr + hdr_length + sizeof (icmp6_t) + sizeof (ip6_t) + sizeof (ah_t) > mp->b_wptr) { - IP_AH_BUMP_STAT(in_discards); - ip_drop_packet(ipsec_mp, B_TRUE, NULL, NULL, &ipdrops_ah_nomem, - &ah_dropper); + IP_AH_BUMP_STAT(ipss, in_discards); + ip_drop_packet(ipsec_mp, B_TRUE, NULL, NULL, + DROPPER(ipss, ipds_ah_nomem), + &ahstack->ah_dropper); return (IPSEC_STATUS_FAILED); } @@ -1942,32 +2064,35 @@ ah_icmp_error_v6(mblk_t *ipsec_mp) icmp6 = (icmp6_t *)((uint8_t *)oip6h + hdr_length); ip6h = (ip6_t *)(icmp6 + 1); if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) { - IP_AH_BUMP_STAT(in_discards); + IP_AH_BUMP_STAT(ipss, in_discards); ip_drop_packet(ipsec_mp, B_TRUE, NULL, NULL, - &ipdrops_ah_bad_v6_hdrs, &ah_dropper); + DROPPER(ipss, ipds_ah_bad_v6_hdrs), + &ahstack->ah_dropper); return (IPSEC_STATUS_FAILED); } ah = (ah_t *)((uint8_t *)ip6h + hdr_length); - isaf = OUTBOUND_BUCKET_V6(&ah_sadb.s_v6, ip6h->ip6_dst); + isaf = OUTBOUND_BUCKET_V6(&ahstack->ah_sadb.s_v6, ip6h->ip6_dst); mutex_enter(&isaf->isaf_lock); assoc = ipsec_getassocbyspi(isaf, ah->ah_spi, (uint32_t *)&ip6h->ip6_src, (uint32_t *)&ip6h->ip6_dst, AF_INET6); mutex_exit(&isaf->isaf_lock); if (assoc == NULL) { - IP_AH_BUMP_STAT(lookup_failure); - IP_AH_BUMP_STAT(in_discards); - if (ipsecah_log_unknown_spi) { + IP_AH_BUMP_STAT(ipss, lookup_failure); + IP_AH_BUMP_STAT(ipss, in_discards); + if (ahstack->ipsecah_log_unknown_spi) { ipsec_assocfailure(info.mi_idnum, 0, 0, SL_CONSOLE | SL_WARN | SL_ERROR, "Bad ICMP message - No association for the " "attached AH header whose spi is 0x%x, " "sender is 0x%x\n", - ah->ah_spi, &oip6h->ip6_src, AF_INET6); + ah->ah_spi, &oip6h->ip6_src, AF_INET6, + ahstack->ipsecah_netstack); } - ip_drop_packet(ipsec_mp, B_TRUE, NULL, NULL, &ipdrops_ah_no_sa, - &ah_dropper); + ip_drop_packet(ipsec_mp, B_TRUE, NULL, NULL, + DROPPER(ipss, ipds_ah_no_sa), + &ahstack->ah_dropper); return (IPSEC_STATUS_FAILED); } @@ -1986,9 +2111,10 @@ ah_icmp_error_v6(mblk_t *ipsec_mp) post_ah_ptr = (uint8_t *)ah + ah_length; if (post_ah_ptr > mp->b_wptr) { - IP_AH_BUMP_STAT(in_discards); + IP_AH_BUMP_STAT(ipss, in_discards); ip_drop_packet(ipsec_mp, B_TRUE, NULL, NULL, - &ipdrops_ah_bad_length, &ah_dropper); + DROPPER(ipss, ipds_ah_bad_length), + &ahstack->ah_dropper); return (IPSEC_STATUS_FAILED); } @@ -2008,7 +2134,7 @@ ah_icmp_error_v6(mblk_t *ipsec_mp) * the AH header. 
*/ static ipsec_status_t -ah_icmp_error_v4(mblk_t *ipsec_mp) +ah_icmp_error_v4(mblk_t *ipsec_mp, ipsecah_stack_t *ahstack) { mblk_t *mp; mblk_t *mp1; @@ -2024,6 +2150,7 @@ ah_icmp_error_v4(mblk_t *ipsec_mp) uint32_t length; int alloc_size; uint8_t nexthdr; + ipsec_stack_t *ipss = ahstack->ipsecah_netstack->netstack_ipsec; mp = ipsec_mp->b_cont; ASSERT(mp->b_datap->db_type == M_CTL); @@ -2046,12 +2173,14 @@ ah_icmp_error_v4(mblk_t *ipsec_mp) if ((uchar_t *)ipha + hdr_length + 8 > mp->b_wptr) { if (!pullupmsg(mp, (uchar_t *)ipha + hdr_length + 8 - mp->b_rptr)) { - ipsec_rl_strlog(info.mi_idnum, 0, 0, + ipsec_rl_strlog(ahstack->ipsecah_netstack, + info.mi_idnum, 0, 0, SL_WARN | SL_ERROR, "ICMP error: Small AH header\n"); - IP_AH_BUMP_STAT(in_discards); + IP_AH_BUMP_STAT(ipss, in_discards); ip_drop_packet(ipsec_mp, B_TRUE, NULL, NULL, - &ipdrops_ah_bad_length, &ah_dropper); + DROPPER(ipss, ipds_ah_bad_length), + &ahstack->ah_dropper); return (IPSEC_STATUS_FAILED); } icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; @@ -2061,25 +2190,27 @@ ah_icmp_error_v4(mblk_t *ipsec_mp) ah = (ah_t *)((uint8_t *)ipha + hdr_length); nexthdr = ah->ah_nexthdr; - hptr = OUTBOUND_BUCKET_V4(&ah_sadb.s_v4, ipha->ipha_dst); + hptr = OUTBOUND_BUCKET_V4(&ahstack->ah_sadb.s_v4, ipha->ipha_dst); mutex_enter(&hptr->isaf_lock); assoc = ipsec_getassocbyspi(hptr, ah->ah_spi, (uint32_t *)&ipha->ipha_src, (uint32_t *)&ipha->ipha_dst, AF_INET); mutex_exit(&hptr->isaf_lock); if (assoc == NULL) { - IP_AH_BUMP_STAT(lookup_failure); - IP_AH_BUMP_STAT(in_discards); - if (ipsecah_log_unknown_spi) { + IP_AH_BUMP_STAT(ipss, lookup_failure); + IP_AH_BUMP_STAT(ipss, in_discards); + if (ahstack->ipsecah_log_unknown_spi) { ipsec_assocfailure(info.mi_idnum, 0, 0, SL_CONSOLE | SL_WARN | SL_ERROR, "Bad ICMP message - No association for the " "attached AH header whose spi is 0x%x, " "sender is 0x%x\n", - ah->ah_spi, &oipha->ipha_src, AF_INET); + ah->ah_spi, &oipha->ipha_src, AF_INET, + ahstack->ipsecah_netstack); } - ip_drop_packet(ipsec_mp, B_TRUE, NULL, NULL, &ipdrops_ah_no_sa, - &ah_dropper); + ip_drop_packet(ipsec_mp, B_TRUE, NULL, NULL, + DROPPER(ipss, ipds_ah_no_sa), + &ahstack->ah_dropper); return (IPSEC_STATUS_FAILED); } @@ -2115,9 +2246,10 @@ ah_icmp_error_v4(mblk_t *ipsec_mp) * enough to pullup or memory allocation failed. * We tried hard, give up now. 
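
A pattern worth calling out, since it recurs in nearly every hunk of this file: the old code charged drops to the global ipdrops_ah_* statistics and the global ah_dropper, while the patched code reaches both through the instance, via DROPPER(ipss, ...) and &ahstack->ah_dropper, so each IP instance accounts for its own discards. As an illustrative sketch only (the helper name ah_in_discard_pkt is hypothetical and is not introduced by this changeset; the interfaces it calls are the ones visible in these hunks, and the usual kernel headers are assumed), the inbound drop path now amounts to:

	/*
	 * Illustrative helper, not part of the patch: the per-instance
	 * drop pattern used throughout ipsecah.c after this change.
	 * The counter argument is the result of DROPPER(ipss, ipds_ah_...),
	 * i.e. the named drop statistic belonging to this IP instance.
	 */
	static void
	ah_in_discard_pkt(mblk_t *ipsec_mp, kstat_named_t *counter,
	    ipsecah_stack_t *ahstack)
	{
		ipsec_stack_t *ipss =
		    ahstack->ipsecah_netstack->netstack_ipsec;

		/* Per-instance discard counter, then per-instance dropper. */
		IP_AH_BUMP_STAT(ipss, in_discards);
		ip_drop_packet(ipsec_mp, B_TRUE, NULL, NULL, counter,
		    &ahstack->ah_dropper);
	}

A caller would hand it, for example, DROPPER(ipss, ipds_ah_nomem) where the old code passed &ipdrops_ah_nomem.
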
*/ - IP_AH_BUMP_STAT(in_discards); + IP_AH_BUMP_STAT(ipss, in_discards); ip_drop_packet(ipsec_mp, B_TRUE, NULL, NULL, - &ipdrops_ah_nomem, &ah_dropper); + DROPPER(ipss, ipds_ah_nomem), + &ahstack->ah_dropper); return (IPSEC_STATUS_FAILED); } icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; @@ -2139,9 +2271,10 @@ done: alloc_size = iph_hdr_length + sizeof (icmph_t) + hdr_length; if ((mp1 = allocb(alloc_size, BPRI_LO)) == NULL) { - IP_AH_BUMP_STAT(in_discards); - ip_drop_packet(ipsec_mp, B_TRUE, NULL, NULL, &ipdrops_ah_nomem, - &ah_dropper); + IP_AH_BUMP_STAT(ipss, in_discards); + ip_drop_packet(ipsec_mp, B_TRUE, NULL, NULL, + DROPPER(ipss, ipds_ah_nomem), + &ahstack->ah_dropper); return (IPSEC_STATUS_FAILED); } /* ICMP errors are M_CTL messages */ @@ -2184,11 +2317,13 @@ ipsec_status_t ipsecah_icmp_error(mblk_t *mp) { ipsec_in_t *ii = (ipsec_in_t *)mp->b_rptr; + netstack_t *ns = ii->ipsec_in_ns; + ipsecah_stack_t *ahstack = ns->netstack_ipsecah; if (ii->ipsec_in_v4) - return (ah_icmp_error_v4(mp)); + return (ah_icmp_error_v4(mp, ahstack)); else - return (ah_icmp_error_v6(mp)); + return (ah_icmp_error_v6(mp, ahstack)); } static int @@ -2427,7 +2562,7 @@ terminal_hdr: static boolean_t ah_finish_up(ah_t *phdr_ah, ah_t *inbound_ah, ipsa_t *assoc, - int ah_data_sz, int ah_align_sz) + int ah_data_sz, int ah_align_sz, ipsecah_stack_t *ahstack) { int i; @@ -2465,7 +2600,8 @@ ah_finish_up(ah_t *phdr_ah, ah_t *inbound_ah, ipsa_t *assoc, SL_ERROR | SL_CONSOLE | SL_WARN, "Outbound AH SA (0x%x), dst %s has wrapped " "sequence.\n", phdr_ah->ah_spi, - assoc->ipsa_dstaddr, assoc->ipsa_addrfam); + assoc->ipsa_dstaddr, assoc->ipsa_addrfam, + ahstack->ipsecah_netstack); sadb_replay_delete(assoc); /* Caller will free phdr_mp and return NULL. */ @@ -2490,8 +2626,8 @@ ah_finish_up(ah_t *phdr_ah, ah_t *inbound_ah, ipsa_t *assoc, phdr_ah->ah_replay = inbound_ah->ah_replay; if (ah_data_sz != ah_align_sz) { - uchar_t *opad = ((uchar_t *)inbound_ah + sizeof (ah_t) + - ah_data_sz); + uchar_t *opad = ((uchar_t *)inbound_ah + + sizeof (ah_t) + ah_data_sz); uchar_t *pad = ((uchar_t *)phdr_ah + sizeof (ah_t) + ah_data_sz); @@ -2517,6 +2653,9 @@ ah_log_bad_auth(mblk_t *ipsec_in) ipsa_t *assoc = ii->ipsec_in_ah_sa; int af; void *addr; + netstack_t *ns = ii->ipsec_in_ns; + ipsecah_stack_t *ahstack = ns->netstack_ipsecah; + ipsec_stack_t *ipss = ns->netstack_ipsec; mp->b_rptr -= ii->ipsec_in_skip_len; @@ -2534,15 +2673,16 @@ ah_log_bad_auth(mblk_t *ipsec_in) * Log the event. Don't print to the console, block * potential denial-of-service attack. 
*/ - AH_BUMP_STAT(bad_auth); + AH_BUMP_STAT(ahstack, bad_auth); ipsec_assocfailure(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN, "AH Authentication failed spi %x, dst_addr %s", - assoc->ipsa_spi, addr, af); + assoc->ipsa_spi, addr, af, ahstack->ipsecah_netstack); - IP_AH_BUMP_STAT(in_discards); - ip_drop_packet(ipsec_in, B_TRUE, NULL, NULL, &ipdrops_ah_bad_auth, - &ah_dropper); + IP_AH_BUMP_STAT(ipss, in_discards); + ip_drop_packet(ipsec_in, B_TRUE, NULL, NULL, + DROPPER(ipss, ipds_ah_bad_auth), + &ahstack->ah_dropper); } /* @@ -2555,20 +2695,53 @@ ah_kcf_callback(void *arg, int status) mblk_t *ipsec_mp = (mblk_t *)arg; ipsec_in_t *ii = (ipsec_in_t *)ipsec_mp->b_rptr; boolean_t is_inbound = (ii->ipsec_in_type == IPSEC_IN); + netstackid_t stackid; + netstack_t *ns, *ns_arg; + ipsec_stack_t *ipss; + ipsecah_stack_t *ahstack; + ipsec_out_t *io = (ipsec_out_t *)ii; ASSERT(ipsec_mp->b_cont != NULL); + if (is_inbound) { + stackid = ii->ipsec_in_stackid; + ns_arg = ii->ipsec_in_ns; + } else { + stackid = io->ipsec_out_stackid; + ns_arg = io->ipsec_out_ns; + } + /* + * Verify that the netstack is still around; could have vanished + * while kEf was doing its work. + */ + ns = netstack_find_by_stackid(stackid); + if (ns == NULL || ns != ns_arg) { + /* Disappeared on us */ + if (ns != NULL) + netstack_rele(ns); + freemsg(ipsec_mp); + return; + } + + ahstack = ns->netstack_ipsecah; + ipss = ns->netstack_ipsec; + if (status == CRYPTO_SUCCESS) { if (is_inbound) { - if (ah_auth_in_done(ipsec_mp) != IPSEC_STATUS_SUCCESS) + if (ah_auth_in_done(ipsec_mp) != IPSEC_STATUS_SUCCESS) { + netstack_rele(ns); return; + } /* finish IPsec processing */ ip_fanout_proto_again(ipsec_mp, NULL, NULL, NULL); } else { ipha_t *ipha; - if (ah_auth_out_done(ipsec_mp) != IPSEC_STATUS_SUCCESS) + if (ah_auth_out_done(ipsec_mp) != + IPSEC_STATUS_SUCCESS) { + netstack_rele(ns); return; + } /* finish IPsec processing */ ipha = (ipha_t *)ipsec_mp->b_cont->b_rptr; @@ -2584,43 +2757,50 @@ ah_kcf_callback(void *arg, int status) } else if (status == CRYPTO_INVALID_MAC) { ah_log_bad_auth(ipsec_mp); - } else { - ah1dbg(("ah_kcf_callback: crypto failed with 0x%x\n", status)); - AH_BUMP_STAT(crypto_failures); + ah1dbg(ahstack, ("ah_kcf_callback: crypto failed with 0x%x\n", + status)); + AH_BUMP_STAT(ahstack, crypto_failures); if (is_inbound) - IP_AH_BUMP_STAT(in_discards); + IP_AH_BUMP_STAT(ipss, in_discards); else - AH_BUMP_STAT(out_discards); + AH_BUMP_STAT(ahstack, out_discards); ip_drop_packet(ipsec_mp, is_inbound, NULL, NULL, - &ipdrops_ah_crypto_failed, &ah_dropper); + DROPPER(ipss, ipds_ah_crypto_failed), + &ahstack->ah_dropper); } + netstack_rele(ns); } /* * Invoked on kernel crypto failure during inbound and outbound processing. */ static void -ah_crypto_failed(mblk_t *mp, boolean_t is_inbound, int kef_rc) +ah_crypto_failed(mblk_t *mp, boolean_t is_inbound, int kef_rc, + ipsecah_stack_t *ahstack) { - ah1dbg(("crypto failed for %s AH with 0x%x\n", + ipsec_stack_t *ipss = ahstack->ipsecah_netstack->netstack_ipsec; + + ah1dbg(ahstack, ("crypto failed for %s AH with 0x%x\n", is_inbound ? 
"inbound" : "outbound", kef_rc)); - ip_drop_packet(mp, is_inbound, NULL, NULL, &ipdrops_ah_crypto_failed, - &ah_dropper); - AH_BUMP_STAT(crypto_failures); + ip_drop_packet(mp, is_inbound, NULL, NULL, + DROPPER(ipss, ipds_ah_crypto_failed), + &ahstack->ah_dropper); + AH_BUMP_STAT(ahstack, crypto_failures); if (is_inbound) - IP_AH_BUMP_STAT(in_discards); + IP_AH_BUMP_STAT(ipss, in_discards); else - AH_BUMP_STAT(out_discards); + AH_BUMP_STAT(ahstack, out_discards); } /* * Helper macros for the ah_submit_req_{inbound,outbound}() functions. */ -#define AH_INIT_CALLREQ(_cr) { \ +#define AH_INIT_CALLREQ(_cr, _ipss) { \ (_cr)->cr_flag = CRYPTO_SKIP_REQID|CRYPTO_RESTRICTED; \ - if (ipsec_algs_exec_mode[IPSEC_ALG_AUTH] == IPSEC_ALGS_EXEC_ASYNC) \ + if ((_ipss)->ipsec_algs_exec_mode[IPSEC_ALG_AUTH] == \ + IPSEC_ALGS_EXEC_ASYNC) \ (_cr)->cr_flag |= CRYPTO_ALWAYS_QUEUE; \ (_cr)->cr_callback_arg = ipsec_mp; \ (_cr)->cr_callback_func = ah_kcf_callback; \ @@ -2654,11 +2834,21 @@ ah_submit_req_inbound(mblk_t *ipsec_mp, size_t skip_len, uint32_t ah_offset, ipsec_in_t *ii = (ipsec_in_t *)ipsec_mp->b_rptr; uint_t icv_len = assoc->ipsa_mac_len; crypto_ctx_template_t ctx_tmpl; + netstack_t *ns = ii->ipsec_in_ns; + ipsecah_stack_t *ahstack = ns->netstack_ipsecah; + ipsec_stack_t *ipss = ns->netstack_ipsec; phdr_mp = ipsec_mp->b_cont; ASSERT(phdr_mp != NULL); ASSERT(ii->ipsec_in_type == IPSEC_IN); + /* + * In case kEF queues and calls back, keep netstackid_t for + * verification that the IP instance is still around in + * ah_kcf_callback(). + */ + ii->ipsec_in_stackid = ns->netstack_stackid; + /* init arguments for the crypto framework */ AH_INIT_CRYPTO_DATA(&ii->ipsec_in_crypto_data, AH_MSGSIZE(phdr_mp), phdr_mp); @@ -2667,7 +2857,7 @@ ah_submit_req_inbound(mblk_t *ipsec_mp, size_t skip_len, uint32_t ah_offset, (char *)phdr_mp->b_cont->b_rptr - skip_len + ah_offset + sizeof (ah_t)); - AH_INIT_CALLREQ(&call_req); + AH_INIT_CALLREQ(&call_req, ipss); ii->ipsec_in_skip_len = skip_len; @@ -2680,19 +2870,19 @@ ah_submit_req_inbound(mblk_t *ipsec_mp, size_t skip_len, uint32_t ah_offset, switch (kef_rc) { case CRYPTO_SUCCESS: - AH_BUMP_STAT(crypto_sync); + AH_BUMP_STAT(ahstack, crypto_sync); return (ah_auth_in_done(ipsec_mp)); case CRYPTO_QUEUED: - /* ah_callback() will be invoked on completion */ - AH_BUMP_STAT(crypto_async); + /* ah_kcf_callback() will be invoked on completion */ + AH_BUMP_STAT(ahstack, crypto_async); return (IPSEC_STATUS_PENDING); case CRYPTO_INVALID_MAC: - AH_BUMP_STAT(crypto_sync); + AH_BUMP_STAT(ahstack, crypto_sync); ah_log_bad_auth(ipsec_mp); return (IPSEC_STATUS_FAILED); } - ah_crypto_failed(ipsec_mp, B_TRUE, kef_rc); + ah_crypto_failed(ipsec_mp, B_TRUE, kef_rc, ahstack); return (IPSEC_STATUS_FAILED); } @@ -2707,11 +2897,21 @@ ah_submit_req_outbound(mblk_t *ipsec_mp, size_t skip_len, ipsa_t *assoc) crypto_call_req_t call_req; ipsec_out_t *io = (ipsec_out_t *)ipsec_mp->b_rptr; uint_t icv_len = assoc->ipsa_mac_len; + netstack_t *ns = io->ipsec_out_ns; + ipsecah_stack_t *ahstack = ns->netstack_ipsecah; + ipsec_stack_t *ipss = ns->netstack_ipsec; phdr_mp = ipsec_mp->b_cont; ASSERT(phdr_mp != NULL); ASSERT(io->ipsec_out_type == IPSEC_OUT); + /* + * In case kEF queues and calls back, keep netstackid_t for + * verification that the IP instance is still around in + * ah_kcf_callback(). 
+ */ + io->ipsec_out_stackid = ns->netstack_stackid; + /* init arguments for the crypto framework */ AH_INIT_CRYPTO_DATA(&io->ipsec_out_crypto_data, AH_MSGSIZE(phdr_mp), phdr_mp); @@ -2719,7 +2919,7 @@ ah_submit_req_outbound(mblk_t *ipsec_mp, size_t skip_len, ipsa_t *assoc) AH_INIT_CRYPTO_MAC(&io->ipsec_out_crypto_mac, icv_len, (char *)phdr_mp->b_wptr); - AH_INIT_CALLREQ(&call_req); + AH_INIT_CALLREQ(&call_req, ipss); io->ipsec_out_skip_len = skip_len; @@ -2732,15 +2932,15 @@ ah_submit_req_outbound(mblk_t *ipsec_mp, size_t skip_len, ipsa_t *assoc) switch (kef_rc) { case CRYPTO_SUCCESS: - AH_BUMP_STAT(crypto_sync); + AH_BUMP_STAT(ahstack, crypto_sync); return (ah_auth_out_done(ipsec_mp)); case CRYPTO_QUEUED: - /* ah_callback() will be invoked on completion */ - AH_BUMP_STAT(crypto_async); + /* ah_kcf_callback() will be invoked on completion */ + AH_BUMP_STAT(ahstack, crypto_async); return (IPSEC_STATUS_PENDING); } - ah_crypto_failed(ipsec_mp, B_FALSE, kef_rc); + ah_crypto_failed(ipsec_mp, B_FALSE, kef_rc, ahstack); return (IPSEC_STATUS_FAILED); } @@ -2751,7 +2951,7 @@ ah_submit_req_outbound(mblk_t *ipsec_mp, size_t skip_len, ipsa_t *assoc) */ static mblk_t * ah_process_ip_options_v6(mblk_t *mp, ipsa_t *assoc, int *length_to_skip, - uint_t ah_data_sz, boolean_t outbound) + uint_t ah_data_sz, boolean_t outbound, ipsecah_stack_t *ahstack) { ip6_t *ip6h; ip6_t *oip6h; @@ -2760,6 +2960,7 @@ ah_process_ip_options_v6(mblk_t *mp, ipsa_t *assoc, int *length_to_skip, uint_t ah_align_sz; uint_t ah_offset; int hdr_size; + ipsec_stack_t *ipss = ahstack->ipsecah_netstack->netstack_ipsec; /* * Allocate space for the authentication data also. It is @@ -2839,14 +3040,15 @@ ah_process_ip_options_v6(mblk_t *mp, ipsa_t *assoc, int *length_to_skip, ah_offset = ah_fix_phdr_v6(ip6h, oip6h, outbound, B_FALSE); if (ah_offset == 0) { ip_drop_packet(phdr_mp, !outbound, NULL, NULL, - &ipdrops_ah_bad_v6_hdrs, &ah_dropper); + DROPPER(ipss, ipds_ah_bad_v6_hdrs), + &ahstack->ah_dropper); return (NULL); } } if (!ah_finish_up(((ah_t *)((uint8_t *)ip6h + ah_offset)), (outbound ? NULL : ((ah_t *)((uint8_t *)oip6h + ah_offset))), - assoc, ah_data_sz, ah_align_sz)) { + assoc, ah_data_sz, ah_align_sz, ahstack)) { freeb(phdr_mp); /* * Returning NULL will tell the caller to @@ -2869,7 +3071,7 @@ ah_process_ip_options_v6(mblk_t *mp, ipsa_t *assoc, int *length_to_skip, */ static mblk_t * ah_process_ip_options_v4(mblk_t *mp, ipsa_t *assoc, int *length_to_skip, - uint_t ah_data_sz, boolean_t outbound) + uint_t ah_data_sz, boolean_t outbound, ipsecah_stack_t *ahstack) { ipoptp_t opts; uint32_t option_length; @@ -3034,7 +3236,7 @@ ah_process_ip_options_v4(mblk_t *mp, ipsa_t *assoc, int *length_to_skip, if ((opts.ipoptp_flags & IPOPTP_ERROR) != 0) { bad_ipv4opt: - ah1dbg(("AH : bad IPv4 option")); + ah1dbg(ahstack, ("AH : bad IPv4 option")); freeb(phdr_mp); return (NULL); } @@ -3054,7 +3256,7 @@ ah_hdr: oipha->ipha_protocol; if (!ah_finish_up(((ah_t *)((uint8_t *)ipha + ip_hdr_length)), (outbound ? NULL : ((ah_t *)((uint8_t *)oipha + ip_hdr_length))), - assoc, ah_data_sz, ah_align_sz)) { + assoc, ah_data_sz, ah_align_sz, ahstack)) { freeb(phdr_mp); /* * Returning NULL will tell the caller to IPSA_REFELE(), free @@ -3088,6 +3290,9 @@ ah_outbound(mblk_t *ipsec_out) int length_to_skip; uint_t ah_align_sz; uint_t age_bytes; + netstack_t *ns; + ipsec_stack_t *ipss; + ipsecah_stack_t *ahstack; /* * Construct the chain of mblks @@ -3097,14 +3302,17 @@ ah_outbound(mblk_t *ipsec_out) * one by one. 
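
The two submit routines above stash ns->netstack_stackid in the IPSEC_IN/IPSEC_OUT control block before handing the request to the kernel crypto framework; ah_kcf_callback() then re-resolves the netstack before touching any per-instance state, because the zone that owns the instance can be halted while the request sits on the crypto queue. A minimal skeleton of that callback shape, for illustration only (the function name sample_kcf_callback is hypothetical, only the inbound case is shown, and status handling is elided; the lookup/compare/release sequence is the one ah_kcf_callback() uses in this changeset):

	/*
	 * Illustrative skeleton, not code from the patch: how an
	 * asynchronous completion handler revalidates its IP instance.
	 * The submitter saved the netstackid_t; the callback looks the
	 * netstack up again and also checks that it is still the same
	 * netstack_t it started with.
	 */
	static void
	sample_kcf_callback(void *arg, int status)
	{
		mblk_t *ipsec_mp = (mblk_t *)arg;
		ipsec_in_t *ii = (ipsec_in_t *)ipsec_mp->b_rptr;
		netstack_t *ns;

		ns = netstack_find_by_stackid(ii->ipsec_in_stackid);
		if (ns == NULL || ns != ii->ipsec_in_ns) {
			/* Instance disappeared while the request was queued. */
			if (ns != NULL)
				netstack_rele(ns);
			freemsg(ipsec_mp);
			return;
		}

		/* ... status handling against ns->netstack_ipsecah elided ... */

		netstack_rele(ns);	/* drop the hold taken by the lookup */
	}
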
*/ - AH_BUMP_STAT(out_requests); - ASSERT(ipsec_out->b_datap->db_type == M_CTL); ASSERT(MBLKL(ipsec_out) >= sizeof (ipsec_info_t)); mp = ipsec_out->b_cont; oi = (ipsec_out_t *)ipsec_out->b_rptr; + ns = oi->ipsec_out_ns; + ipss = ns->netstack_ipsec; + ahstack = ns->netstack_ipsecah; + + AH_BUMP_STAT(ahstack, out_requests); ASSERT(mp->b_datap->db_type == M_DATA); @@ -3136,19 +3344,20 @@ ah_outbound(mblk_t *ipsec_out) /* rig things as if ipsec_getassocbyconn() failed */ ipsec_assocfailure(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN, "AH association 0x%x, dst %s had bytes expire.\n", - ntohl(assoc->ipsa_spi), assoc->ipsa_dstaddr, AF_INET); + ntohl(assoc->ipsa_spi), assoc->ipsa_dstaddr, AF_INET, + ahstack->ipsecah_netstack); freemsg(ipsec_out); return (IPSEC_STATUS_FAILED); } if (oi->ipsec_out_is_capab_ill) { - ah3dbg(("ah_outbound: pkt can be accelerated\n")); + ah3dbg(ahstack, ("ah_outbound: pkt can be accelerated\n")); if (oi->ipsec_out_v4) return (ah_outbound_accelerated_v4(ipsec_out, assoc)); else return (ah_outbound_accelerated_v6(ipsec_out, assoc)); } - AH_BUMP_STAT(noaccel); + AH_BUMP_STAT(ahstack, noaccel); /* * Insert pseudo header: @@ -3157,16 +3366,17 @@ ah_outbound(mblk_t *ipsec_out) if (oi->ipsec_out_v4) { phdr_mp = ah_process_ip_options_v4(mp, assoc, &length_to_skip, - assoc->ipsa_mac_len, B_TRUE); + assoc->ipsa_mac_len, B_TRUE, ahstack); } else { phdr_mp = ah_process_ip_options_v6(mp, assoc, &length_to_skip, - assoc->ipsa_mac_len, B_TRUE); + assoc->ipsa_mac_len, B_TRUE, ahstack); } if (phdr_mp == NULL) { - AH_BUMP_STAT(out_discards); + AH_BUMP_STAT(ahstack, out_discards); ip_drop_packet(ipsec_out, B_FALSE, NULL, NULL, - &ipdrops_ah_bad_v4_opts, &ah_dropper); + DROPPER(ipss, ipds_ah_bad_v4_opts), + &ahstack->ah_dropper); return (IPSEC_STATUS_FAILED); } @@ -3197,6 +3407,9 @@ ah_inbound(mblk_t *ipsec_in_mp, void *arg) int ah_length; mblk_t *phdr_mp; uint32_t ah_offset; + netstack_t *ns = ii->ipsec_in_ns; + ipsecah_stack_t *ahstack = ns->netstack_ipsecah; + ipsec_stack_t *ipss = ns->netstack_ipsec; ASSERT(assoc != NULL); if (assoc->ipsa_usetime == 0) @@ -3214,10 +3427,11 @@ ah_inbound(mblk_t *ipsec_in_mp, void *arg) * take place when it doesn't need to. */ if (!sadb_replay_peek(assoc, ah->ah_replay)) { - AH_BUMP_STAT(replay_early_failures); - IP_AH_BUMP_STAT(in_discards); + AH_BUMP_STAT(ahstack, replay_early_failures); + IP_AH_BUMP_STAT(ipss, in_discards); ip_drop_packet(ipsec_in_mp, B_TRUE, NULL, NULL, - &ipdrops_ah_early_replay, &ah_dropper); + DROPPER(ipss, ipds_ah_early_replay), + &ahstack->ah_dropper); return (IPSEC_STATUS_FAILED); } @@ -3233,12 +3447,13 @@ ah_inbound(mblk_t *ipsec_in_mp, void *arg) * IPsec accelerator? 
*/ if (ii->ipsec_in_accelerated) { - ah3dbg(("ah_inbound_v6: pkt processed by ill=%d isv6=%d\n", + ah3dbg(ahstack, + ("ah_inbound_v6: pkt processed by ill=%d isv6=%d\n", ii->ipsec_in_ill_index, !ii->ipsec_in_v4)); return (ah_inbound_accelerated(ipsec_in_mp, ii->ipsec_in_v4, assoc, ah_offset)); } - AH_BUMP_STAT(noaccel); + AH_BUMP_STAT(ahstack, noaccel); /* * We need to pullup until the ICV before we call @@ -3253,12 +3468,13 @@ ah_inbound(mblk_t *ipsec_in_mp, void *arg) if (((uchar_t *)ah + ah_length) > data_mp->b_wptr) { if (!pullupmsg(data_mp, (uchar_t *)ah + ah_length - data_mp->b_rptr)) { - (void) ipsec_rl_strlog(info.mi_idnum, 0, 0, + (void) ipsec_rl_strlog(ns, info.mi_idnum, 0, 0, SL_WARN | SL_ERROR, "ah_inbound: Small AH header\n"); - IP_AH_BUMP_STAT(in_discards); + IP_AH_BUMP_STAT(ipss, in_discards); ip_drop_packet(ipsec_in_mp, B_TRUE, NULL, NULL, - &ipdrops_ah_nomem, &ah_dropper); + DROPPER(ipss, ipds_ah_nomem), + &ahstack->ah_dropper); return (IPSEC_STATUS_FAILED); } } @@ -3269,17 +3485,19 @@ ah_inbound(mblk_t *ipsec_in_mp, void *arg) */ if (ii->ipsec_in_v4) { phdr_mp = ah_process_ip_options_v4(data_mp, assoc, - &length_to_skip, assoc->ipsa_mac_len, B_FALSE); + &length_to_skip, assoc->ipsa_mac_len, B_FALSE, ahstack); } else { phdr_mp = ah_process_ip_options_v6(data_mp, assoc, - &length_to_skip, assoc->ipsa_mac_len, B_FALSE); + &length_to_skip, assoc->ipsa_mac_len, B_FALSE, ahstack); } if (phdr_mp == NULL) { - IP_AH_BUMP_STAT(in_discards); + IP_AH_BUMP_STAT(ipss, in_discards); ip_drop_packet(ipsec_in_mp, B_TRUE, NULL, NULL, - ii->ipsec_in_v4 ? &ipdrops_ah_bad_v4_opts : - &ipdrops_ah_bad_v6_hdrs, &ah_dropper); + (ii->ipsec_in_v4 ? + DROPPER(ipss, ipds_ah_bad_v4_opts) : + DROPPER(ipss, ipds_ah_bad_v6_hdrs)), + &ahstack->ah_dropper); return (IPSEC_STATUS_FAILED); } @@ -3333,15 +3551,22 @@ ah_inbound_accelerated(mblk_t *ipsec_in, boolean_t isv4, ipsa_t *assoc, uint32_t next_hdr; da_ipsec_t *hada; kstat_named_t *counter; - - AH_BUMP_STAT(in_accelerated); + ipsecah_stack_t *ahstack; + netstack_t *ns; + ipsec_stack_t *ipss; ii = (ipsec_in_t *)ipsec_in->b_rptr; + ns = ii->ipsec_in_ns; + ahstack = ns->netstack_ipsecah; + ipss = ns->netstack_ipsec; + mp = ipsec_in->b_cont; hada_mp = ii->ipsec_in_da; ASSERT(hada_mp != NULL); hada = (da_ipsec_t *)hada_mp->b_rptr; + AH_BUMP_STAT(ahstack, in_accelerated); + /* * We only support one level of decapsulation in hardware, so * nuke the pointer. @@ -3363,7 +3588,7 @@ ah_inbound_accelerated(mblk_t *ipsec_in, boolean_t isv4, ipsa_t *assoc, ah0dbg(("ah_inbound_accelerated: " "ICV len (%u) incorrect or mblk too small (%u)\n", icv_len, (uint32_t)(MBLKL(hada_mp)))); - counter = &ipdrops_ah_bad_length; + counter = DROPPER(ipss, ipds_ah_bad_length); goto ah_in_discard; } ASSERT(icv_len != 0); @@ -3399,16 +3624,16 @@ ah_inbound_accelerated(mblk_t *ipsec_in, boolean_t isv4, ipsa_t *assoc, * Log the event. Don't print to the console, block * potential denial-of-service attack. 
*/ - AH_BUMP_STAT(bad_auth); + AH_BUMP_STAT(ahstack, bad_auth); ipsec_assocfailure(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN, "AH Authentication failed spi %x, dst_addr %s", - assoc->ipsa_spi, addr, af); - counter = &ipdrops_ah_bad_auth; + assoc->ipsa_spi, addr, af, ahstack->ipsecah_netstack); + counter = DROPPER(ipss, ipds_ah_bad_auth); goto ah_in_discard; } - ah3dbg(("AH succeeded, checking replay\n")); - AH_BUMP_STAT(good_auth); + ah3dbg(ahstack, ("AH succeeded, checking replay\n")); + AH_BUMP_STAT(ahstack, good_auth); if (!sadb_replay_check(assoc, ah->ah_replay)) { int af; @@ -3429,12 +3654,12 @@ ah_inbound_accelerated(mblk_t *ipsec_in, boolean_t isv4, ipsa_t *assoc, * the replay number that failed (or printing to the * console) opens a denial-of-service attack. */ - AH_BUMP_STAT(replay_failures); + AH_BUMP_STAT(ahstack, replay_failures); ipsec_assocfailure(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN, "Replay failed for AH spi %x, dst_addr %s", - assoc->ipsa_spi, addr, af); - counter = &ipdrops_ah_replay; + assoc->ipsa_spi, addr, af, ahstack->ipsecah_netstack); + counter = DROPPER(ipss, ipds_ah_replay); goto ah_in_discard; } @@ -3480,9 +3705,9 @@ ah_inbound_accelerated(mblk_t *ipsec_in, boolean_t isv4, ipsa_t *assoc, SL_ERROR | SL_WARN, "AH Association 0x%x, dst %s had bytes expire.\n", assoc->ipsa_spi, assoc->ipsa_dstaddr, - AF_INET); - AH_BUMP_STAT(bytes_expired); - counter = &ipdrops_ah_bytes_expire; + AF_INET, ahstack->ipsecah_netstack); + AH_BUMP_STAT(ahstack, bytes_expired); + counter = DROPPER(ipss, ipds_ah_bytes_expire); goto ah_in_discard; } @@ -3490,9 +3715,10 @@ ah_inbound_accelerated(mblk_t *ipsec_in, boolean_t isv4, ipsa_t *assoc, return (IPSEC_STATUS_SUCCESS); ah_in_discard: - IP_AH_BUMP_STAT(in_discards); + IP_AH_BUMP_STAT(ipss, in_discards); freeb(hada_mp); - ip_drop_packet(ipsec_in, B_TRUE, NULL, NULL, counter, &ah_dropper); + ip_drop_packet(ipsec_in, B_TRUE, NULL, NULL, counter, + &ahstack->ah_dropper); return (IPSEC_STATUS_FAILED); } @@ -3520,12 +3746,19 @@ ah_outbound_accelerated_v4(mblk_t *ipsec_mp, ipsa_t *assoc) uint_t new_hdr_len; /* new header length */ uint_t iphdr_length; ah_t *ah_hdr; /* ptr to AH header */ - - AH_BUMP_STAT(out_accelerated); + netstack_t *ns; + ipsec_stack_t *ipss; + ipsecah_stack_t *ahstack; oi = (ipsec_out_t *)ipsec_mp->b_rptr; + ns = oi->ipsec_out_ns; + ipss = ns->netstack_ipsec; + ahstack = ns->netstack_ipsecah; + mp = ipsec_mp->b_cont; + AH_BUMP_STAT(ahstack, out_accelerated); + oipha = (ipha_t *)mp->b_rptr; v_hlen_tos_len = ((uint32_t *)oipha)[0]; @@ -3557,7 +3790,8 @@ ah_outbound_accelerated_v4(mblk_t *ipsec_mp, ipsa_t *assoc) if ((new_mp = allocb(new_hdr_len, BPRI_HI)) == NULL) { /* IPsec kstats: bump bean counter here */ ip_drop_packet(ipsec_mp, B_FALSE, NULL, NULL, - &ipdrops_ah_nomem, &ah_dropper); + DROPPER(ipss, ipds_ah_nomem), + &ahstack->ah_dropper); return (IPSEC_STATUS_FAILED); } @@ -3585,10 +3819,12 @@ ah_outbound_accelerated_v4(mblk_t *ipsec_mp, ipsa_t *assoc) ah_hdr = (ah_t *)(new_mp->b_rptr + IP_SIMPLE_HDR_LENGTH + option_length); ah_hdr->ah_nexthdr = oipha->ipha_protocol; - if (!ah_finish_up(ah_hdr, NULL, assoc, ah_data_sz, ah_align_sz)) { + if (!ah_finish_up(ah_hdr, NULL, assoc, ah_data_sz, ah_align_sz, + ahstack)) { /* Only way this fails is if outbound replay counter wraps. 
*/ ip_drop_packet(ipsec_mp, B_FALSE, NULL, NULL, - &ipdrops_ah_replay, &ah_dropper); + DROPPER(ipss, ipds_ah_replay), + &ahstack->ah_dropper); return (IPSEC_STATUS_FAILED); } @@ -3619,12 +3855,19 @@ ah_outbound_accelerated_v6(mblk_t *ipsec_mp, ipsa_t *assoc) uint_t hdr_size; uint_t ah_offset; ah_t *ah_hdr; /* ptr to AH header */ - - AH_BUMP_STAT(out_accelerated); + netstack_t *ns; + ipsec_stack_t *ipss; + ipsecah_stack_t *ahstack; oi = (ipsec_out_t *)ipsec_mp->b_rptr; + ns = oi->ipsec_out_ns; + ipss = ns->netstack_ipsec; + ahstack = ns->netstack_ipsecah; + mp = ipsec_mp->b_cont; + AH_BUMP_STAT(ahstack, out_accelerated); + oip6h = (ip6_t *)mp->b_rptr; /* mark packet as being accelerated in IPSEC_OUT */ @@ -3645,8 +3888,9 @@ ah_outbound_accelerated_v6(mblk_t *ipsec_mp, ipsa_t *assoc) hdr_size += (sizeof (ah_t) + ah_align_sz); if ((phdr_mp = allocb(hdr_size, BPRI_HI)) == NULL) { - ip_drop_packet(ipsec_mp, B_FALSE, NULL, NULL, &ipdrops_ah_nomem, - &ah_dropper); + ip_drop_packet(ipsec_mp, B_FALSE, NULL, NULL, + DROPPER(ipss, ipds_ah_nomem), + &ahstack->ah_dropper); return (IPSEC_STATUS_FAILED); } phdr_mp->b_wptr += hdr_size; @@ -3687,7 +3931,8 @@ ah_outbound_accelerated_v6(mblk_t *ipsec_mp, ipsa_t *assoc) if (ah_offset == 0) { freemsg(phdr_mp); ip_drop_packet(ipsec_mp, B_FALSE, NULL, NULL, - &ipdrops_ah_bad_v6_hdrs, &ah_dropper); + DROPPER(ipss, ipds_ah_bad_v6_hdrs), + &ahstack->ah_dropper); return (IPSEC_STATUS_FAILED); } } @@ -3703,10 +3948,11 @@ ah_outbound_accelerated_v6(mblk_t *ipsec_mp, ipsa_t *assoc) ah_hdr->ah_nexthdr = oip6h->ip6_nxt; if (!ah_finish_up(((ah_t *)((uint8_t *)ip6h + ah_offset)), NULL, - assoc, ah_data_sz, ah_align_sz)) { + assoc, ah_data_sz, ah_align_sz, ahstack)) { /* Only way this fails is if outbound replay counter wraps. */ ip_drop_packet(ipsec_mp, B_FALSE, NULL, NULL, - &ipdrops_ah_replay, &ah_dropper); + DROPPER(ipss, ipds_ah_replay), + &ahstack->ah_dropper); return (IPSEC_STATUS_FAILED); } @@ -3738,23 +3984,32 @@ ah_auth_in_done(mblk_t *ipsec_in) uint_t icv_len; ipsa_t *assoc; kstat_named_t *counter; + netstack_t *ns; + ipsecah_stack_t *ahstack; + ipsec_stack_t *ipss; ii = (ipsec_in_t *)ipsec_in->b_rptr; + ns = ii->ipsec_in_ns; + ahstack = ns->netstack_ipsecah; + ipss = ns->netstack_ipsec; + isv4 = ii->ipsec_in_v4; assoc = ii->ipsec_in_ah_sa; icv_len = (uint_t)ii->ipsec_in_crypto_mac.cd_raw.iov_len; phdr_mp = ipsec_in->b_cont; if (phdr_mp == NULL) { - ip_drop_packet(ipsec_in, B_TRUE, NULL, NULL, &ipdrops_ah_nomem, - &ah_dropper); + ip_drop_packet(ipsec_in, B_TRUE, NULL, NULL, + DROPPER(ipss, ipds_ah_nomem), + &ahstack->ah_dropper); return (IPSEC_STATUS_FAILED); } mp = phdr_mp->b_cont; if (mp == NULL) { - ip_drop_packet(ipsec_in, B_TRUE, NULL, NULL, &ipdrops_ah_nomem, - &ah_dropper); + ip_drop_packet(ipsec_in, B_TRUE, NULL, NULL, + DROPPER(ipss, ipds_ah_nomem), + &ahstack->ah_dropper); return (IPSEC_STATUS_FAILED); } mp->b_rptr -= ii->ipsec_in_skip_len; @@ -3781,8 +4036,8 @@ ah_auth_in_done(mblk_t *ipsec_in) * We get here only when authentication passed. */ - ah3dbg(("AH succeeded, checking replay\n")); - AH_BUMP_STAT(good_auth); + ah3dbg(ahstack, ("AH succeeded, checking replay\n")); + AH_BUMP_STAT(ahstack, good_auth); if (!sadb_replay_check(assoc, ah->ah_replay)) { int af; @@ -3803,12 +4058,12 @@ ah_auth_in_done(mblk_t *ipsec_in) * the replay number that failed (or printing to the * console) opens a denial-of-service attack. 
*/ - AH_BUMP_STAT(replay_failures); + AH_BUMP_STAT(ahstack, replay_failures); ipsec_assocfailure(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN, "Replay failed for AH spi %x, dst_addr %s", - assoc->ipsa_spi, addr, af); - counter = &ipdrops_ah_replay; + assoc->ipsa_spi, addr, af, ahstack->ipsecah_netstack); + counter = DROPPER(ipss, ipds_ah_replay); goto ah_in_discard; } @@ -3832,9 +4087,9 @@ ah_auth_in_done(mblk_t *ipsec_in) SL_ERROR | SL_WARN, "AH Association 0x%x, dst %s had bytes expire.\n", assoc->ipsa_spi, assoc->ipsa_dstaddr, - AF_INET); - AH_BUMP_STAT(bytes_expired); - counter = &ipdrops_ah_bytes_expire; + AF_INET, ahstack->ipsecah_netstack); + AH_BUMP_STAT(ahstack, bytes_expired); + counter = DROPPER(ipss, ipds_ah_bytes_expire); goto ah_in_discard; } ipha->ipha_protocol = ah->ah_nexthdr; @@ -3862,9 +4117,9 @@ ah_auth_in_done(mblk_t *ipsec_in) SL_ERROR | SL_WARN, "AH Association 0x%x, dst %s had bytes " "expire.\n", assoc->ipsa_spi, &ip6h->ip6_dst, - AF_INET6); - AH_BUMP_STAT(bytes_expired); - counter = &ipdrops_ah_bytes_expire; + AF_INET6, ahstack->ipsecah_netstack); + AH_BUMP_STAT(ahstack, bytes_expired); + counter = DROPPER(ipss, ipds_ah_bytes_expire); goto ah_in_discard; } @@ -3929,8 +4184,9 @@ ah_auth_in_done(mblk_t *ipsec_in) return (IPSEC_STATUS_SUCCESS); ah_in_discard: - IP_AH_BUMP_STAT(in_discards); - ip_drop_packet(ipsec_in, B_TRUE, NULL, NULL, counter, &ah_dropper); + IP_AH_BUMP_STAT(ipss, in_discards); + ip_drop_packet(ipsec_in, B_TRUE, NULL, NULL, counter, + &ahstack->ah_dropper); return (IPSEC_STATUS_FAILED); } @@ -3952,22 +4208,31 @@ ah_auth_out_done(mblk_t *ipsec_out) boolean_t isv4; ipsec_out_t *io; size_t icv_len; + netstack_t *ns; + ipsec_stack_t *ipss; + ipsecah_stack_t *ahstack; io = (ipsec_out_t *)ipsec_out->b_rptr; + ns = io->ipsec_out_ns; + ipss = ns->netstack_ipsec; + ahstack = ns->netstack_ipsecah; + isv4 = io->ipsec_out_v4; icv_len = io->ipsec_out_crypto_mac.cd_raw.iov_len; phdr_mp = ipsec_out->b_cont; if (phdr_mp == NULL) { ip_drop_packet(ipsec_out, B_FALSE, NULL, NULL, - &ipdrops_ah_nomem, &ah_dropper); + DROPPER(ipss, ipds_ah_nomem), + &ahstack->ah_dropper); return (IPSEC_STATUS_FAILED); } mp = phdr_mp->b_cont; if (mp == NULL) { ip_drop_packet(ipsec_out, B_FALSE, NULL, NULL, - &ipdrops_ah_nomem, &ah_dropper); + DROPPER(ipss, ipds_ah_nomem), + &ahstack->ah_dropper); return (IPSEC_STATUS_FAILED); } mp->b_rptr -= io->ipsec_out_skip_len; @@ -4066,15 +4331,18 @@ ah_auth_out_done(mblk_t *ipsec_out) */ void ipsecah_in_assocfailure(mblk_t *mp, char level, ushort_t sl, char *fmt, - uint32_t spi, void *addr, int af) + uint32_t spi, void *addr, int af, ipsecah_stack_t *ahstack) { - if (ipsecah_log_unknown_spi) { + ipsec_stack_t *ipss = ahstack->ipsecah_netstack->netstack_ipsec; + + if (ahstack->ipsecah_log_unknown_spi) { ipsec_assocfailure(info.mi_idnum, 0, level, sl, fmt, spi, - addr, af); + addr, af, ahstack->ipsecah_netstack); } - ip_drop_packet(mp, B_TRUE, NULL, NULL, &ipdrops_ah_no_sa, - &ah_dropper); + ip_drop_packet(mp, B_TRUE, NULL, NULL, + DROPPER(ipss, ipds_ah_no_sa), + &ahstack->ah_dropper); } /* diff --git a/usr/src/uts/common/inet/ip/ipsecesp.c b/usr/src/uts/common/inet/ip/ipsecesp.c index f6f23503b1..46fab6f792 100644 --- a/usr/src/uts/common/inet/ip/ipsecesp.c +++ b/usr/src/uts/common/inet/ip/ipsecesp.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -35,6 +35,7 @@ #include <sys/ddi.h> #include <sys/sunddi.h> #include <sys/kmem.h> +#include <sys/zone.h> #include <sys/sysmacros.h> #include <sys/cmn_err.h> #include <sys/vtrace.h> @@ -63,19 +64,16 @@ #include <sys/strsun.h> #include <inet/udp_impl.h> #include <sys/taskq.h> +#include <sys/note.h> #include <sys/iphada.h> -/* Packet dropper for ESP drops. */ -static ipdropper_t esp_dropper; - -static kmutex_t ipsecesp_param_lock; /* Protects ipsecesp_param_arr[] below. */ /* * Table of ND variables supported by ipsecesp. These are loaded into * ipsecesp_g_nd in ipsecesp_init_nd. * All of these are alterable, within the min/max values given, at run time. */ -static ipsecespparam_t ipsecesp_param_arr[] = { +static ipsecespparam_t lcl_param_arr[] = { /* min max value name */ { 0, 3, 0, "ipsecesp_debug"}, { 125, 32000, SADB_AGE_INTERVAL_DEFAULT, "ipsecesp_age_interval"}, @@ -93,55 +91,61 @@ static ipsecespparam_t ipsecesp_param_arr[] = { { 0, 1, 0, "ipsecesp_log_unknown_spi"}, { 0, 2, 1, "ipsecesp_padding_check"}, }; -#define ipsecesp_debug ipsecesp_param_arr[0].ipsecesp_param_value -#define ipsecesp_age_interval ipsecesp_param_arr[1].ipsecesp_param_value -#define ipsecesp_age_int_max ipsecesp_param_arr[1].ipsecesp_param_max -#define ipsecesp_reap_delay ipsecesp_param_arr[2].ipsecesp_param_value -#define ipsecesp_replay_size ipsecesp_param_arr[3].ipsecesp_param_value -#define ipsecesp_acquire_timeout ipsecesp_param_arr[4].ipsecesp_param_value -#define ipsecesp_larval_timeout ipsecesp_param_arr[5].ipsecesp_param_value -#define ipsecesp_default_soft_bytes \ - ipsecesp_param_arr[6].ipsecesp_param_value -#define ipsecesp_default_hard_bytes \ - ipsecesp_param_arr[7].ipsecesp_param_value -#define ipsecesp_default_soft_addtime \ - ipsecesp_param_arr[8].ipsecesp_param_value -#define ipsecesp_default_hard_addtime \ - ipsecesp_param_arr[9].ipsecesp_param_value -#define ipsecesp_default_soft_usetime \ - ipsecesp_param_arr[10].ipsecesp_param_value -#define ipsecesp_default_hard_usetime \ - ipsecesp_param_arr[11].ipsecesp_param_value -#define ipsecesp_log_unknown_spi \ - ipsecesp_param_arr[12].ipsecesp_param_value -#define ipsecesp_padding_check \ - ipsecesp_param_arr[13].ipsecesp_param_value +#define ipsecesp_debug ipsecesp_params[0].ipsecesp_param_value +#define ipsecesp_age_interval ipsecesp_params[1].ipsecesp_param_value +#define ipsecesp_age_int_max ipsecesp_params[1].ipsecesp_param_max +#define ipsecesp_reap_delay ipsecesp_params[2].ipsecesp_param_value +#define ipsecesp_replay_size ipsecesp_params[3].ipsecesp_param_value +#define ipsecesp_acquire_timeout \ + ipsecesp_params[4].ipsecesp_param_value +#define ipsecesp_larval_timeout \ + ipsecesp_params[5].ipsecesp_param_value +#define ipsecesp_default_soft_bytes \ + ipsecesp_params[6].ipsecesp_param_value +#define ipsecesp_default_hard_bytes \ + ipsecesp_params[7].ipsecesp_param_value +#define ipsecesp_default_soft_addtime \ + ipsecesp_params[8].ipsecesp_param_value +#define ipsecesp_default_hard_addtime \ + ipsecesp_params[9].ipsecesp_param_value +#define ipsecesp_default_soft_usetime \ + ipsecesp_params[10].ipsecesp_param_value +#define ipsecesp_default_hard_usetime \ + ipsecesp_params[11].ipsecesp_param_value +#define ipsecesp_log_unknown_spi \ + ipsecesp_params[12].ipsecesp_param_value +#define ipsecesp_padding_check \ + ipsecesp_params[13].ipsecesp_param_value #define esp0dbg(a) printf a /* NOTE: != 0 instead of > 0 so lint doesn't complain. 
*/ -#define esp1dbg(a) if (ipsecesp_debug != 0) printf a -#define esp2dbg(a) if (ipsecesp_debug > 1) printf a -#define esp3dbg(a) if (ipsecesp_debug > 2) printf a - -static IDP ipsecesp_g_nd; +#define esp1dbg(espstack, a) if (espstack->ipsecesp_debug != 0) printf a +#define esp2dbg(espstack, a) if (espstack->ipsecesp_debug > 1) printf a +#define esp3dbg(espstack, a) if (espstack->ipsecesp_debug > 2) printf a static int ipsecesp_open(queue_t *, dev_t *, int, int, cred_t *); static int ipsecesp_close(queue_t *); static void ipsecesp_rput(queue_t *, mblk_t *); static void ipsecesp_wput(queue_t *, mblk_t *); -static void esp_send_acquire(ipsacq_t *, mblk_t *); +static void *ipsecesp_stack_init(netstackid_t stackid, netstack_t *ns); +static void ipsecesp_stack_fini(netstackid_t stackid, void *arg); +static void esp_send_acquire(ipsacq_t *, mblk_t *, netstack_t *); static ipsec_status_t esp_outbound_accelerated(mblk_t *, uint_t); static ipsec_status_t esp_inbound_accelerated(mblk_t *, mblk_t *, boolean_t, ipsa_t *); -static boolean_t esp_register_out(uint32_t, uint32_t, uint_t); +static boolean_t esp_register_out(uint32_t, uint32_t, uint_t, + ipsecesp_stack_t *); static boolean_t esp_strip_header(mblk_t *, boolean_t, uint32_t, - kstat_named_t **); + kstat_named_t **, ipsecesp_stack_t *); static ipsec_status_t esp_submit_req_inbound(mblk_t *, ipsa_t *, uint_t); static ipsec_status_t esp_submit_req_outbound(mblk_t *, ipsa_t *, uchar_t *, uint_t); +/* Setable in /etc/system */ +uint32_t esp_hash_size = IPSEC_DEFAULT_HASH_SIZE; + static struct module_info info = { 5137, "ipsecesp", 0, INFPSZ, 65536, 1024 }; @@ -160,13 +164,6 @@ struct streamtab ipsecespinfo = { &rinit, &winit, NULL, NULL }; -/* - * Keysock instance of ESP. "There can be only one." :) - * Use casptr() on this because I don't set it until KEYSOCK_HELLO comes down. - * Paired up with the esp_pfkey_q is the esp_event, which will age SAs. - */ -static queue_t *esp_pfkey_q; -static timeout_id_t esp_event; static taskq_t *esp_taskq; /* @@ -178,14 +175,13 @@ static taskq_t *esp_taskq; * Answer: Yes, because I need to know which queue is BOUND to * IPPROTO_ESP */ -static mblk_t *esp_ip_unbind; /* * Stats. This may eventually become a full-blown SNMP MIB once that spec * stabilizes. */ -typedef struct { +typedef struct esp_kstats_s { kstat_named_t esp_stat_num_aalgs; kstat_named_t esp_stat_good_auth; kstat_named_t esp_stat_bad_auth; @@ -207,31 +203,47 @@ typedef struct { kstat_named_t esp_stat_bad_decrypt; } esp_kstats_t; -uint32_t esp_hash_size = IPSEC_DEFAULT_HASH_SIZE; -#define ESP_BUMP_STAT(x) (esp_kstats->esp_stat_ ## x).value.ui64++ -#define ESP_DEBUMP_STAT(x) (esp_kstats->esp_stat_ ## x).value.ui64-- - -static kstat_t *esp_ksp; -static esp_kstats_t *esp_kstats; +/* + * espstack->esp_kstats is equal to espstack->esp_ksp->ks_data if + * kstat_create_netstack for espstack->esp_ksp succeeds, but when it + * fails, it will be NULL. Note this is done for all stack instances, + * so it *could* fail. 
hence a non-NULL checking is done for + * ESP_BUMP_STAT and ESP_DEBUMP_STAT + */ +#define ESP_BUMP_STAT(espstack, x) \ +do { \ + if (espstack->esp_kstats != NULL) \ + (espstack->esp_kstats->esp_stat_ ## x).value.ui64++; \ +_NOTE(CONSTCOND) \ +} while (0) + +#define ESP_DEBUMP_STAT(espstack, x) \ +do { \ + if (espstack->esp_kstats != NULL) \ + (espstack->esp_kstats->esp_stat_ ## x).value.ui64--; \ +_NOTE(CONSTCOND) \ +} while (0) static int esp_kstat_update(kstat_t *, int); static boolean_t -esp_kstat_init(void) +esp_kstat_init(ipsecesp_stack_t *espstack, netstackid_t stackid) { - esp_ksp = kstat_create("ipsecesp", 0, "esp_stat", "net", - KSTAT_TYPE_NAMED, sizeof (*esp_kstats) / sizeof (kstat_named_t), - KSTAT_FLAG_PERSISTENT); + espstack->esp_ksp = kstat_create_netstack("ipsecesp", 0, "esp_stat", + "net", KSTAT_TYPE_NAMED, + sizeof (esp_kstats_t) / sizeof (kstat_named_t), + KSTAT_FLAG_PERSISTENT, stackid); - if (esp_ksp == NULL) + if (espstack->esp_ksp == NULL || espstack->esp_ksp->ks_data == NULL) return (B_FALSE); - esp_kstats = esp_ksp->ks_data; + espstack->esp_kstats = espstack->esp_ksp->ks_data; - esp_ksp->ks_update = esp_kstat_update; + espstack->esp_ksp->ks_update = esp_kstat_update; + espstack->esp_ksp->ks_private = (void *)(uintptr_t)stackid; #define K64 KSTAT_DATA_UINT64 -#define KI(x) kstat_named_init(&(esp_kstats->esp_stat_##x), #x, K64) +#define KI(x) kstat_named_init(&(espstack->esp_kstats->esp_stat_##x), #x, K64) KI(num_aalgs); KI(num_ealgs); @@ -256,7 +268,7 @@ esp_kstat_init(void) #undef KI #undef K64 - kstat_install(esp_ksp); + kstat_install(espstack->esp_ksp); return (B_TRUE); } @@ -265,6 +277,9 @@ static int esp_kstat_update(kstat_t *kp, int rw) { esp_kstats_t *ekp; + netstackid_t stackid = (zoneid_t)(uintptr_t)kp->ks_private; + netstack_t *ns; + ipsec_stack_t *ipss; if ((kp == NULL) || (kp->ks_data == NULL)) return (EIO); @@ -272,15 +287,24 @@ esp_kstat_update(kstat_t *kp, int rw) if (rw == KSTAT_WRITE) return (EACCES); - ASSERT(kp == esp_ksp); + ns = netstack_find_by_stackid(stackid); + if (ns == NULL) + return (-1); + ipss = ns->netstack_ipsec; + if (ipss == NULL) { + netstack_rele(ns); + return (-1); + } ekp = (esp_kstats_t *)kp->ks_data; - ASSERT(ekp == esp_kstats); - mutex_enter(&alg_lock); - ekp->esp_stat_num_aalgs.value.ui64 = ipsec_nalgs[IPSEC_ALG_AUTH]; - ekp->esp_stat_num_ealgs.value.ui64 = ipsec_nalgs[IPSEC_ALG_ENCR]; - mutex_exit(&alg_lock); + mutex_enter(&ipss->ipsec_alg_lock); + ekp->esp_stat_num_aalgs.value.ui64 = + ipss->ipsec_nalgs[IPSEC_ALG_AUTH]; + ekp->esp_stat_num_ealgs.value.ui64 = + ipss->ipsec_nalgs[IPSEC_ALG_ENCR]; + mutex_exit(&ipss->ipsec_alg_lock); + netstack_rele(ns); return (0); } @@ -342,19 +366,22 @@ dump_msg(mblk_t *mp) * Don't have to lock age_interval, as only one thread will access it at * a time, because I control the one function that does with timeout(). 
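
Note how esp_kstat_init() and esp_kstat_update() above split the work: the kstat is created with kstat_create_netstack(), the owning stackid is parked in ks_private, and the update routine re-resolves the netstack on every snapshot rather than caching a pointer that could go stale when the instance is torn down. Condensed into an illustrative sketch (sample_kstat_update is a hypothetical name; the calls are the ones used above):

	/*
	 * Illustrative sketch of a per-netstack kstat update routine.
	 * The stackid was stored in ks_private at kstat_create_netstack()
	 * time; the netstack is looked up and released around each snapshot.
	 */
	static int
	sample_kstat_update(kstat_t *kp, int rw)
	{
		netstackid_t stackid = (netstackid_t)(uintptr_t)kp->ks_private;
		netstack_t *ns;

		if (rw == KSTAT_WRITE)
			return (EACCES);

		ns = netstack_find_by_stackid(stackid);
		if (ns == NULL)
			return (-1);

		/* ... copy this instance's counters into kp->ks_data ... */

		netstack_rele(ns);
		return (0);
	}
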
*/ -/* ARGSUSED */ static void -esp_ager(void *ignoreme) +esp_ager(void *arg) { + ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)arg; + netstack_t *ns = espstack->ipsecesp_netstack; hrtime_t begin = gethrtime(); - sadb_ager(&esp_sadb.s_v4, esp_pfkey_q, esp_sadb.s_ip_q, - ipsecesp_reap_delay); - sadb_ager(&esp_sadb.s_v6, esp_pfkey_q, esp_sadb.s_ip_q, - ipsecesp_reap_delay); + sadb_ager(&espstack->esp_sadb.s_v4, espstack->esp_pfkey_q, + espstack->esp_sadb.s_ip_q, espstack->ipsecesp_reap_delay, ns); + sadb_ager(&espstack->esp_sadb.s_v6, espstack->esp_pfkey_q, + espstack->esp_sadb.s_ip_q, espstack->ipsecesp_reap_delay, ns); - esp_event = sadb_retimeout(begin, esp_pfkey_q, esp_ager, - &(ipsecesp_age_interval), ipsecesp_age_int_max, info.mi_idnum); + espstack->esp_event = sadb_retimeout(begin, espstack->esp_pfkey_q, + esp_ager, espstack, + &espstack->ipsecesp_age_interval, espstack->ipsecesp_age_int_max, + info.mi_idnum); } /* @@ -370,10 +397,11 @@ ipsecesp_param_get(q, mp, cp, cr) { ipsecespparam_t *ipsecesppa = (ipsecespparam_t *)cp; uint_t value; + ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)q->q_ptr; - mutex_enter(&ipsecesp_param_lock); + mutex_enter(&espstack->ipsecesp_param_lock); value = ipsecesppa->ipsecesp_param_value; - mutex_exit(&ipsecesp_param_lock); + mutex_exit(&espstack->ipsecesp_param_lock); (void) mi_mpprintf(mp, "%u", value); return (0); @@ -393,6 +421,7 @@ ipsecesp_param_set(q, mp, value, cp, cr) { ulong_t new_value; ipsecespparam_t *ipsecesppa = (ipsecespparam_t *)cp; + ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)q->q_ptr; /* * Fail the request if the new value does not lie within the @@ -405,9 +434,9 @@ ipsecesp_param_set(q, mp, value, cp, cr) } /* Set the new value */ - mutex_enter(&ipsecesp_param_lock); + mutex_enter(&espstack->ipsecesp_param_lock); ipsecesppa->ipsecesp_param_value = new_value; - mutex_exit(&ipsecesp_param_lock); + mutex_exit(&espstack->ipsecesp_param_lock); return (0); } @@ -416,14 +445,20 @@ ipsecesp_param_set(q, mp, value, cp, cr) * lifetime information. */ void -ipsecesp_fill_defs(sadb_x_ecomb_t *ecomb) +ipsecesp_fill_defs(sadb_x_ecomb_t *ecomb, netstack_t *ns) { - ecomb->sadb_x_ecomb_soft_bytes = ipsecesp_default_soft_bytes; - ecomb->sadb_x_ecomb_hard_bytes = ipsecesp_default_hard_bytes; - ecomb->sadb_x_ecomb_soft_addtime = ipsecesp_default_soft_addtime; - ecomb->sadb_x_ecomb_hard_addtime = ipsecesp_default_hard_addtime; - ecomb->sadb_x_ecomb_soft_usetime = ipsecesp_default_soft_usetime; - ecomb->sadb_x_ecomb_hard_usetime = ipsecesp_default_hard_usetime; + ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; + + ecomb->sadb_x_ecomb_soft_bytes = espstack->ipsecesp_default_soft_bytes; + ecomb->sadb_x_ecomb_hard_bytes = espstack->ipsecesp_default_hard_bytes; + ecomb->sadb_x_ecomb_soft_addtime = + espstack->ipsecesp_default_soft_addtime; + ecomb->sadb_x_ecomb_hard_addtime = + espstack->ipsecesp_default_hard_addtime; + ecomb->sadb_x_ecomb_soft_usetime = + espstack->ipsecesp_default_soft_usetime; + ecomb->sadb_x_ecomb_hard_usetime = + espstack->ipsecesp_default_hard_usetime; } /* @@ -432,38 +467,73 @@ ipsecesp_fill_defs(sadb_x_ecomb_t *ecomb) boolean_t ipsecesp_ddi_init(void) { - int count; - ipsecespparam_t *espp = ipsecesp_param_arr; + esp_taskq = taskq_create("esp_taskq", 1, minclsyspri, + IPSEC_TASKQ_MIN, IPSEC_TASKQ_MAX, 0); + + /* + * We want to be informed each time a stack is created or + * destroyed in the kernel, so we can maintain the + * set of ipsecesp_stack_t's. 
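
The comment above introduces the core of the conversion: module-global setup (the taskq, the netstack registration) stays in ipsecesp_ddi_init(), while everything that used to be a file-scope global moves into an ipsecesp_stack_t created and destroyed with each IP instance. Reduced to a skeleton, the registration pattern this changeset applies to both ipsecah and ipsecesp looks roughly like the following; the foo names and NS_FOO are placeholders for illustration, not identifiers from the patch, while netstack_register()/netstack_unregister() and the init/fini signatures are the real interfaces used in the surrounding hunks:

	/*
	 * Illustrative per-instance state; the real structures are
	 * ipsecah_stack_t and ipsecesp_stack_t.
	 */
	typedef struct foo_stack {
		netstack_t	*foo_netstack;	/* back-pointer used throughout */
		/* ... per-instance tunables, kstats, SADBs, locks ... */
	} foo_stack_t;

	static void *
	foo_stack_init(netstackid_t stackid, netstack_t *ns)
	{
		foo_stack_t *fs = kmem_zalloc(sizeof (*fs), KM_SLEEP);

		fs->foo_netstack = ns;
		/* ... copy default tunables, create kstats, init SADBs ... */
		return (fs);
	}

	static void
	foo_stack_fini(netstackid_t stackid, void *arg)
	{
		foo_stack_t *fs = arg;

		/* ... untimeout, destroy SADBs, delete kstats, free tunables ... */
		kmem_free(fs, sizeof (*fs));
	}

	boolean_t
	foo_ddi_init(void)
	{
		/* One-time, module-global resources are still created here. */
		netstack_register(NS_FOO, foo_stack_init, NULL, foo_stack_fini);
		return (B_TRUE);
	}

	void
	foo_ddi_destroy(void)
	{
		netstack_unregister(NS_FOO);
	}
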
+ */ + netstack_register(NS_IPSECESP, ipsecesp_stack_init, NULL, + ipsecesp_stack_fini); + + return (B_TRUE); +} - for (count = A_CNT(ipsecesp_param_arr); count-- > 0; espp++) { +/* + * Walk through the param array specified registering each element with the + * named dispatch handler. + */ +static boolean_t +ipsecesp_param_register(IDP *ndp, ipsecespparam_t *espp, int cnt) +{ + for (; cnt-- > 0; espp++) { if (espp->ipsecesp_param_name != NULL && espp->ipsecesp_param_name[0]) { - if (!nd_load(&ipsecesp_g_nd, espp->ipsecesp_param_name, + if (!nd_load(ndp, + espp->ipsecesp_param_name, ipsecesp_param_get, ipsecesp_param_set, (caddr_t)espp)) { - nd_free(&ipsecesp_g_nd); + nd_free(ndp); return (B_FALSE); } } } + return (B_TRUE); +} +/* + * Initialize things for ESP for each stack instance + */ +static void * +ipsecesp_stack_init(netstackid_t stackid, netstack_t *ns) +{ + ipsecesp_stack_t *espstack; + ipsecespparam_t *espp; - if (!esp_kstat_init()) { - nd_free(&ipsecesp_g_nd); - return (B_FALSE); - } + espstack = (ipsecesp_stack_t *)kmem_zalloc(sizeof (*espstack), + KM_SLEEP); + espstack->ipsecesp_netstack = ns; - esp_sadb.s_acquire_timeout = &ipsecesp_acquire_timeout; - esp_sadb.s_acqfn = esp_send_acquire; - sadbp_init("ESP", &esp_sadb, SADB_SATYPE_ESP, esp_hash_size); + espp = (ipsecespparam_t *)kmem_alloc(sizeof (lcl_param_arr), KM_SLEEP); + espstack->ipsecesp_params = espp; + bcopy(lcl_param_arr, espp, sizeof (lcl_param_arr)); - esp_taskq = taskq_create("esp_taskq", 1, minclsyspri, - IPSEC_TASKQ_MIN, IPSEC_TASKQ_MAX, 0); + (void) ipsecesp_param_register(&espstack->ipsecesp_g_nd, espp, + A_CNT(lcl_param_arr)); - mutex_init(&ipsecesp_param_lock, NULL, MUTEX_DEFAULT, 0); + (void) esp_kstat_init(espstack, stackid); - ip_drop_register(&esp_dropper, "IPsec ESP"); + espstack->esp_sadb.s_acquire_timeout = + &espstack->ipsecesp_acquire_timeout; + espstack->esp_sadb.s_acqfn = esp_send_acquire; + sadbp_init("ESP", &espstack->esp_sadb, SADB_SATYPE_ESP, esp_hash_size, + espstack->ipsecesp_netstack); - return (B_TRUE); + mutex_init(&espstack->ipsecesp_param_lock, NULL, MUTEX_DEFAULT, 0); + + ip_drop_register(&espstack->esp_dropper, "IPsec ESP"); + return (espstack); } /* @@ -472,14 +542,34 @@ ipsecesp_ddi_init(void) void ipsecesp_ddi_destroy(void) { - esp1dbg(("In ipsecesp_ddi_destroy.\n")); - - sadbp_destroy(&esp_sadb); - ip_drop_unregister(&esp_dropper); + netstack_unregister(NS_IPSECESP); taskq_destroy(esp_taskq); - mutex_destroy(&ipsecesp_param_lock); - nd_free(&ipsecesp_g_nd); - kstat_delete(esp_ksp); +} + +/* + * Destroy things for ESP for one stack instance + */ +static void +ipsecesp_stack_fini(netstackid_t stackid, void *arg) +{ + ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)arg; + + if (espstack->esp_pfkey_q != NULL) { + (void) quntimeout(espstack->esp_pfkey_q, espstack->esp_event); + } + espstack->esp_sadb.s_acqfn = NULL; + espstack->esp_sadb.s_acquire_timeout = NULL; + sadbp_destroy(&espstack->esp_sadb, espstack->ipsecesp_netstack); + ip_drop_unregister(&espstack->esp_dropper); + mutex_destroy(&espstack->ipsecesp_param_lock); + nd_free(&espstack->ipsecesp_g_nd); + + kmem_free(espstack->ipsecesp_params, sizeof (lcl_param_arr)); + espstack->ipsecesp_params = NULL; + kstat_delete_netstack(espstack->esp_ksp, stackid); + espstack->esp_ksp = NULL; + espstack->esp_kstats = NULL; + kmem_free(espstack, sizeof (*espstack)); } /* @@ -489,8 +579,11 @@ ipsecesp_ddi_destroy(void) static int ipsecesp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) { - if (secpolicy_net_config(credp, 
B_FALSE) != 0) { - esp1dbg(("Non-privileged user trying to open ipsecesp.\n")); + netstack_t *ns; + ipsecesp_stack_t *espstack; + + if (secpolicy_ip_config(credp, B_FALSE) != 0) { + esp0dbg(("Non-privileged user trying to open ipsecesp.\n")); return (EPERM); } @@ -500,6 +593,11 @@ ipsecesp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) if (sflag != MODOPEN) return (EINVAL); + ns = netstack_find_by_cred(credp); + ASSERT(ns != NULL); + espstack = ns->netstack_ipsecesp; + ASSERT(espstack != NULL); + /* * ASSUMPTIONS (because I'm MT_OCEXCL): * @@ -510,32 +608,35 @@ ipsecesp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) * If these assumptions are wrong, I'm in BIG trouble... */ - q->q_ptr = q; /* just so I know I'm open */ + q->q_ptr = espstack; + WR(q)->q_ptr = q->q_ptr; - if (esp_sadb.s_ip_q == NULL) { + if (espstack->esp_sadb.s_ip_q == NULL) { struct T_unbind_req *tur; - esp_sadb.s_ip_q = WR(q); + espstack->esp_sadb.s_ip_q = WR(q); /* Allocate an unbind... */ - esp_ip_unbind = allocb(sizeof (struct T_unbind_req), BPRI_HI); + espstack->esp_ip_unbind = allocb(sizeof (struct T_unbind_req), + BPRI_HI); /* * Send down T_BIND_REQ to bind IPPROTO_ESP. * Handle the ACK here in ESP. */ qprocson(q); - if (esp_ip_unbind == NULL || - !sadb_t_bind_req(esp_sadb.s_ip_q, IPPROTO_ESP)) { - if (esp_ip_unbind != NULL) { - freeb(esp_ip_unbind); - esp_ip_unbind = NULL; + if (espstack->esp_ip_unbind == NULL || + !sadb_t_bind_req(espstack->esp_sadb.s_ip_q, IPPROTO_ESP)) { + if (espstack->esp_ip_unbind != NULL) { + freeb(espstack->esp_ip_unbind); + espstack->esp_ip_unbind = NULL; } q->q_ptr = NULL; + netstack_rele(espstack->ipsecesp_netstack); return (ENOMEM); } - esp_ip_unbind->b_datap->db_type = M_PROTO; - tur = (struct T_unbind_req *)esp_ip_unbind->b_rptr; + espstack->esp_ip_unbind->b_datap->db_type = M_PROTO; + tur = (struct T_unbind_req *)espstack->esp_ip_unbind->b_rptr; tur->PRIM_type = T_UNBIND_REQ; } else { qprocson(q); @@ -556,14 +657,17 @@ ipsecesp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) static int ipsecesp_close(queue_t *q) { + ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)q->q_ptr; + /* * If esp_sadb.s_ip_q is attached to this instance, send a * T_UNBIND_REQ to IP for the instance before doing * a qprocsoff(). */ - if (WR(q) == esp_sadb.s_ip_q && esp_ip_unbind != NULL) { - putnext(WR(q), esp_ip_unbind); - esp_ip_unbind = NULL; + if (WR(q) == espstack->esp_sadb.s_ip_q && + espstack->esp_ip_unbind != NULL) { + putnext(WR(q), espstack->esp_ip_unbind); + espstack->esp_ip_unbind = NULL; } /* @@ -573,50 +677,54 @@ ipsecesp_close(queue_t *q) /* Keysock queue check is safe, because of OCEXCL perimeter. */ - if (q == esp_pfkey_q) { - esp0dbg(("ipsecesp_close: Ummm... keysock is closing ESP.\n")); - esp_pfkey_q = NULL; + if (q == espstack->esp_pfkey_q) { + esp1dbg(espstack, + ("ipsecesp_close: Ummm... keysock is closing ESP.\n")); + espstack->esp_pfkey_q = NULL; /* Detach qtimeouts. */ - (void) quntimeout(q, esp_event); + (void) quntimeout(q, espstack->esp_event); } - if (WR(q) == esp_sadb.s_ip_q) { + if (WR(q) == espstack->esp_sadb.s_ip_q) { /* * If the esp_sadb.s_ip_q is attached to this instance, find * another. The OCEXCL outer perimeter helps us here. */ - esp_sadb.s_ip_q = NULL; + espstack->esp_sadb.s_ip_q = NULL; /* * Find a replacement queue for esp_sadb.s_ip_q. */ - if (esp_pfkey_q != NULL && esp_pfkey_q != RD(q)) { + if (espstack->esp_pfkey_q != NULL && + espstack->esp_pfkey_q != RD(q)) { /* * See if we can use the pfkey_q. 
*/ - esp_sadb.s_ip_q = WR(esp_pfkey_q); + espstack->esp_sadb.s_ip_q = WR(espstack->esp_pfkey_q); } - if (esp_sadb.s_ip_q == NULL || - !sadb_t_bind_req(esp_sadb.s_ip_q, IPPROTO_ESP)) { - esp1dbg(("ipsecesp: Can't reassign ip_q.\n")); - esp_sadb.s_ip_q = NULL; + if (espstack->esp_sadb.s_ip_q == NULL || + !sadb_t_bind_req(espstack->esp_sadb.s_ip_q, IPPROTO_ESP)) { + esp1dbg(espstack, ("ipsecesp: Can't reassign ip_q.\n")); + espstack->esp_sadb.s_ip_q = NULL; } else { - esp_ip_unbind = allocb(sizeof (struct T_unbind_req), - BPRI_HI); + espstack->esp_ip_unbind = + allocb(sizeof (struct T_unbind_req), BPRI_HI); - if (esp_ip_unbind != NULL) { + if (espstack->esp_ip_unbind != NULL) { struct T_unbind_req *tur; - esp_ip_unbind->b_datap->db_type = M_PROTO; + espstack->esp_ip_unbind->b_datap->db_type = + M_PROTO; tur = (struct T_unbind_req *) - esp_ip_unbind->b_rptr; + espstack->esp_ip_unbind->b_rptr; tur->PRIM_type = T_UNBIND_REQ; } /* If it's NULL, I can't do much here. */ } } + netstack_rele(espstack->ipsecesp_netstack); return (0); } @@ -635,10 +743,12 @@ esp_age_bytes(ipsa_t *assoc, uint64_t bytes, boolean_t inbound) boolean_t inrc, outrc, isv6; sadb_t *sp; int outhash; + netstack_t *ns = assoc->ipsa_netstack; + ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; /* No peer? No problem! */ if (!assoc->ipsa_haspeer) { - return (sadb_age_bytes(esp_pfkey_q, assoc, bytes, + return (sadb_age_bytes(espstack->esp_pfkey_q, assoc, bytes, B_TRUE)); } @@ -655,7 +765,7 @@ esp_age_bytes(ipsa_t *assoc, uint64_t bytes, boolean_t inbound) /* Use address length to select IPv6/IPv4 */ isv6 = (assoc->ipsa_addrfam == AF_INET6); - sp = isv6 ? &esp_sadb.s_v6 : &esp_sadb.s_v4; + sp = isv6 ? &espstack->esp_sadb.s_v6 : &espstack->esp_sadb.s_v4; if (inbound) { inassoc = assoc; @@ -676,7 +786,7 @@ esp_age_bytes(ipsa_t *assoc, uint64_t bytes, boolean_t inbound) /* Q: Do we wish to set haspeer == B_FALSE? */ esp0dbg(("esp_age_bytes: " "can't find peer for inbound.\n")); - return (sadb_age_bytes(esp_pfkey_q, inassoc, + return (sadb_age_bytes(espstack->esp_pfkey_q, inassoc, bytes, B_TRUE)); } } else { @@ -691,13 +801,13 @@ esp_age_bytes(ipsa_t *assoc, uint64_t bytes, boolean_t inbound) /* Q: Do we wish to set haspeer == B_FALSE? */ esp0dbg(("esp_age_bytes: " "can't find peer for outbound.\n")); - return (sadb_age_bytes(esp_pfkey_q, outassoc, + return (sadb_age_bytes(espstack->esp_pfkey_q, outassoc, bytes, B_TRUE)); } } - inrc = sadb_age_bytes(esp_pfkey_q, inassoc, bytes, B_TRUE); - outrc = sadb_age_bytes(esp_pfkey_q, outassoc, bytes, B_FALSE); + inrc = sadb_age_bytes(espstack->esp_pfkey_q, inassoc, bytes, B_TRUE); + outrc = sadb_age_bytes(espstack->esp_pfkey_q, outassoc, bytes, B_FALSE); /* * REFRELE any peer SA. 
@@ -768,7 +878,7 @@ esp_fix_natt_checksums(mblk_t *data_mp, ipsa_t *assoc) */ static boolean_t esp_strip_header(mblk_t *data_mp, boolean_t isv4, uint32_t ivlen, - kstat_named_t **counter) + kstat_named_t **counter, ipsecesp_stack_t *espstack) { ipha_t *ipha; ip6_t *ip6h; @@ -776,6 +886,7 @@ esp_strip_header(mblk_t *data_mp, boolean_t isv4, uint32_t ivlen, mblk_t *scratch; uint8_t nexthdr, padlen; uint8_t lastpad; + ipsec_stack_t *ipss = espstack->ipsecesp_netstack->netstack_ipsec; uint8_t *lastbyte; /* @@ -821,15 +932,19 @@ esp_strip_header(mblk_t *data_mp, boolean_t isv4, uint32_t ivlen, */ if (padlen >= ntohs(ipha->ipha_length) - sizeof (ipha_t) - 2 - sizeof (esph_t) - ivlen) { - ESP_BUMP_STAT(bad_decrypt); - ipsec_rl_strlog(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN, + ESP_BUMP_STAT(espstack, bad_decrypt); + ipsec_rl_strlog(espstack->ipsecesp_netstack, + info.mi_idnum, 0, 0, + SL_ERROR | SL_WARN, "Corrupt ESP packet (padlen too big).\n"); - esp1dbg(("padlen (%d) is greater than:\n", padlen)); - esp1dbg(("pkt len(%d) - ip hdr - esp hdr - ivlen(%d) " - "= %d.\n", ntohs(ipha->ipha_length), ivlen, + esp1dbg(espstack, ("padlen (%d) is greater than:\n", + padlen)); + esp1dbg(espstack, ("pkt len(%d) - ip hdr - esp " + "hdr - ivlen(%d) = %d.\n", + ntohs(ipha->ipha_length), ivlen, (int)(ntohs(ipha->ipha_length) - sizeof (ipha_t) - - 2 - sizeof (esph_t) - ivlen))); - *counter = &ipdrops_esp_bad_padlen; + 2 - sizeof (esph_t) - ivlen))); + *counter = DROPPER(ipss, ipds_esp_bad_padlen); return (B_FALSE); } @@ -866,16 +981,20 @@ esp_strip_header(mblk_t *data_mp, boolean_t isv4, uint32_t ivlen, if (padlen >= ntohs(ip6h->ip6_plen) - 2 - sizeof (esph_t) - ivlen) { - ESP_BUMP_STAT(bad_decrypt); - ipsec_rl_strlog(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN, + ESP_BUMP_STAT(espstack, bad_decrypt); + ipsec_rl_strlog(espstack->ipsecesp_netstack, + info.mi_idnum, 0, 0, + SL_ERROR | SL_WARN, "Corrupt ESP packet (v6 padlen too big).\n"); - esp1dbg(("padlen (%d) is greater than:\n", padlen)); - esp1dbg(("pkt len(%u) - ip hdr - esp hdr - ivlen(%d)" - " = %u.\n", (unsigned)(ntohs(ip6h->ip6_plen) - + sizeof (ip6_t)), ivlen, - (unsigned)(ntohs(ip6h->ip6_plen) - 2 - - sizeof (esph_t) - ivlen))); - *counter = &ipdrops_esp_bad_padlen; + esp1dbg(espstack, ("padlen (%d) is greater than:\n", + padlen)); + esp1dbg(espstack, ("pkt len(%u) - ip hdr - esp " + "hdr - ivlen(%d) = %u.\n", + (unsigned)(ntohs(ip6h->ip6_plen) + + sizeof (ip6_t)), ivlen, + (unsigned)(ntohs(ip6h->ip6_plen) - 2 - + sizeof (esph_t) - ivlen))); + *counter = DROPPER(ipss, ipds_esp_bad_padlen); return (B_FALSE); } @@ -889,7 +1008,7 @@ esp_strip_header(mblk_t *data_mp, boolean_t isv4, uint32_t ivlen, 2 - sizeof (esph_t) - ivlen); } - if (ipsecesp_padding_check > 0 && padlen > 0) { + if (espstack->ipsecesp_padding_check > 0 && padlen > 0) { /* * Weak padding check: compare last-byte to length, they * should be equal. 
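Tunables such as espstack->ipsecesp_padding_check above read like plain structure members even though the values now live in a per-stack copy of lcl_param_arr. One common way to arrange that, and presumably what the companion header (not part of this hunk) does, is to re-create the old shorthand names as member-style macros into that array. A sketch with hypothetical names and illustrative indices:

    /* Illustrative sketch only; names and array indices are hypothetical. */
    typedef struct fooparam {
            uint_t  foo_param_min;
            uint_t  foo_param_max;
            uint_t  foo_param_value;
            char    *foo_param_name;
    } fooparam_t;

    /*
     * Assuming the per-stack structure carries
     *      fooparam_t *foos_params;        (its private copy of the template)
     * the shorthands become member-style macros, so call sites can keep
     * reading fs->foos_padding_check and the like.
     */
    #define foos_debug              foos_params[0].foo_param_value
    #define foos_padding_check      foos_params[6].foo_param_value
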
@@ -897,12 +1016,14 @@ esp_strip_header(mblk_t *data_mp, boolean_t isv4, uint32_t ivlen, lastpad = *lastbyte--; if (padlen != lastpad) { - ipsec_rl_strlog(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN, + ipsec_rl_strlog(espstack->ipsecesp_netstack, + info.mi_idnum, 0, 0, SL_ERROR | SL_WARN, "Corrupt ESP packet (lastpad != padlen).\n"); - esp1dbg(("lastpad (%d) not equal to padlen (%d):\n", - lastpad, padlen)); - ESP_BUMP_STAT(bad_padding); - *counter = &ipdrops_esp_bad_padding; + esp1dbg(espstack, + ("lastpad (%d) not equal to padlen (%d):\n", + lastpad, padlen)); + ESP_BUMP_STAT(espstack, bad_padding); + *counter = DROPPER(ipss, ipds_esp_bad_padding); return (B_FALSE); } @@ -914,7 +1035,7 @@ esp_strip_header(mblk_t *data_mp, boolean_t isv4, uint32_t ivlen, * Consequently, start the check one byte before the location * of "lastpad". */ - if (ipsecesp_padding_check > 1) { + if (espstack->ipsecesp_padding_check > 1) { /* * This assert may have to become an if and a pullup * if we start accepting multi-dblk mblks. For now, @@ -929,13 +1050,17 @@ esp_strip_header(mblk_t *data_mp, boolean_t isv4, uint32_t ivlen, */ while (--lastpad != 0) { if (lastpad != *lastbyte) { - ipsec_rl_strlog(info.mi_idnum, 0, 0, + ipsec_rl_strlog( + espstack->ipsecesp_netstack, + info.mi_idnum, 0, 0, SL_ERROR | SL_WARN, "Corrupt ESP " "packet (bad padding).\n"); - esp1dbg(("padding not in correct" - " format:\n")); - ESP_BUMP_STAT(bad_padding); - *counter = &ipdrops_esp_bad_padding; + esp1dbg(espstack, + ("padding not in correct" + " format:\n")); + ESP_BUMP_STAT(espstack, bad_padding); + *counter = DROPPER(ipss, + ipds_esp_bad_padding); return (B_FALSE); } lastbyte--; @@ -991,8 +1116,8 @@ esp_strip_header(mblk_t *data_mp, boolean_t isv4, uint32_t ivlen, data_mp->b_rptr = dst; } - esp2dbg(("data_mp after inbound ESP adjustment:\n")); - esp2dbg((dump_msg(data_mp))); + esp2dbg(espstack, ("data_mp after inbound ESP adjustment:\n")); + esp2dbg(espstack, (dump_msg(data_mp))); return (B_TRUE); } @@ -1012,6 +1137,8 @@ esp_set_usetime(ipsa_t *assoc, boolean_t inbound) sadb_t *sp; int outhash; boolean_t isv6; + netstack_t *ns = assoc->ipsa_netstack; + ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; /* No peer? No problem! */ if (!assoc->ipsa_haspeer) { @@ -1031,7 +1158,7 @@ esp_set_usetime(ipsa_t *assoc, boolean_t inbound) /* Use address length to select IPv6/IPv4 */ isv6 = (assoc->ipsa_addrfam == AF_INET6); - sp = isv6 ? &esp_sadb.s_v6 : &esp_sadb.s_v4; + sp = isv6 ? &espstack->esp_sadb.s_v6 : &espstack->esp_sadb.s_v4; if (inbound) { inassoc = assoc; @@ -1101,6 +1228,9 @@ esp_inbound(mblk_t *ipsec_in_mp, void *arg) ipsec_in_t *ii = (ipsec_in_t *)ipsec_in_mp->b_rptr; esph_t *esph = (esph_t *)arg; ipsa_t *ipsa = ii->ipsec_in_esp_sa; + netstack_t *ns = ii->ipsec_in_ns; + ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; + ipsec_stack_t *ipss = ns->netstack_ipsec; if (ipsa->ipsa_usetime == 0) esp_set_usetime(ipsa, B_TRUE); @@ -1117,14 +1247,15 @@ esp_inbound(mblk_t *ipsec_in_mp, void *arg) * take place when it doesn't need to. */ if (!sadb_replay_peek(ipsa, esph->esph_replay)) { - ESP_BUMP_STAT(replay_early_failures); - IP_ESP_BUMP_STAT(in_discards); + ESP_BUMP_STAT(espstack, replay_early_failures); + IP_ESP_BUMP_STAT(ipss, in_discards); /* * TODO: Extract inbound interface from the IPSEC_IN * message's ii->ipsec_in_rill_index. 
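The ESP_BUMP_STAT(espstack, ...) calls in this area replace bumps of formerly module-global counters with bumps against kstats held by the packet's own stack instance. A plausible sketch of such a macro and its backing structure, using illustrative names rather than the ones in the real header:

    /* Illustrative sketch only; the real structure and macro names may differ. */
    typedef struct foo_kstats {
            kstat_named_t   foo_stat_good_auth;
            kstat_named_t   foo_stat_bad_auth;
            kstat_named_t   foo_stat_bad_padding;
            /* ... one kstat_named_t per counter ... */
    } foo_kstats_t;

    /*
     * Assuming the per-stack structure carries a foo_kstats_t *foos_kstats
     * (pointing at the data section of a kstat created for this stackid),
     * a per-instance bump looks like:
     */
    #define FOO_BUMP_STAT(fs, x) \
            (((fs)->foos_kstats->foo_stat_ ## x).value.ui64++)

The teardown side is visible in ipsecesp_stack_fini() earlier in this patch, which removes the instance's kstat with kstat_delete_netstack(); esp_kstat_init(espstack, stackid) presumably creates it keyed by the same stackid.
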
*/ ip_drop_packet(ipsec_in_mp, B_TRUE, NULL, NULL, - &ipdrops_esp_early_replay, &esp_dropper); + DROPPER(ipss, ipds_esp_early_replay), + &espstack->esp_dropper); return (IPSEC_STATUS_FAILED); } @@ -1134,13 +1265,14 @@ esp_inbound(mblk_t *ipsec_in_mp, void *arg) */ if (ii->ipsec_in_accelerated) { ipsec_status_t rv; - esp3dbg(("esp_inbound: pkt processed by ill=%d isv6=%d\n", + esp3dbg(espstack, + ("esp_inbound: pkt processed by ill=%d isv6=%d\n", ii->ipsec_in_ill_index, !ii->ipsec_in_v4)); rv = esp_inbound_accelerated(ipsec_in_mp, data_mp, ii->ipsec_in_v4, ipsa); return (rv); } - ESP_BUMP_STAT(noaccel); + ESP_BUMP_STAT(espstack, noaccel); /* * Adjust the IP header's payload length to reflect the removal @@ -1172,16 +1304,22 @@ esp_insert_prop(sadb_prop_t *prop, ipsacq_t *acqrec, uint_t combs) ipsec_out_t *io; ipsec_action_t *ap; ipsec_prot_t *prot; + netstack_t *ns; + ipsecesp_stack_t *espstack; + ipsec_stack_t *ipss; - ASSERT(MUTEX_HELD(&alg_lock)); io = (ipsec_out_t *)acqrec->ipsacq_mp->b_rptr; ASSERT(io->ipsec_out_type == IPSEC_OUT); + ns = io->ipsec_out_ns; + espstack = ns->netstack_ipsecesp; + ipss = ns->netstack_ipsec; + ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock)); prop->sadb_prop_exttype = SADB_EXT_PROPOSAL; prop->sadb_prop_len = SADB_8TO64(sizeof (sadb_prop_t)); *(uint32_t *)(&prop->sadb_prop_replay) = 0; /* Quick zero-out! */ - prop->sadb_prop_replay = ipsecesp_replay_size; + prop->sadb_prop_replay = espstack->ipsecesp_replay_size; /* * Based upon algorithm properties, and what-not, prioritize @@ -1206,14 +1344,15 @@ esp_insert_prop(sadb_prop_t *prop, ipsacq_t *acqrec, uint_t combs) continue; if (prot->ipp_esp_auth_alg != 0) { - aalg = ipsec_alglists[IPSEC_ALG_AUTH] + aalg = ipss->ipsec_alglists[IPSEC_ALG_AUTH] [prot->ipp_esp_auth_alg]; if (aalg == NULL || !ALG_VALID(aalg)) continue; } ASSERT(prot->ipp_encr_alg > 0); - ealg = ipsec_alglists[IPSEC_ALG_ENCR][prot->ipp_encr_alg]; + ealg = ipss->ipsec_alglists[IPSEC_ALG_ENCR] + [prot->ipp_encr_alg]; if (ealg == NULL || !ALG_VALID(ealg)) continue; @@ -1254,12 +1393,18 @@ esp_insert_prop(sadb_prop_t *prop, ipsacq_t *acqrec, uint_t combs) /* * These may want to come from policy rule.. */ - comb->sadb_comb_soft_bytes = ipsecesp_default_soft_bytes; - comb->sadb_comb_hard_bytes = ipsecesp_default_hard_bytes; - comb->sadb_comb_soft_addtime = ipsecesp_default_soft_addtime; - comb->sadb_comb_hard_addtime = ipsecesp_default_hard_addtime; - comb->sadb_comb_soft_usetime = ipsecesp_default_soft_usetime; - comb->sadb_comb_hard_usetime = ipsecesp_default_hard_usetime; + comb->sadb_comb_soft_bytes = + espstack->ipsecesp_default_soft_bytes; + comb->sadb_comb_hard_bytes = + espstack->ipsecesp_default_hard_bytes; + comb->sadb_comb_soft_addtime = + espstack->ipsecesp_default_soft_addtime; + comb->sadb_comb_hard_addtime = + espstack->ipsecesp_default_hard_addtime; + comb->sadb_comb_soft_usetime = + espstack->ipsecesp_default_soft_usetime; + comb->sadb_comb_hard_usetime = + espstack->ipsecesp_default_hard_usetime; prop->sadb_prop_len += SADB_8TO64(sizeof (*comb)); if (--combs == 0) @@ -1272,26 +1417,30 @@ esp_insert_prop(sadb_prop_t *prop, ipsacq_t *acqrec, uint_t combs) * Prepare and actually send the SADB_ACQUIRE message to PF_KEY. 
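esp_insert_prop() and esp_send_acquire() now consult the algorithm tables hanging off the per-stack ipsec_stack_t, under that stack's own ipsec_alg_lock, instead of the old globals. A small self-contained sketch of the access pattern; the function name and the counting loop are illustrative only:

    /* Illustrative sketch only; foo_count_encr_algs() is hypothetical. */
    static int
    foo_count_encr_algs(netstack_t *ns)
    {
            ipsec_stack_t *ipss = ns->netstack_ipsec;
            ipsec_alginfo_t *alg;
            int i, nvalid = 0;

            mutex_enter(&ipss->ipsec_alg_lock);
            for (i = 0; i < IPSEC_MAX_ALGS; i++) {
                    alg = ipss->ipsec_alglists[IPSEC_ALG_ENCR][i];
                    /* count only algorithms usable in this IP instance */
                    if (alg != NULL && ALG_VALID(alg))
                            nvalid++;
            }
            mutex_exit(&ipss->ipsec_alg_lock);
            return (nvalid);
    }

esp_register_out(), further down in this patch, counts both the authentication and encryption tables in exactly this way before sizing the SADB_REGISTER reply.
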
*/ static void -esp_send_acquire(ipsacq_t *acqrec, mblk_t *extended) +esp_send_acquire(ipsacq_t *acqrec, mblk_t *extended, netstack_t *ns) { uint_t combs; sadb_msg_t *samsg; sadb_prop_t *prop; mblk_t *pfkeymp, *msgmp; + ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; + ipsec_stack_t *ipss = ns->netstack_ipsec; - ESP_BUMP_STAT(acquire_requests); + ESP_BUMP_STAT(espstack, acquire_requests); - if (esp_pfkey_q == NULL) + if (espstack->esp_pfkey_q == NULL) return; /* Set up ACQUIRE. */ - pfkeymp = sadb_setup_acquire(acqrec, SADB_SATYPE_ESP); + pfkeymp = sadb_setup_acquire(acqrec, SADB_SATYPE_ESP, + ns->netstack_ipsec); if (pfkeymp == NULL) { esp0dbg(("sadb_setup_acquire failed.\n")); return; } - ASSERT(MUTEX_HELD(&alg_lock)); - combs = ipsec_nalgs[IPSEC_ALG_AUTH] * ipsec_nalgs[IPSEC_ALG_ENCR]; + ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock)); + combs = ipss->ipsec_nalgs[IPSEC_ALG_AUTH] * + ipss->ipsec_nalgs[IPSEC_ALG_ENCR]; msgmp = pfkeymp->b_cont; samsg = (sadb_msg_t *)(msgmp->b_rptr); @@ -1302,7 +1451,7 @@ esp_send_acquire(ipsacq_t *acqrec, mblk_t *extended) samsg->sadb_msg_len += prop->sadb_prop_len; msgmp->b_wptr += SADB_64TO8(samsg->sadb_msg_len); - mutex_exit(&alg_lock); + mutex_exit(&ipss->ipsec_alg_lock); /* * Must mutex_exit() before sending PF_KEY message up, in @@ -1313,16 +1462,16 @@ esp_send_acquire(ipsacq_t *acqrec, mblk_t *extended) */ mutex_exit(&acqrec->ipsacq_lock); if (extended != NULL) { - putnext(esp_pfkey_q, extended); + putnext(espstack->esp_pfkey_q, extended); } - putnext(esp_pfkey_q, pfkeymp); + putnext(espstack->esp_pfkey_q, pfkeymp); } /* * Handle the SADB_GETSPI message. Create a larval SA. */ static void -esp_getspi(mblk_t *mp, keysock_in_t *ksi) +esp_getspi(mblk_t *mp, keysock_in_t *ksi, ipsecesp_stack_t *espstack) { ipsa_t *newbie, *target; isaf_t *outbound, *inbound; @@ -1335,14 +1484,15 @@ esp_getspi(mblk_t *mp, keysock_in_t *ksi) * Randomly generate a proposed SPI value */ (void) random_get_pseudo_bytes((uint8_t *)&newspi, sizeof (uint32_t)); - newbie = sadb_getspi(ksi, newspi, &diagnostic); + newbie = sadb_getspi(ksi, newspi, &diagnostic, + espstack->ipsecesp_netstack); if (newbie == NULL) { - sadb_pfkey_error(esp_pfkey_q, mp, ENOMEM, diagnostic, + sadb_pfkey_error(espstack->esp_pfkey_q, mp, ENOMEM, diagnostic, ksi->ks_in_serial); return; } else if (newbie == (ipsa_t *)-1) { - sadb_pfkey_error(esp_pfkey_q, mp, EINVAL, diagnostic, + sadb_pfkey_error(espstack->esp_pfkey_q, mp, EINVAL, diagnostic, ksi->ks_in_serial); return; } @@ -1354,14 +1504,16 @@ esp_getspi(mblk_t *mp, keysock_in_t *ksi) */ if (newbie->ipsa_addrfam == AF_INET6) { - outbound = OUTBOUND_BUCKET_V6(&esp_sadb.s_v6, + outbound = OUTBOUND_BUCKET_V6(&espstack->esp_sadb.s_v6, *(uint32_t *)(newbie->ipsa_dstaddr)); - inbound = INBOUND_BUCKET(&esp_sadb.s_v6, newbie->ipsa_spi); + inbound = INBOUND_BUCKET(&espstack->esp_sadb.s_v6, + newbie->ipsa_spi); } else { ASSERT(newbie->ipsa_addrfam == AF_INET); - outbound = OUTBOUND_BUCKET_V4(&esp_sadb.s_v4, + outbound = OUTBOUND_BUCKET_V4(&espstack->esp_sadb.s_v4, *(uint32_t *)(newbie->ipsa_dstaddr)); - inbound = INBOUND_BUCKET(&esp_sadb.s_v4, newbie->ipsa_spi); + inbound = INBOUND_BUCKET(&espstack->esp_sadb.s_v4, + newbie->ipsa_spi); } mutex_enter(&outbound->isaf_lock); @@ -1398,7 +1550,8 @@ esp_getspi(mblk_t *mp, keysock_in_t *ksi) */ rc = sadb_insertassoc(newbie, inbound); (void) drv_getparm(TIME, &newbie->ipsa_hardexpiretime); - newbie->ipsa_hardexpiretime += ipsecesp_larval_timeout; + newbie->ipsa_hardexpiretime += + espstack->ipsecesp_larval_timeout; } /* @@ 
-1410,8 +1563,8 @@ esp_getspi(mblk_t *mp, keysock_in_t *ksi) if (rc != 0) { mutex_exit(&inbound->isaf_lock); IPSA_REFRELE(newbie); - sadb_pfkey_error(esp_pfkey_q, mp, rc, SADB_X_DIAGNOSTIC_NONE, - ksi->ks_in_serial); + sadb_pfkey_error(espstack->esp_pfkey_q, mp, rc, + SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial); return; } @@ -1440,7 +1593,7 @@ esp_getspi(mblk_t *mp, keysock_in_t *ksi) * Can safely putnext() to esp_pfkey_q, because this is a turnaround * from the esp_pfkey_q. */ - putnext(esp_pfkey_q, mp); + putnext(espstack->esp_pfkey_q, mp); } /* @@ -1448,7 +1601,8 @@ esp_getspi(mblk_t *mp, keysock_in_t *ksi) * allocated mblk with the ESP header in between the two. */ static boolean_t -esp_insert_esp(mblk_t *mp, mblk_t *esp_mp, uint_t divpoint) +esp_insert_esp(mblk_t *mp, mblk_t *esp_mp, uint_t divpoint, + ipsecesp_stack_t *espstack) { mblk_t *split_mp = mp; uint_t wheretodiv = divpoint; @@ -1465,7 +1619,8 @@ esp_insert_esp(mblk_t *mp, mblk_t *esp_mp, uint_t divpoint) /* "scratch" is the 2nd half, split_mp is the first. */ scratch = dupb(split_mp); if (scratch == NULL) { - esp1dbg(("esp_insert_esp: can't allocate scratch.\n")); + esp1dbg(espstack, + ("esp_insert_esp: can't allocate scratch.\n")); return (B_FALSE); } /* NOTE: dupb() doesn't set b_cont appropriately. */ @@ -1506,6 +1661,9 @@ esp_in_done(mblk_t *ipsec_in_mp) esph_t *esph; kstat_named_t *counter; boolean_t is_natt; + netstack_t *ns = ii->ipsec_in_ns; + ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; + ipsec_stack_t *ipss = ns->netstack_ipsec; assoc = ii->ipsec_in_esp_sa; ASSERT(assoc != NULL); @@ -1539,7 +1697,7 @@ esp_in_done(mblk_t *ipsec_in_mp) if (assoc->ipsa_auth_alg != IPSA_AALG_NONE) { /* authentication passed if we reach this point */ - ESP_BUMP_STAT(good_auth); + ESP_BUMP_STAT(espstack, good_auth); data_mp->b_wptr -= assoc->ipsa_mac_len; /* @@ -1561,9 +1719,9 @@ esp_in_done(mblk_t *ipsec_in_mp) SL_ERROR | SL_WARN, "Replay failed for ESP spi 0x%x, dst %s.\n", assoc->ipsa_spi, assoc->ipsa_dstaddr, - assoc->ipsa_addrfam); - ESP_BUMP_STAT(replay_failures); - counter = &ipdrops_esp_replay; + assoc->ipsa_addrfam, espstack->ipsecesp_netstack); + ESP_BUMP_STAT(espstack, replay_failures); + counter = DROPPER(ipss, ipds_esp_replay); goto drop_and_bail; } } @@ -1573,9 +1731,10 @@ esp_in_done(mblk_t *ipsec_in_mp) ipsec_assocfailure(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN, "ESP association 0x%x, dst %s had bytes expire.\n", - assoc->ipsa_spi, assoc->ipsa_dstaddr, assoc->ipsa_addrfam); - ESP_BUMP_STAT(bytes_expired); - counter = &ipdrops_esp_bytes_expire; + assoc->ipsa_spi, assoc->ipsa_dstaddr, assoc->ipsa_addrfam, + espstack->ipsecesp_netstack); + ESP_BUMP_STAT(espstack, bytes_expired); + counter = DROPPER(ipss, ipds_esp_bytes_expire); goto drop_and_bail; } @@ -1584,20 +1743,22 @@ esp_in_done(mblk_t *ipsec_in_mp) * spews "branch, predict taken" code for this. */ - if (esp_strip_header(data_mp, ii->ipsec_in_v4, ivlen, &counter)) { + if (esp_strip_header(data_mp, ii->ipsec_in_v4, ivlen, &counter, + espstack)) { if (is_natt) return (esp_fix_natt_checksums(data_mp, assoc)); return (IPSEC_STATUS_SUCCESS); } - esp1dbg(("esp_in_done: esp_strip_header() failed\n")); + esp1dbg(espstack, ("esp_in_done: esp_strip_header() failed\n")); drop_and_bail: - IP_ESP_BUMP_STAT(in_discards); + IP_ESP_BUMP_STAT(ipss, in_discards); /* * TODO: Extract inbound interface from the IPSEC_IN message's * ii->ipsec_in_rill_index. 
*/ - ip_drop_packet(ipsec_in_mp, B_TRUE, NULL, NULL, counter, &esp_dropper); + ip_drop_packet(ipsec_in_mp, B_TRUE, NULL, NULL, counter, + &espstack->esp_dropper); return (IPSEC_STATUS_FAILED); } @@ -1610,24 +1771,29 @@ esp_log_bad_auth(mblk_t *ipsec_in) { ipsec_in_t *ii = (ipsec_in_t *)ipsec_in->b_rptr; ipsa_t *assoc = ii->ipsec_in_esp_sa; + netstack_t *ns = ii->ipsec_in_ns; + ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; + ipsec_stack_t *ipss = ns->netstack_ipsec; /* * Log the event. Don't print to the console, block * potential denial-of-service attack. */ - ESP_BUMP_STAT(bad_auth); + ESP_BUMP_STAT(espstack, bad_auth); ipsec_assocfailure(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN, "ESP Authentication failed for spi 0x%x, dst %s.\n", - assoc->ipsa_spi, assoc->ipsa_dstaddr, assoc->ipsa_addrfam); + assoc->ipsa_spi, assoc->ipsa_dstaddr, assoc->ipsa_addrfam, + espstack->ipsecesp_netstack); - IP_ESP_BUMP_STAT(in_discards); + IP_ESP_BUMP_STAT(ipss, in_discards); /* * TODO: Extract inbound interface from the IPSEC_IN * message's ii->ipsec_in_rill_index. */ - ip_drop_packet(ipsec_in, B_TRUE, NULL, NULL, &ipdrops_esp_bad_auth, - &esp_dropper); + ip_drop_packet(ipsec_in, B_TRUE, NULL, NULL, + DROPPER(ipss, ipds_esp_bad_auth), + &espstack->esp_dropper); } @@ -1680,14 +1846,44 @@ esp_kcf_callback(void *arg, int status) mblk_t *ipsec_mp = (mblk_t *)arg; ipsec_in_t *ii = (ipsec_in_t *)ipsec_mp->b_rptr; boolean_t is_inbound = (ii->ipsec_in_type == IPSEC_IN); + netstackid_t stackid; + netstack_t *ns, *ns_arg; + ipsecesp_stack_t *espstack; + ipsec_stack_t *ipss; + ipsec_out_t *io = (ipsec_out_t *)ii; ASSERT(ipsec_mp->b_cont != NULL); + if (is_inbound) { + stackid = ii->ipsec_in_stackid; + ns_arg = ii->ipsec_in_ns; + } else { + stackid = io->ipsec_out_stackid; + ns_arg = io->ipsec_out_ns; + } + + /* + * Verify that the netstack is still around; could have vanished + * while kEf was doing its work. 
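Because the crypto framework may complete a request long after the zone that issued it has been halted, the callback cannot trust the netstack pointer it captured earlier. A minimal sketch of the stackid re-lookup pattern used here; foo_job_t and foo_async_done are placeholders:

    /* Illustrative sketch only; foo_job_t and foo_async_done are hypothetical. */
    typedef struct foo_job {
            mblk_t          *fj_mp;         /* packet being processed */
            netstackid_t    fj_stackid;     /* recorded before dispatch */
    } foo_job_t;

    /* ARGSUSED */
    static void
    foo_async_done(void *arg, int status)
    {
            foo_job_t *fj = arg;
            netstack_t *ns = netstack_find_by_stackid(fj->fj_stackid);

            if (ns == NULL) {
                    /* the IP instance vanished while the request was queued */
                    freemsg(fj->fj_mp);
                    kmem_free(fj, sizeof (*fj));
                    return;
            }
            /* ... continue processing with ns->netstack_ipsecesp ... */
            netstack_rele(ns);      /* drop the hold taken by the lookup */
            kmem_free(fj, sizeof (*fj));
    }

esp_kcf_callback() additionally compares the looked-up netstack against the pointer stashed in the IPSEC_IN/IPSEC_OUT block (ns != ns_arg), which also catches a stackid that has been reused by a newly created instance; the stackid itself is saved in ipsec_in_stackid/ipsec_out_stackid by the submit routines later in this patch.
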
+ */ + ns = netstack_find_by_stackid(stackid); + if (ns == NULL || ns != ns_arg) { + /* Disappeared on us */ + if (ns != NULL) + netstack_rele(ns); + freemsg(ipsec_mp); + return; + } + + espstack = ns->netstack_ipsecesp; + ipss = ns->netstack_ipsec; + if (status == CRYPTO_SUCCESS) { if (is_inbound) { - if (esp_in_done(ipsec_mp) != IPSEC_STATUS_SUCCESS) + if (esp_in_done(ipsec_mp) != IPSEC_STATUS_SUCCESS) { + netstack_rele(ns); return; - + } /* finish IPsec processing */ ip_fanout_proto_again(ipsec_mp, NULL, NULL, NULL); } else { @@ -1698,9 +1894,10 @@ esp_kcf_callback(void *arg, int status) ipha_t *ipha = (ipha_t *)ipsec_mp->b_cont->b_rptr; /* do AH processing if needed */ - if (!esp_do_outbound_ah(ipsec_mp)) + if (!esp_do_outbound_ah(ipsec_mp)) { + netstack_rele(ns); return; - + } /* finish IPsec processing */ if (IPH_HDR_VERSION(ipha) == IP_VERSION) { ip_wput_ipsec_out(NULL, ipsec_mp, ipha, NULL, @@ -1716,33 +1913,40 @@ esp_kcf_callback(void *arg, int status) esp_log_bad_auth(ipsec_mp); } else { - esp1dbg(("esp_kcf_callback: crypto failed with 0x%x\n", + esp1dbg(espstack, + ("esp_kcf_callback: crypto failed with 0x%x\n", status)); - ESP_BUMP_STAT(crypto_failures); + ESP_BUMP_STAT(espstack, crypto_failures); if (is_inbound) - IP_ESP_BUMP_STAT(in_discards); + IP_ESP_BUMP_STAT(ipss, in_discards); else - ESP_BUMP_STAT(out_discards); + ESP_BUMP_STAT(espstack, out_discards); ip_drop_packet(ipsec_mp, is_inbound, NULL, NULL, - &ipdrops_esp_crypto_failed, &esp_dropper); + DROPPER(ipss, ipds_esp_crypto_failed), + &espstack->esp_dropper); } + netstack_rele(ns); } /* * Invoked on crypto framework failure during inbound and outbound processing. */ static void -esp_crypto_failed(mblk_t *mp, boolean_t is_inbound, int kef_rc) +esp_crypto_failed(mblk_t *mp, boolean_t is_inbound, int kef_rc, + ipsecesp_stack_t *espstack) { - esp1dbg(("crypto failed for %s ESP with 0x%x\n", + ipsec_stack_t *ipss = espstack->ipsecesp_netstack->netstack_ipsec; + + esp1dbg(espstack, ("crypto failed for %s ESP with 0x%x\n", is_inbound ? "inbound" : "outbound", kef_rc)); - ip_drop_packet(mp, is_inbound, NULL, NULL, &ipdrops_esp_crypto_failed, - &esp_dropper); - ESP_BUMP_STAT(crypto_failures); + ip_drop_packet(mp, is_inbound, NULL, NULL, + DROPPER(ipss, ipds_esp_crypto_failed), + &espstack->esp_dropper); + ESP_BUMP_STAT(espstack, crypto_failures); if (is_inbound) - IP_ESP_BUMP_STAT(in_discards); + IP_ESP_BUMP_STAT(ipss, in_discards); else - ESP_BUMP_STAT(out_discards); + ESP_BUMP_STAT(espstack, out_discards); } #define ESP_INIT_CALLREQ(_cr) { \ @@ -1797,9 +2001,19 @@ esp_submit_req_inbound(mblk_t *ipsec_mp, ipsa_t *assoc, uint_t esph_offset) uint_t encr_offset, encr_len; uint_t iv_len = assoc->ipsa_iv_len; crypto_ctx_template_t encr_ctx_tmpl; + netstack_t *ns = ii->ipsec_in_ns; + ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; + ipsec_stack_t *ipss = ns->netstack_ipsec; ASSERT(ii->ipsec_in_type == IPSEC_IN); + /* + * In case kEF queues and calls back, keep netstackid_t for + * verification that the IP instance is still around in + * esp_kcf_callback(). + */ + ii->ipsec_in_stackid = ns->netstack_stackid; + do_auth = assoc->ipsa_auth_alg != SADB_AALG_NONE; do_encr = assoc->ipsa_encr_alg != SADB_EALG_NULL; @@ -1814,7 +2028,7 @@ esp_submit_req_inbound(mblk_t *ipsec_mp, ipsa_t *assoc, uint_t esph_offset) if (do_auth) { /* force asynchronous processing? 
*/ - if (ipsec_algs_exec_mode[IPSEC_ALG_AUTH] == + if (ipss->ipsec_algs_exec_mode[IPSEC_ALG_AUTH] == IPSEC_ALGS_EXEC_ASYNC) call_req.cr_flag |= CRYPTO_ALWAYS_QUEUE; @@ -1845,7 +2059,7 @@ esp_submit_req_inbound(mblk_t *ipsec_mp, ipsa_t *assoc, uint_t esph_offset) if (do_encr) { /* force asynchronous processing? */ - if (ipsec_algs_exec_mode[IPSEC_ALG_ENCR] == + if (ipss->ipsec_algs_exec_mode[IPSEC_ALG_ENCR] == IPSEC_ALGS_EXEC_ASYNC) call_req.cr_flag |= CRYPTO_ALWAYS_QUEUE; @@ -1897,19 +2111,19 @@ esp_submit_req_inbound(mblk_t *ipsec_mp, ipsa_t *assoc, uint_t esph_offset) switch (kef_rc) { case CRYPTO_SUCCESS: - ESP_BUMP_STAT(crypto_sync); + ESP_BUMP_STAT(espstack, crypto_sync); return (esp_in_done(ipsec_mp)); case CRYPTO_QUEUED: /* esp_kcf_callback() will be invoked on completion */ - ESP_BUMP_STAT(crypto_async); + ESP_BUMP_STAT(espstack, crypto_async); return (IPSEC_STATUS_PENDING); case CRYPTO_INVALID_MAC: - ESP_BUMP_STAT(crypto_sync); + ESP_BUMP_STAT(espstack, crypto_sync); esp_log_bad_auth(ipsec_mp); return (IPSEC_STATUS_FAILED); } - esp_crypto_failed(ipsec_mp, B_TRUE, kef_rc); + esp_crypto_failed(ipsec_mp, B_TRUE, kef_rc, espstack); return (IPSEC_STATUS_FAILED); } @@ -1930,11 +2144,22 @@ esp_submit_req_outbound(mblk_t *ipsec_mp, ipsa_t *assoc, uchar_t *icv_buf, crypto_ctx_template_t encr_ctx_tmpl; boolean_t is_natt = ((assoc->ipsa_flags & IPSA_F_NATT) != 0); size_t esph_offset = (is_natt ? UDPH_SIZE : 0); + netstack_t *ns = io->ipsec_out_ns; + ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; + ipsec_stack_t *ipss = ns->netstack_ipsec; - esp3dbg(("esp_submit_req_outbound:%s", is_natt ? "natt" : "not natt")); + esp3dbg(espstack, ("esp_submit_req_outbound:%s", + is_natt ? "natt" : "not natt")); ASSERT(io->ipsec_out_type == IPSEC_OUT); + /* + * In case kEF queues and calls back, keep netstackid_t for + * verification that the IP instance is still around in + * esp_kcf_callback(). + */ + io->ipsec_out_stackid = ns->netstack_stackid; + do_encr = assoc->ipsa_encr_alg != SADB_EALG_NULL; do_auth = assoc->ipsa_auth_alg != SADB_AALG_NONE; @@ -1952,7 +2177,7 @@ esp_submit_req_outbound(mblk_t *ipsec_mp, ipsa_t *assoc, uchar_t *icv_buf, if (do_auth) { /* force asynchronous processing? */ - if (ipsec_algs_exec_mode[IPSEC_ALG_AUTH] == + if (ipss->ipsec_algs_exec_mode[IPSEC_ALG_AUTH] == IPSEC_ALGS_EXEC_ASYNC) call_req.cr_flag |= CRYPTO_ALWAYS_QUEUE; @@ -1982,7 +2207,7 @@ esp_submit_req_outbound(mblk_t *ipsec_mp, ipsa_t *assoc, uchar_t *icv_buf, if (do_encr) { /* force asynchronous processing? 
*/ - if (ipsec_algs_exec_mode[IPSEC_ALG_ENCR] == + if (ipss->ipsec_algs_exec_mode[IPSEC_ALG_ENCR] == IPSEC_ALGS_EXEC_ASYNC) call_req.cr_flag |= CRYPTO_ALWAYS_QUEUE; @@ -2037,15 +2262,15 @@ esp_submit_req_outbound(mblk_t *ipsec_mp, ipsa_t *assoc, uchar_t *icv_buf, switch (kef_rc) { case CRYPTO_SUCCESS: - ESP_BUMP_STAT(crypto_sync); + ESP_BUMP_STAT(espstack, crypto_sync); return (IPSEC_STATUS_SUCCESS); case CRYPTO_QUEUED: /* esp_kcf_callback() will be invoked on completion */ - ESP_BUMP_STAT(crypto_async); + ESP_BUMP_STAT(espstack, crypto_async); return (IPSEC_STATUS_PENDING); } - esp_crypto_failed(ipsec_mp, B_TRUE, kef_rc); + esp_crypto_failed(ipsec_mp, B_TRUE, kef_rc, espstack); return (IPSEC_STATUS_FAILED); } @@ -2072,12 +2297,20 @@ esp_outbound(mblk_t *mp) uchar_t *icv_buf; udpha_t *udpha; boolean_t is_natt = B_FALSE; - - ESP_BUMP_STAT(out_requests); + netstack_t *ns; + ipsecesp_stack_t *espstack; + ipsec_stack_t *ipss; ipsec_out_mp = mp; data_mp = ipsec_out_mp->b_cont; + io = (ipsec_out_t *)ipsec_out_mp->b_rptr; + ns = io->ipsec_out_ns; + espstack = ns->netstack_ipsecesp; + ipss = ns->netstack_ipsec; + + ESP_BUMP_STAT(espstack, out_requests); + /* * <sigh> We have to copy the message here, because TCP (for example) * keeps a dupb() of the message lying around for retransmission. @@ -2096,15 +2329,14 @@ esp_outbound(mblk_t *mp) * pass it to ip_drop_packet(). */ ip_drop_packet(ipsec_out_mp, B_FALSE, NULL, NULL, - &ipdrops_esp_nomem, &esp_dropper); + DROPPER(ipss, ipds_esp_nomem), + &espstack->esp_dropper); return (IPSEC_STATUS_FAILED); } else { freemsg(data_mp); data_mp = ipsec_out_mp->b_cont; } - io = (ipsec_out_t *)ipsec_out_mp->b_rptr; - /* * Reality check.... */ @@ -2205,27 +2437,29 @@ esp_outbound(mblk_t *mp) * pass it to ip_drop_packet(). */ ip_drop_packet(mp, B_FALSE, NULL, NULL, - &ipdrops_esp_bytes_expire, &esp_dropper); + DROPPER(ipss, ipds_esp_bytes_expire), + &espstack->esp_dropper); return (IPSEC_STATUS_FAILED); } espmp = allocb(esplen, BPRI_HI); if (espmp == NULL) { - ESP_BUMP_STAT(out_discards); - esp1dbg(("esp_outbound: can't allocate espmp.\n")); + ESP_BUMP_STAT(espstack, out_discards); + esp1dbg(espstack, ("esp_outbound: can't allocate espmp.\n")); /* * TODO: Find the outbound IRE for this packet and * pass it to ip_drop_packet(). */ - ip_drop_packet(mp, B_FALSE, NULL, NULL, &ipdrops_esp_nomem, - &esp_dropper); + ip_drop_packet(mp, B_FALSE, NULL, NULL, + DROPPER(ipss, ipds_esp_nomem), + &espstack->esp_dropper); return (IPSEC_STATUS_FAILED); } espmp->b_wptr += esplen; esph = (esph_t *)espmp->b_rptr; if (is_natt) { - esp3dbg(("esp_outbound: NATT")); + esp3dbg(espstack, ("esp_outbound: NATT")); udpha = (udpha_t *)espmp->b_rptr; udpha->uha_src_port = htons(IPPORT_IKE_NATT); @@ -2252,16 +2486,18 @@ esp_outbound(mblk_t *mp) ipsec_assocfailure(info.mi_idnum, 0, 0, SL_ERROR | SL_CONSOLE | SL_WARN, "Outbound ESP SA (0x%x, %s) has wrapped sequence.\n", - esph->esph_spi, assoc->ipsa_dstaddr, af); + esph->esph_spi, assoc->ipsa_dstaddr, af, + espstack->ipsecesp_netstack); - ESP_BUMP_STAT(out_discards); + ESP_BUMP_STAT(espstack, out_discards); sadb_replay_delete(assoc); /* * TODO: Find the outbound IRE for this packet and * pass it to ip_drop_packet(). */ - ip_drop_packet(mp, B_FALSE, NULL, NULL, &ipdrops_esp_replay, - &esp_dropper); + ip_drop_packet(mp, B_FALSE, NULL, NULL, + DROPPER(ipss, ipds_esp_replay), + &espstack->esp_dropper); return (IPSEC_STATUS_FAILED); } @@ -2298,18 +2534,19 @@ esp_outbound(mblk_t *mp) /* I've got the two ESP mblks, now insert them. 
*/ - esp2dbg(("data_mp before outbound ESP adjustment:\n")); - esp2dbg((dump_msg(data_mp))); + esp2dbg(espstack, ("data_mp before outbound ESP adjustment:\n")); + esp2dbg(espstack, (dump_msg(data_mp))); - if (!esp_insert_esp(data_mp, espmp, divpoint)) { - ESP_BUMP_STAT(out_discards); + if (!esp_insert_esp(data_mp, espmp, divpoint, espstack)) { + ESP_BUMP_STAT(espstack, out_discards); /* NOTE: esp_insert_esp() only fails if there's no memory. */ /* * TODO: Find the outbound IRE for this packet and * pass it to ip_drop_packet(). */ - ip_drop_packet(mp, B_FALSE, NULL, NULL, &ipdrops_esp_nomem, - &esp_dropper); + ip_drop_packet(mp, B_FALSE, NULL, NULL, + DROPPER(ipss, ipds_esp_nomem), + &espstack->esp_dropper); freeb(espmp); return (IPSEC_STATUS_FAILED); } @@ -2320,14 +2557,15 @@ esp_outbound(mblk_t *mp) if (tailmp->b_wptr + alloclen > tailmp->b_datap->db_lim) { tailmp->b_cont = allocb(alloclen, BPRI_HI); if (tailmp->b_cont == NULL) { - ESP_BUMP_STAT(out_discards); + ESP_BUMP_STAT(espstack, out_discards); esp0dbg(("esp_outbound: Can't allocate tailmp.\n")); /* * TODO: Find the outbound IRE for this packet and * pass it to ip_drop_packet(). */ ip_drop_packet(mp, B_FALSE, NULL, NULL, - &ipdrops_esp_nomem, &esp_dropper); + DROPPER(ipss, ipds_esp_nomem), + &espstack->esp_dropper); return (IPSEC_STATUS_FAILED); } tailmp = tailmp->b_cont; @@ -2344,8 +2582,8 @@ esp_outbound(mblk_t *mp) *tailmp->b_wptr++ = i; *tailmp->b_wptr++ = protocol; - esp2dbg(("data_Mp before encryption:\n")); - esp2dbg((dump_msg(data_mp))); + esp2dbg(espstack, ("data_Mp before encryption:\n")); + esp2dbg(espstack, (dump_msg(data_mp))); /* * The packet is eligible for hardware acceleration if the @@ -2368,7 +2606,7 @@ esp_outbound(mblk_t *mp) if (io->ipsec_out_is_capab_ill && !(assoc->ipsa_flags & IPSA_F_NATT)) { return (esp_outbound_accelerated(ipsec_out_mp, mac_len)); } - ESP_BUMP_STAT(noaccel); + ESP_BUMP_STAT(espstack, noaccel); /* * Okay. I've set up the pre-encryption ESP. Let's do it! @@ -2393,6 +2631,22 @@ esp_outbound(mblk_t *mp) ipsec_status_t ipsecesp_icmp_error(mblk_t *ipsec_mp) { + ipsec_in_t *ii = (ipsec_in_t *)ipsec_mp->b_rptr; + boolean_t is_inbound = (ii->ipsec_in_type == IPSEC_IN); + netstack_t *ns; + ipsecesp_stack_t *espstack; + ipsec_stack_t *ipss; + + if (is_inbound) { + ns = ii->ipsec_in_ns; + } else { + ipsec_out_t *io = (ipsec_out_t *)ipsec_mp->b_rptr; + + ns = io->ipsec_out_ns; + } + espstack = ns->netstack_ipsecesp; + ipss = ns->netstack_ipsec; + /* * Unless we get an entire packet back, this function is useless. * Why? @@ -2407,9 +2661,10 @@ ipsecesp_icmp_error(mblk_t *ipsec_mp) * Since the chances of us getting an entire packet back are very * very small, we discard here. */ - IP_ESP_BUMP_STAT(in_discards); - ip_drop_packet(ipsec_mp, B_TRUE, NULL, NULL, &ipdrops_esp_icmp, - &esp_dropper); + IP_ESP_BUMP_STAT(ipss, in_discards); + ip_drop_packet(ipsec_mp, B_TRUE, NULL, NULL, + DROPPER(ipss, ipds_esp_icmp), + &espstack->esp_dropper); return (IPSEC_STATUS_FAILED); } @@ -2420,14 +2675,18 @@ ipsecesp_icmp_error(mblk_t *ipsec_mp) static void ipsecesp_rput(queue_t *q, mblk_t *mp) { + ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)q->q_ptr; + ASSERT(mp->b_datap->db_type != M_CTL); /* No more IRE_DB_REQ. */ + switch (mp->b_datap->db_type) { case M_PROTO: case M_PCPROTO: /* TPI message of some sort. 
*/ switch (*((t_scalar_t *)mp->b_rptr)) { case T_BIND_ACK: - esp3dbg(("Thank you IP from ESP for T_BIND_ACK\n")); + esp3dbg(espstack, + ("Thank you IP from ESP for T_BIND_ACK\n")); break; case T_ERROR_ACK: cmn_err(CE_WARN, @@ -2436,7 +2695,7 @@ ipsecesp_rput(queue_t *q, mblk_t *mp) * Make esp_sadb.s_ip_q NULL, and in the * future, perhaps try again. */ - esp_sadb.s_ip_q = NULL; + espstack->esp_sadb.s_ip_q = NULL; break; case T_OK_ACK: /* Probably from a (rarely sent) T_UNBIND_REQ. */ @@ -2448,7 +2707,7 @@ ipsecesp_rput(queue_t *q, mblk_t *mp) break; default: /* For now, passthru message. */ - esp2dbg(("ESP got unknown mblk type %d.\n", + esp2dbg(espstack, ("ESP got unknown mblk type %d.\n", mp->b_datap->db_type)); putnext(q, mp); } @@ -2458,7 +2717,8 @@ ipsecesp_rput(queue_t *q, mblk_t *mp) * Construct an SADB_REGISTER message with the current algorithms. */ static boolean_t -esp_register_out(uint32_t sequence, uint32_t pid, uint_t serial) +esp_register_out(uint32_t sequence, uint32_t pid, uint_t serial, + ipsecesp_stack_t *espstack) { mblk_t *pfkey_msg_mp, *keysock_out_mp; sadb_msg_t *samsg; @@ -2473,6 +2733,7 @@ esp_register_out(uint32_t sequence, uint32_t pid, uint_t serial) int current_ealgs; ipsec_alginfo_t **encralgs; uint_t num_ealgs; + ipsec_stack_t *ipss = espstack->ipsecesp_netstack->netstack_ipsec; /* Allocate the KEYSOCK_OUT. */ keysock_out_mp = sadb_keysock_out(serial); @@ -2485,7 +2746,7 @@ esp_register_out(uint32_t sequence, uint32_t pid, uint_t serial) * Allocate the PF_KEY message that follows KEYSOCK_OUT. */ - mutex_enter(&alg_lock); + mutex_enter(&ipss->ipsec_alg_lock); /* * Fill SADB_REGISTER message's algorithm descriptors. Hold @@ -2495,7 +2756,7 @@ esp_register_out(uint32_t sequence, uint32_t pid, uint_t serial) * to send up may be less than the number of algorithm entries * in the table. */ - authalgs = ipsec_alglists[IPSEC_ALG_AUTH]; + authalgs = ipss->ipsec_alglists[IPSEC_ALG_AUTH]; for (num_aalgs = 0, i = 0; i < IPSEC_MAX_ALGS; i++) if (authalgs[i] != NULL && ALG_VALID(authalgs[i])) num_aalgs++; @@ -2504,7 +2765,7 @@ esp_register_out(uint32_t sequence, uint32_t pid, uint_t serial) allocsize += (num_aalgs * sizeof (*saalg)); allocsize += sizeof (*sasupp_auth); } - encralgs = ipsec_alglists[IPSEC_ALG_ENCR]; + encralgs = ipss->ipsec_alglists[IPSEC_ALG_ENCR]; for (num_ealgs = 0, i = 0; i < IPSEC_MAX_ALGS; i++) if (encralgs[i] != NULL && ALG_VALID(encralgs[i])) num_ealgs++; @@ -2515,7 +2776,7 @@ esp_register_out(uint32_t sequence, uint32_t pid, uint_t serial) } keysock_out_mp->b_cont = allocb(allocsize, BPRI_HI); if (keysock_out_mp->b_cont == NULL) { - mutex_exit(&alg_lock); + mutex_exit(&ipss->ipsec_alg_lock); freemsg(keysock_out_mp); return (B_FALSE); } @@ -2531,7 +2792,8 @@ esp_register_out(uint32_t sequence, uint32_t pid, uint_t serial) numalgs_snap = 0; for (i = 0; - ((i < IPSEC_MAX_ALGS) && (numalgs_snap < num_aalgs)); i++) { + ((i < IPSEC_MAX_ALGS) && (numalgs_snap < num_aalgs)); + i++) { if (authalgs[i] == NULL || !ALG_VALID(authalgs[i])) continue; @@ -2599,7 +2861,7 @@ esp_register_out(uint32_t sequence, uint32_t pid, uint_t serial) current_aalgs = num_aalgs; current_ealgs = num_ealgs; - mutex_exit(&alg_lock); + mutex_exit(&ipss->ipsec_alg_lock); /* Now fill the rest of the SADB_REGISTER message. 
*/ @@ -2634,8 +2896,8 @@ esp_register_out(uint32_t sequence, uint32_t pid, uint_t serial) sasupp_encr->sadb_supported_reserved = 0; } - if (esp_pfkey_q != NULL) - putnext(esp_pfkey_q, keysock_out_mp); + if (espstack->esp_pfkey_q != NULL) + putnext(espstack->esp_pfkey_q, keysock_out_mp); else { freemsg(keysock_out_mp); return (B_FALSE); @@ -2650,13 +2912,15 @@ esp_register_out(uint32_t sequence, uint32_t pid, uint_t serial) * sent up to the ESP listeners. */ void -ipsecesp_algs_changed(void) +ipsecesp_algs_changed(netstack_t *ns) { + ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; + /* * Time to send a PF_KEY SADB_REGISTER message to ESP listeners * everywhere. (The function itself checks for NULL esp_pfkey_q.) */ - (void) esp_register_out(0, 0, 0); + (void) esp_register_out(0, 0, 0, espstack); } /* @@ -2668,11 +2932,14 @@ inbound_task(void *arg) esph_t *esph; mblk_t *mp = (mblk_t *)arg; ipsec_in_t *ii = (ipsec_in_t *)mp->b_rptr; + netstack_t *ns = ii->ipsec_in_ns; + ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; int ipsec_rc; - esp2dbg(("in ESP inbound_task")); + esp2dbg(espstack, ("in ESP inbound_task")); + ASSERT(espstack != NULL); - esph = ipsec_inbound_esp_sa(mp); + esph = ipsec_inbound_esp_sa(mp, ns); if (esph == NULL) return; ASSERT(ii->ipsec_in_esp_sa != NULL); @@ -2688,7 +2955,7 @@ inbound_task(void *arg) */ static int esp_add_sa_finish(mblk_t *mp, sadb_msg_t *samsg, keysock_in_t *ksi, - int *diagnostic) + int *diagnostic, ipsecesp_stack_t *espstack) { isaf_t *primary, *secondary, *inbound, *outbound; sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA]; @@ -2706,6 +2973,7 @@ esp_add_sa_finish(mblk_t *mp, sadb_msg_t *samsg, keysock_in_t *ksi, sadb_t *sp; int outhash; mblk_t *lpkt; + ipsec_stack_t *ipss = espstack->ipsecesp_netstack->netstack_ipsec; /* * Locate the appropriate table(s). 
@@ -2715,11 +2983,11 @@ esp_add_sa_finish(mblk_t *mp, sadb_msg_t *samsg, keysock_in_t *ksi, dst6 = (struct sockaddr_in6 *)dst; is_ipv4 = (dst->sin_family == AF_INET); if (is_ipv4) { - sp = &esp_sadb.s_v4; + sp = &espstack->esp_sadb.s_v4; dstaddr = (uint32_t *)(&dst->sin_addr); outhash = OUTBOUND_HASH_V4(sp, *(ipaddr_t *)dstaddr); } else { - sp = &esp_sadb.s_v6; + sp = &espstack->esp_sadb.s_v6; dstaddr = (uint32_t *)(&dst6->sin6_addr); outhash = OUTBOUND_HASH_V6(sp, *(in6_addr_t *)dstaddr); } @@ -2799,7 +3067,8 @@ esp_add_sa_finish(mblk_t *mp, sadb_msg_t *samsg, keysock_in_t *ksi, acq_msgs = acqrec->ipsacq_mp; acqrec->ipsacq_mp = NULL; mutex_exit(&acqrec->ipsacq_lock); - sadb_destroy_acquire(acqrec); + sadb_destroy_acquire(acqrec, + espstack->ipsecesp_netstack); } mutex_exit(&acq_bucket->iacqf_lock); } @@ -2825,8 +3094,9 @@ esp_add_sa_finish(mblk_t *mp, sadb_msg_t *samsg, keysock_in_t *ksi, if (larval != NULL) lpkt = sadb_clear_lpkt(larval); - rc = sadb_common_add(esp_sadb.s_ip_q, esp_pfkey_q, mp, samsg, ksi, - primary, secondary, larval, clone, is_inbound, diagnostic); + rc = sadb_common_add(espstack->esp_sadb.s_ip_q, espstack->esp_pfkey_q, + mp, samsg, ksi, primary, secondary, larval, clone, is_inbound, + diagnostic, espstack->ipsecesp_netstack); if (rc == 0 && lpkt != NULL) { rc = !taskq_dispatch(esp_taskq, inbound_task, @@ -2835,7 +3105,8 @@ esp_add_sa_finish(mblk_t *mp, sadb_msg_t *samsg, keysock_in_t *ksi, if (rc != 0) { ip_drop_packet(lpkt, B_TRUE, NULL, NULL, - &ipdrops_sadb_inlarval_timeout, &esp_dropper); + DROPPER(ipss, ipds_sadb_inlarval_timeout), + &espstack->esp_dropper); } /* @@ -2874,9 +3145,10 @@ esp_add_sa_finish(mblk_t *mp, sadb_msg_t *samsg, keysock_in_t *ksi, continue; } } - ESP_BUMP_STAT(out_discards); + ESP_BUMP_STAT(espstack, out_discards); ip_drop_packet(mp, B_FALSE, NULL, NULL, - &ipdrops_sadb_acquire_timeout, &esp_dropper); + DROPPER(ipss, ipds_sadb_acquire_timeout), + &espstack->esp_dropper); } return (rc); @@ -2887,7 +3159,7 @@ esp_add_sa_finish(mblk_t *mp, sadb_msg_t *samsg, keysock_in_t *ksi, * routine eventually. */ static int -esp_add_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic) +esp_add_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic, netstack_t *ns) { sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA]; sadb_address_t *srcext = @@ -2907,11 +3179,12 @@ esp_add_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic) struct sockaddr_in *src, *dst; struct sockaddr_in *natt_loc, *natt_rem; struct sockaddr_in6 *natt_loc6, *natt_rem6; - sadb_lifetime_t *soft = (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT]; sadb_lifetime_t *hard = (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD]; + ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; + ipsec_stack_t *ipss = ns->netstack_ipsec; /* I need certain extensions present for an ADD message. */ if (srcext == NULL) { @@ -3012,7 +3285,7 @@ esp_add_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic) * the weak key check up to the algorithm. */ - mutex_enter(&alg_lock); + mutex_enter(&ipss->ipsec_alg_lock); /* * First locate the authentication algorithm. 
@@ -3020,10 +3293,11 @@ esp_add_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic) if (akey != NULL) { ipsec_alginfo_t *aalg; - aalg = ipsec_alglists[IPSEC_ALG_AUTH][assoc->sadb_sa_auth]; + aalg = ipss->ipsec_alglists[IPSEC_ALG_AUTH] + [assoc->sadb_sa_auth]; if (aalg == NULL || !ALG_VALID(aalg)) { - mutex_exit(&alg_lock); - esp1dbg(("Couldn't find auth alg #%d.\n", + mutex_exit(&ipss->ipsec_alg_lock); + esp1dbg(espstack, ("Couldn't find auth alg #%d.\n", assoc->sadb_sa_auth)); *diagnostic = SADB_X_DIAGNOSTIC_BAD_AALG; return (EINVAL); @@ -3037,7 +3311,7 @@ esp_add_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic) * a auth_key != NULL should be made here ( see below). */ if (!ipsec_valid_key_size(akey->sadb_key_bits, aalg)) { - mutex_exit(&alg_lock); + mutex_exit(&ipss->ipsec_alg_lock); *diagnostic = SADB_X_DIAGNOSTIC_BAD_AKEYBITS; return (EINVAL); } @@ -3046,7 +3320,7 @@ esp_add_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic) /* check key and fix parity if needed */ if (ipsec_check_key(aalg->alg_mech_type, akey, B_TRUE, diagnostic) != 0) { - mutex_exit(&alg_lock); + mutex_exit(&ipss->ipsec_alg_lock); return (EINVAL); } } @@ -3057,10 +3331,11 @@ esp_add_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic) if (ekey != NULL) { ipsec_alginfo_t *ealg; - ealg = ipsec_alglists[IPSEC_ALG_ENCR][assoc->sadb_sa_encrypt]; + ealg = ipss->ipsec_alglists[IPSEC_ALG_ENCR] + [assoc->sadb_sa_encrypt]; if (ealg == NULL || !ALG_VALID(ealg)) { - mutex_exit(&alg_lock); - esp1dbg(("Couldn't find encr alg #%d.\n", + mutex_exit(&ipss->ipsec_alg_lock); + esp1dbg(espstack, ("Couldn't find encr alg #%d.\n", assoc->sadb_sa_encrypt)); *diagnostic = SADB_X_DIAGNOSTIC_BAD_EALG; return (EINVAL); @@ -3073,7 +3348,7 @@ esp_add_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic) */ if ((assoc->sadb_sa_encrypt == SADB_EALG_NULL) || (!ipsec_valid_key_size(ekey->sadb_key_bits, ealg))) { - mutex_exit(&alg_lock); + mutex_exit(&ipss->ipsec_alg_lock); *diagnostic = SADB_X_DIAGNOSTIC_BAD_EKEYBITS; return (EINVAL); } @@ -3082,14 +3357,14 @@ esp_add_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic) /* check key */ if (ipsec_check_key(ealg->alg_mech_type, ekey, B_FALSE, diagnostic) != 0) { - mutex_exit(&alg_lock); + mutex_exit(&ipss->ipsec_alg_lock); return (EINVAL); } } - mutex_exit(&alg_lock); + mutex_exit(&ipss->ipsec_alg_lock); return (esp_add_sa_finish(mp, (sadb_msg_t *)mp->b_cont->b_rptr, ksi, - diagnostic)); + diagnostic, espstack)); } /* @@ -3098,7 +3373,8 @@ esp_add_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic) * a larval SA, which ends up looking a lot more like an add. */ static int -esp_update_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic) +esp_update_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic, + ipsecesp_stack_t *espstack) { sadb_address_t *dstext = (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST]; @@ -3111,8 +3387,10 @@ esp_update_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic) sin = (struct sockaddr_in *)(dstext + 1); return (sadb_update_sa(mp, ksi, - (sin->sin_family == AF_INET6) ? &esp_sadb.s_v6 : &esp_sadb.s_v4, - diagnostic, esp_pfkey_q, esp_add_sa)); + (sin->sin_family == AF_INET6) ? &espstack->esp_sadb.s_v6 : + &espstack->esp_sadb.s_v4, + diagnostic, espstack->esp_pfkey_q, esp_add_sa, + espstack->ipsecesp_netstack)); } /* @@ -3120,7 +3398,8 @@ esp_update_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic) * both AH and ESP. Find the association, then unlink it. 
*/ static int -esp_del_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic) +esp_del_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic, + ipsecesp_stack_t *espstack) { sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA]; sadb_address_t *dstext = @@ -3139,11 +3418,13 @@ esp_del_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic) return (EINVAL); } return (sadb_purge_sa(mp, ksi, - (sin->sin_family == AF_INET6) ? &esp_sadb.s_v6 : - &esp_sadb.s_v4, esp_pfkey_q, esp_sadb.s_ip_q)); + (sin->sin_family == AF_INET6) ? &espstack->esp_sadb.s_v6 : + &espstack->esp_sadb.s_v4, espstack->esp_pfkey_q, + espstack->esp_sadb.s_ip_q)); } - return (sadb_del_sa(mp, ksi, &esp_sadb, diagnostic, esp_pfkey_q)); + return (sadb_del_sa(mp, ksi, &espstack->esp_sadb, diagnostic, + espstack->esp_pfkey_q)); } /* @@ -3151,7 +3432,7 @@ esp_del_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic) * messages. */ static void -esp_dump(mblk_t *mp, keysock_in_t *ksi) +esp_dump(mblk_t *mp, keysock_in_t *ksi, ipsecesp_stack_t *espstack) { int error; sadb_msg_t *samsg; @@ -3160,24 +3441,27 @@ esp_dump(mblk_t *mp, keysock_in_t *ksi) * Dump each fanout, bailing if error is non-zero. */ - error = sadb_dump(esp_pfkey_q, mp, ksi->ks_in_serial, &esp_sadb.s_v4); + error = sadb_dump(espstack->esp_pfkey_q, mp, ksi->ks_in_serial, + &espstack->esp_sadb.s_v4); if (error != 0) goto bail; - error = sadb_dump(esp_pfkey_q, mp, ksi->ks_in_serial, &esp_sadb.s_v6); + error = sadb_dump(espstack->esp_pfkey_q, mp, ksi->ks_in_serial, + &espstack->esp_sadb.s_v6); bail: ASSERT(mp->b_cont != NULL); samsg = (sadb_msg_t *)mp->b_cont->b_rptr; samsg->sadb_msg_errno = (uint8_t)error; - sadb_pfkey_echo(esp_pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr, ksi, - NULL); + sadb_pfkey_echo(espstack->esp_pfkey_q, mp, + (sadb_msg_t *)mp->b_cont->b_rptr, ksi, NULL); } /* * First-cut reality check for an inbound PF_KEY message. */ static boolean_t -esp_pfkey_reality_failures(mblk_t *mp, keysock_in_t *ksi) +esp_pfkey_reality_failures(mblk_t *mp, keysock_in_t *ksi, + ipsecesp_stack_t *espstack) { int diagnostic; @@ -3193,7 +3477,7 @@ esp_pfkey_reality_failures(mblk_t *mp, keysock_in_t *ksi) return (B_FALSE); /* False ==> no failures */ badmsg: - sadb_pfkey_error(esp_pfkey_q, mp, EINVAL, diagnostic, + sadb_pfkey_error(espstack->esp_pfkey_q, mp, EINVAL, diagnostic, ksi->ks_in_serial); return (B_TRUE); /* True ==> failures */ } @@ -3210,7 +3494,7 @@ badmsg: * mucking with PF_KEY messages. */ static void -esp_parse_pfkey(mblk_t *mp) +esp_parse_pfkey(mblk_t *mp, ipsecesp_stack_t *espstack) { mblk_t *msg = mp->b_cont; sadb_msg_t *samsg; @@ -3219,6 +3503,7 @@ esp_parse_pfkey(mblk_t *mp) int diagnostic = SADB_X_DIAGNOSTIC_NONE; ASSERT(msg != NULL); + samsg = (sadb_msg_t *)msg->b_rptr; ksi = (keysock_in_t *)mp->b_rptr; @@ -3226,40 +3511,42 @@ esp_parse_pfkey(mblk_t *mp) * If applicable, convert unspecified AF_INET6 to unspecified * AF_INET. And do other address reality checks. 
*/ - if (!sadb_addrfix(ksi, esp_pfkey_q, mp) || - esp_pfkey_reality_failures(mp, ksi)) { + if (!sadb_addrfix(ksi, espstack->esp_pfkey_q, mp, + espstack->ipsecesp_netstack) || + esp_pfkey_reality_failures(mp, ksi, espstack)) { return; } switch (samsg->sadb_msg_type) { case SADB_ADD: - error = esp_add_sa(mp, ksi, &diagnostic); + error = esp_add_sa(mp, ksi, &diagnostic, + espstack->ipsecesp_netstack); if (error != 0) { - sadb_pfkey_error(esp_pfkey_q, mp, error, diagnostic, - ksi->ks_in_serial); + sadb_pfkey_error(espstack->esp_pfkey_q, mp, error, + diagnostic, ksi->ks_in_serial); } /* else esp_add_sa() took care of things. */ break; case SADB_DELETE: - error = esp_del_sa(mp, ksi, &diagnostic); + error = esp_del_sa(mp, ksi, &diagnostic, espstack); if (error != 0) { - sadb_pfkey_error(esp_pfkey_q, mp, error, diagnostic, - ksi->ks_in_serial); + sadb_pfkey_error(espstack->esp_pfkey_q, mp, error, + diagnostic, ksi->ks_in_serial); } /* Else esp_del_sa() took care of things. */ break; case SADB_GET: - error = sadb_get_sa(mp, ksi, &esp_sadb, &diagnostic, - esp_pfkey_q); + error = sadb_get_sa(mp, ksi, &espstack->esp_sadb, &diagnostic, + espstack->esp_pfkey_q); if (error != 0) { - sadb_pfkey_error(esp_pfkey_q, mp, error, diagnostic, - ksi->ks_in_serial); + sadb_pfkey_error(espstack->esp_pfkey_q, mp, error, + diagnostic, ksi->ks_in_serial); } /* Else sadb_get_sa() took care of things. */ break; case SADB_FLUSH: - sadbp_flush(&esp_sadb); - sadb_pfkey_echo(esp_pfkey_q, mp, samsg, ksi, NULL); + sadbp_flush(&espstack->esp_sadb, espstack->ipsecesp_netstack); + sadb_pfkey_echo(espstack->esp_pfkey_q, mp, samsg, ksi, NULL); break; case SADB_REGISTER: /* @@ -3270,7 +3557,7 @@ esp_parse_pfkey(mblk_t *mp) * Keysock takes care of the PF_KEY bookkeeping for this. */ if (esp_register_out(samsg->sadb_msg_seq, samsg->sadb_msg_pid, - ksi->ks_in_serial)) { + ksi->ks_in_serial, espstack)) { freemsg(mp); } else { /* @@ -3278,8 +3565,8 @@ esp_parse_pfkey(mblk_t *mp) * failure. It will not return B_FALSE because of * lack of esp_pfkey_q if I am in wput(). */ - sadb_pfkey_error(esp_pfkey_q, mp, ENOMEM, diagnostic, - ksi->ks_in_serial); + sadb_pfkey_error(espstack->esp_pfkey_q, mp, ENOMEM, + diagnostic, ksi->ks_in_serial); } break; case SADB_UPDATE: @@ -3287,10 +3574,10 @@ esp_parse_pfkey(mblk_t *mp) * Find a larval, if not there, find a full one and get * strict. */ - error = esp_update_sa(mp, ksi, &diagnostic); + error = esp_update_sa(mp, ksi, &diagnostic, espstack); if (error != 0) { - sadb_pfkey_error(esp_pfkey_q, mp, error, diagnostic, - ksi->ks_in_serial); + sadb_pfkey_error(espstack->esp_pfkey_q, mp, error, + diagnostic, ksi->ks_in_serial); } /* else esp_update_sa() took care of things. */ break; @@ -3298,7 +3585,7 @@ esp_parse_pfkey(mblk_t *mp) /* * Reserve a new larval entry. */ - esp_getspi(mp, ksi); + esp_getspi(mp, ksi, espstack); break; case SADB_ACQUIRE: /* @@ -3306,23 +3593,24 @@ esp_parse_pfkey(mblk_t *mp) * most likely an error. Inbound ACQUIRE messages should only * have the base header. */ - sadb_in_acquire(samsg, &esp_sadb, esp_pfkey_q); + sadb_in_acquire(samsg, &espstack->esp_sadb, + espstack->esp_pfkey_q, espstack->ipsecesp_netstack); freemsg(mp); break; case SADB_DUMP: /* * Dump all entries. */ - esp_dump(mp, ksi); + esp_dump(mp, ksi, espstack); /* esp_dump will take care of the return message, etc. */ break; case SADB_EXPIRE: /* Should never reach me. 
*/ - sadb_pfkey_error(esp_pfkey_q, mp, EOPNOTSUPP, diagnostic, - ksi->ks_in_serial); + sadb_pfkey_error(espstack->esp_pfkey_q, mp, EOPNOTSUPP, + diagnostic, ksi->ks_in_serial); break; default: - sadb_pfkey_error(esp_pfkey_q, mp, EINVAL, + sadb_pfkey_error(espstack->esp_pfkey_q, mp, EINVAL, SADB_X_DIAGNOSTIC_UNKNOWN_MSG, ksi->ks_in_serial); break; } @@ -3333,7 +3621,7 @@ esp_parse_pfkey(mblk_t *mp) * ACQUIRE messages. */ static void -esp_keysock_no_socket(mblk_t *mp) +esp_keysock_no_socket(mblk_t *mp, ipsecesp_stack_t *espstack) { sadb_msg_t *samsg; keysock_out_err_t *kse = (keysock_out_err_t *)mp->b_rptr; @@ -3355,7 +3643,8 @@ esp_keysock_no_socket(mblk_t *mp) * Use the write-side of the esp_pfkey_q, in case there is * no esp_sadb.s_ip_q. */ - sadb_in_acquire(samsg, &esp_sadb, WR(esp_pfkey_q)); + sadb_in_acquire(samsg, &espstack->esp_sadb, + WR(espstack->esp_pfkey_q), espstack->ipsecesp_netstack); } freemsg(mp); @@ -3369,8 +3658,9 @@ ipsecesp_wput(queue_t *q, mblk_t *mp) { ipsec_info_t *ii; struct iocblk *iocp; + ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)q->q_ptr; - esp3dbg(("In esp_wput().\n")); + esp3dbg(espstack, ("In esp_wput().\n")); /* NOTE: Each case must take care of freeing or passing mp. */ switch (mp->b_datap->db_type) { @@ -3384,22 +3674,23 @@ ipsecesp_wput(queue_t *q, mblk_t *mp) switch (ii->ipsec_info_type) { case KEYSOCK_OUT_ERR: - esp1dbg(("Got KEYSOCK_OUT_ERR message.\n")); - esp_keysock_no_socket(mp); + esp1dbg(espstack, ("Got KEYSOCK_OUT_ERR message.\n")); + esp_keysock_no_socket(mp, espstack); break; case KEYSOCK_IN: - ESP_BUMP_STAT(keysock_in); - esp3dbg(("Got KEYSOCK_IN message.\n")); + ESP_BUMP_STAT(espstack, keysock_in); + esp3dbg(espstack, ("Got KEYSOCK_IN message.\n")); /* Parse the message. */ - esp_parse_pfkey(mp); + esp_parse_pfkey(mp, espstack); break; case KEYSOCK_HELLO: - sadb_keysock_hello(&esp_pfkey_q, q, mp, - esp_ager, &esp_event, SADB_SATYPE_ESP); + sadb_keysock_hello(&espstack->esp_pfkey_q, q, mp, + esp_ager, (void *)espstack, &espstack->esp_event, + SADB_SATYPE_ESP); break; default: - esp2dbg(("Got M_CTL from above of 0x%x.\n", + esp2dbg(espstack, ("Got M_CTL from above of 0x%x.\n", ii->ipsec_info_type)); freemsg(mp); break; @@ -3410,7 +3701,7 @@ ipsecesp_wput(queue_t *q, mblk_t *mp) switch (iocp->ioc_cmd) { case ND_SET: case ND_GET: - if (nd_getset(q, ipsecesp_g_nd, mp)) { + if (nd_getset(q, espstack->ipsecesp_g_nd, mp)) { qreply(q, mp); return; } else { @@ -3429,7 +3720,8 @@ ipsecesp_wput(queue_t *q, mblk_t *mp) return; } default: - esp3dbg(("Got default message, type %d, passing to IP.\n", + esp3dbg(espstack, + ("Got default message, type %d, passing to IP.\n", mp->b_datap->db_type)); putnext(q, mp); } @@ -3450,10 +3742,16 @@ esp_outbound_accelerated(mblk_t *ipsec_out, uint_t icv_len) { ipsec_out_t *io; mblk_t *lastmp; - - ESP_BUMP_STAT(out_accelerated); + netstack_t *ns; + ipsecesp_stack_t *espstack; + ipsec_stack_t *ipss; io = (ipsec_out_t *)ipsec_out->b_rptr; + ns = io->ipsec_out_ns; + espstack = ns->netstack_ipsecesp; + ipss = ns->netstack_ipsec; + + ESP_BUMP_STAT(espstack, out_accelerated); /* mark packet as being accelerated in IPSEC_OUT */ ASSERT(io->ipsec_out_accelerated == B_FALSE); @@ -3473,9 +3771,10 @@ esp_outbound_accelerated(mblk_t *ipsec_out, uint_t icv_len) if ((lastmp->b_wptr + icv_len) > lastmp->b_datap->db_lim) { lastmp->b_cont = allocb(icv_len, BPRI_HI); if (lastmp->b_cont == NULL) { - ESP_BUMP_STAT(out_discards); + ESP_BUMP_STAT(espstack, out_discards); ip_drop_packet(ipsec_out, B_FALSE, NULL, NULL, - &ipdrops_esp_nomem, 
&esp_dropper); + DROPPER(ipss, ipds_esp_nomem), + &espstack->esp_dropper); return (IPSEC_STATUS_FAILED); } lastmp = lastmp->b_cont; @@ -3495,17 +3794,19 @@ static ipsec_status_t esp_inbound_accelerated(mblk_t *ipsec_in, mblk_t *data_mp, boolean_t isv4, ipsa_t *assoc) { - ipsec_in_t *ii; + ipsec_in_t *ii = (ipsec_in_t *)ipsec_in->b_rptr; mblk_t *hada_mp; uint32_t icv_len = 0; da_ipsec_t *hada; ipha_t *ipha; ip6_t *ip6h; kstat_named_t *counter; + netstack_t *ns = ii->ipsec_in_ns; + ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; + ipsec_stack_t *ipss = ns->netstack_ipsec; - ESP_BUMP_STAT(in_accelerated); + ESP_BUMP_STAT(espstack, in_accelerated); - ii = (ipsec_in_t *)ipsec_in->b_rptr; hada_mp = ii->ipsec_in_da; ASSERT(hada_mp != NULL); hada = (da_ipsec_t *)hada_mp->b_rptr; @@ -3539,7 +3840,7 @@ esp_inbound_accelerated(mblk_t *ipsec_in, mblk_t *data_mp, boolean_t isv4, esp0dbg(("esp_inbound_accelerated: " "ICV len (%u) incorrect or mblk too small (%u)\n", icv_len, (uint32_t)(MBLKL(hada_mp)))); - counter = &ipdrops_esp_bad_auth; + counter = DROPPER(ipss, ipds_esp_bad_auth); goto esp_in_discard; } } @@ -3582,16 +3883,16 @@ esp_inbound_accelerated(mblk_t *ipsec_in, mblk_t *data_mp, boolean_t isv4, * Log the event. Don't print to the console, block * potential denial-of-service attack. */ - ESP_BUMP_STAT(bad_auth); + ESP_BUMP_STAT(espstack, bad_auth); ipsec_assocfailure(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN, "ESP Authentication failed spi %x, dst_addr %s", - assoc->ipsa_spi, addr, af); - counter = &ipdrops_esp_bad_auth; + assoc->ipsa_spi, addr, af, espstack->ipsecesp_netstack); + counter = DROPPER(ipss, ipds_esp_bad_auth); goto esp_in_discard; } - esp3dbg(("esp_inbound_accelerated: ESP authentication succeeded, " - "checking replay\n")); + esp3dbg(espstack, ("esp_inbound_accelerated: ESP authentication " + "succeeded, checking replay\n")); ipsec_in->b_cont = data_mp; @@ -3599,8 +3900,8 @@ esp_inbound_accelerated(mblk_t *ipsec_in, mblk_t *data_mp, boolean_t isv4, * Remove ESP header and padding from packet. */ if (!esp_strip_header(data_mp, ii->ipsec_in_v4, assoc->ipsa_iv_len, - &counter)) { - esp1dbg(("esp_inbound_accelerated: " + &counter, espstack)) { + esp1dbg(espstack, ("esp_inbound_accelerated: " "esp_strip_header() failed\n")); goto esp_in_discard; } @@ -3612,13 +3913,15 @@ esp_inbound_accelerated(mblk_t *ipsec_in, mblk_t *data_mp, boolean_t isv4, */ if (!esp_age_bytes(assoc, msgdsize(data_mp), B_TRUE)) { /* The ipsa has hit hard expiration, LOG and AUDIT. */ - ESP_BUMP_STAT(bytes_expired); - IP_ESP_BUMP_STAT(in_discards); + ESP_BUMP_STAT(espstack, bytes_expired); + IP_ESP_BUMP_STAT(ipss, in_discards); ipsec_assocfailure(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN, "ESP association 0x%x, dst %s had bytes expire.\n", - assoc->ipsa_spi, assoc->ipsa_dstaddr, assoc->ipsa_addrfam); + assoc->ipsa_spi, assoc->ipsa_dstaddr, assoc->ipsa_addrfam, + espstack->ipsecesp_netstack); ip_drop_packet(ipsec_in, B_TRUE, NULL, NULL, - &ipdrops_esp_bytes_expire, &esp_dropper); + DROPPER(ipss, ipds_esp_bytes_expire), + &espstack->esp_dropper); return (IPSEC_STATUS_FAILED); } @@ -3626,11 +3929,12 @@ esp_inbound_accelerated(mblk_t *ipsec_in, mblk_t *data_mp, boolean_t isv4, return (IPSEC_STATUS_SUCCESS); esp_in_discard: - IP_ESP_BUMP_STAT(in_discards); + IP_ESP_BUMP_STAT(ipss, in_discards); freeb(hada_mp); ipsec_in->b_cont = data_mp; /* For ip_drop_packet()'s sake... 
*/ - ip_drop_packet(ipsec_in, B_TRUE, NULL, NULL, counter, &esp_dropper); + ip_drop_packet(ipsec_in, B_TRUE, NULL, NULL, counter, + &espstack->esp_dropper); return (IPSEC_STATUS_FAILED); } @@ -3641,15 +3945,18 @@ esp_in_discard: */ void ipsecesp_in_assocfailure(mblk_t *mp, char level, ushort_t sl, char *fmt, - uint32_t spi, void *addr, int af) + uint32_t spi, void *addr, int af, ipsecesp_stack_t *espstack) { - if (ipsecesp_log_unknown_spi) { + ipsec_stack_t *ipss = espstack->ipsecesp_netstack->netstack_ipsec; + + if (espstack->ipsecesp_log_unknown_spi) { ipsec_assocfailure(info.mi_idnum, 0, level, sl, fmt, spi, - addr, af); + addr, af, espstack->ipsecesp_netstack); } - ip_drop_packet(mp, B_TRUE, NULL, NULL, &ipdrops_esp_no_sa, - &esp_dropper); + ip_drop_packet(mp, B_TRUE, NULL, NULL, + DROPPER(ipss, ipds_esp_no_sa), + &espstack->esp_dropper); } /* diff --git a/usr/src/uts/common/inet/ip/keysock.c b/usr/src/uts/common/inet/ip/keysock.c index 3c8ad50570..d3bae3a95b 100644 --- a/usr/src/uts/common/inet/ip/keysock.c +++ b/usr/src/uts/common/inet/ip/keysock.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -33,6 +33,7 @@ #include <sys/strsun.h> #include <sys/stropts.h> #include <sys/vnode.h> +#include <sys/zone.h> #include <sys/strlog.h> #include <sys/sysmacros.h> #define _SUN_TPI_VERSION 2 @@ -52,6 +53,7 @@ #include <sys/atomic.h> #include <sys/mkdev.h> #include <sys/policy.h> +#include <sys/disp.h> #include <sys/socket.h> #include <netinet/in.h> @@ -90,17 +92,9 @@ * down the *multiple* messages they create. */ -/* List of open PF_KEY sockets, protected by keysock_list_lock. */ -static kmutex_t keysock_list_lock; -static keysock_t *keysock_list; - static vmem_t *keysock_vmem; /* for minor numbers. */ -/* Consumers table. If an entry is NULL, keysock maintains the table. */ -static kmutex_t keysock_consumers_lock; - #define KEYSOCK_MAX_CONSUMERS 256 -static keysock_consumer_t *keysock_consumers[KEYSOCK_MAX_CONSUMERS]; /* Default structure copied into T_INFO_ACK messages (from rts.c...) */ static struct T_info_ack keysock_g_t_info_ack = { @@ -118,7 +112,7 @@ static struct T_info_ack keysock_g_t_info_ack = { }; /* Named Dispatch Parameter Management Structure */ -typedef struct keysockpparam_s { +typedef struct keysockparam_s { uint_t keysock_param_min; uint_t keysock_param_max; uint_t keysock_param_value; @@ -130,7 +124,7 @@ typedef struct keysockpparam_s { * keysock_g_nd in keysock_init_nd. * All of these are alterable, within the min/max values given, at run time. */ -static keysockparam_t keysock_param_arr[] = { +static keysockparam_t lcl_param_arr[] = { /* min max value name */ { 4096, 65536, 8192, "keysock_xmit_hiwat"}, { 0, 65536, 1024, "keysock_xmit_lowat"}, @@ -138,28 +132,17 @@ static keysockparam_t keysock_param_arr[] = { { 65536, 1024*1024*1024, 256*1024, "keysock_max_buf"}, { 0, 3, 0, "keysock_debug"}, }; -#define keysock_xmit_hiwat keysock_param_arr[0].keysock_param_value -#define keysock_xmit_lowat keysock_param_arr[1].keysock_param_value -#define keysock_recv_hiwat keysock_param_arr[2].keysock_param_value -#define keysock_max_buf keysock_param_arr[3].keysock_param_value -#define keysock_debug keysock_param_arr[4].keysock_param_value - -kmutex_t keysock_param_lock; /* Protects the NDD variables. 
*/ +#define keystack_xmit_hiwat keystack_params[0].keysock_param_value +#define keystack_xmit_lowat keystack_params[1].keysock_param_value +#define keystack_recv_hiwat keystack_params[2].keysock_param_value +#define keystack_max_buf keystack_params[3].keysock_param_value +#define keystack_debug keystack_params[4].keysock_param_value #define ks0dbg(a) printf a /* NOTE: != 0 instead of > 0 so lint doesn't complain. */ -#define ks1dbg(a) if (keysock_debug != 0) printf a -#define ks2dbg(a) if (keysock_debug > 1) printf a -#define ks3dbg(a) if (keysock_debug > 2) printf a - -static IDP keysock_g_nd; - -/* - * State for flush/dump. This would normally be a boolean_t, but - * cas32() works best for a known 32-bit quantity. - */ -static uint32_t keysock_flushdump; -static int keysock_flushdump_errno; +#define ks1dbg(keystack, a) if (keystack->keystack_debug != 0) printf a +#define ks2dbg(keystack, a) if (keystack->keystack_debug > 1) printf a +#define ks3dbg(keystack, a) if (keystack->keystack_debug > 2) printf a static int keysock_close(queue_t *); static int keysock_open(queue_t *, dev_t *, int, int, cred_t *); @@ -167,7 +150,9 @@ static void keysock_wput(queue_t *, mblk_t *); static void keysock_rput(queue_t *, mblk_t *); static void keysock_rsrv(queue_t *); static void keysock_passup(mblk_t *, sadb_msg_t *, minor_t, - keysock_consumer_t *, boolean_t); + keysock_consumer_t *, boolean_t, keysock_stack_t *); +static void *keysock_stack_init(netstackid_t stackid, netstack_t *ns); +static void keysock_stack_fini(netstackid_t stackid, void *arg); static struct module_info info = { 5138, "keysock", 1, INFPSZ, 512, 128 @@ -205,37 +190,29 @@ static char *KEYSOCK = "keysock"; static char *STRMOD = "strmod"; /* - * keysock_plumbed: zero if plumb not attempted, positive if it succeeded, - * negative if it failed. - */ -static int keysock_plumbed = 0; - -/* - * This integer counts the number of extended REGISTERed sockets. This - * determines if we should send extended REGISTERs. - */ -static uint32_t keysock_num_extended = 0; - -/* - * Global sequence space for SADB_ACQUIRE messages of any sort. - */ -static uint32_t keysock_acquire_seq = 0xffffffff; - -/* * Load the other ipsec modules and plumb them together. */ int -keysock_plumb_ipsec(void) +keysock_plumb_ipsec(netstack_t *ns) { ldi_handle_t lh, ip6_lh = NULL; ldi_ident_t li = NULL; int err = 0; int muxid, rval; boolean_t esp_present = B_TRUE; + cred_t *cr; + keysock_stack_t *keystack = ns->netstack_keysock; +#ifdef NS_DEBUG + (void) printf("keysock_plumb_ipsec(%d)\n", + ns->netstack_stackid); +#endif - keysock_plumbed = 0; /* we're trying again.. */ + keystack->keystack_plumbed = 0; /* we're trying again.. */ + cr = zone_get_kcred(netstackid_to_zoneid( + keystack->keystack_netstack->netstack_stackid)); + ASSERT(cr != NULL); /* * Load up the drivers (AH/ESP). 
* @@ -266,70 +243,75 @@ keysock_plumb_ipsec(void) goto bail; } - err = ldi_open_by_name(IP6DEV, FREAD|FWRITE, CRED(), &ip6_lh, li); + err = ldi_open_by_name(IP6DEV, FREAD|FWRITE, cr, &ip6_lh, li); if (err) { ks0dbg(("IPsec: Open of IP6 failed (err %d).\n", err)); goto bail; } /* PLINK KEYSOCK/AH */ - err = ldi_open_by_name(IPSECAHDEV, FREAD|FWRITE, CRED(), &lh, li); + err = ldi_open_by_name(IPSECAHDEV, FREAD|FWRITE, cr, &lh, li); if (err) { ks0dbg(("IPsec: Open of AH failed (err %d).\n", err)); goto bail; } err = ldi_ioctl(lh, - I_PUSH, (intptr_t)KEYSOCK, FKIOCTL, CRED(), &rval); + I_PUSH, (intptr_t)KEYSOCK, FKIOCTL, cr, &rval); if (err) { ks0dbg(("IPsec: Push of KEYSOCK onto AH failed (err %d).\n", err)); - (void) ldi_close(lh, FREAD|FWRITE, CRED()); + (void) ldi_close(lh, FREAD|FWRITE, cr); goto bail; } err = ldi_ioctl(ip6_lh, I_PLINK, (intptr_t)lh, - FREAD+FWRITE+FNOCTTY+FKIOCTL, kcred, &muxid); + FREAD+FWRITE+FNOCTTY+FKIOCTL, cr, &muxid); if (err) { ks0dbg(("IPsec: PLINK of KEYSOCK/AH failed (err %d).\n", err)); - (void) ldi_close(lh, FREAD|FWRITE, CRED()); + (void) ldi_close(lh, FREAD|FWRITE, cr); goto bail; } - (void) ldi_close(lh, FREAD|FWRITE, CRED()); + (void) ldi_close(lh, FREAD|FWRITE, cr); /* PLINK KEYSOCK/ESP */ if (esp_present) { err = ldi_open_by_name(IPSECESPDEV, - FREAD|FWRITE, CRED(), &lh, li); + FREAD|FWRITE, cr, &lh, li); if (err) { ks0dbg(("IPsec: Open of ESP failed (err %d).\n", err)); goto bail; } err = ldi_ioctl(lh, - I_PUSH, (intptr_t)KEYSOCK, FKIOCTL, CRED(), &rval); + I_PUSH, (intptr_t)KEYSOCK, FKIOCTL, cr, &rval); if (err) { ks0dbg(("IPsec: " "Push of KEYSOCK onto ESP failed (err %d).\n", err)); - (void) ldi_close(lh, FREAD|FWRITE, CRED()); + (void) ldi_close(lh, FREAD|FWRITE, cr); goto bail; } err = ldi_ioctl(ip6_lh, I_PLINK, (intptr_t)lh, - FREAD+FWRITE+FNOCTTY+FKIOCTL, kcred, &muxid); + FREAD+FWRITE+FNOCTTY+FKIOCTL, cr, &muxid); if (err) { ks0dbg(("IPsec: " "PLINK of KEYSOCK/ESP failed (err %d).\n", err)); - (void) ldi_close(lh, FREAD|FWRITE, CRED()); + (void) ldi_close(lh, FREAD|FWRITE, cr); goto bail; } - (void) ldi_close(lh, FREAD|FWRITE, CRED()); + (void) ldi_close(lh, FREAD|FWRITE, cr); } bail: - keysock_plumbed = (err == 0) ? 1 : -1; + keystack->keystack_plumbed = (err == 0) ? 1 : -1; if (ip6_lh != NULL) { - (void) ldi_close(ip6_lh, FREAD|FWRITE, CRED()); + (void) ldi_close(ip6_lh, FREAD|FWRITE, cr); } if (li != NULL) ldi_ident_release(li); +#ifdef NS_DEBUG + (void) printf("keysock_plumb_ipsec -> %d\n", + keystack->keystack_plumbed); +#endif + crfree(cr); return (err); } @@ -343,10 +325,12 @@ keysock_param_get(q, mp, cp, cr) { keysockparam_t *keysockpa = (keysockparam_t *)cp; uint_t value; + keysock_t *ks = (keysock_t *)q->q_ptr; + keysock_stack_t *keystack = ks->keysock_keystack; - mutex_enter(&keysock_param_lock); + mutex_enter(&keystack->keystack_param_lock); value = keysockpa->keysock_param_value; - mutex_exit(&keysock_param_lock); + mutex_exit(&keystack->keystack_param_lock); (void) mi_mpprintf(mp, "%u", value); return (0); @@ -364,80 +348,133 @@ keysock_param_set(q, mp, value, cp, cr) { ulong_t new_value; keysockparam_t *keysockpa = (keysockparam_t *)cp; + keysock_t *ks = (keysock_t *)q->q_ptr; + keysock_stack_t *keystack = ks->keysock_keystack; /* Convert the value from a string into a long integer. */ if (ddi_strtoul(value, NULL, 10, &new_value) != 0) return (EINVAL); - mutex_enter(&keysock_param_lock); + mutex_enter(&keystack->keystack_param_lock); /* * Fail the request if the new value does not lie within the * required bounds. 
*/ if (new_value < keysockpa->keysock_param_min || new_value > keysockpa->keysock_param_max) { - mutex_exit(&keysock_param_lock); + mutex_exit(&keystack->keystack_param_lock); return (EINVAL); } /* Set the new value */ keysockpa->keysock_param_value = new_value; - mutex_exit(&keysock_param_lock); + mutex_exit(&keystack->keystack_param_lock); return (0); } /* - * Initialize NDD variables, and other things, for keysock. + * Initialize keysock at module load time */ boolean_t keysock_ddi_init(void) { - keysockparam_t *ksp = keysock_param_arr; - int count = A_CNT(keysock_param_arr); - - if (!keysock_g_nd) { - for (; count-- > 0; ksp++) { - if (ksp->keysock_param_name != NULL && - ksp->keysock_param_name[0]) { - if (!nd_load(&keysock_g_nd, - ksp->keysock_param_name, - keysock_param_get, keysock_param_set, - (caddr_t)ksp)) { - nd_free(&keysock_g_nd); - return (B_FALSE); - } - } - } - } - keysock_max_optsize = optcom_max_optsize( keysock_opt_obj.odb_opt_des_arr, keysock_opt_obj.odb_opt_arr_cnt); keysock_vmem = vmem_create("keysock", (void *)1, MAXMIN, 1, NULL, NULL, NULL, 1, VM_SLEEP | VMC_IDENTIFIER); - mutex_init(&keysock_list_lock, NULL, MUTEX_DEFAULT, NULL); - mutex_init(&keysock_consumers_lock, NULL, MUTEX_DEFAULT, NULL); - mutex_init(&keysock_param_lock, NULL, MUTEX_DEFAULT, NULL); + /* + * We want to be informed each time a stack is created or + * destroyed in the kernel, so we can maintain the + * set of keysock_stack_t's. + */ + netstack_register(NS_KEYSOCK, keysock_stack_init, NULL, + keysock_stack_fini); + + return (B_TRUE); +} +/* + * Walk through the param array specified registering each element with the + * named dispatch handler. + */ +static boolean_t +keysock_param_register(IDP *ndp, keysockparam_t *ksp, int cnt) +{ + for (; cnt-- > 0; ksp++) { + if (ksp->keysock_param_name != NULL && + ksp->keysock_param_name[0]) { + if (!nd_load(ndp, + ksp->keysock_param_name, + keysock_param_get, keysock_param_set, + (caddr_t)ksp)) { + nd_free(ndp); + return (B_FALSE); + } + } + } return (B_TRUE); } /* + * Initialize keysock for one stack instance + */ +/* ARGSUSED */ +static void * +keysock_stack_init(netstackid_t stackid, netstack_t *ns) +{ + keysock_stack_t *keystack; + keysockparam_t *ksp; + + keystack = (keysock_stack_t *)kmem_zalloc(sizeof (*keystack), KM_SLEEP); + keystack->keystack_netstack = ns; + + keystack->keystack_acquire_seq = 0xffffffff; + + ksp = (keysockparam_t *)kmem_alloc(sizeof (lcl_param_arr), KM_SLEEP); + keystack->keystack_params = ksp; + bcopy(lcl_param_arr, ksp, sizeof (lcl_param_arr)); + + (void) keysock_param_register(&keystack->keystack_g_nd, ksp, + A_CNT(lcl_param_arr)); + + mutex_init(&keystack->keystack_list_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&keystack->keystack_consumers_lock, + NULL, MUTEX_DEFAULT, NULL); + mutex_init(&keystack->keystack_param_lock, NULL, MUTEX_DEFAULT, NULL); + return (keystack); +} + +/* * Free NDD variable space, and other destructors, for keysock. */ void keysock_ddi_destroy(void) { - /* XXX Free instances? 
*/ - ks0dbg(("keysock_ddi_destroy being called.\n")); - + netstack_unregister(NS_KEYSOCK); vmem_destroy(keysock_vmem); - mutex_destroy(&keysock_list_lock); - mutex_destroy(&keysock_consumers_lock); - mutex_destroy(&keysock_param_lock); - nd_free(&keysock_g_nd); +} + +/* + * Remove one stack instance from keysock + */ +/* ARGSUSED */ +static void +keysock_stack_fini(netstackid_t stackid, void *arg) +{ + keysock_stack_t *keystack = (keysock_stack_t *)arg; + + nd_free(&keystack->keystack_g_nd); + kmem_free(keystack->keystack_params, sizeof (lcl_param_arr)); + keystack->keystack_params = NULL; + + mutex_destroy(&keystack->keystack_list_lock); + mutex_destroy(&keystack->keystack_consumers_lock); + mutex_destroy(&keystack->keystack_param_lock); + + kmem_free(keystack, sizeof (*keystack)); } /* @@ -450,6 +487,8 @@ keysock_close(queue_t *q) keysock_consumer_t *kc; void *ptr = q->q_ptr; int size; + keysock_stack_t *keystack; + qprocsoff(q); @@ -458,7 +497,9 @@ keysock_close(queue_t *q) if (WR(q)->q_next) { kc = (keysock_consumer_t *)ptr; - ks0dbg(("Module close, removing a consumer (%d).\n", + keystack = kc->kc_keystack; + + ks1dbg(keystack, ("Module close, removing a consumer (%d).\n", kc->kc_sa_type)); /* * Because of PERMOD open/close exclusive perimeter, I @@ -473,8 +514,8 @@ keysock_close(queue_t *q) * really necessary, but if we ever loosen up, we will * have this bit covered already. */ - keysock_flushdump--; - if (keysock_flushdump == 0) { + keystack->keystack_flushdump--; + if (keystack->keystack_flushdump == 0) { /* * The flush/dump terminated by having a * consumer go away. I need to send up to the @@ -487,24 +528,29 @@ keysock_close(queue_t *q) } } size = sizeof (keysock_consumer_t); - mutex_enter(&keysock_consumers_lock); - keysock_consumers[kc->kc_sa_type] = NULL; - mutex_exit(&keysock_consumers_lock); + mutex_enter(&keystack->keystack_consumers_lock); + keystack->keystack_consumers[kc->kc_sa_type] = NULL; + mutex_exit(&keystack->keystack_consumers_lock); mutex_destroy(&kc->kc_lock); + netstack_rele(kc->kc_keystack->keystack_netstack); } else { - ks3dbg(("Driver close, PF_KEY socket is going away.\n")); ks = (keysock_t *)ptr; + keystack = ks->keysock_keystack; + + ks3dbg(keystack, + ("Driver close, PF_KEY socket is going away.\n")); if ((ks->keysock_flags & KEYSOCK_EXTENDED) != 0) - atomic_add_32(&keysock_num_extended, -1); + atomic_add_32(&keystack->keystack_num_extended, -1); size = sizeof (keysock_t); - mutex_enter(&keysock_list_lock); + mutex_enter(&keystack->keystack_list_lock); *(ks->keysock_ptpn) = ks->keysock_next; if (ks->keysock_next != NULL) ks->keysock_next->keysock_ptpn = ks->keysock_ptpn; - mutex_exit(&keysock_list_lock); + mutex_exit(&keystack->keystack_list_lock); mutex_destroy(&ks->keysock_lock); vmem_free(keysock_vmem, (void *)(uintptr_t)ks->keysock_serial, 1); + netstack_rele(ks->keysock_keystack->keystack_netstack); } /* Now I'm free. */ @@ -522,10 +568,10 @@ keysock_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) keysock_consumer_t *kc; mblk_t *mp; ipsec_info_t *ii; + netstack_t *ns; + keysock_stack_t *keystack; - ks3dbg(("Entering keysock open.\n")); - - if (secpolicy_net_config(credp, B_FALSE) != 0) { + if (secpolicy_ip_config(credp, B_FALSE) != 0) { /* Privilege debugging will log the error */ return (EPERM); } @@ -533,21 +579,36 @@ keysock_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) if (q->q_ptr != NULL) return (0); /* Re-open of an already open instance. 
*/ - if (keysock_plumbed < 1) { - keysock_plumbed = 0; + ns = netstack_find_by_cred(credp); + ASSERT(ns != NULL); + keystack = ns->netstack_keysock; + ASSERT(keystack != NULL); + + ks3dbg(keystack, ("Entering keysock open.\n")); + + if (keystack->keystack_plumbed < 1) { + netstack_t *ns = keystack->keystack_netstack; + + keystack->keystack_plumbed = 0; +#ifdef NS_DEBUG + printf("keysock_open(%d) - plumb\n", + keystack->keystack_netstack->netstack_stackid); +#endif /* * Don't worry about ipsec_failure being true here. * (See ip.c). An open of keysock should try and force * the issue. Maybe it was a transient failure. */ - ipsec_loader_loadnow(); + ipsec_loader_loadnow(ns->netstack_ipsec); } if (sflag & MODOPEN) { /* Initialize keysock_consumer state here. */ kc = kmem_zalloc(sizeof (keysock_consumer_t), KM_NOSLEEP); - if (kc == NULL) + if (kc == NULL) { + netstack_rele(keystack->keystack_netstack); return (ENOMEM); + } mutex_init(&kc->kc_lock, NULL, MUTEX_DEFAULT, 0); kc->kc_rq = q; kc->kc_wq = WR(q); @@ -555,6 +616,7 @@ keysock_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) q->q_ptr = kc; WR(q)->q_ptr = kc; + kc->kc_keystack = keystack; qprocson(q); /* @@ -565,13 +627,14 @@ keysock_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) /* Allocate it. */ mp = allocb(sizeof (ipsec_info_t), BPRI_HI); if (mp == NULL) { - ks1dbg(( + ks1dbg(keystack, ( "keysock_open: Cannot allocate KEYSOCK_HELLO.\n")); /* Do I need to set these to null? */ q->q_ptr = NULL; WR(q)->q_ptr = NULL; mutex_destroy(&kc->kc_lock); kmem_free(kc, sizeof (*kc)); + netstack_rele(keystack->keystack_netstack); return (ENOMEM); } @@ -582,23 +645,25 @@ keysock_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) ii->ipsec_info_type = KEYSOCK_HELLO; /* Length only of type/len. */ ii->ipsec_info_len = sizeof (ii->ipsec_allu); - ks2dbg(("Ready to putnext KEYSOCK_HELLO.\n")); + ks2dbg(keystack, ("Ready to putnext KEYSOCK_HELLO.\n")); putnext(kc->kc_wq, mp); } else { minor_t ksminor; /* Initialize keysock state. */ - ks2dbg(("Made it into PF_KEY socket open.\n")); + ks2dbg(keystack, ("Made it into PF_KEY socket open.\n")); ksminor = (minor_t)(uintptr_t) vmem_alloc(keysock_vmem, 1, VM_NOSLEEP); - if (ksminor == 0) + if (ksminor == 0) { + netstack_rele(keystack->keystack_netstack); return (ENOMEM); - + } ks = kmem_zalloc(sizeof (keysock_t), KM_NOSLEEP); if (ks == NULL) { vmem_free(keysock_vmem, (void *)(uintptr_t)ksminor, 1); + netstack_rele(keystack->keystack_netstack); return (ENOMEM); } @@ -610,6 +675,7 @@ keysock_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) q->q_ptr = ks; WR(q)->q_ptr = ks; + ks->keysock_keystack = keystack; /* * The receive hiwat is only looked at on the stream head @@ -617,7 +683,7 @@ keysock_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) * getsockopts. */ - q->q_hiwat = keysock_recv_hiwat; + q->q_hiwat = keystack->keystack_recv_hiwat; /* * The transmit hiwat/lowat is only looked at on IP's queue. @@ -625,30 +691,33 @@ keysock_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) * SO_SNDBUF/SO_SNDLOWAT getsockopts. */ - WR(q)->q_hiwat = keysock_xmit_hiwat; - WR(q)->q_lowat = keysock_xmit_lowat; + WR(q)->q_hiwat = keystack->keystack_xmit_hiwat; + WR(q)->q_lowat = keystack->keystack_xmit_lowat; *devp = makedevice(getmajor(*devp), ksminor); /* * Thread keysock into the global keysock list. 
*/ - mutex_enter(&keysock_list_lock); - ks->keysock_next = keysock_list; - ks->keysock_ptpn = &keysock_list; - if (keysock_list != NULL) - keysock_list->keysock_ptpn = &ks->keysock_next; - keysock_list = ks; - mutex_exit(&keysock_list_lock); + mutex_enter(&keystack->keystack_list_lock); + ks->keysock_next = keystack->keystack_list; + ks->keysock_ptpn = &keystack->keystack_list; + if (keystack->keystack_list != NULL) { + keystack->keystack_list->keysock_ptpn = + &ks->keysock_next; + } + keystack->keystack_list = ks; + mutex_exit(&keystack->keystack_list_lock); qprocson(q); - (void) mi_set_sth_hiwat(q, keysock_recv_hiwat); + (void) mi_set_sth_hiwat(q, keystack->keystack_recv_hiwat); /* * Wait outside the keysock module perimeter for IPsec * plumbing to be completed. If it fails, keysock_close() * undoes everything we just did. */ - if (!ipsec_loader_wait(q)) { + if (!ipsec_loader_wait(q, + keystack->keystack_netstack->netstack_ipsec)) { (void) keysock_close(q); return (EPFNOSUPPORT); } @@ -789,6 +858,7 @@ keysock_opt_set(queue_t *q, uint_t mgmt_flags, int level, { int *i1 = (int *)invalp; keysock_t *ks = (keysock_t *)q->q_ptr; + keysock_stack_t *keystack = ks->keysock_keystack; switch (level) { case SOL_SOCKET: @@ -800,12 +870,12 @@ keysock_opt_set(queue_t *q, uint_t mgmt_flags, int level, else ks->keysock_flags &= ~KEYSOCK_NOLOOP; break; case SO_SNDBUF: - if (*i1 > keysock_max_buf) + if (*i1 > keystack->keystack_max_buf) return (ENOBUFS); q->q_hiwat = *i1; break; case SO_RCVBUF: - if (*i1 > keysock_max_buf) + if (*i1 > keystack->keystack_max_buf) return (ENOBUFS); RD(q)->q_hiwat = *i1; (void) mi_set_sth_hiwat(RD(q), *i1); @@ -825,43 +895,52 @@ keysock_wput_other(queue_t *q, mblk_t *mp) { struct iocblk *iocp; int error; + keysock_t *ks = (keysock_t *)q->q_ptr; + keysock_stack_t *keystack = ks->keysock_keystack; + cred_t *cr; switch (mp->b_datap->db_type) { case M_PROTO: case M_PCPROTO: if ((mp->b_wptr - mp->b_rptr) < sizeof (long)) { - ks3dbg(( + ks3dbg(keystack, ( "keysock_wput_other: Not big enough M_PROTO\n")); freemsg(mp); return; } + cr = zone_get_kcred(netstackid_to_zoneid( + keystack->keystack_netstack->netstack_stackid)); + ASSERT(cr != NULL); + switch (((union T_primitives *)mp->b_rptr)->type) { case T_CAPABILITY_REQ: keysock_capability_req(q, mp); - return; + break; case T_INFO_REQ: keysock_info_req(q, mp); - return; + break; case T_SVR4_OPTMGMT_REQ: - (void) svr4_optcom_req(q, mp, DB_CREDDEF(mp, kcred), + (void) svr4_optcom_req(q, mp, DB_CREDDEF(mp, cr), &keysock_opt_obj); - return; + break; case T_OPTMGMT_REQ: - (void) tpi_optcom_req(q, mp, DB_CREDDEF(mp, kcred), + (void) tpi_optcom_req(q, mp, DB_CREDDEF(mp, cr), &keysock_opt_obj); - return; + break; case T_DATA_REQ: case T_EXDATA_REQ: case T_ORDREL_REQ: /* Illegal for keysock. */ freemsg(mp); (void) putnextctl1(RD(q), M_ERROR, EPROTO); - return; + break; default: /* Not supported by keysock. 
*/ keysock_err_ack(q, mp, TNOTSUPPORT, 0); - return; + break; } + crfree(cr); + return; case M_IOCTL: iocp = (struct iocblk *)mp->b_rptr; error = EINVAL; @@ -869,7 +948,7 @@ keysock_wput_other(queue_t *q, mblk_t *mp) switch (iocp->ioc_cmd) { case ND_SET: case ND_GET: - if (nd_getset(q, keysock_g_nd, mp)) { + if (nd_getset(q, keystack->keystack_g_nd, mp)) { qreply(q, mp); return; } else @@ -907,6 +986,7 @@ static void keysock_error(keysock_t *ks, mblk_t *mp, int error, int diagnostic) { sadb_msg_t *samsg = (sadb_msg_t *)mp->b_rptr; + keysock_stack_t *keystack = ks->keysock_keystack; ASSERT(mp->b_datap->db_type == M_DATA); @@ -923,7 +1003,7 @@ keysock_error(keysock_t *ks, mblk_t *mp, int error, int diagnostic) samsg->sadb_msg_errno = (uint8_t)error; samsg->sadb_x_msg_diagnostic = (uint16_t)diagnostic; - keysock_passup(mp, samsg, ks->keysock_serial, NULL, B_FALSE); + keysock_passup(mp, samsg, ks->keysock_serial, NULL, B_FALSE, keystack); } /* @@ -938,10 +1018,11 @@ keysock_passdown(keysock_t *ks, mblk_t *mp, uint8_t satype, sadb_ext_t *extv[], mblk_t *wrapper; keysock_in_t *ksi; int i; + keysock_stack_t *keystack = ks->keysock_keystack; wrapper = allocb(sizeof (ipsec_info_t), BPRI_HI); if (wrapper == NULL) { - ks3dbg(("keysock_passdown: allocb failed.\n")); + ks3dbg(keystack, ("keysock_passdown: allocb failed.\n")); if (extv[SADB_EXT_KEY_ENCRYPT] != NULL) bzero(extv[SADB_EXT_KEY_ENCRYPT], SADB_64TO8( @@ -954,7 +1035,7 @@ keysock_passdown(keysock_t *ks, mblk_t *mp, uint8_t satype, sadb_ext_t *extv[], ks0dbg(( "keysock: Downwards flush/dump message failed!\n")); /* If this is true, I hold the perimeter. */ - keysock_flushdump--; + keystack->keystack_flushdump--; } freemsg(mp); return; @@ -979,7 +1060,7 @@ keysock_passdown(keysock_t *ks, mblk_t *mp, uint8_t satype, sadb_ext_t *extv[], /* * Find the appropriate consumer where the message is passed down. */ - kc = keysock_consumers[satype]; + kc = keystack->keystack_consumers[satype]; if (kc == NULL) { freeb(wrapper); keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_UNKNOWN_SATYPE); @@ -987,7 +1068,7 @@ keysock_passdown(keysock_t *ks, mblk_t *mp, uint8_t satype, sadb_ext_t *extv[], ks0dbg(( "keysock: Downwards flush/dump message failed!\n")); /* If this is true, I hold the perimeter. */ - keysock_flushdump--; + keystack->keystack_flushdump--; } return; } @@ -1011,7 +1092,7 @@ keysock_passdown(keysock_t *ks, mblk_t *mp, uint8_t satype, sadb_ext_t *extv[], * High-level reality checking of extensions. */ static boolean_t -ext_check(sadb_ext_t *ext) +ext_check(sadb_ext_t *ext, keysock_stack_t *keystack) { int i; uint64_t *lp; @@ -1050,9 +1131,9 @@ ext_check(sadb_ext_t *ext) */ if ((roundup(SADB_1TO8(((sadb_key_t *)ext)->sadb_key_bits), 8) + sizeof (sadb_key_t)) != SADB_64TO8(ext->sadb_ext_len)) { - ks1dbg(( + ks1dbg(keystack, ( "ext_check: Key bits/length inconsistent.\n")); - ks1dbg(("%d bits, len is %d bytes.\n", + ks1dbg(keystack, ("%d bits, len is %d bytes.\n", ((sadb_key_t *)ext)->sadb_key_bits, SADB_64TO8(ext->sadb_ext_len))); return (B_FALSE); @@ -1123,7 +1204,8 @@ ext_check(sadb_ext_t *ext) * like an assembly programmer, yet trying to make the compiler happy. 
*/ static int -keysock_get_ext(sadb_ext_t *extv[], sadb_msg_t *basehdr, uint_t msgsize) +keysock_get_ext(sadb_ext_t *extv[], sadb_msg_t *basehdr, uint_t msgsize, + keysock_stack_t *keystack) { bzero(extv, sizeof (sadb_ext_t *) * (SADB_EXT_MAX + 1)); @@ -1155,7 +1237,7 @@ keysock_get_ext(sadb_ext_t *extv[], sadb_msg_t *basehdr, uint_t msgsize) * Reality check the extension if possible at the keysock * level. */ - if (!ext_check(extv[0])) + if (!ext_check(extv[0], keystack)) return (KGE_CHK); /* If I make it here, assign the appropriate bin. */ @@ -1190,17 +1272,19 @@ keysock_do_flushdump(queue_t *q, mblk_t *mp) keysock_t *ks = (keysock_t *)q->q_ptr; sadb_ext_t *extv[SADB_EXT_MAX + 1]; sadb_msg_t *samsg = (sadb_msg_t *)mp->b_rptr; + keysock_stack_t *keystack = ks->keysock_keystack; /* * I am guaranteed this will work. I did the work in keysock_parse() * already. */ - (void) keysock_get_ext(extv, samsg, SADB_64TO8(samsg->sadb_msg_len)); + (void) keysock_get_ext(extv, samsg, SADB_64TO8(samsg->sadb_msg_len), + keystack); /* * I hold the perimeter, therefore I don't need to use atomic ops. */ - if (keysock_flushdump != 0) { + if (keystack->keystack_flushdump != 0) { /* XXX Should I instead use EBUSY? */ /* XXX Or is there a way to queue these up? */ keysock_error(ks, mp, ENOMEM, SADB_X_DIAGNOSTIC_NONE); @@ -1220,7 +1304,7 @@ keysock_do_flushdump(queue_t *q, mblk_t *mp) * and/or flushes. */ - keysock_flushdump_errno = 0; + keystack->keystack_flushdump_errno = 0; /* * Okay, I hold the perimeter. Eventually keysock_flushdump will @@ -1239,9 +1323,9 @@ keysock_do_flushdump(queue_t *q, mblk_t *mp) * and accordingly back to the socket. */ - mutex_enter(&keysock_consumers_lock); + mutex_enter(&keystack->keystack_consumers_lock); for (i = start; i <= finish; i++) { - if (keysock_consumers[i] != NULL) { + if (keystack->keystack_consumers[i] != NULL) { mp1 = copymsg(mp); if (mp1 == NULL) { ks0dbg(("SADB_FLUSH copymsg() failed.\n")); @@ -1261,13 +1345,14 @@ keysock_do_flushdump(queue_t *q, mblk_t *mp) * Because my entry conditions are met above, the * following assertion should hold true. */ - mutex_enter(&(keysock_consumers[i]->kc_lock)); - ASSERT((keysock_consumers[i]->kc_flags & KC_FLUSHING) - == 0); - keysock_consumers[i]->kc_flags |= KC_FLUSHING; - mutex_exit(&(keysock_consumers[i]->kc_lock)); + mutex_enter(&keystack->keystack_consumers[i]->kc_lock); + ASSERT((keystack->keystack_consumers[i]->kc_flags & + KC_FLUSHING) == 0); + keystack->keystack_consumers[i]->kc_flags |= + KC_FLUSHING; + mutex_exit(&(keystack->keystack_consumers[i]->kc_lock)); /* Always increment the number of flushes... */ - keysock_flushdump++; + keystack->keystack_flushdump++; /* Guaranteed to return a message. */ keysock_passdown(ks, mp1, i, extv, B_TRUE); } else if (start == finish) { @@ -1275,15 +1360,15 @@ keysock_do_flushdump(queue_t *q, mblk_t *mp) * In case where start == finish, and there's no * consumer, should we force an error? Yes. */ - mutex_exit(&keysock_consumers_lock); + mutex_exit(&keystack->keystack_consumers_lock); keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_UNKNOWN_SATYPE); return; } } - mutex_exit(&keysock_consumers_lock); + mutex_exit(&keystack->keystack_consumers_lock); - if (keysock_flushdump == 0) { + if (keystack->keystack_flushdump == 0) { /* * There were no consumers at all for this message. * XXX For now return ESRCH. 
@@ -1380,6 +1465,7 @@ keysock_inverse_acquire(mblk_t *mp, sadb_msg_t *samsg, sadb_ext_t *extv[], keysock_t *ks) { mblk_t *reply_mp; + keysock_stack_t *keystack = ks->keysock_keystack; /* * Reality check things... @@ -1407,12 +1493,13 @@ keysock_inverse_acquire(mblk_t *mp, sadb_msg_t *samsg, sadb_ext_t *extv[], return; } - reply_mp = ipsec_construct_inverse_acquire(samsg, extv); + reply_mp = ipsec_construct_inverse_acquire(samsg, extv, + keystack->keystack_netstack); if (reply_mp != NULL) { freemsg(mp); keysock_passup(reply_mp, (sadb_msg_t *)reply_mp->b_rptr, - ks->keysock_serial, NULL, B_FALSE); + ks->keysock_serial, NULL, B_FALSE, keystack); } else { keysock_error(ks, mp, samsg->sadb_msg_errno, samsg->sadb_x_msg_diagnostic); @@ -1429,6 +1516,7 @@ keysock_extended_register(keysock_t *ks, mblk_t *mp, sadb_ext_t *extv[]) uint8_t *satypes, *fencepost; mblk_t *downmp; sadb_ext_t *downextv[SADB_EXT_MAX + 1]; + keysock_stack_t *keystack = ks->keysock_keystack; if (ks->keysock_registered[0] != 0 || ks->keysock_registered[1] != 0 || ks->keysock_registered[2] != 0 || ks->keysock_registered[3] != 0) { @@ -1452,7 +1540,8 @@ keysock_extended_register(keysock_t *ks, mblk_t *mp, sadb_ext_t *extv[]) * Since we've made it here, keysock_get_ext will work! */ (void) keysock_get_ext(downextv, - (sadb_msg_t *)downmp->b_rptr, msgdsize(downmp)); + (sadb_msg_t *)downmp->b_rptr, msgdsize(downmp), + keystack); keysock_passdown(ks, downmp, *satypes, downextv, B_FALSE); ++satypes; @@ -1463,7 +1552,7 @@ keysock_extended_register(keysock_t *ks, mblk_t *mp, sadb_ext_t *extv[]) /* * Set global to indicate we prefer an extended ACQUIRE. */ - atomic_add_32(&keysock_num_extended, 1); + atomic_add_32(&keystack->keystack_num_extended, 1); } /* @@ -1477,12 +1566,13 @@ keysock_parse(queue_t *q, mblk_t *mp) keysock_t *ks = (keysock_t *)q->q_ptr; uint_t msgsize; uint8_t satype; + keysock_stack_t *keystack = ks->keysock_keystack; /* Make sure I'm a PF_KEY socket. (i.e. nothing's below me) */ ASSERT(WR(q)->q_next == NULL); samsg = (sadb_msg_t *)mp->b_rptr; - ks2dbg(("Received possible PF_KEY message, type %d.\n", + ks2dbg(keystack, ("Received possible PF_KEY message, type %d.\n", samsg->sadb_msg_type)); msgsize = SADB_64TO8(samsg->sadb_msg_len); @@ -1496,7 +1586,8 @@ keysock_parse(queue_t *q, mblk_t *mp) * do the right thing. Then again, maybe just letting * the error delivery do the right thing. */ - ks2dbg(("mblk (%lu) and base (%d) message sizes don't jibe.\n", + ks2dbg(keystack, + ("mblk (%lu) and base (%d) message sizes don't jibe.\n", msgdsize(mp), msgsize)); keysock_error(ks, mp, EMSGSIZE, SADB_X_DIAGNOSTIC_NONE); return; @@ -1508,36 +1599,39 @@ keysock_parse(queue_t *q, mblk_t *mp) /* * Something screwy happened. */ - ks3dbg(("keysock_parse: pullupmsg() failed.\n")); + ks3dbg(keystack, + ("keysock_parse: pullupmsg() failed.\n")); return; } else { samsg = (sadb_msg_t *)mp->b_rptr; } } - switch (keysock_get_ext(extv, samsg, msgsize)) { + switch (keysock_get_ext(extv, samsg, msgsize, keystack)) { case KGE_DUP: /* Handle duplicate extension. */ - ks1dbg(("Got duplicate extension of type %d.\n", + ks1dbg(keystack, ("Got duplicate extension of type %d.\n", extv[0]->sadb_ext_type)); keysock_error(ks, mp, EINVAL, keysock_duplicate(extv[0]->sadb_ext_type)); return; case KGE_UNK: /* Handle unknown extension. 
*/ - ks1dbg(("Got unknown extension of type %d.\n", + ks1dbg(keystack, ("Got unknown extension of type %d.\n", extv[0]->sadb_ext_type)); keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_UNKNOWN_EXT); return; case KGE_LEN: /* Length error. */ - ks1dbg(("Length %d on extension type %d overrun or 0.\n", + ks1dbg(keystack, + ("Length %d on extension type %d overrun or 0.\n", extv[0]->sadb_ext_len, extv[0]->sadb_ext_type)); keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_BAD_EXTLEN); return; case KGE_CHK: /* Reality check failed. */ - ks1dbg(("Reality check failed on extension type %d.\n", + ks1dbg(keystack, + ("Reality check failed on extension type %d.\n", extv[0]->sadb_ext_type)); keysock_error(ks, mp, EINVAL, keysock_malformed(extv[0]->sadb_ext_type)); @@ -1614,11 +1708,13 @@ keysock_parse(queue_t *q, mblk_t *mp) } keysock_passdown(ks, mp, satype, extv, B_FALSE); } else { - if (samsg->sadb_msg_satype == SADB_SATYPE_UNSPEC) + if (samsg->sadb_msg_satype == SADB_SATYPE_UNSPEC) { keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_SATYPE_NEEDED); - else - keysock_passup(mp, samsg, 0, NULL, B_FALSE); + } else { + keysock_passup(mp, samsg, 0, NULL, B_FALSE, + keystack); + } } return; case SADB_EXPIRE: @@ -1647,7 +1743,7 @@ keysock_parse(queue_t *q, mblk_t *mp) * FLUSH or DUMP messages shouldn't have extensions. * Return EINVAL. */ - ks2dbg(("FLUSH message with extension.\n")); + ks2dbg(keystack, ("FLUSH message with extension.\n")); keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_NO_EXT); return; } @@ -1663,13 +1759,14 @@ keysock_parse(queue_t *q, mblk_t *mp) ks->keysock_flags &= ~KEYSOCK_PROMISC; else ks->keysock_flags |= KEYSOCK_PROMISC; - keysock_passup(mp, samsg, ks->keysock_serial, NULL, B_FALSE); + keysock_passup(mp, samsg, ks->keysock_serial, NULL, B_FALSE, + keystack); return; case SADB_X_INVERSE_ACQUIRE: keysock_inverse_acquire(mp, samsg, extv, ks); return; default: - ks2dbg(("Got unknown message type %d.\n", + ks2dbg(keystack, ("Got unknown message type %d.\n", samsg->sadb_msg_type)); keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_UNKNOWN_MSG); return; @@ -1691,28 +1788,35 @@ keysock_wput(queue_t *q, mblk_t *mp) { uchar_t *rptr = mp->b_rptr; mblk_t *mp1; - - ks3dbg(("In keysock_wput\n")); + keysock_t *ks; + keysock_stack_t *keystack; if (WR(q)->q_next) { keysock_consumer_t *kc = (keysock_consumer_t *)q->q_ptr; + keystack = kc->kc_keystack; + + ks3dbg(keystack, ("In keysock_wput\n")); /* * We shouldn't get writes on a consumer instance. * But for now, just passthru. */ - ks1dbg(("Huh? wput for an consumer instance (%d)?\n", + ks1dbg(keystack, ("Huh? wput for an consumer instance (%d)?\n", kc->kc_sa_type)); putnext(q, mp); return; } + ks = (keysock_t *)q->q_ptr; + keystack = ks->keysock_keystack; + + ks3dbg(keystack, ("In keysock_wput\n")); switch (mp->b_datap->db_type) { case M_DATA: /* * Silently discard. */ - ks2dbg(("raw M_DATA in keysock.\n")); + ks2dbg(keystack, ("raw M_DATA in keysock.\n")); freemsg(mp); return; case M_PROTO: @@ -1721,19 +1825,20 @@ keysock_wput(queue_t *q, mblk_t *mp) if (((union T_primitives *)rptr)->type == T_DATA_REQ) { if ((mp1 = mp->b_cont) == NULL) { /* No data after T_DATA_REQ. */ - ks2dbg(("No data after DATA_REQ.\n")); + ks2dbg(keystack, + ("No data after DATA_REQ.\n")); freemsg(mp); return; } freeb(mp); mp = mp1; - ks2dbg(("T_DATA_REQ\n")); + ks2dbg(keystack, ("T_DATA_REQ\n")); break; /* Out of switch. 
*/ } } /* FALLTHRU */ default: - ks3dbg(("In default wput case (%d %d).\n", + ks3dbg(keystack, ("In default wput case (%d %d).\n", mp->b_datap->db_type, ((union T_primitives *)rptr)->type)); keysock_wput_other(q, mp); return; @@ -1753,10 +1858,11 @@ static void keysock_link_consumer(uint8_t satype, keysock_consumer_t *kc) { keysock_t *ks; + keysock_stack_t *keystack = kc->kc_keystack; - mutex_enter(&keysock_consumers_lock); + mutex_enter(&keystack->keystack_consumers_lock); mutex_enter(&kc->kc_lock); - if (keysock_consumers[satype] != NULL) { + if (keystack->keystack_consumers[satype] != NULL) { ks0dbg(( "Hmmmm, someone closed %d before the HELLO_ACK happened.\n", satype)); @@ -1765,29 +1871,31 @@ keysock_link_consumer(uint8_t satype, keysock_consumer_t *kc) * so far would work too? */ mutex_exit(&kc->kc_lock); - mutex_exit(&keysock_consumers_lock); + mutex_exit(&keystack->keystack_consumers_lock); } else { /* Add new below-me consumer. */ - keysock_consumers[satype] = kc; + keystack->keystack_consumers[satype] = kc; kc->kc_flags = 0; kc->kc_sa_type = satype; mutex_exit(&kc->kc_lock); - mutex_exit(&keysock_consumers_lock); + mutex_exit(&keystack->keystack_consumers_lock); /* Scan the keysock list. */ - mutex_enter(&keysock_list_lock); - for (ks = keysock_list; ks != NULL; ks = ks->keysock_next) { + mutex_enter(&keystack->keystack_list_lock); + for (ks = keystack->keystack_list; ks != NULL; + ks = ks->keysock_next) { if (KEYSOCK_ISREG(ks, satype)) { /* * XXX Perhaps send an SADB_REGISTER down on * the socket's behalf. */ - ks1dbg(("Socket %u registered already for " + ks1dbg(keystack, + ("Socket %u registered already for " "new consumer.\n", ks->keysock_serial)); } } - mutex_exit(&keysock_list_lock); + mutex_exit(&keystack->keystack_list_lock); } } @@ -1799,10 +1907,11 @@ keysock_out_err(keysock_consumer_t *kc, int ks_errno, mblk_t *mp) { keysock_out_err_t *kse; mblk_t *imp; + keysock_stack_t *keystack = kc->kc_keystack; imp = allocb(sizeof (ipsec_info_t), BPRI_HI); if (imp == NULL) { - ks1dbg(("keysock_out_err: Can't alloc message.\n")); + ks1dbg(keystack, ("keysock_out_err: Can't alloc message.\n")); return; } @@ -1843,7 +1952,7 @@ keysock_out_err(keysock_consumer_t *kc, int ks_errno, mblk_t *mp) */ static void keysock_passup(mblk_t *mp, sadb_msg_t *samsg, minor_t serial, - keysock_consumer_t *kc, boolean_t persistent) + keysock_consumer_t *kc, boolean_t persistent, keysock_stack_t *keystack) { keysock_t *ks; uint8_t satype = samsg->sadb_msg_satype; @@ -1876,7 +1985,8 @@ keysock_passup(mblk_t *mp, sadb_msg_t *samsg, minor_t serial, * These are most likely replies. Don't worry about * KEYSOCK_OUT_ERR handling. Deliver to all sockets. */ - ks3dbg(("Delivering normal message (%d) to all sockets.\n", + ks3dbg(keystack, + ("Delivering normal message (%d) to all sockets.\n", samsg->sadb_msg_type)); toall = B_TRUE; break; @@ -1901,7 +2011,7 @@ keysock_passup(mblk_t *mp, sadb_msg_t *samsg, minor_t serial, ASSERT(samsg->sadb_msg_errno != 0); break; /* Out of switch. */ } - ks3dbg(("Delivering REGISTER.\n")); + ks3dbg(keystack, ("Delivering REGISTER.\n")); if (satype == SADB_SATYPE_UNSPEC) { /* REGISTER Reason #2 */ allereg = B_TRUE; @@ -1928,7 +2038,7 @@ keysock_passup(mblk_t *mp, sadb_msg_t *samsg, minor_t serial, * regular (sadb_msg_satype != 0). And we're guaranteed * that serial == 0 for an ACQUIRE. 
*/ - ks3dbg(("Delivering ACQUIRE.\n")); + ks3dbg(keystack, ("Delivering ACQUIRE.\n")); allereg = (satype == SADB_SATYPE_UNSPEC); allreg = !allereg; /* @@ -1938,7 +2048,7 @@ keysock_passup(mblk_t *mp, sadb_msg_t *samsg, minor_t serial, * their ACQUIRE record. This might be too hackish of a * solution. */ - if (allreg && keysock_num_extended > 0) + if (allreg && keystack->keystack_num_extended > 0) err = 0; break; case SADB_X_PROMISC: @@ -1949,13 +2059,13 @@ keysock_passup(mblk_t *mp, sadb_msg_t *samsg, minor_t serial, /* * Deliver to the sender and promiscuous only. */ - ks3dbg(("Delivering sender/promisc only (%d).\n", + ks3dbg(keystack, ("Delivering sender/promisc only (%d).\n", samsg->sadb_msg_type)); break; } - mutex_enter(&keysock_list_lock); - for (ks = keysock_list; ks != NULL; ks = ks->keysock_next) { + mutex_enter(&keystack->keystack_list_lock); + for (ks = keystack->keystack_list; ks != NULL; ks = ks->keysock_next) { /* Delivery loop. */ /* @@ -1994,7 +2104,7 @@ keysock_passup(mblk_t *mp, sadb_msg_t *samsg, minor_t serial, mp1 = dupmsg(mp); if (mp1 == NULL) { - ks2dbg(( + ks2dbg(keystack, ( "keysock_passup(): dupmsg() failed.\n")); mp1 = mp; mp = NULL; @@ -2015,7 +2125,7 @@ keysock_passup(mblk_t *mp, sadb_msg_t *samsg, minor_t serial, if (!canputnext(ks->keysock_rq)) { if (persistent) { if (putq(ks->keysock_rq, mp1) == 0) { - ks1dbg(( + ks1dbg(keystack, ( "keysock_passup: putq failed.\n")); } else { continue; @@ -2025,7 +2135,8 @@ keysock_passup(mblk_t *mp, sadb_msg_t *samsg, minor_t serial, continue; } - ks3dbg(("Putting to serial %d.\n", ks->keysock_serial)); + ks3dbg(keystack, + ("Putting to serial %d.\n", ks->keysock_serial)); /* * Unlike the specific keysock instance case, this * will only hit for listeners, so we will only @@ -2035,7 +2146,7 @@ keysock_passup(mblk_t *mp, sadb_msg_t *samsg, minor_t serial, if (mp == NULL) break; /* out of for loop. */ } - mutex_exit(&keysock_list_lock); + mutex_exit(&keystack->keystack_list_lock); error: if ((err != 0) && (kc != NULL)) { @@ -2044,7 +2155,8 @@ error: * Basically, I send this back if I have not been able to * transmit (for whatever reason) */ - ks1dbg(("keysock_passup(): No registered of type %d.\n", + ks1dbg(keystack, + ("keysock_passup(): No registered of type %d.\n", satype)); if (mp != NULL) { if (mp->b_datap->db_type == M_PROTO) { @@ -2058,7 +2170,8 @@ error: */ mp1 = copymsg(mp); if (mp1 == NULL) { - ks2dbg(("keysock_passup: copymsg() failed.\n")); + ks2dbg(keystack, + ("keysock_passup: copymsg() failed.\n")); mp1 = mp; mp = NULL; } @@ -2110,6 +2223,7 @@ keysock_rput(queue_t *q, mblk_t *mp) minor_t serial; mblk_t *mp1; sadb_msg_t *samsg; + keysock_stack_t *keystack = kc->kc_keystack; /* Make sure I'm a consumer instance. (i.e. something's below me) */ ASSERT(WR(q)->q_next != NULL); @@ -2121,7 +2235,8 @@ keysock_rput(queue_t *q, mblk_t *mp) * To be robust, however, putnext() up so the STREAM head can * deal with it appropriately. */ - ks1dbg(("Hmmm, a non M_CTL (%d, 0x%x) on keysock_rput.\n", + ks1dbg(keystack, + ("Hmmm, a non M_CTL (%d, 0x%x) on keysock_rput.\n", mp->b_datap->db_type, mp->b_datap->db_type)); putnext(q, mp); return; @@ -2146,35 +2261,39 @@ keysock_rput(queue_t *q, mblk_t *mp) /* * If I'm an end-of-FLUSH or an end-of-DUMP marker... */ - ASSERT(keysock_flushdump != 0); /* Am I flushing? */ + ASSERT(keystack->keystack_flushdump != 0); + /* Am I flushing? 
*/ mutex_enter(&kc->kc_lock); kc->kc_flags &= ~KC_FLUSHING; mutex_exit(&kc->kc_lock); if (samsg->sadb_msg_errno != 0) - keysock_flushdump_errno = samsg->sadb_msg_errno; + keystack->keystack_flushdump_errno = + samsg->sadb_msg_errno; /* * Lower the atomic "flushing" count. If it's * the last one, send up the end-of-{FLUSH,DUMP} to * the appropriate PF_KEY socket. */ - if (atomic_add_32_nv(&keysock_flushdump, -1) != 0) { - ks1dbg(("One flush/dump message back from %d," + if (atomic_add_32_nv(&keystack->keystack_flushdump, + -1) != 0) { + ks1dbg(keystack, + ("One flush/dump message back from %d," " more to go.\n", samsg->sadb_msg_satype)); freemsg(mp1); return; } samsg->sadb_msg_errno = - (uint8_t)keysock_flushdump_errno; + (uint8_t)keystack->keystack_flushdump_errno; if (samsg->sadb_msg_type == SADB_DUMP) { samsg->sadb_msg_seq = 0; } } keysock_passup(mp1, samsg, serial, kc, - (samsg->sadb_msg_type == SADB_DUMP)); + (samsg->sadb_msg_type == SADB_DUMP), keystack); return; case KEYSOCK_HELLO_ACK: /* Aha, now we can link in the consumer! */ @@ -2183,7 +2302,7 @@ keysock_rput(queue_t *q, mblk_t *mp) freemsg(mp); return; default: - ks1dbg(("Hmmm, an IPsec info I'm not used to, 0x%x\n", + ks1dbg(keystack, ("Hmmm, an IPsec info I'm not used to, 0x%x\n", ii->ipsec_info_type)); putnext(q, mp); } @@ -2193,13 +2312,17 @@ keysock_rput(queue_t *q, mblk_t *mp) * So we can avoid external linking problems.... */ boolean_t -keysock_extended_reg(void) +keysock_extended_reg(netstack_t *ns) { - return (keysock_num_extended != 0); + keysock_stack_t *keystack = ns->netstack_keysock; + + return (keystack->keystack_num_extended != 0); } uint32_t -keysock_next_seq(void) +keysock_next_seq(netstack_t *ns) { - return (atomic_add_32_nv(&keysock_acquire_seq, -1)); + keysock_stack_t *keystack = ns->netstack_keysock; + + return (atomic_add_32_nv(&keystack->keystack_acquire_seq, -1)); } diff --git a/usr/src/uts/common/inet/ip/nattymod.c b/usr/src/uts/common/inet/ip/nattymod.c index 34453ea9fd..f8fb6bf453 100644 --- a/usr/src/uts/common/inet/ip/nattymod.c +++ b/usr/src/uts/common/inet/ip/nattymod.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -48,8 +48,11 @@ #include <inet/sadb.h> #include <inet/ip_ire.h> #include <sys/cmn_err.h> -#include <inet/ipdrop.h> #include <inet/udp_impl.h> +#include <inet/ipsec_impl.h> +#include <inet/ipdrop.h> +#include <inet/sadb.h> +#include <inet/ipsecesp.h> /* * Design notes: @@ -77,15 +80,13 @@ typedef struct nattyinfo boolean_t ni_rh_wait; /* Seen UDP_RCVHDR request go by */ boolean_t ni_rh_set; /* Have we set UDP_RCVHDR? */ boolean_t ni_addr_wait; /* Seen T_ADDR_REQ go by */ + netstack_t *ni_netstack; } nattyinfo_t; kmutex_t nattyhlock; /* List lock. */ nattyinfo_t *nattyhead; /* List of instances. */ -/* Packet dropper for IP IPsec processing failures */ -extern ipdropper_t ip_dropper; - /* * Function prototypes. */ @@ -199,13 +200,17 @@ _info(struct modinfo *modinfop) /* ARGSUSED */ static int -nattymodopen(queue_t *rq, dev_t *dev, int oflag, int sflag, cred_t *crp) +nattymodopen(queue_t *rq, dev_t *dev, int oflag, int sflag, cred_t *credp) { nattyinfo_t *ni; + netstack_t *ns; if (sflag != MODOPEN) return (EINVAL); + ns = netstack_find_by_cred(credp); + ASSERT(ns != NULL); + /* Use kmem_zalloc() to avoid initializing ni->* fields. 
*/ ni = kmem_zalloc(sizeof (nattyinfo_t), KM_SLEEP); mutex_init(&ni->ni_lock, NULL, MUTEX_DEFAULT, NULL); @@ -220,6 +225,7 @@ nattymodopen(queue_t *rq, dev_t *dev, int oflag, int sflag, cred_t *crp) nattyhead->ni_ptpn = &ni->ni_next; ni->ni_next = nattyhead; nattyhead = ni; + ni->ni_netstack = ns; mutex_exit(&nattyhlock); qprocson(rq); @@ -239,8 +245,9 @@ nattymodclose(queue_t *rq) ni->ni_next->ni_ptpn = ni->ni_ptpn; mutex_exit(&nattyhlock); - sadb_clear_timeouts(WR(rq)); + sadb_clear_timeouts(WR(rq), ni->ni_netstack); + netstack_rele(ni->ni_netstack); qprocsoff(rq); /* Unlinked from list means ==> no need to mutex. */ @@ -308,7 +315,7 @@ get_my_ire(nattyinfo_t *ni, ipaddr_t addr) ni->ni_addr = addr; ire = ire_ctable_lookup(addr, 0, IRE_LOCAL, NULL, ALL_ZONES, NULL, - MATCH_IRE_TYPE); + MATCH_IRE_TYPE, ni->ni_netstack->netstack_ip); if (ire == NULL) goto bail; @@ -423,6 +430,9 @@ natty_rput_pkt(queue_t *q, mblk_t *mp) int ntries = 0; nattyinfo_t *ni = q->q_ptr; sadb_t *sp; + netstack_t *ns = ni->ni_netstack; + ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; + ipsec_stack_t *ipss = ns->netstack_ipsec; if (!ni->ni_rh_set) { #ifdef DEBUG @@ -536,7 +546,7 @@ natty_rput_pkt(queue_t *q, mblk_t *mp) iph_mp->b_wptr -= UDPH_SIZE; /* we are v4 only */ - sp = &esp_sadb.s_v4; + sp = &espstack->esp_sadb.s_v4; bucket = INBOUND_BUCKET(sp, spi); mutex_enter(&bucket->isaf_lock); @@ -564,8 +574,10 @@ natty_rput_pkt(queue_t *q, mblk_t *mp) IPSA_REFRELE(ipsa); } + /* Handle the kstat_create in ip_drop_init() failing */ ip_drop_packet(iph_mp, B_TRUE, NULL, NULL, - &ipdrops_esp_no_sa, &ip_dropper); + DROPPER(ipss, ipds_esp_no_sa), + &ipss->ipsec_dropper); return; } diff --git a/usr/src/uts/common/inet/ip/rts.c b/usr/src/uts/common/inet/ip/rts.c index 8e469b3d22..c7a2d42eaa 100644 --- a/usr/src/uts/common/inet/ip/rts.c +++ b/usr/src/uts/common/inet/ip/rts.c @@ -40,6 +40,7 @@ #include <sys/proc.h> #include <sys/suntpi.h> #include <sys/policy.h> +#include <sys/zone.h> #include <sys/socket.h> #include <netinet/in.h> @@ -75,6 +76,17 @@ */ /* + * RTS stack instances + */ +struct rts_stack { + netstack_t *rtss_netstack; /* Common netstack */ + + caddr_t rtss_g_nd; + struct rtsparam_s *rtss_params; +}; +typedef struct rts_stack rts_stack_t; + +/* * Object to represent database of options to search passed to * {sock,tpi}optcom_req() interface routine to take care of option * management and associated methods. @@ -99,6 +111,7 @@ typedef struct rts_s { rts_hdrincl : 1, /* IP_HDRINCL option + RAW and IGMP */ : 0; + rts_stack_t *rts_rtss; } rts_t; #define RTS_WPUT_PENDING 0x1 /* Waiting for write-side to complete */ @@ -121,7 +134,7 @@ static struct T_info_ack rts_g_t_info_ack = { }; /* Named Dispatch Parameter Management Structure */ -typedef struct rtspparam_s { +typedef struct rtsparam_s { uint_t rts_param_min; uint_t rts_param_max; uint_t rts_param_value; @@ -133,17 +146,17 @@ typedef struct rtspparam_s { * in rts_open. * All of these are alterable, within the min/max values given, at run time. 
*/ -static rtsparam_t rts_param_arr[] = { +static rtsparam_t lcl_param_arr[] = { /* min max value name */ { 4096, 65536, 8192, "rts_xmit_hiwat"}, { 0, 65536, 1024, "rts_xmit_lowat"}, { 4096, 65536, 8192, "rts_recv_hiwat"}, { 65536, 1024*1024*1024, 256*1024, "rts_max_buf"}, }; -#define rts_xmit_hiwat rts_param_arr[0].rts_param_value -#define rts_xmit_lowat rts_param_arr[1].rts_param_value -#define rts_recv_hiwat rts_param_arr[2].rts_param_value -#define rts_max_buf rts_param_arr[3].rts_param_value +#define rtss_xmit_hiwat rtss_params[0].rts_param_value +#define rtss_xmit_lowat rtss_params[1].rts_param_value +#define rtss_recv_hiwat rtss_params[2].rts_param_value +#define rtss_max_buf rtss_params[3].rts_param_value static int rts_close(queue_t *q); static void rts_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, @@ -158,12 +171,14 @@ int rts_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, int rts_opt_set(queue_t *q, uint_t optset_context, int level, int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk); -static void rts_param_cleanup(void); +static void rts_param_cleanup(IDP *ndp); static int rts_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); -static boolean_t rts_param_register(rtsparam_t *rtspa, int cnt); +static boolean_t rts_param_register(IDP *ndp, rtsparam_t *rtspa, int cnt); static int rts_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr); static void rts_rput(queue_t *q, mblk_t *mp); +static void *rts_stack_init(netstackid_t stackid, netstack_t *ns); +static void rts_stack_fini(netstackid_t stackid, void *arg); static void rts_wput(queue_t *q, mblk_t *mp); static void rts_wput_iocdata(queue_t *q, mblk_t *mp); static void rts_wput_other(queue_t *q, mblk_t *mp); @@ -186,9 +201,6 @@ struct streamtab rtsinfo = { &rinit, &winit }; -static IDP rts_g_nd; /* Points to table of RTS ND variables. */ -uint_t rts_open_streams = 0; - /* * This routine allocates the necessary * message blocks for IOCTL wrapping the @@ -238,17 +250,14 @@ rts_ioctl_alloc(mblk_t *data, cred_t *cr) static int rts_close(queue_t *q) { + rts_t *rts = (rts_t *)q->q_ptr; + qprocsoff(q); - crfree(((rts_t *)q->q_ptr)->rts_credp); + crfree(rts->rts_credp); + netstack_rele(rts->rts_rtss->rtss_netstack); mi_free(q->q_ptr); - rts_open_streams--; - /* - * Free the ND table if this was - * the last stream close - */ - rts_param_cleanup(); return (0); } @@ -264,6 +273,8 @@ rts_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) { mblk_t *mp = NULL; rts_t *rts; + netstack_t *ns; + rts_stack_t *rtss; /* If the stream is already open, return immediately. */ if (q->q_ptr != NULL) @@ -273,29 +284,31 @@ rts_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) if (sflag != MODOPEN) return (EINVAL); - /* If this is the first open of rts, create the ND table. */ - if (rts_g_nd == NULL) { - if (!rts_param_register(rts_param_arr, A_CNT(rts_param_arr))) - return (ENOMEM); - } + ns = netstack_find_by_cred(credp); + ASSERT(ns != NULL); + rtss = ns->netstack_rts; + ASSERT(rtss != NULL); + q->q_ptr = mi_zalloc_sleep(sizeof (rts_t)); WR(q)->q_ptr = q->q_ptr; rts = (rts_t *)q->q_ptr; + rts->rts_rtss = rtss; + rts->rts_credp = credp; crhold(credp); /* * The receive hiwat is only looked at on the stream head queue. * Store in q_hiwat in order to return on SO_RCVBUF getsockopts. */ - q->q_hiwat = rts_recv_hiwat; + q->q_hiwat = rtss->rtss_recv_hiwat; /* * The transmit hiwat/lowat is only looked at on IP's queue. 
* Store in q_hiwat/q_lowat in order to return on SO_SNDBUF/SO_SNDLOWAT * getsockopts. */ - WR(q)->q_hiwat = rts_xmit_hiwat; - WR(q)->q_lowat = rts_xmit_lowat; + WR(q)->q_hiwat = rtss->rtss_xmit_hiwat; + WR(q)->q_lowat = rtss->rtss_xmit_lowat; qprocson(q); /* * Indicate the down IP module that this is a routing socket @@ -305,14 +318,13 @@ rts_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) */ mp = rts_ioctl_alloc(NULL, credp); if (mp == NULL) { - rts_param_cleanup(); qprocsoff(q); ASSERT(q->q_ptr != NULL); + netstack_rele(rtss->rtss_netstack); mi_free(q->q_ptr); crfree(credp); return (ENOMEM); } - rts_open_streams++; rts->rts_flag |= RTS_OPEN_PENDING; putnext(WR(q), mp); while (rts->rts_flag & RTS_OPEN_PENDING) { @@ -565,6 +577,7 @@ rts_opt_set(queue_t *q, uint_t optset_context, int level, int *i1 = (int *)invalp; rts_t *rts = (rts_t *)q->q_ptr; boolean_t checkonly; + rts_stack_t *rtss = rts->rts_rtss; switch (optset_context) { case SETFN_OPTCOM_CHECKONLY: @@ -662,7 +675,7 @@ rts_opt_set(queue_t *q, uint_t optset_context, int level, * but changing them should do nothing. */ case SO_SNDBUF: - if (*i1 > rts_max_buf) { + if (*i1 > rtss->rtss_max_buf) { *outlenp = 0; return (ENOBUFS); } @@ -672,7 +685,7 @@ rts_opt_set(queue_t *q, uint_t optset_context, int level, } break; /* goto sizeof (int) option return */ case SO_RCVBUF: - if (*i1 > rts_max_buf) { + if (*i1 > rtss->rtss_max_buf) { *outlenp = 0; return (ENOBUFS); } @@ -703,10 +716,9 @@ rts_opt_set(queue_t *q, uint_t optset_context, int level, * It is called by rts_close and rts_open. */ static void -rts_param_cleanup(void) +rts_param_cleanup(IDP *ndp) { - if (!rts_open_streams) - nd_free(&rts_g_nd); + nd_free(ndp); } /* @@ -729,13 +741,13 @@ rts_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) * named dispatch (ND) handler. */ static boolean_t -rts_param_register(rtsparam_t *rtspa, int cnt) +rts_param_register(IDP *ndp, rtsparam_t *rtspa, int cnt) { for (; cnt-- > 0; rtspa++) { if (rtspa->rts_param_name != NULL && rtspa->rts_param_name[0]) { - if (!nd_load(&rts_g_nd, rtspa->rts_param_name, + if (!nd_load(ndp, rtspa->rts_param_name, rts_param_get, rts_param_set, (caddr_t)rtspa)) { - nd_free(&rts_g_nd); + nd_free(ndp); return (B_FALSE); } } @@ -792,7 +804,7 @@ rts_wrw(queue_t *q, struiod_t *dp) rts->rts_error = EINTR; goto err_ret; } - } + } rts->rts_flag |= RTS_WRW_PENDING; if (isuioq(q) && (error = struioget(q, mp, dp, 0))) { @@ -918,8 +930,10 @@ rts_wput_other(queue_t *q, mblk_t *mp) rts_t *rts; struct iocblk *iocp; cred_t *cr; + rts_stack_t *rtss; rts = (rts_t *)q->q_ptr; + rtss = rts->rts_rtss; cr = DB_CREDDEF(mp, rts->rts_credp); @@ -976,7 +990,7 @@ rts_wput_other(queue_t *q, mblk_t *mp) switch (iocp->ioc_cmd) { case ND_SET: case ND_GET: - if (nd_getset(q, rts_g_nd, mp)) { + if (nd_getset(q, rtss->rtss_g_nd, mp)) { qreply(q, mp); return; } @@ -1111,4 +1125,54 @@ rts_ddi_init(void) { rts_max_optsize = optcom_max_optsize(rts_opt_obj.odb_opt_des_arr, rts_opt_obj.odb_opt_arr_cnt); + + /* + * We want to be informed each time a stack is created or + * destroyed in the kernel, so we can maintain the + * set of rts_stack_t's. + */ + netstack_register(NS_RTS, rts_stack_init, NULL, rts_stack_fini); +} + +void +rts_ddi_destroy(void) +{ + netstack_unregister(NS_RTS); +} + +/* + * Initialize the RTS stack instance. 
+ */ +/* ARGSUSED */ +static void * +rts_stack_init(netstackid_t stackid, netstack_t *ns) +{ + rts_stack_t *rtss; + rtsparam_t *pa; + + rtss = (rts_stack_t *)kmem_zalloc(sizeof (*rtss), KM_SLEEP); + rtss->rtss_netstack = ns; + + pa = (rtsparam_t *)kmem_alloc(sizeof (lcl_param_arr), KM_SLEEP); + rtss->rtss_params = pa; + bcopy(lcl_param_arr, rtss->rtss_params, sizeof (lcl_param_arr)); + + (void) rts_param_register(&rtss->rtss_g_nd, + rtss->rtss_params, A_CNT(lcl_param_arr)); + return (rtss); +} + +/* + * Free the RTS stack instance. + */ +/* ARGSUSED */ +static void +rts_stack_fini(netstackid_t stackid, void *arg) +{ + rts_stack_t *rtss = (rts_stack_t *)arg; + + rts_param_cleanup(&rtss->rtss_g_nd); + kmem_free(rtss->rtss_params, sizeof (lcl_param_arr)); + rtss->rtss_params = NULL; + kmem_free(rtss, sizeof (*rtss)); } diff --git a/usr/src/uts/common/inet/ip/rtsddi.c b/usr/src/uts/common/inet/ip/rtsddi.c index 30d4302fc8..f8c80ecd83 100644 --- a/usr/src/uts/common/inet/ip/rtsddi.c +++ b/usr/src/uts/common/inet/ip/rtsddi.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 1992-2002 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -43,10 +42,13 @@ #include "../inetddi.c" extern void rts_ddi_init(void); +extern void rts_ddi_destroy(void); int _init(void) { + int error; + INET_BECOME_IP(); /* @@ -54,13 +56,23 @@ _init(void) * therefore all initialization is done before it. */ rts_ddi_init(); - return (mod_install(&modlinkage)); + error = mod_install(&modlinkage); + if (error != 0) + rts_ddi_destroy(); + return (error); } int _fini(void) { - return (mod_remove(&modlinkage)); + int error; + + error = mod_remove(&modlinkage); + if (error != 0) + return (error); + + rts_ddi_destroy(); + return (0); } int diff --git a/usr/src/uts/common/inet/ip/sadb.c b/usr/src/uts/common/inet/ip/sadb.c index cab99a1f4e..70d1f1f23d 100644 --- a/usr/src/uts/common/inet/ip/sadb.c +++ b/usr/src/uts/common/inet/ip/sadb.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -51,9 +51,9 @@ #include <inet/ip_ire.h> #include <inet/ip6.h> #include <inet/ipsec_info.h> -#include <inet/ipsec_impl.h> #include <inet/tcp.h> #include <inet/sadb.h> +#include <inet/ipsec_impl.h> #include <inet/ipsecah.h> #include <inet/ipsecesp.h> #include <sys/random.h> @@ -71,16 +71,14 @@ * of falling under export control, it was safe to link it in there. */ -/* Packet dropper for generic SADB drops. 
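The rtsddi.c part of the hunk fixes up module load and unload so they bracket the netstack registration correctly: register the per-stack callbacks before mod_install(), undo the registration if the install fails, and only unregister in _fini() once mod_remove() has succeeded. The ordering, as a sketch for a hypothetical xx module:

int
_init(void)
{
	int error;

	xx_ddi_init();			/* netstack_register(NS_XX, ...) */
	error = mod_install(&modlinkage);
	if (error != 0)
		xx_ddi_destroy();	/* undo the registration on failure */
	return (error);
}

int
_fini(void)
{
	int error;

	error = mod_remove(&modlinkage);
	if (error != 0)
		return (error);		/* still in use; keep callbacks registered */

	xx_ddi_destroy();		/* netstack_unregister(NS_XX) */
	return (0);
}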
*/ -static ipdropper_t sadb_dropper; - static mblk_t *sadb_extended_acquire(ipsec_selector_t *, ipsec_policy_t *, - ipsec_action_t *, boolean_t, uint32_t, uint32_t); + ipsec_action_t *, boolean_t, uint32_t, uint32_t, netstack_t *); static void sadb_ill_df(ill_t *, mblk_t *, isaf_t *, int, boolean_t); static ipsa_t *sadb_torch_assoc(isaf_t *, ipsa_t *, boolean_t, mblk_t **); static void sadb_drain_torchq(queue_t *, mblk_t *); -static void sadb_destroy_acqlist(iacqf_t **, uint_t, boolean_t); -static void sadb_destroy(sadb_t *); +static void sadb_destroy_acqlist(iacqf_t **, uint_t, boolean_t, + netstack_t *); +static void sadb_destroy(sadb_t *, netstack_t *); static mblk_t *sadb_sa2msg(ipsa_t *, sadb_msg_t *); static time_t sadb_add_time(time_t, uint64_t); @@ -249,13 +247,17 @@ sadb_insertassoc(ipsa_t *ipsa, isaf_t *bucket) static void sadb_freeassoc(ipsa_t *ipsa) { + ipsec_stack_t *ipss = ipsa->ipsa_netstack->netstack_ipsec; + + ASSERT(ipss != NULL); ASSERT(!MUTEX_HELD(&ipsa->ipsa_lock)); ASSERT(ipsa->ipsa_refcnt == 0); ASSERT(ipsa->ipsa_next == NULL); ASSERT(ipsa->ipsa_ptpn == NULL); ip_drop_packet(sadb_clear_lpkt(ipsa), B_TRUE, NULL, NULL, - &ipdrops_sadb_inlarval_timeout, &sadb_dropper); + DROPPER(ipss, ipds_sadb_inlarval_timeout), + &ipss->ipsec_sadb_dropper); mutex_enter(&ipsa->ipsa_lock); @@ -324,7 +326,8 @@ sadb_unlinkassoc(ipsa_t *ipsa) * fields are zeroed. */ static ipsa_t * -sadb_makelarvalassoc(uint32_t spi, uint32_t *src, uint32_t *dst, int addrfam) +sadb_makelarvalassoc(uint32_t spi, uint32_t *src, uint32_t *dst, int addrfam, + netstack_t *ns) { ipsa_t *newbie; @@ -340,6 +343,7 @@ sadb_makelarvalassoc(uint32_t spi, uint32_t *src, uint32_t *dst, int addrfam) /* Assigned requested SPI, assume caller does SPI allocation magic. */ newbie->ipsa_spi = spi; + newbie->ipsa_netstack = ns; /* No netstack_hold */ /* * Copy addresses... @@ -439,7 +443,8 @@ sadb_init_trial(sadb_t *sp, uint_t size, int kmflag) * Call me to initialize an SADB instance; fall back to default size on failure. */ static void -sadb_init(const char *name, sadb_t *sp, uint_t size, uint_t ver) +sadb_init(const char *name, sadb_t *sp, uint_t size, uint_t ver, + netstack_t *ns) { ASSERT(sp->sdb_of == NULL); ASSERT(sp->sdb_if == NULL); @@ -454,7 +459,7 @@ sadb_init(const char *name, sadb_t *sp, uint_t size, uint_t ver) "Unable to allocate %u entry IPv%u %s SADB hash table", size, ver, name); - sadb_destroy(sp); + sadb_destroy(sp, ns); size = IPSEC_DEFAULT_HASH_SIZE; cmn_err(CE_WARN, "Falling back to %d entries", size); (void) sadb_init_trial(sp, size, KM_SLEEP); @@ -466,16 +471,19 @@ sadb_init(const char *name, sadb_t *sp, uint_t size, uint_t ver) * Initialize an SADB-pair. 
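Each SA now carries an ipsa_netstack back-pointer (stored without a netstack_hold, since every SA is destroyed before its stack can go away), and drop accounting is charged to per-stack counters instead of the old file-scope sadb_dropper. A sketch of the access pattern; DROPPER() is taken as given from the IPsec headers and the exact counter layout is not visible in this hunk:

static void
xx_drop_larval_packet(ipsa_t *ipsa, mblk_t *mp)
{
	ipsec_stack_t *ipss = ipsa->ipsa_netstack->netstack_ipsec;

	/* per-stack drop counter and per-stack dropper */
	ip_drop_packet(mp, B_TRUE, NULL, NULL,
	    DROPPER(ipss, ipds_sadb_inlarval_timeout),
	    &ipss->ipsec_sadb_dropper);
}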
*/ void -sadbp_init(const char *name, sadbp_t *sp, int type, int size) +sadbp_init(const char *name, sadbp_t *sp, int type, int size, netstack_t *ns) { - sadb_init(name, &sp->s_v4, size, 4); - sadb_init(name, &sp->s_v6, size, 6); + sadb_init(name, &sp->s_v4, size, 4, ns); + sadb_init(name, &sp->s_v6, size, 6, ns); sp->s_satype = type; ASSERT((type == SADB_SATYPE_AH) || (type == SADB_SATYPE_ESP)); - if (type == SADB_SATYPE_AH) - ip_drop_register(&sadb_dropper, "IPsec SADB"); + if (type == SADB_SATYPE_AH) { + ipsec_stack_t *ipss = ns->netstack_ipsec; + + ip_drop_register(&ipss->ipsec_sadb_dropper, "IPsec SADB"); + } } /* @@ -838,6 +846,8 @@ sadb_ill_df(ill_t *ill, mblk_t *mp, isaf_t *fanout, int num_entries, ipsa_t *walker; mblk_t *nmp, *salist; int i, error = 0; + ip_stack_t *ipst = ill->ill_ipst; + netstack_t *ns = ipst->ips_netstack; IPSECHW_DEBUG(IPSECHW_SADB, ("sadb_ill_df: fanout at 0x%p ne=%d\n", (void *)fanout, num_entries)); @@ -874,7 +884,7 @@ sadb_ill_df(ill_t *ill, mblk_t *mp, isaf_t *fanout, int num_entries, mutex_enter(&walker->ipsa_lock); if (ipsec_capab_match(ill, ill->ill_phyint->phyint_ifindex, ill->ill_isv6, - walker)) { + walker, ns)) { nmp = copymsg(mp); if (nmp == NULL) { IPSECHW_DEBUG(IPSECHW_SADB, @@ -919,6 +929,8 @@ sadb_ill_download(ill_t *ill, uint_t sa_type) sadbp_t *spp; sadb_t *sp; int dlt; + ip_stack_t *ipst = ill->ill_ipst; + netstack_t *ns = ipst->ips_netstack; ASSERT(sa_type == SADB_SATYPE_AH || sa_type == SADB_SATYPE_ESP); @@ -935,7 +947,15 @@ sadb_ill_download(ill_t *ill, uint_t sa_type) protomp->b_datap->db_type = M_PROTO; dlt = (sa_type == SADB_SATYPE_AH) ? DL_CT_IPSEC_AH : DL_CT_IPSEC_ESP; - spp = (sa_type == SADB_SATYPE_ESP) ? &esp_sadb : &ah_sadb; + if (sa_type == SADB_SATYPE_ESP) { + ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; + + spp = &espstack->esp_sadb; + } else { + ipsecah_stack_t *ahstack = ns->netstack_ipsecah; + + spp = &ahstack->ah_sadb; + } ctrl = (dl_control_req_t *)protomp->b_wptr; ctrl->dl_primitive = DL_CONTROL_REQ; @@ -993,7 +1013,7 @@ sadb_destroyer(isaf_t **tablep, uint_t numentries, boolean_t forever) * Entry points to sadb_destroyer(). */ static void -sadb_flush(sadb_t *sp) +sadb_flush(sadb_t *sp, netstack_t *ns) { /* * Flush out each bucket, one at a time. Were it not for keysock's @@ -1005,17 +1025,17 @@ sadb_flush(sadb_t *sp) sadb_destroyer(&sp->sdb_if, sp->sdb_hashsize, B_FALSE); /* For each acquire, destroy it; leave the bucket mutex alone. */ - sadb_destroy_acqlist(&sp->sdb_acq, sp->sdb_hashsize, B_FALSE); + sadb_destroy_acqlist(&sp->sdb_acq, sp->sdb_hashsize, B_FALSE, ns); } static void -sadb_destroy(sadb_t *sp) +sadb_destroy(sadb_t *sp, netstack_t *ns) { sadb_destroyer(&sp->sdb_of, sp->sdb_hashsize, B_TRUE); sadb_destroyer(&sp->sdb_if, sp->sdb_hashsize, B_TRUE); /* For each acquire, destroy it, including the bucket mutex. 
*/ - sadb_destroy_acqlist(&sp->sdb_acq, sp->sdb_hashsize, B_TRUE); + sadb_destroy_acqlist(&sp->sdb_acq, sp->sdb_hashsize, B_TRUE, ns); ASSERT(sp->sdb_of == NULL); ASSERT(sp->sdb_if == NULL); @@ -1040,23 +1060,26 @@ sadb_send_flush_req(sadbp_t *spp) } void -sadbp_flush(sadbp_t *spp) +sadbp_flush(sadbp_t *spp, netstack_t *ns) { - sadb_flush(&spp->s_v4); - sadb_flush(&spp->s_v6); + sadb_flush(&spp->s_v4, ns); + sadb_flush(&spp->s_v6, ns); sadb_send_flush_req(spp); } void -sadbp_destroy(sadbp_t *spp) +sadbp_destroy(sadbp_t *spp, netstack_t *ns) { - sadb_destroy(&spp->s_v4); - sadb_destroy(&spp->s_v6); + sadb_destroy(&spp->s_v4, ns); + sadb_destroy(&spp->s_v6, ns); sadb_send_flush_req(spp); - if (spp->s_satype == SADB_SATYPE_AH) - ip_drop_unregister(&sadb_dropper); + if (spp->s_satype == SADB_SATYPE_AH) { + ipsec_stack_t *ipss = ns->netstack_ipsec; + + ip_drop_unregister(&ipss->ipsec_sadb_dropper); + } } @@ -1886,7 +1909,7 @@ sadb_pfkey_echo(queue_t *pfkey_q, mblk_t *mp, sadb_msg_t *samsg, */ void sadb_keysock_hello(queue_t **pfkey_qp, queue_t *q, mblk_t *mp, - void (*ager)(void *), timeout_id_t *top, int satype) + void (*ager)(void *), void *agerarg, timeout_id_t *top, int satype) { keysock_hello_ack_t *kha; queue_t *oldq; @@ -1921,7 +1944,7 @@ sadb_keysock_hello(queue_t **pfkey_qp, queue_t *q, mblk_t *mp, * to the timeout handle. Fire it off in 4 seconds, because it * just seems like a good interval. */ - *top = qtimeout(*pfkey_qp, ager, NULL, drv_usectohz(4000000)); + *top = qtimeout(*pfkey_qp, ager, agerarg, drv_usectohz(4000000)); putnext(*pfkey_qp, mp); } @@ -1933,7 +1956,8 @@ sadb_keysock_hello(queue_t **pfkey_qp, queue_t *q, mblk_t *mp, * Check ire table for local/non-local/broadcast. */ int -sadb_addrcheck(queue_t *pfkey_q, mblk_t *mp, sadb_ext_t *ext, uint_t serial) +sadb_addrcheck(queue_t *pfkey_q, mblk_t *mp, sadb_ext_t *ext, uint_t serial, + netstack_t *ns) { sadb_address_t *addr = (sadb_address_t *)ext; struct sockaddr_in *sin; @@ -2071,7 +2095,8 @@ bail: * by what zone we're in when we go to zone-aware IPsec. */ ire = ire_ctable_lookup_v6(&sin6->sin6_addr, NULL, - IRE_LOCAL, NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE); + IRE_LOCAL, NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE, + ns->netstack_ip); if (ire != NULL) { /* Hey hey, it's local. */ IRE_REFRELE(ire); @@ -2095,7 +2120,7 @@ bail: */ ire = ire_ctable_lookup(sin->sin_addr.s_addr, 0, IRE_LOCAL | IRE_BROADCAST, NULL, ALL_ZONES, NULL, - MATCH_IRE_TYPE); + MATCH_IRE_TYPE, ns->netstack_ip); if (ire != NULL) { /* Check for local or broadcast */ type = ire->ire_type; @@ -2116,7 +2141,7 @@ bail: * the source to AF_INET. Do the same for the inner sources. 
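sadb_keysock_hello() grows an agerarg parameter that is handed straight to qtimeout(), so the periodic SA ager receives its per-stack state as the callback argument rather than reaching for globals. A sketch of an ager written against that convention; every xx_ name is a placeholder for whatever the AH or ESP stack structure actually provides:

typedef struct xx_stack {
	sadbp_t		xx_sadb;
	queue_t		*xx_pfkey_q;
	queue_t		*xx_ip_q;
	int		xx_reap_delay;
	uint_t		xx_age_interval;
	uint_t		xx_age_int_max;
	short		xx_mid;
	timeout_id_t	xx_event;
	netstack_t	*xx_netstack;
} xx_stack_t;

static void
xx_ager(void *arg)
{
	/* arg was threaded through sadb_keysock_hello()/qtimeout() */
	xx_stack_t	*stackp = arg;
	hrtime_t	begin = gethrtime();

	sadb_ager(&stackp->xx_sadb.s_v4, stackp->xx_pfkey_q, stackp->xx_ip_q,
	    stackp->xx_reap_delay, stackp->xx_netstack);
	sadb_ager(&stackp->xx_sadb.s_v6, stackp->xx_pfkey_q, stackp->xx_ip_q,
	    stackp->xx_reap_delay, stackp->xx_netstack);

	/* re-arm, passing the same per-stack argument back in */
	stackp->xx_event = sadb_retimeout(begin, stackp->xx_pfkey_q, xx_ager,
	    stackp, &stackp->xx_age_interval, stackp->xx_age_int_max,
	    stackp->xx_mid);
}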
*/ boolean_t -sadb_addrfix(keysock_in_t *ksi, queue_t *pfkey_q, mblk_t *mp) +sadb_addrfix(keysock_in_t *ksi, queue_t *pfkey_q, mblk_t *mp, netstack_t *ns) { struct sockaddr_in *src, *isrc; struct sockaddr_in6 *dst, *idst; @@ -2127,7 +2152,7 @@ sadb_addrfix(keysock_in_t *ksi, queue_t *pfkey_q, mblk_t *mp) if (extv[SADB_EXT_ADDRESS_SRC] != NULL) { rc = sadb_addrcheck(pfkey_q, mp, extv[SADB_EXT_ADDRESS_SRC], - ksi->ks_in_serial); + ksi->ks_in_serial, ns); if (rc == KS_IN_ADDR_UNKNOWN) return (B_FALSE); if (rc == KS_IN_ADDR_MBCAST) { @@ -2140,7 +2165,7 @@ sadb_addrfix(keysock_in_t *ksi, queue_t *pfkey_q, mblk_t *mp) if (extv[SADB_EXT_ADDRESS_DST] != NULL) { rc = sadb_addrcheck(pfkey_q, mp, extv[SADB_EXT_ADDRESS_DST], - ksi->ks_in_serial); + ksi->ks_in_serial, ns); if (rc == KS_IN_ADDR_UNKNOWN) return (B_FALSE); if (rc == KS_IN_ADDR_UNSPEC) { @@ -2158,7 +2183,7 @@ sadb_addrfix(keysock_in_t *ksi, queue_t *pfkey_q, mblk_t *mp) */ if (extv[SADB_X_EXT_ADDRESS_NATT_LOC] != NULL) { rc = sadb_addrcheck(pfkey_q, mp, - extv[SADB_X_EXT_ADDRESS_NATT_LOC], ksi->ks_in_serial); + extv[SADB_X_EXT_ADDRESS_NATT_LOC], ksi->ks_in_serial, ns); /* * NATT addresses never use an IRE_LOCAL, so it should @@ -2185,7 +2210,7 @@ sadb_addrfix(keysock_in_t *ksi, queue_t *pfkey_q, mblk_t *mp) if (extv[SADB_X_EXT_ADDRESS_NATT_REM] != NULL) { rc = sadb_addrcheck(pfkey_q, mp, - extv[SADB_X_EXT_ADDRESS_NATT_REM], ksi->ks_in_serial); + extv[SADB_X_EXT_ADDRESS_NATT_REM], ksi->ks_in_serial, ns); /* * NATT addresses never use an IRE_LOCAL, so it should @@ -2219,10 +2244,10 @@ sadb_addrfix(keysock_in_t *ksi, queue_t *pfkey_q, mblk_t *mp) } if (sadb_addrcheck(pfkey_q, mp, - extv[SADB_X_EXT_ADDRESS_INNER_DST], ksi->ks_in_serial) + extv[SADB_X_EXT_ADDRESS_INNER_DST], ksi->ks_in_serial, ns) == KS_IN_ADDR_UNKNOWN || sadb_addrcheck(pfkey_q, mp, - extv[SADB_X_EXT_ADDRESS_INNER_SRC], ksi->ks_in_serial) + extv[SADB_X_EXT_ADDRESS_INNER_SRC], ksi->ks_in_serial, ns) == KS_IN_ADDR_UNKNOWN) return (B_FALSE); @@ -2623,11 +2648,12 @@ static void sadb_init_alginfo(ipsa_t *sa) { ipsec_alginfo_t *alg; + ipsec_stack_t *ipss = sa->ipsa_netstack->netstack_ipsec; - mutex_enter(&alg_lock); + mutex_enter(&ipss->ipsec_alg_lock); if (sa->ipsa_encrkey != NULL) { - alg = ipsec_alglists[IPSEC_ALG_ENCR][sa->ipsa_encr_alg]; + alg = ipss->ipsec_alglists[IPSEC_ALG_ENCR][sa->ipsa_encr_alg]; if (alg != NULL && ALG_VALID(alg)) { sa->ipsa_emech.cm_type = alg->alg_mech_type; sa->ipsa_emech.cm_param = NULL; @@ -2638,7 +2664,7 @@ sadb_init_alginfo(ipsa_t *sa) } if (sa->ipsa_authkey != NULL) { - alg = ipsec_alglists[IPSEC_ALG_AUTH][sa->ipsa_auth_alg]; + alg = ipss->ipsec_alglists[IPSEC_ALG_AUTH][sa->ipsa_auth_alg]; if (alg != NULL && ALG_VALID(alg)) { sa->ipsa_amech.cm_type = alg->alg_mech_type; sa->ipsa_amech.cm_param = (char *)&sa->ipsa_mac_len; @@ -2648,7 +2674,7 @@ sadb_init_alginfo(ipsa_t *sa) sa->ipsa_amech.cm_type = CRYPTO_MECHANISM_INVALID; } - mutex_exit(&alg_lock); + mutex_exit(&ipss->ipsec_alg_lock); } /* @@ -2763,7 +2789,8 @@ sadb_nat_calculations(ipsa_t *newbie, sadb_address_t *natt_loc_ext, int sadb_common_add(queue_t *ip_q, queue_t *pfkey_q, mblk_t *mp, sadb_msg_t *samsg, keysock_in_t *ksi, isaf_t *primary, isaf_t *secondary, - ipsa_t *newbie, boolean_t clone, boolean_t is_inbound, int *diagnostic) + ipsa_t *newbie, boolean_t clone, boolean_t is_inbound, int *diagnostic, + netstack_t *ns) { ipsa_t *newbie_clone = NULL, *scratch; sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA]; @@ -2797,6 +2824,7 @@ sadb_common_add(queue_t *ip_q, queue_t *pfkey_q, 
mblk_t *mp, sadb_msg_t *samsg, boolean_t isupdate = (newbie != NULL); uint32_t *src_addr_ptr, *dst_addr_ptr, *isrc_addr_ptr, *idst_addr_ptr; mblk_t *ctl_mp = NULL; + ipsec_stack_t *ipss = ns->netstack_ipsec; src = (struct sockaddr_in *)(srcext + 1); src6 = (struct sockaddr_in6 *)(srcext + 1); @@ -2826,7 +2854,7 @@ sadb_common_add(queue_t *ip_q, queue_t *pfkey_q, mblk_t *mp, sadb_msg_t *samsg, if (!isupdate) { newbie = sadb_makelarvalassoc(assoc->sadb_sa_spi, - src_addr_ptr, dst_addr_ptr, af); + src_addr_ptr, dst_addr_ptr, af, ns); if (newbie == NULL) return (ENOMEM); } @@ -2996,9 +3024,9 @@ sadb_common_add(queue_t *ip_q, queue_t *pfkey_q, mblk_t *mp, sadb_msg_t *samsg, newbie->ipsa_kcfauthkey.ck_length = newbie->ipsa_authkeybits; newbie->ipsa_kcfauthkey.ck_data = newbie->ipsa_authkey; - mutex_enter(&alg_lock); + mutex_enter(&ipss->ipsec_alg_lock); error = ipsec_create_ctx_tmpl(newbie, IPSEC_ALG_AUTH); - mutex_exit(&alg_lock); + mutex_exit(&ipss->ipsec_alg_lock); if (error != 0) { mutex_exit(&newbie->ipsa_lock); goto error; @@ -3030,9 +3058,9 @@ sadb_common_add(queue_t *ip_q, queue_t *pfkey_q, mblk_t *mp, sadb_msg_t *samsg, newbie->ipsa_kcfencrkey.ck_length = newbie->ipsa_encrkeybits; newbie->ipsa_kcfencrkey.ck_data = newbie->ipsa_encrkey; - mutex_enter(&alg_lock); + mutex_enter(&ipss->ipsec_alg_lock); error = ipsec_create_ctx_tmpl(newbie, IPSEC_ALG_ENCR); - mutex_exit(&alg_lock); + mutex_exit(&ipss->ipsec_alg_lock); if (error != 0) { mutex_exit(&newbie->ipsa_lock); goto error; @@ -3063,7 +3091,7 @@ sadb_common_add(queue_t *ip_q, queue_t *pfkey_q, mblk_t *mp, sadb_msg_t *samsg, * keysock.c prepares the string for us. */ newbie->ipsa_src_cid = ipsid_lookup(id->sadb_ident_type, - (char *)(id+1)); + (char *)(id+1), ns); if (newbie->ipsa_src_cid == NULL) { error = ENOMEM; mutex_exit(&newbie->ipsa_lock); @@ -3080,7 +3108,7 @@ sadb_common_add(queue_t *ip_q, queue_t *pfkey_q, mblk_t *mp, sadb_msg_t *samsg, * keysock.c prepares the string for us. */ newbie->ipsa_dst_cid = ipsid_lookup(id->sadb_ident_type, - (char *)(id+1)); + (char *)(id+1), ns); if (newbie->ipsa_dst_cid == NULL) { error = ENOMEM; mutex_exit(&newbie->ipsa_lock); @@ -3639,7 +3667,8 @@ sadb_age_assoc(isaf_t *head, queue_t *pfkey_q, ipsa_t *assoc, * Security Associations. */ void -sadb_ager(sadb_t *sp, queue_t *pfkey_q, queue_t *ip_q, int reap_delay) +sadb_ager(sadb_t *sp, queue_t *pfkey_q, queue_t *ip_q, int reap_delay, + netstack_t *ns) { int i; isaf_t *bucket; @@ -3673,7 +3702,7 @@ sadb_ager(sadb_t *sp, queue_t *pfkey_q, queue_t *ip_q, int reap_delay) acqrec = spareacq) { spareacq = acqrec->ipsacq_next; if (current > acqrec->ipsacq_expire) - sadb_destroy_acquire(acqrec); + sadb_destroy_acquire(acqrec, ns); } mutex_exit(&acqlist->iacqf_lock); } @@ -3819,7 +3848,7 @@ sadb_ager(sadb_t *sp, queue_t *pfkey_q, queue_t *ip_q, int reap_delay) /* * Run a GC pass to clean out dead identities. 
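The algorithm tables and their lock also move into ipsec_stack_t, so every lookup is bracketed by the per-stack ipsec_alg_lock instead of the old global alg_lock. The access pattern, reduced to a sketch:

static boolean_t
xx_auth_alg_valid(netstack_t *ns, uint8_t alg_id)
{
	ipsec_stack_t	*ipss = ns->netstack_ipsec;
	ipsec_alginfo_t	*alg;
	boolean_t	valid;

	mutex_enter(&ipss->ipsec_alg_lock);
	alg = ipss->ipsec_alglists[IPSEC_ALG_AUTH][alg_id];
	valid = (alg != NULL && ALG_VALID(alg));
	mutex_exit(&ipss->ipsec_alg_lock);

	return (valid);
}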
*/ - ipsid_gc(); + ipsid_gc(ns); } /* @@ -3827,7 +3856,7 @@ sadb_ager(sadb_t *sp, queue_t *pfkey_q, queue_t *ip_q, int reap_delay) */ timeout_id_t sadb_retimeout(hrtime_t begin, queue_t *pfkey_q, void (*ager)(void *), - uint_t *intp, uint_t intmax, short mid) + void *agerarg, uint_t *intp, uint_t intmax, short mid) { hrtime_t end = gethrtime(); uint_t interval = *intp; @@ -3862,7 +3891,8 @@ sadb_retimeout(hrtime_t begin, queue_t *pfkey_q, void (*ager)(void *), interval = max(interval, SADB_AGE_INTERVAL_DEFAULT); } *intp = interval; - return (qtimeout(pfkey_q, ager, NULL, interval * drv_usectohz(1000))); + return (qtimeout(pfkey_q, ager, agerarg, + interval * drv_usectohz(1000))); } @@ -3948,7 +3978,8 @@ sadb_update_lifetimes(ipsa_t *assoc, sadb_lifetime_t *hard, int sadb_update_sa(mblk_t *mp, keysock_in_t *ksi, sadb_t *sp, int *diagnostic, queue_t *pfkey_q, - int (*add_sa_func)(mblk_t *, keysock_in_t *, int *)) + int (*add_sa_func)(mblk_t *, keysock_in_t *, int *, netstack_t *), + netstack_t *ns) { sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA]; sadb_address_t *srcext = @@ -4030,7 +4061,7 @@ sadb_update_sa(mblk_t *mp, keysock_in_t *ksi, * deal with updating a larval SA. */ IPSA_REFRELE(inbound_target); - return (add_sa_func(mp, ksi, diagnostic)); + return (add_sa_func(mp, ksi, diagnostic, ns)); } } @@ -4232,12 +4263,22 @@ sadb_acquire(mblk_t *mp, ipsec_out_t *io, boolean_t need_ah, boolean_t need_esp) uint64_t unique_id = 0; ipsec_selector_t sel; boolean_t tunnel_mode = io->ipsec_out_tunnel; + netstack_t *ns = io->ipsec_out_ns; + ipsec_stack_t *ipss = ns->netstack_ipsec; ASSERT((pp != NULL) || (ap != NULL)); ASSERT(need_ah != NULL || need_esp != NULL); /* Assign sadb pointers */ - spp = need_esp ? &esp_sadb : &ah_sadb; /* ESP for AH+ESP */ + if (need_esp) { /* ESP for AH+ESP */ + ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; + + spp = &espstack->esp_sadb; + } else { + ipsecah_stack_t *ahstack = ns->netstack_ipsecah; + + spp = &ahstack->ah_sadb; + } sp = io->ipsec_out_v4 ? &spp->s_v4 : &spp->s_v6; if (ap == NULL) @@ -4256,7 +4297,7 @@ sadb_acquire(mblk_t *mp, ipsec_out_t *io, boolean_t need_ah, boolean_t need_esp) * make sure the high bit on the sequence number is set.) */ - seq = keysock_next_seq() | IACQF_LOWEST_SEQ; + seq = keysock_next_seq(ns) | IACQF_LOWEST_SEQ; if (IPH_HDR_VERSION(ipha) == IP_VERSION) { src = (uint32_t *)&ipha->ipha_src; @@ -4298,7 +4339,8 @@ sadb_acquire(mblk_t *mp, ipsec_out_t *io, boolean_t need_ah, boolean_t need_esp) if (newbie == NULL) { mutex_exit(&bucket->iacqf_lock); ip_drop_packet(mp, B_FALSE, NULL, NULL, - &ipdrops_sadb_acquire_nomem, &sadb_dropper); + DROPPER(ipss, ipds_sadb_acquire_nomem), + &ipss->ipsec_sadb_dropper); return; } newbie->ipsacq_policy = pp; @@ -4371,9 +4413,11 @@ sadb_acquire(mblk_t *mp, ipsec_out_t *io, boolean_t need_ah, boolean_t need_esp) newbie->ipsacq_mp = lastone->b_next; lastone->b_next = NULL; ip_drop_packet(lastone, B_FALSE, NULL, NULL, - &ipdrops_sadb_acquire_toofull, &sadb_dropper); + DROPPER(ipss, ipds_sadb_acquire_toofull), + &ipss->ipsec_sadb_dropper); } else { - IP_ACQUIRE_STAT(qhiwater, newbie->ipsacq_numpackets); + IP_ACQUIRE_STAT(ipss, qhiwater, + newbie->ipsacq_numpackets); } } @@ -4396,7 +4440,7 @@ sadb_acquire(mblk_t *mp, ipsec_out_t *io, boolean_t need_ah, boolean_t need_esp) return; } - if (keysock_extended_reg()) { + if (keysock_extended_reg(ns)) { /* * Construct an extended ACQUIRE. There are logging * opportunities here in failure cases. 
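sadb_acquire() no longer reaches for the esp_sadb/ah_sadb file statics; it pulls the netstack out of the ipsec_out_t and picks the SADB pair from the per-stack ESP or AH structure. The selection, isolated into a helper-shaped sketch:

static sadbp_t *
xx_select_sadbp(ipsec_out_t *io, boolean_t need_esp)
{
	netstack_t *ns = io->ipsec_out_ns;

	if (need_esp) {		/* the ESP side also covers AH+ESP */
		ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;

		return (&espstack->esp_sadb);
	} else {
		ipsecah_stack_t *ahstack = ns->netstack_ipsecah;

		return (&ahstack->ah_sadb);
	}
}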
@@ -4426,7 +4470,7 @@ sadb_acquire(mblk_t *mp, ipsec_out_t *io, boolean_t need_ah, boolean_t need_esp) extended = sadb_keysock_out(0); if (extended != NULL) { extended->b_cont = sadb_extended_acquire(&sel, pp, ap, - tunnel_mode, seq, 0); + tunnel_mode, seq, 0, ns); if (extended->b_cont == NULL) { freeb(extended); extended = NULL; @@ -4440,21 +4484,22 @@ sadb_acquire(mblk_t *mp, ipsec_out_t *io, boolean_t need_ah, boolean_t need_esp) * this new record. The send-acquire callback assumes that acqrec is * already locked. */ - (*spp->s_acqfn)(newbie, extended); + (*spp->s_acqfn)(newbie, extended, ns); } /* * Unlink and free an acquire record. */ void -sadb_destroy_acquire(ipsacq_t *acqrec) +sadb_destroy_acquire(ipsacq_t *acqrec, netstack_t *ns) { mblk_t *mp; + ipsec_stack_t *ipss = ns->netstack_ipsec; ASSERT(MUTEX_HELD(acqrec->ipsacq_linklock)); if (acqrec->ipsacq_policy != NULL) { - IPPOL_REFRELE(acqrec->ipsacq_policy); + IPPOL_REFRELE(acqrec->ipsacq_policy, ns); } if (acqrec->ipsacq_act != NULL) { IPACT_REFRELE(acqrec->ipsacq_act); @@ -4477,7 +4522,8 @@ sadb_destroy_acquire(ipsacq_t *acqrec) acqrec->ipsacq_mp = mp->b_next; mp->b_next = NULL; ip_drop_packet(mp, B_FALSE, NULL, NULL, - &ipdrops_sadb_acquire_timeout, &sadb_dropper); + DROPPER(ipss, ipds_sadb_acquire_timeout), + &ipss->ipsec_sadb_dropper); } mutex_exit(&acqrec->ipsacq_lock); @@ -4490,7 +4536,8 @@ sadb_destroy_acquire(ipsacq_t *acqrec) * Destroy an acquire list fanout. */ static void -sadb_destroy_acqlist(iacqf_t **listp, uint_t numentries, boolean_t forever) +sadb_destroy_acqlist(iacqf_t **listp, uint_t numentries, boolean_t forever, + netstack_t *ns) { int i; iacqf_t *list = *listp; @@ -4501,7 +4548,7 @@ sadb_destroy_acqlist(iacqf_t **listp, uint_t numentries, boolean_t forever) for (i = 0; i < numentries; i++) { mutex_enter(&(list[i].iacqf_lock)); while (list[i].iacqf_ipsacq != NULL) - sadb_destroy_acquire(list[i].iacqf_ipsacq); + sadb_destroy_acquire(list[i].iacqf_ipsacq, ns); mutex_exit(&(list[i].iacqf_lock)); if (forever) mutex_destroy(&(list[i].iacqf_lock)); @@ -4520,7 +4567,7 @@ sadb_destroy_acqlist(iacqf_t **listp, uint_t numentries, boolean_t forever) static uint8_t * sadb_new_algdesc(uint8_t *start, uint8_t *limit, sadb_x_ecomb_t *ecomb, uint8_t satype, uint8_t algtype, - uint8_t alg, uint16_t minbits, uint16_t maxbits) + uint8_t alg, uint16_t minbits, uint16_t maxbits, ipsec_stack_t *ipss) { uint8_t *cur = start; ipsec_alginfo_t *algp; @@ -4537,14 +4584,14 @@ sadb_new_algdesc(uint8_t *start, uint8_t *limit, * a stronger policy, and when the framework loads a stronger version, * you can just keep plowing w/o rewhacking your SPD. */ - mutex_enter(&alg_lock); - algp = ipsec_alglists[(algtype == SADB_X_ALGTYPE_AUTH) ? + mutex_enter(&ipss->ipsec_alg_lock); + algp = ipss->ipsec_alglists[(algtype == SADB_X_ALGTYPE_AUTH) ? IPSEC_ALG_AUTH : IPSEC_ALG_ENCR][alg]; if (minbits < algp->alg_ef_minbits) minbits = algp->alg_ef_minbits; if (maxbits > algp->alg_ef_maxbits) maxbits = algp->alg_ef_maxbits; - mutex_exit(&alg_lock); + mutex_exit(&ipss->ipsec_alg_lock); algdesc->sadb_x_algdesc_satype = satype; algdesc->sadb_x_algdesc_algtype = algtype; @@ -4562,11 +4609,13 @@ sadb_new_algdesc(uint8_t *start, uint8_t *limit, * return NULL if we ran out of room or a pointer to the end of the ecomb. 
*/ static uint8_t * -sadb_action_to_ecomb(uint8_t *start, uint8_t *limit, ipsec_action_t *act) +sadb_action_to_ecomb(uint8_t *start, uint8_t *limit, ipsec_action_t *act, + netstack_t *ns) { uint8_t *cur = start; sadb_x_ecomb_t *ecomb = (sadb_x_ecomb_t *)cur; ipsec_prot_t *ipp; + ipsec_stack_t *ipss = ns->netstack_ipsec; cur += sizeof (*ecomb); if (cur >= limit) @@ -4601,10 +4650,10 @@ sadb_action_to_ecomb(uint8_t *start, uint8_t *limit, ipsec_action_t *act) if (ipp->ipp_use_ah) { cur = sadb_new_algdesc(cur, limit, ecomb, SADB_SATYPE_AH, SADB_X_ALGTYPE_AUTH, ipp->ipp_auth_alg, - ipp->ipp_ah_minbits, ipp->ipp_ah_maxbits); + ipp->ipp_ah_minbits, ipp->ipp_ah_maxbits, ipss); if (cur == NULL) return (NULL); - ipsecah_fill_defs(ecomb); + ipsecah_fill_defs(ecomb, ns); } if (ipp->ipp_use_esp) { @@ -4613,7 +4662,7 @@ sadb_action_to_ecomb(uint8_t *start, uint8_t *limit, ipsec_action_t *act) SADB_SATYPE_ESP, SADB_X_ALGTYPE_AUTH, ipp->ipp_esp_auth_alg, ipp->ipp_espa_minbits, - ipp->ipp_espa_maxbits); + ipp->ipp_espa_maxbits, ipss); if (cur == NULL) return (NULL); } @@ -4622,12 +4671,12 @@ sadb_action_to_ecomb(uint8_t *start, uint8_t *limit, ipsec_action_t *act) SADB_SATYPE_ESP, SADB_X_ALGTYPE_CRYPT, ipp->ipp_encr_alg, ipp->ipp_espe_minbits, - ipp->ipp_espe_maxbits); + ipp->ipp_espe_maxbits, ipss); if (cur == NULL) return (NULL); /* Fill in lifetimes if and only if AH didn't already... */ if (!ipp->ipp_use_ah) - ipsecesp_fill_defs(ecomb); + ipsecesp_fill_defs(ecomb, ns); } return (cur); @@ -4643,7 +4692,8 @@ sadb_action_to_ecomb(uint8_t *start, uint8_t *limit, ipsec_action_t *act) */ static mblk_t * sadb_extended_acquire(ipsec_selector_t *sel, ipsec_policy_t *pol, - ipsec_action_t *act, boolean_t tunnel_mode, uint32_t seq, uint32_t pid) + ipsec_action_t *act, boolean_t tunnel_mode, uint32_t seq, uint32_t pid, + netstack_t *ns) { mblk_t *mp; sadb_msg_t *samsg; @@ -4843,7 +4893,7 @@ sadb_extended_acquire(ipsec_selector_t *sel, ipsec_policy_t *pol, ap->ipa_act.ipa_apply.ipp_replay_depth; } - cur = sadb_action_to_ecomb(cur, end, ap); + cur = sadb_action_to_ecomb(cur, end, ap, ns); if (cur == NULL) { /* no space */ freeb(mp); return (NULL); @@ -4887,7 +4937,7 @@ sadb_extended_acquire(ipsec_selector_t *sel, ipsec_policy_t *pol, * with the callers finishing touches on the ACQUIRE itself. */ mblk_t * -sadb_setup_acquire(ipsacq_t *acqrec, uint8_t satype) +sadb_setup_acquire(ipsacq_t *acqrec, uint8_t satype, ipsec_stack_t *ipss) { uint_t allocsize; mblk_t *pfkeymp, *msgmp; @@ -4914,10 +4964,10 @@ sadb_setup_acquire(ipsacq_t *acqrec, uint8_t satype) /* Make sure there's enough to cover both AF_INET and AF_INET6. */ allocsize += 2 * sizeof (struct sockaddr_in6); - mutex_enter(&alg_lock); + mutex_enter(&ipss->ipsec_alg_lock); /* NOTE: The lock is now held through to this function's return. */ - allocsize += ipsec_nalgs[IPSEC_ALG_AUTH] * - ipsec_nalgs[IPSEC_ALG_ENCR] * sizeof (sadb_comb_t); + allocsize += ipss->ipsec_nalgs[IPSEC_ALG_AUTH] * + ipss->ipsec_nalgs[IPSEC_ALG_ENCR] * sizeof (sadb_comb_t); if (tunnel_mode) { /* Tunnel mode! 
*/ @@ -4929,7 +4979,7 @@ sadb_setup_acquire(ipsacq_t *acqrec, uint8_t satype) msgmp = allocb(allocsize, BPRI_HI); if (msgmp == NULL) { freeb(pfkeymp); - mutex_exit(&alg_lock); + mutex_exit(&ipss->ipsec_alg_lock); return (NULL); } @@ -4952,7 +5002,7 @@ sadb_setup_acquire(ipsacq_t *acqrec, uint8_t satype) cmn_err(CE_WARN, "sadb_setup_acquire: corrupt ACQUIRE record.\n"); ASSERT(0); - mutex_exit(&alg_lock); + mutex_exit(&ipss->ipsec_alg_lock); return (NULL); } @@ -4999,7 +5049,7 @@ sadb_setup_acquire(ipsacq_t *acqrec, uint8_t satype) if (cur != NULL) samsg->sadb_msg_len = SADB_8TO64(cur - msgmp->b_rptr); else - mutex_exit(&alg_lock); + mutex_exit(&ipss->ipsec_alg_lock); return (pfkeymp); } @@ -5013,7 +5063,8 @@ sadb_setup_acquire(ipsacq_t *acqrec, uint8_t satype) * master_spi is passed in host order. */ ipsa_t * -sadb_getspi(keysock_in_t *ksi, uint32_t master_spi, int *diagnostic) +sadb_getspi(keysock_in_t *ksi, uint32_t master_spi, int *diagnostic, + netstack_t *ns) { sadb_address_t *src = (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC], @@ -5076,7 +5127,8 @@ sadb_getspi(keysock_in_t *ksi, uint32_t master_spi, int *diagnostic) * Since master_spi is passed in host order, we need to htonl() it * for the purposes of creating a new SA. */ - return (sadb_makelarvalassoc(htonl(master_spi), srcaddr, dstaddr, af)); + return (sadb_makelarvalassoc(htonl(master_spi), srcaddr, dstaddr, af, + ns)); } /* @@ -5093,7 +5145,7 @@ sadb_getspi(keysock_in_t *ksi, uint32_t master_spi, int *diagnostic) */ /* ARGSUSED */ void -sadb_in_acquire(sadb_msg_t *samsg, sadbp_t *sp, queue_t *ip_q) +sadb_in_acquire(sadb_msg_t *samsg, sadbp_t *sp, queue_t *ip_q, netstack_t *ns) { int i; ipsacq_t *acqrec; @@ -5158,7 +5210,7 @@ sadb_in_acquire(sadb_msg_t *samsg, sadbp_t *sp, queue_t *ip_q) */ ASSERT(&bucket->iacqf_lock == acqrec->ipsacq_linklock); - sadb_destroy_acquire(acqrec); + sadb_destroy_acquire(acqrec, ns); /* Have to exit mutex here, because of breaking out of for loop. */ mutex_exit(&bucket->iacqf_lock); } @@ -5393,13 +5445,13 @@ sadb_t_bind_req(queue_t *q, int proto) */ void ipsec_assocfailure(short mid, short sid, char level, ushort_t sl, char *fmt, - uint32_t spi, void *addr, int af) + uint32_t spi, void *addr, int af, netstack_t *ns) { char buf[INET6_ADDRSTRLEN]; ASSERT(af == AF_INET6 || af == AF_INET); - ipsec_rl_strlog(mid, sid, level, sl, fmt, ntohl(spi), + ipsec_rl_strlog(ns, mid, sid, level, sl, fmt, ntohl(spi), inet_ntop(af, addr, buf, sizeof (buf))); } @@ -5417,7 +5469,8 @@ ipsec_conn_pol(ipsec_selector_t *sel, conn_t *connp, ipsec_policy_t **ppp) pp = ipl->ipl_out_policy; IPPOL_REFHOLD(pp); } else { - pp = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, NULL, sel); + pp = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, NULL, sel, + connp->conn_netstack); } *ppp = pp; CONN_DEC_REF(connp); @@ -5429,7 +5482,7 @@ ipsec_conn_pol(ipsec_selector_t *sel, conn_t *connp, ipsec_policy_t **ppp) * Caller must release the reference. 
*/ static void -ipsec_udp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp) +ipsec_udp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, ip_stack_t *ipst) { connf_t *connfp; conn_t *connp = NULL; @@ -5440,7 +5493,8 @@ ipsec_udp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp) if (sel->ips_local_port == 0) return; - connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(sel->ips_local_port)]; + connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(sel->ips_local_port, + ipst)]; mutex_enter(&connfp->connf_lock); if (sel->ips_isv4) { @@ -5483,7 +5537,7 @@ ipsec_udp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp) } static conn_t * -ipsec_find_listen_conn(uint16_t *pptr, ipsec_selector_t *sel) +ipsec_find_listen_conn(uint16_t *pptr, ipsec_selector_t *sel, ip_stack_t *ipst) { connf_t *connfp; conn_t *connp = NULL; @@ -5492,7 +5546,8 @@ ipsec_find_listen_conn(uint16_t *pptr, ipsec_selector_t *sel) if (sel->ips_local_port == 0) return (NULL); - connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(sel->ips_local_port)]; + connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(sel->ips_local_port, + ipst)]; mutex_enter(&connfp->connf_lock); if (sel->ips_isv4) { @@ -5531,7 +5586,7 @@ ipsec_find_listen_conn(uint16_t *pptr, ipsec_selector_t *sel) } static void -ipsec_tcp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp) +ipsec_tcp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, ip_stack_t *ipst) { connf_t *connfp; conn_t *connp; @@ -5557,8 +5612,8 @@ ipsec_tcp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp) pptr[0] = sel->ips_remote_port; pptr[1] = sel->ips_local_port; - connfp = &ipcl_conn_fanout[IPCL_CONN_HASH(sel->ips_remote_addr_v4, - ports)]; + connfp = &ipst->ips_ipcl_conn_fanout[ + IPCL_CONN_HASH(sel->ips_remote_addr_v4, ports, ipst)]; mutex_enter(&connfp->connf_lock); connp = connfp->connf_head; @@ -5587,7 +5642,7 @@ ipsec_tcp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp) mutex_exit(&connfp->connf_lock); /* Try the listen hash. 
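The inverse-ACQUIRE policy helpers stop walking global connection fanouts: the UDP, bind and conn hash tables now hang off ip_stack_t and the hashing macros take the ip_stack_t, so each IP instance hashes into its own tables. A sketch of a lookup against the per-stack UDP fanout; matching and the conn reference handling of the real code are elided:

static void
xx_scan_udp_fanout(ipsec_selector_t *sel, ip_stack_t *ipst)
{
	connf_t	*connfp;
	conn_t	*connp;

	/* index this stack's own table; the hash macro needs ipst as well */
	connfp = &ipst->ips_ipcl_udp_fanout[
	    IPCL_UDP_HASH(sel->ips_local_port, ipst)];

	mutex_enter(&connfp->connf_lock);
	for (connp = connfp->connf_head; connp != NULL;
	    connp = connp->conn_next) {
		/* compare against the selector; take a conn ref before use */
	}
	mutex_exit(&connfp->connf_lock);
}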
*/ - if ((connp = ipsec_find_listen_conn(pptr, sel)) == NULL) + if ((connp = ipsec_find_listen_conn(pptr, sel, ipst)) == NULL) return; } @@ -5595,7 +5650,8 @@ ipsec_tcp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp) } static void -ipsec_sctp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp) +ipsec_sctp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, + ip_stack_t *ipst) { conn_t *connp; uint32_t ports; @@ -5625,10 +5681,12 @@ ipsec_sctp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp) IN6_IPADDR_TO_V4MAPPED(sel->ips_remote_addr_v4, &dst); IN6_IPADDR_TO_V4MAPPED(sel->ips_local_addr_v4, &src); - connp = sctp_find_conn(&dst, &src, ports, 0, ALL_ZONES); + connp = sctp_find_conn(&dst, &src, ports, 0, ALL_ZONES, + ipst->ips_netstack->netstack_sctp); } else { connp = sctp_find_conn(&sel->ips_remote_addr_v6, - &sel->ips_local_addr_v6, ports, 0, ALL_ZONES); + &sel->ips_local_addr_v6, ports, 0, ALL_ZONES, + ipst->ips_netstack->netstack_sctp); } if (connp == NULL) return; @@ -5708,7 +5766,7 @@ ipsec_get_inverse_acquire_sel(ipsec_selector_t *sel, sadb_address_t *srcext, static int ipsec_tun_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, sadb_address_t *innsrcext, sadb_address_t *inndstext, ipsec_tun_pol_t *itp, - int *diagnostic) + int *diagnostic, netstack_t *ns) { int err; ipsec_policy_head_t *polhead; @@ -5736,7 +5794,7 @@ ipsec_tun_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, err = ipsec_get_inverse_acquire_sel(sel, innsrcext, inndstext, diagnostic); if (err != 0) { - ITP_REFRELE(itp); + ITP_REFRELE(itp, ns); return (err); } /* @@ -5753,12 +5811,12 @@ ipsec_tun_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, * check is needed. */ if (itp != NULL) { - ITP_REFRELE(itp); + ITP_REFRELE(itp, ns); } return (0); } else if (itp->itp_flags & ITPF_P_TUNNEL) { /* Tunnel mode set with no inner selectors. */ - ITP_REFRELE(itp); + ITP_REFRELE(itp, ns); return (ENOENT); } /* @@ -5773,9 +5831,10 @@ ipsec_tun_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, polhead = itp->itp_policy; ASSERT(polhead != NULL); rw_enter(&polhead->iph_lock, RW_READER); - *ppp = ipsec_find_policy_head(NULL, polhead, IPSEC_TYPE_INBOUND, sel); + *ppp = ipsec_find_policy_head(NULL, polhead, + IPSEC_TYPE_INBOUND, sel, ns); rw_exit(&polhead->iph_lock); - ITP_REFRELE(itp); + ITP_REFRELE(itp, ns); /* * Don't default to global if we didn't find a matching policy entry. @@ -5788,16 +5847,17 @@ ipsec_tun_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, } static void -ipsec_oth_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp) +ipsec_oth_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, + ip_stack_t *ipst) { boolean_t isv4 = sel->ips_isv4; connf_t *connfp; conn_t *connp; if (isv4) { - connfp = &ipcl_proto_fanout[sel->ips_protocol]; + connfp = &ipst->ips_ipcl_proto_fanout[sel->ips_protocol]; } else { - connfp = &ipcl_proto_fanout_v6[sel->ips_protocol]; + connfp = &ipst->ips_ipcl_proto_fanout_v6[sel->ips_protocol]; } mutex_enter(&connfp->connf_lock); @@ -5842,7 +5902,8 @@ ipsec_oth_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp) * The SRC address is the local one - just like an outbound ACQUIRE message. 
*/ mblk_t * -ipsec_construct_inverse_acquire(sadb_msg_t *samsg, sadb_ext_t *extv[]) +ipsec_construct_inverse_acquire(sadb_msg_t *samsg, sadb_ext_t *extv[], + netstack_t *ns) { int err; int diagnostic; @@ -5856,17 +5917,19 @@ ipsec_construct_inverse_acquire(sadb_msg_t *samsg, sadb_ext_t *extv[]) ipsec_policy_t *pp = NULL; ipsec_selector_t sel, isel; mblk_t *retmp; + ip_stack_t *ipst = ns->netstack_ip; + ipsec_stack_t *ipss = ns->netstack_ipsec; /* Normalize addresses */ - if (sadb_addrcheck(NULL, (mblk_t *)samsg, (sadb_ext_t *)srcext, 0) == - KS_IN_ADDR_UNKNOWN) { + if (sadb_addrcheck(NULL, (mblk_t *)samsg, (sadb_ext_t *)srcext, 0, ns) + == KS_IN_ADDR_UNKNOWN) { err = EINVAL; diagnostic = SADB_X_DIAGNOSTIC_BAD_SRC; goto bail; } src = (struct sockaddr_in6 *)(srcext + 1); - if (sadb_addrcheck(NULL, (mblk_t *)samsg, (sadb_ext_t *)dstext, 0) == - KS_IN_ADDR_UNKNOWN) { + if (sadb_addrcheck(NULL, (mblk_t *)samsg, (sadb_ext_t *)dstext, 0, ns) + == KS_IN_ADDR_UNKNOWN) { err = EINVAL; diagnostic = SADB_X_DIAGNOSTIC_BAD_DST; goto bail; @@ -5886,14 +5949,14 @@ ipsec_construct_inverse_acquire(sadb_msg_t *samsg, sadb_ext_t *extv[]) goto bail; } if (sadb_addrcheck(NULL, (mblk_t *)samsg, - (sadb_ext_t *)innsrcext, 0) == KS_IN_ADDR_UNKNOWN) { + (sadb_ext_t *)innsrcext, 0, ns) == KS_IN_ADDR_UNKNOWN) { err = EINVAL; diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_INNER_SRC; goto bail; } isrc = (struct sockaddr_in6 *)(innsrcext + 1); if (sadb_addrcheck(NULL, (mblk_t *)samsg, - (sadb_ext_t *)inndstext, 0) == KS_IN_ADDR_UNKNOWN) { + (sadb_ext_t *)inndstext, 0, ns) == KS_IN_ADDR_UNKNOWN) { err = EINVAL; diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_INNER_DST; goto bail; @@ -5939,24 +6002,26 @@ ipsec_construct_inverse_acquire(sadb_msg_t *samsg, sadb_ext_t *extv[]) pp = NULL; switch (sel.ips_protocol) { case IPPROTO_TCP: - ipsec_tcp_pol(&sel, &pp); + ipsec_tcp_pol(&sel, &pp, ipst); break; case IPPROTO_UDP: - ipsec_udp_pol(&sel, &pp); + ipsec_udp_pol(&sel, &pp, ipst); break; case IPPROTO_SCTP: - ipsec_sctp_pol(&sel, &pp); + ipsec_sctp_pol(&sel, &pp, ipst); break; case IPPROTO_ENCAP: case IPPROTO_IPV6: - rw_enter(&itp_get_byaddr_rw_lock, RW_READER); + rw_enter(&ipss->ipsec_itp_get_byaddr_rw_lock, RW_READER); /* * Assume sel.ips_remote_addr_* has the right address at * that exact position. */ - itp = itp_get_byaddr((uint32_t *)(&sel.ips_local_addr_v6), - (uint32_t *)(&sel.ips_remote_addr_v6), src->sin6_family); - rw_exit(&itp_get_byaddr_rw_lock); + itp = ipss->ipsec_itp_get_byaddr( + (uint32_t *)(&sel.ips_local_addr_v6), + (uint32_t *)(&sel.ips_remote_addr_v6), + src->sin6_family, ns); + rw_exit(&ipss->ipsec_itp_get_byaddr_rw_lock); if (innsrcext == NULL) { /* * Transport-mode tunnel, make sure we fake out isel @@ -5966,7 +6031,7 @@ ipsec_construct_inverse_acquire(sadb_msg_t *samsg, sadb_ext_t *extv[]) isel.ips_isv4 = (sel.ips_protocol == IPPROTO_ENCAP); } /* Else isel is initialized by ipsec_tun_pol(). */ err = ipsec_tun_pol(&isel, &pp, innsrcext, inndstext, itp, - &diagnostic); + &diagnostic, ns); /* * NOTE: isel isn't used for now, but in RFC 430x IPsec, it * may be. @@ -5975,7 +6040,7 @@ ipsec_construct_inverse_acquire(sadb_msg_t *samsg, sadb_ext_t *extv[]) goto bail; break; default: - ipsec_oth_pol(&sel, &pp); + ipsec_oth_pol(&sel, &pp, ipst); break; } @@ -5984,7 +6049,8 @@ ipsec_construct_inverse_acquire(sadb_msg_t *samsg, sadb_ext_t *extv[]) * look in the global policy. 
*/ if (pp == NULL) { - pp = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, NULL, NULL, &sel); + pp = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, NULL, NULL, &sel, + ns); if (pp == NULL) { /* There's no global policy. */ err = ENOENT; @@ -6000,9 +6066,9 @@ ipsec_construct_inverse_acquire(sadb_msg_t *samsg, sadb_ext_t *extv[]) */ retmp = sadb_extended_acquire(&sel, pp, NULL, (itp != NULL && (itp->itp_flags & ITPF_P_TUNNEL)), - samsg->sadb_msg_seq, samsg->sadb_msg_pid); + samsg->sadb_msg_seq, samsg->sadb_msg_pid, ns); if (pp != NULL) { - IPPOL_REFRELE(pp); + IPPOL_REFRELE(pp, ns); } if (retmp != NULL) { return (retmp); @@ -6030,17 +6096,19 @@ bail: */ void -sadb_set_lpkt(ipsa_t *ipsa, mblk_t *npkt) +sadb_set_lpkt(ipsa_t *ipsa, mblk_t *npkt, netstack_t *ns) { mblk_t *opkt; + ipsec_stack_t *ipss = ns->netstack_ipsec; membar_producer(); do opkt = ipsa->ipsa_lpkt; while (casptr(&ipsa->ipsa_lpkt, opkt, npkt) != opkt); - ip_drop_packet(opkt, B_TRUE, NULL, NULL, &ipdrops_sadb_inlarval_replace, - &sadb_dropper); + ip_drop_packet(opkt, B_TRUE, NULL, NULL, + DROPPER(ipss, ipds_sadb_inlarval_replace), + &ipss->ipsec_sadb_dropper); } /* @@ -6139,9 +6207,12 @@ sadb_alg_update_cb(isaf_t *head, ipsa_t *entry, void *cookie) &update_state) void -sadb_alg_update(ipsec_algtype_t alg_type, uint8_t alg_id, boolean_t is_added) +sadb_alg_update(ipsec_algtype_t alg_type, uint8_t alg_id, boolean_t is_added, + netstack_t *ns) { struct sadb_update_alg_state update_state; + ipsecah_stack_t *ahstack = ns->netstack_ipsecah; + ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; update_state.alg_type = alg_type; update_state.alg_id = alg_id; @@ -6149,17 +6220,17 @@ sadb_alg_update(ipsec_algtype_t alg_type, uint8_t alg_id, boolean_t is_added) if (alg_type == IPSEC_ALG_AUTH) { /* walk the AH tables only for auth. 
algorithm changes */ - SADB_ALG_UPDATE_WALK(ah_sadb.s_v4, sdb_of); - SADB_ALG_UPDATE_WALK(ah_sadb.s_v4, sdb_if); - SADB_ALG_UPDATE_WALK(ah_sadb.s_v6, sdb_of); - SADB_ALG_UPDATE_WALK(ah_sadb.s_v6, sdb_if); + SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v4, sdb_of); + SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v4, sdb_if); + SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v6, sdb_of); + SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v6, sdb_if); } /* walk the ESP tables */ - SADB_ALG_UPDATE_WALK(esp_sadb.s_v4, sdb_of); - SADB_ALG_UPDATE_WALK(esp_sadb.s_v4, sdb_if); - SADB_ALG_UPDATE_WALK(esp_sadb.s_v6, sdb_of); - SADB_ALG_UPDATE_WALK(esp_sadb.s_v6, sdb_if); + SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v4, sdb_of); + SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v4, sdb_if); + SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v6, sdb_of); + SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v6, sdb_if); } /* @@ -6175,8 +6246,9 @@ ipsec_create_ctx_tmpl(ipsa_t *sa, ipsec_algtype_t alg_type) crypto_key_t *key; crypto_ctx_template_t *sa_tmpl; int rv; + ipsec_stack_t *ipss = sa->ipsa_netstack->netstack_ipsec; - ASSERT(MUTEX_HELD(&alg_lock)); + ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock)); ASSERT(MUTEX_HELD(&sa->ipsa_lock)); /* get pointers to the algorithm info, context template, and key */ @@ -6184,12 +6256,12 @@ ipsec_create_ctx_tmpl(ipsa_t *sa, ipsec_algtype_t alg_type) case IPSEC_ALG_AUTH: key = &sa->ipsa_kcfauthkey; sa_tmpl = &sa->ipsa_authtmpl; - alg = ipsec_alglists[alg_type][sa->ipsa_auth_alg]; + alg = ipss->ipsec_alglists[alg_type][sa->ipsa_auth_alg]; break; case IPSEC_ALG_ENCR: key = &sa->ipsa_kcfencrkey; sa_tmpl = &sa->ipsa_encrtmpl; - alg = ipsec_alglists[alg_type][sa->ipsa_encr_alg]; + alg = ipss->ipsec_alglists[alg_type][sa->ipsa_encr_alg]; break; default: alg = NULL; @@ -6320,10 +6392,14 @@ sadb_clear_timeouts_walker(isaf_t *head, ipsa_t *ipsa, void *q) mutex_exit(&ipsa->ipsa_lock); } +/* + * Is only to be used on a nattymod queue. + */ void -sadb_clear_timeouts(queue_t *q) +sadb_clear_timeouts(queue_t *q, netstack_t *ns) { - sadb_t *sp = &esp_sadb.s_v4; + ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; + sadb_t *sp = &espstack->esp_sadb.s_v4; sadb_walker(sp->sdb_if, sp->sdb_hashsize, sadb_clear_timeouts_walker, q); diff --git a/usr/src/uts/common/inet/ip/spd.c b/usr/src/uts/common/inet/ip/spd.c index db9f0eac56..30cb8a608f 100644 --- a/usr/src/uts/common/inet/ip/spd.c +++ b/usr/src/uts/common/inet/ip/spd.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -72,21 +72,24 @@ #include <inet/ipclassifier.h> #include <inet/tun.h> -static void ipsec_update_present_flags(); -static ipsec_act_t *ipsec_act_wildcard_expand(ipsec_act_t *, uint_t *); +static void ipsec_update_present_flags(ipsec_stack_t *); +static ipsec_act_t *ipsec_act_wildcard_expand(ipsec_act_t *, uint_t *, + netstack_t *); static void ipsec_out_free(void *); static void ipsec_in_free(void *); static mblk_t *ipsec_attach_global_policy(mblk_t *, conn_t *, - ipsec_selector_t *); + ipsec_selector_t *, netstack_t *); static mblk_t *ipsec_apply_global_policy(mblk_t *, conn_t *, - ipsec_selector_t *); + ipsec_selector_t *, netstack_t *); static mblk_t *ipsec_check_ipsecin_policy(mblk_t *, ipsec_policy_t *, - ipha_t *, ip6_t *, uint64_t); + ipha_t *, ip6_t *, uint64_t, netstack_t *); static void ipsec_in_release_refs(ipsec_in_t *); static void ipsec_out_release_refs(ipsec_out_t *); +static void ipsec_action_free_table(ipsec_action_t *); static void ipsec_action_reclaim(void *); -static void ipsid_init(void); -static void ipsid_fini(void); +static void ipsec_action_reclaim_stack(netstack_t *); +static void ipsid_init(netstack_t *); +static void ipsid_fini(netstack_t *); /* sel_flags values for ipsec_init_inbound_sel(). */ #define SEL_NONE 0x0000 @@ -105,45 +108,23 @@ static boolean_t ipsec_check_ipsecin_action(struct ipsec_in_s *, mblk_t *, struct ipsec_action_s *, ipha_t *ipha, ip6_t *ip6h, const char **, kstat_named_t **); static void ipsec_unregister_prov_update(void); +static void ipsec_prov_update_callback_stack(uint32_t, void *, netstack_t *); static boolean_t ipsec_compare_action(ipsec_policy_t *, ipsec_policy_t *); static uint32_t selector_hash(ipsec_selector_t *, ipsec_policy_root_t *); +static boolean_t ipsec_kstat_init(ipsec_stack_t *); +static void ipsec_kstat_destroy(ipsec_stack_t *); +static int ipsec_free_tables(ipsec_stack_t *); +static int tunnel_compare(const void *, const void *); +static void ipsec_freemsg_chain(mblk_t *); +static void ip_drop_packet_chain(mblk_t *, boolean_t, ill_t *, ire_t *, + struct kstat_named *, ipdropper_t *); +static boolean_t ipsec_kstat_init(ipsec_stack_t *); +static void ipsec_kstat_destroy(ipsec_stack_t *); +static int ipsec_free_tables(ipsec_stack_t *); static int tunnel_compare(const void *, const void *); static void ipsec_freemsg_chain(mblk_t *); static void ip_drop_packet_chain(mblk_t *, boolean_t, ill_t *, ire_t *, struct kstat_named *, ipdropper_t *); - -/* - * Policy rule index generator. We assume this won't wrap in the - * lifetime of a system. If we make 2^20 policy changes per second, - * this will last 2^44 seconds, or roughly 500,000 years, so we don't - * have to worry about reusing policy index values. - * - * Protected by ipsec_conf_lock. - */ -uint64_t ipsec_next_policy_index = 1; - -/* - * Active & Inactive system policy roots - */ -static ipsec_policy_head_t system_policy; -static ipsec_policy_head_t inactive_policy; - -/* - * Tunnel policies - AVL tree indexed by tunnel name. - */ -krwlock_t tunnel_policy_lock; -uint64_t tunnel_policy_gen; /* To keep track of updates w/o searches. */ -avl_tree_t tunnel_policies; - -/* Packet dropper for generic SPD drops. */ -ipdropper_t spd_dropper; - -/* - * For now, use a trivially sized hash table for actions. - * In the future we can add the structure canonicalization necessary - * to get the hash function to behave correctly.. - */ -#define IPSEC_ACTION_HASH_SIZE 1 /* * Selector hash table is statically sized at module load time. 
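Everything deleted above, the system and inactive policy heads, the tunnel-policy tree and lock, the selector and action hashes, the algorithm lists, the droppers and the policy-failure counters, reappears as fields of ipsec_stack_t, so code that used to read a file-scope variable now dereferences the per-stack structure found through the netstack. A representative before/after sketch:

static ipsec_policy_head_t *
xx_active_polhead(netstack_t *ns)
{
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	/* was: &system_policy */
	return (&ipss->ipsec_system_policy);
}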
@@ -151,49 +132,31 @@ ipdropper_t spd_dropper; */ #define IPSEC_SPDHASH_DEFAULT 251 -uint32_t ipsec_spd_hashsize = 0; /* SPD hash-size tunable per tunnel. */ #define TUN_SPDHASH_DEFAULT 5 -uint32_t tun_spd_hashsize; #define IPSEC_SEL_NOHASH ((uint32_t)(~0)) -static HASH_HEAD(ipsec_action_s) ipsec_action_hash[IPSEC_ACTION_HASH_SIZE]; -static HASH_HEAD(ipsec_sel) *ipsec_sel_hash; +/* + * Handle global across all stack instances + */ +static crypto_notify_handle_t prov_update_handle = NULL; static kmem_cache_t *ipsec_action_cache; static kmem_cache_t *ipsec_sel_cache; static kmem_cache_t *ipsec_pol_cache; static kmem_cache_t *ipsec_info_cache; -boolean_t ipsec_inbound_v4_policy_present = B_FALSE; -boolean_t ipsec_outbound_v4_policy_present = B_FALSE; -boolean_t ipsec_inbound_v6_policy_present = B_FALSE; -boolean_t ipsec_outbound_v6_policy_present = B_FALSE; - /* Frag cache prototypes */ static void ipsec_fragcache_clean(ipsec_fragcache_t *); static ipsec_fragcache_entry_t *fragcache_delentry(int, ipsec_fragcache_entry_t *, ipsec_fragcache_t *); boolean_t ipsec_fragcache_init(ipsec_fragcache_t *); void ipsec_fragcache_uninit(ipsec_fragcache_t *); -mblk_t *ipsec_fragcache_add(ipsec_fragcache_t *, mblk_t *, mblk_t *, int); - -/* - * Because policy needs to know what algorithms are supported, keep the - * lists of algorithms here. - */ - -kmutex_t alg_lock; -krwlock_t itp_get_byaddr_rw_lock; -ipsec_tun_pol_t *(*itp_get_byaddr)(uint32_t *, uint32_t *, int); -uint8_t ipsec_nalgs[IPSEC_NALGTYPES]; -ipsec_alginfo_t *ipsec_alglists[IPSEC_NALGTYPES][IPSEC_MAX_ALGS]; -uint8_t ipsec_sortlist[IPSEC_NALGTYPES][IPSEC_MAX_ALGS]; -ipsec_algs_exec_mode_t ipsec_algs_exec_mode[IPSEC_NALGTYPES]; -static crypto_notify_handle_t prov_update_handle = NULL; +mblk_t *ipsec_fragcache_add(ipsec_fragcache_t *, mblk_t *, mblk_t *, int, + ipsec_stack_t *); int ipsec_hdr_pullup_needed = 0; int ipsec_weird_null_inbound_policy = 0; @@ -248,12 +211,6 @@ static char *ipsec_policy_failure_msgs[] = { "%s: Self-Encapsulation present while not expected in the " "incoming %s packet; Source %s, Destination %s.\n", }; -/* - * Have a counter for every possible policy message in the previous array. - */ -static uint32_t ipsec_policy_failure_count[IPSEC_POLICY_MAX]; -/* Time since last ipsec policy failure that printed a message. */ -hrtime_t ipsec_policy_failure_last = 0; /* * General overviews: @@ -261,7 +218,7 @@ hrtime_t ipsec_policy_failure_last = 0; * Locking: * * All of the system policy structures are protected by a single - * rwlock, ipsec_conf_lock. These structures are threaded in a + * rwlock. These structures are threaded in a * fairly complex fashion and are not expected to change on a * regular basis, so this should not cause scaling/contention * problems. As a result, policy checks should (hopefully) be MT-hot. @@ -361,10 +318,14 @@ ipsec_policy_cmpbyid(const void *a, const void *b) return (0); } +/* + * Free what ipsec_alloc_table allocated. + */ void ipsec_polhead_free_table(ipsec_policy_head_t *iph) { int dir; + int i; for (dir = 0; dir < IPSEC_NTYPES; dir++) { ipsec_policy_root_t *ipr = &iph->iph_root[dir]; @@ -372,8 +333,12 @@ ipsec_polhead_free_table(ipsec_policy_head_t *iph) if (ipr->ipr_hash == NULL) continue; + for (i = 0; i < ipr->ipr_nchains; i++) { + ASSERT(ipr->ipr_hash[i].hash_head == NULL); + } kmem_free(ipr->ipr_hash, ipr->ipr_nchains * sizeof (ipsec_policy_hash_t)); + ipr->ipr_hash = NULL; } } @@ -397,19 +362,22 @@ ipsec_polhead_destroy(ipsec_policy_head_t *iph) } /* - * Module unload hook. 
+ * Free the IPsec stack instance. */ -void -ipsec_policy_destroy(void) +/* ARGSUSED */ +static void +ipsec_stack_fini(netstackid_t stackid, void *arg) { - int i; + ipsec_stack_t *ipss = (ipsec_stack_t *)arg; void *cookie; ipsec_tun_pol_t *node; + netstack_t *ns = ipss->ipsec_netstack; + int i; + ipsec_algtype_t algtype; - ip_drop_unregister(&spd_dropper); - ip_drop_destroy(); + ipsec_loader_destroy(ipss); - rw_enter(&tunnel_policy_lock, RW_WRITER); + rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_WRITER); /* * It's possible we can just ASSERT() the tree is empty. After all, * we aren't called until IP is ready to unload (and presumably all @@ -418,47 +386,95 @@ ipsec_policy_destroy(void) */ cookie = NULL; while ((node = (ipsec_tun_pol_t *) - avl_destroy_nodes(&tunnel_policies, &cookie)) != NULL) { - ITP_REFRELE(node); + avl_destroy_nodes(&ipss->ipsec_tunnel_policies, + &cookie)) != NULL) { + ITP_REFRELE(node, ns); } - avl_destroy(&tunnel_policies); - rw_exit(&tunnel_policy_lock); - rw_destroy(&tunnel_policy_lock); - ipsec_polhead_destroy(&system_policy); - ipsec_polhead_destroy(&inactive_policy); + avl_destroy(&ipss->ipsec_tunnel_policies); + rw_exit(&ipss->ipsec_tunnel_policy_lock); + rw_destroy(&ipss->ipsec_tunnel_policy_lock); - for (i = 0; i < IPSEC_ACTION_HASH_SIZE; i++) - mutex_destroy(&(ipsec_action_hash[i].hash_lock)); + ipsec_config_flush(ns); - for (i = 0; i < ipsec_spd_hashsize; i++) - mutex_destroy(&(ipsec_sel_hash[i].hash_lock)); + ipsec_kstat_destroy(ipss); - ipsec_unregister_prov_update(); + ip_drop_unregister(&ipss->ipsec_dropper); + + ip_drop_unregister(&ipss->ipsec_spd_dropper); + ip_drop_destroy(ipss); + /* + * Globals start with ref == 1 to prevent IPPH_REFRELE() from + * attempting to free them, hence they should have 1 now. + */ + ipsec_polhead_destroy(&ipss->ipsec_system_policy); + ASSERT(ipss->ipsec_system_policy.iph_refs == 1); + ipsec_polhead_destroy(&ipss->ipsec_inactive_policy); + ASSERT(ipss->ipsec_inactive_policy.iph_refs == 1); + + for (i = 0; i < IPSEC_ACTION_HASH_SIZE; i++) { + ipsec_action_free_table(ipss->ipsec_action_hash[i].hash_head); + ipss->ipsec_action_hash[i].hash_head = NULL; + mutex_destroy(&(ipss->ipsec_action_hash[i].hash_lock)); + } - mutex_destroy(&alg_lock); + for (i = 0; i < ipss->ipsec_spd_hashsize; i++) { + ASSERT(ipss->ipsec_sel_hash[i].hash_head == NULL); + mutex_destroy(&(ipss->ipsec_sel_hash[i].hash_lock)); + } + + mutex_enter(&ipss->ipsec_alg_lock); + for (algtype = 0; algtype < IPSEC_NALGTYPES; algtype ++) { + int nalgs = ipss->ipsec_nalgs[algtype]; + + for (i = 0; i < nalgs; i++) { + if (ipss->ipsec_alglists[algtype][i] != NULL) + ipsec_alg_unreg(algtype, i, ns); + } + } + mutex_exit(&ipss->ipsec_alg_lock); + mutex_destroy(&ipss->ipsec_alg_lock); + + ipsid_gc(ns); + ipsid_fini(ns); + (void) ipsec_free_tables(ipss); + kmem_free(ipss, sizeof (*ipss)); +} + +void +ipsec_policy_g_destroy(void) +{ kmem_cache_destroy(ipsec_action_cache); kmem_cache_destroy(ipsec_sel_cache); kmem_cache_destroy(ipsec_pol_cache); kmem_cache_destroy(ipsec_info_cache); - ipsid_gc(); - ipsid_fini(); + + ipsec_unregister_prov_update(); + + netstack_unregister(NS_IPSEC); } /* + * Free what ipsec_alloc_tables allocated. * Called when table allocation fails to free the table. 
*/ static int -ipsec_alloc_tables_failed() +ipsec_free_tables(ipsec_stack_t *ipss) { - if (ipsec_sel_hash != NULL) { - kmem_free(ipsec_sel_hash, ipsec_spd_hashsize * - sizeof (*ipsec_sel_hash)); - ipsec_sel_hash = NULL; + int i; + + if (ipss->ipsec_sel_hash != NULL) { + for (i = 0; i < ipss->ipsec_spd_hashsize; i++) { + ASSERT(ipss->ipsec_sel_hash[i].hash_head == NULL); + } + kmem_free(ipss->ipsec_sel_hash, ipss->ipsec_spd_hashsize * + sizeof (*ipss->ipsec_sel_hash)); + ipss->ipsec_sel_hash = NULL; + ipss->ipsec_spd_hashsize = 0; } - ipsec_polhead_free_table(&system_policy); - ipsec_polhead_free_table(&inactive_policy); + ipsec_polhead_free_table(&ipss->ipsec_system_policy); + ipsec_polhead_free_table(&ipss->ipsec_inactive_policy); return (ENOMEM); } @@ -469,7 +485,7 @@ ipsec_alloc_tables_failed() */ int ipsec_alloc_table(ipsec_policy_head_t *iph, int nchains, int kmflag, - boolean_t global_cleanup) + boolean_t global_cleanup, netstack_t *ns) { int dir; @@ -480,7 +496,8 @@ ipsec_alloc_table(ipsec_policy_head_t *iph, int nchains, int kmflag, ipr->ipr_hash = kmem_zalloc(nchains * sizeof (ipsec_policy_hash_t), kmflag); if (ipr->ipr_hash == NULL) - return (global_cleanup ? ipsec_alloc_tables_failed() : + return (global_cleanup ? + ipsec_free_tables(ns->netstack_ipsec) : ENOMEM); } return (0); @@ -491,25 +508,26 @@ ipsec_alloc_table(ipsec_policy_head_t *iph, int nchains, int kmflag, * after cleaning up any work in progress. */ static int -ipsec_alloc_tables(int kmflag) +ipsec_alloc_tables(int kmflag, netstack_t *ns) { int error; + ipsec_stack_t *ipss = ns->netstack_ipsec; - error = ipsec_alloc_table(&system_policy, ipsec_spd_hashsize, kmflag, - B_TRUE); + error = ipsec_alloc_table(&ipss->ipsec_system_policy, + ipss->ipsec_spd_hashsize, kmflag, B_TRUE, ns); if (error != 0) return (error); - error = ipsec_alloc_table(&inactive_policy, ipsec_spd_hashsize, kmflag, - B_TRUE); + error = ipsec_alloc_table(&ipss->ipsec_inactive_policy, + ipss->ipsec_spd_hashsize, kmflag, B_TRUE, ns); if (error != 0) return (error); - ipsec_sel_hash = kmem_zalloc(ipsec_spd_hashsize * - sizeof (*ipsec_sel_hash), kmflag); + ipss->ipsec_sel_hash = kmem_zalloc(ipss->ipsec_spd_hashsize * + sizeof (*ipss->ipsec_sel_hash), kmflag); - if (ipsec_sel_hash == NULL) - return (ipsec_alloc_tables_failed()); + if (ipss->ipsec_sel_hash == NULL) + return (ipsec_free_tables(ipss)); return (0); } @@ -537,62 +555,145 @@ ipsec_polhead_init(ipsec_policy_head_t *iph, int nchains) } } +static boolean_t +ipsec_kstat_init(ipsec_stack_t *ipss) +{ + ipss->ipsec_ksp = kstat_create_netstack("ip", 0, "ipsec_stat", "net", + KSTAT_TYPE_NAMED, sizeof (ipsec_kstats_t) / sizeof (kstat_named_t), + KSTAT_FLAG_PERSISTENT, ipss->ipsec_netstack->netstack_stackid); + + if (ipss->ipsec_ksp == NULL || ipss->ipsec_ksp->ks_data == NULL) + return (B_FALSE); + + ipss->ipsec_kstats = ipss->ipsec_ksp->ks_data; + +#define KI(x) kstat_named_init(&ipss->ipsec_kstats->x, #x, KSTAT_DATA_UINT64) + KI(esp_stat_in_requests); + KI(esp_stat_in_discards); + KI(esp_stat_lookup_failure); + KI(ah_stat_in_requests); + KI(ah_stat_in_discards); + KI(ah_stat_lookup_failure); + KI(sadb_acquire_maxpackets); + KI(sadb_acquire_qhiwater); +#undef KI + + kstat_install(ipss->ipsec_ksp); + return (B_TRUE); +} + +static void +ipsec_kstat_destroy(ipsec_stack_t *ipss) +{ + kstat_delete_netstack(ipss->ipsec_ksp, + ipss->ipsec_netstack->netstack_stackid); + ipss->ipsec_kstats = NULL; + +} + /* - * Module load hook. + * Initialize the IPsec stack instance. 
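ipsec_kstat_init() above shows the per-instance kstat idiom: kstat_create_netstack() takes the stack identifier so every IP instance exports its own ipsec_stat block, and kstat_delete_netstack() is handed the same stackid at teardown. A condensed sketch of the create/install/delete pairing for a hypothetical two-counter set (names are made up):

typedef struct xx_kstats {
	kstat_named_t	xx_in_requests;
	kstat_named_t	xx_in_discards;
} xx_kstats_t;

static kstat_t *
xx_kstat_init(netstackid_t stackid, xx_kstats_t **statsp)
{
	kstat_t *ksp;

	ksp = kstat_create_netstack("ip", 0, "xx_stat", "net",
	    KSTAT_TYPE_NAMED, sizeof (xx_kstats_t) / sizeof (kstat_named_t),
	    KSTAT_FLAG_PERSISTENT, stackid);
	if (ksp == NULL || ksp->ks_data == NULL)
		return (NULL);

	*statsp = ksp->ks_data;
	kstat_named_init(&(*statsp)->xx_in_requests, "xx_in_requests",
	    KSTAT_DATA_UINT64);
	kstat_named_init(&(*statsp)->xx_in_discards, "xx_in_discards",
	    KSTAT_DATA_UINT64);

	kstat_install(ksp);
	return (ksp);
}

static void
xx_kstat_fini(netstackid_t stackid, kstat_t *ksp)
{
	kstat_delete_netstack(ksp, stackid);
}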
*/ -void -ipsec_policy_init() +/* ARGSUSED */ +static void * +ipsec_stack_init(netstackid_t stackid, netstack_t *ns) { + ipsec_stack_t *ipss; int i; + ipss = (ipsec_stack_t *)kmem_zalloc(sizeof (*ipss), KM_SLEEP); + ipss->ipsec_netstack = ns; + + /* + * FIXME: netstack_ipsec is used by some of the routines we call + * below, but it isn't set until this routine returns. + * Either we introduce optional xxx_stack_alloc() functions + * that will be called by the netstack framework before xxx_stack_init, + * or we switch spd.c and sadb.c to operate on ipsec_stack_t + * (latter has some include file order issues for sadb.h, but makes + * sense if we merge some of the ipsec related stack_t's together. + */ + ns->netstack_ipsec = ipss; + /* * Make two attempts to allocate policy hash tables; try it at * the "preferred" size (may be set in /etc/system) first, * then fall back to the default size. */ - if (ipsec_spd_hashsize == 0) - ipsec_spd_hashsize = IPSEC_SPDHASH_DEFAULT; + if (ipss->ipsec_spd_hashsize == 0) + ipss->ipsec_spd_hashsize = IPSEC_SPDHASH_DEFAULT; - if (ipsec_alloc_tables(KM_NOSLEEP) != 0) { + if (ipsec_alloc_tables(KM_NOSLEEP, ns) != 0) { cmn_err(CE_WARN, "Unable to allocate %d entry IPsec policy hash table", - ipsec_spd_hashsize); - ipsec_spd_hashsize = IPSEC_SPDHASH_DEFAULT; + ipss->ipsec_spd_hashsize); + ipss->ipsec_spd_hashsize = IPSEC_SPDHASH_DEFAULT; cmn_err(CE_WARN, "Falling back to %d entries", - ipsec_spd_hashsize); - (void) ipsec_alloc_tables(KM_SLEEP); + ipss->ipsec_spd_hashsize); + (void) ipsec_alloc_tables(KM_SLEEP, ns); } /* Just set a default for tunnels. */ - if (tun_spd_hashsize == 0) - tun_spd_hashsize = TUN_SPDHASH_DEFAULT; + if (ipss->ipsec_tun_spd_hashsize == 0) + ipss->ipsec_tun_spd_hashsize = TUN_SPDHASH_DEFAULT; - ipsid_init(); + ipsid_init(ns); /* * Globals need ref == 1 to prevent IPPH_REFRELE() from attempting * to free them. 
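ipsec_stack_init() above keeps the two-pass table allocation: first try the tuned hash size without sleeping, then warn, fall back to the default size, and allocate with KM_SLEEP. A small stand-alone sketch of the same control flow; calloc() stands in for kmem_zalloc() and the default constant below is a placeholder value, not the kernel's.

    /*
     * Editorial sketch of the two-pass allocation: attempt the preferred
     * (tunable) size, fall back to a default on failure.  User-space model.
     */
    #include <stdio.h>
    #include <stdlib.h>

    #define SPDHASH_DEFAULT 251     /* placeholder, not the kernel constant */

    static void *
    alloc_table(size_t nchains)
    {
        return (calloc(nchains, sizeof (void *)));  /* zeroed, like kmem_zalloc */
    }

    static void *
    alloc_spd_table(size_t *sizep)
    {
        void *tbl = alloc_table(*sizep);            /* first pass: tuned size */

        if (tbl == NULL) {
            fprintf(stderr, "falling back to %d entries\n", SPDHASH_DEFAULT);
            *sizep = SPDHASH_DEFAULT;               /* second pass: default */
            tbl = alloc_table(*sizep);
        }
        return (tbl);
    }

    int
    main(void)
    {
        size_t nchains = 4096;      /* pretend this came from /etc/system */
        void *tbl = alloc_spd_table(&nchains);

        printf("allocated %zu chains: %s\n", nchains, tbl ? "ok" : "failed");
        free(tbl);
        return (0);
    }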
*/ - system_policy.iph_refs = 1; - inactive_policy.iph_refs = 1; - ipsec_polhead_init(&system_policy, ipsec_spd_hashsize); - ipsec_polhead_init(&inactive_policy, ipsec_spd_hashsize); - rw_init(&tunnel_policy_lock, NULL, RW_DEFAULT, NULL); - avl_create(&tunnel_policies, tunnel_compare, sizeof (ipsec_tun_pol_t), - 0); + ipss->ipsec_system_policy.iph_refs = 1; + ipss->ipsec_inactive_policy.iph_refs = 1; + ipsec_polhead_init(&ipss->ipsec_system_policy, + ipss->ipsec_spd_hashsize); + ipsec_polhead_init(&ipss->ipsec_inactive_policy, + ipss->ipsec_spd_hashsize); + rw_init(&ipss->ipsec_tunnel_policy_lock, NULL, RW_DEFAULT, NULL); + avl_create(&ipss->ipsec_tunnel_policies, tunnel_compare, + sizeof (ipsec_tun_pol_t), 0); + + ipss->ipsec_next_policy_index = 1; + + rw_init(&ipss->ipsec_system_policy.iph_lock, NULL, RW_DEFAULT, NULL); + rw_init(&ipss->ipsec_inactive_policy.iph_lock, NULL, RW_DEFAULT, NULL); for (i = 0; i < IPSEC_ACTION_HASH_SIZE; i++) - mutex_init(&(ipsec_action_hash[i].hash_lock), + mutex_init(&(ipss->ipsec_action_hash[i].hash_lock), NULL, MUTEX_DEFAULT, NULL); - for (i = 0; i < ipsec_spd_hashsize; i++) - mutex_init(&(ipsec_sel_hash[i].hash_lock), + for (i = 0; i < ipss->ipsec_spd_hashsize; i++) + mutex_init(&(ipss->ipsec_sel_hash[i].hash_lock), NULL, MUTEX_DEFAULT, NULL); - mutex_init(&alg_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&ipss->ipsec_alg_lock, NULL, MUTEX_DEFAULT, NULL); + for (i = 0; i < IPSEC_NALGTYPES; i++) { + ipss->ipsec_nalgs[i] = 0; + } + + ip_drop_init(ipss); + ip_drop_register(&ipss->ipsec_spd_dropper, "IPsec SPD"); + + /* Set function to dummy until tun is loaded */ + rw_init(&ipss->ipsec_itp_get_byaddr_rw_lock, NULL, RW_DEFAULT, NULL); + rw_enter(&ipss->ipsec_itp_get_byaddr_rw_lock, RW_WRITER); + ipss->ipsec_itp_get_byaddr = itp_get_byaddr_dummy; + rw_exit(&ipss->ipsec_itp_get_byaddr_rw_lock); + + /* IP's IPsec code calls the packet dropper */ + ip_drop_register(&ipss->ipsec_dropper, "IP IPsec processing"); + + (void) ipsec_kstat_init(ipss); + + ipsec_loader_init(ipss); + ipsec_loader_start(ipss); - for (i = 0; i < IPSEC_NALGTYPES; i++) - ipsec_nalgs[i] = 0; + return (ipss); +} +/* Global across all stack instances */ +void +ipsec_policy_g_init(void) +{ ipsec_action_cache = kmem_cache_create("ipsec_actions", sizeof (ipsec_action_t), _POINTER_ALIGNMENT, NULL, NULL, ipsec_action_reclaim, NULL, NULL, 0); @@ -606,14 +707,12 @@ ipsec_policy_init() sizeof (ipsec_info_t), _POINTER_ALIGNMENT, NULL, NULL, NULL, NULL, NULL, 0); - ip_drop_init(); - ip_drop_register(&spd_dropper, "IPsec SPD"); - - /* Set function to dummy until tun is loaded */ - rw_init(&itp_get_byaddr_rw_lock, NULL, RW_DEFAULT, NULL); - rw_enter(&itp_get_byaddr_rw_lock, RW_WRITER); - itp_get_byaddr = itp_get_byaddr_dummy; - rw_exit(&itp_get_byaddr_rw_lock); + /* + * We want to be informed each time a stack is created or + * destroyed in the kernel, so we can maintain the + * set of ipsec_stack_t's. + */ + netstack_register(NS_IPSEC, ipsec_stack_init, NULL, ipsec_stack_fini); } /* @@ -628,38 +727,39 @@ ipsec_policy_init() * We need a better metric for sorting algorithms by preference. 
*/ static void -alg_insert_sortlist(enum ipsec_algtype at, uint8_t algid) +alg_insert_sortlist(enum ipsec_algtype at, uint8_t algid, netstack_t *ns) { - ipsec_alginfo_t *ai = ipsec_alglists[at][algid]; + ipsec_stack_t *ipss = ns->netstack_ipsec; + ipsec_alginfo_t *ai = ipss->ipsec_alglists[at][algid]; uint8_t holder, swap; uint_t i; - uint_t count = ipsec_nalgs[at]; + uint_t count = ipss->ipsec_nalgs[at]; ASSERT(ai != NULL); ASSERT(algid == ai->alg_id); - ASSERT(MUTEX_HELD(&alg_lock)); + ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock)); holder = algid; for (i = 0; i < count - 1; i++) { ipsec_alginfo_t *alt; - alt = ipsec_alglists[at][ipsec_sortlist[at][i]]; + alt = ipss->ipsec_alglists[at][ipss->ipsec_sortlist[at][i]]; /* * If you want to give precedence to newly added algs, * add the = in the > comparison. */ if ((holder != algid) || (ai->alg_minbits > alt->alg_minbits)) { /* Swap sortlist[i] and holder. */ - swap = ipsec_sortlist[at][i]; - ipsec_sortlist[at][i] = holder; + swap = ipss->ipsec_sortlist[at][i]; + ipss->ipsec_sortlist[at][i] = holder; holder = swap; ai = alt; } /* Else just continue. */ } /* Store holder in last slot. */ - ipsec_sortlist[at][i] = holder; + ipss->ipsec_sortlist[at][i] = holder; } /* @@ -667,19 +767,22 @@ alg_insert_sortlist(enum ipsec_algtype at, uint8_t algid) * This should be considerably easier, even with complex sorting. */ static void -alg_remove_sortlist(enum ipsec_algtype at, uint8_t algid) +alg_remove_sortlist(enum ipsec_algtype at, uint8_t algid, netstack_t *ns) { boolean_t copyback = B_FALSE; int i; - int newcount = ipsec_nalgs[at]; + ipsec_stack_t *ipss = ns->netstack_ipsec; + int newcount = ipss->ipsec_nalgs[at]; - ASSERT(MUTEX_HELD(&alg_lock)); + ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock)); for (i = 0; i <= newcount; i++) { - if (copyback) - ipsec_sortlist[at][i-1] = ipsec_sortlist[at][i]; - else if (ipsec_sortlist[at][i] == algid) + if (copyback) { + ipss->ipsec_sortlist[at][i-1] = + ipss->ipsec_sortlist[at][i]; + } else if (ipss->ipsec_sortlist[at][i] == algid) { copyback = B_TRUE; + } } } @@ -688,16 +791,18 @@ alg_remove_sortlist(enum ipsec_algtype at, uint8_t algid) * Must be called while holding the algorithm table writer lock. */ void -ipsec_alg_reg(ipsec_algtype_t algtype, ipsec_alginfo_t *alg) +ipsec_alg_reg(ipsec_algtype_t algtype, ipsec_alginfo_t *alg, netstack_t *ns) { - ASSERT(MUTEX_HELD(&alg_lock)); + ipsec_stack_t *ipss = ns->netstack_ipsec; + + ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock)); - ASSERT(ipsec_alglists[algtype][alg->alg_id] == NULL); - ipsec_alg_fix_min_max(alg, algtype); - ipsec_alglists[algtype][alg->alg_id] = alg; + ASSERT(ipss->ipsec_alglists[algtype][alg->alg_id] == NULL); + ipsec_alg_fix_min_max(alg, algtype, ns); + ipss->ipsec_alglists[algtype][alg->alg_id] = alg; - ipsec_nalgs[algtype]++; - alg_insert_sortlist(algtype, alg->alg_id); + ipss->ipsec_nalgs[algtype]++; + alg_insert_sortlist(algtype, alg->alg_id, ns); } /* @@ -705,16 +810,18 @@ ipsec_alg_reg(ipsec_algtype_t algtype, ipsec_alginfo_t *alg) * Must be called while holding the algorithm table writer lock. 
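alg_insert_sortlist() and alg_remove_sortlist() above keep a per-type array of algorithm ids ordered by minimum key size, shifting entries on insert and copying back on removal. A compact user-space version of the same bookkeeping follows; the key-size table is invented for the example.

    /*
     * Editorial sketch of the sortlist maintenance: insert keeps the array
     * ordered (larger minimum key sizes first), remove shifts the tail down
     * one slot, matching the copyback loop in the hunk above.
     */
    #include <stdio.h>
    #include <stdint.h>

    #define MAXALGS 16

    static int minbits[MAXALGS] = { 0, 128, 192, 64, 256 };  /* toy table */
    static uint8_t sortlist[MAXALGS];
    static int nalgs;

    static void
    insert_sorted(uint8_t algid)
    {
        int i = nalgs;

        while (i > 0 && minbits[sortlist[i - 1]] < minbits[algid]) {
            sortlist[i] = sortlist[i - 1];   /* shift weaker algs up one slot */
            i--;
        }
        sortlist[i] = algid;
        nalgs++;
    }

    static void
    remove_sorted(uint8_t algid)
    {
        int i, copyback = 0;

        for (i = 0; i < nalgs; i++) {
            if (copyback)
                sortlist[i - 1] = sortlist[i];
            else if (sortlist[i] == algid)
                copyback = 1;
        }
        if (copyback)
            nalgs--;
    }

    int
    main(void)
    {
        int i;

        insert_sorted(1); insert_sorted(2); insert_sorted(3); insert_sorted(4);
        remove_sorted(2);
        for (i = 0; i < nalgs; i++)
            printf("alg %u (minbits %d)\n", sortlist[i], minbits[sortlist[i]]);
        return (0);
    }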
*/ void -ipsec_alg_unreg(ipsec_algtype_t algtype, uint8_t algid) +ipsec_alg_unreg(ipsec_algtype_t algtype, uint8_t algid, netstack_t *ns) { - ASSERT(MUTEX_HELD(&alg_lock)); + ipsec_stack_t *ipss = ns->netstack_ipsec; - ASSERT(ipsec_alglists[algtype][algid] != NULL); - ipsec_alg_free(ipsec_alglists[algtype][algid]); - ipsec_alglists[algtype][algid] = NULL; + ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock)); - ipsec_nalgs[algtype]--; - alg_remove_sortlist(algtype, algid); + ASSERT(ipss->ipsec_alglists[algtype][algid] != NULL); + ipsec_alg_free(ipss->ipsec_alglists[algtype][algid]); + ipss->ipsec_alglists[algtype][algid] = NULL; + + ipss->ipsec_nalgs[algtype]--; + alg_remove_sortlist(algtype, algid, ns); } /* @@ -722,17 +829,21 @@ ipsec_alg_unreg(ipsec_algtype_t algtype, uint8_t algid) */ ipsec_policy_head_t * -ipsec_system_policy(void) +ipsec_system_policy(netstack_t *ns) { - ipsec_policy_head_t *h = &system_policy; + ipsec_stack_t *ipss = ns->netstack_ipsec; + ipsec_policy_head_t *h = &ipss->ipsec_system_policy; + IPPH_REFHOLD(h); return (h); } ipsec_policy_head_t * -ipsec_inactive_policy(void) +ipsec_inactive_policy(netstack_t *ns) { - ipsec_policy_head_t *h = &inactive_policy; + ipsec_stack_t *ipss = ns->netstack_ipsec; + ipsec_policy_head_t *h = &ipss->ipsec_inactive_policy; + IPPH_REFHOLD(h); return (h); } @@ -742,7 +853,8 @@ ipsec_inactive_policy(void) * pointers. */ void -ipsec_swap_policy(ipsec_policy_head_t *active, ipsec_policy_head_t *inactive) +ipsec_swap_policy(ipsec_policy_head_t *active, ipsec_policy_head_t *inactive, + netstack_t *ns) { int af, dir; avl_tree_t r1, r2; @@ -783,7 +895,7 @@ ipsec_swap_policy(ipsec_policy_head_t *active, ipsec_policy_head_t *inactive) } active->iph_gen++; inactive->iph_gen++; - ipsec_update_present_flags(); + ipsec_update_present_flags(ns->netstack_ipsec); rw_exit(&active->iph_lock); rw_exit(&inactive->iph_lock); } @@ -792,9 +904,12 @@ ipsec_swap_policy(ipsec_policy_head_t *active, ipsec_policy_head_t *inactive) * Swap global policy primary/secondary. */ void -ipsec_swap_global_policy(void) +ipsec_swap_global_policy(netstack_t *ns) { - ipsec_swap_policy(&system_policy, &inactive_policy); + ipsec_stack_t *ipss = ns->netstack_ipsec; + + ipsec_swap_policy(&ipss->ipsec_system_policy, + &ipss->ipsec_inactive_policy, ns); } /* @@ -861,13 +976,14 @@ ipsec_copy_chain(ipsec_policy_head_t *dph, ipsec_policy_t *src, * policy head as we are not changing it. */ int -ipsec_copy_polhead(ipsec_policy_head_t *sph, ipsec_policy_head_t *dph) +ipsec_copy_polhead(ipsec_policy_head_t *sph, ipsec_policy_head_t *dph, + netstack_t *ns) { int af, dir, chain, nchains; rw_enter(&dph->iph_lock, RW_WRITER); - ipsec_polhead_flush(dph); + ipsec_polhead_flush(dph, ns); rw_enter(&sph->iph_lock, RW_READER); @@ -899,7 +1015,7 @@ ipsec_copy_polhead(ipsec_policy_head_t *sph, ipsec_policy_head_t *dph) return (0); abort_copy: - ipsec_polhead_flush(dph); + ipsec_polhead_flush(dph, ns); rw_exit(&sph->iph_lock); rw_exit(&dph->iph_lock); return (ENOMEM); @@ -909,9 +1025,12 @@ abort_copy: * Clone currently active policy to the inactive policy list. 
*/ int -ipsec_clone_system_policy(void) +ipsec_clone_system_policy(netstack_t *ns) { - return (ipsec_copy_polhead(&system_policy, &inactive_policy)); + ipsec_stack_t *ipss = ns->netstack_ipsec; + + return (ipsec_copy_polhead(&ipss->ipsec_system_policy, + &ipss->ipsec_inactive_policy, ns)); } /* @@ -956,12 +1075,13 @@ iph_ipvN(ipsec_policy_head_t *iph, boolean_t v6) */ void ipsec_log_policy_failure(int type, char *func_name, ipha_t *ipha, ip6_t *ip6h, - boolean_t secure) + boolean_t secure, netstack_t *ns) { char sbuf[INET6_ADDRSTRLEN]; char dbuf[INET6_ADDRSTRLEN]; char *s; char *d; + ipsec_stack_t *ipss = ns->netstack_ipsec; ASSERT((ipha == NULL && ip6h != NULL) || (ip6h == NULL && ipha != NULL)); @@ -976,9 +1096,9 @@ ipsec_log_policy_failure(int type, char *func_name, ipha_t *ipha, ip6_t *ip6h, } /* Always bump the policy failure counter. */ - ipsec_policy_failure_count[type]++; + ipss->ipsec_policy_failure_count[type]++; - ipsec_rl_strlog(IP_MOD_ID, 0, 0, SL_ERROR|SL_WARN|SL_CONSOLE, + ipsec_rl_strlog(ns, IP_MOD_ID, 0, 0, SL_ERROR|SL_WARN|SL_CONSOLE, ipsec_policy_failure_msgs[type], func_name, (secure ? "secure" : "not secure"), s, d); } @@ -989,10 +1109,13 @@ ipsec_log_policy_failure(int type, char *func_name, ipha_t *ipha, ip6_t *ip6h, * knob to turn to throttle the rate of messages. */ void -ipsec_rl_strlog(short mid, short sid, char level, ushort_t sl, char *fmt, ...) +ipsec_rl_strlog(netstack_t *ns, short mid, short sid, char level, ushort_t sl, + char *fmt, ...) { va_list adx; hrtime_t current = gethrtime(); + ip_stack_t *ipst = ns->netstack_ip; + ipsec_stack_t *ipss = ns->netstack_ipsec; sl |= SL_CONSOLE; /* @@ -1002,26 +1125,28 @@ ipsec_rl_strlog(short mid, short sid, char level, ushort_t sl, char *fmt, ...) * msec. Convert interval (in msec) to hrtime (in nsec). */ - if (ipsec_policy_log_interval) { - if (ipsec_policy_failure_last + - ((hrtime_t)ipsec_policy_log_interval * (hrtime_t)1000000) <= - current) { + if (ipst->ips_ipsec_policy_log_interval) { + if (ipss->ipsec_policy_failure_last + + ((hrtime_t)ipst->ips_ipsec_policy_log_interval * + (hrtime_t)1000000) <= current) { va_start(adx, fmt); (void) vstrlog(mid, sid, level, sl, fmt, adx); va_end(adx); - ipsec_policy_failure_last = current; + ipss->ipsec_policy_failure_last = current; } } } void -ipsec_config_flush() +ipsec_config_flush(netstack_t *ns) { - rw_enter(&system_policy.iph_lock, RW_WRITER); - ipsec_polhead_flush(&system_policy); - ipsec_next_policy_index = 1; - rw_exit(&system_policy.iph_lock); - ipsec_action_reclaim(0); + ipsec_stack_t *ipss = ns->netstack_ipsec; + + rw_enter(&ipss->ipsec_system_policy.iph_lock, RW_WRITER); + ipsec_polhead_flush(&ipss->ipsec_system_policy, ns); + ipss->ipsec_next_policy_index = 1; + rw_exit(&ipss->ipsec_system_policy.iph_lock); + ipsec_action_reclaim_stack(ns); } /* @@ -1030,9 +1155,11 @@ ipsec_config_flush() */ static void act_alg_adjust(uint_t algtype, uint_t algid, - uint16_t *minbits, uint16_t *maxbits) + uint16_t *minbits, uint16_t *maxbits, netstack_t *ns) { - ipsec_alginfo_t *algp = ipsec_alglists[algtype][algid]; + ipsec_stack_t *ipss = ns->netstack_ipsec; + ipsec_alginfo_t *algp = ipss->ipsec_alglists[algtype][algid]; + if (algp != NULL) { /* * If passed-in minbits is zero, we assume the caller trusts @@ -1063,34 +1190,36 @@ act_alg_adjust(uint_t algtype, uint_t algid, * loaded in the system. 
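ipsec_rl_strlog() above now reads the throttle interval from the per-stack ip_stack_t and the last-logged timestamp from the ipsec_stack_t; the interval is configured in milliseconds and compared in nanoseconds against gethrtime(). A stand-alone sketch of that throttle, using clock_gettime() in place of gethrtime():

    /*
     * Editorial sketch of the rate-limited logging: a log interval kept in
     * milliseconds, converted to nanoseconds, and compared against a
     * monotonic clock.  User-space model only.
     */
    #include <stdio.h>
    #include <time.h>
    #include <stdint.h>

    static int64_t log_interval_ms = 1000;  /* like ips_ipsec_policy_log_interval */
    static int64_t last_log_ns;             /* like ipsec_policy_failure_last */

    static int64_t
    now_ns(void)
    {
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return ((int64_t)ts.tv_sec * 1000000000LL + ts.tv_nsec);
    }

    static void
    rl_log(const char *msg)
    {
        int64_t now = now_ns();

        if (log_interval_ms == 0)
            return;                          /* interval of 0 disables logging */
        /* convert the interval from msec to nsec, as in the hunk above */
        if (last_log_ns + log_interval_ms * 1000000LL <= now) {
            fprintf(stderr, "%s\n", msg);
            last_log_ns = now;
        }
    }

    int
    main(void)
    {
        int i;

        for (i = 0; i < 5; i++)              /* only the first one is emitted */
            rl_log("ipsec inbound policy mismatch (rate limited)");
        return (0);
    }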
*/ boolean_t -ipsec_check_action(ipsec_act_t *act, int *diag) +ipsec_check_action(ipsec_act_t *act, int *diag, netstack_t *ns) { ipsec_prot_t *ipp; + ipsec_stack_t *ipss = ns->netstack_ipsec; ipp = &act->ipa_apply; if (ipp->ipp_use_ah && - ipsec_alglists[IPSEC_ALG_AUTH][ipp->ipp_auth_alg] == NULL) { + ipss->ipsec_alglists[IPSEC_ALG_AUTH][ipp->ipp_auth_alg] == NULL) { *diag = SPD_DIAGNOSTIC_UNSUPP_AH_ALG; return (B_FALSE); } if (ipp->ipp_use_espa && - ipsec_alglists[IPSEC_ALG_AUTH][ipp->ipp_esp_auth_alg] == NULL) { + ipss->ipsec_alglists[IPSEC_ALG_AUTH][ipp->ipp_esp_auth_alg] == + NULL) { *diag = SPD_DIAGNOSTIC_UNSUPP_ESP_AUTH_ALG; return (B_FALSE); } if (ipp->ipp_use_esp && - ipsec_alglists[IPSEC_ALG_ENCR][ipp->ipp_encr_alg] == NULL) { + ipss->ipsec_alglists[IPSEC_ALG_ENCR][ipp->ipp_encr_alg] == NULL) { *diag = SPD_DIAGNOSTIC_UNSUPP_ESP_ENCR_ALG; return (B_FALSE); } act_alg_adjust(IPSEC_ALG_AUTH, ipp->ipp_auth_alg, - &ipp->ipp_ah_minbits, &ipp->ipp_ah_maxbits); + &ipp->ipp_ah_minbits, &ipp->ipp_ah_maxbits, ns); act_alg_adjust(IPSEC_ALG_AUTH, ipp->ipp_esp_auth_alg, - &ipp->ipp_espa_minbits, &ipp->ipp_espa_maxbits); + &ipp->ipp_espa_minbits, &ipp->ipp_espa_maxbits, ns); act_alg_adjust(IPSEC_ALG_ENCR, ipp->ipp_encr_alg, - &ipp->ipp_espe_minbits, &ipp->ipp_espe_maxbits); + &ipp->ipp_espe_minbits, &ipp->ipp_espe_maxbits, ns); if (ipp->ipp_ah_minbits > ipp->ipp_ah_maxbits) { *diag = SPD_DIAGNOSTIC_UNSUPP_AH_KEYSIZE; @@ -1113,7 +1242,7 @@ ipsec_check_action(ipsec_act_t *act, int *diag) */ static void ipsec_setup_act(ipsec_act_t *outact, ipsec_act_t *act, - uint_t auth_alg, uint_t encr_alg, uint_t eauth_alg) + uint_t auth_alg, uint_t encr_alg, uint_t eauth_alg, netstack_t *ns) { ipsec_prot_t *ipp; @@ -1124,11 +1253,11 @@ ipsec_setup_act(ipsec_act_t *outact, ipsec_act_t *act, ipp->ipp_esp_auth_alg = (uint8_t)eauth_alg; act_alg_adjust(IPSEC_ALG_AUTH, auth_alg, - &ipp->ipp_ah_minbits, &ipp->ipp_ah_maxbits); + &ipp->ipp_ah_minbits, &ipp->ipp_ah_maxbits, ns); act_alg_adjust(IPSEC_ALG_AUTH, eauth_alg, - &ipp->ipp_espa_minbits, &ipp->ipp_espa_maxbits); + &ipp->ipp_espa_minbits, &ipp->ipp_espa_maxbits, ns); act_alg_adjust(IPSEC_ALG_ENCR, encr_alg, - &ipp->ipp_espe_minbits, &ipp->ipp_espe_maxbits); + &ipp->ipp_espe_minbits, &ipp->ipp_espe_maxbits, ns); } /* @@ -1137,7 +1266,7 @@ ipsec_setup_act(ipsec_act_t *outact, ipsec_act_t *act, * and return a count in *nact (output only). */ static ipsec_act_t * -ipsec_act_wildcard_expand(ipsec_act_t *act, uint_t *nact) +ipsec_act_wildcard_expand(ipsec_act_t *act, uint_t *nact, netstack_t *ns) { boolean_t use_ah, use_esp, use_espa; boolean_t wild_auth, wild_encr, wild_eauth; @@ -1146,6 +1275,7 @@ ipsec_act_wildcard_expand(ipsec_act_t *act, uint_t *nact) uint_t encr_alg, encr_idx, encr_min, encr_max; uint_t action_count, ai; ipsec_act_t *outact; + ipsec_stack_t *ipss = ns->netstack_ipsec; if (act->ipa_type != IPSEC_ACT_APPLY) { outact = kmem_alloc(sizeof (*act), KM_NOSLEEP); @@ -1186,21 +1316,22 @@ ipsec_act_wildcard_expand(ipsec_act_t *act, uint_t *nact) * kernel policies should be set for these algorithms. 
*/ -#define SET_EXP_MINMAX(type, wild, alg, min, max) if (wild) { \ - int nalgs = ipsec_nalgs[type]; \ - if (ipsec_alglists[type][alg] != NULL) \ +#define SET_EXP_MINMAX(type, wild, alg, min, max, ipss) \ + if (wild) { \ + int nalgs = ipss->ipsec_nalgs[type]; \ + if (ipss->ipsec_alglists[type][alg] != NULL) \ nalgs--; \ action_count *= nalgs; \ min = 0; \ - max = ipsec_nalgs[type] - 1; \ + max = ipss->ipsec_nalgs[type] - 1; \ } SET_EXP_MINMAX(IPSEC_ALG_AUTH, wild_auth, SADB_AALG_NONE, - auth_min, auth_max); + auth_min, auth_max, ipss); SET_EXP_MINMAX(IPSEC_ALG_AUTH, wild_eauth, SADB_AALG_NONE, - eauth_min, eauth_max); + eauth_min, eauth_max, ipss); SET_EXP_MINMAX(IPSEC_ALG_ENCR, wild_encr, SADB_EALG_NONE, - encr_min, encr_max); + encr_min, encr_max, ipss); #undef SET_EXP_MINMAX @@ -1224,26 +1355,27 @@ ipsec_act_wildcard_expand(ipsec_act_t *act, uint_t *nact) ai = 0; -#define WHICH_ALG(type, wild, idx) ((wild)?(ipsec_sortlist[type][idx]):(idx)) +#define WHICH_ALG(type, wild, idx, ipss) \ + ((wild)?(ipss->ipsec_sortlist[type][idx]):(idx)) for (encr_idx = encr_min; encr_idx <= encr_max; encr_idx++) { - encr_alg = WHICH_ALG(IPSEC_ALG_ENCR, wild_encr, encr_idx); + encr_alg = WHICH_ALG(IPSEC_ALG_ENCR, wild_encr, encr_idx, ipss); if (wild_encr && encr_alg == SADB_EALG_NONE) continue; for (auth_idx = auth_min; auth_idx <= auth_max; auth_idx++) { auth_alg = WHICH_ALG(IPSEC_ALG_AUTH, wild_auth, - auth_idx); + auth_idx, ipss); if (wild_auth && auth_alg == SADB_AALG_NONE) continue; for (eauth_idx = eauth_min; eauth_idx <= eauth_max; eauth_idx++) { eauth_alg = WHICH_ALG(IPSEC_ALG_AUTH, - wild_eauth, eauth_idx); + wild_eauth, eauth_idx, ipss); if (wild_eauth && eauth_alg == SADB_AALG_NONE) continue; ipsec_setup_act(&outact[ai], act, - auth_alg, encr_alg, eauth_alg); + auth_alg, encr_alg, eauth_alg, ns); ai++; } } @@ -1282,9 +1414,11 @@ ipsec_prot_from_req(ipsec_req_t *req, ipsec_prot_t *ipp) * Extract a new-style action from a request. */ void -ipsec_actvec_from_req(ipsec_req_t *req, ipsec_act_t **actp, uint_t *nactp) +ipsec_actvec_from_req(ipsec_req_t *req, ipsec_act_t **actp, uint_t *nactp, + netstack_t *ns) { struct ipsec_act act; + bzero(&act, sizeof (act)); if ((req->ipsr_ah_req & IPSEC_PREF_NEVER) && (req->ipsr_esp_req & IPSEC_PREF_NEVER)) { @@ -1293,7 +1427,7 @@ ipsec_actvec_from_req(ipsec_req_t *req, ipsec_act_t **actp, uint_t *nactp) act.ipa_type = IPSEC_ACT_APPLY; ipsec_prot_from_req(req, &act.ipa_apply); } - *actp = ipsec_act_wildcard_expand(&act, nactp); + *actp = ipsec_act_wildcard_expand(&act, nactp, ns); } /* @@ -1409,15 +1543,16 @@ ipsec_actvec_free(ipsec_act_t *act, uint_t nact) * an ipsec_out_t to the packet.. 
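The SET_EXP_MINMAX and WHICH_ALG changes above make wildcard expansion iterate over the per-stack algorithm tables; the number of generated actions is the product of the loaded algorithm counts for each wildcarded slot. A toy C program that performs the same cross-product expansion over made-up algorithm lists:

    /*
     * Editorial sketch of wildcard expansion: one action per combination of
     * ESP encryption, AH auth and ESP auth algorithm, like the triple loop
     * over WHICH_ALG() above.  The algorithm id lists are invented.
     */
    #include <stdio.h>

    static int encr_algs[] = { 3, 11, 12 };      /* "loaded" encryption algs */
    static int auth_algs[] = { 2, 5 };           /* "loaded" auth algs */

    #define NELEM(a) (int)(sizeof (a) / sizeof ((a)[0]))

    int
    main(void)
    {
        int e, a, ea, count = 0;

        for (e = 0; e < NELEM(encr_algs); e++)
            for (a = 0; a < NELEM(auth_algs); a++)
                for (ea = 0; ea < NELEM(auth_algs); ea++) {
                    printf("action %d: esp-encr=%d ah-auth=%d esp-auth=%d\n",
                        count, encr_algs[e], auth_algs[a], auth_algs[ea]);
                    count++;
                }
        printf("expanded to %d actions (3 * 2 * 2)\n", count);
        return (0);
    }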
*/ static mblk_t * -ipsec_attach_global_policy(mblk_t *mp, conn_t *connp, ipsec_selector_t *sel) +ipsec_attach_global_policy(mblk_t *mp, conn_t *connp, ipsec_selector_t *sel, + netstack_t *ns) { ipsec_policy_t *p; - p = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, NULL, sel); + p = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, NULL, sel, ns); if (p == NULL) return (NULL); - return (ipsec_attach_ipsec_out(mp, connp, p, sel->ips_protocol)); + return (ipsec_attach_ipsec_out(mp, connp, p, sel->ips_protocol, ns)); } /* @@ -1426,7 +1561,7 @@ ipsec_attach_global_policy(mblk_t *mp, conn_t *connp, ipsec_selector_t *sel) */ static mblk_t * ipsec_apply_global_policy(mblk_t *ipsec_mp, conn_t *connp, - ipsec_selector_t *sel) + ipsec_selector_t *sel, netstack_t *ns) { ipsec_out_t *io; ipsec_policy_t *p; @@ -1437,7 +1572,7 @@ ipsec_apply_global_policy(mblk_t *ipsec_mp, conn_t *connp, io = (ipsec_out_t *)ipsec_mp->b_rptr; if (io->ipsec_out_policy == NULL) { - p = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, io, sel); + p = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, io, sel, ns); io->ipsec_out_policy = p; } return (ipsec_mp); @@ -1453,6 +1588,7 @@ ipsec_check_loopback_policy(mblk_t *first_mp, boolean_t mctl_present, { mblk_t *ipsec_mp; ipsec_in_t *ii; + netstack_t *ns; if (!mctl_present) return (first_mp); @@ -1460,8 +1596,9 @@ ipsec_check_loopback_policy(mblk_t *first_mp, boolean_t mctl_present, ipsec_mp = first_mp; ii = (ipsec_in_t *)ipsec_mp->b_rptr; + ns = ii->ipsec_in_ns; ASSERT(ii->ipsec_in_loopback); - IPPOL_REFRELE(ipsp); + IPPOL_REFRELE(ipsp, ns); /* * We should do an actual policy check here. Revisit this @@ -1483,6 +1620,8 @@ ipsec_check_ipsecin_unique(ipsec_in_t *ii, const char **reason, uint64_t ah_mask, esp_mask; ipsa_t *ah_assoc; ipsa_t *esp_assoc; + netstack_t *ns = ii->ipsec_in_ns; + ipsec_stack_t *ipss = ns->netstack_ipsec; ASSERT(ii->ipsec_in_secure); ASSERT(!ii->ipsec_in_loopback); @@ -1506,13 +1645,13 @@ ipsec_check_ipsecin_unique(ipsec_in_t *ii, const char **reason, if (ah_mask != 0 && ah_assoc->ipsa_unique_id != (pkt_unique & ah_mask)) { *reason = "AH inner header mismatch"; - *counter = &ipdrops_spd_ah_innermismatch; + *counter = DROPPER(ipss, ipds_spd_ah_innermismatch); return (B_FALSE); } if (esp_mask != 0 && esp_assoc->ipsa_unique_id != (pkt_unique & esp_mask)) { *reason = "ESP inner header mismatch"; - *counter = &ipdrops_spd_esp_innermismatch; + *counter = DROPPER(ipss, ipds_spd_esp_innermismatch); return (B_FALSE); } return (B_TRUE); @@ -1527,6 +1666,8 @@ ipsec_check_ipsecin_action(ipsec_in_t *ii, mblk_t *mp, ipsec_action_t *ap, ipsa_t *ah_assoc; ipsa_t *esp_assoc; boolean_t decaps; + netstack_t *ns = ii->ipsec_in_ns; + ipsec_stack_t *ipss = ns->netstack_ipsec; ASSERT((ipha == NULL && ip6h != NULL) || (ip6h == NULL && ipha != NULL)); @@ -1544,7 +1685,7 @@ ipsec_check_ipsecin_action(ipsec_in_t *ii, mblk_t *mp, ipsec_action_t *ap, return (B_TRUE); /* Deep compare necessary here?? 
*/ - *counter = &ipdrops_spd_loopback_mismatch; + *counter = DROPPER(ipss, ipds_spd_loopback_mismatch); *reason = "loopback policy mismatch"; return (B_FALSE); } @@ -1559,13 +1700,13 @@ ipsec_check_ipsecin_action(ipsec_in_t *ii, mblk_t *mp, ipsec_action_t *ap, case IPSEC_ACT_DISCARD: case IPSEC_ACT_REJECT: /* Should "fail hard" */ - *counter = &ipdrops_spd_explicit; + *counter = DROPPER(ipss, ipds_spd_explicit); *reason = "blocked by policy"; return (B_FALSE); case IPSEC_ACT_BYPASS: case IPSEC_ACT_CLEAR: - *counter = &ipdrops_spd_got_secure; + *counter = DROPPER(ipss, ipds_spd_got_secure); *reason = "expected clear, got protected"; return (B_FALSE); @@ -1583,7 +1724,7 @@ ipsec_check_ipsecin_action(ipsec_in_t *ii, mblk_t *mp, ipsec_action_t *ap, if (ah_assoc == NULL) { ret = ipsec_inbound_accept_clear(mp, ipha, ip6h); - *counter = &ipdrops_spd_got_clear; + *counter = DROPPER(ipss, ipds_spd_got_clear); *reason = "unprotected not accepted"; break; } @@ -1592,7 +1733,7 @@ ipsec_check_ipsecin_action(ipsec_in_t *ii, mblk_t *mp, ipsec_action_t *ap, if (ah_assoc->ipsa_auth_alg != ipp->ipp_auth_alg) { - *counter = &ipdrops_spd_bad_ahalg; + *counter = DROPPER(ipss, ipds_spd_bad_ahalg); *reason = "unacceptable ah alg"; ret = B_FALSE; break; @@ -1602,7 +1743,7 @@ ipsec_check_ipsecin_action(ipsec_in_t *ii, mblk_t *mp, ipsec_action_t *ap, * Don't allow this. Check IPSEC NOTE above * ip_fanout_proto(). */ - *counter = &ipdrops_spd_got_ah; + *counter = DROPPER(ipss, ipds_spd_got_ah); *reason = "unexpected AH"; ret = B_FALSE; break; @@ -1611,7 +1752,7 @@ ipsec_check_ipsecin_action(ipsec_in_t *ii, mblk_t *mp, ipsec_action_t *ap, if (esp_assoc == NULL) { ret = ipsec_inbound_accept_clear(mp, ipha, ip6h); - *counter = &ipdrops_spd_got_clear; + *counter = DROPPER(ipss, ipds_spd_got_clear); *reason = "unprotected not accepted"; break; } @@ -1620,7 +1761,7 @@ ipsec_check_ipsecin_action(ipsec_in_t *ii, mblk_t *mp, ipsec_action_t *ap, if (esp_assoc->ipsa_encr_alg != ipp->ipp_encr_alg) { - *counter = &ipdrops_spd_bad_espealg; + *counter = DROPPER(ipss, ipds_spd_bad_espealg); *reason = "unacceptable esp alg"; ret = B_FALSE; break; @@ -1632,7 +1773,8 @@ ipsec_check_ipsecin_action(ipsec_in_t *ii, mblk_t *mp, ipsec_action_t *ap, if (ipp->ipp_use_espa) { if (esp_assoc->ipsa_auth_alg != ipp->ipp_esp_auth_alg) { - *counter = &ipdrops_spd_bad_espaalg; + *counter = DROPPER(ipss, + ipds_spd_bad_espaalg); *reason = "unacceptable esp auth alg"; ret = B_FALSE; break; @@ -1643,7 +1785,7 @@ ipsec_check_ipsecin_action(ipsec_in_t *ii, mblk_t *mp, ipsec_action_t *ap, * Don't allow this. Check IPSEC NOTE above * ip_fanout_proto(). */ - *counter = &ipdrops_spd_got_esp; + *counter = DROPPER(ipss, ipds_spd_got_esp); *reason = "unexpected ESP"; ret = B_FALSE; break; @@ -1654,7 +1796,8 @@ ipsec_check_ipsecin_action(ipsec_in_t *ii, mblk_t *mp, ipsec_action_t *ap, ip6h); if (!ret) { /* XXX mutant? */ - *counter = &ipdrops_spd_bad_selfencap; + *counter = DROPPER(ipss, + ipds_spd_bad_selfencap); *reason = "self encap not found"; break; } @@ -1666,7 +1809,7 @@ ipsec_check_ipsecin_action(ipsec_in_t *ii, mblk_t *mp, ipsec_action_t *ap, * is okay. But we drop to be consistent with the * other cases. 
*/ - *counter = &ipdrops_spd_got_selfencap; + *counter = DROPPER(ipss, ipds_spd_got_selfencap); *reason = "unexpected self encap"; ret = B_FALSE; break; @@ -1750,6 +1893,9 @@ ipsec_check_ipsecin_latch(ipsec_in_t *ii, mblk_t *mp, ipsec_latch_t *ipl, ipha_t *ipha, ip6_t *ip6h, const char **reason, kstat_named_t **counter, conn_t *connp) { + netstack_t *ns = ii->ipsec_in_ns; + ipsec_stack_t *ipss = ns->netstack_ipsec; + ASSERT(ipl->ipl_ids_latched == B_TRUE); if (!ii->ipsec_in_loopback) { @@ -1760,14 +1906,14 @@ ipsec_check_ipsecin_latch(ipsec_in_t *ii, mblk_t *mp, ipsec_latch_t *ipl, */ if ((ii->ipsec_in_ah_sa != NULL) && (!spd_match_inbound_ids(ipl, ii->ipsec_in_ah_sa))) { - *counter = &ipdrops_spd_ah_badid; + *counter = DROPPER(ipss, ipds_spd_ah_badid); *reason = "AH identity mismatch"; return (B_FALSE); } if ((ii->ipsec_in_esp_sa != NULL) && (!spd_match_inbound_ids(ipl, ii->ipsec_in_esp_sa))) { - *counter = &ipdrops_spd_esp_badid; + *counter = DROPPER(ipss, ipds_spd_esp_badid); *reason = "ESP identity mismatch"; return (B_FALSE); } @@ -1797,13 +1943,17 @@ ipsec_check_ipsecin_latch(ipsec_in_t *ii, mblk_t *mp, ipsec_latch_t *ipl, */ static mblk_t * ipsec_check_ipsecin_policy(mblk_t *first_mp, ipsec_policy_t *ipsp, - ipha_t *ipha, ip6_t *ip6h, uint64_t pkt_unique) + ipha_t *ipha, ip6_t *ip6h, uint64_t pkt_unique, netstack_t *ns) { ipsec_in_t *ii; ipsec_action_t *ap; const char *reason = "no policy actions found"; mblk_t *data_mp, *ipsec_mp; - kstat_named_t *counter = &ipdrops_spd_got_secure; + ipsec_stack_t *ipss = ns->netstack_ipsec; + ip_stack_t *ipst = ns->netstack_ip; + kstat_named_t *counter; + + counter = DROPPER(ipss, ipds_spd_got_secure); data_mp = first_mp->b_cont; ipsec_mp = first_mp; @@ -1831,7 +1981,7 @@ ipsec_check_ipsecin_policy(mblk_t *first_mp, ipsec_policy_t *ipsp, if (!SA_IDS_MATCH(ii->ipsec_in_ah_sa, ii->ipsec_in_esp_sa)) { reason = "inbound AH and ESP identities differ"; - counter = &ipdrops_spd_ahesp_diffid; + counter = DROPPER(ipss, ipds_spd_ahesp_diffid); goto drop; } @@ -1846,19 +1996,20 @@ ipsec_check_ipsecin_policy(mblk_t *first_mp, ipsec_policy_t *ipsp, for (ap = ipsp->ipsp_act; ap != NULL; ap = ap->ipa_next) { if (ipsec_check_ipsecin_action(ii, data_mp, ap, ipha, ip6h, &reason, &counter)) { - BUMP_MIB(&ip_mib, ipsecInSucceeded); - IPPOL_REFRELE(ipsp); + BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded); + IPPOL_REFRELE(ipsp, ns); return (first_mp); } } drop: - ipsec_rl_strlog(IP_MOD_ID, 0, 0, SL_ERROR|SL_WARN|SL_CONSOLE, + ipsec_rl_strlog(ns, IP_MOD_ID, 0, 0, SL_ERROR|SL_WARN|SL_CONSOLE, "ipsec inbound policy mismatch: %s, packet dropped\n", reason); - IPPOL_REFRELE(ipsp); + IPPOL_REFRELE(ipsp, ns); ASSERT(ii->ipsec_in_action == NULL); - BUMP_MIB(&ip_mib, ipsecInFailed); - ip_drop_packet(first_mp, B_TRUE, NULL, NULL, counter, &spd_dropper); + BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed); + ip_drop_packet(first_mp, B_TRUE, NULL, NULL, counter, + &ipss->ipsec_spd_dropper); return (NULL); } @@ -1964,7 +2115,7 @@ ipsec_find_policy_chain(ipsec_policy_t *best, ipsec_policy_t *chain, */ ipsec_policy_t * ipsec_find_policy_head(ipsec_policy_t *best, ipsec_policy_head_t *head, - int direction, ipsec_selector_t *sel) + int direction, ipsec_selector_t *sel, netstack_t *ns) { ipsec_policy_t *curbest; ipsec_policy_root_t *root; @@ -2008,7 +2159,7 @@ ipsec_find_policy_head(ipsec_policy_t *best, ipsec_policy_head_t *head, IPPOL_REFHOLD(curbest); if (best != NULL) { - IPPOL_REFRELE(best); + IPPOL_REFRELE(best, ns); } } @@ -2027,17 +2178,19 @@ ipsec_find_policy_head(ipsec_policy_t *best, 
ipsec_policy_head_t *head, */ ipsec_policy_t * ipsec_find_policy(int direction, conn_t *connp, ipsec_out_t *io, - ipsec_selector_t *sel) + ipsec_selector_t *sel, netstack_t *ns) { ipsec_policy_t *p; + ipsec_stack_t *ipss = ns->netstack_ipsec; - p = ipsec_find_policy_head(NULL, &system_policy, direction, sel); + p = ipsec_find_policy_head(NULL, &ipss->ipsec_system_policy, + direction, sel, ns); if ((connp != NULL) && (connp->conn_policy != NULL)) { p = ipsec_find_policy_head(p, connp->conn_policy, - direction, sel); + direction, sel, ns); } else if ((io != NULL) && (io->ipsec_out_polhead != NULL)) { p = ipsec_find_policy_head(p, io->ipsec_out_polhead, - direction, sel); + direction, sel, ns); } return (p); @@ -2058,7 +2211,7 @@ ipsec_find_policy(int direction, conn_t *connp, ipsec_out_t *io, */ mblk_t * ipsec_check_global_policy(mblk_t *first_mp, conn_t *connp, - ipha_t *ipha, ip6_t *ip6h, boolean_t mctl_present) + ipha_t *ipha, ip6_t *ip6h, boolean_t mctl_present, netstack_t *ns) { ipsec_policy_t *p; ipsec_selector_t sel; @@ -2067,6 +2220,8 @@ ipsec_check_global_policy(mblk_t *first_mp, conn_t *connp, kstat_named_t *counter; ipsec_in_t *ii = NULL; uint64_t pkt_unique; + ipsec_stack_t *ipss = ns->netstack_ipsec; + ip_stack_t *ipst = ns->netstack_ip; data_mp = mctl_present ? first_mp->b_cont : first_mp; ipsec_mp = mctl_present ? first_mp : NULL; @@ -2077,9 +2232,9 @@ ipsec_check_global_policy(mblk_t *first_mp, conn_t *connp, (ip6h == NULL && ipha != NULL)); if (ipha != NULL) - policy_present = ipsec_inbound_v4_policy_present; + policy_present = ipss->ipsec_inbound_v4_policy_present; else - policy_present = ipsec_inbound_v6_policy_present; + policy_present = ipss->ipsec_inbound_v6_policy_present; if (!policy_present && connp == NULL) { /* @@ -2117,8 +2272,8 @@ ipsec_check_global_policy(mblk_t *first_mp, conn_t *connp, * an internal failure. */ ipsec_log_policy_failure(IPSEC_POLICY_MISMATCH, - "ipsec_init_inbound_sel", ipha, ip6h, B_FALSE); - counter = &ipdrops_spd_nomem; + "ipsec_init_inbound_sel", ipha, ip6h, B_FALSE, ns); + counter = DROPPER(ipss, ipds_spd_nomem); goto fail; } @@ -2133,7 +2288,8 @@ ipsec_check_global_policy(mblk_t *first_mp, conn_t *connp, * local policy alone. */ - p = ipsec_find_policy(IPSEC_TYPE_INBOUND, connp, NULL, &sel); + p = ipsec_find_policy(IPSEC_TYPE_INBOUND, connp, NULL, &sel, + ns); pkt_unique = SA_UNIQUE_ID(sel.ips_remote_port, sel.ips_local_port, sel.ips_protocol, 0); } @@ -2144,37 +2300,39 @@ ipsec_check_global_policy(mblk_t *first_mp, conn_t *connp, * We have no policy; default to succeeding. * XXX paranoid system design doesn't do this. */ - BUMP_MIB(&ip_mib, ipsecInSucceeded); + BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded); return (first_mp); } else { - counter = &ipdrops_spd_got_secure; + counter = DROPPER(ipss, ipds_spd_got_secure); ipsec_log_policy_failure(IPSEC_POLICY_NOT_NEEDED, - "ipsec_check_global_policy", ipha, ip6h, B_TRUE); + "ipsec_check_global_policy", ipha, ip6h, B_TRUE, + ns); goto fail; } } if ((ii != NULL) && (ii->ipsec_in_secure)) { return (ipsec_check_ipsecin_policy(ipsec_mp, p, ipha, ip6h, - pkt_unique)); + pkt_unique, ns)); } if (p->ipsp_act->ipa_allow_clear) { - BUMP_MIB(&ip_mib, ipsecInSucceeded); - IPPOL_REFRELE(p); + BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded); + IPPOL_REFRELE(p, ns); return (first_mp); } - IPPOL_REFRELE(p); + IPPOL_REFRELE(p, ns); /* * If we reach here, we will drop the packet because it failed the * global policy check because the packet was cleartext, and it * should not have been. 
*/ ipsec_log_policy_failure(IPSEC_POLICY_MISMATCH, - "ipsec_check_global_policy", ipha, ip6h, B_FALSE); - counter = &ipdrops_spd_got_clear; + "ipsec_check_global_policy", ipha, ip6h, B_FALSE, ns); + counter = DROPPER(ipss, ipds_spd_got_clear); fail: - ip_drop_packet(first_mp, B_TRUE, NULL, NULL, counter, &spd_dropper); - BUMP_MIB(&ip_mib, ipsecInFailed); + ip_drop_packet(first_mp, B_TRUE, NULL, NULL, counter, + &ipss->ipsec_spd_dropper); + BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed); return (NULL); } @@ -2395,9 +2553,15 @@ ipsec_check_inbound_policy(mblk_t *first_mp, conn_t *connp, mblk_t *ipsec_mp = mctl_present ? first_mp : NULL; ipsec_latch_t *ipl; uint64_t unique_id; + ipsec_stack_t *ipss; + ip_stack_t *ipst; + netstack_t *ns; ASSERT(connp != NULL); ipl = connp->conn_latch; + ns = connp->conn_netstack; + ipss = ns->netstack_ipsec; + ipst = ns->netstack_ip; if (ipsec_mp == NULL) { clear: @@ -2418,21 +2582,24 @@ clear: ret = ipsec_inbound_accept_clear(mp, ipha, ip6h); if (ret) { - BUMP_MIB(&ip_mib, ipsecInSucceeded); + BUMP_MIB(&ipst->ips_ip_mib, + ipsecInSucceeded); return (first_mp); } else { ipsec_log_policy_failure( IPSEC_POLICY_MISMATCH, "ipsec_check_inbound_policy", ipha, - ip6h, B_FALSE); + ip6h, B_FALSE, ns); ip_drop_packet(first_mp, B_TRUE, NULL, - NULL, &ipdrops_spd_got_clear, - &spd_dropper); - BUMP_MIB(&ip_mib, ipsecInFailed); + NULL, + DROPPER(ipss, ipds_spd_got_clear), + &ipss->ipsec_spd_dropper); + BUMP_MIB(&ipst->ips_ip_mib, + ipsecInFailed); return (NULL); } } else { - BUMP_MIB(&ip_mib, ipsecInSucceeded); + BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded); return (first_mp); } } else { @@ -2447,7 +2614,7 @@ clear: uchar_t db_type = mp->b_datap->db_type; mp->b_datap->db_type = M_DATA; first_mp = ipsec_check_global_policy(first_mp, connp, - ipha, ip6h, mctl_present); + ipha, ip6h, mctl_present, ns); if (first_mp != NULL) mp->b_datap->db_type = db_type; return (first_mp); @@ -2483,24 +2650,26 @@ clear: * depending on whichever is stronger. */ return (ipsec_check_global_policy(first_mp, connp, - ipha, ip6h, mctl_present)); + ipha, ip6h, mctl_present, ns)); } if (ipl->ipl_in_action != NULL) { /* Policy is cached & latched; fast(er) path */ const char *reason; kstat_named_t *counter; + if (ipsec_check_ipsecin_latch(ii, mp, ipl, ipha, ip6h, &reason, &counter, connp)) { - BUMP_MIB(&ip_mib, ipsecInSucceeded); + BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded); return (first_mp); } - ipsec_rl_strlog(IP_MOD_ID, 0, 0, SL_ERROR|SL_WARN|SL_CONSOLE, + ipsec_rl_strlog(ns, IP_MOD_ID, 0, 0, + SL_ERROR|SL_WARN|SL_CONSOLE, "ipsec inbound policy mismatch: %s, packet dropped\n", reason); ip_drop_packet(first_mp, B_TRUE, NULL, NULL, counter, - &spd_dropper); - BUMP_MIB(&ip_mib, ipsecInFailed); + &ipss->ipsec_spd_dropper); + BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed); return (NULL); } else if (ipl->ipl_in_policy == NULL) { ipsec_weird_null_inbound_policy++; @@ -2510,7 +2679,7 @@ clear: unique_id = conn_to_unique(connp, mp, ipha, ip6h); IPPOL_REFHOLD(ipl->ipl_in_policy); first_mp = ipsec_check_ipsecin_policy(first_mp, ipl->ipl_in_policy, - ipha, ip6h, unique_id); + ipha, ip6h, unique_id, ns); /* * NOTE: ipsecIn{Failed,Succeeeded} bumped by * ipsec_check_ipsecin_policy(). 
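The preceding hunks replace each global &ipdrops_* counter with DROPPER(ipss, ...), so drop statistics are charged to the stack instance that owns the packet. The definition of DROPPER() is not part of this hunk; the sketch below only models the idea of selecting a per-stack named counter by drop reason, and its enum and layout are invented.

    /*
     * Editorial sketch: per-stack named drop counters selected by reason.
     * The real DROPPER() definition is not shown in this changeset; this is
     * a user-space model of the concept only.
     */
    #include <stdio.h>
    #include <stdint.h>

    typedef enum {
        IPDS_SPD_GOT_CLEAR, IPDS_SPD_GOT_SECURE, IPDS_SPD_NOMEM, IPDS_MAX
    } drop_reason_t;

    typedef struct { const char *name; uint64_t count; } drop_counter_t;

    typedef struct {                            /* stands in for per-stack state */
        drop_counter_t drops[IPDS_MAX];
    } demo_stack_t;

    #define DROPPER(sp, which) (&(sp)->drops[(which)])

    static void
    drop_packet(drop_counter_t *counter)
    {
        counter->count++;                       /* ip_drop_packet() analogue */
    }

    int
    main(void)
    {
        demo_stack_t st = { {
            { "spd_got_clear", 0 }, { "spd_got_secure", 0 }, { "spd_nomem", 0 }
        } };

        drop_packet(DROPPER(&st, IPDS_SPD_GOT_CLEAR));
        drop_packet(DROPPER(&st, IPDS_SPD_GOT_CLEAR));
        printf("%s = %llu\n", st.drops[IPDS_SPD_GOT_CLEAR].name,
            (unsigned long long)st.drops[IPDS_SPD_GOT_CLEAR].count);
        return (0);
    }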
@@ -2659,7 +2828,7 @@ ipsec_init_inbound_sel(ipsec_selector_t *sel, mblk_t *mp, ipha_t *ipha, static boolean_t ipsec_init_outbound_ports(ipsec_selector_t *sel, mblk_t *mp, ipha_t *ipha, - ip6_t *ip6h, int outer_hdr_len) + ip6_t *ip6h, int outer_hdr_len, ipsec_stack_t *ipss) { /* * XXX cut&paste shared with ipsec_init_inbound_sel @@ -2695,7 +2864,8 @@ ipsec_init_outbound_ports(ipsec_selector_t *sel, mblk_t *mp, ipha_t *ipha, /* Always works, even if NULL. */ ipsec_freemsg_chain(spare_mp); ip_drop_packet_chain(mp, B_FALSE, NULL, NULL, - &ipdrops_spd_nomem, &spd_dropper); + DROPPER(ipss, ipds_spd_nomem), + &ipss->ipsec_spd_dropper); return (B_FALSE); } else { nexthdr = *nexthdrp; @@ -2732,7 +2902,8 @@ ipsec_init_outbound_ports(ipsec_selector_t *sel, mblk_t *mp, ipha_t *ipha, if (spare_mp == NULL && (spare_mp = msgpullup(mp, -1)) == NULL) { ip_drop_packet_chain(mp, B_FALSE, NULL, NULL, - &ipdrops_spd_nomem, &spd_dropper); + DROPPER(ipss, ipds_spd_nomem), + &ipss->ipsec_spd_dropper); return (B_FALSE); } ports = (uint16_t *)&spare_mp->b_rptr[hdr_len + outer_hdr_len]; @@ -2908,22 +3079,25 @@ policy_hash(int size, const void *start, const void *end) * but have slightly different roles. */ static uint32_t -selkey_hash(const ipsec_selkey_t *selkey) +selkey_hash(const ipsec_selkey_t *selkey, netstack_t *ns) { uint32_t valid = selkey->ipsl_valid; + ipsec_stack_t *ipss = ns->netstack_ipsec; if (!(valid & IPSL_REMOTE_ADDR)) return (IPSEC_SEL_NOHASH); if (valid & IPSL_IPV4) { - if (selkey->ipsl_remote_pfxlen == 32) + if (selkey->ipsl_remote_pfxlen == 32) { return (IPSEC_IPV4_HASH(selkey->ipsl_remote.ipsad_v4, - ipsec_spd_hashsize)); + ipss->ipsec_spd_hashsize)); + } } if (valid & IPSL_IPV6) { - if (selkey->ipsl_remote_pfxlen == 128) + if (selkey->ipsl_remote_pfxlen == 128) { return (IPSEC_IPV6_HASH(selkey->ipsl_remote.ipsad_v6, - ipsec_spd_hashsize)); + ipss->ipsec_spd_hashsize)); + } } return (IPSEC_SEL_NOHASH); } @@ -2942,7 +3116,7 @@ selector_hash(ipsec_selector_t *sel, ipsec_policy_root_t *root) * Intern actions into the action hash table. 
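selkey_hash() above now sizes its buckets from ipss->ipsec_spd_hashsize, but the hashing rule itself is unchanged: only selectors whose remote address is a full /32 or /128 are hashed, and everything else goes to the unhashed chain. A minimal model of that rule, where the modulo hash is a stand-in for IPSEC_IPV4_HASH():

    /*
     * Editorial sketch of selkey_hash(): prefix policies cannot be looked up
     * by exact match, so they return a "no hash" cookie and land on bucket 0.
     */
    #include <stdio.h>
    #include <stdint.h>

    #define SEL_NOHASH 0xffffffffu

    static uint32_t
    selkey_hash(uint32_t remote_v4, int pfxlen, uint32_t nchains)
    {
        if (pfxlen != 32)                  /* not a host selector: not hashable */
            return (SEL_NOHASH);
        return (remote_v4 % nchains);      /* stand-in for IPSEC_IPV4_HASH() */
    }

    int
    main(void)
    {
        uint32_t h = selkey_hash(0xc0a80101u, 32, 257);  /* 192.168.1.1/32 */
        uint32_t n = selkey_hash(0xc0a80100u, 24, 257);  /* 192.168.1.0/24 */

        printf("host policy -> bucket %u\n", h);
        printf("subnet policy -> %s\n",
            n == SEL_NOHASH ? "nohash chain (bucket 0)" : "hashed bucket");
        return (0);
    }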
*/ ipsec_action_t * -ipsec_act_find(const ipsec_act_t *a, int n) +ipsec_act_find(const ipsec_act_t *a, int n, netstack_t *ns) { int i; uint32_t hval; @@ -2954,6 +3128,7 @@ ipsec_act_find(const ipsec_act_t *a, int n) boolean_t want_esp = B_FALSE; boolean_t want_se = B_FALSE; boolean_t want_unique = B_FALSE; + ipsec_stack_t *ipss = ns->netstack_ipsec; /* * TODO: should canonicalize a[] (i.e., zeroize any padding) @@ -2962,9 +3137,10 @@ ipsec_act_find(const ipsec_act_t *a, int n) for (i = n-1; i >= 0; i--) { hval = policy_hash(IPSEC_ACTION_HASH_SIZE, &a[i], &a[n]); - HASH_LOCK(ipsec_action_hash, hval); + HASH_LOCK(ipss->ipsec_action_hash, hval); - for (HASH_ITERATE(ap, ipa_hash, ipsec_action_hash, hval)) { + for (HASH_ITERATE(ap, ipa_hash, + ipss->ipsec_action_hash, hval)) { if (bcmp(&ap->ipa_act, &a[i], sizeof (*a)) != 0) continue; if (ap->ipa_next != prev) @@ -2972,7 +3148,7 @@ ipsec_act_find(const ipsec_act_t *a, int n) break; } if (ap != NULL) { - HASH_UNLOCK(ipsec_action_hash, hval); + HASH_UNLOCK(ipss->ipsec_action_hash, hval); prev = ap; continue; } @@ -2981,12 +3157,12 @@ ipsec_act_find(const ipsec_act_t *a, int n) */ ap = kmem_cache_alloc(ipsec_action_cache, KM_NOSLEEP); if (ap == NULL) { - HASH_UNLOCK(ipsec_action_hash, hval); + HASH_UNLOCK(ipss->ipsec_action_hash, hval); if (prev != NULL) ipsec_action_free(prev); return (NULL); } - HASH_INSERT(ap, ipa_hash, ipsec_action_hash, hval); + HASH_INSERT(ap, ipa_hash, ipss->ipsec_action_hash, hval); ap->ipa_next = prev; ap->ipa_act = a[i]; @@ -3017,7 +3193,7 @@ ipsec_act_find(const ipsec_act_t *a, int n) if (prev) prev->ipa_refs++; prev = ap; - HASH_UNLOCK(ipsec_action_hash, hval); + HASH_UNLOCK(ipss->ipsec_action_hash, hval); } ap->ipa_refs++; /* caller's reference */ @@ -3051,6 +3227,48 @@ ipsec_action_free(ipsec_action_t *ap) } /* + * Called when the action hash table goes away. + * + * The actions can be queued on an mblk with ipsec_in or + * ipsec_out, hence the actions might still be around. + * But we decrement ipa_refs here since we no longer have + * a reference to the action from the hash table. + */ +static void +ipsec_action_free_table(ipsec_action_t *ap) +{ + while (ap != NULL) { + ipsec_action_t *np = ap->ipa_next; + + /* FIXME: remove? */ + (void) printf("ipsec_action_free_table(%p) ref %d\n", + (void *)ap, ap->ipa_refs); + ASSERT(ap->ipa_refs > 0); + IPACT_REFRELE(ap); + ap = np; + } +} + +/* + * Need to walk all stack instances since the reclaim function + * is global for all instances + */ +/* ARGSUSED */ +static void +ipsec_action_reclaim(void *arg) +{ + netstack_handle_t nh; + netstack_t *ns; + + netstack_next_init(&nh); + while ((ns = netstack_next(&nh)) != NULL) { + ipsec_action_reclaim_stack(ns); + netstack_rele(ns); + } + netstack_next_fini(&nh); +} + +/* * Periodically sweep action hash table for actions with refcount==1, and * nuke them. We cannot do this "on demand" (i.e., from IPACT_REFRELE) * because we can't close the race between another thread finding the action @@ -3061,30 +3279,31 @@ ipsec_action_free(ipsec_action_t *ap) * Note that it may take several passes of ipsec_action_gc() to free all * "stale" actions. 
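ipsec_action_reclaim() is now a thin global callback that walks every netstack and calls the per-stack sweep, since the kmem cache (and therefore its reclaim hook) is shared by all instances. The sketch below models that split with plain arrays and a singly linked intern table; all names in it are invented, and netstack_next()/HASH_* are replaced by ordinary loops.

    /*
     * Editorial sketch: a global reclaim pass that sweeps every instance's
     * intern table and frees entries whose only remaining reference is the
     * table's own (refs == 1), like the per-stack sweep above.
     */
    #include <stdio.h>
    #include <stdlib.h>

    typedef struct entry {
        struct entry *next;
        int refs;                       /* 1 == referenced only by the table */
    } entry_t;

    typedef struct { entry_t *head; } demo_stack_t;

    static void
    reclaim_stack(demo_stack_t *sp)
    {
        entry_t **prevp = &sp->head, *e;

        while ((e = *prevp) != NULL) {
            if (e->refs > 1) {          /* still in use somewhere, skip it */
                prevp = &e->next;
                continue;
            }
            *prevp = e->next;           /* unchain and free, like IPACT_REFRELE */
            free(e);
        }
    }

    static void
    reclaim_all(demo_stack_t *stacks, int nstacks)
    {
        int i;

        for (i = 0; i < nstacks; i++)   /* like the netstack_next() walk */
            reclaim_stack(&stacks[i]);
    }

    int
    main(void)
    {
        demo_stack_t stacks[2] = { { NULL }, { NULL } };
        entry_t *e;
        int i, j, n;

        for (i = 0; i < 2; i++) {
            for (j = 0; j < 3; j++) {
                e = calloc(1, sizeof (*e));
                e->refs = (j == 0) ? 2 : 1;   /* first entry still "in use" */
                e->next = stacks[i].head;
                stacks[i].head = e;
            }
        }
        reclaim_all(stacks, 2);
        for (i = 0; i < 2; i++) {
            n = 0;
            for (e = stacks[i].head; e != NULL; e = e->next)
                n++;
            printf("stack %d: entries surviving reclaim: %d\n", i, n);
        }
        return (0);
    }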
*/ -/* ARGSUSED */ static void -ipsec_action_reclaim(void *dummy) +ipsec_action_reclaim_stack(netstack_t *ns) { int i; + ipsec_stack_t *ipss = ns->netstack_ipsec; for (i = 0; i < IPSEC_ACTION_HASH_SIZE; i++) { ipsec_action_t *ap, *np; /* skip the lock if nobody home */ - if (ipsec_action_hash[i].hash_head == NULL) + if (ipss->ipsec_action_hash[i].hash_head == NULL) continue; - HASH_LOCK(ipsec_action_hash, i); - for (ap = ipsec_action_hash[i].hash_head; + HASH_LOCK(ipss->ipsec_action_hash, i); + for (ap = ipss->ipsec_action_hash[i].hash_head; ap != NULL; ap = np) { ASSERT(ap->ipa_refs > 0); np = ap->ipa_hash.hash_next; if (ap->ipa_refs > 1) continue; - HASH_UNCHAIN(ap, ipa_hash, ipsec_action_hash, i); + HASH_UNCHAIN(ap, ipa_hash, + ipss->ipsec_action_hash, i); IPACT_REFRELE(ap); } - HASH_UNLOCK(ipsec_action_hash, i); + HASH_UNLOCK(ipss->ipsec_action_hash, i); } } @@ -3093,10 +3312,11 @@ ipsec_action_reclaim(void *dummy) * This is simpler than the actions case.. */ static ipsec_sel_t * -ipsec_find_sel(ipsec_selkey_t *selkey) +ipsec_find_sel(ipsec_selkey_t *selkey, netstack_t *ns) { ipsec_sel_t *sp; uint32_t hval, bucket; + ipsec_stack_t *ipss = ns->netstack_ipsec; /* * Exactly one AF bit should be set in selkey. @@ -3104,16 +3324,16 @@ ipsec_find_sel(ipsec_selkey_t *selkey) ASSERT(!(selkey->ipsl_valid & IPSL_IPV4) ^ !(selkey->ipsl_valid & IPSL_IPV6)); - hval = selkey_hash(selkey); + hval = selkey_hash(selkey, ns); /* Set pol_hval to uninitialized until we put it in a polhead. */ selkey->ipsl_sel_hval = hval; bucket = (hval == IPSEC_SEL_NOHASH) ? 0 : hval; - ASSERT(!HASH_LOCKED(ipsec_sel_hash, bucket)); - HASH_LOCK(ipsec_sel_hash, bucket); + ASSERT(!HASH_LOCKED(ipss->ipsec_sel_hash, bucket)); + HASH_LOCK(ipss->ipsec_sel_hash, bucket); - for (HASH_ITERATE(sp, ipsl_hash, ipsec_sel_hash, bucket)) { + for (HASH_ITERATE(sp, ipsl_hash, ipss->ipsec_sel_hash, bucket)) { if (bcmp(&sp->ipsl_key, selkey, offsetof(ipsec_selkey_t, ipsl_pol_hval)) == 0) break; @@ -3121,17 +3341,17 @@ ipsec_find_sel(ipsec_selkey_t *selkey) if (sp != NULL) { sp->ipsl_refs++; - HASH_UNLOCK(ipsec_sel_hash, bucket); + HASH_UNLOCK(ipss->ipsec_sel_hash, bucket); return (sp); } sp = kmem_cache_alloc(ipsec_sel_cache, KM_NOSLEEP); if (sp == NULL) { - HASH_UNLOCK(ipsec_sel_hash, bucket); + HASH_UNLOCK(ipss->ipsec_sel_hash, bucket); return (NULL); } - HASH_INSERT(sp, ipsl_hash, ipsec_sel_hash, bucket); + HASH_INSERT(sp, ipsl_hash, ipss->ipsec_sel_hash, bucket); sp->ipsl_refs = 2; /* one for hash table, one for caller */ sp->ipsl_key = *selkey; /* Set to uninitalized and have insertion into polhead fix things. 
*/ @@ -3140,46 +3360,49 @@ ipsec_find_sel(ipsec_selkey_t *selkey) else sp->ipsl_key.ipsl_pol_hval = IPSEC_SEL_NOHASH; - HASH_UNLOCK(ipsec_sel_hash, bucket); + HASH_UNLOCK(ipss->ipsec_sel_hash, bucket); return (sp); } static void -ipsec_sel_rel(ipsec_sel_t **spp) +ipsec_sel_rel(ipsec_sel_t **spp, netstack_t *ns) { ipsec_sel_t *sp = *spp; int hval = sp->ipsl_key.ipsl_sel_hval; + ipsec_stack_t *ipss = ns->netstack_ipsec; + *spp = NULL; if (hval == IPSEC_SEL_NOHASH) hval = 0; - ASSERT(!HASH_LOCKED(ipsec_sel_hash, hval)); - HASH_LOCK(ipsec_sel_hash, hval); + ASSERT(!HASH_LOCKED(ipss->ipsec_sel_hash, hval)); + HASH_LOCK(ipss->ipsec_sel_hash, hval); if (--sp->ipsl_refs == 1) { - HASH_UNCHAIN(sp, ipsl_hash, ipsec_sel_hash, hval); + HASH_UNCHAIN(sp, ipsl_hash, ipss->ipsec_sel_hash, hval); sp->ipsl_refs--; - HASH_UNLOCK(ipsec_sel_hash, hval); + HASH_UNLOCK(ipss->ipsec_sel_hash, hval); ASSERT(sp->ipsl_refs == 0); kmem_cache_free(ipsec_sel_cache, sp); /* Caller unlocks */ return; } - HASH_UNLOCK(ipsec_sel_hash, hval); + HASH_UNLOCK(ipss->ipsec_sel_hash, hval); } /* * Free a policy rule which we know is no longer being referenced. */ void -ipsec_policy_free(ipsec_policy_t *ipp) +ipsec_policy_free(ipsec_policy_t *ipp, netstack_t *ns) { ASSERT(ipp->ipsp_refs == 0); ASSERT(ipp->ipsp_sel != NULL); ASSERT(ipp->ipsp_act != NULL); - ipsec_sel_rel(&ipp->ipsp_sel); + + ipsec_sel_rel(&ipp->ipsp_sel, ns); IPACT_REFRELE(ipp->ipsp_act); kmem_cache_free(ipsec_pol_cache, ipp); } @@ -3190,25 +3413,26 @@ ipsec_policy_free(ipsec_policy_t *ipp) */ ipsec_policy_t * ipsec_policy_create(ipsec_selkey_t *keys, const ipsec_act_t *a, - int nacts, int prio, uint64_t *index_ptr) + int nacts, int prio, uint64_t *index_ptr, netstack_t *ns) { ipsec_action_t *ap; ipsec_sel_t *sp; ipsec_policy_t *ipp; + ipsec_stack_t *ipss = ns->netstack_ipsec; if (index_ptr == NULL) - index_ptr = &ipsec_next_policy_index; + index_ptr = &ipss->ipsec_next_policy_index; ipp = kmem_cache_alloc(ipsec_pol_cache, KM_NOSLEEP); - ap = ipsec_act_find(a, nacts); - sp = ipsec_find_sel(keys); + ap = ipsec_act_find(a, nacts, ns); + sp = ipsec_find_sel(keys, ns); if ((ap == NULL) || (sp == NULL) || (ipp == NULL)) { if (ap != NULL) { IPACT_REFRELE(ap); } if (sp != NULL) - ipsec_sel_rel(&sp); + ipsec_sel_rel(&sp, ns); if (ipp != NULL) kmem_cache_free(ipsec_pol_cache, ipp); return (NULL); @@ -3227,41 +3451,44 @@ ipsec_policy_create(ipsec_selkey_t *keys, const ipsec_act_t *a, } static void -ipsec_update_present_flags() +ipsec_update_present_flags(ipsec_stack_t *ipss) { - boolean_t hashpol = (avl_numnodes(&system_policy.iph_rulebyid) > 0); + boolean_t hashpol; + + hashpol = (avl_numnodes(&ipss->ipsec_system_policy.iph_rulebyid) > 0); if (hashpol) { - ipsec_outbound_v4_policy_present = B_TRUE; - ipsec_outbound_v6_policy_present = B_TRUE; - ipsec_inbound_v4_policy_present = B_TRUE; - ipsec_inbound_v6_policy_present = B_TRUE; + ipss->ipsec_outbound_v4_policy_present = B_TRUE; + ipss->ipsec_outbound_v6_policy_present = B_TRUE; + ipss->ipsec_inbound_v4_policy_present = B_TRUE; + ipss->ipsec_inbound_v6_policy_present = B_TRUE; return; } - ipsec_outbound_v4_policy_present = (NULL != - system_policy.iph_root[IPSEC_TYPE_OUTBOUND]. + ipss->ipsec_outbound_v4_policy_present = (NULL != + ipss->ipsec_system_policy.iph_root[IPSEC_TYPE_OUTBOUND]. ipr_nonhash[IPSEC_AF_V4]); - ipsec_outbound_v6_policy_present = (NULL != - system_policy.iph_root[IPSEC_TYPE_OUTBOUND]. + ipss->ipsec_outbound_v6_policy_present = (NULL != + ipss->ipsec_system_policy.iph_root[IPSEC_TYPE_OUTBOUND]. 
ipr_nonhash[IPSEC_AF_V6]); - ipsec_inbound_v4_policy_present = (NULL != - system_policy.iph_root[IPSEC_TYPE_INBOUND]. + ipss->ipsec_inbound_v4_policy_present = (NULL != + ipss->ipsec_system_policy.iph_root[IPSEC_TYPE_INBOUND]. ipr_nonhash[IPSEC_AF_V4]); - ipsec_inbound_v6_policy_present = (NULL != - system_policy.iph_root[IPSEC_TYPE_INBOUND]. + ipss->ipsec_inbound_v6_policy_present = (NULL != + ipss->ipsec_system_policy.iph_root[IPSEC_TYPE_INBOUND]. ipr_nonhash[IPSEC_AF_V6]); } boolean_t -ipsec_policy_delete(ipsec_policy_head_t *php, ipsec_selkey_t *keys, int dir) +ipsec_policy_delete(ipsec_policy_head_t *php, ipsec_selkey_t *keys, int dir, + netstack_t *ns) { ipsec_sel_t *sp; ipsec_policy_t *ip, *nip, *head; int af; ipsec_policy_root_t *pr = &php->iph_root[dir]; - sp = ipsec_find_sel(keys); + sp = ipsec_find_sel(keys, ns); if (sp == NULL) return (B_FALSE); @@ -3282,25 +3509,26 @@ ipsec_policy_delete(ipsec_policy_head_t *php, ipsec_selkey_t *keys, int dir) continue; } - IPPOL_UNCHAIN(php, ip); + IPPOL_UNCHAIN(php, ip, ns); php->iph_gen++; - ipsec_update_present_flags(); + ipsec_update_present_flags(ns->netstack_ipsec); rw_exit(&php->iph_lock); - ipsec_sel_rel(&sp); + ipsec_sel_rel(&sp, ns); return (B_TRUE); } rw_exit(&php->iph_lock); - ipsec_sel_rel(&sp); + ipsec_sel_rel(&sp, ns); return (B_FALSE); } int -ipsec_policy_delete_index(ipsec_policy_head_t *php, uint64_t policy_index) +ipsec_policy_delete_index(ipsec_policy_head_t *php, uint64_t policy_index, + netstack_t *ns) { boolean_t found = B_FALSE; ipsec_policy_t ipkey; @@ -3332,13 +3560,13 @@ ipsec_policy_delete_index(ipsec_policy_head_t *php, uint64_t policy_index) break; } - IPPOL_UNCHAIN(php, ip); + IPPOL_UNCHAIN(php, ip, ns); found = B_TRUE; } if (found) { php->iph_gen++; - ipsec_update_present_flags(); + ipsec_update_present_flags(ns->netstack_ipsec); } rw_exit(&php->iph_lock); @@ -3530,7 +3758,8 @@ ipsec_compare_action(ipsec_policy_t *p1, ipsec_policy_t *p2) * duplicates). 
*/ void -ipsec_enter_policy(ipsec_policy_head_t *php, ipsec_policy_t *ipp, int direction) +ipsec_enter_policy(ipsec_policy_head_t *php, ipsec_policy_t *ipp, int direction, + netstack_t *ns) { ipsec_policy_root_t *pr = &php->iph_root[direction]; ipsec_selkey_t *selkey = &ipp->ipsp_sel->ipsl_key; @@ -3560,20 +3789,20 @@ ipsec_enter_policy(ipsec_policy_head_t *php, ipsec_policy_t *ipp, int direction) ipsec_insert_always(&php->iph_rulebyid, ipp); - ipsec_update_present_flags(); + ipsec_update_present_flags(ns->netstack_ipsec); } static void -ipsec_ipr_flush(ipsec_policy_head_t *php, ipsec_policy_root_t *ipr) +ipsec_ipr_flush(ipsec_policy_head_t *php, ipsec_policy_root_t *ipr, + netstack_t *ns) { ipsec_policy_t *ip, *nip; - int af, chain, nchain; for (af = 0; af < IPSEC_NAF; af++) { for (ip = ipr->ipr_nonhash[af]; ip != NULL; ip = nip) { nip = ip->ipsp_hash.hash_next; - IPPOL_UNCHAIN(php, ip); + IPPOL_UNCHAIN(php, ip, ns); } ipr->ipr_nonhash[af] = NULL; } @@ -3583,33 +3812,34 @@ ipsec_ipr_flush(ipsec_policy_head_t *php, ipsec_policy_root_t *ipr) for (ip = ipr->ipr_hash[chain].hash_head; ip != NULL; ip = nip) { nip = ip->ipsp_hash.hash_next; - IPPOL_UNCHAIN(php, ip); + IPPOL_UNCHAIN(php, ip, ns); } ipr->ipr_hash[chain].hash_head = NULL; } } void -ipsec_polhead_flush(ipsec_policy_head_t *php) +ipsec_polhead_flush(ipsec_policy_head_t *php, netstack_t *ns) { int dir; ASSERT(RW_WRITE_HELD(&php->iph_lock)); for (dir = 0; dir < IPSEC_NTYPES; dir++) - ipsec_ipr_flush(php, &php->iph_root[dir]); + ipsec_ipr_flush(php, &php->iph_root[dir], ns); - ipsec_update_present_flags(); + ipsec_update_present_flags(ns->netstack_ipsec); } void -ipsec_polhead_free(ipsec_policy_head_t *php) +ipsec_polhead_free(ipsec_policy_head_t *php, netstack_t *ns) { int dir; ASSERT(php->iph_refs == 0); + rw_enter(&php->iph_lock, RW_WRITER); - ipsec_polhead_flush(php); + ipsec_polhead_flush(php, ns); rw_exit(&php->iph_lock); rw_destroy(&php->iph_lock); for (dir = 0; dir < IPSEC_NTYPES; dir++) { @@ -3665,7 +3895,7 @@ ipsec_polhead_create(void) * If the old one had a refcount of 1, just return it. 
*/ ipsec_policy_head_t * -ipsec_polhead_split(ipsec_policy_head_t *php) +ipsec_polhead_split(ipsec_policy_head_t *php, netstack_t *ns) { ipsec_policy_head_t *nphp; @@ -3678,11 +3908,11 @@ ipsec_polhead_split(ipsec_policy_head_t *php) if (nphp == NULL) return (NULL); - if (ipsec_copy_polhead(php, nphp) != 0) { - ipsec_polhead_free(nphp); + if (ipsec_copy_polhead(php, nphp, ns) != 0) { + ipsec_polhead_free(nphp, ns); return (NULL); } - IPPH_REFRELE(php); + IPPH_REFRELE(php, ns); return (nphp); } @@ -3721,6 +3951,7 @@ ipsec_in_to_out(mblk_t *ipsec_mp, ipha_t *ipha, ip6_t *ip6h) ipsec_selector_t sel; ipsec_action_t *reflect_action = NULL; zoneid_t zoneid; + netstack_t *ns; ASSERT(ipsec_mp->b_datap->db_type == M_CTL); @@ -3742,9 +3973,10 @@ ipsec_in_to_out(mblk_t *ipsec_mp, ipha_t *ipha, ip6_t *ip6h) ifindex = ii->ipsec_in_ill_index; zoneid = ii->ipsec_in_zoneid; ASSERT(zoneid != ALL_ZONES); + ns = ii->ipsec_in_ns; v4 = ii->ipsec_in_v4; - ipsec_in_release_refs(ii); + ipsec_in_release_refs(ii); /* No netstack_rele/hold needed */ /* * The caller is going to send the datagram out which might @@ -3764,7 +3996,8 @@ ipsec_in_to_out(mblk_t *ipsec_mp, ipha_t *ipha, ip6_t *ip6h) io->ipsec_out_frtn.free_arg = (char *)io; io->ipsec_out_act = reflect_action; - if (!ipsec_init_outbound_ports(&sel, mp, ipha, ip6h, 0)) + if (!ipsec_init_outbound_ports(&sel, mp, ipha, ip6h, 0, + ns->netstack_ipsec)) return (B_FALSE); io->ipsec_out_src_port = sel.ips_local_port; @@ -3784,21 +4017,30 @@ ipsec_in_to_out(mblk_t *ipsec_mp, ipha_t *ipha, ip6_t *ip6h) io->ipsec_out_attach_if = attach_if; io->ipsec_out_ill_index = ifindex; io->ipsec_out_zoneid = zoneid; + io->ipsec_out_ns = ns; /* No netstack_hold */ + return (B_TRUE); } mblk_t * -ipsec_in_tag(mblk_t *mp, mblk_t *cont) +ipsec_in_tag(mblk_t *mp, mblk_t *cont, netstack_t *ns) { ipsec_in_t *ii = (ipsec_in_t *)mp->b_rptr; ipsec_in_t *nii; mblk_t *nmp; frtn_t nfrtn; + ipsec_stack_t *ipss = ns->netstack_ipsec; ASSERT(ii->ipsec_in_type == IPSEC_IN); ASSERT(ii->ipsec_in_len == sizeof (ipsec_in_t)); - nmp = ipsec_in_alloc(ii->ipsec_in_v4); + nmp = ipsec_in_alloc(ii->ipsec_in_v4, ns); + if (nmp == NULL) { + ip_drop_packet_chain(cont, B_FALSE, NULL, NULL, + DROPPER(ipss, ipds_spd_nomem), + &ipss->ipsec_spd_dropper); + return (NULL); + } ASSERT(nmp->b_datap->db_type == M_CTL); ASSERT(nmp->b_wptr == (nmp->b_rptr + sizeof (ipsec_info_t))); @@ -3828,20 +4070,22 @@ ipsec_in_tag(mblk_t *mp, mblk_t *cont) } mblk_t * -ipsec_out_tag(mblk_t *mp, mblk_t *cont) +ipsec_out_tag(mblk_t *mp, mblk_t *cont, netstack_t *ns) { ipsec_out_t *io = (ipsec_out_t *)mp->b_rptr; ipsec_out_t *nio; mblk_t *nmp; frtn_t nfrtn; + ipsec_stack_t *ipss = ns->netstack_ipsec; ASSERT(io->ipsec_out_type == IPSEC_OUT); ASSERT(io->ipsec_out_len == sizeof (ipsec_out_t)); - nmp = ipsec_alloc_ipsec_out(); + nmp = ipsec_alloc_ipsec_out(ns); if (nmp == NULL) { ip_drop_packet_chain(cont, B_FALSE, NULL, NULL, - &ipdrops_spd_nomem, &spd_dropper); + DROPPER(ipss, ipds_spd_nomem), + &ipss->ipsec_spd_dropper); return (NULL); } ASSERT(nmp->b_datap->db_type == M_CTL); @@ -3882,8 +4126,11 @@ ipsec_out_tag(mblk_t *mp, mblk_t *cont) static void ipsec_out_release_refs(ipsec_out_t *io) { + netstack_t *ns = io->ipsec_out_ns; + ASSERT(io->ipsec_out_type == IPSEC_OUT); ASSERT(io->ipsec_out_len == sizeof (ipsec_out_t)); + ASSERT(io->ipsec_out_ns != NULL); /* Note: IPSA_REFRELE is multi-line macro */ if (io->ipsec_out_ah_sa != NULL) @@ -3891,9 +4138,9 @@ ipsec_out_release_refs(ipsec_out_t *io) if (io->ipsec_out_esp_sa != NULL) 
IPSA_REFRELE(io->ipsec_out_esp_sa); if (io->ipsec_out_polhead != NULL) - IPPH_REFRELE(io->ipsec_out_polhead); + IPPH_REFRELE(io->ipsec_out_polhead, ns); if (io->ipsec_out_policy != NULL) - IPPOL_REFRELE(io->ipsec_out_policy); + IPPOL_REFRELE(io->ipsec_out_policy, ns); if (io->ipsec_out_act != NULL) IPACT_REFRELE(io->ipsec_out_act); if (io->ipsec_out_cred != NULL) { @@ -3901,7 +4148,7 @@ ipsec_out_release_refs(ipsec_out_t *io) io->ipsec_out_cred = NULL; } if (io->ipsec_out_latch) { - IPLATCH_REFRELE(io->ipsec_out_latch); + IPLATCH_REFRELE(io->ipsec_out_latch, ns); io->ipsec_out_latch = NULL; } } @@ -3917,13 +4164,17 @@ ipsec_out_free(void *arg) static void ipsec_in_release_refs(ipsec_in_t *ii) { + netstack_t *ns = ii->ipsec_in_ns; + + ASSERT(ii->ipsec_in_ns != NULL); + /* Note: IPSA_REFRELE is multi-line macro */ if (ii->ipsec_in_ah_sa != NULL) IPSA_REFRELE(ii->ipsec_in_ah_sa); if (ii->ipsec_in_esp_sa != NULL) IPSA_REFRELE(ii->ipsec_in_esp_sa); if (ii->ipsec_in_policy != NULL) - IPPH_REFRELE(ii->ipsec_in_policy); + IPPH_REFRELE(ii->ipsec_in_policy, ns); if (ii->ipsec_in_da != NULL) { freeb(ii->ipsec_in_da); ii->ipsec_in_da = NULL; @@ -3947,10 +4198,9 @@ ipsec_in_free(void *arg) * we can't make it fast by calling a dup. */ mblk_t * -ipsec_alloc_ipsec_out() +ipsec_alloc_ipsec_out(netstack_t *ns) { mblk_t *ipsec_mp; - ipsec_out_t *io = kmem_cache_alloc(ipsec_info_cache, KM_NOSLEEP); if (io == NULL) @@ -3969,6 +4219,7 @@ ipsec_alloc_ipsec_out() * a sane value. */ io->ipsec_out_zoneid = ALL_ZONES; + io->ipsec_out_ns = ns; /* No netstack_hold */ ipsec_mp = desballoc((uint8_t *)io, sizeof (ipsec_info_t), BPRI_HI, &io->ipsec_out_frtn); @@ -3991,22 +4242,24 @@ ipsec_alloc_ipsec_out() */ mblk_t * ipsec_attach_ipsec_out(mblk_t *mp, conn_t *connp, ipsec_policy_t *pol, - uint8_t proto) + uint8_t proto, netstack_t *ns) { mblk_t *ipsec_mp; + ipsec_stack_t *ipss = ns->netstack_ipsec; ASSERT((pol != NULL) || (connp != NULL)); - ipsec_mp = ipsec_alloc_ipsec_out(); + ipsec_mp = ipsec_alloc_ipsec_out(ns); if (ipsec_mp == NULL) { - ipsec_rl_strlog(IP_MOD_ID, 0, 0, SL_ERROR|SL_NOTE, + ipsec_rl_strlog(ns, IP_MOD_ID, 0, 0, SL_ERROR|SL_NOTE, "ipsec_attach_ipsec_out: Allocation failure\n"); - ip_drop_packet(mp, B_FALSE, NULL, NULL, &ipdrops_spd_nomem, - &spd_dropper); + ip_drop_packet(mp, B_FALSE, NULL, NULL, + DROPPER(ipss, ipds_spd_nomem), + &ipss->ipsec_spd_dropper); return (NULL); } ipsec_mp->b_cont = mp; - return (ipsec_init_ipsec_out(ipsec_mp, connp, pol, proto)); + return (ipsec_init_ipsec_out(ipsec_mp, connp, pol, proto, ns)); } /* @@ -4017,13 +4270,14 @@ ipsec_attach_ipsec_out(mblk_t *mp, conn_t *connp, ipsec_policy_t *pol, */ mblk_t * ipsec_init_ipsec_out(mblk_t *ipsec_mp, conn_t *connp, ipsec_policy_t *pol, - uint8_t proto) + uint8_t proto, netstack_t *ns) { mblk_t *mp; ipsec_out_t *io; ipsec_policy_t *p; ipha_t *ipha; ip6_t *ip6h; + ipsec_stack_t *ipss = ns->netstack_ipsec; ASSERT((pol != NULL) || (connp != NULL)); @@ -4046,6 +4300,8 @@ ipsec_init_ipsec_out(mblk_t *ipsec_mp, conn_t *connp, ipsec_policy_t *pol, if (connp != NULL) io->ipsec_out_zoneid = connp->conn_zoneid; + io->ipsec_out_ns = ns; /* No netstack_hold */ + if (mp != NULL) { ipha = (ipha_t *)mp->b_rptr; if (IPH_HDR_VERSION(ipha) == IP_VERSION) { @@ -4071,6 +4327,7 @@ ipsec_init_ipsec_out(mblk_t *ipsec_mp, conn_t *connp, ipsec_policy_t *pol, * around in IP. 
*/ if (connp != NULL && connp->conn_latch != NULL) { + ASSERT(ns == connp->conn_netstack); p = connp->conn_latch->ipl_out_policy; io->ipsec_out_latch = connp->conn_latch; IPLATCH_REFHOLD(connp->conn_latch); @@ -4081,7 +4338,7 @@ ipsec_init_ipsec_out(mblk_t *ipsec_mp, conn_t *connp, ipsec_policy_t *pol, io->ipsec_out_dst_port = connp->conn_fport; io->ipsec_out_icmp_type = io->ipsec_out_icmp_code = 0; if (pol != NULL) - IPPOL_REFRELE(pol); + IPPOL_REFRELE(pol, ns); } else if (pol != NULL) { ipsec_selector_t sel; @@ -4093,7 +4350,8 @@ ipsec_init_ipsec_out(mblk_t *ipsec_mp, conn_t *connp, ipsec_policy_t *pol, * it from the packet. */ - if (!ipsec_init_outbound_ports(&sel, mp, ipha, ip6h, 0)) { + if (!ipsec_init_outbound_ports(&sel, mp, ipha, ip6h, 0, + ns->netstack_ipsec)) { /* Callee did ip_drop_packet(). */ return (NULL); } @@ -4123,7 +4381,8 @@ ipsec_init_ipsec_out(mblk_t *ipsec_mp, conn_t *connp, ipsec_policy_t *pol, if (p->ipsp_act->ipa_act.ipa_type == IPSEC_ACT_DISCARD || p->ipsp_act->ipa_act.ipa_type == IPSEC_ACT_REJECT) { ip_drop_packet(ipsec_mp, B_FALSE, NULL, NULL, - &ipdrops_spd_explicit, &spd_dropper); + DROPPER(ipss, ipds_spd_explicit), + &ipss->ipsec_spd_dropper); ipsec_mp = NULL; } } @@ -4137,7 +4396,7 @@ ipsec_init_ipsec_out(mblk_t *ipsec_mp, conn_t *connp, ipsec_policy_t *pol, * datagram. */ mblk_t * -ipsec_in_alloc(boolean_t isv4) +ipsec_in_alloc(boolean_t isv4, netstack_t *ns) { mblk_t *ipsec_in; ipsec_in_t *ii = kmem_cache_alloc(ipsec_info_cache, KM_NOSLEEP); @@ -4151,6 +4410,7 @@ ipsec_in_alloc(boolean_t isv4) ii->ipsec_in_v4 = isv4; ii->ipsec_in_secure = B_TRUE; + ii->ipsec_in_ns = ns; /* No netstack_hold */ ii->ipsec_in_frtn.free_func = ipsec_in_free; ii->ipsec_in_frtn.free_arg = (char *)ii; @@ -4192,6 +4452,7 @@ ipsec_out_to_in(mblk_t *ipsec_mp) ipsec_action_t *act; boolean_t v4, icmp_loopback; zoneid_t zoneid; + netstack_t *ns; ASSERT(ipsec_mp->b_datap->db_type == M_CTL); @@ -4200,6 +4461,7 @@ ipsec_out_to_in(mblk_t *ipsec_mp) v4 = io->ipsec_out_v4; zoneid = io->ipsec_out_zoneid; icmp_loopback = io->ipsec_out_icmp_loopback; + ns = io->ipsec_out_ns; act = io->ipsec_out_act; if (act == NULL) { @@ -4211,13 +4473,15 @@ ipsec_out_to_in(mblk_t *ipsec_mp) } io->ipsec_out_act = NULL; - ipsec_out_release_refs(io); + ipsec_out_release_refs(io); /* No netstack_rele/hold needed */ ii = (ipsec_in_t *)ipsec_mp->b_rptr; bzero(ii, sizeof (ipsec_in_t)); ii->ipsec_in_type = IPSEC_IN; ii->ipsec_in_len = sizeof (ipsec_in_t); ii->ipsec_in_loopback = B_TRUE; + ii->ipsec_in_ns = ns; /* No netstack_hold */ + ii->ipsec_in_frtn.free_func = ipsec_in_free; ii->ipsec_in_frtn.free_arg = (char *)ii; ii->ipsec_in_action = act; @@ -4258,6 +4522,9 @@ ip_wput_attach_policy(mblk_t *ipsec_mp, ipha_t *ipha, ip6_t *ip6h, ire_t *ire, boolean_t conn_dontroutex; boolean_t conn_multicast_loopx; boolean_t policy_present; + ip_stack_t *ipst = ire->ire_ipst; + netstack_t *ns = ipst->ips_netstack; + ipsec_stack_t *ipss = ns->netstack_ipsec; ASSERT((ipha != NULL && ip6h == NULL) || (ip6h != NULL && ipha == NULL)); @@ -4265,9 +4532,9 @@ ip_wput_attach_policy(mblk_t *ipsec_mp, ipha_t *ipha, ip6_t *ip6h, ire_t *ire, bzero((void*)&sel, sizeof (sel)); if (ipha != NULL) - policy_present = ipsec_outbound_v4_policy_present; + policy_present = ipss->ipsec_outbound_v4_policy_present; else - policy_present = ipsec_outbound_v6_policy_present; + policy_present = ipss->ipsec_outbound_v6_policy_present; /* * Fast Path to see if there is any policy. 
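In the surrounding hunks the file-scope ipsec_outbound_v4_policy_present / ipsec_outbound_v6_policy_present flags become fields of the per-stack ipsec_stack_t, and the fast path reaches that state through the packet's own instance (ire->ire_ipst -> ips_netstack -> netstack_ipsec). Below is a minimal user-space sketch of that shape; every type and function name in it is invented for illustration and is not the kernel layout.

#include <stdbool.h>
#include <stdio.h>

struct sec_stack {			/* stands in for ipsec_stack_t */
	bool out_v4_policy_present;
	bool out_v6_policy_present;
};

struct packet {				/* stands in for the mblk/ire pair */
	bool is_v4;
	struct sec_stack *stack;	/* owning instance, like ire->ire_ipst */
};

/* Returns true when the expensive per-rule lookup can be skipped. */
static bool
fast_path_no_policy(const struct packet *pkt)
{
	const struct sec_stack *st = pkt->stack;
	bool present = pkt->is_v4 ? st->out_v4_policy_present :
	    st->out_v6_policy_present;

	return (!present);
}

int
main(void)
{
	struct sec_stack zone_a = { false, false };
	struct sec_stack zone_b = { true, false };
	struct packet p1 = { true, &zone_a };
	struct packet p2 = { true, &zone_b };

	printf("zone_a skips lookup: %d\n", fast_path_no_policy(&p1));
	printf("zone_b skips lookup: %d\n", fast_path_no_policy(&p2));
	return (0);
}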
*/ @@ -4370,7 +4637,8 @@ ip_wput_attach_policy(mblk_t *ipsec_mp, ipha_t *ipha, ip6_t *ip6h, ire_t *ire, default: if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, &nexthdrp)) { - BUMP_MIB(&ip6_mib, ipIfStatsOutDiscards); + BUMP_MIB(&ipst->ips_ip6_mib, + ipIfStatsOutDiscards); freemsg(ipsec_mp); /* Not IPsec-related drop. */ return (NULL); } @@ -4379,11 +4647,11 @@ ip_wput_attach_policy(mblk_t *ipsec_mp, ipha_t *ipha, ip6_t *ip6h, ire_t *ire, } } - if (!ipsec_init_outbound_ports(&sel, mp, ipha, ip6h, 0)) { + if (!ipsec_init_outbound_ports(&sel, mp, ipha, ip6h, 0, ipss)) { if (ipha != NULL) { - BUMP_MIB(&ip_mib, ipIfStatsOutDiscards); + BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); } else { - BUMP_MIB(&ip6_mib, ipIfStatsOutDiscards); + BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); } /* Callee dropped the packet. */ @@ -4397,13 +4665,14 @@ ip_wput_attach_policy(mblk_t *ipsec_mp, ipha_t *ipha, ip6_t *ip6h, ire_t *ire, * whether we have to inherit or not. */ io->ipsec_out_need_policy = B_FALSE; - ipsec_mp = ipsec_apply_global_policy(ipsec_mp, connp, &sel); + ipsec_mp = ipsec_apply_global_policy(ipsec_mp, connp, + &sel, ns); ASSERT((io->ipsec_out_policy != NULL) || (io->ipsec_out_act != NULL)); ASSERT(io->ipsec_out_need_policy == B_FALSE); return (ipsec_mp); } - ipsec_mp = ipsec_attach_global_policy(mp, connp, &sel); + ipsec_mp = ipsec_attach_global_policy(mp, connp, &sel, ns); if (ipsec_mp == NULL) return (mp); @@ -4447,6 +4716,8 @@ int ipsec_conn_cache_policy(conn_t *connp, boolean_t isv4) { boolean_t global_policy_present; + netstack_t *ns = connp->conn_netstack; + ipsec_stack_t *ipss = ns->netstack_ipsec; /* * There is no policy latching for ICMP sockets because we can't @@ -4458,7 +4729,7 @@ ipsec_conn_cache_policy(conn_t *connp, boolean_t isv4) connp->conn_in_enforce_policy = connp->conn_out_enforce_policy = B_TRUE; if (connp->conn_latch != NULL) { - IPLATCH_REFRELE(connp->conn_latch); + IPLATCH_REFRELE(connp->conn_latch, ns); connp->conn_latch = NULL; } connp->conn_flags |= IPCL_CHECK_POLICY; @@ -4466,10 +4737,10 @@ ipsec_conn_cache_policy(conn_t *connp, boolean_t isv4) } global_policy_present = isv4 ? 
- (ipsec_outbound_v4_policy_present || - ipsec_inbound_v4_policy_present) : - (ipsec_outbound_v6_policy_present || - ipsec_inbound_v6_policy_present); + (ipss->ipsec_outbound_v4_policy_present || + ipss->ipsec_inbound_v4_policy_present) : + (ipss->ipsec_outbound_v6_policy_present || + ipss->ipsec_inbound_v6_policy_present); if ((connp->conn_policy != NULL) || global_policy_present) { ipsec_selector_t sel; @@ -4493,15 +4764,17 @@ ipsec_conn_cache_policy(conn_t *connp, boolean_t isv4) sel.ips_remote_addr_v6 = connp->conn_remv6; } - p = ipsec_find_policy(IPSEC_TYPE_INBOUND, connp, NULL, &sel); + p = ipsec_find_policy(IPSEC_TYPE_INBOUND, connp, NULL, &sel, + ns); if (connp->conn_latch->ipl_in_policy != NULL) - IPPOL_REFRELE(connp->conn_latch->ipl_in_policy); + IPPOL_REFRELE(connp->conn_latch->ipl_in_policy, ns); connp->conn_latch->ipl_in_policy = p; connp->conn_in_enforce_policy = (p != NULL); - p = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, NULL, &sel); + p = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, NULL, &sel, + ns); if (connp->conn_latch->ipl_out_policy != NULL) - IPPOL_REFRELE(connp->conn_latch->ipl_out_policy); + IPPOL_REFRELE(connp->conn_latch->ipl_out_policy, ns); connp->conn_latch->ipl_out_policy = p; connp->conn_out_enforce_policy = (p != NULL); @@ -4531,12 +4804,12 @@ ipsec_conn_cache_policy(conn_t *connp, boolean_t isv4) } void -iplatch_free(ipsec_latch_t *ipl) +iplatch_free(ipsec_latch_t *ipl, netstack_t *ns) { if (ipl->ipl_out_policy != NULL) - IPPOL_REFRELE(ipl->ipl_out_policy); + IPPOL_REFRELE(ipl->ipl_out_policy, ns); if (ipl->ipl_in_policy != NULL) - IPPOL_REFRELE(ipl->ipl_in_policy); + IPPOL_REFRELE(ipl->ipl_in_policy, ns); if (ipl->ipl_in_action != NULL) IPACT_REFRELE(ipl->ipl_in_action); if (ipl->ipl_out_action != NULL) @@ -4564,32 +4837,6 @@ iplatch_create() } /* - * Identity hash table. - * - * Identities are refcounted and "interned" into the hash table. - * Only references coming from other objects (SA's, latching state) - * are counted in ipsid_refcnt. - * - * Locking: IPSID_REFHOLD is safe only when (a) the object's hash bucket - * is locked, (b) we know that the refcount must be > 0. - * - * The ipsid_next and ipsid_ptpn fields are only to be referenced or - * modified when the bucket lock is held; in particular, we only - * delete objects while holding the bucket lock, and we only increase - * the refcount from 0 to 1 while the bucket lock is held. - */ - -#define IPSID_HASHSIZE 64 - -typedef struct ipsif_s -{ - ipsid_t *ipsif_head; - kmutex_t ipsif_lock; -} ipsif_t; - -ipsif_t ipsid_buckets[IPSID_HASHSIZE]; - -/* * Hash function for ID hash table. */ static uint32_t @@ -4613,13 +4860,15 @@ ipsid_hash(int idtype, char *idstring) * Return NULL if we need to allocate a new one and can't get memory. */ ipsid_t * -ipsid_lookup(int idtype, char *idstring) +ipsid_lookup(int idtype, char *idstring, netstack_t *ns) { ipsid_t *retval; char *nstr; int idlen = strlen(idstring) + 1; + ipsec_stack_t *ipss = ns->netstack_ipsec; + ipsif_t *bucket; - ipsif_t *bucket = &ipsid_buckets[ipsid_hash(idtype, idstring)]; + bucket = &ipss->ipsec_ipsid_buckets[ipsid_hash(idtype, idstring)]; mutex_enter(&bucket->ipsif_lock); @@ -4666,14 +4915,15 @@ ipsid_lookup(int idtype, char *idstring) * Garbage collect the identity hash table. 
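The identity hash table stops being a single kernel-wide ipsid_buckets[] array: each ipsec_stack_t now owns ipsec_ipsid_buckets[], and ipsid_lookup(), ipsid_gc(), ipsid_init() and ipsid_fini() gain a netstack_t argument to find it. A stand-alone model of per-instance bucket init/fini follows, with pthread mutexes standing in for kernel mutexes; the names here are invented.

#include <pthread.h>
#include <stdlib.h>

#define	ID_HASHSIZE	64

struct id_bucket {
	void *head;			/* chain of interned identities */
	pthread_mutex_t lock;
};

struct inst_state {			/* one per network-stack instance */
	struct id_bucket buckets[ID_HASHSIZE];
};

static void
inst_id_init(struct inst_state *st)
{
	for (int i = 0; i < ID_HASHSIZE; i++) {
		st->buckets[i].head = NULL;
		(void) pthread_mutex_init(&st->buckets[i].lock, NULL);
	}
}

static void
inst_id_fini(struct inst_state *st)
{
	for (int i = 0; i < ID_HASHSIZE; i++)
		(void) pthread_mutex_destroy(&st->buckets[i].lock);
}

int
main(void)
{
	struct inst_state *st = calloc(1, sizeof (*st));

	if (st == NULL)
		return (1);
	inst_id_init(st);
	/* lookups would now index st->buckets[hash] rather than a global */
	inst_id_fini(st);
	free(st);
	return (0);
}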
*/ void -ipsid_gc() +ipsid_gc(netstack_t *ns) { int i, len; ipsid_t *id, *nid; ipsif_t *bucket; + ipsec_stack_t *ipss = ns->netstack_ipsec; for (i = 0; i < IPSID_HASHSIZE; i++) { - bucket = &ipsid_buckets[i]; + bucket = &ipss->ipsec_ipsid_buckets[i]; mutex_enter(&bucket->ipsif_lock); for (id = bucket->ipsif_head; id != NULL; id = nid) { nid = id->ipsid_next; @@ -4714,13 +4964,14 @@ ipsid_equal(ipsid_t *id1, ipsid_t *id2) * Initialize identity table; called during module initialization. */ static void -ipsid_init() +ipsid_init(netstack_t *ns) { ipsif_t *bucket; int i; + ipsec_stack_t *ipss = ns->netstack_ipsec; for (i = 0; i < IPSID_HASHSIZE; i++) { - bucket = &ipsid_buckets[i]; + bucket = &ipss->ipsec_ipsid_buckets[i]; mutex_init(&bucket->ipsif_lock, NULL, MUTEX_DEFAULT, NULL); } } @@ -4729,13 +4980,15 @@ ipsid_init() * Free identity table (preparatory to module unload) */ static void -ipsid_fini() +ipsid_fini(netstack_t *ns) { ipsif_t *bucket; int i; + ipsec_stack_t *ipss = ns->netstack_ipsec; for (i = 0; i < IPSID_HASHSIZE; i++) { - bucket = &ipsid_buckets[i]; + bucket = &ipss->ipsec_ipsid_buckets[i]; + ASSERT(bucket->ipsif_head == NULL); mutex_destroy(&bucket->ipsif_lock); } } @@ -4745,7 +4998,8 @@ ipsid_fini() * specified algorithm. Must be called while holding the algorithms lock. */ void -ipsec_alg_fix_min_max(ipsec_alginfo_t *alg, ipsec_algtype_t alg_type) +ipsec_alg_fix_min_max(ipsec_alginfo_t *alg, ipsec_algtype_t alg_type, + netstack_t *ns) { size_t crypto_min = (size_t)-1, crypto_max = 0; size_t cur_crypto_min, cur_crypto_max; @@ -4754,8 +5008,9 @@ ipsec_alg_fix_min_max(ipsec_alginfo_t *alg, ipsec_algtype_t alg_type) uint_t nmech_infos; int crypto_rc, i; crypto_mech_usage_t mask; + ipsec_stack_t *ipss = ns->netstack_ipsec; - ASSERT(MUTEX_HELD(&alg_lock)); + ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock)); /* * Compute the min, max, and default key sizes (in number of @@ -4934,14 +5189,16 @@ ipsec_alg_free(ipsec_alginfo_t *alg) if (alg == NULL) return; - if (alg->alg_key_sizes != NULL) + if (alg->alg_key_sizes != NULL) { kmem_free(alg->alg_key_sizes, (alg->alg_nkey_sizes + 1) * sizeof (uint16_t)); - - if (alg->alg_block_sizes != NULL) + alg->alg_key_sizes = NULL; + } + if (alg->alg_block_sizes != NULL) { kmem_free(alg->alg_block_sizes, (alg->alg_nblock_sizes + 1) * sizeof (uint16_t)); - + alg->alg_block_sizes = NULL; + } kmem_free(alg, sizeof (*alg)); } @@ -4978,10 +5235,28 @@ ipsec_valid_key_size(uint16_t key_size, ipsec_alginfo_t *alg) * tables when a crypto algorithm is no longer available or becomes * available, and triggers the freeing/creation of context templates * associated with existing SAs, if needed. 
+ * + * Need to walk all stack instances since the callback is global + * for all instances */ void ipsec_prov_update_callback(uint32_t event, void *event_arg) { + netstack_handle_t nh; + netstack_t *ns; + + netstack_next_init(&nh); + while ((ns = netstack_next(&nh)) != NULL) { + ipsec_prov_update_callback_stack(event, event_arg, ns); + netstack_rele(ns); + } + netstack_next_fini(&nh); +} + +static void +ipsec_prov_update_callback_stack(uint32_t event, void *event_arg, + netstack_t *ns) +{ crypto_notify_event_change_t *prov_change = (crypto_notify_event_change_t *)event_arg; uint_t algidx, algid, algtype, mech_count, mech_idx; @@ -4989,6 +5264,7 @@ ipsec_prov_update_callback(uint32_t event, void *event_arg) ipsec_alginfo_t oalg; crypto_mech_name_t *mechs; boolean_t alg_changed = B_FALSE; + ipsec_stack_t *ipss = ns->netstack_ipsec; /* ignore events for which we didn't register */ if (event != CRYPTO_EVENT_MECHS_CHANGED) { @@ -5006,12 +5282,13 @@ ipsec_prov_update_callback(uint32_t event, void *event_arg) * the algorithm valid flag and trigger an update of the * SAs that depend on that algorithm. */ - mutex_enter(&alg_lock); + mutex_enter(&ipss->ipsec_alg_lock); for (algtype = 0; algtype < IPSEC_NALGTYPES; algtype++) { - for (algidx = 0; algidx < ipsec_nalgs[algtype]; algidx++) { + for (algidx = 0; algidx < ipss->ipsec_nalgs[algtype]; + algidx++) { - algid = ipsec_sortlist[algtype][algidx]; - alg = ipsec_alglists[algtype][algid]; + algid = ipss->ipsec_sortlist[algtype][algidx]; + alg = ipss->ipsec_alglists[algtype][algid]; ASSERT(alg != NULL); /* @@ -5048,7 +5325,7 @@ ipsec_prov_update_callback(uint32_t event, void *event_arg) * removed. */ oalg = *alg; - ipsec_alg_fix_min_max(alg, algtype); + ipsec_alg_fix_min_max(alg, algtype, ns); if (!alg_changed && alg->alg_ef_minbits != oalg.alg_ef_minbits || alg->alg_ef_maxbits != oalg.alg_ef_maxbits || @@ -5065,10 +5342,10 @@ ipsec_prov_update_callback(uint32_t event, void *event_arg) CRYPTO_SW_PROVIDER) sadb_alg_update(algtype, alg->alg_id, prov_change->ec_change == - CRYPTO_MECH_ADDED); + CRYPTO_MECH_ADDED, ns); } } - mutex_exit(&alg_lock); + mutex_exit(&ipss->ipsec_alg_lock); crypto_free_mech_list(mechs, mech_count); if (alg_changed) { @@ -5078,8 +5355,8 @@ ipsec_prov_update_callback(uint32_t event, void *event_arg) * Notify ipsecah and ipsecesp of this change so * that they can send a SADB_REGISTER to their consumers. */ - ipsecah_algs_changed(); - ipsecesp_algs_changed(); + ipsecah_algs_changed(ns); + ipsecesp_algs_changed(ns); } } @@ -5088,17 +5365,23 @@ ipsec_prov_update_callback(uint32_t event, void *event_arg) * providers changes. Used to update the algorithm tables and * to free or create context templates if needed. Invoked after IPsec * is loaded successfully. + * + * This is called separately for each IP instance, so we ensure we only + * register once. */ void ipsec_register_prov_update(void) { + if (prov_update_handle != NULL) + return; + prov_update_handle = crypto_notify_events( ipsec_prov_update_callback, CRYPTO_EVENT_MECHS_CHANGED); } /* * Unregisters from the framework to be notified of crypto providers - * changes. Called from ipsec_policy_destroy(). + * changes. Called from ipsec_policy_g_destroy(). 
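ipsec_prov_update_callback() stays a single, globally registered crypto-framework callback, but per-stack state means it must now visit every instance: the patch walks them with netstack_next_init()/netstack_next()/netstack_rele()/netstack_next_fini() and delegates the work to a per-stack helper, and ipsec_register_prov_update() returns early if prov_update_handle is already set so repeated per-instance initialization registers only once. The runnable sketch below models that fan-out with an ordinary linked list standing in for the netstack list; all names in it are invented.

#include <stdio.h>

struct inst {				/* stands in for one netstack_t */
	int id;
	struct inst *next;
};

static struct inst inst2 = { 2, NULL };
static struct inst inst1 = { 1, &inst2 };
static struct inst *all_instances = &inst1;	/* the instance list */

static void
per_instance_update(struct inst *ns, int event)
{
	printf("instance %d saw event %d\n", ns->id, event);
}

/* The framework calls this once; it visits every instance itself. */
static void
global_callback(int event)
{
	for (struct inst *ns = all_instances; ns != NULL; ns = ns->next)
		per_instance_update(ns, event);
}

int
main(void)
{
	global_callback(42);
	return (0);
}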
*/ static void ipsec_unregister_prov_update(void) @@ -5122,7 +5405,8 @@ ipsec_unregister_prov_update(void) */ mblk_t * ipsec_tun_outbound(mblk_t *mp, tun_t *atp, ipha_t *inner_ipv4, - ip6_t *inner_ipv6, ipha_t *outer_ipv4, ip6_t *outer_ipv6, int outer_hdr_len) + ip6_t *inner_ipv6, ipha_t *outer_ipv4, ip6_t *outer_ipv6, int outer_hdr_len, + netstack_t *ns) { ipsec_tun_pol_t *itp = atp->tun_itp; ipsec_policy_head_t *polhead; @@ -5132,6 +5416,7 @@ ipsec_tun_outbound(mblk_t *mp, tun_t *atp, ipha_t *inner_ipv4, ipsec_out_t *io; boolean_t is_fragment; ipsec_policy_t *pol; + ipsec_stack_t *ipss = ns->netstack_ipsec; ASSERT(outer_ipv6 != NULL && outer_ipv4 == NULL || outer_ipv4 != NULL && outer_ipv6 == NULL); @@ -5179,7 +5464,7 @@ ipsec_tun_outbound(mblk_t *mp, tun_t *atp, ipha_t *inner_ipv4, * We have a fragment we need to track! */ mp = ipsec_fragcache_add(&itp->itp_fragcache, NULL, mp, - outer_hdr_len); + outer_hdr_len, ipss); if (mp == NULL) return (NULL); @@ -5197,8 +5482,9 @@ ipsec_tun_outbound(mblk_t *mp, tun_t *atp, ipha_t *inner_ipv4, ASSERT(IPH_HDR_VERSION(oiph) == IPV6_VERSION); if ((spare_mp = msgpullup(mp, -1)) == NULL) { ip_drop_packet_chain(mp, B_FALSE, - NULL, NULL, &ipdrops_spd_nomem, - &spd_dropper); + NULL, NULL, + DROPPER(ipss, ipds_spd_nomem), + &ipss->ipsec_spd_dropper); } ip6h = (ip6_t *)spare_mp->b_rptr; (void) ip_hdr_length_nexthdr_v6(spare_mp, ip6h, @@ -5221,8 +5507,9 @@ ipsec_tun_outbound(mblk_t *mp, tun_t *atp, ipha_t *inner_ipv4, if ((spare_mp == NULL) && ((spare_mp = msgpullup(mp, -1)) == NULL)) { ip_drop_packet_chain(mp, B_FALSE, - NULL, NULL, &ipdrops_spd_nomem, - &spd_dropper); + NULL, NULL, + DROPPER(ipss, ipds_spd_nomem), + &ipss->ipsec_spd_dropper); } inner_ipv6 = (ip6_t *)(spare_mp->b_rptr + hdr_len); @@ -5244,7 +5531,7 @@ ipsec_tun_outbound(mblk_t *mp, tun_t *atp, ipha_t *inner_ipv4, /* Get ports... */ if (spare_mp != NULL) { if (!ipsec_init_outbound_ports(&sel, spare_mp, - inner_ipv4, inner_ipv6, outer_hdr_len)) { + inner_ipv4, inner_ipv6, outer_hdr_len, ipss)) { /* * callee did ip_drop_packet_chain() on * spare_mp @@ -5254,7 +5541,7 @@ ipsec_tun_outbound(mblk_t *mp, tun_t *atp, ipha_t *inner_ipv4, } } else { if (!ipsec_init_outbound_ports(&sel, mp, - inner_ipv4, inner_ipv6, outer_hdr_len)) { + inner_ipv4, inner_ipv6, outer_hdr_len, ipss)) { /* callee did ip_drop_packet_chain() on mp. */ return (NULL); } @@ -5279,7 +5566,8 @@ ipsec_tun_outbound(mblk_t *mp, tun_t *atp, ipha_t *inner_ipv4, ipsec_freemsg_chain(spare_mp); } rw_enter(&polhead->iph_lock, RW_READER); - pol = ipsec_find_policy_head(NULL, polhead, IPSEC_TYPE_OUTBOUND, &sel); + pol = ipsec_find_policy_head(NULL, polhead, IPSEC_TYPE_OUTBOUND, + &sel, ns); rw_exit(&polhead->iph_lock); if (pol == NULL) { /* @@ -5302,7 +5590,8 @@ ipsec_tun_outbound(mblk_t *mp, tun_t *atp, ipha_t *inner_ipv4, "per-port policy\n"); #endif ip_drop_packet_chain(mp, B_FALSE, NULL, NULL, - &ipdrops_spd_explicit, &spd_dropper); + DROPPER(ipss, ipds_spd_explicit), + &ipss->ipsec_spd_dropper); return (NULL); } @@ -5311,11 +5600,12 @@ ipsec_tun_outbound(mblk_t *mp, tun_t *atp, ipha_t *inner_ipv4, #endif /* Construct an IPSEC_OUT message. 
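From here on the drop-accounting calls replace the globals &ipdrops_spd_nomem, &ipdrops_spd_explicit and &spd_dropper with DROPPER(ipss, ipds_spd_nomem) and &ipss->ipsec_spd_dropper, so both the counter and the dropper are taken from the owning ipsec_stack_t. The model below is only a guess at that selection pattern; MY_DROPPER and the structures are invented and are not the real DROPPER() definition.

#include <stdio.h>

struct drop_counters {
	unsigned long d_nomem;
	unsigned long d_explicit;
};

struct inst_state {			/* stands in for ipsec_stack_t */
	struct drop_counters drops;
};

/* Invented analogue of the DROPPER(ipss, field) selection in the patch. */
#define	MY_DROPPER(st, field)	(&(st)->drops.field)

static void
drop_packet(unsigned long *counter)
{
	(*counter)++;		/* the real code also frees the packet */
}

int
main(void)
{
	struct inst_state zone_a = { { 0, 0 } };
	struct inst_state zone_b = { { 0, 0 } };

	drop_packet(MY_DROPPER(&zone_a, d_nomem));
	drop_packet(MY_DROPPER(&zone_b, d_explicit));
	printf("zone_a nomem drops: %lu\n", zone_a.drops.d_nomem);
	printf("zone_b explicit drops: %lu\n", zone_b.drops.d_explicit);
	return (0);
}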
*/ - ipsec_mp = ipsec_mp_head = ipsec_alloc_ipsec_out(); + ipsec_mp = ipsec_mp_head = ipsec_alloc_ipsec_out(ns); if (ipsec_mp == NULL) { - IPPOL_REFRELE(pol); - ip_drop_packet(mp, B_FALSE, NULL, NULL, &ipdrops_spd_nomem, - &spd_dropper); + IPPOL_REFRELE(pol, ns); + ip_drop_packet(mp, B_FALSE, NULL, NULL, + DROPPER(ipss, ipds_spd_nomem), + &ipss->ipsec_spd_dropper); return (NULL); } ipsec_mp->b_cont = mp; @@ -5384,12 +5674,14 @@ ipsec_tun_outbound(mblk_t *mp, tun_t *atp, ipha_t *inner_ipv4, ASSERT(ipsec_mp != NULL); while (mp != NULL) { nmp = mp->b_next; - ipsec_mp->b_next = ipsec_out_tag(ipsec_mp_head, mp); + ipsec_mp->b_next = ipsec_out_tag(ipsec_mp_head, mp, ns); if (ipsec_mp->b_next == NULL) { ip_drop_packet_chain(ipsec_mp_head, B_FALSE, NULL, NULL, - &ipdrops_spd_nomem, &spd_dropper); + DROPPER(ipss, ipds_spd_nomem), + &ipss->ipsec_spd_dropper); ip_drop_packet_chain(mp, B_FALSE, NULL, NULL, - &ipdrops_spd_nomem, &spd_dropper); + DROPPER(ipss, ipds_spd_nomem), + &ipss->ipsec_spd_dropper); return (NULL); } ipsec_mp = ipsec_mp->b_next; @@ -5405,7 +5697,7 @@ ipsec_tun_outbound(mblk_t *mp, tun_t *atp, ipha_t *inner_ipv4, */ mblk_t * ipsec_check_ipsecin_policy_reasm(mblk_t *ipsec_mp, ipsec_policy_t *pol, - ipha_t *inner_ipv4, ip6_t *inner_ipv6, uint64_t pkt_unique) + ipha_t *inner_ipv4, ip6_t *inner_ipv6, uint64_t pkt_unique, netstack_t *ns) { /* Assume ipsec_mp is a chain of b_next-linked IPSEC_IN M_CTLs. */ mblk_t *data_chain = NULL, *data_tail = NULL; @@ -5422,7 +5714,7 @@ ipsec_check_ipsecin_policy_reasm(mblk_t *ipsec_mp, ipsec_policy_t *pol, IPPOL_REFHOLD(pol); if (ipsec_check_ipsecin_policy(ipsec_mp, pol, inner_ipv4, - inner_ipv6, pkt_unique) != NULL) { + inner_ipv6, pkt_unique, ns) != NULL) { if (data_tail == NULL) { /* First one */ data_chain = data_tail = ipsec_mp->b_cont; @@ -5437,7 +5729,7 @@ ipsec_check_ipsecin_policy_reasm(mblk_t *ipsec_mp, ipsec_policy_t *pol, * already. Need to get rid of any extra pol * references, and any remaining bits as well. */ - IPPOL_REFRELE(pol); + IPPOL_REFRELE(pol, ns); ipsec_freemsg_chain(data_chain); ipsec_freemsg_chain(ii_next); /* ipdrop stats? */ return (NULL); @@ -5448,7 +5740,7 @@ ipsec_check_ipsecin_policy_reasm(mblk_t *ipsec_mp, ipsec_policy_t *pol, * One last release because either the loop bumped it up, or we never * called ipsec_check_ipsecin_policy(). */ - IPPOL_REFRELE(pol); + IPPOL_REFRELE(pol, ns); /* data_chain is ready for return to tun module. 
*/ return (data_chain); @@ -5473,7 +5765,7 @@ ipsec_check_ipsecin_policy_reasm(mblk_t *ipsec_mp, ipsec_policy_t *pol, boolean_t ipsec_tun_inbound(mblk_t *ipsec_mp, mblk_t **data_mp, ipsec_tun_pol_t *itp, ipha_t *inner_ipv4, ip6_t *inner_ipv6, ipha_t *outer_ipv4, - ip6_t *outer_ipv6, int outer_hdr_len) + ip6_t *outer_ipv6, int outer_hdr_len, netstack_t *ns) { ipsec_policy_head_t *polhead; ipsec_selector_t sel; @@ -5484,6 +5776,7 @@ ipsec_tun_inbound(mblk_t *ipsec_mp, mblk_t **data_mp, ipsec_tun_pol_t *itp, boolean_t retval, port_policy_present, is_icmp, global_present; in6_addr_t tmpaddr; ipaddr_t tmp4; + ipsec_stack_t *ipss = ns->netstack_ipsec; uint8_t flags, *holder, *outer_hdr; sel.ips_is_icmp_inv_acq = 0; @@ -5491,10 +5784,10 @@ ipsec_tun_inbound(mblk_t *ipsec_mp, mblk_t **data_mp, ipsec_tun_pol_t *itp, if (outer_ipv4 != NULL) { ASSERT(outer_ipv6 == NULL); outer_hdr = (uint8_t *)outer_ipv4; - global_present = ipsec_inbound_v4_policy_present; + global_present = ipss->ipsec_inbound_v4_policy_present; } else { outer_hdr = (uint8_t *)outer_ipv6; - global_present = ipsec_inbound_v6_policy_present; + global_present = ipss->ipsec_inbound_v6_policy_present; } ASSERT(outer_hdr != NULL); @@ -5530,7 +5823,8 @@ ipsec_tun_inbound(mblk_t *ipsec_mp, mblk_t **data_mp, ipsec_tun_pol_t *itp, switch (rc) { case SELRET_NOMEM: ip_drop_packet(message, B_TRUE, NULL, NULL, - &ipdrops_spd_nomem, &spd_dropper); + DROPPER(ipss, ipds_spd_nomem), + &ipss->ipsec_spd_dropper); return (B_FALSE); case SELRET_TUNFRAG: /* @@ -5539,14 +5833,15 @@ ipsec_tun_inbound(mblk_t *ipsec_mp, mblk_t **data_mp, ipsec_tun_pol_t *itp, */ if (ipsec_mp == NULL) { ip_drop_packet(*data_mp, B_TRUE, NULL, NULL, - &ipdrops_spd_got_clear, &spd_dropper); + DROPPER(ipss, ipds_spd_got_clear), + &ipss->ipsec_spd_dropper); *data_mp = NULL; return (B_FALSE); } ASSERT(((ipsec_in_t *)ipsec_mp->b_rptr)-> ipsec_in_secure); message = ipsec_fragcache_add(&itp->itp_fragcache, - ipsec_mp, *data_mp, outer_hdr_len); + ipsec_mp, *data_mp, outer_hdr_len, ipss); if (message == NULL) { /* @@ -5580,13 +5875,16 @@ ipsec_tun_inbound(mblk_t *ipsec_mp, mblk_t **data_mp, ipsec_tun_pol_t *itp, */ break; case SELRET_NOMEM: - ip_drop_packet_chain(message, B_TRUE, NULL, - NULL, &ipdrops_spd_nomem, &spd_dropper); + ip_drop_packet_chain(message, B_TRUE, + NULL, NULL, + DROPPER(ipss, ipds_spd_nomem), + &ipss->ipsec_spd_dropper); return (B_FALSE); case SELRET_BADPKT: - ip_drop_packet_chain(message, B_TRUE, NULL, - NULL, &ipdrops_spd_malformed_frag, - &spd_dropper); + ip_drop_packet_chain(message, B_TRUE, + NULL, NULL, + DROPPER(ipss, ipds_spd_malformed_frag), + &ipss->ipsec_spd_dropper); return (B_FALSE); case SELRET_TUNFRAG: cmn_err(CE_WARN, "(TUNFRAG on 2nd call...)"); @@ -5637,7 +5935,7 @@ ipsec_tun_inbound(mblk_t *ipsec_mp, mblk_t **data_mp, ipsec_tun_pol_t *itp, /* find_policy_head() */ rw_enter(&polhead->iph_lock, RW_READER); pol = ipsec_find_policy_head(NULL, polhead, IPSEC_TYPE_INBOUND, - &sel); + &sel, ns); rw_exit(&polhead->iph_lock); if (pol != NULL) { if (ipsec_mp == NULL || @@ -5651,13 +5949,14 @@ ipsec_tun_inbound(mblk_t *ipsec_mp, mblk_t **data_mp, ipsec_tun_pol_t *itp, */ ASSERT(message->b_next == NULL); ip_drop_packet(message, B_TRUE, NULL, - NULL, &ipdrops_spd_got_clear, - &spd_dropper); + NULL, + DROPPER(ipss, ipds_spd_got_clear), + &ipss->ipsec_spd_dropper); } else if (ipsec_mp != NULL) { freeb(ipsec_mp); } - IPPOL_REFRELE(pol); + IPPOL_REFRELE(pol, ns); return (retval); } /* @@ -5670,7 +5969,7 @@ ipsec_tun_inbound(mblk_t *ipsec_mp, mblk_t **data_mp, 
ipsec_tun_pol_t *itp, pol, inner_ipv4, inner_ipv6, SA_UNIQUE_ID( sel.ips_remote_port, sel.ips_local_port, (inner_ipv4 == NULL) ? IPPROTO_IPV6 : - IPPROTO_ENCAP, sel.ips_protocol)); + IPPROTO_ENCAP, sel.ips_protocol), ns); return (*data_mp != NULL); } @@ -5682,7 +5981,9 @@ ipsec_tun_inbound(mblk_t *ipsec_mp, mblk_t **data_mp, ipsec_tun_pol_t *itp, */ if ((itp->itp_flags & ITPF_P_TUNNEL) && !is_icmp) { ip_drop_packet_chain(message, B_TRUE, NULL, - NULL, &ipdrops_spd_explicit, &spd_dropper); + NULL, + DROPPER(ipss, ipds_spd_explicit), + &ipss->ipsec_spd_dropper); return (B_FALSE); } } @@ -5708,7 +6009,8 @@ ipsec_tun_inbound(mblk_t *ipsec_mp, mblk_t **data_mp, ipsec_tun_pol_t *itp, } ip_drop_packet(ipsec_mp, B_TRUE, NULL, NULL, - &ipdrops_spd_got_secure, &spd_dropper); + DROPPER(ipss, ipds_spd_got_secure), + &ipss->ipsec_spd_dropper); return (B_FALSE); } @@ -5747,7 +6049,7 @@ ipsec_tun_inbound(mblk_t *ipsec_mp, mblk_t **data_mp, ipsec_tun_pol_t *itp, /* NOTE: Frees message if it returns NULL. */ if (ipsec_check_global_policy(message, NULL, outer_ipv4, outer_ipv6, - (ipsec_mp != NULL)) == NULL) { + (ipsec_mp != NULL), ns) == NULL) { return (B_FALSE); } @@ -5797,23 +6099,25 @@ tunnel_compare(const void *arg1, const void *arg2) * Free a tunnel policy node. */ void -itp_free(ipsec_tun_pol_t *node) +itp_free(ipsec_tun_pol_t *node, netstack_t *ns) { - IPPH_REFRELE(node->itp_policy); - IPPH_REFRELE(node->itp_inactive); + IPPH_REFRELE(node->itp_policy, ns); + IPPH_REFRELE(node->itp_inactive, ns); mutex_destroy(&node->itp_lock); kmem_free(node, sizeof (*node)); } void -itp_unlink(ipsec_tun_pol_t *node) +itp_unlink(ipsec_tun_pol_t *node, netstack_t *ns) { - rw_enter(&tunnel_policy_lock, RW_WRITER); - tunnel_policy_gen++; + ipsec_stack_t *ipss = ns->netstack_ipsec; + + rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_WRITER); + ipss->ipsec_tunnel_policy_gen++; ipsec_fragcache_uninit(&node->itp_fragcache); - avl_remove(&tunnel_policies, node); - rw_exit(&tunnel_policy_lock); - ITP_REFRELE(node); + avl_remove(&ipss->ipsec_tunnel_policies, node); + rw_exit(&ipss->ipsec_tunnel_policy_lock); + ITP_REFRELE(node, ns); } /* @@ -5821,18 +6125,20 @@ itp_unlink(ipsec_tun_pol_t *node) * spdsock mostly. Returns "node" with a bumped refcnt. */ ipsec_tun_pol_t * -get_tunnel_policy(char *name) +get_tunnel_policy(char *name, netstack_t *ns) { ipsec_tun_pol_t *node, lookup; + ipsec_stack_t *ipss = ns->netstack_ipsec; (void) strncpy(lookup.itp_name, name, LIFNAMSIZ); - rw_enter(&tunnel_policy_lock, RW_READER); - node = (ipsec_tun_pol_t *)avl_find(&tunnel_policies, &lookup, NULL); + rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_READER); + node = (ipsec_tun_pol_t *)avl_find(&ipss->ipsec_tunnel_policies, + &lookup, NULL); if (node != NULL) { ITP_REFHOLD(node); } - rw_exit(&tunnel_policy_lock); + rw_exit(&ipss->ipsec_tunnel_policy_lock); return (node); } @@ -5842,32 +6148,37 @@ get_tunnel_policy(char *name) * DUMP operations. iterator() will not consume a reference. 
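itp_walk() changes its iterator type to void (*)(ipsec_tun_pol_t *, void *, netstack_t *) and walks the per-stack ipsec_tunnel_policies tree under ipsec_tunnel_policy_lock, so every callback learns which instance it is running in. A compact model of a walker with that callback shape, using a pthread rwlock and a plain list in place of the kernel lock and AVL tree; the names are invented.

#include <pthread.h>
#include <stdio.h>

struct node {
	const char *name;
	struct node *next;
};

struct inst_state {			/* one per network-stack instance */
	pthread_rwlock_t lock;
	struct node *policies;		/* stands in for the AVL tree */
};

typedef void (*walk_cb_t)(struct node *, void *, struct inst_state *);

static void
inst_walk(struct inst_state *st, walk_cb_t cb, void *arg)
{
	(void) pthread_rwlock_rdlock(&st->lock);
	for (struct node *n = st->policies; n != NULL; n = n->next)
		cb(n, arg, st);		/* callback sees the owning instance */
	(void) pthread_rwlock_unlock(&st->lock);
}

static void
print_node(struct node *n, void *arg, struct inst_state *st)
{
	(void) arg;
	(void) st;
	printf("tunnel policy %s\n", n->name);
}

int
main(void)
{
	struct node n1 = { "ip.tun0", NULL };
	struct inst_state st;

	(void) pthread_rwlock_init(&st.lock, NULL);
	st.policies = &n1;
	inst_walk(&st, print_node, NULL);
	(void) pthread_rwlock_destroy(&st.lock);
	return (0);
}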
*/ void -itp_walk(void (*iterator)(ipsec_tun_pol_t *, void *), void *arg) +itp_walk(void (*iterator)(ipsec_tun_pol_t *, void *, netstack_t *), + void *arg, netstack_t *ns) { ipsec_tun_pol_t *node; + ipsec_stack_t *ipss = ns->netstack_ipsec; - rw_enter(&tunnel_policy_lock, RW_READER); - for (node = avl_first(&tunnel_policies); node != NULL; - node = AVL_NEXT(&tunnel_policies, node)) { - iterator(node, arg); + rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_READER); + for (node = avl_first(&ipss->ipsec_tunnel_policies); node != NULL; + node = AVL_NEXT(&ipss->ipsec_tunnel_policies, node)) { + iterator(node, arg, ns); } - rw_exit(&tunnel_policy_lock); + rw_exit(&ipss->ipsec_tunnel_policy_lock); } /* * Initialize policy head. This can only fail if there's a memory problem. */ static boolean_t -tunnel_polhead_init(ipsec_policy_head_t *iph) +tunnel_polhead_init(ipsec_policy_head_t *iph, netstack_t *ns) { + ipsec_stack_t *ipss = ns->netstack_ipsec; + rw_init(&iph->iph_lock, NULL, RW_DEFAULT, NULL); iph->iph_refs = 1; iph->iph_gen = 0; - if (ipsec_alloc_table(iph, tun_spd_hashsize, KM_SLEEP, B_FALSE) != 0) { + if (ipsec_alloc_table(iph, ipss->ipsec_tun_spd_hashsize, + KM_SLEEP, B_FALSE, ns) != 0) { ipsec_polhead_free_table(iph); return (B_FALSE); } - ipsec_polhead_init(iph, tun_spd_hashsize); + ipsec_polhead_init(iph, ipss->ipsec_tun_spd_hashsize); return (B_TRUE); } @@ -5877,10 +6188,11 @@ tunnel_polhead_init(ipsec_policy_head_t *iph) * node. */ ipsec_tun_pol_t * -create_tunnel_policy(char *name, int *errno, uint64_t *gen) +create_tunnel_policy(char *name, int *errno, uint64_t *gen, netstack_t *ns) { ipsec_tun_pol_t *newbie, *existing; avl_index_t where; + ipsec_stack_t *ipss = ns->netstack_ipsec; newbie = kmem_zalloc(sizeof (*newbie), KM_NOSLEEP); if (newbie == NULL) { @@ -5895,20 +6207,20 @@ create_tunnel_policy(char *name, int *errno, uint64_t *gen) (void) strncpy(newbie->itp_name, name, LIFNAMSIZ); - rw_enter(&tunnel_policy_lock, RW_WRITER); - existing = (ipsec_tun_pol_t *)avl_find(&tunnel_policies, newbie, - &where); + rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_WRITER); + existing = (ipsec_tun_pol_t *)avl_find(&ipss->ipsec_tunnel_policies, + newbie, &where); if (existing != NULL) { - itp_free(newbie); + itp_free(newbie, ns); *errno = EEXIST; - rw_exit(&tunnel_policy_lock); + rw_exit(&ipss->ipsec_tunnel_policy_lock); return (NULL); } - tunnel_policy_gen++; - *gen = tunnel_policy_gen; + ipss->ipsec_tunnel_policy_gen++; + *gen = ipss->ipsec_tunnel_policy_gen; newbie->itp_refcnt = 2; /* One for the caller, one for the tree. 
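create_tunnel_policy() inserts the new node into the instance's tunnel-policy tree and, per the comment at this point, starts its refcount at two: one reference for the caller and one held by the tree, so the node survives until both are dropped. A stand-alone illustration of that convention with invented types:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct tun_pol {
	char name[32];
	int refcnt;
	struct tun_pol *next;
};

struct inst_state {
	struct tun_pol *tree;		/* stands in for the per-stack AVL */
};

static struct tun_pol *
create_tun_pol(struct inst_state *st, const char *name)
{
	struct tun_pol *node = calloc(1, sizeof (*node));

	if (node == NULL)
		return (NULL);
	(void) strncpy(node->name, name, sizeof (node->name) - 1);
	node->refcnt = 2;		/* caller's reference + tree's reference */
	node->next = st->tree;		/* link into this instance's container */
	st->tree = node;
	return (node);
}

int
main(void)
{
	struct inst_state st = { NULL };
	struct tun_pol *p = create_tun_pol(&st, "ip.tun0");

	if (p != NULL)
		printf("%s refcnt=%d\n", p->name, p->refcnt);
	/* the caller later drops its reference; unlinking drops the tree's */
	return (0);
}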
*/ newbie->itp_next_policy_index = 1; - avl_insert(&tunnel_policies, newbie, where); + avl_insert(&ipss->ipsec_tunnel_policies, newbie, where); mutex_init(&newbie->itp_lock, NULL, MUTEX_DEFAULT, NULL); newbie->itp_policy = kmem_zalloc(sizeof (ipsec_policy_head_t), KM_NOSLEEP); @@ -5921,16 +6233,16 @@ create_tunnel_policy(char *name, int *errno, uint64_t *gen) goto nomem; } - if (!tunnel_polhead_init(newbie->itp_policy)) { + if (!tunnel_polhead_init(newbie->itp_policy, ns)) { kmem_free(newbie->itp_policy, sizeof (ipsec_policy_head_t)); kmem_free(newbie->itp_inactive, sizeof (ipsec_policy_head_t)); goto nomem; - } else if (!tunnel_polhead_init(newbie->itp_inactive)) { - IPPH_REFRELE(newbie->itp_policy); + } else if (!tunnel_polhead_init(newbie->itp_inactive, ns)) { + IPPH_REFRELE(newbie->itp_policy, ns); kmem_free(newbie->itp_inactive, sizeof (ipsec_policy_head_t)); goto nomem; } - rw_exit(&tunnel_policy_lock); + rw_exit(&ipss->ipsec_tunnel_policy_lock); return (newbie); nomem: @@ -5946,7 +6258,7 @@ nomem: */ /* ARGSUSED */ ipsec_tun_pol_t * -itp_get_byaddr_dummy(uint32_t *laddr, uint32_t *faddr, int af) +itp_get_byaddr_dummy(uint32_t *laddr, uint32_t *faddr, int af, netstack_t *ns) { return (NULL); /* Always return NULL. */ } @@ -6072,7 +6384,7 @@ ipsec_fragcache_uninit(ipsec_fragcache_t *frag) mblk_t * ipsec_fragcache_add(ipsec_fragcache_t *frag, mblk_t *ipsec_mp, mblk_t *mp, - int outer_hdr_len) + int outer_hdr_len, ipsec_stack_t *ipss) { boolean_t is_v4; time_t itpf_time; @@ -6104,7 +6416,8 @@ ipsec_fragcache_add(ipsec_fragcache_t *frag, mblk_t *ipsec_mp, mblk_t *mp, if ((spare_mp = msgpullup(mp, -1)) == NULL) { mutex_exit(&frag->itpf_lock); ip_drop_packet(first_mp, inbound, NULL, NULL, - &ipdrops_spd_nomem, &spd_dropper); + DROPPER(ipss, ipds_spd_nomem), + &ipss->ipsec_spd_dropper); return (NULL); } ip6h = (ip6_t *)(spare_mp->b_rptr + outer_hdr_len); @@ -6117,7 +6430,8 @@ ipsec_fragcache_add(ipsec_fragcache_t *frag, mblk_t *ipsec_mp, mblk_t *mp, */ mutex_exit(&frag->itpf_lock); ip_drop_packet(first_mp, inbound, NULL, NULL, - &ipdrops_spd_malformed_packet, &spd_dropper); + DROPPER(ipss, ipds_spd_malformed_packet), + &ipss->ipsec_spd_dropper); freemsg(spare_mp); return (NULL); } else { @@ -6134,7 +6448,8 @@ ipsec_fragcache_add(ipsec_fragcache_t *frag, mblk_t *ipsec_mp, mblk_t *mp, */ mutex_exit(&frag->itpf_lock); ip_drop_packet(first_mp, inbound, NULL, NULL, - &ipdrops_spd_malformed_frag, &spd_dropper); + DROPPER(ipss, ipds_spd_malformed_frag), + &ipss->ipsec_spd_dropper); freemsg(spare_mp); return (NULL); } @@ -6212,7 +6527,8 @@ ipsec_fragcache_add(ipsec_fragcache_t *frag, mblk_t *ipsec_mp, mblk_t *mp, (void) fragcache_delentry(i, fep, frag); mutex_exit(&frag->itpf_lock); ip_drop_packet(first_mp, inbound, NULL, NULL, - &ipdrops_spd_malformed_frag, &spd_dropper); + DROPPER(ipss, ipds_spd_malformed_frag), + &ipss->ipsec_spd_dropper); freemsg(spare_mp); return (NULL); } @@ -6225,7 +6541,8 @@ ipsec_fragcache_add(ipsec_fragcache_t *frag, mblk_t *ipsec_mp, mblk_t *mp, if (frag->itpf_freelist == NULL) { mutex_exit(&frag->itpf_lock); ip_drop_packet(first_mp, inbound, NULL, NULL, - &ipdrops_spd_nomem, &spd_dropper); + DROPPER(ipss, ipds_spd_nomem), + &ipss->ipsec_spd_dropper); freemsg(spare_mp); return (NULL); } @@ -6303,7 +6620,8 @@ ipsec_fragcache_add(ipsec_fragcache_t *frag, mblk_t *ipsec_mp, mblk_t *mp, if ((nspare_mp = msgpullup(ndata_mp, -1)) == NULL) { mutex_exit(&frag->itpf_lock); ip_drop_packet_chain(nmp, inbound, NULL, NULL, - &ipdrops_spd_nomem, &spd_dropper); + DROPPER(ipss, 
ipds_spd_nomem), + &ipss->ipsec_spd_dropper); return (NULL); } nip6h = (ip6_t *)nspare_mp->b_rptr; @@ -6330,7 +6648,8 @@ ipsec_fragcache_add(ipsec_fragcache_t *frag, mblk_t *ipsec_mp, mblk_t *mp, ((nspare_mp = msgpullup(ndata_mp, -1)) == NULL)) { mutex_exit(&frag->itpf_lock); ip_drop_packet_chain(nmp, inbound, NULL, NULL, - &ipdrops_spd_nomem, &spd_dropper); + DROPPER(ipss, ipds_spd_nomem), + &ipss->ipsec_spd_dropper); return (NULL); } nip6h = (ip6_t *)(nspare_mp->b_rptr + hdr_len); @@ -6338,7 +6657,8 @@ ipsec_fragcache_add(ipsec_fragcache_t *frag, mblk_t *ipsec_mp, mblk_t *mp, &nip6_hdr_length, &nv6_proto_p)) { mutex_exit(&frag->itpf_lock); ip_drop_packet_chain(nmp, inbound, NULL, NULL, - &ipdrops_spd_malformed_frag, &spd_dropper); + DROPPER(ipss, ipds_spd_malformed_frag), + &ipss->ipsec_spd_dropper); ipsec_freemsg_chain(nspare_mp); return (NULL); } @@ -6378,8 +6698,9 @@ ipsec_fragcache_add(ipsec_fragcache_t *frag, mblk_t *ipsec_mp, mblk_t *mp, /* Overlapping data does not match */ (void) fragcache_delentry(i, fep, frag); mutex_exit(&frag->itpf_lock); - ip_drop_packet(first_mp, inbound, NULL, NULL, - &ipdrops_spd_overlap_frag, &spd_dropper); + ip_drop_packet(first_mp, inbound, NULL, NULL, + DROPPER(ipss, ipds_spd_overlap_frag), + &ipss->ipsec_spd_dropper); return (NULL); } /* Part of defense for jolt2.c fragmentation attack */ @@ -6395,7 +6716,8 @@ ipsec_fragcache_add(ipsec_fragcache_t *frag, mblk_t *ipsec_mp, mblk_t *mp, */ mutex_exit(&frag->itpf_lock); ip_drop_packet(first_mp, inbound, NULL, NULL, - &ipdrops_spd_evil_frag, &spd_dropper); + DROPPER(ipss, ipds_spd_evil_frag), + &ipss->ipsec_spd_dropper); return (NULL); } @@ -6435,9 +6757,10 @@ ipsec_fragcache_add(ipsec_fragcache_t *frag, mblk_t *ipsec_mp, mblk_t *mp, /* Overlap mismatch */ (void) fragcache_delentry(i, fep, frag); mutex_exit(&frag->itpf_lock); - ip_drop_packet(first_mp, inbound, NULL, - NULL, &ipdrops_spd_overlap_frag, - &spd_dropper); + ip_drop_packet(first_mp, inbound, NULL, + NULL, + DROPPER(ipss, ipds_spd_overlap_frag), + &ipss->ipsec_spd_dropper); return (NULL); } } @@ -6466,7 +6789,8 @@ ipsec_fragcache_add(ipsec_fragcache_t *frag, mblk_t *ipsec_mp, mblk_t *mp, (void) fragcache_delentry(i, fep, frag); mutex_exit(&frag->itpf_lock); ip_drop_packet(first_mp, inbound, NULL, NULL, - &ipdrops_spd_max_frags, &spd_dropper); + DROPPER(ipss, ipds_spd_max_frags), + &ipss->ipsec_spd_dropper); return (NULL); } @@ -6504,7 +6828,8 @@ ipsec_fragcache_add(ipsec_fragcache_t *frag, mblk_t *ipsec_mp, mblk_t *mp, if ((spare_mp = msgpullup(data_mp, -1)) == NULL) { mutex_exit(&frag->itpf_lock); ip_drop_packet_chain(mp, inbound, NULL, NULL, - &ipdrops_spd_nomem, &spd_dropper); + DROPPER(ipss, ipds_spd_nomem), + &ipss->ipsec_spd_dropper); return (NULL); } ip6h = (ip6_t *)spare_mp->b_rptr; @@ -6527,7 +6852,8 @@ ipsec_fragcache_add(ipsec_fragcache_t *frag, mblk_t *ipsec_mp, mblk_t *mp, ((spare_mp = msgpullup(data_mp, -1)) == NULL)) { mutex_exit(&frag->itpf_lock); ip_drop_packet_chain(mp, inbound, NULL, NULL, - &ipdrops_spd_nomem, &spd_dropper); + DROPPER(ipss, ipds_spd_nomem), + &ipss->ipsec_spd_dropper); return (NULL); } ip6h = (ip6_t *)(spare_mp->b_rptr + hdr_len); @@ -6535,7 +6861,8 @@ ipsec_fragcache_add(ipsec_fragcache_t *frag, mblk_t *ipsec_mp, mblk_t *mp, &ip6_hdr_length, &v6_proto_p)) { mutex_exit(&frag->itpf_lock); ip_drop_packet_chain(mp, inbound, NULL, NULL, - &ipdrops_spd_malformed_frag, &spd_dropper); + DROPPER(ipss, ipds_spd_malformed_frag), + &ipss->ipsec_spd_dropper); ipsec_freemsg_chain(spare_mp); return (NULL); } @@ -6589,7 
+6916,8 @@ ipsec_fragcache_add(ipsec_fragcache_t *frag, mblk_t *ipsec_mp, mblk_t *mp, /* It is an invalid "ping-o-death" packet */ /* Discard it */ ip_drop_packet_chain(mp, inbound, NULL, NULL, - &ipdrops_spd_evil_frag, &spd_dropper); + DROPPER(ipss, ipds_spd_evil_frag), + &ipss->ipsec_spd_dropper); ipsec_freemsg_chain(spare_mp); return (NULL); } diff --git a/usr/src/uts/common/inet/ip/spdsock.c b/usr/src/uts/common/inet/ip/spdsock.c index aac93259bc..b7309ba125 100644 --- a/usr/src/uts/common/inet/ip/spdsock.c +++ b/usr/src/uts/common/inet/ip/spdsock.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -31,6 +31,7 @@ #include <sys/strsubr.h> #include <sys/strsun.h> #include <sys/stropts.h> +#include <sys/zone.h> #include <sys/vnode.h> #include <sys/sysmacros.h> #define _SUN_TPI_VERSION 2 @@ -104,7 +105,7 @@ static struct T_info_ack spdsock_g_t_info_ack = { }; /* Named Dispatch Parameter Management Structure */ -typedef struct spdsockpparam_s { +typedef struct spdsockparam_s { uint_t spdsock_param_min; uint_t spdsock_param_max; uint_t spdsock_param_value; @@ -116,7 +117,7 @@ typedef struct spdsockpparam_s { * spdsock_g_nd in spdsock_init_nd. * All of these are alterable, within the min/max values given, at run time. */ -static spdsockparam_t spdsock_param_arr[] = { +static spdsockparam_t lcl_param_arr[] = { /* min max value name */ { 4096, 65536, 8192, "spdsock_xmit_hiwat"}, { 0, 65536, 1024, "spdsock_xmit_lowat"}, @@ -124,41 +125,28 @@ static spdsockparam_t spdsock_param_arr[] = { { 65536, 1024*1024*1024, 256*1024, "spdsock_max_buf"}, { 0, 3, 0, "spdsock_debug"}, }; -#define spdsock_xmit_hiwat spdsock_param_arr[0].spdsock_param_value -#define spdsock_xmit_lowat spdsock_param_arr[1].spdsock_param_value -#define spdsock_recv_hiwat spdsock_param_arr[2].spdsock_param_value -#define spdsock_max_buf spdsock_param_arr[3].spdsock_param_value -#define spdsock_debug spdsock_param_arr[4].spdsock_param_value - -kmutex_t spdsock_param_lock; /* Protects the NDD variables. */ - -/* - * To save algorithm update messages that are processed only after IPsec - * is loaded. - */ -static spd_ext_t *spdsock_extv_algs[SPD_EXT_MAX + 1]; -static mblk_t *spdsock_mp_algs = NULL; -static boolean_t spdsock_algs_pending = B_FALSE; -static ipsec_alginfo_t *spdsock_algs[IPSEC_NALGTYPES][IPSEC_MAX_ALGS]; -static ipsec_algs_exec_mode_t spdsock_algs_exec_mode[IPSEC_NALGTYPES]; -static kmutex_t spdsock_alg_lock; +#define spds_xmit_hiwat spds_params[0].spdsock_param_value +#define spds_xmit_lowat spds_params[1].spdsock_param_value +#define spds_recv_hiwat spds_params[2].spdsock_param_value +#define spds_max_buf spds_params[3].spdsock_param_value +#define spds_debug spds_params[4].spdsock_param_value #define ss0dbg(a) printf a /* NOTE: != 0 instead of > 0 so lint doesn't complain. 
*/ -#define ss1dbg(a) if (spdsock_debug != 0) printf a -#define ss2dbg(a) if (spdsock_debug > 1) printf a -#define ss3dbg(a) if (spdsock_debug > 2) printf a - -static IDP spdsock_g_nd; +#define ss1dbg(spds, a) if (spds->spds_debug != 0) printf a +#define ss2dbg(spds, a) if (spds->spds_debug > 1) printf a +#define ss3dbg(spds, a) if (spds->spds_debug > 2) printf a static int spdsock_close(queue_t *); static int spdsock_open(queue_t *, dev_t *, int, int, cred_t *); static void spdsock_wput(queue_t *, mblk_t *); static void spdsock_wsrv(queue_t *); static void spdsock_rsrv(queue_t *); +static void *spdsock_stack_init(netstackid_t stackid, netstack_t *ns); +static void spdsock_stack_fini(netstackid_t stackid, void *arg); static void spdsock_loadcheck(void *); -static void spdsock_merge_algs(void); -static void spdsock_flush_one(ipsec_policy_head_t *); +static void spdsock_merge_algs(spd_stack_t *); +static void spdsock_flush_one(ipsec_policy_head_t *, netstack_t *); static mblk_t *spdsock_dump_next_record(spdsock_t *); static struct module_info info = { @@ -207,10 +195,12 @@ spdsock_param_get(q, mp, cp, cr) { spdsockparam_t *spdsockpa = (spdsockparam_t *)cp; uint_t value; + spdsock_t *ss = (spdsock_t *)q->q_ptr; + spd_stack_t *spds = ss->spdsock_spds; - mutex_enter(&spdsock_param_lock); + mutex_enter(&spds->spds_param_lock); value = spdsockpa->spdsock_param_value; - mutex_exit(&spdsock_param_lock); + mutex_exit(&spds->spds_param_lock); (void) mi_mpprintf(mp, "%u", value); return (0); @@ -228,69 +218,125 @@ spdsock_param_set(q, mp, value, cp, cr) { ulong_t new_value; spdsockparam_t *spdsockpa = (spdsockparam_t *)cp; + spdsock_t *ss = (spdsock_t *)q->q_ptr; + spd_stack_t *spds = ss->spdsock_spds; /* Convert the value from a string into a long integer. */ if (ddi_strtoul(value, NULL, 10, &new_value) != 0) return (EINVAL); - mutex_enter(&spdsock_param_lock); + mutex_enter(&spds->spds_param_lock); /* * Fail the request if the new value does not lie within the * required bounds. */ if (new_value < spdsockpa->spdsock_param_min || new_value > spdsockpa->spdsock_param_max) { - mutex_exit(&spdsock_param_lock); + mutex_exit(&spds->spds_param_lock); return (EINVAL); } /* Set the new value */ spdsockpa->spdsock_param_value = new_value; - mutex_exit(&spdsock_param_lock); + mutex_exit(&spds->spds_param_lock); return (0); } +/* + * Initialize at module load time + */ boolean_t spdsock_ddi_init(void) { - spdsockparam_t *ssp = spdsock_param_arr; - int count = A_CNT(spdsock_param_arr); - - if (!spdsock_g_nd) { - for (; count-- > 0; ssp++) { - if (ssp->spdsock_param_name != NULL && - (ssp->spdsock_param_name[0] != '\0')) { - if (!nd_load(&spdsock_g_nd, - ssp->spdsock_param_name, - spdsock_param_get, spdsock_param_set, - (caddr_t)ssp)) { - nd_free(&spdsock_g_nd); - return (B_FALSE); - } - } - } - } - spdsock_max_optsize = optcom_max_optsize( spdsock_opt_obj.odb_opt_des_arr, spdsock_opt_obj.odb_opt_arr_cnt); spdsock_vmem = vmem_create("spdsock", (void *)1, MAXMIN, 1, NULL, NULL, NULL, 1, VM_SLEEP | VMC_IDENTIFIER); - mutex_init(&spdsock_param_lock, NULL, MUTEX_DEFAULT, NULL); - mutex_init(&spdsock_alg_lock, NULL, MUTEX_DEFAULT, NULL); + /* + * We want to be informed each time a stack is created or + * destroyed in the kernel, so we can maintain the + * set of spd_stack_t's. + */ + netstack_register(NS_SPDSOCK, spdsock_stack_init, NULL, + spdsock_stack_fini); + + return (B_TRUE); +} +/* + * Walk through the param array specified registering each element with the + * named dispatch handler. 
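spdsock's ndd tunables stop being the single file-scope array: spdsock_stack_init() kmem_allocs a private copy of the lcl_param_arr template, bcopys the defaults in, and registers it through spdsock_param_register(), so every stack instance can be tuned without affecting the others. A user-space sketch of that clone-the-template pattern; the two sample rows mirror lcl_param_arr entries, the rest of the names are invented.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct {
	unsigned int p_min;
	unsigned int p_max;
	unsigned int p_value;
	const char *p_name;
} param_t;

/* Shared, read-only template (analogue of lcl_param_arr). */
static const param_t param_template[] = {
	{ 4096, 65536, 8192, "spdsock_xmit_hiwat" },
	{ 0,    65536, 1024, "spdsock_xmit_lowat" },
};

/* Each instance gets a writable copy it can tune independently. */
static param_t *
clone_params(void)
{
	param_t *p = malloc(sizeof (param_template));

	if (p != NULL)
		memcpy(p, param_template, sizeof (param_template));
	return (p);
}

int
main(void)
{
	param_t *zone_a = clone_params();
	param_t *zone_b = clone_params();

	if (zone_a == NULL || zone_b == NULL)
		return (1);
	zone_a[0].p_value = 16384;	/* does not affect zone_b */
	printf("zone_a=%u zone_b=%u\n", zone_a[0].p_value, zone_b[0].p_value);
	free(zone_a);
	free(zone_b);
	return (0);
}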
+ */ +static boolean_t +spdsock_param_register(IDP *ndp, spdsockparam_t *ssp, int cnt) +{ + for (; cnt-- > 0; ssp++) { + if (ssp->spdsock_param_name != NULL && + ssp->spdsock_param_name[0]) { + if (!nd_load(ndp, + ssp->spdsock_param_name, + spdsock_param_get, spdsock_param_set, + (caddr_t)ssp)) { + nd_free(ndp); + return (B_FALSE); + } + } + } return (B_TRUE); } +/* + * Initialize for each stack instance + */ +/* ARGSUSED */ +static void * +spdsock_stack_init(netstackid_t stackid, netstack_t *ns) +{ + spd_stack_t *spds; + spdsockparam_t *ssp; + + spds = (spd_stack_t *)kmem_zalloc(sizeof (*spds), KM_SLEEP); + spds->spds_netstack = ns; + + ASSERT(spds->spds_g_nd == NULL); + + ssp = (spdsockparam_t *)kmem_alloc(sizeof (lcl_param_arr), KM_SLEEP); + spds->spds_params = ssp; + bcopy(lcl_param_arr, ssp, sizeof (lcl_param_arr)); + + (void) spdsock_param_register(&spds->spds_g_nd, ssp, + A_CNT(lcl_param_arr)); + + mutex_init(&spds->spds_param_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&spds->spds_alg_lock, NULL, MUTEX_DEFAULT, NULL); + + return (spds); +} + void spdsock_ddi_destroy(void) { vmem_destroy(spdsock_vmem); - mutex_destroy(&spdsock_param_lock); - mutex_destroy(&spdsock_alg_lock); - nd_free(&spdsock_g_nd); + + netstack_unregister(NS_SPDSOCK); +} + +/* ARGSUSED */ +static void +spdsock_stack_fini(netstackid_t stackid, void *arg) +{ + spd_stack_t *spds = (spd_stack_t *)arg; + + mutex_destroy(&spds->spds_param_lock); + mutex_destroy(&spds->spds_alg_lock); + nd_free(&spds->spds_g_nd); + kmem_free(spds->spds_params, sizeof (lcl_param_arr)); + spds->spds_params = NULL; + + kmem_free(spds, sizeof (*spds)); } /* @@ -488,8 +534,9 @@ spd_echo(queue_t *q, mblk_t *mp) /* * Do NOT consume a reference to itp. */ +/*ARGSUSED*/ static void -spdsock_flush_node(ipsec_tun_pol_t *itp, void *cookie) +spdsock_flush_node(ipsec_tun_pol_t *itp, void *cookie, netstack_t *ns) { boolean_t active = (boolean_t)cookie; ipsec_policy_head_t *iph; @@ -497,7 +544,7 @@ spdsock_flush_node(ipsec_tun_pol_t *itp, void *cookie) iph = active ? itp->itp_policy : itp->itp_inactive; IPPH_REFHOLD(iph); mutex_enter(&itp->itp_lock); - spdsock_flush_one(iph); + spdsock_flush_one(iph, ns); if (active) itp->itp_flags &= ~ITPF_PFLAGS; else @@ -509,30 +556,34 @@ spdsock_flush_node(ipsec_tun_pol_t *itp, void *cookie) * Clear out one polhead. */ static void -spdsock_flush_one(ipsec_policy_head_t *iph) +spdsock_flush_one(ipsec_policy_head_t *iph, netstack_t *ns) { rw_enter(&iph->iph_lock, RW_WRITER); - ipsec_polhead_flush(iph); + ipsec_polhead_flush(iph, ns); rw_exit(&iph->iph_lock); - IPPH_REFRELE(iph); + IPPH_REFRELE(iph, ns); } static void spdsock_flush(queue_t *q, ipsec_policy_head_t *iph, mblk_t *mp) { boolean_t active; + spdsock_t *ss = (spdsock_t *)q->q_ptr; + spd_stack_t *spds = ss->spdsock_spds; + netstack_t *ns = spds->spds_netstack; if (iph != ALL_ACTIVE_POLHEADS && iph != ALL_INACTIVE_POLHEADS) { - spdsock_flush_one(iph); + spdsock_flush_one(iph, spds->spds_netstack); } else { active = (iph == ALL_ACTIVE_POLHEADS); /* First flush the global policy. */ - spdsock_flush_one(active ? ipsec_system_policy() : - ipsec_inactive_policy()); + spdsock_flush_one(active ? ipsec_system_policy(ns) : + ipsec_inactive_policy(ns), ns); /* Then flush every tunnel's appropriate one. */ - itp_walk(spdsock_flush_node, (void *)active); + itp_walk(spdsock_flush_node, (void *)active, + spds->spds_netstack); } spd_echo(q, mp); @@ -664,7 +715,8 @@ spdsock_reset_act(ipsec_act_t *act) * Sanity check action against reality, and shrink-wrap key sizes.. 
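spdsock_alglist() and spdsock_dumpalgs() now take ipss->ipsec_alg_lock and read the per-stack tables: the count from ipss->ipsec_nalgs[], the preference order from ipss->ipsec_sortlist[][], and the entry itself from ipss->ipsec_alglists[][] indexed by algorithm id. A runnable model of that sorted, indirected walk; the array sizes and the alginfo contents are invented.

#include <stdio.h>

#define	NALGTYPES	2
#define	MAX_ALGS	8

struct alginfo {
	int alg_id;
	int alg_minbits;
};

struct inst_state {			/* per-instance algorithm tables */
	int nalgs[NALGTYPES];
	int sortlist[NALGTYPES][MAX_ALGS];		/* ids, preferred first */
	struct alginfo *alglists[NALGTYPES][MAX_ALGS];	/* indexed by id */
};

static void
walk_algs(struct inst_state *st)
{
	for (int type = 0; type < NALGTYPES; type++) {
		for (int idx = 0; idx < st->nalgs[type]; idx++) {
			int id = st->sortlist[type][idx];
			struct alginfo *alg = st->alglists[type][id];

			printf("type %d alg %d minbits %d\n",
			    type, alg->alg_id, alg->alg_minbits);
		}
	}
}

int
main(void)
{
	static struct inst_state st;		/* zero-initialized */
	static struct alginfo sha1 = { 2, 160 };

	st.nalgs[0] = 1;
	st.sortlist[0][0] = 2;
	st.alglists[0][2] = &sha1;
	walk_algs(&st);
	return (0);
}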
*/ static boolean_t -spdsock_check_action(ipsec_act_t *act, boolean_t tunnel_polhead, int *diag) +spdsock_check_action(ipsec_act_t *act, boolean_t tunnel_polhead, int *diag, + spd_stack_t *spds) { if (tunnel_polhead && act->ipa_apply.ipp_use_unique) { *diag = SPD_DIAGNOSTIC_ADD_INCON_FLAGS; @@ -685,7 +737,7 @@ spdsock_check_action(ipsec_act_t *act, boolean_t tunnel_polhead, int *diag) *diag = SPD_DIAGNOSTIC_ADD_INCON_FLAGS; return (B_FALSE); } - return (ipsec_check_action(act, diag)); + return (ipsec_check_action(act, diag, spds->spds_netstack)); } /* @@ -693,7 +745,7 @@ spdsock_check_action(ipsec_act_t *act, boolean_t tunnel_polhead, int *diag) */ static boolean_t spdsock_ext_to_actvec(spd_ext_t **extv, ipsec_act_t **actpp, uint_t *nactp, - int *diag) + int *diag, spd_stack_t *spds) { struct spd_ext_actions *sactp = (struct spd_ext_actions *)extv[SPD_EXT_ACTION]; @@ -754,7 +806,8 @@ spdsock_ext_to_actvec(spd_ext_t **extv, ipsec_act_t **actpp, uint_t *nactp, *diag = SPD_DIAGNOSTIC_ADD_WRONG_ACT_COUNT; goto fail; } - if (!spdsock_check_action(&act, tunnel_polhead, diag)) + if (!spdsock_check_action(&act, tunnel_polhead, + diag, spds)) goto fail; *actp++ = act; spdsock_reset_act(&act); @@ -869,7 +922,7 @@ typedef struct static int mkrule(ipsec_policy_head_t *iph, struct spd_rule *rule, ipsec_selkey_t *sel, ipsec_act_t *actp, int nact, uint_t dir, uint_t af, - tmprule_t **rp, uint64_t *index) + tmprule_t **rp, uint64_t *index, spd_stack_t *spds) { ipsec_policy_t *pol; @@ -877,7 +930,7 @@ mkrule(ipsec_policy_head_t *iph, struct spd_rule *rule, sel->ipsl_valid |= af; pol = ipsec_policy_create(sel, actp, nact, rule->spd_rule_priority, - index); + index, spds->spds_netstack); if (pol == NULL) return (ENOMEM); @@ -895,19 +948,19 @@ mkrule(ipsec_policy_head_t *iph, struct spd_rule *rule, static int mkrulepair(ipsec_policy_head_t *iph, struct spd_rule *rule, ipsec_selkey_t *sel, ipsec_act_t *actp, int nact, uint_t dir, uint_t afs, - tmprule_t **rp, uint64_t *index) + tmprule_t **rp, uint64_t *index, spd_stack_t *spds) { int error; if (afs & IPSL_IPV4) { error = mkrule(iph, rule, sel, actp, nact, dir, IPSL_IPV4, rp, - index); + index, spds); if (error != 0) return (error); } if (afs & IPSL_IPV6) { error = mkrule(iph, rule, sel, actp, nact, dir, IPSL_IPV6, rp, - index); + index, spds); if (error != 0) return (error); } @@ -927,6 +980,8 @@ spdsock_addrule(queue_t *q, ipsec_policy_head_t *iph, mblk_t *mp, tmprule_t rules[4], *rulep = &rules[0]; boolean_t tunnel_mode, empty_itp, active; uint64_t *index = (itp == NULL) ? 
NULL : &itp->itp_next_policy_index; + spdsock_t *ss = (spdsock_t *)q->q_ptr; + spd_stack_t *spds = ss->spdsock_spds; if (rule == NULL) { spdsock_diag(q, mp, SPD_DIAGNOSTIC_NO_RULE_EXT); @@ -992,7 +1047,7 @@ spdsock_addrule(queue_t *q, ipsec_policy_head_t *iph, mblk_t *mp, } } - if (!spdsock_ext_to_actvec(extv, &actp, &nact, &diag)) { + if (!spdsock_ext_to_actvec(extv, &actp, &nact, &diag, spds)) { error = EINVAL; goto fail2; } @@ -1007,21 +1062,22 @@ spdsock_addrule(queue_t *q, ipsec_policy_head_t *iph, mblk_t *mp, if (rule->spd_rule_flags & SPD_RULE_FLAG_OUTBOUND) { error = mkrulepair(iph, rule, &sel, actp, nact, - IPSEC_TYPE_OUTBOUND, afs, &rulep, index); + IPSEC_TYPE_OUTBOUND, afs, &rulep, index, spds); if (error != 0) goto fail; } if (rule->spd_rule_flags & SPD_RULE_FLAG_INBOUND) { error = mkrulepair(iph, rule, &sel, actp, nact, - IPSEC_TYPE_INBOUND, afs, &rulep, index); + IPSEC_TYPE_INBOUND, afs, &rulep, index, spds); if (error != 0) goto fail; } - while ((--rulep) >= &rules[0]) - ipsec_enter_policy(iph, rulep->pol, rulep->dir); - + while ((--rulep) >= &rules[0]) { + ipsec_enter_policy(iph, rulep->pol, rulep->dir, + spds->spds_netstack); + } rw_exit(&iph->iph_lock); if (itp != NULL) mutex_exit(&itp->itp_lock); @@ -1033,7 +1089,7 @@ spdsock_addrule(queue_t *q, ipsec_policy_head_t *iph, mblk_t *mp, fail: rw_exit(&iph->iph_lock); while ((--rulep) >= &rules[0]) { - IPPOL_REFRELE(rulep->pol); + IPPOL_REFRELE(rulep->pol, spds->spds_netstack); } ipsec_actvec_free(actp, nact); fail2: @@ -1052,6 +1108,8 @@ spdsock_deleterule(queue_t *q, ipsec_policy_head_t *iph, mblk_t *mp, ipsec_selkey_t sel; struct spd_rule *rule = (struct spd_rule *)extv[SPD_EXT_RULE]; int err, diag = 0; + spdsock_t *ss = (spdsock_t *)q->q_ptr; + spd_stack_t *spds = ss->spdsock_spds; if (rule == NULL) { spdsock_diag(q, mp, SPD_DIAGNOSTIC_NO_RULE_EXT); @@ -1066,7 +1124,8 @@ spdsock_deleterule(queue_t *q, ipsec_policy_head_t *iph, mblk_t *mp, mutex_enter(&itp->itp_lock); if (rule->spd_rule_index != 0) { - if (ipsec_policy_delete_index(iph, rule->spd_rule_index) != 0) { + if (ipsec_policy_delete_index(iph, rule->spd_rule_index, + spds->spds_netstack) != 0) { err = ESRCH; goto fail; } @@ -1077,13 +1136,15 @@ spdsock_deleterule(queue_t *q, ipsec_policy_head_t *iph, mblk_t *mp, } if ((rule->spd_rule_flags & SPD_RULE_FLAG_INBOUND) && - !ipsec_policy_delete(iph, &sel, IPSEC_TYPE_INBOUND)) { + !ipsec_policy_delete(iph, &sel, IPSEC_TYPE_INBOUND, + spds->spds_netstack)) { err = ESRCH; goto fail; } if ((rule->spd_rule_flags & SPD_RULE_FLAG_OUTBOUND) && - !ipsec_policy_delete(iph, &sel, IPSEC_TYPE_OUTBOUND)) { + !ipsec_policy_delete(iph, &sel, IPSEC_TYPE_OUTBOUND, + spds->spds_netstack)) { err = ESRCH; goto fail; } @@ -1113,11 +1174,11 @@ fail: /* Do NOT consume a reference to itp. 
*/ /* ARGSUSED */ static void -spdsock_flip_node(ipsec_tun_pol_t *itp, void *ignoreme) +spdsock_flip_node(ipsec_tun_pol_t *itp, void *ignoreme, netstack_t *ns) { mutex_enter(&itp->itp_lock); ITPF_SWAP(itp->itp_flags); - ipsec_swap_policy(itp->itp_policy, itp->itp_inactive); + ipsec_swap_policy(itp->itp_policy, itp->itp_inactive, ns); mutex_exit(&itp->itp_lock); } @@ -1126,24 +1187,27 @@ spdsock_flip(queue_t *q, mblk_t *mp, spd_if_t *tunname) { char *tname; ipsec_tun_pol_t *itp; + spdsock_t *ss = (spdsock_t *)q->q_ptr; + spd_stack_t *spds = ss->spdsock_spds; if (tunname != NULL) { tname = (char *)tunname->spd_if_name; if (*tname == '\0') { - ipsec_swap_global_policy(); /* can't fail */ - itp_walk(spdsock_flip_node, NULL); + /* can't fail */ + ipsec_swap_global_policy(spds->spds_netstack); + itp_walk(spdsock_flip_node, NULL, spds->spds_netstack); } else { - itp = get_tunnel_policy(tname); + itp = get_tunnel_policy(tname, spds->spds_netstack); if (itp == NULL) { /* Better idea for "tunnel not found"? */ spdsock_error(q, mp, ESRCH, 0); return; } - spdsock_flip_node(itp, NULL); - ITP_REFRELE(itp); + spdsock_flip_node(itp, NULL, NULL); + ITP_REFRELE(itp, spds->spds_netstack); } } else { - ipsec_swap_global_policy(); /* can't fail */ + ipsec_swap_global_policy(spds->spds_netstack); /* can't fail */ } spd_echo(q, mp); } @@ -1197,18 +1261,22 @@ spdsock_dump_finish(spdsock_t *ss, int error) ipsec_policy_head_t *iph = ss->spdsock_dump_head; mblk_t *req = ss->spdsock_dump_req; ipsec_tun_pol_t *itp, dummy; + spd_stack_t *spds = ss->spdsock_spds; + netstack_t *ns = spds->spds_netstack; + ipsec_stack_t *ipss = ns->netstack_ipsec; ss->spdsock_dump_remaining_polheads--; if (error == 0 && ss->spdsock_dump_remaining_polheads != 0) { /* Attempt a respin with a new policy head. */ - rw_enter(&tunnel_policy_lock, RW_READER); + rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_READER); /* NOTE: No need for ITP_REF*() macros here. */ - if (tunnel_policy_gen > ss->spdsock_dump_tun_gen) { + if (ipss->ipsec_tunnel_policy_gen > ss->spdsock_dump_tun_gen) { /* Bail with EAGAIN. */ error = EAGAIN; } else if (ss->spdsock_dump_name[0] == '\0') { /* Just finished global, find first node. */ - itp = (ipsec_tun_pol_t *)avl_first(&tunnel_policies); + itp = (ipsec_tun_pol_t *)avl_first( + &ipss->ipsec_tunnel_policies); } else { /* * We just finished current-named polhead, find @@ -1216,11 +1284,11 @@ spdsock_dump_finish(spdsock_t *ss, int error) */ (void) strncpy(dummy.itp_name, ss->spdsock_dump_name, LIFNAMSIZ); - itp = (ipsec_tun_pol_t *)avl_find(&tunnel_policies, - &dummy, NULL); + itp = (ipsec_tun_pol_t *)avl_find( + &ipss->ipsec_tunnel_policies, &dummy, NULL); ASSERT(itp != NULL); - itp = (ipsec_tun_pol_t *)AVL_NEXT(&tunnel_policies, - itp); + itp = (ipsec_tun_pol_t *)AVL_NEXT( + &ipss->ipsec_tunnel_policies, itp); /* remaining_polheads should maintain this assertion. */ ASSERT(itp != NULL); } @@ -1228,7 +1296,7 @@ spdsock_dump_finish(spdsock_t *ss, int error) (void) strncpy(ss->spdsock_dump_name, itp->itp_name, LIFNAMSIZ); /* Reset other spdsock_dump thingies. */ - IPPH_REFRELE(ss->spdsock_dump_head); + IPPH_REFRELE(ss->spdsock_dump_head, ns); if (ss->spdsock_dump_active) { ss->spdsock_dump_tunnel = itp->itp_flags & ITPF_P_TUNNEL; @@ -1248,17 +1316,17 @@ spdsock_dump_finish(spdsock_t *ss, int error) ss->spdsock_dump_count = 0; ss->spdsock_dump_cur_chain = 0; rw_exit(&iph->iph_lock); - rw_exit(&tunnel_policy_lock); + rw_exit(&ipss->ipsec_tunnel_policy_lock); /* And start again. 
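Handlers throughout spdsock now recover their per-stack state from the open endpoint rather than from globals: ss = (spdsock_t *)q->q_ptr, spds = ss->spdsock_spds, ns = spds->spds_netstack, and that netstack is threaded into every policy call. A small model of that endpoint-to-instance back-pointer chain; the struct layouts are invented, only the field naming echoes the patch.

#include <stdio.h>

struct netstack {			/* stands in for netstack_t */
	int stack_id;
};

struct spd_stack {			/* per-stack module state */
	struct netstack *spds_netstack;
	int spds_debug;
};

struct spdsock {			/* one per open endpoint */
	struct spd_stack *spdsock_spds;
};

/* Every request handler recovers the owning stack from its endpoint. */
static void
handle_request(struct spdsock *ss)
{
	struct spd_stack *spds = ss->spdsock_spds;
	struct netstack *ns = spds->spds_netstack;

	printf("request handled in stack %d (debug=%d)\n",
	    ns->stack_id, spds->spds_debug);
}

int
main(void)
{
	struct netstack global_zone = { 0 };
	struct spd_stack spds = { &global_zone, 1 };
	struct spdsock ss = { &spds };

	handle_request(&ss);
	return (0);
}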
*/ return (spdsock_dump_next_record(ss)); } - rw_exit(&tunnel_policy_lock); + rw_exit(&ipss->ipsec_tunnel_policy_lock); } rw_enter(&iph->iph_lock, RW_READER); m = spdsock_dump_ruleset(req, iph, ss->spdsock_dump_count, error); rw_exit(&iph->iph_lock); - IPPH_REFRELE(iph); + IPPH_REFRELE(iph, ns); ss->spdsock_dump_req = NULL; freemsg(req); @@ -1794,20 +1862,23 @@ static void spdsock_dump(queue_t *q, ipsec_policy_head_t *iph, mblk_t *mp) { spdsock_t *ss = (spdsock_t *)q->q_ptr; + spd_stack_t *spds = ss->spdsock_spds; + netstack_t *ns = spds->spds_netstack; + ipsec_stack_t *ipss = ns->netstack_ipsec; mblk_t *mr; /* spdsock_parse() already NULL-terminated spdsock_dump_name. */ if (iph == ALL_ACTIVE_POLHEADS || iph == ALL_INACTIVE_POLHEADS) { - rw_enter(&tunnel_policy_lock, RW_READER); + rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_READER); ss->spdsock_dump_remaining_polheads = 1 + - avl_numnodes(&tunnel_policies); - ss->spdsock_dump_tun_gen = tunnel_policy_gen; - rw_exit(&tunnel_policy_lock); + avl_numnodes(&ipss->ipsec_tunnel_policies); + ss->spdsock_dump_tun_gen = ipss->ipsec_tunnel_policy_gen; + rw_exit(&ipss->ipsec_tunnel_policy_lock); if (iph == ALL_ACTIVE_POLHEADS) { - iph = ipsec_system_policy(); + iph = ipsec_system_policy(ns); ss->spdsock_dump_active = B_TRUE; } else { - iph = ipsec_inactive_policy(); + iph = ipsec_inactive_policy(spds->spds_netstack); ss->spdsock_dump_active = B_FALSE; } ASSERT(ss->spdsock_dump_name[0] == '\0'); @@ -1841,7 +1912,7 @@ spdsock_dump(queue_t *q, ipsec_policy_head_t *iph, mblk_t *mp) /* Do NOT consume a reference to ITP. */ void -spdsock_clone_node(ipsec_tun_pol_t *itp, void *ep) +spdsock_clone_node(ipsec_tun_pol_t *itp, void *ep, netstack_t *ns) { int *errptr = (int *)ep; @@ -1849,7 +1920,7 @@ spdsock_clone_node(ipsec_tun_pol_t *itp, void *ep) return; /* We've failed already for some reason. */ mutex_enter(&itp->itp_lock); ITPF_CLONE(itp->itp_flags); - *errptr = ipsec_copy_polhead(itp->itp_policy, itp->itp_inactive); + *errptr = ipsec_copy_polhead(itp->itp_policy, itp->itp_inactive, ns); mutex_exit(&itp->itp_lock); } @@ -1859,24 +1930,27 @@ spdsock_clone(queue_t *q, mblk_t *mp, spd_if_t *tunname) int error; char *tname; ipsec_tun_pol_t *itp; + spdsock_t *ss = (spdsock_t *)q->q_ptr; + spd_stack_t *spds = ss->spdsock_spds; if (tunname != NULL) { tname = (char *)tunname->spd_if_name; if (*tname == '\0') { - error = ipsec_clone_system_policy(); + error = ipsec_clone_system_policy(spds->spds_netstack); if (error == 0) - itp_walk(spdsock_clone_node, &error); + itp_walk(spdsock_clone_node, &error, + spds->spds_netstack); } else { - itp = get_tunnel_policy(tname); + itp = get_tunnel_policy(tname, spds->spds_netstack); if (itp == NULL) { spdsock_error(q, mp, ENOENT, 0); return; } - spdsock_clone_node(itp, &error); - ITP_REFRELE(itp); + spdsock_clone_node(itp, &error, NULL); + ITP_REFRELE(itp, spds->spds_netstack); } } else { - error = ipsec_clone_system_policy(); + error = ipsec_clone_system_policy(spds->spds_netstack); } if (error != 0) @@ -1937,9 +2011,11 @@ spdsock_alglist(queue_t *q, mblk_t *mp) spd_msg_t *msg; struct spd_ext_actions *act; struct spd_attribute *attr; + spdsock_t *ss = (spdsock_t *)q->q_ptr; + spd_stack_t *spds = ss->spdsock_spds; + ipsec_stack_t *ipss = spds->spds_netstack->netstack_ipsec; - mutex_enter(&alg_lock); - + mutex_enter(&ipss->ipsec_alg_lock); /* * The SPD client expects to receive separate entries for * AH authentication and ESP authentication supported algorithms. 
@@ -1947,12 +2023,12 @@ spdsock_alglist(queue_t *q, mblk_t *mp) * Don't return the "any" algorithms, if defined, as no * kernel policies can be set for these algorithms. */ - algcount = 2 * ipsec_nalgs[IPSEC_ALG_AUTH] + - ipsec_nalgs[IPSEC_ALG_ENCR]; + algcount = 2 * ipss->ipsec_nalgs[IPSEC_ALG_AUTH] + + ipss->ipsec_nalgs[IPSEC_ALG_ENCR]; - if (ipsec_alglists[IPSEC_ALG_AUTH][SADB_AALG_NONE] != NULL) + if (ipss->ipsec_alglists[IPSEC_ALG_AUTH][SADB_AALG_NONE] != NULL) algcount--; - if (ipsec_alglists[IPSEC_ALG_ENCR][SADB_EALG_NONE] != NULL) + if (ipss->ipsec_alglists[IPSEC_ALG_ENCR][SADB_EALG_NONE] != NULL) algcount--; /* @@ -1967,7 +2043,7 @@ spdsock_alglist(queue_t *q, mblk_t *mp) m = allocb(size, BPRI_HI); if (m == NULL) { - mutex_exit(&alg_lock); + mutex_exit(&ipss->ipsec_alg_lock); spdsock_error(q, mp, ENOMEM, 0); return; } @@ -2014,9 +2090,11 @@ spdsock_alglist(queue_t *q, mblk_t *mp) } for (algtype = 0; algtype < IPSEC_NALGTYPES; algtype++) { - for (algidx = 0; algidx < ipsec_nalgs[algtype]; algidx++) { - int algid = ipsec_sortlist[algtype][algidx]; - ipsec_alginfo_t *alg = ipsec_alglists[algtype][algid]; + for (algidx = 0; algidx < ipss->ipsec_nalgs[algtype]; + algidx++) { + int algid = ipss->ipsec_sortlist[algtype][algidx]; + ipsec_alginfo_t *alg = + ipss->ipsec_alglists[algtype][algid]; uint_t minbits = alg->alg_minbits; uint_t maxbits = alg->alg_maxbits; uint_t defbits = alg->alg_default_bits; @@ -2036,7 +2114,7 @@ spdsock_alglist(queue_t *q, mblk_t *mp) } } - mutex_exit(&alg_lock); + mutex_exit(&ipss->ipsec_alg_lock); #undef EMITALGATTRS #undef EMIT @@ -2070,8 +2148,11 @@ spdsock_dumpalgs(queue_t *q, mblk_t *mp) uint_t algid; uint_t i; uint_t alg_size; + spdsock_t *ss = (spdsock_t *)q->q_ptr; + spd_stack_t *spds = ss->spdsock_spds; + ipsec_stack_t *ipss = spds->spds_netstack->netstack_ipsec; - mutex_enter(&alg_lock); + mutex_enter(&ipss->ipsec_alg_lock); /* * For each algorithm, we encode: @@ -2087,9 +2168,10 @@ spdsock_dumpalgs(queue_t *q, mblk_t *mp) size = sizeof (spd_msg_t) + sizeof (struct spd_ext_actions); for (algtype = 0; algtype < IPSEC_NALGTYPES; algtype++) { - for (algidx = 0; algidx < ipsec_nalgs[algtype]; algidx++) { - algid = ipsec_sortlist[algtype][algidx]; - alg = ipsec_alglists[algtype][algid]; + for (algidx = 0; algidx < ipss->ipsec_nalgs[algtype]; + algidx++) { + algid = ipss->ipsec_sortlist[algtype][algidx]; + alg = ipss->ipsec_alglists[algtype][algid]; alg_size = sizeof (struct spd_attribute) * (ATTRPERALG + alg->alg_nkey_sizes + alg->alg_nblock_sizes) + CRYPTO_MAX_MECH_NAME; @@ -2101,7 +2183,7 @@ spdsock_dumpalgs(queue_t *q, mblk_t *mp) m = allocb(size, BPRI_HI); if (m == NULL) { - mutex_exit(&alg_lock); + mutex_exit(&ipss->ipsec_alg_lock); spdsock_error(q, mp, ENOMEM, 0); return; } @@ -2123,8 +2205,8 @@ spdsock_dumpalgs(queue_t *q, mblk_t *mp) act->spd_actions_len = SPD_8TO64(size - sizeof (spd_msg_t)); act->spd_actions_exttype = SPD_EXT_ACTION; - act->spd_actions_count = ipsec_nalgs[IPSEC_ALG_AUTH] + - ipsec_nalgs[IPSEC_ALG_ENCR]; + act->spd_actions_count = ipss->ipsec_nalgs[IPSEC_ALG_AUTH] + + ipss->ipsec_nalgs[IPSEC_ALG_ENCR]; act->spd_actions_reserved = 0; attr = (struct spd_attribute *)cur; @@ -2136,10 +2218,11 @@ spdsock_dumpalgs(queue_t *q, mblk_t *mp) } for (algtype = 0; algtype < IPSEC_NALGTYPES; algtype++) { - for (algidx = 0; algidx < ipsec_nalgs[algtype]; algidx++) { + for (algidx = 0; algidx < ipss->ipsec_nalgs[algtype]; + algidx++) { - algid = ipsec_sortlist[algtype][algidx]; - alg = ipsec_alglists[algtype][algid]; + algid = 
ipss->ipsec_sortlist[algtype][algidx]; + alg = ipss->ipsec_alglists[algtype][algid]; /* * If you change the number of EMIT's here, change @@ -2168,7 +2251,7 @@ spdsock_dumpalgs(queue_t *q, mblk_t *mp) } } - mutex_exit(&alg_lock); + mutex_exit(&ipss->ipsec_alg_lock); #undef EMITALGATTRS #undef EMIT @@ -2187,7 +2270,7 @@ spdsock_dumpalgs(queue_t *q, mblk_t *mp) * when a request is received while IPsec is loaded. */ static void -spdsock_do_updatealg(spd_ext_t *extv[], int *diag) +spdsock_do_updatealg(spd_ext_t *extv[], int *diag, spd_stack_t *spds) { struct spd_ext_actions *actp; struct spd_attribute *attr, *endattr; @@ -2198,7 +2281,7 @@ spdsock_do_updatealg(spd_ext_t *extv[], int *diag) uint_t i, cur_key, cur_block, algid; *diag = -1; - ASSERT(MUTEX_HELD(&spdsock_alg_lock)); + ASSERT(MUTEX_HELD(&spds->spds_alg_lock)); /* parse the message, building the list of algorithms */ @@ -2213,7 +2296,7 @@ spdsock_do_updatealg(spd_ext_t *extv[], int *diag) endattr = (struct spd_attribute *)end; attr = (struct spd_attribute *)&actp[1]; - bzero(spdsock_algs, IPSEC_NALGTYPES * IPSEC_MAX_ALGS * + bzero(spds->spds_algs, IPSEC_NALGTYPES * IPSEC_MAX_ALGS * sizeof (ipsec_alginfo_t *)); alg = kmem_zalloc(sizeof (*alg), KM_SLEEP); @@ -2238,15 +2321,16 @@ spdsock_do_updatealg(spd_ext_t *extv[], int *diag) ipsec_alg_free(alg); } else { ipsec_alg_free( - spdsock_algs[alg_type][alg->alg_id]); - spdsock_algs[alg_type][alg->alg_id] = alg; + spds->spds_algs[alg_type][alg->alg_id]); + spds->spds_algs[alg_type][alg->alg_id] = + alg; } alg = kmem_zalloc(sizeof (*alg), KM_SLEEP); break; case SPD_ATTR_ALG_ID: if (attr->spd_attr_value >= IPSEC_MAX_ALGS) { - ss1dbg(("spdsock_do_updatealg: " + ss1dbg(spds, ("spdsock_do_updatealg: " "invalid alg id %d\n", attr->spd_attr_value)); *diag = SPD_DIAGNOSTIC_ALG_ID_RANGE; @@ -2287,7 +2371,7 @@ spdsock_do_updatealg(spd_ext_t *extv[], int *diag) case SPD_ATTR_ALG_KEYSIZE: if (alg->alg_key_sizes == NULL || cur_key >= alg->alg_nkey_sizes) { - ss1dbg(("spdsock_do_updatealg: " + ss1dbg(spds, ("spdsock_do_updatealg: " "too many key sizes\n")); *diag = SPD_DIAGNOSTIC_ALG_NUM_KEY_SIZES; goto bail; @@ -2313,7 +2397,7 @@ spdsock_do_updatealg(spd_ext_t *extv[], int *diag) case SPD_ATTR_ALG_BLOCKSIZE: if (alg->alg_block_sizes == NULL || cur_block >= alg->alg_nblock_sizes) { - ss1dbg(("spdsock_do_updatealg: " + ss1dbg(spds, ("spdsock_do_updatealg: " "too many block sizes\n")); *diag = SPD_DIAGNOSTIC_ALG_NUM_BLOCK_SIZES; goto bail; @@ -2326,7 +2410,7 @@ spdsock_do_updatealg(spd_ext_t *extv[], int *diag) char *mech_name; if (attr->spd_attr_value > CRYPTO_MAX_MECH_NAME) { - ss1dbg(("spdsock_do_updatealg: " + ss1dbg(spds, ("spdsock_do_updatealg: " "mech name too long\n")); *diag = SPD_DIAGNOSTIC_ALG_MECH_NAME_LEN; goto bail; @@ -2355,7 +2439,7 @@ spdsock_do_updatealg(spd_ext_t *extv[], int *diag) break; for (i = 0; i < NEXECMODES; i++) { if (execmodes[i] == attr->spd_attr_value) { - spdsock_algs_exec_mode[alg_type] = i; + spds->spds_algs_exec_mode[alg_type] = i; break; } } @@ -2368,14 +2452,14 @@ spdsock_do_updatealg(spd_ext_t *extv[], int *diag) #undef ALG_BLOCK_SIZES /* update the algorithm tables */ - spdsock_merge_algs(); + spdsock_merge_algs(spds); bail: /* cleanup */ ipsec_alg_free(alg); for (alg_type = 0; alg_type < IPSEC_NALGTYPES; alg_type++) - for (algid = 0; algid < IPSEC_MAX_ALGS; algid++) - if (spdsock_algs[alg_type][algid] != NULL) - ipsec_alg_free(spdsock_algs[alg_type][algid]); + for (algid = 0; algid < IPSEC_MAX_ALGS; algid++) + if (spds->spds_algs[alg_type][algid] != NULL) + 
ipsec_alg_free(spds->spds_algs[alg_type][algid]); } /* @@ -2387,7 +2471,11 @@ bail: static void spdsock_updatealg(queue_t *q, mblk_t *mp, spd_ext_t *extv[]) { - if (!ipsec_loaded()) { + spdsock_t *ss = (spdsock_t *)q->q_ptr; + spd_stack_t *spds = ss->spdsock_spds; + ipsec_stack_t *ipss = spds->spds_netstack->netstack_ipsec; + + if (!ipsec_loaded(ipss)) { /* * IPsec is not loaded, save request and return nicely, * the message will be processed once IPsec loads. @@ -2399,14 +2487,14 @@ spdsock_updatealg(queue_t *q, mblk_t *mp, spd_ext_t *extv[]) spdsock_error(q, mp, ENOMEM, 0); return; } - mutex_enter(&spdsock_alg_lock); - bcopy(extv, spdsock_extv_algs, + mutex_enter(&spds->spds_alg_lock); + bcopy(extv, spds->spds_extv_algs, sizeof (spd_ext_t *) * (SPD_EXT_MAX + 1)); - if (spdsock_mp_algs != NULL) - freemsg(spdsock_mp_algs); - spdsock_mp_algs = mp; - spdsock_algs_pending = B_TRUE; - mutex_exit(&spdsock_alg_lock); + if (spds->spds_mp_algs != NULL) + freemsg(spds->spds_mp_algs); + spds->spds_mp_algs = mp; + spds->spds_algs_pending = B_TRUE; + mutex_exit(&spds->spds_alg_lock); spd_echo(q, new_mp); } else { @@ -2415,9 +2503,9 @@ spdsock_updatealg(queue_t *q, mblk_t *mp, spd_ext_t *extv[]) */ int diag; - mutex_enter(&spdsock_alg_lock); - spdsock_do_updatealg(extv, &diag); - mutex_exit(&spdsock_alg_lock); + mutex_enter(&spds->spds_alg_lock); + spdsock_do_updatealg(extv, &diag, spds); + mutex_exit(&spds->spds_alg_lock); if (diag == -1) spd_echo(q, mp); else @@ -2494,6 +2582,8 @@ get_appropriate_polhead(queue_t *q, mblk_t *mp, spd_if_t *tunname, int spdid, char *tname; boolean_t active; spdsock_t *ss = (spdsock_t *)q->q_ptr; + spd_stack_t *spds = ss->spdsock_spds; + netstack_t *ns = spds->spds_netstack; uint64_t gen; /* Placeholder */ ill_t *v4, *v6; @@ -2518,7 +2608,7 @@ get_appropriate_polhead(queue_t *q, mblk_t *mp, spd_if_t *tunname, int spdid, ALL_INACTIVE_POLHEADS); } - itp = get_tunnel_policy(tname); + itp = get_tunnel_policy(tname, spds->spds_netstack); if (itp == NULL) { if (msgtype != SPD_ADDRULE) { /* "Tunnel not found" */ @@ -2527,7 +2617,8 @@ get_appropriate_polhead(queue_t *q, mblk_t *mp, spd_if_t *tunname, int spdid, } errno = 0; - itp = create_tunnel_policy(tname, &errno, &gen); + itp = create_tunnel_policy(tname, &errno, &gen, + spds->spds_netstack); if (itp == NULL) { /* * Something very bad happened, most likely @@ -2543,11 +2634,11 @@ get_appropriate_polhead(queue_t *q, mblk_t *mp, spd_if_t *tunname, int spdid, * policy AFTER plumbing a tunnel. */ v4 = ill_lookup_on_name(tname, B_FALSE, B_FALSE, NULL, - NULL, NULL, &errno, NULL); + NULL, NULL, &errno, NULL, ns->netstack_ip); if (v4 != NULL) find_tun_and_set_itp(v4, itp); v6 = ill_lookup_on_name(tname, B_FALSE, B_TRUE, NULL, - NULL, NULL, &errno, NULL); + NULL, NULL, &errno, NULL, ns->netstack_ip); if (v6 != NULL) find_tun_and_set_itp(v6, itp); ASSERT(itp != NULL); @@ -2566,9 +2657,9 @@ get_appropriate_polhead(queue_t *q, mblk_t *mp, spd_if_t *tunname, int spdid, } if (active) - iph = (itp == NULL) ? ipsec_system_policy() : itp->itp_policy; + iph = (itp == NULL) ? ipsec_system_policy(ns) : itp->itp_policy; else - iph = (itp == NULL) ? ipsec_inactive_policy() : + iph = (itp == NULL) ? 
ipsec_inactive_policy(ns) : itp->itp_inactive; ASSERT(iph != NULL); @@ -2588,6 +2679,10 @@ spdsock_parse(queue_t *q, mblk_t *mp) ipsec_policy_head_t *iph; ipsec_tun_pol_t *itp; spd_if_t *tunname; + spdsock_t *ss = (spdsock_t *)q->q_ptr; + spd_stack_t *spds = ss->spdsock_spds; + netstack_t *ns = spds->spds_netstack; + ipsec_stack_t *ipss = ns->netstack_ipsec; /* Make sure nothing's below me. */ ASSERT(WR(q)->q_next == NULL); @@ -2605,7 +2700,8 @@ spdsock_parse(queue_t *q, mblk_t *mp) * do the right thing. Then again, maybe just letting * the error delivery do the right thing. */ - ss2dbg(("mblk (%lu) and base (%d) message sizes don't jibe.\n", + ss2dbg(spds, + ("mblk (%lu) and base (%d) message sizes don't jibe.\n", msgdsize(mp), msgsize)); spdsock_error(q, mp, EMSGSIZE, SPD_DIAGNOSTIC_NONE); return; @@ -2617,7 +2713,7 @@ spdsock_parse(queue_t *q, mblk_t *mp) /* * Something screwy happened. */ - ss3dbg(("spdsock_parse: pullupmsg() failed.\n")); + ss3dbg(spds, ("spdsock_parse: pullupmsg() failed.\n")); return; } else { spmsg = (spd_msg_t *)mp->b_rptr; @@ -2627,25 +2723,25 @@ spdsock_parse(queue_t *q, mblk_t *mp) switch (spdsock_get_ext(extv, spmsg, msgsize)) { case KGE_DUP: /* Handle duplicate extension. */ - ss1dbg(("Got duplicate extension of type %d.\n", + ss1dbg(spds, ("Got duplicate extension of type %d.\n", extv[0]->spd_ext_type)); spdsock_diag(q, mp, dup_ext_diag[extv[0]->spd_ext_type]); return; case KGE_UNK: /* Handle unknown extension. */ - ss1dbg(("Got unknown extension of type %d.\n", + ss1dbg(spds, ("Got unknown extension of type %d.\n", extv[0]->spd_ext_type)); spdsock_diag(q, mp, SPD_DIAGNOSTIC_UNKNOWN_EXT); return; case KGE_LEN: /* Length error. */ - ss1dbg(("Length %d on extension type %d overrun or 0.\n", + ss1dbg(spds, ("Length %d on extension type %d overrun or 0.\n", extv[0]->spd_ext_len, extv[0]->spd_ext_type)); spdsock_diag(q, mp, SPD_DIAGNOSTIC_BAD_EXTLEN); return; case KGE_CHK: /* Reality check failed. */ - ss1dbg(("Reality check failed on extension type %d.\n", + ss1dbg(spds, ("Reality check failed on extension type %d.\n", extv[0]->spd_ext_type)); spdsock_diag(q, mp, bad_ext_diag[extv[0]->spd_ext_type]); return; @@ -2657,11 +2753,11 @@ spdsock_parse(queue_t *q, mblk_t *mp) /* * Special-case SPD_UPDATEALGS so as not to load IPsec. */ - if (!ipsec_loaded() && spmsg->spd_msg_type != SPD_UPDATEALGS) { + if (!ipsec_loaded(ipss) && spmsg->spd_msg_type != SPD_UPDATEALGS) { spdsock_t *ss = (spdsock_t *)q->q_ptr; ASSERT(ss != NULL); - ipsec_loader_loadnow(); + ipsec_loader_loadnow(ipss); ss->spdsock_timeout_arg = mp; ss->spdsock_timeout = qtimeout(q, spdsock_loadcheck, q, LOADCHECK_INTERVAL); @@ -2714,13 +2810,13 @@ spdsock_parse(queue_t *q, mblk_t *mp) else itp->itp_flags &= ~ITPF_IFLAGS; mutex_exit(&itp->itp_lock); - ITP_REFRELE(itp); + ITP_REFRELE(itp, ns); } spdsock_flush(q, iph, mp); return; case SPD_DUMP: if (itp != NULL) - ITP_REFRELE(itp); + ITP_REFRELE(itp, ns); spdsock_dump(q, iph, mp); return; } @@ -2746,9 +2842,9 @@ spdsock_parse(queue_t *q, mblk_t *mp) break; } - IPPH_REFRELE(iph); + IPPH_REFRELE(iph, spds->spds_netstack); if (itp != NULL) - ITP_REFRELE(itp); + ITP_REFRELE(itp, ns); } /* @@ -2756,15 +2852,19 @@ spdsock_parse(queue_t *q, mblk_t *mp) * Called from the IPsec loader. 
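The function that follows replays an SPD_UPDATEALGS request that spdsock_updatealg() stashed while IPsec was still unloaded. A simplified user-space sketch of that stash-and-replay pattern, with a pthread mutex in place of the kernel spds_alg_lock and all names invented:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t alg_lock = PTHREAD_MUTEX_INITIALIZER;
    static int algs_pending;
    static int pending_request;     /* stands in for spds_mp_algs */

    static void
    apply_request(int req)
    {
        printf("applying algorithm update %d\n", req);
    }

    void
    submit_update(int req, int subsystem_loaded)
    {
        if (!subsystem_loaded) {
            pthread_mutex_lock(&alg_lock);
            pending_request = req;  /* replaces any older stashed request */
            algs_pending = 1;
            pthread_mutex_unlock(&alg_lock);
            return;                 /* caller still gets an early echo */
        }
        pthread_mutex_lock(&alg_lock);
        apply_request(req);
        pthread_mutex_unlock(&alg_lock);
    }

    /* Invoked once the subsystem finishes loading. */
    void
    update_pending(void)
    {
        pthread_mutex_lock(&alg_lock);
        if (algs_pending) {
            apply_request(pending_request);
            algs_pending = 0;
        }
        pthread_mutex_unlock(&alg_lock);
    }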
*/ void -spdsock_update_pending_algs(void) +spdsock_update_pending_algs(netstack_t *ns) { - mutex_enter(&spdsock_alg_lock); - if (spdsock_algs_pending) { + spd_stack_t *spds = ns->netstack_spdsock; + + mutex_enter(&spds->spds_alg_lock); + if (spds->spds_algs_pending) { int diag; - spdsock_do_updatealg(spdsock_extv_algs, &diag); - spdsock_algs_pending = B_FALSE; + + spdsock_do_updatealg(spds->spds_extv_algs, &diag, + spds); + spds->spds_algs_pending = B_FALSE; } - mutex_exit(&spdsock_alg_lock); + mutex_exit(&spds->spds_alg_lock); } static void @@ -2773,6 +2873,8 @@ spdsock_loadcheck(void *arg) queue_t *q = (queue_t *)arg; spdsock_t *ss = (spdsock_t *)q->q_ptr; mblk_t *mp; + spd_stack_t *spds = ss->spdsock_spds; + ipsec_stack_t *ipss = spds->spds_netstack->netstack_ipsec; ASSERT(ss != NULL); @@ -2780,7 +2882,7 @@ spdsock_loadcheck(void *arg) mp = ss->spdsock_timeout_arg; ASSERT(mp != NULL); ss->spdsock_timeout_arg = NULL; - if (ipsec_failed()) + if (ipsec_failed(ipss)) spdsock_error(q, mp, EPROTONOSUPPORT, 0); else spdsock_parse(q, mp); @@ -2908,17 +3010,19 @@ spdsock_opt_set(queue_t *q, uint_t mgmt_flags, int level, int name, void *thisdg_attrs, cred_t *cr, mblk_t *mblk) { int *i1 = (int *)invalp; + spdsock_t *ss = (spdsock_t *)q->q_ptr; + spd_stack_t *spds = ss->spdsock_spds; switch (level) { case SOL_SOCKET: switch (name) { case SO_SNDBUF: - if (*i1 > spdsock_max_buf) + if (*i1 > spds->spds_max_buf) return (ENOBUFS); q->q_hiwat = *i1; break; case SO_RCVBUF: - if (*i1 > spdsock_max_buf) + if (*i1 > spds->spds_max_buf) return (ENOBUFS); RD(q)->q_hiwat = *i1; (void) mi_set_sth_hiwat(RD(q), *i1); @@ -2938,43 +3042,52 @@ spdsock_wput_other(queue_t *q, mblk_t *mp) { struct iocblk *iocp; int error; + spdsock_t *ss = (spdsock_t *)q->q_ptr; + spd_stack_t *spds = ss->spdsock_spds; + cred_t *cr; switch (mp->b_datap->db_type) { case M_PROTO: case M_PCPROTO: if ((mp->b_wptr - mp->b_rptr) < sizeof (long)) { - ss3dbg(( + ss3dbg(spds, ( "spdsock_wput_other: Not big enough M_PROTO\n")); freemsg(mp); return; } + cr = zone_get_kcred(netstackid_to_zoneid( + spds->spds_netstack->netstack_stackid)); + ASSERT(cr != NULL); + switch (((union T_primitives *)mp->b_rptr)->type) { case T_CAPABILITY_REQ: spdsock_capability_req(q, mp); - return; + break; case T_INFO_REQ: spdsock_info_req(q, mp); - return; + break; case T_SVR4_OPTMGMT_REQ: - (void) svr4_optcom_req(q, mp, DB_CREDDEF(mp, kcred), + (void) svr4_optcom_req(q, mp, DB_CREDDEF(mp, cr), &spdsock_opt_obj); - return; + break; case T_OPTMGMT_REQ: - (void) tpi_optcom_req(q, mp, DB_CREDDEF(mp, kcred), + (void) tpi_optcom_req(q, mp, DB_CREDDEF(mp, cr), &spdsock_opt_obj); - return; + break; case T_DATA_REQ: case T_EXDATA_REQ: case T_ORDREL_REQ: /* Illegal for spdsock. */ freemsg(mp); (void) putnextctl1(RD(q), M_ERROR, EPROTO); - return; + break; default: /* Not supported by spdsock. 
*/ spdsock_err_ack(q, mp, TNOTSUPPORT, 0); - return; + break; } + crfree(cr); + return; case M_IOCTL: iocp = (struct iocblk *)mp->b_rptr; error = EINVAL; @@ -2982,7 +3095,7 @@ spdsock_wput_other(queue_t *q, mblk_t *mp) switch (iocp->ioc_cmd) { case ND_SET: case ND_GET: - if (nd_getset(q, spdsock_g_nd, mp)) { + if (nd_getset(q, spds->spds_g_nd, mp)) { qreply(q, mp); return; } else @@ -3014,6 +3127,7 @@ spdsock_wput(queue_t *q, mblk_t *mp) uint8_t *rptr = mp->b_rptr; mblk_t *mp1; spdsock_t *ss = (spdsock_t *)q->q_ptr; + spd_stack_t *spds = ss->spdsock_spds; /* * If we're dumping, defer processing other messages until the @@ -3030,7 +3144,7 @@ spdsock_wput(queue_t *q, mblk_t *mp) /* * Silently discard. */ - ss2dbg(("raw M_DATA in spdsock.\n")); + ss2dbg(spds, ("raw M_DATA in spdsock.\n")); freemsg(mp); return; case M_PROTO: @@ -3039,19 +3153,20 @@ spdsock_wput(queue_t *q, mblk_t *mp) if (((union T_primitives *)rptr)->type == T_DATA_REQ) { if ((mp1 = mp->b_cont) == NULL) { /* No data after T_DATA_REQ. */ - ss2dbg(("No data after DATA_REQ.\n")); + ss2dbg(spds, + ("No data after DATA_REQ.\n")); freemsg(mp); return; } freeb(mp); mp = mp1; - ss2dbg(("T_DATA_REQ\n")); + ss2dbg(spds, ("T_DATA_REQ\n")); break; /* Out of switch. */ } } /* FALLTHRU */ default: - ss3dbg(("In default wput case (%d %d).\n", + ss3dbg(spds, ("In default wput case (%d %d).\n", mp->b_datap->db_type, ((union T_primitives *)rptr)->type)); spdsock_wput_other(q, mp); return; @@ -3072,8 +3187,10 @@ spdsock_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) spdsock_t *ss; queue_t *oq = OTHERQ(q); minor_t ssminor; + netstack_t *ns; + spd_stack_t *spds; - if (secpolicy_net_config(credp, B_FALSE) != 0) + if (secpolicy_ip_config(credp, B_FALSE) != 0) return (EPERM); if (q->q_ptr != NULL) @@ -3082,15 +3199,22 @@ spdsock_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) if (sflag & MODOPEN) return (EINVAL); - ss2dbg(("Made it into PF_POLICY socket open.\n")); + ns = netstack_find_by_cred(credp); + ASSERT(ns != NULL); + spds = ns->netstack_spdsock; + ASSERT(spds != NULL); + + ss2dbg(spds, ("Made it into PF_POLICY socket open.\n")); ssminor = (minor_t)(uintptr_t)vmem_alloc(spdsock_vmem, 1, VM_NOSLEEP); - if (ssminor == 0) + if (ssminor == 0) { + netstack_rele(spds->spds_netstack); return (ENOMEM); - + } ss = kmem_zalloc(sizeof (spdsock_t), KM_NOSLEEP); if (ss == NULL) { vmem_free(spdsock_vmem, (void *)(uintptr_t)ssminor, 1); + netstack_rele(spds->spds_netstack); return (ENOMEM); } @@ -3098,16 +3222,18 @@ spdsock_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) ss->spdsock_state = TS_UNBND; ss->spdsock_dump_req = NULL; + ss->spdsock_spds = spds; + q->q_ptr = ss; oq->q_ptr = ss; - q->q_hiwat = spdsock_recv_hiwat; + q->q_hiwat = spds->spds_recv_hiwat; - oq->q_hiwat = spdsock_xmit_hiwat; - oq->q_lowat = spdsock_xmit_lowat; + oq->q_hiwat = spds->spds_xmit_hiwat; + oq->q_lowat = spds->spds_xmit_lowat; qprocson(q); - (void) mi_set_sth_hiwat(q, spdsock_recv_hiwat); + (void) mi_set_sth_hiwat(q, spds->spds_recv_hiwat); *devp = makedevice(getmajor(*devp), ss->spdsock_minor); return (0); @@ -3141,6 +3267,8 @@ spdsock_wsrv(queue_t *q) { spdsock_t *ss = q->q_ptr; mblk_t *mp; + spd_stack_t *spds = ss->spdsock_spds; + ipsec_stack_t *ipss = spds->spds_netstack->netstack_ipsec; if (ss->spdsock_dump_req != NULL) { qenable(OTHERQ(q)); @@ -3148,11 +3276,11 @@ spdsock_wsrv(queue_t *q) } while ((mp = getq(q)) != NULL) { - if (ipsec_loaded()) { + if (ipsec_loaded(ipss)) { spdsock_wput(q, mp); if 
(ss->spdsock_dump_req != NULL) return; - } else if (!ipsec_failed()) { + } else if (!ipsec_failed(ipss)) { (void) putq(q, mp); } else { spdsock_error(q, mp, EPFNOSUPPORT, 0); @@ -3164,6 +3292,7 @@ static int spdsock_close(queue_t *q) { spdsock_t *ss = q->q_ptr; + spd_stack_t *spds = ss->spdsock_spds; qprocsoff(q); @@ -3173,9 +3302,10 @@ spdsock_close(queue_t *q) if (ss->spdsock_timeout != 0) (void) quntimeout(q, ss->spdsock_timeout); - ss3dbg(("Driver close, PF_POLICY socket is going away.\n")); + ss3dbg(spds, ("Driver close, PF_POLICY socket is going away.\n")); vmem_free(spdsock_vmem, (void *)(uintptr_t)ss->spdsock_minor, 1); + netstack_rele(ss->spdsock_spds->spds_netstack); kmem_free(ss, sizeof (spdsock_t)); return (0); @@ -3185,15 +3315,17 @@ spdsock_close(queue_t *q) * Merge the IPsec algorithms tables with the received algorithm information. */ void -spdsock_merge_algs(void) +spdsock_merge_algs(spd_stack_t *spds) { ipsec_alginfo_t *alg, *oalg; ipsec_algtype_t algtype; uint_t algidx, algid, nalgs; crypto_mech_name_t *mechs; uint_t mech_count, mech_idx; + netstack_t *ns = spds->spds_netstack; + ipsec_stack_t *ipss = ns->netstack_ipsec; - ASSERT(MUTEX_HELD(&spdsock_alg_lock)); + ASSERT(MUTEX_HELD(&spds->spds_alg_lock)); /* * Get the list of supported mechanisms from the crypto framework. @@ -3209,7 +3341,8 @@ spdsock_merge_algs(void) int algflags = 0; crypto_mech_type_t mt = CRYPTO_MECHANISM_INVALID; - if ((alg = spdsock_algs[algtype][algid]) == NULL) + alg = spds->spds_algs[algtype][algid]; + if (alg == NULL) continue; /* @@ -3237,7 +3370,7 @@ spdsock_merge_algs(void) } } - mutex_enter(&alg_lock); + mutex_enter(&ipss->ipsec_alg_lock); /* * For each algorithm currently defined, check if it is @@ -3247,11 +3380,11 @@ spdsock_merge_algs(void) * but not part of the new tables. */ for (algtype = 0; algtype < IPSEC_NALGTYPES; algtype++) { - nalgs = ipsec_nalgs[algtype]; + nalgs = ipss->ipsec_nalgs[algtype]; for (algidx = 0; algidx < nalgs; algidx++) { - algid = ipsec_sortlist[algtype][algidx]; - if (spdsock_algs[algtype][algid] == NULL) - ipsec_alg_unreg(algtype, algid); + algid = ipss->ipsec_sortlist[algtype][algidx]; + if (spds->spds_algs[algtype][algid] == NULL) + ipsec_alg_unreg(algtype, algid, ns); } } @@ -3264,35 +3397,39 @@ spdsock_merge_algs(void) */ for (algtype = 0; algtype < IPSEC_NALGTYPES; algtype++) { for (algid = 0; algid < IPSEC_MAX_ALGS; algid++) { - if ((alg = spdsock_algs[algtype][algid]) == NULL) + alg = spds->spds_algs[algtype][algid]; + if (alg == NULL) continue; - if ((oalg = ipsec_alglists[algtype][algid]) == NULL) { + if ((oalg = ipss->ipsec_alglists[algtype][algid]) == + NULL) { /* * New algorithm, add it to the algorithm * table. */ - ipsec_alg_reg(algtype, alg); + ipsec_alg_reg(algtype, alg, ns); } else { /* * Algorithm is already in the table. Swap * the existing entry with the new one. 
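Roughly, spdsock_merge_algs() reconciles the staged per-stack tables against the live IPsec tables in three cases: unregister entries missing from the staged set, register entries that are new, and swap entries present in both. A toy model of that reconciliation, with trimmed types and no locking:

    #include <stdlib.h>

    #define MAX_ALGS    256

    typedef struct alginfo { int id; } alginfo_t;

    void
    merge(alginfo_t *live[MAX_ALGS], alginfo_t *staged[MAX_ALGS])
    {
        for (int id = 0; id < MAX_ALGS; id++) {
            if (staged[id] == NULL) {
                if (live[id] != NULL) {     /* dropped algorithm */
                    free(live[id]);
                    live[id] = NULL;
                }
                continue;
            }
            if (live[id] == NULL) {         /* brand-new algorithm */
                live[id] = staged[id];
            } else {                        /* replace the existing entry */
                free(live[id]);
                live[id] = staged[id];
            }
            staged[id] = NULL;  /* ownership moved to the live table */
        }
    }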
*/ - ipsec_alg_fix_min_max(alg, algtype); - ipsec_alglists[algtype][algid] = alg; + ipsec_alg_fix_min_max(alg, algtype, ns); + ipss->ipsec_alglists[algtype][algid] = alg; ipsec_alg_free(oalg); } - spdsock_algs[algtype][algid] = NULL; + spds->spds_algs[algtype][algid] = NULL; } } - for (algtype = 0; algtype < IPSEC_NALGTYPES; algtype++) - ipsec_algs_exec_mode[algtype] = spdsock_algs_exec_mode[algtype]; + for (algtype = 0; algtype < IPSEC_NALGTYPES; algtype++) { + ipss->ipsec_algs_exec_mode[algtype] = + spds->spds_algs_exec_mode[algtype]; + } - mutex_exit(&alg_lock); + mutex_exit(&ipss->ipsec_alg_lock); crypto_free_mech_list(mechs, mech_count); - ipsecah_algs_changed(); - ipsecesp_algs_changed(); + ipsecah_algs_changed(ns); + ipsecesp_algs_changed(ns); } diff --git a/usr/src/uts/common/inet/ip/spdsock_opt_data.c b/usr/src/uts/common/inet/ip/spdsock_opt_data.c index 7b5d570350..df797bb37a 100644 --- a/usr/src/uts/common/inet/ip/spdsock_opt_data.c +++ b/usr/src/uts/common/inet/ip/spdsock_opt_data.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -19,9 +18,10 @@ * * CDDL HEADER END */ + /* - * Copyright (c) 2001 by Sun Microsystems, Inc. - * All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. */ #pragma ident "%Z%%M% %I% %E% SMI" @@ -33,6 +33,7 @@ #include <sys/socket.h> #include <sys/xti_xtiopt.h> +#include <net/pfpolicy.h> #include <inet/common.h> #include <netinet/ip6.h> #include <inet/ip.h> diff --git a/usr/src/uts/common/inet/ip/tn_ipopt.c b/usr/src/uts/common/inet/ip/tn_ipopt.c index d07bc5ac73..51f8b4ad72 100644 --- a/usr/src/uts/common/inet/ip/tn_ipopt.c +++ b/usr/src/uts/common/inet/ip/tn_ipopt.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -201,7 +201,7 @@ tsol_get_option(mblk_t *mp, uchar_t **buffer) */ int tsol_compute_label(const cred_t *credp, ipaddr_t dst, uchar_t *opt_storage, - boolean_t isexempt) + boolean_t isexempt, ip_stack_t *ipst) { uint_t sec_opt_len; ts_label_t *tsl; @@ -209,7 +209,7 @@ tsol_compute_label(const cred_t *credp, ipaddr_t dst, uchar_t *opt_storage, ire_t *ire, *sire = NULL; boolean_t compute_label = B_FALSE; tsol_ire_gw_secattr_t *attrp; - zoneid_t zoneid; + zoneid_t zoneid, ip_zoneid; if (opt_storage != NULL) opt_storage[IPOPT_OLEN] = 0; @@ -230,6 +230,15 @@ tsol_compute_label(const cred_t *credp, ipaddr_t dst, uchar_t *opt_storage, zoneid = crgetzoneid(credp); + /* + * For exclusive stacks we set the zoneid to zero + * to operate as if in the global zone for IRE and conn_t comparisons. + */ + if (ipst->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID) + ip_zoneid = GLOBAL_ZONEID; + else + ip_zoneid = zoneid; + switch (dst_rhtp->tpc_tp.host_type) { case UNLABELED: /* @@ -238,7 +247,7 @@ tsol_compute_label(const cred_t *credp, ipaddr_t dst, uchar_t *opt_storage, * not on the same subnet, and that the next-hop * gateway is labeled. 
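Both label-computation paths in tn_ipopt.c normalize the zone used for routing lookups: an exclusive-IP stack behaves as the global zone, while a shared stack keeps the credential's zone. A minimal sketch of that mapping, with simplified constants:

    typedef int zoneid_t;
    #define GLOBAL_ZONEID   0

    static zoneid_t
    lookup_zoneid(zoneid_t cred_zoneid, int stack_is_global)
    {
        /* Exclusive stack: behave as the global zone for IRE lookups. */
        if (!stack_is_global)
            return (GLOBAL_ZONEID);
        return (cred_zoneid);
    }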
*/ - ire = ire_cache_lookup(dst, zoneid, tsl); + ire = ire_cache_lookup(dst, ip_zoneid, tsl, ipst); if (ire != NULL && (ire->ire_type & (IRE_BROADCAST | IRE_LOCAL | IRE_LOOPBACK | IRE_INTERFACE)) != 0) { @@ -247,8 +256,8 @@ tsol_compute_label(const cred_t *credp, ipaddr_t dst, uchar_t *opt_storage, return (0); } else if (ire == NULL) { ire = ire_ftable_lookup(dst, 0, 0, 0, NULL, &sire, - zoneid, 0, tsl, (MATCH_IRE_RECURSIVE | - MATCH_IRE_DEFAULT | MATCH_IRE_SECATTR)); + ip_zoneid, 0, tsl, (MATCH_IRE_RECURSIVE | + MATCH_IRE_DEFAULT | MATCH_IRE_SECATTR), ipst); } /* no route to destination */ @@ -607,7 +616,7 @@ tsol_prepend_option(uchar_t *optbuf, ipha_t *ipha, int buflen) */ int tsol_check_label(const cred_t *credp, mblk_t **mpp, int *addedp, - boolean_t isexempt) + boolean_t isexempt, ip_stack_t *ipst) { mblk_t *mp = *mpp; ipha_t *ipha; @@ -625,7 +634,8 @@ tsol_check_label(const cred_t *credp, mblk_t **mpp, int *addedp, ipha = (ipha_t *)mp->b_rptr; - retv = tsol_compute_label(credp, ipha->ipha_dst, opt_storage, isexempt); + retv = tsol_compute_label(credp, ipha->ipha_dst, opt_storage, isexempt, + ipst); if (retv != 0) return (retv); @@ -724,13 +734,13 @@ param_prob: */ int tsol_compute_label_v6(const cred_t *credp, const in6_addr_t *dst, - uchar_t *opt_storage, boolean_t isexempt) + uchar_t *opt_storage, boolean_t isexempt, ip_stack_t *ipst) { tsol_tpc_t *dst_rhtp; ts_label_t *tsl; uint_t sec_opt_len; uint32_t doi; - zoneid_t zoneid; + zoneid_t zoneid, ip_zoneid; ire_t *ire, *sire; tsol_ire_gw_secattr_t *attrp; boolean_t compute_label; @@ -758,6 +768,15 @@ tsol_compute_label_v6(const cred_t *credp, const in6_addr_t *dst, zoneid = crgetzoneid(credp); /* + * For exclusive stacks we set the zoneid to zero + * to operate as if in the global zone for IRE and conn_t comparisons. + */ + if (ipst->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID) + ip_zoneid = GLOBAL_ZONEID; + else + ip_zoneid = zoneid; + + /* * Fill in a V6 label. If a new format is added here, make certain * that the maximum size of this label is reflected in sys/tsol/tnet.h * as TSOL_MAX_IPV6_OPTION. @@ -772,7 +791,7 @@ tsol_compute_label_v6(const cred_t *credp, const in6_addr_t *dst, * gateway is labeled. 
*/ sire = NULL; - ire = ire_cache_lookup_v6(dst, zoneid, tsl); + ire = ire_cache_lookup_v6(dst, ip_zoneid, tsl, ipst); if (ire != NULL && (ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK | IRE_INTERFACE)) != 0) { @@ -781,8 +800,8 @@ tsol_compute_label_v6(const cred_t *credp, const in6_addr_t *dst, return (0); } else if (ire == NULL) { ire = ire_ftable_lookup_v6(dst, NULL, NULL, 0, NULL, - &sire, zoneid, 0, tsl, (MATCH_IRE_RECURSIVE | - MATCH_IRE_DEFAULT | MATCH_IRE_SECATTR)); + &sire, ip_zoneid, 0, tsl, (MATCH_IRE_RECURSIVE | + MATCH_IRE_DEFAULT | MATCH_IRE_SECATTR), ipst); } /* no route to destination */ @@ -1153,7 +1172,7 @@ tsol_prepend_option_v6(uchar_t *optbuf, ip6_t *ip6h, int buflen) */ int tsol_check_label_v6(const cred_t *credp, mblk_t **mpp, int *addedp, - boolean_t isexempt) + boolean_t isexempt, ip_stack_t *ipst) { mblk_t *mp = *mpp; ip6_t *ip6h; @@ -1177,7 +1196,7 @@ tsol_check_label_v6(const cred_t *credp, mblk_t **mpp, int *addedp, ip6h = (ip6_t *)mp->b_rptr; retv = tsol_compute_label_v6(credp, &ip6h->ip6_dst, opt_storage, - isexempt); + isexempt, ipst); if (retv != 0) return (retv); diff --git a/usr/src/uts/common/inet/ip/tnet.c b/usr/src/uts/common/inet/ip/tnet.c index 82b564c397..92df854751 100644 --- a/usr/src/uts/common/inet/ip/tnet.c +++ b/usr/src/uts/common/inet/ip/tnet.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -923,8 +923,12 @@ tsol_receive_local(const mblk_t *mp, const void *addr, uchar_t version, blequal(label, conn_label)) return (B_TRUE); + /* + * conn_zoneid is global for an exclusive stack, thus we use + * conn_cred to get the zoneid + */ if (!connp->conn_mac_exempt || - (connp->conn_zoneid != GLOBAL_ZONEID && + (crgetzoneid(connp->conn_cred) != GLOBAL_ZONEID && (plabel->tsl_doi != conn_plabel->tsl_doi || !bldominates(conn_label, label)))) { DTRACE_PROBE3( @@ -1179,6 +1183,11 @@ tsol_can_reply_error(const mblk_t *mp) * * This is used by the classifier when the packet matches an ALL_ZONES IRE, and * there's no MLP defined. + * + * Note that we assume that this is only invoked in the ALL_ZONES case. + * Handling other cases would require handle exclusive stack zones where either + * this routine or the callers would have to map from + * the zoneid (zone->zone_id) to what IP uses in conn_zoneid etc. */ zoneid_t tsol_packet_to_zoneid(const mblk_t *mp) @@ -1460,6 +1469,7 @@ tsol_ip_forward(ire_t *ire, mblk_t *mp) uint16_t iplen; boolean_t need_tpc_rele = B_FALSE; ipaddr_t *gw; + ip_stack_t *ipst = ire->ire_ipst; ASSERT(ire != NULL && mp != NULL); ASSERT(ire->ire_stq != NULL); @@ -1659,9 +1669,10 @@ tsol_ip_forward(ire_t *ire, mblk_t *mp) goto keep_label; if ((af == AF_INET && - tsol_check_label(DB_CRED(mp), &mp, &adjust, B_FALSE) != 0) || + tsol_check_label(DB_CRED(mp), &mp, &adjust, B_FALSE, ipst) != 0) || (af == AF_INET6 && - tsol_check_label_v6(DB_CRED(mp), &mp, &adjust, B_FALSE) != 0)) { + tsol_check_label_v6(DB_CRED(mp), &mp, &adjust, B_FALSE, + ipst) != 0)) { mp = NULL; goto keep_label; } @@ -1865,17 +1876,32 @@ tsol_ire_init_gwattr(ire_t *ire, uchar_t ipversion, tsol_gc_t *gc, * * If we can't figure out what it is, then return mlptSingle. That's actually * an error case. + * + * The callers are assume to pass in zone->zone_id and not the zoneid that + * is stored in a conn_t (since the latter will be GLOBAL_ZONEID in an + * exclusive stack zone). 
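The tsol_receive_local() change above makes the same point from the conn_t side: on an exclusive stack conn_zoneid is GLOBAL_ZONEID, so the owning zone has to come from the attached credential. Illustrative stand-in types only:

    typedef int zoneid_t;
    #define GLOBAL_ZONEID   0

    typedef struct cred { zoneid_t cr_zoneid; } cred_t;
    typedef struct conn {
        zoneid_t    conn_zoneid;    /* GLOBAL_ZONEID on exclusive stacks */
        cred_t      *conn_cred;     /* carries the real owning zone */
    } conn_t;

    static zoneid_t
    conn_owning_zone(const conn_t *connp)
    {
        return (connp->conn_cred->cr_zoneid);
    }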
*/ mlp_type_t -tsol_mlp_addr_type(zoneid_t zoneid, uchar_t version, const void *addr) +tsol_mlp_addr_type(zoneid_t zoneid, uchar_t version, const void *addr, + ip_stack_t *ipst) { in_addr_t in4; ire_t *ire; ipif_t *ipif; zoneid_t addrzone; + zoneid_t ip_zoneid; ASSERT(addr != NULL); + /* + * For exclusive stacks we set the zoneid to zero + * to operate as if in the global zone for IRE and conn_t comparisons. + */ + if (ipst->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID) + ip_zoneid = GLOBAL_ZONEID; + else + ip_zoneid = zoneid; + if (version == IPV6_VERSION && IN6_IS_ADDR_V4MAPPED((const in6_addr_t *)addr)) { IN6_V4MAPPED_TO_IPADDR((const in6_addr_t *)addr, in4); @@ -1885,13 +1911,15 @@ tsol_mlp_addr_type(zoneid_t zoneid, uchar_t version, const void *addr) if (version == IPV4_VERSION) { in4 = *(const in_addr_t *)addr; - if (in4 == INADDR_ANY) + if (in4 == INADDR_ANY) { return (mlptBoth); - ire = ire_cache_lookup(in4, zoneid, NULL); + } + ire = ire_cache_lookup(in4, ip_zoneid, NULL, ipst); } else { - if (IN6_IS_ADDR_UNSPECIFIED((const in6_addr_t *)addr)) + if (IN6_IS_ADDR_UNSPECIFIED((const in6_addr_t *)addr)) { return (mlptBoth); - ire = ire_cache_lookup_v6(addr, zoneid, NULL); + } + ire = ire_cache_lookup_v6(addr, ip_zoneid, NULL, ipst); } /* * If we can't find the IRE, then we have to behave exactly like @@ -1905,12 +1933,13 @@ tsol_mlp_addr_type(zoneid_t zoneid, uchar_t version, const void *addr) if (ire == NULL) { if (version == IPV4_VERSION) ipif = ipif_lookup_addr(*(const in_addr_t *)addr, NULL, - zoneid, NULL, NULL, NULL, NULL); + ip_zoneid, NULL, NULL, NULL, NULL, ipst); else ipif = ipif_lookup_addr_v6((const in6_addr_t *)addr, - NULL, zoneid, NULL, NULL, NULL, NULL); - if (ipif == NULL) + NULL, ip_zoneid, NULL, NULL, NULL, NULL, ipst); + if (ipif == NULL) { return (mlptSingle); + } addrzone = ipif->ipif_zoneid; ipif_refrele(ipif); } else { @@ -1947,6 +1976,7 @@ tsol_check_interface_address(const ipif_t *ipif) const char *ifname; boolean_t retval; tsol_rhent_t rhent; + netstack_t *ns = ipif->ipif_ill->ill_ipst->ips_netstack; if (IN6_IS_ADDR_V4MAPPED(&ipif->ipif_v6lcl_addr)) { af = AF_INET; @@ -1957,8 +1987,17 @@ tsol_check_interface_address(const ipif_t *ipif) } tp = find_tpc(&ipif->ipif_v6lcl_addr, IPV6_VERSION, B_FALSE); - zone = ipif->ipif_zoneid == ALL_ZONES ? NULL : - zone_find_by_id(ipif->ipif_zoneid); + + /* assumes that ALL_ZONES implies that there is no exclusive stack */ + if (ipif->ipif_zoneid == ALL_ZONES) { + zone = NULL; + } else if (ns->netstack_stackid == GLOBAL_NETSTACKID) { + /* Shared stack case */ + zone = zone_find_by_id(ipif->ipif_zoneid); + } else { + /* Exclusive stack case */ + zone = zone_find_by_id(crgetzoneid(ipif->ipif_ill->ill_credp)); + } if (zone != NULL) { plabel = zone->zone_slabel; ASSERT(plabel != NULL); diff --git a/usr/src/uts/common/inet/ip/tun.c b/usr/src/uts/common/inet/ip/tun.c index b69acfb32f..b7b7c014c1 100644 --- a/usr/src/uts/common/inet/ip/tun.c +++ b/usr/src/uts/common/inet/ip/tun.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
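In the tnet.c hunk just above, tsol_check_interface_address() picks the owning zone three ways: none for ALL_ZONES addresses, the ipif's own zone on a shared stack, and the zone of the credential that opened the ill on an exclusive stack. A self-contained sketch of that selection (the NO_ZONE sentinel is invented):

    typedef int zoneid_t;
    #define ALL_ZONES   (-1)
    #define NO_ZONE     (-2)    /* invented sentinel for "no owning zone" */

    static zoneid_t
    owning_zoneid(zoneid_t ipif_zoneid, int stack_is_global,
        zoneid_t ill_cred_zoneid)
    {
        if (ipif_zoneid == ALL_ZONES)
            return (NO_ZONE);           /* shared across all zones */
        if (stack_is_global)
            return (ipif_zoneid);       /* shared stack: ipif's zone */
        return (ill_cred_zoneid);       /* exclusive stack: opener's zone */
    }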
*/ @@ -46,6 +46,7 @@ #include <sys/cmn_err.h> #include <sys/debug.h> #include <sys/kmem.h> +#include <sys/netstack.h> #include <sys/systm.h> #include <sys/param.h> @@ -74,6 +75,7 @@ #include <net/if_dl.h> #include <inet/ip_if.h> #include <sys/strsun.h> +#include <inet/ipsec_impl.h> #include <inet/ipdrop.h> #include <inet/tun.h> #include <inet/ipsec_impl.h> @@ -123,7 +125,7 @@ static void icmp_ricmp_err_v6_v6(queue_t *, mblk_t *, mblk_t *, icmp6_t *); static void tun_rput_icmp_err_v6(queue_t *, mblk_t *, mblk_t *); static int tun_rput_tpi(queue_t *, mblk_t *); static int tun_send_bind_req(queue_t *); -static void tun_statinit(tun_stats_t *, char *); +static void tun_statinit(tun_stats_t *, char *, netstackid_t); static int tun_stat_kstat_update(kstat_t *, int); static void tun_wdata_v4(queue_t *, mblk_t *); static void tun_wdata_v6(queue_t *, mblk_t *); @@ -133,6 +135,8 @@ static int tun_wputnext_v6(queue_t *, mblk_t *); static int tun_wputnext_v4(queue_t *, mblk_t *); static boolean_t tun_limit_value_v6(queue_t *, mblk_t *, ip6_t *, int *); static void tun_freemsg_chain(mblk_t *, uint64_t *); +static void *tun_stack_init(netstackid_t, netstack_t *); +static void tun_stack_fini(netstackid_t, void *); /* module's defined constants, globals and data structures */ @@ -238,31 +242,14 @@ static struct tun_encap_limit tun_limit_init_upper_v6 = { 0 }; -/* - * Linked list of tunnels. - */ - -#define TUN_PPA_SZ 64 -#define TUN_LIST_HASH(ppa) ((ppa) % TUN_PPA_SZ) - -/* - * protects global data structures such as tun_ppa_list - * also protects tun_t at ts_next and *ts_atp - * should be acquired before ts_lock - */ -static kmutex_t tun_global_lock; -static tun_stats_t *tun_ppa_list[TUN_PPA_SZ]; static tun_stats_t *tun_add_stat(queue_t *); -#define TUN_T_SZ 251 -#define TUN_BYADDR_LIST_HASH(a) (((a).s6_addr32[3]) % (TUN_T_SZ)) - -tun_t *tun_byaddr_list[TUN_T_SZ]; static void tun_add_byaddr(tun_t *); -static ipsec_tun_pol_t *itp_get_byaddr_fn(uint32_t *, uint32_t *, int); +static ipsec_tun_pol_t *itp_get_byaddr_fn(uint32_t *, uint32_t *, int, + netstack_t *); +/* Setable in /etc/system */ static boolean_t tun_do_fastpath = B_TRUE; -static ipaddr_t relay_rtr_addr_v4 = INADDR_ANY; /* streams linkages */ static struct module_info info = { @@ -326,29 +313,31 @@ _init(void) IP_MAJ = ddi_name_to_major(IP); IP6_MAJ = ddi_name_to_major(IP6); + + /* + * We want to be informed each time a stack is created or + * destroyed in the kernel, so we can maintain the + * set of tun_stack_t's. 
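A user-space toy of what the netstack registration just below provides: a per-stack constructor invoked for every existing and future stack instance, paired with a destructor at teardown (the middle, shutdown, callback is passed as NULL above). All names here are invented for illustration:

    #include <stdio.h>
    #include <stdlib.h>

    typedef void *(*ns_create_t)(int stackid);
    typedef void (*ns_destroy_t)(int stackid, void *data);

    static void *
    toy_stack_init(int stackid)
    {
        printf("creating per-stack state for stack %d\n", stackid);
        return (malloc(16));
    }

    static void
    toy_stack_fini(int stackid, void *data)
    {
        printf("destroying per-stack state for stack %d\n", stackid);
        free(data);
    }

    int
    main(void)
    {
        ns_create_t init = toy_stack_init;
        ns_destroy_t fini = toy_stack_fini;
        void *per_stack[3];

        for (int id = 0; id < 3; id++)      /* three stacks come up */
            per_stack[id] = init(id);
        for (int id = 0; id < 3; id++)      /* and are torn down */
            fini(id, per_stack[id]);
        return (0);
    }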
+ */ + netstack_register(NS_TUN, tun_stack_init, NULL, tun_stack_fini); + rc = mod_install(&modlinkage); - if (rc == 0) { - mutex_init(&tun_global_lock, NULL, MUTEX_DEFAULT, NULL); - } - rw_enter(&itp_get_byaddr_rw_lock, RW_WRITER); - itp_get_byaddr = itp_get_byaddr_fn; - rw_exit(&itp_get_byaddr_rw_lock); + if (rc != 0) + netstack_unregister(NS_TUN); + return (rc); } int _fini(void) { - int rc; + int error; - rc = mod_remove(&modlinkage); - if (rc == 0) { - mutex_destroy(&tun_global_lock); - rw_enter(&itp_get_byaddr_rw_lock, RW_WRITER); - itp_get_byaddr = itp_get_byaddr_dummy; - rw_exit(&itp_get_byaddr_rw_lock); - } - return (rc); + error = mod_remove(&modlinkage); + if (error == 0) + netstack_unregister(NS_TUN); + + return (error); } int @@ -369,6 +358,8 @@ tun_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) tun_t *atp; mblk_t *hello; ipsec_info_t *ii; + netstack_t *ns; + zoneid_t zoneid; if (q->q_ptr != NULL) { /* re-open of an already open instance */ @@ -381,16 +372,31 @@ tun_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) tun1dbg(("tun_open\n")); + ns = netstack_find_by_cred(credp); + ASSERT(ns != NULL); + + /* + * For exclusive stacks we set the zoneid to zero + * to make IP operate as if in the global zone. + */ + if (ns->netstack_stackid != GLOBAL_NETSTACKID) + zoneid = GLOBAL_ZONEID; + else + zoneid = crgetzoneid(credp); + hello = allocb(sizeof (ipsec_info_t), BPRI_HI); - if (hello == NULL) + if (hello == NULL) { + netstack_rele(ns); return (ENOMEM); + } /* allocate per-instance structure */ atp = kmem_zalloc(sizeof (tun_t), KM_SLEEP); atp->tun_state = DL_UNATTACHED; atp->tun_dev = *devp; - atp->tun_zoneid = crgetzoneid(credp); + atp->tun_zoneid = zoneid; + atp->tun_netstack = ns; /* * Based on the lower version of IP, initialize stuff that @@ -423,6 +429,7 @@ tun_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) atp->tun_ip6h.ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; atp->tun_ip6h.ip6_hops = IPV6_DEFAULT_HOPS; } else { + netstack_rele(ns); kmem_free(atp, sizeof (tun_t)); return (ENXIO); } @@ -442,6 +449,7 @@ tun_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) atp->tun_mtu = ATUN_MTU; } else { /* Error. */ + netstack_rele(ns); kmem_free(atp, sizeof (tun_t)); return (ENXIO); } @@ -455,6 +463,7 @@ tun_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) atp->tun_mtu = ATUN_MTU; } else { /* Error. */ + netstack_rele(ns); kmem_free(atp, sizeof (tun_t)); return (ENXIO); } @@ -478,9 +487,12 @@ int tun_close(queue_t *q, int flag, cred_t *cred_p) { tun_t *atp = (tun_t *)q->q_ptr; + netstack_t *ns; ASSERT(atp != NULL); + ns = atp->tun_netstack; + /* Cancel outstanding qtimeouts() or qbufcalls() */ tun_cancel_rec_evs(q, &atp->tun_events); @@ -492,9 +504,11 @@ tun_close(queue_t *q, int flag, cred_t *cred_p) if (atp->tun_itp != NULL) { /* In brackets because of ITP_REFRELE's brackets. */ - ITP_REFRELE(atp->tun_itp); + ITP_REFRELE(atp->tun_itp, ns); } + netstack_rele(ns); + mutex_destroy(&atp->tun_lock); /* remove tun_t from global list */ @@ -859,13 +873,15 @@ tun_freemsg_chain(mblk_t *mp, uint64_t *bytecount) * (tun)->tun_itp_gen so we don't lose races with other possible updates via * PF_POLICY. 
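Earlier in this hunk, tun_open() takes a hold on the caller's netstack up front and must drop it on every failure path; the hold is otherwise released in tun_close(). A compact sketch of that reference discipline, with invented helpers modeling netstack_find_by_cred()/netstack_rele():

    #include <errno.h>
    #include <stdlib.h>

    struct ns { int refs; };            /* stand-in for netstack_t */
    static struct ns the_stack = { 0 };

    static struct ns *
    ns_hold(void)                       /* models netstack_find_by_cred() */
    {
        the_stack.refs++;
        return (&the_stack);
    }

    static void
    ns_rele(struct ns *ns)              /* models netstack_rele() */
    {
        ns->refs--;
    }

    struct instance { struct ns *inst_ns; };

    int
    toy_open(struct instance **out)
    {
        struct ns *ns = ns_hold();
        struct instance *inst = malloc(sizeof (*inst));

        if (inst == NULL) {
            ns_rele(ns);        /* every error exit drops the hold */
            return (ENOMEM);
        }
        inst->inst_ns = ns;     /* the hold is released at close time */
        *out = inst;
        return (0);
    }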
*/ -#define tun_policy_present(tun) (((tun)->tun_itp != NULL) || \ - (((tun)->tun_itp_gen < tunnel_policy_gen) && \ - ((tun)->tun_itp_gen = tunnel_policy_gen) && \ - (((tun)->tun_itp = get_tunnel_policy((tun)->tun_lifname)) != NULL))) +#define tun_policy_present(tun, ns, ipss) \ + (((tun)->tun_itp != NULL) || \ + (((tun)->tun_itp_gen < ipss->ipsec_tunnel_policy_gen) && \ + ((tun)->tun_itp_gen = ipss->ipsec_tunnel_policy_gen) && \ + (((tun)->tun_itp = get_tunnel_policy((tun)->tun_lifname, ns)) \ + != NULL))) /* - * Search tun_byaddr_list for occurrence of tun_t with matching + * Search tuns_byaddr_list for occurrence of tun_t with matching * inner addresses. This function does not take into account * prefixes. Possibly we could generalize this function in the * future with V6_MASK_EQ() and pass in an all 1's prefix for IP @@ -874,11 +890,13 @@ tun_freemsg_chain(mblk_t *mp, uint64_t *bytecount) * This function is not directly called - it's assigned into itp_get_byaddr(). */ static ipsec_tun_pol_t * -itp_get_byaddr_fn(uint32_t *lin, uint32_t *fin, int af) +itp_get_byaddr_fn(uint32_t *lin, uint32_t *fin, int af, netstack_t *ns) { tun_t *tun_list; uint_t index; in6_addr_t lmapped, fmapped, *laddr, *faddr; + ipsec_stack_t *ipss = ns->netstack_ipsec; + tun_stack_t *tuns = ns->netstack_tun; if (af == AF_INET) { laddr = &lmapped; @@ -895,7 +913,7 @@ itp_get_byaddr_fn(uint32_t *lin, uint32_t *fin, int af) /* * it's ok to grab global lock while holding tun_lock/perimeter */ - mutex_enter(&tun_global_lock); + mutex_enter(&tuns->tuns_global_lock); /* * walk through list of tun_t looking for a match of @@ -903,13 +921,13 @@ itp_get_byaddr_fn(uint32_t *lin, uint32_t *fin, int af) * IN6_IPADDR_TO_V4MAPPED(), so v6 matching works for * all cases. */ - for (tun_list = tun_byaddr_list[index]; tun_list; + for (tun_list = tuns->tuns_byaddr_list[index]; tun_list; tun_list = tun_list->tun_next) { if (IN6_ARE_ADDR_EQUAL(&tun_list->tun_laddr, laddr) && IN6_ARE_ADDR_EQUAL(&tun_list->tun_faddr, faddr)) { ipsec_tun_pol_t *itp; - if (!tun_policy_present(tun_list)) { + if (!tun_policy_present(tun_list, ns, ipss)) { tun1dbg(("itp_get_byaddr: No IPsec policy on " "matching tun_t instance %p/%s\n", (void *)tun_list, tun_list->tun_lifname)); @@ -919,7 +937,7 @@ itp_get_byaddr_fn(uint32_t *lin, uint32_t *fin, int af) "IPsec policy\n", (void *)tun_list)); mutex_enter(&tun_list->tun_itp->itp_lock); itp = tun_list->tun_itp; - mutex_exit(&tun_global_lock); + mutex_exit(&tuns->tuns_global_lock); ITP_REFHOLD(itp); mutex_exit(&itp->itp_lock); tun1dbg(("itp_get_byaddr: Found itp %p \n", @@ -931,12 +949,12 @@ itp_get_byaddr_fn(uint32_t *lin, uint32_t *fin, int af) /* didn't find one, return zilch */ tun1dbg(("itp_get_byaddr: No matching tunnel instances with policy\n")); - mutex_exit(&tun_global_lock); + mutex_exit(&tuns->tuns_global_lock); return (NULL); } /* - * Search tun_byaddr_list for occurrence of tun_t, same upper and lower stream, + * Search tuns_byaddr_list for occurrence of tun_t, same upper and lower stream, * and same type (6to4 vs automatic vs configured) * If none is found, insert this tun entry. 
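The lookup and insertion routines around here key tunnel instances by their inner addresses into a small per-stack hash. A simplified model of the bucket search, using IPv4-sized addresses for brevity; the real TUN_BYADDR_LIST_HASH uses the low 32-bit word of the v4-mapped foreign address:

    #include <stddef.h>
    #include <stdint.h>

    #define TUN_T_SZ    251
    #define HASH(a)     ((a) % TUN_T_SZ)    /* models TUN_BYADDR_LIST_HASH */

    typedef struct tun {
        uint32_t    laddr, faddr;   /* inner local/foreign addresses */
        struct tun  *next;
    } tun_t;

    typedef struct tstack {
        tun_t   *byaddr[TUN_T_SZ];  /* models tuns_byaddr_list */
    } tstack_t;

    tun_t *
    find_by_addr(tstack_t *sp, uint32_t laddr, uint32_t faddr)
    {
        for (tun_t *t = sp->byaddr[HASH(faddr)]; t != NULL; t = t->next) {
            if (t->laddr == laddr && t->faddr == faddr)
                return (t);
        }
        return (NULL);
    }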
*/ @@ -948,6 +966,7 @@ tun_add_byaddr(tun_t *atp) uint_t mask = atp->tun_flags & (TUN_LOWER_MASK | TUN_UPPER_MASK); uint_t tun_type = (atp->tun_flags & (TUN_AUTOMATIC | TUN_6TO4)); uint_t index = TUN_BYADDR_LIST_HASH(atp->tun_faddr); + tun_stack_t *tuns = atp->tun_netstack->netstack_tun; tun1dbg(("tun_add_byaddr: index = %d\n", index)); @@ -955,7 +974,7 @@ tun_add_byaddr(tun_t *atp) /* * it's ok to grab global lock while holding tun_lock/perimeter */ - mutex_enter(&tun_global_lock); + mutex_enter(&tuns->tuns_global_lock); /* * walk through list of tun_t looking for a match of @@ -964,7 +983,7 @@ tun_add_byaddr(tun_t *atp) * There shouldn't be all that many tunnels, so a sequential * search of the bucket should be fine. */ - for (tun_list = tun_byaddr_list[index]; tun_list; + for (tun_list = tuns->tuns_byaddr_list[index]; tun_list; tun_list = tun_list->tun_next) { if (tun_list->tun_ppa == ppa && ((tun_list->tun_flags & (TUN_LOWER_MASK | @@ -975,23 +994,23 @@ tun_add_byaddr(tun_t *atp) "tun_stats 0x%p\n", (void *)atp, ppa, (void *)tun_list)); tun1dbg(("tun_add_byaddr: Nothing to do.")); - mutex_exit(&tun_global_lock); + mutex_exit(&tuns->tuns_global_lock); return; } } /* didn't find one, throw it in the global list */ - atp->tun_next = tun_byaddr_list[index]; - atp->tun_ptpn = &(tun_byaddr_list[index]); - if (tun_byaddr_list[index] != NULL) - tun_byaddr_list[index]->tun_ptpn = &(atp->tun_next); - tun_byaddr_list[index] = atp; - mutex_exit(&tun_global_lock); + atp->tun_next = tuns->tuns_byaddr_list[index]; + atp->tun_ptpn = &(tuns->tuns_byaddr_list[index]); + if (tuns->tuns_byaddr_list[index] != NULL) + tuns->tuns_byaddr_list[index]->tun_ptpn = &(atp->tun_next); + tuns->tuns_byaddr_list[index] = atp; + mutex_exit(&tuns->tuns_global_lock); } /* - * Search tun_ppa_list for occurrence of tun_ppa, same lower stream, + * Search tuns_ppa_list for occurrence of tun_ppa, same lower stream, * and same type (6to4 vs automatic vs configured) * If none is found, insert this tun entry and create a new kstat for * the entry. 
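tun_add_stat(), which follows, lets instances with the same PPA, lower stream, and tunnel type share one refcounted stats block; only the first arrival allocates it and creates the kstat. A toy of the sharing logic with locking and kstat creation omitted, matching on PPA only:

    #include <stdlib.h>

    typedef struct stats {
        int         ppa;
        int         refcnt;
        struct stats    *next;
    } stats_t;

    stats_t *
    get_stats(stats_t **bucket, int ppa)
    {
        stats_t *sp;

        for (sp = *bucket; sp != NULL; sp = sp->next) {
            if (sp->ppa == ppa) {       /* real code also checks stream/type */
                sp->refcnt++;           /* share the existing block */
                return (sp);
            }
        }
        if ((sp = calloc(1, sizeof (*sp))) == NULL)
            return (NULL);
        sp->ppa = ppa;
        sp->refcnt = 1;
        sp->next = *bucket;             /* insert at bucket head */
        *bucket = sp;
        return (sp);
    }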
@@ -1010,6 +1029,7 @@ tun_add_stat(queue_t *q) uint_t lower = atp->tun_flags & TUN_LOWER_MASK; uint_t tun_type = (atp->tun_flags & (TUN_AUTOMATIC | TUN_6TO4)); uint_t index = TUN_LIST_HASH(ppa); + tun_stack_t *tuns = atp->tun_netstack->netstack_tun; ASSERT(atp->tun_stats == NULL); @@ -1017,7 +1037,7 @@ tun_add_stat(queue_t *q) /* * it's ok to grab global lock while holding tun_lock/perimeter */ - mutex_enter(&tun_global_lock); + mutex_enter(&tuns->tuns_global_lock); /* * walk through list of tun_stats looking for a match of @@ -1027,7 +1047,7 @@ tun_add_stat(queue_t *q) * search should be fine * XXX - this may change if tunnels get ever get created on the fly */ - for (tun_list = tun_ppa_list[index]; tun_list; + for (tun_list = tuns->tuns_ppa_list[index]; tun_list; tun_list = tun_list->ts_next) { if (tun_list->ts_ppa == ppa && tun_list->ts_lower == lower && @@ -1036,7 +1056,7 @@ tun_add_stat(queue_t *q) "tun_stats 0x%p\n", (void *)atp, ppa, (void *)tun_list)); mutex_enter(&tun_list->ts_lock); - mutex_exit(&tun_global_lock); + mutex_exit(&tuns->tuns_global_lock); ASSERT(tun_list->ts_refcnt > 0); tun_list->ts_refcnt++; ASSERT(atp->tun_kstat_next == NULL); @@ -1058,7 +1078,8 @@ tun_add_stat(queue_t *q) if (atp->tun_lifname[0] != '\0' && atp->tun_itp == NULL) { atp->tun_itp = - get_tunnel_policy(atp->tun_lifname); + get_tunnel_policy(atp->tun_lifname, + atp->tun_netstack); } return (tun_list); } @@ -1076,27 +1097,30 @@ tun_add_stat(queue_t *q) tun_stat->ts_lower = lower; tun_stat->ts_type = tun_type; tun_stat->ts_ppa = ppa; - tun_stat->ts_next = tun_ppa_list[index]; - tun_ppa_list[index] = tun_stat; + tun_stat->ts_next = tuns->tuns_ppa_list[index]; + tuns->tuns_ppa_list[index] = tun_stat; tun_stat->ts_atp = atp; atp->tun_kstat_next = NULL; atp->tun_stats = tun_stat; - mutex_exit(&tun_global_lock); - tun_statinit(tun_stat, q->q_qinfo->qi_minfo->mi_idname); + mutex_exit(&tuns->tuns_global_lock); + tun_statinit(tun_stat, q->q_qinfo->qi_minfo->mi_idname, + atp->tun_netstack->netstack_stackid); } else { - mutex_exit(&tun_global_lock); + mutex_exit(&tuns->tuns_global_lock); } return (tun_stat); } /* - * remove tun from tun_byaddr_list + * remove tun from tuns_byaddr_list * called either holding tun_lock or in perimeter */ static void tun_rem_tun_byaddr_list(tun_t *atp) { - mutex_enter(&tun_global_lock); + tun_stack_t *tuns = atp->tun_netstack->netstack_tun; + + mutex_enter(&tuns->tuns_global_lock); /* * remove tunnel instance from list of tun_t @@ -1109,11 +1133,11 @@ tun_rem_tun_byaddr_list(tun_t *atp) atp->tun_ptpn = NULL; ASSERT(atp->tun_next == NULL); - mutex_exit(&tun_global_lock); + mutex_exit(&tuns->tuns_global_lock); } /* - * remove tun from tun_ppa_list + * remove tun from tuns_ppa_list * called either holding tun_lock or in perimeter */ static void @@ -1123,12 +1147,13 @@ tun_rem_ppa_list(tun_t *atp) tun_stats_t *tun_stat = atp->tun_stats; tun_stats_t **tun_list; tun_t **at_list; + tun_stack_t *tuns = atp->tun_netstack->netstack_tun; if (tun_stat == NULL) return; ASSERT(atp->tun_ppa == tun_stat->ts_ppa); - mutex_enter(&tun_global_lock); + mutex_enter(&tuns->tuns_global_lock); mutex_enter(&tun_stat->ts_lock); atp->tun_stats = NULL; tun_stat->ts_refcnt--; @@ -1145,10 +1170,10 @@ tun_rem_ppa_list(tun_t *atp) (void *)tun_stat)); if (atp->tun_itp != NULL) - itp_unlink(atp->tun_itp); + itp_unlink(atp->tun_itp, atp->tun_netstack); ASSERT(atp->tun_kstat_next == NULL); - for (tun_list = &tun_ppa_list[index]; *tun_list; + for (tun_list = &tuns->tuns_ppa_list[index]; *tun_list; tun_list = 
&(*tun_list)->ts_next) { if (tun_stat == *tun_list) { *tun_list = tun_stat->ts_next; @@ -1156,16 +1181,17 @@ tun_rem_ppa_list(tun_t *atp) break; } } - mutex_exit(&tun_global_lock); + mutex_exit(&tuns->tuns_global_lock); tksp = tun_stat->ts_ksp; tun_stat->ts_ksp = NULL; mutex_exit(&tun_stat->ts_lock); - kstat_delete(tksp); + kstat_delete_netstack(tksp, + atp->tun_netstack->netstack_stackid); mutex_destroy(&tun_stat->ts_lock); kmem_free(tun_stat, sizeof (tun_stats_t)); return; } - mutex_exit(&tun_global_lock); + mutex_exit(&tuns->tuns_global_lock); tun1dbg(("tun_rem_ppa_list: tun 0x%p Removing ref ppa %d tun_stat " \ "0x%p\n", (void *)atp, tun_stat->ts_ppa, (void *)tun_stat)); @@ -1612,6 +1638,7 @@ tun_sparam(queue_t *q, mblk_t *mp) sin6_t *sin6; size_t size; boolean_t new; + ipsec_stack_t *ipss = atp->tun_netstack->netstack_ipsec; /* don't allow changes after dl_bind_req */ if (atp->tun_state == DL_IDLE) { @@ -1836,17 +1863,17 @@ tun_sparam(queue_t *q, mblk_t *mp) * The version number checked out, so just cast * ifta_secinfo to an ipsr. */ - if (ipsec_loaded()) { + if (ipsec_loaded(ipss)) { uerr = tun_set_sec_simple(atp, (ipsec_req_t *)&ta->ifta_secinfo); } else { - if (ipsec_failed()) { + if (ipsec_failed(ipss)) { uerr = EPROTONOSUPPORT; goto nak; } /* Otherwise, try again later and load IPsec. */ (void) putq(q, mp); - ipsec_loader_loadnow(); + ipsec_loader_loadnow(ipss); return; } if (uerr != 0) @@ -1921,6 +1948,9 @@ tun_ioctl(queue_t *q, mblk_t *mp) ipaddr_t *rr_addr; char buf[INET6_ADDRSTRLEN]; struct lifreq *lifr; + netstack_t *ns = atp->tun_netstack; + ipsec_stack_t *ipss = ns->netstack_ipsec; + tun_stack_t *tuns = ns->netstack_tun; lvers = atp->tun_flags & TUN_LOWER_MASK; @@ -1980,7 +2010,8 @@ tun_ioctl(queue_t *q, mblk_t *mp) * dependent. */ - if (tun_policy_present(atp) && tun_thisvers_policy(atp)) { + if (tun_policy_present(atp, ns, ipss) && + tun_thisvers_policy(atp)) { mutex_enter(&atp->tun_itp->itp_lock); if (!(atp->tun_itp->itp_flags & ITPF_P_TUNNEL) && (atp->tun_policy_index >= @@ -2110,7 +2141,7 @@ tun_ioctl(queue_t *q, mblk_t *mp) tun1dbg(("tun_ioctl: 6to4 Relay Router = %s\n", inet_ntop(AF_INET, rr_addr, buf, sizeof (buf)))); - relay_rtr_addr_v4 = *rr_addr; + tuns->tuns_relay_rtr_addr_v4 = *rr_addr; } else { tun1dbg(("tun_ioctl: Invalid 6to4 Relay Router " \ "address (%s)\n", @@ -2136,7 +2167,7 @@ tun_ioctl(queue_t *q, mblk_t *mp) } rr_addr = (ipaddr_t *)mp1->b_rptr; - *rr_addr = relay_rtr_addr_v4; + *rr_addr = tuns->tuns_relay_rtr_addr_v4; break; case DL_IOC_HDR_INFO: uerr = tun_fastpath(q, mp); @@ -2164,7 +2195,8 @@ tun_ioctl(queue_t *q, mblk_t *mp) lifr->lifr_name, LIFNAMSIZ); ASSERT(atp->tun_itp == NULL); atp->tun_itp = - get_tunnel_policy(atp->tun_lifname); + get_tunnel_policy(atp->tun_lifname, + ns); /* * It really doesn't matter if we return * NULL or not. If we get the itp pointer, @@ -2427,7 +2459,7 @@ tun_wproc_mdata(queue_t *q, mblk_t *mp) * filled in in TUNSPARAM cases. */ static void -flush_af(ipsec_policy_head_t *polhead, int ulp_vector) +flush_af(ipsec_policy_head_t *polhead, int ulp_vector, netstack_t *ns) { int dir; int af = (ulp_vector == TUN_U_V4) ? 
IPSEC_AF_V4 : IPSEC_AF_V6; @@ -2439,7 +2471,7 @@ flush_af(ipsec_policy_head_t *polhead, int ulp_vector) for (ip = polhead->iph_root[dir].ipr_nonhash[af]; ip != NULL; ip = nip) { nip = ip->ipsp_hash.hash_next; - IPPOL_UNCHAIN(polhead, ip); + IPPOL_UNCHAIN(polhead, ip, ns); } } } @@ -2449,7 +2481,7 @@ flush_af(ipsec_policy_head_t *polhead, int ulp_vector) */ static boolean_t insert_actual_policies(ipsec_tun_pol_t *itp, ipsec_act_t *actp, uint_t nact, - int ulp_vector) + int ulp_vector, netstack_t *ns) { ipsec_selkey_t selkey; ipsec_policy_t *pol; @@ -2463,7 +2495,7 @@ insert_actual_policies(ipsec_tun_pol_t *itp, ipsec_act_t *actp, uint_t nact, /* v4 inbound */ pol = ipsec_policy_create(&selkey, actp, nact, - IPSEC_PRIO_SOCKET, &itp->itp_next_policy_index); + IPSEC_PRIO_SOCKET, &itp->itp_next_policy_index, ns); if (pol == NULL) return (B_FALSE); pr = &polhead->iph_root[IPSEC_TYPE_INBOUND]; @@ -2472,7 +2504,7 @@ insert_actual_policies(ipsec_tun_pol_t *itp, ipsec_act_t *actp, uint_t nact, /* v4 outbound */ pol = ipsec_policy_create(&selkey, actp, nact, - IPSEC_PRIO_SOCKET, &itp->itp_next_policy_index); + IPSEC_PRIO_SOCKET, &itp->itp_next_policy_index, ns); if (pol == NULL) return (B_FALSE); pr = &polhead->iph_root[IPSEC_TYPE_OUTBOUND]; @@ -2485,7 +2517,7 @@ insert_actual_policies(ipsec_tun_pol_t *itp, ipsec_act_t *actp, uint_t nact, /* v6 inbound */ pol = ipsec_policy_create(&selkey, actp, nact, - IPSEC_PRIO_SOCKET, &itp->itp_next_policy_index); + IPSEC_PRIO_SOCKET, &itp->itp_next_policy_index, ns); if (pol == NULL) return (B_FALSE); pr = &polhead->iph_root[IPSEC_TYPE_INBOUND]; @@ -2494,7 +2526,7 @@ insert_actual_policies(ipsec_tun_pol_t *itp, ipsec_act_t *actp, uint_t nact, /* v6 outbound */ pol = ipsec_policy_create(&selkey, actp, nact, - IPSEC_PRIO_SOCKET, &itp->itp_next_policy_index); + IPSEC_PRIO_SOCKET, &itp->itp_next_policy_index, ns); if (pol == NULL) return (B_FALSE); pr = &polhead->iph_root[IPSEC_TYPE_OUTBOUND]; @@ -2519,6 +2551,8 @@ tun_set_sec_simple(tun_t *atp, ipsec_req_t *ipsr) boolean_t clear_all, old_policy = B_FALSE; ipsec_tun_pol_t *itp; tun_t *other_tun; + netstack_t *ns = atp->tun_netstack; + ipsec_stack_t *ipss = ns->netstack_ipsec; tun1dbg( ("tun_set_sec_simple: adjusting tunnel security the old way.")); @@ -2537,7 +2571,7 @@ tun_set_sec_simple(tun_t *atp, ipsec_req_t *ipsr) #undef REQ_MASK mutex_enter(&atp->tun_lock); - if (!tun_policy_present(atp)) { + if (!tun_policy_present(atp, ns, ipss)) { if (clear_all) { bzero(&atp->tun_secinfo, sizeof (ipsec_req_t)); atp->tun_policy_index = 0; @@ -2546,7 +2580,7 @@ tun_set_sec_simple(tun_t *atp, ipsec_req_t *ipsr) ASSERT(atp->tun_lifname[0] != '\0'); atp->tun_itp = create_tunnel_policy(atp->tun_lifname, - &rc, &atp->tun_itp_gen); + &rc, &atp->tun_itp_gen, ns); /* NOTE: "rc" set by create_tunnel_policy(). */ if (atp->tun_itp == NULL) goto bail; @@ -2554,7 +2588,7 @@ tun_set_sec_simple(tun_t *atp, ipsec_req_t *ipsr) itp = atp->tun_itp; /* Allocate the actvec now, before holding itp or polhead locks. */ - ipsec_actvec_from_req(ipsr, &actp, &nact); + ipsec_actvec_from_req(ipsr, &actp, &nact, ns); if (actp == NULL) { rc = ENOMEM; goto bail; @@ -2584,14 +2618,14 @@ tun_set_sec_simple(tun_t *atp, ipsec_req_t *ipsr) * in the spdosock code-paths, due to backward compatibility. */ ITPF_CLONE(itp->itp_flags); - rc = ipsec_copy_polhead(itp->itp_policy, itp->itp_inactive); + rc = ipsec_copy_polhead(itp->itp_policy, itp->itp_inactive, ns); if (rc != 0) { /* inactive has already been cleared. 
*/ itp->itp_flags &= ~ITPF_IFLAGS; goto mutex_bail; } rw_enter(&itp->itp_policy->iph_lock, RW_WRITER); - flush_af(itp->itp_policy, atp->tun_flags & TUN_UPPER_MASK); + flush_af(itp->itp_policy, atp->tun_flags & TUN_UPPER_MASK, ns); } else { /* Else assume itp->itp_policy is already flushed. */ rw_enter(&itp->itp_policy->iph_lock, RW_WRITER); @@ -2607,7 +2641,7 @@ tun_set_sec_simple(tun_t *atp, ipsec_req_t *ipsr) goto recover_bail; } if (insert_actual_policies(itp, actp, nact, - atp->tun_flags & TUN_UPPER_MASK)) { + atp->tun_flags & TUN_UPPER_MASK, ns)) { rw_exit(&itp->itp_policy->iph_lock); /* * Adjust MTU and make sure the DL side knows what's up. @@ -2647,7 +2681,7 @@ recover_bail: if (old_policy) { /* Recover policy in in active polhead. */ - ipsec_swap_policy(itp->itp_policy, itp->itp_inactive); + ipsec_swap_policy(itp->itp_policy, itp->itp_inactive, ns); ITPF_SWAP(itp->itp_flags); atp->tun_extra_offset = TUN_LINK_EXTRA_OFF; } @@ -2655,7 +2689,7 @@ recover_bail: /* Clear policy in inactive polhead. */ itp->itp_flags &= ~ITPF_IFLAGS; rw_enter(&itp->itp_inactive->iph_lock, RW_WRITER); - ipsec_polhead_flush(itp->itp_inactive); + ipsec_polhead_flush(itp->itp_inactive, ns); rw_exit(&itp->itp_inactive->iph_lock); mutex_bail: @@ -3012,7 +3046,8 @@ tun_rdata_v6(queue_t *q, mblk_t *ipsec_mp, mblk_t *data_mp, tun_t *atp) ASSERT(IN6_ARE_ADDR_EQUAL(&v6dst, &atp->tun_laddr) && IN6_ARE_ADDR_EQUAL(&v6src, &atp->tun_faddr)); if (!ipsec_tun_inbound(ipsec_mp, &data_mp, atp->tun_itp, - inner_iph, NULL, NULL, outer_ip6h, 0)) { + inner_iph, NULL, NULL, outer_ip6h, 0, + atp->tun_netstack)) { data_mp = NULL; ipsec_mp = NULL; atomic_add_32(&atp->tun_InErrors, 1); @@ -3046,7 +3081,7 @@ tun_rdata_v6(queue_t *q, mblk_t *ipsec_mp, mblk_t *data_mp, tun_t *atp) ASSERT(IN6_ARE_ADDR_EQUAL(&v6dst, &atp->tun_laddr)); if (!ipsec_tun_inbound(ipsec_mp, &data_mp, atp->tun_itp, NULL, - ip6h, NULL, outer_ip6h, 0)) { + ip6h, NULL, outer_ip6h, 0, atp->tun_netstack)) { data_mp = NULL; ipsec_mp = NULL; atomic_add_32(&atp->tun_InErrors, 1); @@ -3121,6 +3156,7 @@ tun_rdata_v4(queue_t *q, mblk_t *ipsec_mp, mblk_t *data_mp, tun_t *atp) char buf2[INET6_ADDRSTRLEN]; char buf[TUN_WHO_BUF]; int pullup_len; + tun_stack_t *tuns = atp->tun_netstack->netstack_tun; /* need at least an IP header */ ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (ipha_t)); @@ -3168,7 +3204,7 @@ tun_rdata_v4(queue_t *q, mblk_t *ipsec_mp, mblk_t *data_mp, tun_t *atp) /* NOTE: ipsec_tun_inbound() always frees ipsec_mp. */ if (!ipsec_tun_inbound(ipsec_mp, &data_mp, atp->tun_itp, - inner_iph, NULL, iph, NULL, 0)) { + inner_iph, NULL, iph, NULL, 0, atp->tun_netstack)) { data_mp = NULL; atomic_add_32(&atp->tun_InErrors, 1); goto drop; @@ -3201,7 +3237,7 @@ tun_rdata_v4(queue_t *q, mblk_t *ipsec_mp, mblk_t *data_mp, tun_t *atp) /* NOTE: ipsec_tun_inbound() always frees ipsec_mp. */ if (!ipsec_tun_inbound(ipsec_mp, &data_mp, atp->tun_itp, NULL, - ip6h, iph, NULL, 0)) { + ip6h, iph, NULL, 0, atp->tun_netstack)) { data_mp = NULL; atomic_add_32(&atp->tun_InErrors, 1); goto drop; @@ -3374,7 +3410,7 @@ tun_rdata_v4(queue_t *q, mblk_t *ipsec_mp, mblk_t *data_mp, tun_t *atp) /* * Check if tun module support 6to4 Relay * Router is disabled or enabled. - * relay_rtr_addr_v4 will equal INADDR_ANY + * tuns_relay_rtr_addr_v4 will equal INADDR_ANY * if support is disabled. Otherwise, it will * equal a valid, routable, IPv4 address; * denoting that the packet will be accepted. 
@@ -3384,14 +3420,15 @@ tun_rdata_v4(queue_t *q, mblk_t *ipsec_mp, mblk_t *data_mp, tun_t *atp) * support is disabled by default for * security reasons. */ - if (relay_rtr_addr_v4 == INADDR_ANY) { - tun1dbg(("tun_rdata_v4: " \ - "%s relay_rtr_addr_v4 = %s, " \ - "dropping packet from IPv4 src " \ + if (tuns->tuns_relay_rtr_addr_v4 == + INADDR_ANY) { + tun1dbg(("tun_rdata_v4: " + "%s tuns_relay_rtr_addr_v4 = %s, " + "dropping packet from IPv4 src " "%s\n", tun_who(q, buf), inet_ntop(AF_INET, - &relay_rtr_addr_v4, buf1, - sizeof (buf1)), + &tuns->tuns_relay_rtr_addr_v4, + buf1, sizeof (buf1)), inet_ntop(AF_INET, &v4src, buf2, sizeof (buf2)))); for (nmp = data_mp; nmp != NULL; @@ -3584,7 +3621,7 @@ icmp_ricmp_err_v4_v4(queue_t *q, mblk_t *mp, mblk_t *ipsec_mp) * ipsec_tun_inbound() always frees ipsec_mp. */ if (!ipsec_tun_inbound(ipsec_mp, &mp, atp->tun_itp, inner_ipha, NULL, - outer_ipha, NULL, -outer_hlen)) { + outer_ipha, NULL, -outer_hlen, atp->tun_netstack)) { /* Callee did all of the freeing */ return; } @@ -3739,7 +3776,7 @@ icmp_ricmp_err_v4_v6(queue_t *q, mblk_t *mp, mblk_t *ipsec_mp, icmp6_t *icmph) * ipsec_tun_inbound() always frees ipsec_mp. */ if (!ipsec_tun_inbound(ipsec_mp, &mp, atp->tun_itp, ipha, NULL, NULL, - ip6, -outer_hlen)) + ip6, -outer_hlen, atp->tun_netstack)) /* Callee did all of the freeing */ return; ASSERT(mp == orig_mp); @@ -3894,7 +3931,7 @@ icmp_ricmp_err_v6_v6(queue_t *q, mblk_t *mp, mblk_t *ipsec_mp, icmp6_t *icmph) * ipsec_tun_inbound() always frees ipsec_mp. */ if (!ipsec_tun_inbound(ipsec_mp, &mp, atp->tun_itp, NULL, inner_ip6, - NULL, ip6, -outer_hlen)) + NULL, ip6, -outer_hlen, atp->tun_netstack)) /* Callee did all of the freeing */ return; ASSERT(mp == orig_mp); @@ -4101,7 +4138,7 @@ icmp_ricmp_err_v6_v4(queue_t *q, mblk_t *mp, mblk_t *ipsec_mp) * ipsec_tun_inbound() always frees ipsec_mp. */ if (!ipsec_tun_inbound(ipsec_mp, &mp, atp->tun_itp, NULL, ip6h, - outer_ipha, NULL, -outer_hlen)) + outer_ipha, NULL, -outer_hlen, atp->tun_netstack)) /* Callee did all of the freeing */ return; ASSERT(mp == orig_mp); @@ -4761,7 +4798,7 @@ tun_wdata_v4(queue_t *q, mblk_t *mp) atomic_add_64(&atp->tun_HCOutOctets, (int64_t)msgdsize(mp)); mp = ipsec_tun_outbound(mp, atp, inner_ipha, NULL, outer_ipha, ip6, - hdrlen); + hdrlen, atp->tun_netstack); if (mp == NULL) return; @@ -4888,7 +4925,7 @@ tun_wputnext_v4(queue_t *q, mblk_t *mp) atomic_add_64(&atp->tun_HCOutOctets, (int64_t)msgsize(mp)); mp = ipsec_tun_outbound(mp, atp, inner_ipha, NULL, outer_ipha, ip6, - hdrlen); + hdrlen, atp->tun_netstack); if (mp == NULL) return (0); @@ -5044,7 +5081,8 @@ tun_wputnext_v6(queue_t *q, mblk_t *mp) tun_send_ire_req(q); /* send the packet down the transport stream to IPv4/IPv6 */ - mp = ipsec_tun_outbound(mp, atp, NULL, ip6h, ipha, outer_ip6, hdrlen); + mp = ipsec_tun_outbound(mp, atp, NULL, ip6h, ipha, outer_ip6, hdrlen, + atp->tun_netstack); if (mp == NULL) return (0); @@ -5160,6 +5198,7 @@ tun_wdata_v6(queue_t *q, mblk_t *mp) size_t hdrlen; int encap_limit = 0; struct ip6_opt_tunnel *encap_opt; + tun_stack_t *tuns = atp->tun_netstack->netstack_tun; ASSERT((mp->b_wptr - mp->b_rptr) >= sizeof (ip6_t)); @@ -5292,7 +5331,8 @@ tun_wdata_v6(queue_t *q, mblk_t *mp) * This implementation will drop packets with native * IPv6 destinations if 6to4 Relay Router communication * support is disabled. 
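
Both the receive path above (tun_rdata_v4) and the transmit path below (tun_wdata_v6) now consult the 6to4 relay address of the tunnel's own IP instance instead of the old global relay_rtr_addr_v4. The test reduces to the following; the helper function is hypothetical, but the expressions in it are taken from the hunks:

static boolean_t
tun_6to4_relay_disabled(tun_t *atp)
{
	tun_stack_t *tuns = atp->tun_netstack->netstack_tun;

	/* INADDR_ANY means 6to4 Relay Router support is off for this stack. */
	return (tuns->tuns_relay_rtr_addr_v4 == INADDR_ANY);
}
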
This support is checked - * by examining relay_rtr_addr_v4; INADDR_ANY denotes + * by examining tuns_relay_rtr_addr_v4; INADDR_ANY + * denotes * support is disabled; a valid, routable IPv4 addr * denotes support is enabled. Support is disabled * by default, because there is no standard trust @@ -5307,18 +5347,19 @@ tun_wdata_v6(queue_t *q, mblk_t *mp) /* * destination is a native IPv6 address */ - if (relay_rtr_addr_v4 == INADDR_ANY) { + if (tuns->tuns_relay_rtr_addr_v4 == + INADDR_ANY) { /* * 6to4 Relay Router communication * support is disabled. */ - tun1dbg(("tun_wdata_v6: " \ - "%s relay_rtr_addr_v4 = %s, " \ - "dropping packet with IPv6 dst " \ + tun1dbg(("tun_wdata_v6: " + "%s tuns_relay_rtr_addr_v4 = %s, " + "dropping packet with IPv6 dst " "%s\n", tun_who(q, buf), inet_ntop(AF_INET, - &relay_rtr_addr_v4, buf1, - sizeof (buf1)), + &tuns->tuns_relay_rtr_addr_v4, + buf1, sizeof (buf1)), inet_ntop(AF_INET6, &ip6h->ip6_dst, buf2, sizeof (buf2)))); atomic_add_32(&atp->tun_OutDiscard, 1); @@ -5332,7 +5373,7 @@ tun_wdata_v6(queue_t *q, mblk_t *mp) * 6to4 Relay Router anycast address, * defined in RFC 3068) */ - ipha->ipha_dst = relay_rtr_addr_v4; + ipha->ipha_dst = tuns->tuns_relay_rtr_addr_v4; } } /* @@ -5430,7 +5471,8 @@ tun_wdata_v6(queue_t *q, mblk_t *mp) tun_send_ire_req(q); /* send the packet down the transport stream to IP */ - mp = ipsec_tun_outbound(mp, atp, NULL, ip6h, ipha, outer_ip6, hdrlen); + mp = ipsec_tun_outbound(mp, atp, NULL, ip6h, ipha, outer_ip6, hdrlen, + atp->tun_netstack); if (mp == NULL) return; @@ -5613,6 +5655,9 @@ tun_stat_kstat_update(kstat_t *ksp, int rw) tun_stats_t *tstats; struct tunstat *tunsp; + if (ksp == NULL || ksp->ks_data == NULL) + return (EIO); + tstats = (tun_stats_t *)ksp->ks_private; mutex_enter(&tstats->ts_lock); tunsp = (struct tunstat *)ksp->ks_data; @@ -5706,7 +5751,7 @@ tun_stat_kstat_update(kstat_t *ksp, int rw) * Initialize kstats */ static void -tun_statinit(tun_stats_t *tun_stat, char *modname) +tun_statinit(tun_stats_t *tun_stat, char *modname, netstackid_t stackid) { kstat_t *ksp; struct tunstat *tunsp; @@ -5723,9 +5768,9 @@ tun_statinit(tun_stats_t *tun_stat, char *modname) } (void) sprintf(buf, "%s.%s%d", mod_buf, modname, tun_stat->ts_ppa); tun1dbg(("tunstatinit: Creating kstat %s\n", buf)); - if ((ksp = kstat_create(mod_buf, tun_stat->ts_ppa, buf, "net", + if ((ksp = kstat_create_netstack(mod_buf, tun_stat->ts_ppa, buf, "net", KSTAT_TYPE_NAMED, sizeof (struct tunstat) / sizeof (kstat_named_t), - KSTAT_FLAG_PERSISTENT)) == NULL) { + KSTAT_FLAG_PERSISTENT, stackid)) == NULL) { cmn_err(CE_CONT, "tun: kstat_create failed tun%d", tun_stat->ts_ppa); return; @@ -5793,3 +5838,50 @@ tun_who(queue_t *q, char *buf) "<unknown af>"); return (buf); } + +/* + * Initialize the tunnel stack instance. + */ +/*ARGSUSED*/ +static void * +tun_stack_init(netstackid_t stackid, netstack_t *ns) +{ + tun_stack_t *tuns; + ipsec_stack_t *ipss = ns->netstack_ipsec; + + tuns = (tun_stack_t *)kmem_zalloc(sizeof (*tuns), KM_SLEEP); + tuns->tuns_netstack = ns; + + mutex_init(&tuns->tuns_global_lock, NULL, MUTEX_DEFAULT, NULL); + + rw_enter(&ipss->ipsec_itp_get_byaddr_rw_lock, RW_WRITER); + ipss->ipsec_itp_get_byaddr = itp_get_byaddr_fn; + rw_exit(&ipss->ipsec_itp_get_byaddr_rw_lock); + + return (tuns); +} + +/* + * Free the tunnel stack instance. 
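
Two per-instance mechanisms appear above: kstats are created against a specific stack via kstat_create_netstack(), and the module supplies create/destroy callbacks (tun_stack_init() above, tun_stack_fini() just below) for the framework to run as IP instances come and go. The registration itself is not part of this excerpt; a plausible wiring, with the module-init function name and the NS_TUN module id assumed:

/*
 * Assumed sketch: done once at module load time. netstack_register()
 * arranges for tun_stack_init()/tun_stack_fini() to run for every
 * existing and future IP instance.
 */
void
tun_ddi_g_init(void)
{
	netstack_register(NS_TUN, tun_stack_init, NULL, tun_stack_fini);
}
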
+ */ +/*ARGSUSED*/ +static void +tun_stack_fini(netstackid_t stackid, void *arg) +{ + tun_stack_t *tuns = (tun_stack_t *)arg; + ipsec_stack_t *ipss = tuns->tuns_netstack->netstack_ipsec; + int i; + + rw_enter(&ipss->ipsec_itp_get_byaddr_rw_lock, RW_WRITER); + ipss->ipsec_itp_get_byaddr = itp_get_byaddr_dummy; + rw_exit(&ipss->ipsec_itp_get_byaddr_rw_lock); + + for (i = 0; i < TUN_PPA_SZ; i++) { + ASSERT(tuns->tuns_ppa_list[i] == NULL); + } + for (i = 0; i < TUN_T_SZ; i++) { + ASSERT(tuns->tuns_byaddr_list[i] == NULL); + } + mutex_destroy(&tuns->tuns_global_lock); + kmem_free(tuns, sizeof (*tuns)); +} diff --git a/usr/src/uts/common/inet/ip6.h b/usr/src/uts/common/inet/ip6.h index 200d66f3af..05091833e6 100644 --- a/usr/src/uts/common/inet/ip6.h +++ b/usr/src/uts/common/inet/ip6.h @@ -315,13 +315,10 @@ typedef struct ip6_info ip6i_t; ((ntohl((v6addr).s6_addr32[3]) ^ (i ^ (i >> 8))) % \ ILL_FRAG_HASH_TBL_COUNT) + /* * GLOBAL EXTERNALS */ -extern uint_t ipv6_ire_default_count; /* Number of IPv6 IRE_DEFAULT entries */ -extern uint_t ipv6_ire_default_index; /* Walking IPv6 index used to mod in */ -extern int ipv6_ire_cache_cnt; /* Number of IPv6 IRE_CACHE entries */ - extern const in6_addr_t ipv6_all_ones; extern const in6_addr_t ipv6_all_zeros; extern const in6_addr_t ipv6_loopback; @@ -332,13 +329,6 @@ extern const in6_addr_t ipv6_solicited_node_mcast; extern const in6_addr_t ipv6_unspecified_group; /* - * IPv6 mibs when the interface (ill) is not known. - * When the ill is known the per-interface mib in the ill is used. - */ -extern mib2_ipIfStatsEntry_t ip6_mib; -extern mib2_ipv6IfIcmpEntry_t icmp6_mib; - -/* * FUNCTION PROTOTYPES */ @@ -348,9 +338,9 @@ extern void convert2ascii(char *buf, const in6_addr_t *addr); extern char *inet_ntop(int, const void *, char *, int); extern int inet_pton(int, char *, void *); extern void icmp_time_exceeded_v6(queue_t *, mblk_t *, uint8_t, - boolean_t, boolean_t, zoneid_t); + boolean_t, boolean_t, zoneid_t, ip_stack_t *); extern void icmp_unreachable_v6(queue_t *, mblk_t *, uint8_t, - boolean_t, boolean_t, zoneid_t); + boolean_t, boolean_t, zoneid_t, ip_stack_t *); extern void icmp_inbound_error_fanout_v6(queue_t *, mblk_t *, ip6_t *, icmp6_t *, ill_t *, boolean_t, zoneid_t); extern boolean_t conn_wantpacket_v6(conn_t *, ill_t *, ip6_t *, int, zoneid_t); @@ -359,16 +349,16 @@ extern in6addr_scope_t ip_addr_scope_v6(const in6_addr_t *); extern mblk_t *ip_bind_v6(queue_t *, mblk_t *, conn_t *, ip6_pkt_t *); extern void ip_build_hdrs_v6(uchar_t *, uint_t, ip6_pkt_t *, uint8_t); extern int ip_fanout_send_icmp_v6(queue_t *, mblk_t *, uint_t, - uint_t, uint8_t, uint_t, boolean_t, zoneid_t); + uint_t, uint8_t, uint_t, boolean_t, zoneid_t, ip_stack_t *); extern int ip_find_hdr_v6(mblk_t *, ip6_t *, ip6_pkt_t *, uint8_t *); extern in6_addr_t ip_get_dst_v6(ip6_t *, boolean_t *); extern ip6_rthdr_t *ip_find_rthdr_v6(ip6_t *, uint8_t *); -extern int ip_hdr_complete_v6(ip6_t *, zoneid_t); +extern int ip_hdr_complete_v6(ip6_t *, zoneid_t, ip_stack_t *); extern boolean_t ip_hdr_length_nexthdr_v6(mblk_t *, ip6_t *, uint16_t *, uint8_t **); extern int ip_hdr_length_v6(mblk_t *, ip6_t *); extern int ip_check_v6_mblk(mblk_t *, ill_t *); -extern uint32_t ip_massage_options_v6(ip6_t *, ip6_rthdr_t *); +extern uint32_t ip_massage_options_v6(ip6_t *, ip6_rthdr_t *, netstack_t *); extern void ip_wput_frag_v6(mblk_t *, ire_t *, uint_t, conn_t *, int, int); extern void ip_wput_ipsec_out_v6(queue_t *, mblk_t *, ip6_t *, ill_t *, ire_t *); @@ -399,8 +389,9 @@ extern int 
ip_multirt_apply_membership_v6(int (*fn)(conn_t *, boolean_t, extern void ip_newroute_ipif_v6(queue_t *, mblk_t *, ipif_t *, in6_addr_t, int, zoneid_t); extern void ip_newroute_v6(queue_t *, mblk_t *, const in6_addr_t *, - const in6_addr_t *, ill_t *, zoneid_t); -extern void ip6_kstat_init(void); + const in6_addr_t *, ill_t *, zoneid_t, ip_stack_t *); +extern void *ip6_kstat_init(netstackid_t, ip6_stat_t *); +extern void ip6_kstat_fini(netstackid_t, kstat_t *); extern size_t ip6_get_src_preferences(conn_t *, uint32_t *); extern int ip6_set_src_preferences(conn_t *, uint32_t); extern int ip6_set_pktinfo(cred_t *, conn_t *, struct in6_pktinfo *, diff --git a/usr/src/uts/common/inet/ip6_asp.h b/usr/src/uts/common/inet/ip6_asp.h index 6b4bfb3e0d..79bc7191fc 100644 --- a/usr/src/uts/common/inet/ip6_asp.h +++ b/usr/src/uts/common/inet/ip6_asp.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -78,19 +77,19 @@ typedef struct ip6_asp32 { #endif /* _SYSCALL32 && _LONG_LONG_ALIGNMENT_32 == 4 */ -#define IP6_ASP_TABLE_REFHOLD() { \ - ip6_asp_refcnt++; \ - ASSERT(ip6_asp_refcnt != 0); \ +#define IP6_ASP_TABLE_REFHOLD(ipst) { \ + ipst->ips_ip6_asp_refcnt++; \ + ASSERT(ipst->ips_ip6_asp_refcnt != 0); \ } -#define IP6_ASP_TABLE_REFRELE() { \ - mutex_enter(&ip6_asp_lock); \ - ASSERT(ip6_asp_refcnt != 0); \ - if (--ip6_asp_refcnt == 0) { \ - mutex_exit(&ip6_asp_lock); \ - ip6_asp_check_for_updates(); \ +#define IP6_ASP_TABLE_REFRELE(ipst) { \ + mutex_enter(&ipst->ips_ip6_asp_lock); \ + ASSERT(ipst->ips_ip6_asp_refcnt != 0); \ + if (--ipst->ips_ip6_asp_refcnt == 0) { \ + mutex_exit(&ipst->ips_ip6_asp_lock); \ + ip6_asp_check_for_updates(ipst); \ } else { \ - mutex_exit(&ip6_asp_lock); \ + mutex_exit(&ipst->ips_ip6_asp_lock); \ } \ } @@ -117,14 +116,14 @@ struct dstinforeq { typedef void (*aspfunc_t)(ipsq_t *, queue_t *, mblk_t *, void *); -extern void ip6_asp_free(void); -extern void ip6_asp_init(void); -extern boolean_t ip6_asp_can_lookup(); -extern void ip6_asp_table_refrele(); -extern char *ip6_asp_lookup(const in6_addr_t *, uint32_t *); -extern void ip6_asp_replace(mblk_t *mp, - ip6_asp_t *, size_t, boolean_t, model_t); -extern int ip6_asp_get(ip6_asp_t *, size_t); +extern void ip6_asp_free(ip_stack_t *); +extern void ip6_asp_init(ip_stack_t *); +extern boolean_t ip6_asp_can_lookup(ip_stack_t *); +extern void ip6_asp_table_refrele(ip_stack_t *); +extern char *ip6_asp_lookup(const in6_addr_t *, uint32_t *, ip_stack_t *); +extern void ip6_asp_replace(mblk_t *mp, ip6_asp_t *, size_t, boolean_t, + ip_stack_t *, model_t); +extern int ip6_asp_get(ip6_asp_t *, size_t, ip_stack_t *); extern boolean_t ip6_asp_labelcmp(const char *, const char *); extern void ip6_asp_pending_op(queue_t *, mblk_t *, aspfunc_t); diff --git a/usr/src/uts/common/inet/ip_ftable.h b/usr/src/uts/common/inet/ip_ftable.h index c53e8cc359..6acafba949 100644 --- 
a/usr/src/uts/common/inet/ip_ftable.h +++ b/usr/src/uts/common/inet/ip_ftable.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -53,7 +53,15 @@ struct rt_entry { irb_t rt_irb; }; -/* vehicle for passing args through rn_walktree */ +/* + * vehicle for passing args through rn_walktree + * + * The comment below (and for other netstack_t references) refers + * to the fact that we only do netstack_hold in particular cases, + * such as the references from open streams (ill_t and conn_t's + * pointers). Internally within IP we rely on IP's ability to cleanup e.g. + * ire_t's when an ill goes away. + */ struct rtfuncarg { pfv_t rt_func; char *rt_arg; @@ -61,6 +69,7 @@ struct rtfuncarg { uint_t rt_ire_type; ill_t *rt_ill; zoneid_t rt_zoneid; + ip_stack_t *rt_ipst; /* Does not have a netstack_hold */ }; int rtfunc(struct radix_node *, void *); @@ -70,21 +79,19 @@ typedef struct rtfuncarg rtf_t; struct ts_label_s; extern ire_t *ire_ftable_lookup(ipaddr_t, ipaddr_t, ipaddr_t, int, const ipif_t *, ire_t **, zoneid_t, uint32_t, - const struct ts_label_s *, int); -extern ire_t *ire_lookup_multi(ipaddr_t, zoneid_t); + const struct ts_label_s *, int, ip_stack_t *); +extern ire_t *ire_lookup_multi(ipaddr_t, zoneid_t, ip_stack_t *); extern ire_t *ipif_lookup_multi_ire(ipif_t *, ipaddr_t); -extern void ire_delete_host_redirects(ipaddr_t); +extern void ire_delete_host_redirects(ipaddr_t, ip_stack_t *); extern ire_t *ire_ihandle_lookup_onlink(ire_t *); extern ire_t *ire_forward(ipaddr_t, boolean_t *, ire_t *, ire_t *, - const struct ts_label_s *); + const struct ts_label_s *, ip_stack_t *); extern void ire_ftable_walk(struct rt_entry *, uint_t, uint_t, - ill_t *, zoneid_t, pfv_t, char *); + ill_t *, zoneid_t, pfv_t, char *, ip_stack_t *); extern irb_t *ire_get_bucket(ire_t *); extern uint_t ifindex_lookup(const struct sockaddr *, zoneid_t); extern int ipfil_sendpkt(const struct sockaddr *, mblk_t *, uint_t, zoneid_t); -extern struct radix_node_head *ip_ftable; - extern void irb_refhold_rn(struct radix_node *); extern void irb_refrele_rn(struct radix_node *); diff --git a/usr/src/uts/common/inet/ip_if.h b/usr/src/uts/common/inet/ip_if.h index fd4afba556..1573ad9184 100644 --- a/usr/src/uts/common/inet/ip_if.h +++ b/usr/src/uts/common/inet/ip_if.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* Copyright (c) 1990 Mentat Inc. 
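
The ip6_asp (IPv6 address-selection policy) interfaces show the same conversion for a reference-counted per-stack table: the hold/release macros and every lookup routine now name the ip_stack_t they operate on. A caller-side sketch built only from the prototypes above; the wrapping function itself is invented:

static char *
example_policy_label(const in6_addr_t *addr, uint32_t *precedence,
    ip_stack_t *ipst)
{
	char *label = NULL;

	if (ip6_asp_can_lookup(ipst)) {
		/* Table is held against replacement while we read it. */
		label = ip6_asp_lookup(addr, precedence, ipst);
		ip6_asp_table_refrele(ipst);
	}
	return (label);
}
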
*/ @@ -160,12 +160,16 @@ extern mblk_t *ill_ared_alloc(ill_t *, ipaddr_t); extern void ill_dlpi_done(ill_t *, t_uscalar_t); extern void ill_dlpi_send(ill_t *, mblk_t *); extern mblk_t *ill_dlur_gen(uchar_t *, uint_t, t_uscalar_t, t_scalar_t); +/* NOTE: Keep unmodified ill_lookup_on_ifindex for ipp for now */ +extern ill_t *ill_lookup_on_ifindex_global_instance(uint_t, boolean_t, + queue_t *, mblk_t *, ipsq_func_t, int *); extern ill_t *ill_lookup_on_ifindex(uint_t, boolean_t, queue_t *, mblk_t *, - ipsq_func_t, int *); + ipsq_func_t, int *, ip_stack_t *); extern ill_t *ill_lookup_on_name(char *, boolean_t, - boolean_t, queue_t *, mblk_t *, ipsq_func_t, int *, boolean_t *); -extern uint_t ill_get_next_ifindex(uint_t, boolean_t); -extern uint_t ill_get_ifindex_by_name(char *); + boolean_t, queue_t *, mblk_t *, ipsq_func_t, int *, boolean_t *, + ip_stack_t *); +extern uint_t ill_get_next_ifindex(uint_t, boolean_t, ip_stack_t *); +extern uint_t ill_get_ifindex_by_name(char *, ip_stack_t *); extern ill_t *ill_get_first(boolean_t isv6); extern void ill_ipif_cache_delete(ire_t *, char *); extern void ill_send_all_deferred_mp(ill_t *); @@ -201,7 +205,8 @@ extern void ill_waiter_dcr(ill_t *); extern void ill_trace_ref(ill_t *); extern void ill_untrace_ref(ill_t *); extern boolean_t ill_down_start(queue_t *, mblk_t *); -extern ill_t *ill_lookup_group_v6(const in6_addr_t *, zoneid_t); +extern ill_t *ill_lookup_group_v6(const in6_addr_t *, zoneid_t, + ip_stack_t *); extern void ill_capability_ack(ill_t *, mblk_t *); extern void ill_capability_probe(ill_t *); extern void ill_capability_reset(ill_t *); @@ -210,21 +215,24 @@ extern void ill_group_cleanup(ill_t *); extern int ill_up_ipifs(ill_t *, queue_t *, mblk_t *); extern boolean_t ill_is_probeonly(ill_t *); +extern void ip_loopback_cleanup(ip_stack_t *); extern char *ipif_get_name(const ipif_t *, char *, int); -extern ipif_t *ipif_getby_indexes(uint_t, uint_t, boolean_t); -extern void ipif_init(void); +extern ipif_t *ipif_getby_indexes(uint_t, uint_t, boolean_t, ip_stack_t *); +extern void ipif_init(ip_stack_t *); extern ipif_t *ipif_lookup_addr(ipaddr_t, ill_t *, zoneid_t, queue_t *, - mblk_t *, ipsq_func_t, int *); + mblk_t *, ipsq_func_t, int *, ip_stack_t *); extern ipif_t *ipif_lookup_addr_v6(const in6_addr_t *, ill_t *, zoneid_t, - queue_t *, mblk_t *, ipsq_func_t, int *); -extern zoneid_t ipif_lookup_addr_zoneid(ipaddr_t, ill_t *); -extern zoneid_t ipif_lookup_addr_zoneid_v6(const in6_addr_t *, ill_t *); -extern ipif_t *ipif_lookup_group(ipaddr_t, zoneid_t); -extern ipif_t *ipif_lookup_group_v6(const in6_addr_t *, zoneid_t); + queue_t *, mblk_t *, ipsq_func_t, int *, ip_stack_t *); +extern zoneid_t ipif_lookup_addr_zoneid(ipaddr_t, ill_t *, ip_stack_t *); +extern zoneid_t ipif_lookup_addr_zoneid_v6(const in6_addr_t *, ill_t *, + ip_stack_t *); +extern ipif_t *ipif_lookup_group(ipaddr_t, zoneid_t, ip_stack_t *); +extern ipif_t *ipif_lookup_group_v6(const in6_addr_t *, zoneid_t, + ip_stack_t *); extern ipif_t *ipif_lookup_interface(ipaddr_t, ipaddr_t, - queue_t *, mblk_t *, ipsq_func_t, int *); + queue_t *, mblk_t *, ipsq_func_t, int *, ip_stack_t *); extern ipif_t *ipif_lookup_remote(ill_t *, ipaddr_t, zoneid_t); -extern ipif_t *ipif_lookup_onlink_addr(ipaddr_t, zoneid_t); +extern ipif_t *ipif_lookup_onlink_addr(ipaddr_t, zoneid_t, ip_stack_t *); extern ipif_t *ipif_lookup_seqid(ill_t *, uint_t); extern boolean_t ipif_lookup_zoneid(ill_t *, zoneid_t, int, ipif_t **); @@ -256,7 +264,7 @@ extern boolean_t ipif_cant_setlinklocal(ipif_t *); extern 
int ipif_setlinklocal(ipif_t *); extern void ipif_set_tun_llink(ill_t *, struct iftun_req *); extern ipif_t *ipif_lookup_on_ifindex(uint_t, boolean_t, zoneid_t, queue_t *, - mblk_t *, ipsq_func_t, int *); + mblk_t *, ipsq_func_t, int *, ip_stack_t *); extern ipif_t *ipif_get_next_ipif(ipif_t *curr, ill_t *ill); extern void ipif_ill_refrele_tail(ill_t *ill); extern void ipif_arp_down(ipif_t *ipif); @@ -289,19 +297,20 @@ extern void ip_ll_subnet_defaults(ill_t *, mblk_t *); extern int ip_rt_add(ipaddr_t, ipaddr_t, ipaddr_t, ipaddr_t, int, ipif_t *, ipif_t *, ire_t **, boolean_t, queue_t *, mblk_t *, ipsq_func_t, - struct rtsa_s *); + struct rtsa_s *, ip_stack_t *); extern int ip_mrtun_rt_add(ipaddr_t, int, ipif_t *, ipif_t *, ire_t **, - queue_t *, mblk_t *, ipsq_func_t); + queue_t *, mblk_t *, ipsq_func_t, ip_stack_t *); extern int ip_rt_add_v6(const in6_addr_t *, const in6_addr_t *, const in6_addr_t *, const in6_addr_t *, int, ipif_t *, ire_t **, - queue_t *, mblk_t *, ipsq_func_t, struct rtsa_s *); + queue_t *, mblk_t *, ipsq_func_t, struct rtsa_s *, ip_stack_t *ipst); extern int ip_rt_delete(ipaddr_t, ipaddr_t, ipaddr_t, uint_t, int, - ipif_t *, ipif_t *, boolean_t, queue_t *, mblk_t *, ipsq_func_t); + ipif_t *, ipif_t *, boolean_t, queue_t *, mblk_t *, ipsq_func_t, + ip_stack_t *); extern int ip_mrtun_rt_delete(ipaddr_t, ipif_t *); extern int ip_rt_delete_v6(const in6_addr_t *, const in6_addr_t *, const in6_addr_t *, uint_t, int, ipif_t *, queue_t *, mblk_t *, - ipsq_func_t); + ipsq_func_t, ip_stack_t *); extern int ip_siocdelndp_v6(ipif_t *, sin_t *, queue_t *, mblk_t *, ip_ioctl_cmd_t *, void *); extern int ip_siocqueryndp_v6(ipif_t *, sin_t *, queue_t *, mblk_t *, diff --git a/usr/src/uts/common/inet/ip_impl.h b/usr/src/uts/common/inet/ip_impl.h index a91febb854..d6aa9c88b6 100644 --- a/usr/src/uts/common/inet/ip_impl.h +++ b/usr/src/uts/common/inet/ip_impl.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -509,7 +509,7 @@ typedef struct ip_pdescinfo_s PDESCINFO_STRUCT(2) ip_pdescinfo_t; * Macro that hands off one or more messages directly to DLD * when the interface is marked with ILL_CAPAB_POLL. 
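
The ill_lookup_on_ifindex_global_instance() wrapper kept for ipp gives callers with no netstack context of their own a way to reach the global zone's stack. Its body is not part of this header diff; under the assumption that it is built on netstack_find_by_stackid(), it would look roughly like:

ill_t *
ill_lookup_on_ifindex_global_instance(uint_t index, boolean_t isv6,
    queue_t *q, mblk_t *mp, ipsq_func_t func, int *err)
{
	netstack_t	*ns;
	ip_stack_t	*ipst;
	ill_t		*ill;

	ns = netstack_find_by_stackid(GLOBAL_NETSTACKID);
	if (ns == NULL)
		return (NULL);
	ipst = ns->netstack_ip;

	ill = ill_lookup_on_ifindex(index, isv6, q, mp, func, err, ipst);
	netstack_rele(ns);	/* the returned ill, if any, holds its own refs */
	return (ill);
}
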
*/ -#define IP_DLS_ILL_TX(ill, ipha, mp) { \ +#define IP_DLS_ILL_TX(ill, ipha, mp, ipst) { \ ill_dls_capab_t *ill_dls = ill->ill_dls_capab; \ ASSERT(ILL_DLS_CAPABLE(ill)); \ ASSERT(ill_dls != NULL); \ @@ -518,8 +518,9 @@ typedef struct ip_pdescinfo_s PDESCINFO_STRUCT(2) ip_pdescinfo_t; DTRACE_PROBE4(ip4__physical__out__start, \ ill_t *, NULL, ill_t *, ill, \ ipha_t *, ipha, mblk_t *, mp); \ - FW_HOOKS(ip4_physical_out_event, ipv4firewall_physical_out, \ - NULL, ill, ipha, mp, mp); \ + FW_HOOKS(ipst->ips_ip4_physical_out_event, \ + ipst->ips_ipv4firewall_physical_out, \ + NULL, ill, ipha, mp, mp, ipst); \ DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, mp); \ if (mp != NULL) \ ill_dls->ill_tx(ill_dls->ill_tx_handle, mp); \ @@ -527,7 +528,7 @@ typedef struct ip_pdescinfo_s PDESCINFO_STRUCT(2) ip_pdescinfo_t; extern int ip_wput_frag_mdt_min; extern boolean_t ip_can_frag_mdt(mblk_t *, ssize_t, ssize_t); -extern mblk_t *ip_prepend_zoneid(mblk_t *, zoneid_t); +extern mblk_t *ip_prepend_zoneid(mblk_t *, zoneid_t, ip_stack_t *); #endif /* _KERNEL */ diff --git a/usr/src/uts/common/inet/ip_ire.h b/usr/src/uts/common/inet/ip_ire.h index 78a6d6cb82..af0ba8e6e5 100644 --- a/usr/src/uts/common/inet/ip_ire.h +++ b/usr/src/uts/common/inet/ip_ire.h @@ -58,13 +58,12 @@ extern "C" { ((table_size) - 1)) /* - * Exclusive-or those bytes that are likely to contain the MAC - * address. Assumes EUI-64 format for good hashing. + * To make a byte-order neutral hash for IPv6, just take all the + * bytes in the bottom 32 bits into account. */ #define IRE_ADDR_HASH_V6(addr, table_size) \ - (((addr).s6_addr32[3] ^ \ - (((addr).s6_addr32[3] ^ (addr).s6_addr32[2]) >> 12)) & \ - ((table_size) - 1)) + IRE_ADDR_HASH((addr).s6_addr32[3], table_size) + /* This assumes that the ftable size is a power of 2. */ #define IRE_ADDR_MASK_HASH_V6(addr, mask, table_size) \ ((((addr).s6_addr8[8] & (mask).s6_addr8[8]) ^ \ @@ -175,44 +174,27 @@ typedef struct { /* * Structure for ire_cache_reclaim(). Each field is a fraction i.e. 1 meaning * reclaim all, N meaning reclaim 1/Nth of all entries, 0 meaning reclaim none. + * + * The comment below (and for other netstack_t references) refers + * to the fact that we only do netstack_hold in particular cases, + * such as the references from open streams (ill_t and conn_t's + * pointers). Internally within IP we rely on IP's ability to cleanup e.g. + * ire_t's when an ill goes away. */ typedef struct { int icr_unused; /* Fraction for unused since last reclaim */ int icr_offlink; /* Fraction for offlink without PMTU info */ int icr_pmtu; /* Fraction for offlink with PMTU info */ int icr_onlink; /* Fraction for onlink */ + ip_stack_t *icr_ipst; /* Does not have a netstack_hold */ } ire_cache_reclaim_t; -typedef struct { - uint64_t ire_stats_alloced; /* # of ires alloced */ - uint64_t ire_stats_freed; /* # of ires freed */ - uint64_t ire_stats_inserted; /* # of ires inserted in the bucket */ - uint64_t ire_stats_deleted; /* # of ires deleted from the bucket */ -} ire_stats_t; - -extern ire_stats_t ire_stats_v4; -extern uint32_t ip_cache_table_size; -extern uint32_t ip6_cache_table_size; -extern irb_t *ip_cache_table; -extern uint32_t ip6_ftable_hash_size; - /* * We use atomics so that we get an accurate accounting on the ires. * Otherwise we can't determine leaks correctly. 
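
IP_DLS_ILL_TX() now needs the ip_stack_t so that the firewall hook event it fires (ips_ip4_physical_out_event) belongs to the right instance. Callers on the ill side can derive it straight from the interface; a hypothetical helper, which assumes the ill is ILL_CAPAB_POLL capable as the macro requires:

static void
example_dls_tx(ill_t *ill, ipha_t *ipha, mblk_t *mp)
{
	ip_stack_t *ipst = ill->ill_ipst;	/* same source as ILLQ_TO_IPST() */

	IP_DLS_ILL_TX(ill, ipha, mp, ipst);
}
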
*/ #define BUMP_IRE_STATS(ire_stats, x) atomic_add_64(&(ire_stats).x, 1) -extern irb_t *ip_forwarding_table_v6[]; -extern irb_t *ip_cache_table_v6; -extern irb_t *ip_mrtun_table; -extern irb_t *ip_srcif_table; -extern kmutex_t ire_ft_init_lock; -extern kmutex_t ire_mrtun_lock; -extern kmutex_t ire_srcif_table_lock; -extern ire_stats_t ire_stats_v6; -extern uint_t ire_mrtun_count; -extern uint_t ire_srcif_table_count; - #ifdef _KERNEL struct ts_label_s; @@ -221,8 +203,9 @@ extern in6_addr_t *ip_plen_to_mask_v6(uint_t, in6_addr_t *); extern int ip_ire_advise(queue_t *, mblk_t *, cred_t *); extern int ip_ire_delete(queue_t *, mblk_t *, cred_t *); -extern boolean_t ip_ire_clookup_and_delete(ipaddr_t, ipif_t *); -extern void ip_ire_clookup_and_delete_v6(const in6_addr_t *); +extern boolean_t ip_ire_clookup_and_delete(ipaddr_t, ipif_t *, ip_stack_t *); +extern void ip_ire_clookup_and_delete_v6(const in6_addr_t *, + ip_stack_t *); extern int ip_ire_report(queue_t *, mblk_t *, caddr_t, cred_t *); extern int ip_ire_report_mrtun(queue_t *, mblk_t *, caddr_t, cred_t *); @@ -248,9 +231,9 @@ extern void ire_atomic_end(irb_t *irb_ptr, ire_t *ire); extern void ire_cache_count(ire_t *, char *); extern ire_t *ire_cache_lookup(ipaddr_t, zoneid_t, - const struct ts_label_s *); + const struct ts_label_s *, ip_stack_t *); extern ire_t *ire_cache_lookup_v6(const in6_addr_t *, zoneid_t, - const struct ts_label_s *); + const struct ts_label_s *, ip_stack_t *); extern void ire_cache_reclaim(ire_t *, char *); extern void ire_check_bcast_present(ipif_t *, ipaddr_t, int, boolean_t *, @@ -259,12 +242,11 @@ extern void ire_check_bcast_present(ipif_t *, ipaddr_t, int, boolean_t *, extern ire_t *ire_create_mp(uchar_t *, uchar_t *, uchar_t *, uchar_t *, uchar_t *, uint_t, mblk_t *, queue_t *, queue_t *, ushort_t, mblk_t *, ipif_t *, ill_t *, ipaddr_t, uint32_t, uint32_t, uint32_t, const iulp_t *, - tsol_gc_t *, tsol_gcgrp_t *); - + tsol_gc_t *, tsol_gcgrp_t *, ip_stack_t *); extern ire_t *ire_create(uchar_t *, uchar_t *, uchar_t *, uchar_t *, uchar_t *, uint_t *, mblk_t *, queue_t *, queue_t *, ushort_t, mblk_t *, ipif_t *, ill_t *, ipaddr_t, uint32_t, uint32_t, uint32_t, const iulp_t *, - tsol_gc_t *, tsol_gcgrp_t *); + tsol_gc_t *, tsol_gcgrp_t *, ip_stack_t *); extern ire_t **ire_check_and_create_bcast(ipif_t *, ipaddr_t, ire_t **, int); @@ -272,38 +254,42 @@ extern ire_t **ire_create_bcast(ipif_t *, ipaddr_t, ire_t **); extern ire_t *ire_init(ire_t *, uchar_t *, uchar_t *, uchar_t *, uchar_t *, uchar_t *, uint_t *, mblk_t *, queue_t *, queue_t *, ushort_t, mblk_t *, ipif_t *, ill_t *, ipaddr_t, uint32_t, uint32_t, uint32_t, - const iulp_t *, tsol_gc_t *, tsol_gcgrp_t *); + const iulp_t *, tsol_gc_t *, tsol_gcgrp_t *, ip_stack_t *); extern boolean_t ire_init_common(ire_t *, uint_t *, mblk_t *, queue_t *, queue_t *, ushort_t, mblk_t *, ipif_t *, ill_t *, uint32_t, - uint32_t, uint32_t, uchar_t, const iulp_t *, tsol_gc_t *, tsol_gcgrp_t *); + uint32_t, uint32_t, uchar_t, const iulp_t *, tsol_gc_t *, tsol_gcgrp_t *, + ip_stack_t *); extern ire_t *ire_create_v6(const in6_addr_t *, const in6_addr_t *, const in6_addr_t *, const in6_addr_t *, uint_t *, mblk_t *, queue_t *, queue_t *, ushort_t, mblk_t *, ipif_t *, const in6_addr_t *, uint32_t, uint32_t, uint_t, const iulp_t *, - tsol_gc_t *, tsol_gcgrp_t *); + tsol_gc_t *, tsol_gcgrp_t *, ip_stack_t *); extern ire_t *ire_create_mp_v6(const in6_addr_t *, const in6_addr_t *, const in6_addr_t *, const in6_addr_t *, mblk_t *, queue_t *, queue_t *, ushort_t, mblk_t *, ipif_t *, 
const in6_addr_t *, uint32_t, uint32_t, uint_t, const iulp_t *, - tsol_gc_t *, tsol_gcgrp_t *); + tsol_gc_t *, tsol_gcgrp_t *, ip_stack_t *); extern ire_t *ire_init_v6(ire_t *, const in6_addr_t *, const in6_addr_t *, const in6_addr_t *, const in6_addr_t *, uint_t *, mblk_t *, queue_t *, queue_t *, ushort_t, mblk_t *, ipif_t *, const in6_addr_t *, uint32_t, uint32_t, uint_t, const iulp_t *, - tsol_gc_t *, tsol_gcgrp_t *); + tsol_gc_t *, tsol_gcgrp_t *, ip_stack_t *); -extern void ire_clookup_delete_cache_gw(ipaddr_t, zoneid_t); -extern void ire_clookup_delete_cache_gw_v6(const in6_addr_t *, zoneid_t); +extern void ire_clookup_delete_cache_gw(ipaddr_t, zoneid_t, + ip_stack_t *); +extern void ire_clookup_delete_cache_gw_v6(const in6_addr_t *, zoneid_t, + ip_stack_t *); extern ire_t *ire_ctable_lookup(ipaddr_t, ipaddr_t, int, const ipif_t *, - zoneid_t, const struct ts_label_s *, int); + zoneid_t, const struct ts_label_s *, int, ip_stack_t *); extern ire_t *ire_ctable_lookup_v6(const in6_addr_t *, const in6_addr_t *, - int, const ipif_t *, zoneid_t, const struct ts_label_s *, int); + int, const ipif_t *, zoneid_t, const struct ts_label_s *, int, + ip_stack_t *); extern void ire_delete(ire_t *); extern void ire_delete_cache_gw(ire_t *, char *); @@ -319,7 +305,7 @@ extern void ire_flush_cache_v6(ire_t *, int); extern ire_t *ire_ftable_lookup_v6(const in6_addr_t *, const in6_addr_t *, const in6_addr_t *, int, const ipif_t *, ire_t **, zoneid_t, - uint32_t, const struct ts_label_s *, int); + uint32_t, const struct ts_label_s *, int, ip_stack_t *); extern ire_t *ire_ihandle_lookup_onlink(ire_t *); extern ire_t *ire_ihandle_lookup_offlink(ire_t *, ire_t *); @@ -327,49 +313,52 @@ extern ire_t *ire_ihandle_lookup_offlink_v6(ire_t *, ire_t *); extern boolean_t ire_local_same_ill_group(ire_t *, ire_t *); extern boolean_t ire_local_ok_across_zones(ire_t *, zoneid_t, void *, - const struct ts_label_s *tsl); + const struct ts_label_s *, ip_stack_t *); -extern ire_t *ire_lookup_local(zoneid_t); -extern ire_t *ire_lookup_local_v6(zoneid_t); +extern ire_t *ire_lookup_local(zoneid_t, ip_stack_t *); +extern ire_t *ire_lookup_local_v6(zoneid_t, ip_stack_t *); -extern ire_t *ire_lookup_multi(ipaddr_t, zoneid_t); -extern ire_t *ire_lookup_multi_v6(const in6_addr_t *, zoneid_t); +extern ire_t *ire_lookup_multi(ipaddr_t, zoneid_t, ip_stack_t *); +extern ire_t *ire_lookup_multi_v6(const in6_addr_t *, zoneid_t, + ip_stack_t *); extern ire_t *ire_mrtun_lookup(ipaddr_t, ill_t *); extern void ire_refrele(ire_t *); extern void ire_refrele_notr(ire_t *); extern ire_t *ire_route_lookup(ipaddr_t, ipaddr_t, ipaddr_t, int, - const ipif_t *, ire_t **, zoneid_t, const struct ts_label_s *, int); + const ipif_t *, ire_t **, zoneid_t, const struct ts_label_s *, int, + ip_stack_t *); extern ire_t *ire_route_lookup_v6(const in6_addr_t *, const in6_addr_t *, const in6_addr_t *, int, const ipif_t *, ire_t **, zoneid_t, - const struct ts_label_s *, int); + const struct ts_label_s *, int, ip_stack_t *); extern ire_t *ire_srcif_table_lookup(ipaddr_t, int, ipif_t *, ill_t *, int); extern ill_t *ire_to_ill(const ire_t *); -extern void ire_walk(pfv_t, void *); +extern void ire_walk(pfv_t, void *, ip_stack_t *); extern void ire_walk_ill(uint_t, uint_t, pfv_t, void *, ill_t *); -extern void ire_walk_ill_mrtun(uint_t, uint_t, pfv_t, void *, ill_t *); +extern void ire_walk_ill_mrtun(uint_t, uint_t, pfv_t, void *, ill_t *, + ip_stack_t *); extern void ire_walk_ill_v4(uint_t, uint_t, pfv_t, void *, ill_t *); extern void ire_walk_ill_v6(uint_t, 
uint_t, pfv_t, void *, ill_t *); -extern void ire_walk_v4(pfv_t, void *, zoneid_t); +extern void ire_walk_v4(pfv_t, void *, zoneid_t, ip_stack_t *); extern void ire_walk_ill_tables(uint_t match_flags, uint_t ire_type, pfv_t func, void *arg, size_t ftbl_sz, size_t htbl_sz, irb_t **ipftbl, size_t ctbl_sz, irb_t *ipctbl, ill_t *ill, - zoneid_t zoneid); -extern void ire_walk_srcif_table_v4(pfv_t, void *); -extern void ire_walk_v6(pfv_t, void *, zoneid_t); + zoneid_t zoneid, ip_stack_t *); +extern void ire_walk_srcif_table_v4(pfv_t, void *, ip_stack_t *); +extern void ire_walk_v6(pfv_t, void *, zoneid_t, ip_stack_t *); extern boolean_t ire_multirt_lookup(ire_t **, ire_t **, uint32_t, - const struct ts_label_s *); + const struct ts_label_s *, ip_stack_t *); extern boolean_t ire_multirt_need_resolve(ipaddr_t, - const struct ts_label_s *); + const struct ts_label_s *, ip_stack_t *); extern boolean_t ire_multirt_lookup_v6(ire_t **, ire_t **, uint32_t, - const struct ts_label_s *); + const struct ts_label_s *, ip_stack_t *); extern boolean_t ire_multirt_need_resolve_v6(const in6_addr_t *, - const struct ts_label_s *); + const struct ts_label_s *, ip_stack_t *); extern ire_t *ipif_lookup_multi_ire(ipif_t *, ipaddr_t); extern ire_t *ipif_lookup_multi_ire_v6(ipif_t *, const in6_addr_t *); @@ -383,7 +372,7 @@ extern boolean_t ire_match_args(ire_t *, ipaddr_t, ipaddr_t, ipaddr_t, int, const ipif_t *, zoneid_t, uint32_t, const struct ts_label_s *, int); extern int ire_nce_init(ire_t *, mblk_t *, mblk_t *); extern boolean_t ire_walk_ill_match(uint_t, uint_t, ire_t *, ill_t *, - zoneid_t); + zoneid_t, ip_stack_t *); #endif /* _KERNEL */ diff --git a/usr/src/uts/common/inet/ip_multi.h b/usr/src/uts/common/inet/ip_multi.h index 513d6e6891..8e0f6b3573 100644 --- a/usr/src/uts/common/inet/ip_multi.h +++ b/usr/src/uts/common/inet/ip_multi.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* Copyright (c) 1990 Mentat Inc. */ @@ -36,12 +36,6 @@ extern "C" { #if defined(_KERNEL) #define INFINITY 0xffffffffU - -extern uint_t igmp_deferred_next; -extern uint_t mld_deferred_next; -extern kmutex_t igmp_slowtimeout_lock; -extern kmutex_t mld_slowtimeout_lock; - /* * Enum used to pass ilg status to ip_addmulti() and friends. 
There * are three possibilities: the group is being joined from within ip, @@ -113,15 +107,16 @@ extern int ip_leave_allmulti(ipif_t *); extern void ip_multicast_loopback(queue_t *, ill_t *, mblk_t *, int, zoneid_t); extern int ip_mforward(ill_t *, ipha_t *, mblk_t *); -extern void ip_mroute_decap(queue_t *, mblk_t *); -extern int ip_mroute_mrt(mblk_t *); -extern int ip_mroute_stats(mblk_t *); -extern int ip_mroute_vif(mblk_t *); -extern int ip_mrouter_done(mblk_t *); +extern void ip_mroute_decap(queue_t *, mblk_t *, ill_t *); +extern int ip_mroute_mrt(mblk_t *, ip_stack_t *); +extern int ip_mroute_stats(mblk_t *, ip_stack_t *); +extern int ip_mroute_vif(mblk_t *, ip_stack_t *); +extern int ip_mrouter_done(mblk_t *, ip_stack_t *); extern int ip_mrouter_get(int, queue_t *, uchar_t *); extern int ip_mrouter_set(int, queue_t *, int, uchar_t *, int, mblk_t *); - +extern void ip_mrouter_stack_init(ip_stack_t *); +extern void ip_mrouter_stack_destroy(ip_stack_t *); extern int ip_opt_add_group(conn_t *, boolean_t, ipaddr_t, ipaddr_t, uint_t *, mcast_record_t, ipaddr_t, mblk_t *first_mp); @@ -143,17 +138,16 @@ extern void ip_wput_ctl(queue_t *, mblk_t *); extern int mrt_ioctl(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, ip_ioctl_cmd_t *ipip, void *if_req); -extern int pim_input(queue_t *, mblk_t *); +extern int pim_input(queue_t *, mblk_t *, ill_t *); extern void reset_conn_ipif(ipif_t *); extern void reset_conn_ill(ill_t *); extern void reset_mrt_ill(ill_t *); extern void reset_mrt_vif_ipif(ipif_t *); -extern void igmp_start_timers(unsigned); -extern void mld_start_timers(unsigned); +extern void igmp_start_timers(unsigned, ip_stack_t *); +extern void mld_start_timers(unsigned, ip_stack_t *); /* * Extern variables */ -extern queue_t *ip_g_mrouter; #endif /* _KERNEL */ diff --git a/usr/src/uts/common/inet/ip_ndp.h b/usr/src/uts/common/inet/ip_ndp.h index ad12297624..cca5848748 100644 --- a/usr/src/uts/common/inet/ip_ndp.h +++ b/usr/src/uts/common/inet/ip_ndp.h @@ -86,9 +86,9 @@ typedef struct nce_s { /* * The ndp_g_t structure contains protocol specific information needed * to synchronize and manage neighbor cache entries for IPv4 and IPv6. - * There are 2 such structures, ndp4 and ndp6. - * ndp6 contains the data structures needed for IPv6 Neighbor Discovery. - * ndp4 has IPv4 link layer info in its nce_t structures + * There are 2 such structures, ips_ndp4 and ips_ndp6. + * ips_ndp6 contains the data structures needed for IPv6 Neighbor Discovery. + * ips_ndp4 has IPv4 link layer info in its nce_t structures * Note that the nce_t is not currently used as the arp cache itself; * it is used for the following purposes: * - queue packets in nce_qd_mp while waiting for arp resolution to complete @@ -117,8 +117,6 @@ typedef struct ndp_g_s { boolean_t ndp_g_walker_cleanup; /* true implies defer deletion. 
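
A recurring move in these headers is that a former file-scope global simply becomes an ip_stack_t field, so code that used to read the global reads it through the ipst the caller already carries. The removed ip_g_mrouter is typical; its replacement, ips_ip_g_mrouter, appears later in ip_stack.h. A hypothetical helper showing the call-site change:

static boolean_t
mrouter_registered(ip_stack_t *ipst)
{
	/* formerly: return (ip_g_mrouter != NULL); */
	return (ipst->ips_ip_g_mrouter != NULL);
}
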
*/ } ndp_g_t; -extern ndp_g_t ndp4, ndp6; - /* nce_flags */ #define NCE_F_PERMANENT 0x1 #define NCE_F_MAPPING 0x2 @@ -289,6 +287,9 @@ typedef struct { (addr).s6_addr8[10] ^ (addr).s6_addr8[13] ^ \ (addr).s6_addr8[14] ^ (addr).s6_addr8[15]) % (table_size)) +/* NDP Cache Entry Hash Table */ +#define NCE_TABLE_SIZE 256 + extern void ndp_cache_count(nce_t *, char *); extern void ndp_cache_reclaim(nce_t *, char *); extern void ndp_delete(nce_t *); @@ -298,7 +299,7 @@ extern boolean_t ndp_fastpath_update(nce_t *, void *); extern nd_opt_hdr_t *ndp_get_option(nd_opt_hdr_t *, int, int); extern void ndp_inactive(nce_t *); extern void ndp_input(ill_t *, mblk_t *, mblk_t *); -extern boolean_t ndp_lookup_ipaddr(in_addr_t); +extern boolean_t ndp_lookup_ipaddr(in_addr_t, netstack_t *); extern nce_t *ndp_lookup_v6(ill_t *, const in6_addr_t *, boolean_t); extern nce_t *ndp_lookup_v4(ill_t *, const in_addr_t *, boolean_t); extern int ndp_lookup_then_add(ill_t *, uchar_t *, const void *, @@ -314,7 +315,7 @@ extern int ndp_resolver(ill_t *, const in6_addr_t *, mblk_t *, zoneid_t); extern int ndp_sioc_update(ill_t *, lif_nd_req_t *); extern boolean_t ndp_verify_optlen(nd_opt_hdr_t *, int); extern void ndp_timer(void *); -extern void ndp_walk(ill_t *, pfi_t, void *); +extern void ndp_walk(ill_t *, pfi_t, void *, ip_stack_t *); extern void ndp_walk_common(ndp_g_t *, ill_t *, pfi_t, void *, boolean_t); extern int ndp_add(ill_t *, uchar_t *, const void *, diff --git a/usr/src/uts/common/inet/ip_netinfo.h b/usr/src/uts/common/inet/ip_netinfo.h index 8523f8afea..4be30ebbc8 100644 --- a/usr/src/uts/common/inet/ip_netinfo.h +++ b/usr/src/uts/common/inet/ip_netinfo.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -35,12 +35,14 @@ extern "C" { #ifdef _KERNEL -extern void ip_net_init(); -extern void ip_net_destroy(); -extern void ipv4_hook_init(); -extern void ipv6_hook_init(); -extern void ipv4_hook_destroy(); -extern void ipv6_hook_destroy(); +extern void ip_net_g_init(); +extern void ip_net_g_destroy(); +extern void ip_net_init(ip_stack_t *, netstack_t *); +extern void ip_net_destroy(ip_stack_t *); +extern void ipv4_hook_init(ip_stack_t *); +extern void ipv6_hook_init(ip_stack_t *); +extern void ipv4_hook_destroy(ip_stack_t *); +extern void ipv6_hook_destroy(ip_stack_t *); extern void ip_ne_queue_func(void *); #endif /* _KERNEL */ diff --git a/usr/src/uts/common/inet/ip_rts.h b/usr/src/uts/common/inet/ip_rts.h index 8a8d6d1add..7b780fb112 100644 --- a/usr/src/uts/common/inet/ip_rts.h +++ b/usr/src/uts/common/inet/ip_rts.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -40,12 +40,14 @@ extern "C" { #define TSOL_RTSA_REQUEST_MAX 1 /* one per route destination */ #ifdef _KERNEL + extern void ip_rts_change(int, ipaddr_t, ipaddr_t, ipaddr_t, ipaddr_t, ipaddr_t, int, int, - int); + int, ip_stack_t *); extern void ip_rts_change_v6(int, const in6_addr_t *, const in6_addr_t *, - const in6_addr_t *, const in6_addr_t *, const in6_addr_t *, int, int, int); + const in6_addr_t *, const in6_addr_t *, const in6_addr_t *, int, int, int, + ip_stack_t *); extern void ip_rts_ifmsg(const ipif_t *); @@ -53,7 +55,7 @@ extern void ip_rts_newaddrmsg(int, int, const ipif_t *); extern int ip_rts_request(queue_t *, mblk_t *, cred_t *); -extern void ip_rts_rtmsg(int, ire_t *, int); +extern void ip_rts_rtmsg(int, ire_t *, int, ip_stack_t *); extern mblk_t *rts_alloc_msg(int, int, sa_family_t, uint_t); @@ -66,7 +68,8 @@ extern void rts_fill_msg_v6(int, int, const in6_addr_t *, extern size_t rts_header_msg_size(int); -extern void rts_queue_input(mblk_t *, queue_t *, sa_family_t); +extern void rts_queue_input(mblk_t *, queue_t *, sa_family_t, + ip_stack_t *); #endif /* _KERNEL */ #ifdef __cplusplus diff --git a/usr/src/uts/common/inet/ip_stack.h b/usr/src/uts/common/inet/ip_stack.h new file mode 100644 index 0000000000..22311bcc95 --- /dev/null +++ b/usr/src/uts/common/inet/ip_stack.h @@ -0,0 +1,461 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _INET_IP_STACK_H +#define _INET_IP_STACK_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#include <sys/isa_defs.h> +#include <sys/md5.h> +#include <sys/types.h> +#include <inet/mib2.h> +#include <inet/nd.h> +#include <sys/atomic.h> +#include <sys/socket.h> +#include <sys/netstack.h> +#include <net/if_dl.h> +#include <net/if.h> +#include <netinet/ip.h> +#include <netinet/igmp_var.h> +#include <inet/ip.h> +#include <sys/list.h> +#include <sys/taskq.h> + +#ifdef _KERNEL +#include <netinet/ip6.h> +#include <sys/avl.h> +#include <sys/vmem.h> +#include <sys/squeue.h> +#endif /* _KERNEL */ + +#ifdef _KERNEL + + +/* + * IP statistics. 
+ */ +#define IP_STAT(ipst, x) ((ipst)->ips_ip_statistics.x.value.ui64++) +#define IP_STAT_UPDATE(ipst, x, n) \ + ((ipst)->ips_ip_statistics.x.value.ui64 += (n)) + +typedef struct ip_stat { + kstat_named_t ipsec_fanout_proto; + kstat_named_t ip_udp_fannorm; + kstat_named_t ip_udp_fanmb; + kstat_named_t ip_udp_fanothers; + kstat_named_t ip_udp_fast_path; + kstat_named_t ip_udp_slow_path; + kstat_named_t ip_udp_input_err; + kstat_named_t ip_tcppullup; + kstat_named_t ip_tcpoptions; + kstat_named_t ip_multipkttcp; + kstat_named_t ip_tcp_fast_path; + kstat_named_t ip_tcp_slow_path; + kstat_named_t ip_tcp_input_error; + kstat_named_t ip_db_ref; + kstat_named_t ip_notaligned1; + kstat_named_t ip_notaligned2; + kstat_named_t ip_multimblk3; + kstat_named_t ip_multimblk4; + kstat_named_t ip_ipoptions; + kstat_named_t ip_classify_fail; + kstat_named_t ip_opt; + kstat_named_t ip_udp_rput_local; + kstat_named_t ipsec_proto_ahesp; + kstat_named_t ip_conn_flputbq; + kstat_named_t ip_conn_walk_drain; + kstat_named_t ip_out_sw_cksum; + kstat_named_t ip_in_sw_cksum; + kstat_named_t ip_trash_ire_reclaim_calls; + kstat_named_t ip_trash_ire_reclaim_success; + kstat_named_t ip_ire_arp_timer_expired; + kstat_named_t ip_ire_redirect_timer_expired; + kstat_named_t ip_ire_pmtu_timer_expired; + kstat_named_t ip_input_multi_squeue; + kstat_named_t ip_tcp_in_full_hw_cksum_err; + kstat_named_t ip_tcp_in_part_hw_cksum_err; + kstat_named_t ip_tcp_in_sw_cksum_err; + kstat_named_t ip_tcp_out_sw_cksum_bytes; + kstat_named_t ip_udp_in_full_hw_cksum_err; + kstat_named_t ip_udp_in_part_hw_cksum_err; + kstat_named_t ip_udp_in_sw_cksum_err; + kstat_named_t ip_udp_out_sw_cksum_bytes; + kstat_named_t ip_frag_mdt_pkt_out; + kstat_named_t ip_frag_mdt_discarded; + kstat_named_t ip_frag_mdt_allocfail; + kstat_named_t ip_frag_mdt_addpdescfail; + kstat_named_t ip_frag_mdt_allocd; +} ip_stat_t; + + +/* + * IP6 statistics. + */ +#define IP6_STAT(ipst, x) ((ipst)->ips_ip6_statistics.x.value.ui64++) +#define IP6_STAT_UPDATE(ipst, x, n) \ + ((ipst)->ips_ip6_statistics.x.value.ui64 += (n)) + +typedef struct ip6_stat { + kstat_named_t ip6_udp_fast_path; + kstat_named_t ip6_udp_slow_path; + kstat_named_t ip6_udp_fannorm; + kstat_named_t ip6_udp_fanmb; + kstat_named_t ip6_out_sw_cksum; + kstat_named_t ip6_in_sw_cksum; + kstat_named_t ip6_tcp_in_full_hw_cksum_err; + kstat_named_t ip6_tcp_in_part_hw_cksum_err; + kstat_named_t ip6_tcp_in_sw_cksum_err; + kstat_named_t ip6_tcp_out_sw_cksum_bytes; + kstat_named_t ip6_udp_in_full_hw_cksum_err; + kstat_named_t ip6_udp_in_part_hw_cksum_err; + kstat_named_t ip6_udp_in_sw_cksum_err; + kstat_named_t ip6_udp_out_sw_cksum_bytes; + kstat_named_t ip6_frag_mdt_pkt_out; + kstat_named_t ip6_frag_mdt_discarded; + kstat_named_t ip6_frag_mdt_allocfail; + kstat_named_t ip6_frag_mdt_addpdescfail; + kstat_named_t ip6_frag_mdt_allocd; +} ip6_stat_t; + +typedef struct ire_stats { + uint64_t ire_stats_alloced; /* # of ires alloced */ + uint64_t ire_stats_freed; /* # of ires freed */ + uint64_t ire_stats_inserted; /* # of ires inserted in the bucket */ + uint64_t ire_stats_deleted; /* # of ires deleted from the bucket */ +} ire_stats_t; + + +/* + * IP stack instances + */ +struct ip_stack { + netstack_t *ips_netstack; /* Common netstack */ + + struct ipparam_s *ips_param_arr; /* ndd variable table */ + struct ipndp_s *ips_ndp_arr; + + mib2_ipIfStatsEntry_t ips_ip_mib; /* SNMP fixed size info */ + mib2_icmp_t ips_icmp_mib; + /* + * IPv6 mibs when the interface (ill) is not known. 
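
The statistics macros above just index kstat counters embedded in the caller's ip_stack_t. A hypothetical usage example; the queue is assumed to be a conn-side queue, and both macros plus CONNQ_TO_IPST() appear verbatim in this header:

static void
example_count_udp(queue_t *q)
{
	ip_stack_t *ipst = CONNQ_TO_IPST(q);	/* defined at the end of this file */

	IP_STAT(ipst, ip_udp_fast_path);		/* bump a counter by one */
	IP_STAT_UPDATE(ipst, ip_out_sw_cksum, 1);	/* add an explicit amount */
}
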
+ * When the ill is known the per-interface mib in the ill is used. + */ + mib2_ipIfStatsEntry_t ips_ip6_mib; + mib2_ipv6IfIcmpEntry_t ips_icmp6_mib; + + struct igmpstat ips_igmpstat; + + kstat_t *ips_ip_mibkp; /* kstat exporting ip_mib data */ + kstat_t *ips_icmp_mibkp; /* kstat exporting icmp_mib data */ + kstat_t *ips_ip_kstat; + ip_stat_t ips_ip_statistics; + kstat_t *ips_ip6_kstat; + ip6_stat_t ips_ip6_statistics; + +/* ip.c */ + krwlock_t ips_ip_g_nd_lock; + kmutex_t ips_igmp_timer_lock; + kmutex_t ips_mld_timer_lock; + kmutex_t ips_ip_mi_lock; + kmutex_t ips_ip_addr_avail_lock; + krwlock_t ips_ill_g_lock; + krwlock_t ips_ipsec_capab_ills_lock; + /* protects the list of IPsec capable ills */ + struct ipsec_capab_ill_s *ips_ipsec_capab_ills_ah; + struct ipsec_capab_ill_s *ips_ipsec_capab_ills_esp; + + krwlock_t ips_ill_g_usesrc_lock; + + struct ill_group *ips_illgrp_head_v4; /* Head of IPv4 ill groups */ + struct ill_group *ips_illgrp_head_v6; /* Head of IPv6 ill groups */ + +/* ipclassifier.c - keep in ip_stack_t */ + /* ipclassifier hash tables */ + struct connf_s *ips_rts_clients; + struct connf_s *ips_ipcl_conn_fanout; + struct connf_s *ips_ipcl_bind_fanout; + struct connf_s *ips_ipcl_proto_fanout; + struct connf_s *ips_ipcl_proto_fanout_v6; + struct connf_s *ips_ipcl_udp_fanout; + struct connf_s *ips_ipcl_raw_fanout; + uint_t ips_ipcl_conn_fanout_size; + uint_t ips_ipcl_bind_fanout_size; + uint_t ips_ipcl_udp_fanout_size; + uint_t ips_ipcl_raw_fanout_size; + struct connf_s *ips_ipcl_globalhash_fanout; + int ips_conn_g_index; + +/* ip.c */ + /* Following protected by ips_igmp_timer_lock */ + int ips_igmp_time_to_next; /* Time since last timeout */ + int ips_igmp_timer_fired_last; + int ips_igmp_deferred_next; + timeout_id_t ips_igmp_timeout_id; + /* Protected by igmp_timer_lock */ + boolean_t ips_igmp_timer_setter_active; + + /* Following protected by mld_timer_lock */ + int ips_mld_time_to_next; /* Time since last timeout */ + int ips_mld_timer_fired_last; + int ips_mld_deferred_next; + timeout_id_t ips_mld_timeout_id; + /* Protected by mld_timer_lock */ + boolean_t ips_mld_timer_setter_active; + + /* Protected by igmp_slowtimeout_lock */ + timeout_id_t ips_igmp_slowtimeout_id; + kmutex_t ips_igmp_slowtimeout_lock; + + /* Protected by mld_slowtimeout_lock */ + timeout_id_t ips_mld_slowtimeout_id; + kmutex_t ips_mld_slowtimeout_lock; + + /* IPv4 forwarding table */ + struct radix_node_head *ips_ip_ftable; + + /* This is dynamically allocated in ip_ire_init */ + struct irb *ips_ip_cache_table; + /* This is dynamically allocated in ire_add_mrtun */ + struct irb *ips_ip_mrtun_table; + +#define IPV6_ABITS 128 +#define IP6_MASK_TABLE_SIZE (IPV6_ABITS + 1) /* 129 ptrs */ + + struct irb *ips_ip_forwarding_table_v6[IP6_MASK_TABLE_SIZE]; + /* This is dynamically allocated in ip_ire_init */ + struct irb *ips_ip_cache_table_v6; + + uint32_t ips_ire_handle; + /* + * ire_ft_init_lock is used while initializing ip_forwarding_table + * dynamically in ire_add. 
+ */ + kmutex_t ips_ire_ft_init_lock; + kmutex_t ips_ire_mrtun_lock; /* Protects mrtun table and count */ + kmutex_t ips_ire_srcif_table_lock; /* Same as above */ + /* + * The following counts are used to determine whether a walk is + * needed through the reverse tunnel table or through ills + */ + kmutex_t ips_ire_handle_lock; /* Protects ire_handle */ + + /* # of ires in reverse tun table */ + uint_t ips_ire_mrtun_count; + + /* # of ires in all srcif tables */ + uint_t ips_ire_srcif_table_count; + + uint32_t ips_ip_cache_table_size; + uint32_t ips_ip6_cache_table_size; + uint32_t ips_ip6_ftable_hash_size; + + ire_stats_t ips_ire_stats_v4; /* IPv4 ire statistics */ + ire_stats_t ips_ire_stats_v6; /* IPv6 ire statistics */ + + /* pending binds */ + mblk_t *ips_ip6_asp_pending_ops; + mblk_t *ips_ip6_asp_pending_ops_tail; + + /* Synchronize updates with table usage */ + mblk_t *ips_ip6_asp_pending_update; /* pending table updates */ + + boolean_t ips_ip6_asp_uip; /* table update in progress */ + kmutex_t ips_ip6_asp_lock; /* protect all the above */ + uint32_t ips_ip6_asp_refcnt; /* outstanding references */ + + struct ip6_asp *ips_ip6_asp_table; + /* The number of policy entries in the table */ + uint_t ips_ip6_asp_table_count; + + int ips_ip_g_forward; + int ips_ipv6_forward; + + time_t ips_ip_g_frag_timeout; + clock_t ips_ip_g_frag_timo_ms; + + queue_t *ips_ip_g_mrouter; + + /* Time since last icmp_pkt_err */ + clock_t ips_icmp_pkt_err_last; + /* Number of packets sent in burst */ + uint_t ips_icmp_pkt_err_sent; + /* Used by icmp_send_redirect_v6 for picking random src. */ + uint_t ips_icmp_redirect_v6_src_index; + + /* Protected by ip_mi_lock */ + void *ips_ip_g_head; /* Instance Data List Head */ + + caddr_t ips_ip_g_nd; /* Named Dispatch List Head */ + + /* Multirouting stuff */ + /* Interval (in ms) between consecutive 'bad MTU' warnings */ + hrtime_t ips_ip_multirt_log_interval; + /* Time since last warning issued. */ + hrtime_t ips_multirt_bad_mtu_last_time; + + kmutex_t ips_ip_trash_timer_lock; + timeout_id_t ips_ip_ire_expire_id; /* IRE expiration timer. */ + struct ipsq_s *ips_ipsq_g_head; + uint_t ips_ill_index; /* Used to assign interface indicies */ + /* When set search for unused index */ + boolean_t ips_ill_index_wrap; + + clock_t ips_ip_ire_arp_time_elapsed; + /* Time since IRE cache last flushed */ + clock_t ips_ip_ire_rd_time_elapsed; + /* ... redirect IREs last flushed */ + clock_t ips_ip_ire_pmtu_time_elapsed; + /* Time since path mtu increase */ + + uint_t ips_ip_redirect_cnt; + /* Num of redirect routes in ftable */ + uint_t ips_ipv6_ire_default_count; + /* Number of IPv6 IRE_DEFAULT entries */ + uint_t ips_ipv6_ire_default_index; + /* Walking IPv6 index used to mod in */ + + uint_t ips_loopback_packets; + + /* NDP/NCE structures for IPv4 and IPv6 */ + struct ndp_g_s *ips_ndp4; + struct ndp_g_s *ips_ndp6; + + /* ip_mroute stuff */ + kmutex_t ips_ip_g_mrouter_mutex; + + struct mrtstat *ips_mrtstat; /* Stats for netstat */ + int ips_saved_ip_g_forward; + + /* numvifs is only a hint about the max interface being used. */ + ushort_t ips_numvifs; + kmutex_t ips_numvifs_mutex; + + struct vif *ips_vifs; + struct mfcb *ips_mfcs; /* kernel routing table */ + struct tbf *ips_tbfs; + /* + * One-back cache used to locate a tunnel's vif, + * given a datagram's src ip address. 
+ */ + ipaddr_t ips_last_encap_src; + struct vif *ips_last_encap_vif; + kmutex_t ips_last_encap_lock; /* Protects the above */ + + /* + * reg_vif_num is protected by numvifs_mutex + */ + /* Whether or not special PIM assert processing is enabled. */ + ushort_t ips_reg_vif_num; /* Index to Register vif */ + int ips_pim_assert; + + union ill_g_head_u *ips_ill_g_heads; /* ILL List Head */ + + kstat_t *ips_loopback_ksp; + + uint_t ips_ipif_src_random; + + struct idl_s *ips_conn_drain_list; /* Array of conn drain lists */ + uint_t ips_conn_drain_list_cnt; /* Count of conn_drain_list */ + int ips_conn_drain_list_index; /* Next drain_list */ + + /* + * ID used to assign next free one. + * Increases by one. Once it wraps we search for an unused ID. + */ + uint_t ips_ip_src_id; + boolean_t ips_srcid_wrapped; + + struct srcid_map *ips_srcid_head; + krwlock_t ips_srcid_lock; + + uint64_t ips_ipif_g_seqid; + union phyint_list_u *ips_phyint_g_list; /* start of phyint list */ + + /* + * Reflects value of FAILBACK variable in IPMP config file + * /etc/default/mpathd. Default value is B_TRUE. + * Set to B_FALSE if user disabled failback by configuring + * "FAILBACK=no" in.mpathd uses SIOCSIPMPFAILBACK ioctl to pass this + * information to kernel. + */ + boolean_t ips_ipmp_enable_failback; + +/* ip_neti.c */ + hook_family_t ips_ipv4root; + hook_family_t ips_ipv6root; + + /* + * Hooks for firewalling + */ + hook_event_t ips_ip4_physical_in_event; + hook_event_t ips_ip4_physical_out_event; + hook_event_t ips_ip4_forwarding_event; + hook_event_t ips_ip4_loopback_in_event; + hook_event_t ips_ip4_loopback_out_event; + hook_event_t ips_ip4_nic_events; + hook_event_t ips_ip6_physical_in_event; + hook_event_t ips_ip6_physical_out_event; + hook_event_t ips_ip6_forwarding_event; + hook_event_t ips_ip6_loopback_in_event; + hook_event_t ips_ip6_loopback_out_event; + hook_event_t ips_ip6_nic_events; + + hook_event_token_t ips_ipv4firewall_physical_in; + hook_event_token_t ips_ipv4firewall_physical_out; + hook_event_token_t ips_ipv4firewall_forwarding; + hook_event_token_t ips_ipv4firewall_loopback_in; + hook_event_token_t ips_ipv4firewall_loopback_out; + hook_event_token_t ips_ipv4nicevents; + hook_event_token_t ips_ipv6firewall_physical_in; + hook_event_token_t ips_ipv6firewall_physical_out; + hook_event_token_t ips_ipv6firewall_forwarding; + hook_event_token_t ips_ipv6firewall_loopback_in; + hook_event_token_t ips_ipv6firewall_loopback_out; + hook_event_token_t ips_ipv6nicevents; + + net_data_t ips_ipv4_net_data; + net_data_t ips_ipv6_net_data; +}; +typedef struct ip_stack ip_stack_t; + +/* Finding an ip_stack_t */ +#define CONNQ_TO_IPST(_q) (Q_TO_CONN(_q)->conn_netstack->netstack_ip) +#define ILLQ_TO_IPST(_q) (((ill_t *)(_q)->q_ptr)->ill_ipst) + +#else /* _KERNEL */ +typedef int ip_stack_t; +#endif /* _KERNEL */ + +#ifdef __cplusplus +} +#endif + +#endif /* _INET_IP_STACK_H */ diff --git a/usr/src/uts/common/inet/ipclassifier.h b/usr/src/uts/common/inet/ipclassifier.h index cd85aaedf7..4f81d19601 100644 --- a/usr/src/uts/common/inet/ipclassifier.h +++ b/usr/src/uts/common/inet/ipclassifier.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
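
Everything in the new structure is reached through one of two handles: a conn_t's netstack, on which the open stream holds a reference, or the ip_stack_t cached on the ill. The two macros at the end of ip_stack.h expand to essentially the following (sketch; q is the caller's queue):

	/* CONNQ_TO_IPST(q): go through the conn's held netstack. */
	conn_t		*connp = Q_TO_CONN(q);
	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;

	/* ILLQ_TO_IPST(q): the ill caches its ip_stack_t directly. */
	ill_t		*ill = (ill_t *)q->q_ptr;
	ip_stack_t	*ipst_from_ill = ill->ill_ipst;
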
*/ @@ -121,6 +121,7 @@ typedef void (*edesc_rpf)(void *, mblk_t *, void *); ((connp)->conn_flags & IPCL_IPTUN)) typedef struct connf_s connf_t; + typedef struct { int ctb_depth; @@ -271,6 +272,7 @@ struct conn_s { conn_mac_exempt : 1, /* unlabeled with loose MAC */ conn_spare : 26; + netstack_t *conn_netstack; /* Corresponds to a netstack_hold */ #ifdef CONN_DEBUG #define CONN_TRACE_MAX 10 int conn_trace_last; /* ndx of last used tracebuf */ @@ -290,7 +292,6 @@ struct conn_s { * protected by the per-bucket lock. Each conn_t inserted in the list * points back at the connf_t that heads the bucket. */ - struct connf_s { struct conn_s *connf_head; kmutex_t connf_lock; @@ -385,12 +386,12 @@ struct connf_s { (conn_wantpacket_v6((connp), (ill), (ip6h), \ (fanout_flags), (zoneid)) || ((protocol) == IPPROTO_RSVP))) -#define IPCL_CONN_HASH(src, ports) \ +#define IPCL_CONN_HASH(src, ports, ipst) \ ((unsigned)(ntohl((src)) ^ ((ports) >> 24) ^ ((ports) >> 16) ^ \ - ((ports) >> 8) ^ (ports)) % ipcl_conn_fanout_size) + ((ports) >> 8) ^ (ports)) % (ipst)->ips_ipcl_conn_fanout_size) -#define IPCL_CONN_HASH_V6(src, ports) \ - IPCL_CONN_HASH(V4_PART_OF_V6((src)), (ports)) +#define IPCL_CONN_HASH_V6(src, ports, ipst) \ + IPCL_CONN_HASH(V4_PART_OF_V6((src)), (ports), (ipst)) #define IPCL_CONN_MATCH(connp, proto, src, dst, ports) \ ((connp)->conn_ulp == (proto) && \ @@ -422,7 +423,9 @@ struct connf_s { #define IPCL_PORT_HASH(port, size) \ ((((port) >> 8) ^ (port)) & ((size) - 1)) -#define IPCL_BIND_HASH(lport) IPCL_PORT_HASH(lport, ipcl_bind_fanout_size) +#define IPCL_BIND_HASH(lport, ipst) \ + ((unsigned)(((lport) >> 8) ^ (lport)) % \ + (ipst)->ips_ipcl_bind_fanout_size) #define IPCL_BIND_MATCH(connp, proto, laddr, lport) \ ((connp)->conn_ulp == (proto) && \ @@ -474,15 +477,14 @@ struct connf_s { (connp)->conn_sqp = IP_SQUEUE_GET(lbolt); \ } -#define ipcl_proto_search(protocol) \ - (ipcl_proto_fanout[(protocol)].connf_head) - -#define IPCL_UDP_HASH(lport) IPCL_PORT_HASH(lport, ipcl_udp_fanout_size) +#define IPCL_UDP_HASH(lport, ipst) \ + IPCL_PORT_HASH(lport, (ipst)->ips_ipcl_udp_fanout_size) #define CONN_G_HASH_SIZE 1024 /* Raw socket hash function. 
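The classifier hash macros above now take an explicit ip_stack_t, so each stack hashes into its own fanout tables. A minimal sketch of the calling pattern this implies, assuming the conn_netstack field and CONN_HASH macro from this change; the helper name and the ips_ipcl_conn_fanout array are illustrative assumptions, not code from the changeset:

	/*
	 * Illustrative sketch only: recover the per-stack state from a
	 * conn_t and hash into that stack's own connection fanout.
	 */
	static connf_t *
	example_conn_bucket(conn_t *connp, ipaddr_t src, uint32_t ports)
	{
		/* Every conn_t now holds a netstack reference ... */
		ip_stack_t *ipst = connp->conn_netstack->netstack_ip;

		/* ... so the hash is taken over this stack's table size. */
		return (&ipst->ips_ipcl_conn_fanout[
		    IPCL_CONN_HASH(src, ports, ipst)]);
	}
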
*/ -#define IPCL_RAW_HASH(lport) IPCL_PORT_HASH(lport, ipcl_raw_fanout_size) +#define IPCL_RAW_HASH(lport, ipst) \ + IPCL_PORT_HASH(lport, (ipst)->ips_ipcl_raw_fanout_size) /* * This is similar to IPCL_BIND_MATCH except that the local port check @@ -500,24 +502,12 @@ struct connf_s { (IN6_IS_ADDR_UNSPECIFIED(&(connp)->conn_srcv6) || \ IN6_ARE_ADDR_EQUAL(&(connp)->conn_srcv6, &(laddr)))) -/* hash tables */ -extern connf_t rts_clients; -extern connf_t *ipcl_conn_fanout; -extern connf_t *ipcl_bind_fanout; -extern connf_t ipcl_proto_fanout[IPPROTO_MAX + 1]; -extern connf_t ipcl_proto_fanout_v6[IPPROTO_MAX + 1]; -extern connf_t *ipcl_udp_fanout; -extern connf_t *ipcl_globalhash_fanout; -extern connf_t *ipcl_raw_fanout; -extern uint_t ipcl_conn_fanout_size; -extern uint_t ipcl_bind_fanout_size; -extern uint_t ipcl_udp_fanout_size; -extern uint_t ipcl_raw_fanout_size; - /* Function prototypes */ -extern void ipcl_init(void); -extern void ipcl_destroy(void); -extern conn_t *ipcl_conn_create(uint32_t, int); +extern void ipcl_g_init(void); +extern void ipcl_init(ip_stack_t *); +extern void ipcl_g_destroy(void); +extern void ipcl_destroy(ip_stack_t *); +extern conn_t *ipcl_conn_create(uint32_t, int, netstack_t *); extern void ipcl_conn_destroy(conn_t *); void ipcl_hash_insert_connected(connf_t *, conn_t *); @@ -537,21 +527,26 @@ extern conn_t *ipcl_get_next_conn(connf_t *, conn_t *, uint32_t); void ipcl_proto_insert(conn_t *, uint8_t); void ipcl_proto_insert_v6(conn_t *, uint8_t); -conn_t *ipcl_classify_v4(mblk_t *, uint8_t, uint_t, zoneid_t); -conn_t *ipcl_classify_v6(mblk_t *, uint8_t, uint_t, zoneid_t); -conn_t *ipcl_classify(mblk_t *, zoneid_t); -conn_t *ipcl_classify_raw(mblk_t *, uint8_t, zoneid_t, uint32_t, ipha_t *); +conn_t *ipcl_classify_v4(mblk_t *, uint8_t, uint_t, zoneid_t, ip_stack_t *); +conn_t *ipcl_classify_v6(mblk_t *, uint8_t, uint_t, zoneid_t, ip_stack_t *); +conn_t *ipcl_classify(mblk_t *, zoneid_t, ip_stack_t *); +conn_t *ipcl_classify_raw(mblk_t *, uint8_t, zoneid_t, uint32_t, ipha_t *, + ip_stack_t *); void ipcl_globalhash_insert(conn_t *); void ipcl_globalhash_remove(conn_t *); -void ipcl_walk(pfv_t, void *); -conn_t *ipcl_tcp_lookup_reversed_ipv4(ipha_t *, tcph_t *, int); -conn_t *ipcl_tcp_lookup_reversed_ipv6(ip6_t *, tcpha_t *, int, uint_t); -conn_t *ipcl_lookup_listener_v4(uint16_t, ipaddr_t, zoneid_t); -conn_t *ipcl_lookup_listener_v6(uint16_t, in6_addr_t *, uint_t, zoneid_t); +void ipcl_walk(pfv_t, void *, ip_stack_t *); +conn_t *ipcl_tcp_lookup_reversed_ipv4(ipha_t *, tcph_t *, int, ip_stack_t *); +conn_t *ipcl_tcp_lookup_reversed_ipv6(ip6_t *, tcpha_t *, int, uint_t, + ip_stack_t *); +conn_t *ipcl_lookup_listener_v4(uint16_t, ipaddr_t, zoneid_t, ip_stack_t *); +conn_t *ipcl_lookup_listener_v6(uint16_t, in6_addr_t *, uint_t, zoneid_t, + ip_stack_t *); int conn_trace_ref(conn_t *); int conn_untrace_ref(conn_t *); -conn_t *ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *, ipha_t *, tcph_t *); -conn_t *ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *, ip6_t *, tcph_t *); +conn_t *ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *, ipha_t *, tcph_t *, + ip_stack_t *); +conn_t *ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *, ip6_t *, tcph_t *, + ip_stack_t *); #ifdef __cplusplus } #endif diff --git a/usr/src/uts/common/inet/ipdrop.h b/usr/src/uts/common/inet/ipdrop.h index b9ad882c32..7ddc5403de 100644 --- a/usr/src/uts/common/inet/ipdrop.h +++ b/usr/src/uts/common/inet/ipdrop.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -32,6 +32,7 @@ extern "C" { #endif +#ifdef _KERNEL /* * Opaque data type which will contain state about an entity that is dropping * a packet (e.g. IPsec SPD, IPsec SADB, TCP, IP forwarding, etc.). @@ -45,16 +46,10 @@ void ip_drop_unregister(ipdropper_t *); void ip_drop_packet(mblk_t *, boolean_t, ill_t *, ire_t *, struct kstat_named *, ipdropper_t *); -extern kstat_t *ip_drop_kstat; -extern struct ip_dropstats *ip_drop_types; -void ip_drop_init(void); -void ip_drop_destroy(void); - /* * ip_dropstats - When a protocol developer comes up with a new reason to * drop a packet, it should have a bean counter placed here in this structure, - * an ipdrops_* definition for that bean counter, and an initializer in - * ipdrop.c's ip_drop_init(). + * and an initializer in ipdrop.c's ip_drop_init(). * * This will suffice until we come up with a more dynamic way of adding * named kstats to a single kstat instance (if that is possible). @@ -126,76 +121,7 @@ struct ip_dropstats { kstat_named_t ipds_ip_ipsec_not_loaded; }; -/* - * Use this section to create easy-to-name definitions for specific IP Drop - * statistics. As a naming convention, prefix them with ipdrops_<foo>. - */ -/* TCP IPsec drop statistics. */ -#define ipdrops_tcp_clear ip_drop_types->ipds_tcp_clear -#define ipdrops_tcp_secure ip_drop_types->ipds_tcp_secure -#define ipdrops_tcp_mismatch ip_drop_types->ipds_tcp_mismatch -#define ipdrops_tcp_ipsec_alloc ip_drop_types->ipds_tcp_ipsec_alloc - -/* SADB-specific drop statistics. */ -#define ipdrops_sadb_inlarval_timeout ip_drop_types->ipds_sadb_inlarval_timeout -#define ipdrops_sadb_inlarval_replace ip_drop_types->ipds_sadb_inlarval_replace -#define ipdrops_sadb_acquire_nomem ip_drop_types->ipds_sadb_acquire_nomem -#define ipdrops_sadb_acquire_toofull ip_drop_types->ipds_sadb_acquire_toofull -#define ipdrops_sadb_acquire_timeout ip_drop_types->ipds_sadb_acquire_timeout - -/* SPD drop statistics. 
*/ -#define ipdrops_spd_ahesp_diffid ip_drop_types->ipds_spd_ahesp_diffid -#define ipdrops_spd_loopback_mismatch ip_drop_types->ipds_spd_loopback_mismatch -#define ipdrops_spd_explicit ip_drop_types->ipds_spd_explicit -#define ipdrops_spd_got_secure ip_drop_types->ipds_spd_got_secure -#define ipdrops_spd_got_clear ip_drop_types->ipds_spd_got_clear -#define ipdrops_spd_bad_ahalg ip_drop_types->ipds_spd_bad_ahalg -#define ipdrops_spd_got_ah ip_drop_types->ipds_spd_got_ah -#define ipdrops_spd_bad_espealg ip_drop_types->ipds_spd_bad_espealg -#define ipdrops_spd_bad_espaalg ip_drop_types->ipds_spd_bad_espaalg -#define ipdrops_spd_got_esp ip_drop_types->ipds_spd_got_esp -#define ipdrops_spd_got_selfencap ip_drop_types->ipds_spd_got_selfencap -#define ipdrops_spd_bad_selfencap ip_drop_types->ipds_spd_bad_selfencap -#define ipdrops_spd_nomem ip_drop_types->ipds_spd_nomem -#define ipdrops_spd_ah_badid ip_drop_types->ipds_spd_ah_badid -#define ipdrops_spd_esp_badid ip_drop_types->ipds_spd_esp_badid -#define ipdrops_spd_ah_innermismatch \ - ip_drop_types->ipds_spd_ah_innermismatch -#define ipdrops_spd_esp_innermismatch \ - ip_drop_types->ipds_spd_esp_innermismatch -#define ipdrops_spd_no_policy ip_drop_types->ipds_spd_no_policy -#define ipdrops_spd_malformed_packet ip_drop_types->ipds_spd_malformed_packet -#define ipdrops_spd_malformed_frag ip_drop_types->ipds_spd_malformed_frag -#define ipdrops_spd_overlap_frag ip_drop_types->ipds_spd_overlap_frag -#define ipdrops_spd_evil_frag ip_drop_types->ipds_spd_evil_frag -#define ipdrops_spd_max_frags ip_drop_types->ipds_spd_max_frags - -/* ESP-specific drop statistics. */ -#define ipdrops_esp_nomem ip_drop_types->ipds_esp_nomem -#define ipdrops_esp_no_sa ip_drop_types->ipds_esp_no_sa -#define ipdrops_esp_early_replay ip_drop_types->ipds_esp_early_replay -#define ipdrops_esp_replay ip_drop_types->ipds_esp_replay -#define ipdrops_esp_bytes_expire ip_drop_types->ipds_esp_bytes_expire -#define ipdrops_esp_bad_padlen ip_drop_types->ipds_esp_bad_padlen -#define ipdrops_esp_bad_padding ip_drop_types->ipds_esp_bad_padding -#define ipdrops_esp_bad_auth ip_drop_types->ipds_esp_bad_auth -#define ipdrops_esp_crypto_failed ip_drop_types->ipds_esp_crypto_failed -#define ipdrops_esp_icmp ip_drop_types->ipds_esp_icmp - -/* AH-specific drop statistics. */ -#define ipdrops_ah_nomem ip_drop_types->ipds_ah_nomem -#define ipdrops_ah_bad_v6_hdrs ip_drop_types->ipds_ah_bad_v6_hdrs -#define ipdrops_ah_bad_v4_opts ip_drop_types->ipds_ah_bad_v4_opts -#define ipdrops_ah_no_sa ip_drop_types->ipds_ah_no_sa -#define ipdrops_ah_bad_length ip_drop_types->ipds_ah_bad_length -#define ipdrops_ah_bad_auth ip_drop_types->ipds_ah_bad_auth -#define ipdrops_ah_crypto_failed ip_drop_types->ipds_ah_crypto_failed -#define ipdrops_ah_early_replay ip_drop_types->ipds_ah_early_replay -#define ipdrops_ah_replay ip_drop_types->ipds_ah_replay -#define ipdrops_ah_bytes_expire ip_drop_types->ipds_ah_bytes_expire - -/* IP-specific drop statistics. */ -#define ipdrops_ip_ipsec_not_loaded ip_drop_types->ipds_ip_ipsec_not_loaded +#endif /* _KERNEL */ #ifdef __cplusplus } diff --git a/usr/src/uts/common/inet/ipf/fil.c b/usr/src/uts/common/inet/ipf/fil.c index b882b8ee9c..0f0b80392f 100644 --- a/usr/src/uts/common/inet/ipf/fil.c +++ b/usr/src/uts/common/inet/ipf/fil.c @@ -3,7 +3,7 @@ * * See the IPFILTER.LICENCE file for details on licencing. * - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -115,6 +115,7 @@ struct file; #include "netinet/ip_state.h" #include "netinet/ip_proxy.h" #include "netinet/ip_auth.h" +#include "netinet/ipf_stack.h" #ifdef IPFILTER_SCAN # include "netinet/ip_scan.h" #endif @@ -157,41 +158,7 @@ extern int opts; #endif /* _KERNEL */ -fr_info_t frcache[2][8]; -struct filterstats frstats[2] = { { 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0 } }; -struct frentry *ipfilter[2][2] = { { NULL, NULL }, { NULL, NULL } }, - *ipfilter6[2][2] = { { NULL, NULL }, { NULL, NULL } }, - *ipacct6[2][2] = { { NULL, NULL }, { NULL, NULL } }, - *ipacct[2][2] = { { NULL, NULL }, { NULL, NULL } }, - *ipnatrules[2][2] = { { NULL, NULL }, { NULL, NULL } }; -struct frgroup *ipfgroups[IPL_LOGSIZE][2]; char ipfilter_version[] = IPL_VERSION; -int fr_refcnt = 0; -/* - * For fr_running: - * 0 == loading, 1 = running, -1 = disabled, -2 = unloading - */ -int fr_running = 0; -int fr_flags = IPF_LOGGING; -int fr_active = 0; -int fr_control_forwarding = 0; -int fr_update_ipid = 0; -u_short fr_ip_id = 0; -int fr_chksrc = 0; /* causes a system crash if enabled */ -int fr_minttl = 4; -int fr_icmpminfragmtu = 68; -u_long fr_frouteok[2] = {0, 0}; -u_long fr_userifqs = 0; -u_long fr_badcoalesces[2] = {0, 0}; -u_char ipf_iss_secret[32]; -#if SOLARIS2 >= 10 -int ipf_loopback = 0; -#endif -#if defined(IPFILTER_DEFAULT_BLOCK) -int fr_pass = FR_BLOCK|FR_NOMATCH; -#else -int fr_pass = (IPF_DEFAULT_PASS)|FR_NOMATCH; -#endif int fr_features = 0 #ifdef IPFILTER_LKM | IPF_FEAT_LKM @@ -224,10 +191,11 @@ int fr_features = 0 static INLINE int fr_ipfcheck __P((fr_info_t *, frentry_t *, int)); static int fr_portcheck __P((frpcmp_t *, u_short *)); -static int frflushlist __P((int, minor_t, int *, frentry_t **)); +static int frflushlist __P((int, minor_t, int *, frentry_t **, + ipf_stack_t *)); static ipfunc_t fr_findfunc __P((ipfunc_t)); static frentry_t *fr_firewall __P((fr_info_t *, u_32_t *)); -static int fr_funcinit __P((frentry_t *fr)); +static int fr_funcinit __P((frentry_t *fr, ipf_stack_t *)); static INLINE void frpr_ah __P((fr_info_t *)); static INLINE void frpr_esp __P((fr_info_t *)); static INLINE void frpr_gre __P((fr_info_t *)); @@ -241,14 +209,16 @@ static INLINE void frpr_tcpcommon __P((fr_info_t *)); static INLINE void frpr_udpcommon __P((fr_info_t *)); static INLINE int fr_updateipid __P((fr_info_t *)); #ifdef IPFILTER_LOOKUP -static int fr_grpmapinit __P((frentry_t *fr)); -static INLINE void *fr_resolvelookup __P((u_int, u_int, lookupfunc_t *)); +static int fr_grpmapinit __P((frentry_t *fr, ipf_stack_t *)); +static INLINE void *fr_resolvelookup __P((u_int, u_int, lookupfunc_t *, + ipf_stack_t *)); #endif -static void frsynclist __P((int, int, void *, char *, frentry_t *)); +static void frsynclist __P((int, int, void *, char *, frentry_t *, + ipf_stack_t *)); static void *fr_ifsync __P((int, int, char *, char *, - void *, void *)); -static ipftuneable_t *fr_findtunebyname __P((const char *)); -static ipftuneable_t *fr_findtunebycookie __P((void *, void **)); + void *, void *, ipf_stack_t *)); +static ipftuneable_t *fr_findtunebyname __P((const char *, ipf_stack_t *)); +static ipftuneable_t *fr_findtunebycookie __P((void *, void **, ipf_stack_t *)); /* @@ -965,6 +935,7 @@ fr_info_t *fin; int minicmpsz = sizeof(struct icmp); icmphdr_t *icmp; ip_t *oip; + ipf_stack_t *ifs = fin->fin_ifs; if (fin->fin_off != 0) { frpr_short(fin, ICMPERR_ICMPHLEN); @@ -1011,7 +982,7 @@ fr_info_t *fin; */ case ICMP_UNREACH : if (icmp->icmp_code == ICMP_UNREACH_NEEDFRAG) { - if (icmp->icmp_nextmtu < fr_icmpminfragmtu) + if 
(icmp->icmp_nextmtu < ifs->ifs_fr_icmpminfragmtu) fin->fin_flx |= FI_BAD; } /* FALLTHRU */ @@ -1697,6 +1668,7 @@ int portcmp; fripf_t *fri; fr_ip_t *fi; int i; + ipf_stack_t *ifs = fin->fin_ifs; fi = &fin->fin_fi; fri = fr->fr_ipf; @@ -1734,7 +1706,7 @@ int portcmp; */ #ifdef IPFILTER_LOOKUP if (fr->fr_satype == FRI_LOOKUP) { - i = (*fr->fr_srcfunc)(fr->fr_srcptr, fi->fi_v, lip); + i = (*fr->fr_srcfunc)(fr->fr_srcptr, fi->fi_v, lip, ifs); if (i == -1) return 1; lip += 3; @@ -1776,7 +1748,7 @@ int portcmp; lip++, lm++, ld++; #ifdef IPFILTER_LOOKUP if (fr->fr_datype == FRI_LOOKUP) { - i = (*fr->fr_dstfunc)(fr->fr_dstptr, fi->fi_v, lip); + i = (*fr->fr_dstfunc)(fr->fr_dstptr, fi->fi_v, lip, ifs); if (i == -1) return 1; lip += 3; @@ -1882,6 +1854,7 @@ u_32_t pass; int rulen, portcmp, off, logged, skip; struct frentry *fr, *fnext; u_32_t passt, passo; + ipf_stack_t *ifs = fin->fin_ifs; /* * Do not allow nesting deeper than 16 levels. @@ -2030,9 +2003,9 @@ u_32_t pass; passt &= ~FR_CMDMASK; passt |= FR_BLOCK|FR_QUICK; } - ATOMIC_INCL(frstats[fin->fin_out].fr_skip); + ATOMIC_INCL(ifs->ifs_frstats[fin->fin_out].fr_skip); } - ATOMIC_INCL(frstats[fin->fin_out].fr_pkl); + ATOMIC_INCL(ifs->ifs_frstats[fin->fin_out].fr_pkl); logged = 1; } #endif /* IPFILTER_LOG */ @@ -2074,9 +2047,9 @@ u_32_t pass; int out = fin->fin_out; if (fr_addstate(fin, NULL, 0) != NULL) { - ATOMIC_INCL(frstats[out].fr_ads); + ATOMIC_INCL(ifs->ifs_frstats[out].fr_ads); } else { - ATOMIC_INCL(frstats[out].fr_bads); + ATOMIC_INCL(ifs->ifs_frstats[out].fr_bads); pass = passo; continue; } @@ -2110,14 +2083,15 @@ u_32_t *passp; char group[FR_GROUPLEN]; frentry_t *fr, *frsave; u_32_t pass, rulen; + ipf_stack_t *ifs = fin->fin_ifs; passp = passp; #ifdef USE_INET6 if (fin->fin_v == 6) - fr = ipacct6[fin->fin_out][fr_active]; + fr = ifs->ifs_ipacct6[fin->fin_out][ifs->ifs_fr_active]; else #endif - fr = ipacct[fin->fin_out][fr_active]; + fr = ifs->ifs_ipacct[fin->fin_out][ifs->ifs_fr_active]; if (fr != NULL) { frsave = fin->fin_fr; @@ -2126,7 +2100,7 @@ u_32_t *passp; fin->fin_fr = fr; pass = fr_scanlist(fin, FR_NOMATCH); if (FR_ISACCOUNT(pass)) { - ATOMIC_INCL(frstats[0].fr_acct); + ATOMIC_INCL(ifs->ifs_frstats[0].fr_acct); } fin->fin_fr = frsave; bcopy(group, fin->fin_group, FR_GROUPLEN); @@ -2157,6 +2131,7 @@ u_32_t *passp; fr_info_t *fc; u_32_t pass; int out; + ipf_stack_t *ifs = fin->fin_ifs; out = fin->fin_out; pass = *passp; @@ -2166,40 +2141,40 @@ u_32_t *passp; * the access lists for permission but we do need to consider * the result as if it were from the ACL's. */ - fc = &frcache[out][CACHE_HASH(fin)]; - READ_ENTER(&ipf_frcache); + fc = &ifs->ifs_frcache[out][CACHE_HASH(fin)]; + READ_ENTER(&ifs->ifs_ipf_frcache); if (!bcmp((char *)fin, (char *)fc, FI_CSIZE)) { /* * copy cached data so we can unlock the mutexes earlier. 
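The recurring pattern in fil.c after this change is that the packet descriptor carries the stack pointer (fin->fin_ifs) and every helper pulls it back out instead of touching the former globals. A small sketch of that pattern; the function name is hypothetical, while fin_ifs and ifs_frstats come from this changeset:

	static void
	example_count_block(fr_info_t *fin)
	{
		ipf_stack_t *ifs = fin->fin_ifs;	/* per-stack state */

		/* Statistics are bumped in this stack's counters only. */
		ATOMIC_INCL(ifs->ifs_frstats[fin->fin_out].fr_block);
	}
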
*/ bcopy((char *)fc, (char *)fin, FI_COPYSIZE); - RWLOCK_EXIT(&ipf_frcache); - ATOMIC_INCL(frstats[out].fr_chit); + RWLOCK_EXIT(&ifs->ifs_ipf_frcache); + ATOMIC_INCL(ifs->ifs_frstats[out].fr_chit); if ((fr = fin->fin_fr) != NULL) { ATOMIC_INC64(fr->fr_hits); pass = fr->fr_flags; } } else { - RWLOCK_EXIT(&ipf_frcache); + RWLOCK_EXIT(&ifs->ifs_ipf_frcache); #ifdef USE_INET6 if (fin->fin_v == 6) - fin->fin_fr = ipfilter6[out][fr_active]; + fin->fin_fr = ifs->ifs_ipfilter6[out][ifs->ifs_fr_active]; else #endif - fin->fin_fr = ipfilter[out][fr_active]; + fin->fin_fr = ifs->ifs_ipfilter[out][ifs->ifs_fr_active]; if (fin->fin_fr != NULL) - pass = fr_scanlist(fin, fr_pass); + pass = fr_scanlist(fin, ifs->ifs_fr_pass); if (((pass & FR_KEEPSTATE) == 0) && ((fin->fin_flx & FI_DONTCACHE) == 0)) { - WRITE_ENTER(&ipf_frcache); + WRITE_ENTER(&ifs->ifs_ipf_frcache); bcopy((char *)fin, (char *)fc, FI_COPYSIZE); - RWLOCK_EXIT(&ipf_frcache); + RWLOCK_EXIT(&ifs->ifs_ipf_frcache); } if ((pass & FR_NOMATCH)) { - ATOMIC_INCL(frstats[out].fr_nom); + ATOMIC_INCL(ifs->ifs_frstats[out].fr_nom); } fr = fin->fin_fr; } @@ -2211,7 +2186,7 @@ u_32_t *passp; !ppsratecheck(&fr->fr_lastpkt, &fr->fr_curpps, fr->fr_pps)) { pass &= ~(FR_CMDMASK|FR_DUP|FR_RETICMP|FR_RETRST); pass |= FR_BLOCK; - ATOMIC_INCL(frstats[out].fr_ppshit); + ATOMIC_INCL(ifs->ifs_frstats[out].fr_ppshit); } /* @@ -2242,8 +2217,8 @@ u_32_t *passp; * is treated as "not a pass", hence the packet is blocked. */ if (FR_ISPREAUTH(pass)) { - if ((fin->fin_fr = ipauth) != NULL) - pass = fr_scanlist(fin, fr_pass); + if ((fin->fin_fr = ifs->ifs_ipauth) != NULL) + pass = fr_scanlist(fin, ifs->ifs_fr_pass); } /* @@ -2253,12 +2228,12 @@ u_32_t *passp; if ((pass & (FR_KEEPFRAG|FR_KEEPSTATE)) == FR_KEEPFRAG) { if (fin->fin_flx & FI_FRAG) { if (fr_newfrag(fin, pass) == -1) { - ATOMIC_INCL(frstats[out].fr_bnfr); + ATOMIC_INCL(ifs->ifs_frstats[out].fr_bnfr); } else { - ATOMIC_INCL(frstats[out].fr_nfr); + ATOMIC_INCL(ifs->ifs_frstats[out].fr_nfr); } } else { - ATOMIC_INCL(frstats[out].fr_cfr); + ATOMIC_INCL(ifs->ifs_frstats[out].fr_cfr); } } @@ -2267,9 +2242,9 @@ u_32_t *passp; */ if ((pass & FR_KEEPSTATE) && !(fin->fin_flx & FI_STATE)) { if (fr_addstate(fin, NULL, 0) != NULL) { - ATOMIC_INCL(frstats[out].fr_ads); + ATOMIC_INCL(ifs->ifs_frstats[out].fr_ads); } else { - ATOMIC_INCL(frstats[out].fr_bads); + ATOMIC_INCL(ifs->ifs_frstats[out].fr_bads); if (FR_ISPASS(pass)) { pass &= ~FR_CMDMASK; pass |= FR_BLOCK; @@ -2318,23 +2293,24 @@ u_32_t *passp; /* ------------------------------------------------------------------------ */ int fr_check(ip, hlen, ifp, out #if defined(_KERNEL) && defined(MENTAT) -, qif, mp) +, qif, mp, ifs) void *qif; #else -, mp) +, mp, ifs) #endif mb_t **mp; ip_t *ip; int hlen; void *ifp; int out; +ipf_stack_t *ifs; { /* * The above really sucks, but short of writing a diff */ fr_info_t frinfo; fr_info_t *fin = &frinfo; - u_32_t pass = fr_pass; + u_32_t pass; frentry_t *fr = NULL; int v = IP_V(ip); mb_t *mc = NULL; @@ -2347,7 +2323,9 @@ int out; qpktinfo_t *qpi = qif; #endif #endif + SPL_INT(s); + pass = ifs->ifs_fr_pass; /* * The first part of fr_check() deals with making sure that what goes @@ -2362,10 +2340,10 @@ int out; return 2; # endif - READ_ENTER(&ipf_global); + READ_ENTER(&ifs->ifs_ipf_global); - if (fr_running <= 0) { - RWLOCK_EXIT(&ipf_global); + if (ifs->ifs_fr_running <= 0) { + RWLOCK_EXIT(&ifs->ifs_ipf_global); return 0; } @@ -2412,7 +2390,7 @@ int out; # endif /* CSUM_DELAY_DATA */ # endif /* MENTAT */ #else - READ_ENTER(&ipf_global); + 
READ_ENTER(&ifs->ifs_ipf_global); bzero((char *)fin, sizeof(*fin)); m = *mp; @@ -2427,14 +2405,14 @@ int out; fin->fin_error = ENETUNREACH; fin->fin_hlen = (u_short)hlen; fin->fin_dp = (char *)ip + hlen; - fin->fin_ipoff = (char *)ip - MTOD(m, char *); + fin->fin_ifs = ifs; SPL_NET(s); #ifdef USE_INET6 if (v == 6) { - ATOMIC_INCL(frstats[out].fr_ipv6); + ATOMIC_INCL(ifs->ifs_frstats[out].fr_ipv6); /* * Jumbo grams are quite likely too big for internal buffer * structures to handle comfortably, for now, so just drop @@ -2443,7 +2421,7 @@ int out; ip6 = (ip6_t *)ip; fin->fin_plen = ntohs(ip6->ip6_plen); if (fin->fin_plen == 0) { - READ_ENTER(&ipf_mutex); + READ_ENTER(&ifs->ifs_ipf_mutex); pass = FR_BLOCK|FR_NOMATCH; goto filtered; } @@ -2459,7 +2437,7 @@ int out; } if (fr_makefrip(hlen, ip, fin) == -1) { - READ_ENTER(&ipf_mutex); + READ_ENTER(&ifs->ifs_ipf_mutex); pass = FR_BLOCK; goto filtered; } @@ -2474,13 +2452,13 @@ int out; if (!out) { if (v == 4) { #ifdef _KERNEL - if (fr_chksrc && !fr_verifysrc(fin)) { - ATOMIC_INCL(frstats[0].fr_badsrc); + if (ifs->ifs_fr_chksrc && !fr_verifysrc(fin)) { + ATOMIC_INCL(ifs->ifs_frstats[0].fr_badsrc); fin->fin_flx |= FI_BADSRC; } #endif - if (fin->fin_ip->ip_ttl < fr_minttl) { - ATOMIC_INCL(frstats[0].fr_badttl); + if (fin->fin_ip->ip_ttl < ifs->ifs_fr_minttl) { + ATOMIC_INCL(ifs->ifs_frstats[0].fr_badttl); fin->fin_flx |= FI_LOWTTL; } } @@ -2488,13 +2466,13 @@ int out; else if (v == 6) { ip6 = (ip6_t *)ip; #ifdef _KERNEL - if (fr_chksrc && !fr_verifysrc(fin)) { - ATOMIC_INCL(frstats[0].fr_badsrc); + if (ifs->ifs_fr_chksrc && !fr_verifysrc(fin)) { + ATOMIC_INCL(ifs->ifs_frstats[0].fr_badsrc); fin->fin_flx |= FI_BADSRC; } #endif - if (ip6->ip6_hlim < fr_minttl) { - ATOMIC_INCL(frstats[0].fr_badttl); + if (ip6->ip6_hlim < ifs->ifs_fr_minttl) { + ATOMIC_INCL(ifs->ifs_frstats[0].fr_badttl); fin->fin_flx |= FI_LOWTTL; } } @@ -2502,10 +2480,10 @@ int out; } if (fin->fin_flx & FI_SHORT) { - ATOMIC_INCL(frstats[out].fr_short); + ATOMIC_INCL(ifs->ifs_frstats[out].fr_short); } - READ_ENTER(&ipf_mutex); + READ_ENTER(&ifs->ifs_ipf_mutex); /* * Check auth now. 
This, combined with the check below to see if apass @@ -2517,7 +2495,7 @@ int out; fr = fr_checkauth(fin, &pass); if (!out) { if (fr_checknatin(fin, &pass) == -1) { - RWLOCK_EXIT(&ipf_mutex); + RWLOCK_EXIT(&ifs->ifs_ipf_mutex); goto finished; } } @@ -2543,30 +2521,30 @@ int out; (void) fr_acctpkt(fin, NULL); if (fr_checknatout(fin, &pass) == -1) { - RWLOCK_EXIT(&ipf_mutex); + RWLOCK_EXIT(&ifs->ifs_ipf_mutex); goto finished; - } else if ((fr_update_ipid != 0) && (v == 4)) { + } else if ((ifs->ifs_fr_update_ipid != 0) && (v == 4)) { if (fr_updateipid(fin) == -1) { - ATOMIC_INCL(frstats[1].fr_ipud); + ATOMIC_INCL(ifs->ifs_frstats[1].fr_ipud); pass &= ~FR_CMDMASK; pass |= FR_BLOCK; } else { - ATOMIC_INCL(frstats[0].fr_ipud); + ATOMIC_INCL(ifs->ifs_frstats[0].fr_ipud); } } } #ifdef IPFILTER_LOG - if ((fr_flags & FF_LOGGING) || (pass & FR_LOGMASK)) { + if ((ifs->ifs_fr_flags & FF_LOGGING) || (pass & FR_LOGMASK)) { (void) fr_dolog(fin, &pass); } #endif if (fin->fin_state != NULL) - fr_statederef(fin, (ipstate_t **)&fin->fin_state); + fr_statederef(fin, (ipstate_t **)&fin->fin_state, ifs); if (fin->fin_nat != NULL) - fr_natderef((nat_t **)&fin->fin_nat); + fr_natderef((nat_t **)&fin->fin_nat, ifs); /* * Only allow FR_DUP to work if a rule matched - it makes no sense to @@ -2596,11 +2574,11 @@ int out; else dst = 0; (void) fr_send_icmp_err(ICMP_UNREACH, fin, dst); - ATOMIC_INCL(frstats[0].fr_ret); + ATOMIC_INCL(ifs->ifs_frstats[0].fr_ret); } else if (((pass & FR_RETMASK) == FR_RETRST) && !(fin->fin_flx & FI_SHORT)) { if (fr_send_reset(fin) == 0) { - ATOMIC_INCL(frstats[1].fr_ret); + ATOMIC_INCL(ifs->ifs_frstats[1].fr_ret); } } } else { @@ -2649,17 +2627,17 @@ filtered: /* * This late because the likes of fr_fastroute() use fin_fr. */ - RWLOCK_EXIT(&ipf_mutex); + RWLOCK_EXIT(&ifs->ifs_ipf_mutex); finished: if (!FR_ISPASS(pass)) { - ATOMIC_INCL(frstats[out].fr_block); + ATOMIC_INCL(ifs->ifs_frstats[out].fr_block); if (*mp != NULL) { FREE_MB_T(*mp); m = *mp = NULL; } } else { - ATOMIC_INCL(frstats[out].fr_pass); + ATOMIC_INCL(ifs->ifs_frstats[out].fr_pass); #if defined(_KERNEL) && defined(__sgi) if ((fin->fin_hbuf != NULL) && (mtod(fin->fin_m, struct ip *) != fin->fin_ip)) { @@ -2669,7 +2647,7 @@ finished: } SPL_X(s); - RWLOCK_EXIT(&ipf_global); + RWLOCK_EXIT(&ifs->ifs_ipf_global); #ifdef _KERNEL # if OpenBSD >= 200311 @@ -2730,28 +2708,29 @@ u_32_t *passp; { u_32_t pass; int out; + ipf_stack_t *ifs = fin->fin_ifs; out = fin->fin_out; pass = *passp; - if ((fr_flags & FF_LOGNOMATCH) && (pass & FR_NOMATCH)) { + if ((ifs->ifs_fr_flags & FF_LOGNOMATCH) && (pass & FR_NOMATCH)) { pass |= FF_LOGNOMATCH; - ATOMIC_INCL(frstats[out].fr_npkl); + ATOMIC_INCL(ifs->ifs_frstats[out].fr_npkl); goto logit; } else if (((pass & FR_LOGMASK) == FR_LOGP) || - (FR_ISPASS(pass) && (fr_flags & FF_LOGPASS))) { + (FR_ISPASS(pass) && (ifs->ifs_fr_flags & FF_LOGPASS))) { if ((pass & FR_LOGMASK) != FR_LOGP) pass |= FF_LOGPASS; - ATOMIC_INCL(frstats[out].fr_ppkl); + ATOMIC_INCL(ifs->ifs_frstats[out].fr_ppkl); goto logit; } else if (((pass & FR_LOGMASK) == FR_LOGB) || - (FR_ISBLOCK(pass) && (fr_flags & FF_LOGBLOCK))) { + (FR_ISBLOCK(pass) && (ifs->ifs_fr_flags & FF_LOGBLOCK))) { if ((pass & FR_LOGMASK) != FR_LOGB) pass |= FF_LOGBLOCK; - ATOMIC_INCL(frstats[out].fr_bpkl); + ATOMIC_INCL(ifs->ifs_frstats[out].fr_bpkl); logit: if (ipflog(fin, pass) == -1) { - ATOMIC_INCL(frstats[out].fr_skip); + ATOMIC_INCL(ifs->ifs_frstats[out].fr_skip); /* * If the "or-block" option has been used then @@ -3203,11 +3182,12 @@ out: /* */ /* Search 
amongst the defined groups for a particular group number. */ /* ------------------------------------------------------------------------ */ -frgroup_t *fr_findgroup(group, unit, set, fgpp) +frgroup_t *fr_findgroup(group, unit, set, fgpp, ifs) char *group; minor_t unit; int set; frgroup_t ***fgpp; +ipf_stack_t *ifs; { frgroup_t *fg, **fgp; @@ -3215,7 +3195,7 @@ frgroup_t ***fgpp; * Which list of groups to search in is dependent on which list of * rules are being operated on. */ - fgp = &ipfgroups[unit][set]; + fgp = &ifs->ifs_ipfgroups[unit][set]; while ((fg = *fgp) != NULL) { if (strncmp(group, fg->fg_name, FR_GROUPLEN) == 0) @@ -3243,12 +3223,13 @@ frgroup_t ***fgpp; /* Add a new group head, or if it already exists, increase the reference */ /* count to it. */ /* ------------------------------------------------------------------------ */ -frgroup_t *fr_addgroup(group, head, flags, unit, set) +frgroup_t *fr_addgroup(group, head, flags, unit, set, ifs) char *group; void *head; u_32_t flags; minor_t unit; int set; +ipf_stack_t *ifs; { frgroup_t *fg, **fgp; u_32_t gflags; @@ -3262,7 +3243,7 @@ int set; fgp = NULL; gflags = flags & FR_INOUT; - fg = fr_findgroup(group, unit, set, &fgp); + fg = fr_findgroup(group, unit, set, &fgp, ifs); if (fg != NULL) { if (fg->fg_flags == 0) fg->fg_flags = gflags; @@ -3296,14 +3277,15 @@ int set; /* Attempt to delete a group head. */ /* Only do this when its reference count reaches 0. */ /* ------------------------------------------------------------------------ */ -void fr_delgroup(group, unit, set) +void fr_delgroup(group, unit, set, ifs) char *group; minor_t unit; int set; +ipf_stack_t *ifs; { frgroup_t *fg, **fgp; - fg = fr_findgroup(group, unit, set, &fgp); + fg = fr_findgroup(group, unit, set, &fgp, ifs); if (fg == NULL) return; @@ -3326,15 +3308,16 @@ int set; /* Find rule # n in group # g and return a pointer to it. Return NULl if */ /* group # g doesn't exist or there are less than n rules in the group. */ /* ------------------------------------------------------------------------ */ -frentry_t *fr_getrulen(unit, group, n) +frentry_t *fr_getrulen(unit, group, n, ifs) int unit; char *group; u_32_t n; +ipf_stack_t *ifs; { frentry_t *fr; frgroup_t *fg; - fg = fr_findgroup(group, unit, fr_active, NULL); + fg = fr_findgroup(group, unit, ifs->ifs_fr_active, NULL, ifs); if (fg == NULL) return NULL; for (fr = fg->fg_head; fr && n; fr = fr->fr_next, n--) @@ -3353,9 +3336,10 @@ u_32_t n; /* */ /* Return the number for a rule on a specific filtering device. */ /* ------------------------------------------------------------------------ */ -int fr_rulen(unit, fr) +int fr_rulen(unit, fr, ifs) int unit; frentry_t *fr; +ipf_stack_t *ifs; { frentry_t *fh; frgroup_t *fg; @@ -3363,7 +3347,7 @@ frentry_t *fr; if (fr == NULL) return -1; - fg = fr_findgroup(fr->fr_group, unit, fr_active, NULL); + fg = fr_findgroup(fr->fr_group, unit, ifs->ifs_fr_active, NULL, ifs); if (fg == NULL) return -1; for (fh = fg->fg_head; fh; n++, fh = fh->fr_next) @@ -3394,11 +3378,12 @@ frentry_t *fr; /* */ /* NOTE: Rules not loaded from user space cannot be flushed. 
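Group lookups are now resolved against the stack's own ifs_ipfgroups table rather than a global array, so two stacks may each own a group with the same name. A hedged usage sketch; the wrapper and the group name are invented, the fr_findgroup() signature is the one added above:

	static int
	example_group_exists(ipf_stack_t *ifs, char *group)
	{
		frgroup_t *fg;

		/* Confined to this stack's groups and active rule set. */
		fg = fr_findgroup(group, IPL_LOGIPF, ifs->ifs_fr_active,
		    NULL, ifs);
		return (fg != NULL);
	}
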
*/ /* ------------------------------------------------------------------------ */ -static int frflushlist(set, unit, nfreedp, listp) +static int frflushlist(set, unit, nfreedp, listp, ifs) int set; minor_t unit; int *nfreedp; frentry_t **listp; +ipf_stack_t *ifs; { int freed = 0, i; frentry_t *fp; @@ -3411,18 +3396,18 @@ frentry_t **listp; } *listp = fp->fr_next; if (fp->fr_grp != NULL) { - i = frflushlist(set, unit, nfreedp, fp->fr_grp); + i = frflushlist(set, unit, nfreedp, fp->fr_grp, ifs); fp->fr_ref -= i; } if (fp->fr_grhead != NULL) { - fr_delgroup(fp->fr_grhead, unit, set); + fr_delgroup(fp->fr_grhead, unit, set, ifs); *fp->fr_grhead = '\0'; } ASSERT(fp->fr_ref > 0); fp->fr_next = NULL; - if (fr_derefrule(&fp) == 0) + if (fr_derefrule(&fp, ifs) == 0) freed++; } *nfreedp += freed; @@ -3439,53 +3424,54 @@ frentry_t **listp; /* Calls flushlist() for all filter rules (accounting, firewall - both IPv4 */ /* and IPv6) as defined by the value of flags. */ /* ------------------------------------------------------------------------ */ -int frflush(unit, proto, flags) +int frflush(unit, proto, flags, ifs) minor_t unit; int proto, flags; +ipf_stack_t *ifs; { int flushed = 0, set; - WRITE_ENTER(&ipf_mutex); - bzero((char *)frcache, sizeof(frcache)); + WRITE_ENTER(&ifs->ifs_ipf_mutex); + bzero((char *)&ifs->ifs_frcache, sizeof (ifs->ifs_frcache)); - set = fr_active; + set = ifs->ifs_fr_active; if ((flags & FR_INACTIVE) == FR_INACTIVE) set = 1 - set; if (flags & FR_OUTQUE) { if (proto == 0 || proto == 6) { (void) frflushlist(set, unit, - &flushed, &ipfilter6[1][set]); + &flushed, &ifs->ifs_ipfilter6[1][set], ifs); (void) frflushlist(set, unit, - &flushed, &ipacct6[1][set]); + &flushed, &ifs->ifs_ipacct6[1][set], ifs); } if (proto == 0 || proto == 4) { (void) frflushlist(set, unit, - &flushed, &ipfilter[1][set]); + &flushed, &ifs->ifs_ipfilter[1][set], ifs); (void) frflushlist(set, unit, - &flushed, &ipacct[1][set]); + &flushed, &ifs->ifs_ipacct[1][set], ifs); } } if (flags & FR_INQUE) { if (proto == 0 || proto == 6) { (void) frflushlist(set, unit, - &flushed, &ipfilter6[0][set]); + &flushed, &ifs->ifs_ipfilter6[0][set], ifs); (void) frflushlist(set, unit, - &flushed, &ipacct6[0][set]); + &flushed, &ifs->ifs_ipacct6[0][set], ifs); } if (proto == 0 || proto == 4) { (void) frflushlist(set, unit, - &flushed, &ipfilter[0][set]); + &flushed, &ifs->ifs_ipfilter[0][set], ifs); (void) frflushlist(set, unit, - &flushed, &ipacct[0][set]); + &flushed, &ifs->ifs_ipacct[0][set], ifs); } } - RWLOCK_EXIT(&ipf_mutex); + RWLOCK_EXIT(&ifs->ifs_ipf_mutex); if (unit == IPL_LOGIPF) { int tmp; - tmp = frflush(IPL_LOGCOUNT, proto, flags); + tmp = frflush(IPL_LOGCOUNT, proto, flags, ifs); if (tmp >= 0) flushed += tmp; } @@ -3639,10 +3625,11 @@ u_32_t *msk; /* if oldifp matches newifp then we are are doing a sync to remove any */ /* references to oldifp, so we return "-1". */ /* ------------------------------------------------------------------------ */ -static void *fr_ifsync(action, v, newname, oldname, newifp, oldifp) +static void *fr_ifsync(action, v, newname, oldname, newifp, oldifp, ifs) int action, v; char *newname, *oldname; void *newifp, *oldifp; +ipf_stack_t *ifs; { void *rval = oldifp; @@ -3650,7 +3637,7 @@ void *newifp, *oldifp; { case IPFSYNC_RESYNC : if (oldname[0] != '\0') { - rval = fr_resolvenic(oldname, v); + rval = fr_resolvenic(oldname, v, ifs); } break; case IPFSYNC_NEWIFP : @@ -3681,11 +3668,12 @@ void *newifp, *oldifp; /* used in the rule. 
The interface pointer is used to limit the lookups to */ /* a specific set of matching names if it is non-NULL. */ /* ------------------------------------------------------------------------ */ -static void frsynclist(action, v, ifp, ifname, fr) +static void frsynclist(action, v, ifp, ifname, fr, ifs) int action, v; void *ifp; char *ifname; frentry_t *fr; +ipf_stack_t *ifs; { frdest_t *fdp; int rv, i; @@ -3703,20 +3691,21 @@ frentry_t *fr; continue; fr->fr_ifas[i] = fr_ifsync(action, rv, ifname, fr->fr_ifnames[i], - ifp, fr->fr_ifas[i]); + ifp, fr->fr_ifas[i], + ifs); } fdp = &fr->fr_tifs[0]; fdp->fd_ifp = fr_ifsync(action, rv, ifname, fdp->fd_ifname, - ifp, fdp->fd_ifp); + ifp, fdp->fd_ifp, ifs); fdp = &fr->fr_tifs[1]; fdp->fd_ifp = fr_ifsync(action, rv, ifname, fdp->fd_ifname, - ifp, fdp->fd_ifp); + ifp, fdp->fd_ifp, ifs); fdp = &fr->fr_dif; fdp->fd_ifp = fr_ifsync(action, rv, ifname, fdp->fd_ifname, - ifp, fdp->fd_ifp); + ifp, fdp->fd_ifp, ifs); if (action != IPFSYNC_RESYNC) return; @@ -3726,13 +3715,15 @@ frentry_t *fr; fr->fr_satype != FRI_LOOKUP) { (void)fr_ifpaddr(rv, fr->fr_satype, fr->fr_ifas[fr->fr_sifpidx], - &fr->fr_src, &fr->fr_smsk); + &fr->fr_src, &fr->fr_smsk, + ifs); } if (fr->fr_datype != FRI_NORMAL && fr->fr_datype != FRI_LOOKUP) { (void)fr_ifpaddr(rv, fr->fr_datype, fr->fr_ifas[fr->fr_difpidx], - &fr->fr_dst, &fr->fr_dmsk); + &fr->fr_dst, &fr->fr_dmsk, + ifs); } } @@ -3741,13 +3732,13 @@ frentry_t *fr; fr->fr_srcptr == NULL) { fr->fr_srcptr = fr_resolvelookup(fr->fr_srctype, fr->fr_srcnum, - &fr->fr_srcfunc); + &fr->fr_srcfunc, ifs); } if (fr->fr_type == FR_T_IPF && fr->fr_datype == FRI_LOOKUP && fr->fr_dstptr == NULL) { fr->fr_dstptr = fr_resolvelookup(fr->fr_dsttype, fr->fr_dstnum, - &fr->fr_dstfunc); + &fr->fr_dstfunc, ifs); } #endif } @@ -3773,32 +3764,33 @@ frentry_t *fr; /* - new interface being announced with its name and identifier */ /* - interface removal being announced by only its identifier */ /* ------------------------------------------------------------------------ */ -void frsync(action, v, ifp, name) +void frsync(action, v, ifp, name, ifs) int action, v; void *ifp; char *name; +ipf_stack_t *ifs; { int i; - WRITE_ENTER(&ipf_mutex); - frsynclist(action, v, ifp, name, ipacct[0][fr_active]); - frsynclist(action, v, ifp, name, ipacct[1][fr_active]); - frsynclist(action, v, ifp, name, ipfilter[0][fr_active]); - frsynclist(action, v, ifp, name, ipfilter[1][fr_active]); - frsynclist(action, v, ifp, name, ipacct6[0][fr_active]); - frsynclist(action, v, ifp, name, ipacct6[1][fr_active]); - frsynclist(action, v, ifp, name, ipfilter6[0][fr_active]); - frsynclist(action, v, ifp, name, ipfilter6[1][fr_active]); + WRITE_ENTER(&ifs->ifs_ipf_mutex); + frsynclist(action, v, ifp, name, ifs->ifs_ipacct[0][ifs->ifs_fr_active], ifs); + frsynclist(action, v, ifp, name, ifs->ifs_ipacct[1][ifs->ifs_fr_active], ifs); + frsynclist(action, v, ifp, name, ifs->ifs_ipfilter[0][ifs->ifs_fr_active], ifs); + frsynclist(action, v, ifp, name, ifs->ifs_ipfilter[1][ifs->ifs_fr_active], ifs); + frsynclist(action, v, ifp, name, ifs->ifs_ipacct6[0][ifs->ifs_fr_active], ifs); + frsynclist(action, v, ifp, name, ifs->ifs_ipacct6[1][ifs->ifs_fr_active], ifs); + frsynclist(action, v, ifp, name, ifs->ifs_ipfilter6[0][ifs->ifs_fr_active], ifs); + frsynclist(action, v, ifp, name, ifs->ifs_ipfilter6[1][ifs->ifs_fr_active], ifs); for (i = 0; i < IPL_LOGSIZE; i++) { frgroup_t *g; - for (g = ipfgroups[i][0]; g != NULL; g = g->fg_next) - frsynclist(action, v, ifp, name, g->fg_start); - for (g = 
ipfgroups[i][1]; g != NULL; g = g->fg_next) - frsynclist(action, v, ifp, name, g->fg_start); + for (g = ifs->ifs_ipfgroups[i][0]; g != NULL; g = g->fg_next) + frsynclist(action, v, ifp, name, g->fg_start, ifs); + for (g = ifs->ifs_ipfgroups[i][1]; g != NULL; g = g->fg_next) + frsynclist(action, v, ifp, name, g->fg_start, ifs); } - RWLOCK_EXIT(&ipf_mutex); + RWLOCK_EXIT(&ifs->ifs_ipf_mutex); } @@ -3897,41 +3889,43 @@ int *lockp; /* Stores a copy of current pointers, counters, etc, in the friostat */ /* structure. */ /* ------------------------------------------------------------------------ */ -void fr_getstat(fiop) +void fr_getstat(fiop, ifs) friostat_t *fiop; +ipf_stack_t *ifs; { int i, j; - bcopy((char *)frstats, (char *)fiop->f_st, sizeof(filterstats_t) * 2); - fiop->f_locks[IPL_LOGSTATE] = fr_state_lock; - fiop->f_locks[IPL_LOGNAT] = fr_nat_lock; - fiop->f_locks[IPL_LOGIPF] = fr_frag_lock; - fiop->f_locks[IPL_LOGAUTH] = fr_auth_lock; + bcopy((char *)&ifs->ifs_frstats, (char *)fiop->f_st, + sizeof(filterstats_t) * 2); + fiop->f_locks[IPL_LOGSTATE] = ifs->ifs_fr_state_lock; + fiop->f_locks[IPL_LOGNAT] = ifs->ifs_fr_nat_lock; + fiop->f_locks[IPL_LOGIPF] = ifs->ifs_fr_frag_lock; + fiop->f_locks[IPL_LOGAUTH] = ifs->ifs_fr_auth_lock; for (i = 0; i < 2; i++) for (j = 0; j < 2; j++) { - fiop->f_ipf[i][j] = ipfilter[i][j]; - fiop->f_acct[i][j] = ipacct[i][j]; - fiop->f_ipf6[i][j] = ipfilter6[i][j]; - fiop->f_acct6[i][j] = ipacct6[i][j]; + fiop->f_ipf[i][j] = ifs->ifs_ipfilter[i][j]; + fiop->f_acct[i][j] = ifs->ifs_ipacct[i][j]; + fiop->f_ipf6[i][j] = ifs->ifs_ipfilter6[i][j]; + fiop->f_acct6[i][j] = ifs->ifs_ipacct6[i][j]; } - fiop->f_ticks = fr_ticks; - fiop->f_active = fr_active; - fiop->f_froute[0] = fr_frouteok[0]; - fiop->f_froute[1] = fr_frouteok[1]; + fiop->f_ticks = ifs->ifs_fr_ticks; + fiop->f_active = ifs->ifs_fr_active; + fiop->f_froute[0] = ifs->ifs_fr_frouteok[0]; + fiop->f_froute[1] = ifs->ifs_fr_frouteok[1]; - fiop->f_running = fr_running; + fiop->f_running = ifs->ifs_fr_running; for (i = 0; i < IPL_LOGSIZE; i++) { - fiop->f_groups[i][0] = ipfgroups[i][0]; - fiop->f_groups[i][1] = ipfgroups[i][1]; + fiop->f_groups[i][0] = ifs->ifs_ipfgroups[i][0]; + fiop->f_groups[i][1] = ifs->ifs_ipfgroups[i][1]; } #ifdef IPFILTER_LOG fiop->f_logging = 1; #else fiop->f_logging = 0; #endif - fiop->f_defpass = fr_pass; + fiop->f_defpass = ifs->ifs_fr_pass; fiop->f_features = fr_features; (void) strncpy(fiop->f_version, ipfilter_version, sizeof(fiop->f_version)); @@ -4048,9 +4042,10 @@ int rev; /* call to do the IP address search will be change, regardless of whether */ /* or not the "table" number exists. 
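Because frsync() now walks only the rule lists and groups hanging off the ipf_stack_t it is handed, an interface event in one zone resynchronises that stack alone. A sketch of the call a hypothetical per-stack NIC-attach path might make, using the frsync() signature introduced above (v == 4 for an IPv4 resolve here; the surrounding event plumbing is assumed):

	static void
	example_nic_plumbed(void *ifp, char *name, ipf_stack_t *ifs)
	{
		/* Re-resolve interface pointers in this stack only. */
		frsync(IPFSYNC_NEWIFP, 4, ifp, name, ifs);
	}
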
*/ /* ------------------------------------------------------------------------ */ -static void *fr_resolvelookup(type, number, funcptr) +static void *fr_resolvelookup(type, number, funcptr, ifs) u_int type, number; lookupfunc_t *funcptr; +ipf_stack_t *ifs; { char name[FR_GROUPLEN]; iphtable_t *iph; @@ -4063,7 +4058,7 @@ lookupfunc_t *funcptr; (void) sprintf(name, "%u", number); #endif - READ_ENTER(&ip_poolrw); + READ_ENTER(&ifs->ifs_ip_poolrw); switch (type) { @@ -4072,7 +4067,7 @@ lookupfunc_t *funcptr; ptr = NULL; *funcptr = NULL; # else - ipo = ip_pool_find(IPL_LOGIPF, name); + ipo = ip_pool_find(IPL_LOGIPF, name, ifs); ptr = ipo; if (ipo != NULL) { ATOMIC_INC32(ipo->ipo_ref); @@ -4081,7 +4076,7 @@ lookupfunc_t *funcptr; # endif break; case IPLT_HASH : - iph = fr_findhtable(IPL_LOGIPF, name); + iph = fr_findhtable(IPL_LOGIPF, name, ifs); ptr = iph; if (iph != NULL) { ATOMIC_INC32(iph->iph_ref); @@ -4093,7 +4088,7 @@ lookupfunc_t *funcptr; *funcptr = NULL; break; } - RWLOCK_EXIT(&ip_poolrw); + RWLOCK_EXIT(&ifs->ifs_ip_poolrw); return ptr; } @@ -4117,11 +4112,12 @@ lookupfunc_t *funcptr; /* of the rule structure being loaded. If a rule has user defined timeouts */ /* then make sure they are created and initialised before exiting. */ /* ------------------------------------------------------------------------ */ -int frrequest(unit, req, data, set, makecopy) +int frrequest(unit, req, data, set, makecopy, ifs) int unit; ioctlcmd_t req; int set, makecopy; caddr_t data; +ipf_stack_t *ifs; { frentry_t frd, *fp, *f, **fprev, **ftail; int error = 0, in, v; @@ -4175,7 +4171,7 @@ caddr_t data; if ((makecopy == 1) && (fp->fr_func != NULL)) { if (fr_findfunc(fp->fr_func) == NULL) return ESRCH; - error = fr_funcinit(fp); + error = fr_funcinit(fp, ifs); if (error != 0) return error; } @@ -4195,7 +4191,7 @@ caddr_t data; unit = IPL_LOGCOUNT; if ((req != (int)SIOCZRLST) && (*group != '\0')) { - fg = fr_findgroup(group, unit, set, NULL); + fg = fr_findgroup(group, unit, set, NULL, ifs); if (fg == NULL) return ESRCH; if (fg->fg_flags == 0) @@ -4212,23 +4208,23 @@ caddr_t data; ftail = NULL; fprev = NULL; if (unit == IPL_LOGAUTH) - fprev = &ipauth; + fprev = &ifs->ifs_ipauth; else if (v == 4) { if (FR_ISACCOUNT(fp->fr_flags)) - fprev = &ipacct[in][set]; + fprev = &ifs->ifs_ipacct[in][set]; else if ((fp->fr_flags & (FR_OUTQUE|FR_INQUE)) != 0) - fprev = &ipfilter[in][set]; + fprev = &ifs->ifs_ipfilter[in][set]; } else if (v == 6) { if (FR_ISACCOUNT(fp->fr_flags)) - fprev = &ipacct6[in][set]; + fprev = &ifs->ifs_ipacct6[in][set]; else if ((fp->fr_flags & (FR_OUTQUE|FR_INQUE)) != 0) - fprev = &ipfilter6[in][set]; + fprev = &ifs->ifs_ipfilter6[in][set]; } if (fprev == NULL) return ESRCH; if (*group != '\0') { - if (!fg && !(fg = fr_findgroup(group, unit, set, NULL))) + if (!fg && !(fg = fr_findgroup(group, unit, set, NULL, ifs))) return ESRCH; fprev = &fg->fg_start; } @@ -4311,7 +4307,7 @@ caddr_t data; case FRI_LOOKUP : fp->fr_srcptr = fr_resolvelookup(fp->fr_srctype, fp->fr_srcnum, - &fp->fr_srcfunc); + &fp->fr_srcfunc, ifs); break; #endif default : @@ -4336,7 +4332,7 @@ caddr_t data; case FRI_LOOKUP : fp->fr_dstptr = fr_resolvelookup(fp->fr_dsttype, fp->fr_dstnum, - &fp->fr_dstfunc); + &fp->fr_dstfunc, ifs); break; #endif default : @@ -4359,7 +4355,7 @@ caddr_t data; /* * Lookup all the interface names that are part of the rule. 
*/ - frsynclist(0, 0, NULL, NULL, fp); + frsynclist(0, 0, NULL, NULL, fp, ifs); fp->fr_statecnt = 0; /* @@ -4376,8 +4372,8 @@ caddr_t data; for (p = (u_int *)fp->fr_data; p < pp; p++) fp->fr_cksum += *p; - WRITE_ENTER(&ipf_mutex); - bzero((char *)frcache, sizeof(frcache)); + WRITE_ENTER(&ifs->ifs_ipf_mutex); + bzero((char *)&ifs->ifs_frcache, sizeof (ifs->ifs_frcache)); for (; (f = *ftail) != NULL; ftail = &f->fr_next) { if ((fp->fr_cksum != f->fr_cksum) || @@ -4430,7 +4426,7 @@ caddr_t data; if ((ptr != NULL) && (makecopy != 0)) { KFREES(ptr, fp->fr_dsize); } - RWLOCK_EXIT(&ipf_mutex); + RWLOCK_EXIT(&ifs->ifs_ipf_mutex); return error; } @@ -4508,15 +4504,15 @@ caddr_t data; if ((fg != NULL) && (fg->fg_head != NULL)) fg->fg_head->fr_ref--; if (unit == IPL_LOGAUTH) { - error = fr_preauthcmd(req, f, ftail); + error = fr_preauthcmd(req, f, ftail, ifs); goto done; } if (*f->fr_grhead != '\0') - fr_delgroup(f->fr_grhead, unit, set); + fr_delgroup(f->fr_grhead, unit, set, ifs); fr_fixskip(ftail, f, -1); *ftail = f->fr_next; f->fr_next = NULL; - (void)fr_derefrule(&f); + (void)fr_derefrule(&f, ifs); } } else { /* @@ -4526,7 +4522,7 @@ caddr_t data; error = EEXIST; else { if (unit == IPL_LOGAUTH) { - error = fr_preauthcmd(req, fp, ftail); + error = fr_preauthcmd(req, fp, ftail, ifs); goto done; } if (makecopy) { @@ -4558,7 +4554,7 @@ caddr_t data; group = f->fr_grhead; if (*group != '\0') { fg = fr_addgroup(group, f, f->fr_flags, - unit, set); + unit, set, ifs); if (fg != NULL) f->fr_grp = &fg->fg_start; } @@ -4567,7 +4563,7 @@ caddr_t data; } } done: - RWLOCK_EXIT(&ipf_mutex); + RWLOCK_EXIT(&ifs->ifs_ipf_mutex); if ((ptr != NULL) && (error != 0) && (makecopy != 0)) { KFREES(ptr, fp->fr_dsize); } @@ -4583,8 +4579,9 @@ done: /* If a rule is a call rule, then check if the function it points to needs */ /* an init function to be called now the rule has been loaded. */ /* ------------------------------------------------------------------------ */ -static int fr_funcinit(fr) +static int fr_funcinit(fr, ifs) frentry_t *fr; +ipf_stack_t *ifs; { ipfunc_resolve_t *ft; int err; @@ -4595,7 +4592,7 @@ frentry_t *fr; if (ft->ipfu_addr == fr->fr_func) { err = 0; if (ft->ipfu_init != NULL) - err = (*ft->ipfu_init)(fr); + err = (*ft->ipfu_init)(fr, ifs); break; } return err; @@ -4724,8 +4721,9 @@ ppsratecheck(lasttime, curpps, maxpps) /* Decrement the reference counter to a rule by one. If it reaches zero, */ /* free it and any associated storage space being used by it. */ /* ------------------------------------------------------------------------ */ -int fr_derefrule(frp) +int fr_derefrule(frp, ifs) frentry_t **frp; +ipf_stack_t *ifs; { frentry_t *fr; @@ -4739,9 +4737,9 @@ frentry_t **frp; #ifdef IPFILTER_LOOKUP if (fr->fr_type == FR_T_IPF && fr->fr_satype == FRI_LOOKUP) - ip_lookup_deref(fr->fr_srctype, fr->fr_srcptr); + ip_lookup_deref(fr->fr_srctype, fr->fr_srcptr, ifs); if (fr->fr_type == FR_T_IPF && fr->fr_datype == FRI_LOOKUP) - ip_lookup_deref(fr->fr_dsttype, fr->fr_dstptr); + ip_lookup_deref(fr->fr_dsttype, fr->fr_dstptr, ifs); #endif if (fr->fr_dsize) { @@ -4769,8 +4767,9 @@ frentry_t **frp; /* Looks for group hash table fr_arg and stores a pointer to it in fr_ptr. */ /* fr_ptr is later used by fr_srcgrpmap and fr_dstgrpmap. 
*/ /* ------------------------------------------------------------------------ */ -static int fr_grpmapinit(fr) +static int fr_grpmapinit(fr, ifs) frentry_t *fr; +ipf_stack_t *ifs; { char name[FR_GROUPLEN]; iphtable_t *iph; @@ -4780,7 +4779,7 @@ frentry_t *fr; #else (void) sprintf(name, "%d", fr->fr_arg); #endif - iph = fr_findhtable(IPL_LOGIPF, name); + iph = fr_findhtable(IPL_LOGIPF, name, ifs); if (iph == NULL) return ESRCH; if ((iph->iph_flags & FR_INOUT) != (fr->fr_flags & FR_INOUT)) @@ -4806,8 +4805,9 @@ u_32_t *passp; { frgroup_t *fg; void *rval; + ipf_stack_t *ifs = fin->fin_ifs; - rval = fr_iphmfindgroup(fin->fin_fr->fr_ptr, fin->fin_v, &fin->fin_src); + rval = fr_iphmfindgroup(fin->fin_fr->fr_ptr, fin->fin_v, &fin->fin_src, ifs); if (rval == NULL) return NULL; @@ -4834,8 +4834,9 @@ u_32_t *passp; { frgroup_t *fg; void *rval; + ipf_stack_t *ifs = fin->fin_ifs; - rval = fr_iphmfindgroup(fin->fin_fr->fr_ptr, fin->fin_v, &fin->fin_dst); + rval = fr_iphmfindgroup(fin->fin_fr->fr_ptr, fin->fin_v, &fin->fin_dst, ifs); if (rval == NULL) return NULL; @@ -4874,16 +4875,17 @@ u_32_t *passp; /* It is assumed that the caller of this function has an appropriate lock */ /* held (exclusively) in the domain that encompases 'parent'. */ /* ------------------------------------------------------------------------ */ -ipftq_t *fr_addtimeoutqueue(parent, seconds) +ipftq_t *fr_addtimeoutqueue(parent, seconds, ifs) ipftq_t **parent; u_int seconds; +ipf_stack_t *ifs; { ipftq_t *ifq; u_int period; period = seconds * IPF_HZ_DIVIDE; - MUTEX_ENTER(&ipf_timeoutlock); + MUTEX_ENTER(&ifs->ifs_ipf_timeoutlock); for (ifq = *parent; ifq != NULL; ifq = ifq->ifq_next) { if (ifq->ifq_ttl == period) { /* @@ -4894,7 +4896,7 @@ u_int seconds; ifq->ifq_flags &= ~IFQF_DELETE; ifq->ifq_ref++; MUTEX_EXIT(&ifq->ifq_lock); - MUTEX_EXIT(&ipf_timeoutlock); + MUTEX_EXIT(&ifs->ifs_ipf_timeoutlock); return ifq; } @@ -4910,11 +4912,11 @@ u_int seconds; ifq->ifq_ref = 1; ifq->ifq_flags = IFQF_USER; *parent = ifq; - fr_userifqs++; + ifs->ifs_fr_userifqs++; MUTEX_NUKE(&ifq->ifq_lock); MUTEX_INIT(&ifq->ifq_lock, "ipftq mutex"); } - MUTEX_EXIT(&ipf_timeoutlock); + MUTEX_EXIT(&ifs->ifs_ipf_timeoutlock); return ifq; } @@ -4960,8 +4962,9 @@ ipftq_t *ifq; /* Remove a user definde timeout queue from the list of queues it is in and */ /* tidy up after this is done. */ /* ------------------------------------------------------------------------ */ -void fr_freetimeoutqueue(ifq) +void fr_freetimeoutqueue(ifq, ifs) ipftq_t *ifq; +ipf_stack_t *ifs; { @@ -4981,7 +4984,7 @@ ipftq_t *ifq; ifq->ifq_next->ifq_pnext = ifq->ifq_pnext; MUTEX_DESTROY(&ifq->ifq_lock); - fr_userifqs--; + ifs->ifs_fr_userifqs--; KFREE(ifq); } @@ -5065,15 +5068,16 @@ ipftqent_t *tqe; /* */ /* Move a queue entry to the back of the queue, if it isn't already there. */ /* ------------------------------------------------------------------------ */ -void fr_queueback(tqe) +void fr_queueback(tqe, ifs) ipftqent_t *tqe; +ipf_stack_t *ifs; { ipftq_t *ifq; ifq = tqe->tqe_ifq; if (ifq == NULL) return; - tqe->tqe_die = fr_ticks + ifq->ifq_ttl; + tqe->tqe_die = ifs->ifs_fr_ticks + ifq->ifq_ttl; MUTEX_ENTER(&ifq->ifq_lock); if (tqe->tqe_next == NULL) { /* at the end already ? */ @@ -5107,10 +5111,11 @@ ipftqent_t *tqe; /* */ /* Add a new item to this queue and put it on the very end. 
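Timeout queues and their tick bookkeeping are likewise per stack: fr_addtimeoutqueue() creates (or references) a queue under the caller's stack, and fr_queueappend() stamps expiry from that stack's ifs_fr_ticks. A sketch under stated assumptions; the wrapper, the 120-second TTL, and the NULL parent back-pointer are illustrative only:

	static void
	example_queue_entry(ipftqent_t *tqe, ipftq_t **parent,
	    ipf_stack_t *ifs)
	{
		ipftq_t *ifq;

		/* Find or create a 120 second queue owned by this stack. */
		ifq = fr_addtimeoutqueue(parent, 120, ifs);
		if (ifq == NULL)
			return;

		/* Expiry is computed from this stack's tick counter. */
		fr_queueappend(tqe, ifq, NULL, ifs);
	}
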
*/ /* ------------------------------------------------------------------------ */ -void fr_queueappend(tqe, ifq, parent) +void fr_queueappend(tqe, ifq, parent, ifs) ipftqent_t *tqe; ipftq_t *ifq; void *parent; +ipf_stack_t *ifs; { MUTEX_ENTER(&ifq->ifq_lock); @@ -5120,7 +5125,7 @@ void *parent; ifq->ifq_tail = &tqe->tqe_next; tqe->tqe_next = NULL; tqe->tqe_ifq = ifq; - tqe->tqe_die = fr_ticks + ifq->ifq_ttl; + tqe->tqe_die = ifs->ifs_fr_ticks + ifq->ifq_ttl; ifq->ifq_ref++; MUTEX_EXIT(&ifq->ifq_lock); } @@ -5137,9 +5142,10 @@ void *parent; /* If it notices that the current entry is already last and does not need */ /* to move queue, the return. */ /* ------------------------------------------------------------------------ */ -void fr_movequeue(tqe, oifq, nifq) +void fr_movequeue(tqe, oifq, nifq, ifs) ipftqent_t *tqe; ipftq_t *oifq, *nifq; +ipf_stack_t *ifs; { /* * Is the operation here going to be a no-op ? @@ -5181,7 +5187,7 @@ ipftq_t *oifq, *nifq; /* * Add to the bottom of the new queue */ - tqe->tqe_die = fr_ticks + nifq->ifq_ttl; + tqe->tqe_die = ifs->ifs_fr_ticks + nifq->ifq_ttl; tqe->tqe_pnext = nifq->ifq_tail; *nifq->ifq_tail = tqe; nifq->ifq_tail = &tqe->tqe_next; @@ -5260,8 +5266,11 @@ char *buffer; char *s; # endif + ASSERT(buffer != NULL); +#ifdef notdef if (buffer == NULL) buffer = namebuf; +#endif (void) strncpy(buffer, ifp->if_name, LIFNAMSIZ); buffer[LIFNAMSIZ - 1] = '\0'; # if defined(MENTAT) || defined(__FreeBSD__) || defined(__osf__) || \ @@ -5297,10 +5306,11 @@ char *buffer; /* EIO if ipfilter is not running. Also checks if write perms are req'd */ /* for the device in order to execute the ioctl. */ /* ------------------------------------------------------------------------ */ -int fr_ioctlswitch(unit, data, cmd, mode) -int unit, mode; +INLINE int fr_ioctlswitch(unit, data, cmd, mode, uid, ctx, ifs) +int unit, mode, uid; ioctlcmd_t cmd; -void *data; +void *data, *ctx; +ipf_stack_t *ifs; { int error = 0; @@ -5310,53 +5320,53 @@ void *data; error = -1; break; case IPL_LOGNAT : - if (fr_running > 0) - error = fr_nat_ioctl(data, cmd, mode); + if (ifs->ifs_fr_running > 0) + error = fr_nat_ioctl(data, cmd, mode, uid, ctx, ifs); else error = EIO; break; case IPL_LOGSTATE : - if (fr_running > 0) - error = fr_state_ioctl(data, cmd, mode); + if (ifs->ifs_fr_running > 0) + error = fr_state_ioctl(data, cmd, mode, uid, ctx, ifs); else error = EIO; break; case IPL_LOGAUTH : - if (fr_running > 0) { + if (ifs->ifs_fr_running > 0) { if ((cmd == (ioctlcmd_t)SIOCADAFR) || (cmd == (ioctlcmd_t)SIOCRMAFR)) { if (!(mode & FWRITE)) { error = EPERM; } else { error = frrequest(unit, cmd, data, - fr_active, 1); + ifs->ifs_fr_active, 1, ifs); } } else { - error = fr_auth_ioctl(data, cmd, mode); + error = fr_auth_ioctl(data, cmd, mode, uid, ctx, ifs); } } else error = EIO; break; case IPL_LOGSYNC : #ifdef IPFILTER_SYNC - if (fr_running > 0) - error = fr_sync_ioctl(data, cmd, mode); + if (ifs->ifs_fr_running > 0) + error = fr_sync_ioctl(data, cmd, mode, ifs); else #endif error = EIO; break; case IPL_LOGSCAN : #ifdef IPFILTER_SCAN - if (fr_running > 0) - error = fr_scan_ioctl(data, cmd, mode); + if (ifs->ifs_fr_running > 0) + error = fr_scan_ioctl(data, cmd, mode, ifs); else #endif error = EIO; break; case IPL_LOGLOOKUP : #ifdef IPFILTER_LOOKUP - if (fr_running > 0) - error = ip_lookup_ioctl(data, cmd, mode); + if (ifs->ifs_fr_running > 0) + error = ip_lookup_ioctl(data, cmd, mode, uid, ctx, ifs); else #endif error = EIO; @@ -5374,7 +5384,7 @@ void *data; * This array defines the expected size of 
objects coming into the kernel * for the various recognised object types. */ -#define NUM_OBJ_TYPES 14 +#define NUM_OBJ_TYPES 19 static int fr_objbytes[NUM_OBJ_TYPES][2] = { { 1, sizeof(struct frentry) }, /* frentry */ @@ -5390,7 +5400,12 @@ static int fr_objbytes[NUM_OBJ_TYPES][2] = { { 1, sizeof(struct ipstate) }, /* ipstate */ { 0, sizeof(struct ips_stat) }, { 0, sizeof(struct frauth) }, - { 0, sizeof(struct ipftune) } + { 0, sizeof(struct ipftune) }, + { 0, sizeof(struct nat) }, /* nat_t */ + { 0, sizeof(struct ipfruleiter) }, + { 0, sizeof(struct ipfgeniter) }, + { 0, sizeof(struct ipftable) }, + { 0, sizeof(struct ipflookupiter) } }; @@ -5615,13 +5630,14 @@ fr_info_t *fin; u_short sum, hdrsum, *csump; udphdr_t *udp; int dosum; + ipf_stack_t *ifs = fin->fin_ifs; #if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) net_data_t net_data_p; if (fin->fin_v == 4) - net_data_p = ipf_ipv4; + net_data_p = ifs->ifs_ipf_ipv4; else - net_data_p = ipf_ipv6; + net_data_p = ifs->ifs_ipf_ipv6; #endif if ((fin->fin_flx & FI_NOCKSUM) != 0) @@ -5836,6 +5852,7 @@ ipftag_t *tag1, *tag2; int fr_coalesce(fin) fr_info_t *fin; { + ipf_stack_t *ifs = fin->fin_ifs; if ((fin->fin_flx & FI_COALESCE) != 0) return 1; @@ -5848,7 +5865,7 @@ fr_info_t *fin; #if defined(_KERNEL) if (fr_pullup(fin->fin_m, fin, fin->fin_plen) == NULL) { - ATOMIC_INCL(fr_badcoalesces[fin->fin_out]); + ATOMIC_INCL(ifs->ifs_fr_badcoalesces[fin->fin_out]); # ifdef MENTAT FREE_MB_T(*fin->fin_mp); # endif @@ -5865,7 +5882,7 @@ fr_info_t *fin; /* * The following table lists all of the tunable variables that can be - * accessed via SIOCIPFGET/SIOCIPFSET/SIOCIPFGETNEXt. The format of each row + * accessed via SIOCIPFGET/SIOCIPFSET/SIOCIPFGETNEXT. The format of each row * in the table below is as follows: * * pointer to value, name of value, minimum, maximum, size of the value's @@ -5876,111 +5893,204 @@ fr_info_t *fin; * The obvious implication is if neither of these are set then the value can be * changed at any time without harm. 
*/ -ipftuneable_t ipf_tuneables[] = { +ipftuneable_t lcl_ipf_tuneables[] = { /* filtering */ - { { &fr_flags }, "fr_flags", 0, 0xffffffff, - sizeof(fr_flags), 0 }, - { { &fr_active }, "fr_active", 0, 0, - sizeof(fr_active), IPFT_RDONLY }, - { { &fr_control_forwarding }, "fr_control_forwarding", 0, 1, - sizeof(fr_control_forwarding), 0 }, - { { &fr_update_ipid }, "fr_update_ipid", 0, 1, - sizeof(fr_update_ipid), 0 }, - { { &fr_chksrc }, "fr_chksrc", 0, 1, - sizeof(fr_chksrc), 0 }, - { { &fr_minttl }, "fr_minttl", 0, 1, - sizeof(fr_minttl), 0 }, - { { &fr_icmpminfragmtu }, "fr_icmpminfragmtu", 0, 1, - sizeof(fr_icmpminfragmtu), 0 }, - { { &fr_pass }, "fr_pass", 0, 0xffffffff, - sizeof(fr_pass), 0 }, + { { NULL }, "fr_flags", 0, 0xffffffff, + 0, 0 }, + { { NULL }, "fr_active", 0, 0, + 0, IPFT_RDONLY }, + { { NULL }, "fr_control_forwarding", 0, 1, + 0, 0 }, + { { NULL }, "fr_update_ipid", 0, 1, + 0, 0 }, + { { NULL }, "fr_chksrc", 0, 1, + 0, 0 }, + { { NULL }, "fr_minttl", 0, 1, + 0, 0 }, + { { NULL }, "fr_icmpminfragmtu", 0, 1, + 0, 0 }, + { { NULL }, "fr_pass", 0, 0xffffffff, + 0, 0 }, #if SOLARIS2 >= 10 - { { &ipf_loopback}, "ipf_loopback", 0, 1, - sizeof(ipf_loopback), IPFT_WRDISABLED }, + { { NULL }, "ipf_loopback", 0, 1, + 0, IPFT_WRDISABLED }, #endif /* state */ - { { &fr_tcpidletimeout }, "fr_tcpidletimeout", 1, 0x7fffffff, - sizeof(fr_tcpidletimeout), IPFT_WRDISABLED }, - { { &fr_tcpclosewait }, "fr_tcpclosewait", 1, 0x7fffffff, - sizeof(fr_tcpclosewait), IPFT_WRDISABLED }, - { { &fr_tcplastack }, "fr_tcplastack", 1, 0x7fffffff, - sizeof(fr_tcplastack), IPFT_WRDISABLED }, - { { &fr_tcptimeout }, "fr_tcptimeout", 1, 0x7fffffff, - sizeof(fr_tcptimeout), IPFT_WRDISABLED }, - { { &fr_tcpclosed }, "fr_tcpclosed", 1, 0x7fffffff, - sizeof(fr_tcpclosed), IPFT_WRDISABLED }, - { { &fr_tcphalfclosed }, "fr_tcphalfclosed", 1, 0x7fffffff, - sizeof(fr_tcphalfclosed), IPFT_WRDISABLED }, - { { &fr_udptimeout }, "fr_udptimeout", 1, 0x7fffffff, - sizeof(fr_udptimeout), IPFT_WRDISABLED }, - { { &fr_udpacktimeout }, "fr_udpacktimeout", 1, 0x7fffffff, - sizeof(fr_udpacktimeout), IPFT_WRDISABLED }, - { { &fr_icmptimeout }, "fr_icmptimeout", 1, 0x7fffffff, - sizeof(fr_icmptimeout), IPFT_WRDISABLED }, - { { &fr_icmpacktimeout }, "fr_icmpacktimeout", 1, 0x7fffffff, - sizeof(fr_icmpacktimeout), IPFT_WRDISABLED }, - { { &fr_iptimeout }, "fr_iptimeout", 1, 0x7fffffff, - sizeof(fr_iptimeout), IPFT_WRDISABLED }, - { { &fr_statemax }, "fr_statemax", 1, 0x7fffffff, - sizeof(fr_statemax), 0 }, - { { &fr_statesize }, "fr_statesize", 1, 0x7fffffff, - sizeof(fr_statesize), IPFT_WRDISABLED }, - { { &fr_state_lock }, "fr_state_lock", 0, 1, - sizeof(fr_state_lock), IPFT_RDONLY }, - { { &fr_state_maxbucket }, "fr_state_maxbucket", 1, 0x7fffffff, - sizeof(fr_state_maxbucket), IPFT_WRDISABLED }, - { { &fr_state_maxbucket_reset }, "fr_state_maxbucket_reset", 0, 1, - sizeof(fr_state_maxbucket_reset), IPFT_WRDISABLED }, - { { &ipstate_logging }, "ipstate_logging", 0, 1, - sizeof(ipstate_logging), 0 }, + { { NULL }, "fr_tcpidletimeout", 1, 0x7fffffff, + 0, IPFT_WRDISABLED }, + { { NULL }, "fr_tcpclosewait", 1, 0x7fffffff, + 0, IPFT_WRDISABLED }, + { { NULL }, "fr_tcplastack", 1, 0x7fffffff, + 0, IPFT_WRDISABLED }, + { { NULL }, "fr_tcptimeout", 1, 0x7fffffff, + 0, IPFT_WRDISABLED }, + { { NULL }, "fr_tcpclosed", 1, 0x7fffffff, + 0, IPFT_WRDISABLED }, + { { NULL }, "fr_tcphalfclosed", 1, 0x7fffffff, + 0, IPFT_WRDISABLED }, + { { NULL }, "fr_udptimeout", 1, 0x7fffffff, + 0, IPFT_WRDISABLED }, + { { NULL }, "fr_udpacktimeout", 1, 
0x7fffffff, + 0, IPFT_WRDISABLED }, + { { NULL }, "fr_icmptimeout", 1, 0x7fffffff, + 0, IPFT_WRDISABLED }, + { { NULL }, "fr_icmpacktimeout", 1, 0x7fffffff, + 0, IPFT_WRDISABLED }, + { { NULL }, "fr_iptimeout", 1, 0x7fffffff, + 0, IPFT_WRDISABLED }, + { { NULL }, "fr_statemax", 1, 0x7fffffff, + 0, 0 }, + { { NULL }, "fr_statesize", 1, 0x7fffffff, + 0, IPFT_WRDISABLED }, + { { NULL }, "fr_state_lock", 0, 1, + 0, IPFT_RDONLY }, + { { NULL }, "fr_state_maxbucket", 1, 0x7fffffff, + 0, IPFT_WRDISABLED }, + { { NULL }, "fr_state_maxbucket_reset", 0, 1, + 0, IPFT_WRDISABLED }, + { { NULL }, "ipstate_logging", 0, 1, + 0, 0 }, /* nat */ - { { &fr_nat_lock }, "fr_nat_lock", 0, 1, - sizeof(fr_nat_lock), IPFT_RDONLY }, - { { &ipf_nattable_sz }, "ipf_nattable_sz", 1, 0x7fffffff, - sizeof(ipf_nattable_sz), IPFT_WRDISABLED }, - { { &ipf_nattable_max }, "ipf_nattable_max", 1, 0x7fffffff, - sizeof(ipf_nattable_max), 0 }, - { { &ipf_natrules_sz }, "ipf_natrules_sz", 1, 0x7fffffff, - sizeof(ipf_natrules_sz), IPFT_WRDISABLED }, - { { &ipf_rdrrules_sz }, "ipf_rdrrules_sz", 1, 0x7fffffff, - sizeof(ipf_rdrrules_sz), IPFT_WRDISABLED }, - { { &ipf_hostmap_sz }, "ipf_hostmap_sz", 1, 0x7fffffff, - sizeof(ipf_hostmap_sz), IPFT_WRDISABLED }, - { { &fr_nat_maxbucket }, "fr_nat_maxbucket", 1, 0x7fffffff, - sizeof(fr_nat_maxbucket), IPFT_WRDISABLED }, - { { &fr_nat_maxbucket_reset }, "fr_nat_maxbucket_reset", 0, 1, - sizeof(fr_nat_maxbucket_reset), IPFT_WRDISABLED }, - { { &nat_logging }, "nat_logging", 0, 1, - sizeof(nat_logging), 0 }, - { { &fr_defnatage }, "fr_defnatage", 1, 0x7fffffff, - sizeof(fr_defnatage), IPFT_WRDISABLED }, - { { &fr_defnatipage }, "fr_defnatipage", 1, 0x7fffffff, - sizeof(fr_defnatipage), IPFT_WRDISABLED }, - { { &fr_defnaticmpage }, "fr_defnaticmpage", 1, 0x7fffffff, - sizeof(fr_defnaticmpage), IPFT_WRDISABLED }, + { { NULL }, "fr_nat_lock", 0, 1, + 0, IPFT_RDONLY }, + { { NULL }, "ipf_nattable_sz", 1, 0x7fffffff, + 0, IPFT_WRDISABLED }, + { { NULL }, "ipf_nattable_max", 1, 0x7fffffff, + 0, 0 }, + { { NULL }, "ipf_natrules_sz", 1, 0x7fffffff, + 0, IPFT_WRDISABLED }, + { { NULL }, "ipf_rdrrules_sz", 1, 0x7fffffff, + 0, IPFT_WRDISABLED }, + { { NULL }, "ipf_hostmap_sz", 1, 0x7fffffff, + 0, IPFT_WRDISABLED }, + { { NULL }, "fr_nat_maxbucket", 1, 0x7fffffff, + 0, IPFT_WRDISABLED }, + { { NULL }, "fr_nat_maxbucket_reset", 0, 1, + 0, IPFT_WRDISABLED }, + { { NULL }, "nat_logging", 0, 1, + 0, 0 }, + { { NULL }, "fr_defnatage", 1, 0x7fffffff, + 0, IPFT_WRDISABLED }, + { { NULL }, "fr_defnatipage", 1, 0x7fffffff, + 0, IPFT_WRDISABLED }, + { { NULL }, "fr_defnaticmpage", 1, 0x7fffffff, + 0, IPFT_WRDISABLED }, /* frag */ - { { &ipfr_size }, "ipfr_size", 1, 0x7fffffff, - sizeof(ipfr_size), IPFT_WRDISABLED }, - { { &fr_ipfrttl }, "fr_ipfrttl", 1, 0x7fffffff, - sizeof(fr_ipfrttl), IPFT_WRDISABLED }, + { { NULL }, "ipfr_size", 1, 0x7fffffff, + 0, IPFT_WRDISABLED }, + { { NULL }, "fr_ipfrttl", 1, 0x7fffffff, + 0, IPFT_WRDISABLED }, #ifdef IPFILTER_LOG /* log */ - { { &ipl_suppress }, "ipl_suppress", 0, 1, - sizeof(ipl_suppress), 0 }, - { { &ipl_buffer_sz }, "ipl_buffer_sz", 0, 0, - sizeof(ipl_buffer_sz), IPFT_RDONLY }, - { { &ipl_logmax }, "ipl_logmax", 0, 0x7fffffff, - sizeof(ipl_logmax), IPFT_WRDISABLED }, - { { &ipl_logall }, "ipl_logall", 0, 1, - sizeof(ipl_logall), 0 }, - { { &ipl_logsize }, "ipl_logsize", 0, 0x80000, - sizeof(ipl_logsize), 0 }, + { { NULL }, "ipl_suppress", 0, 1, + 0, 0 }, + { { NULL }, "ipl_buffer_sz", 0, 0, + 0, IPFT_RDONLY }, + { { NULL }, "ipl_logmax", 0, 0x7fffffff, + 0, 
IPFT_WRDISABLED }, + { { NULL }, "ipl_logall", 0, 1, + 0, 0 }, + { { NULL }, "ipl_logsize", 0, 0x80000, + 0, 0 }, #endif { { NULL }, NULL, 0, 0 } }; -static ipftuneable_t *ipf_tunelist = NULL; +static ipftuneable_t * +tune_lookup(ipf_stack_t *ifs, char *name) +{ + int i; + for (i = 0; ifs->ifs_ipf_tuneables[i].ipft_name != NULL; i++) { + if (strcmp(ifs->ifs_ipf_tuneables[i].ipft_name, name) == 0) + return (&ifs->ifs_ipf_tuneables[i]); + } + return (NULL); +} + +#ifdef _KERNEL +extern dev_info_t *ipf_dev_info; +extern int ipf_property_update __P((dev_info_t *, ipf_stack_t *)); +#endif +/* + * Allocate a per-stack tuneable and copy in the names. Then + * set it to point to each of the per-stack tunables. + */ +void +ipftuneable_alloc(ipf_stack_t *ifs) +{ + ipftuneable_t *item; + + KMALLOCS(ifs->ifs_ipf_tuneables, ipftuneable_t *, + sizeof (lcl_ipf_tuneables)); + bcopy(lcl_ipf_tuneables, ifs->ifs_ipf_tuneables, + sizeof (lcl_ipf_tuneables)); + +#define TUNE_SET(_ifs, _name, _field) \ + item = tune_lookup((_ifs), (_name)); \ + if (item != NULL) { \ + item->ipft_una.ipftp_int = (unsigned int *)&((_ifs)->_field); \ + item->ipft_sz = sizeof ((_ifs)->_field); \ + } + + TUNE_SET(ifs, "fr_flags", ifs_fr_flags); + TUNE_SET(ifs, "fr_active", ifs_fr_active); + TUNE_SET(ifs, "fr_control_forwarding", ifs_fr_control_forwarding); + TUNE_SET(ifs, "fr_update_ipid", ifs_fr_update_ipid); + TUNE_SET(ifs, "fr_chksrc", ifs_fr_chksrc); + TUNE_SET(ifs, "fr_minttl", ifs_fr_minttl); + TUNE_SET(ifs, "fr_icmpminfragmtu", ifs_fr_icmpminfragmtu); + TUNE_SET(ifs, "fr_pass", ifs_fr_pass); + TUNE_SET(ifs, "fr_tcpidletimeout", ifs_fr_tcpidletimeout); + TUNE_SET(ifs, "fr_tcpclosewait", ifs_fr_tcpclosewait); + TUNE_SET(ifs, "fr_tcplastack", ifs_fr_tcplastack); + TUNE_SET(ifs, "fr_tcptimeout", ifs_fr_tcptimeout); + TUNE_SET(ifs, "fr_tcpclosed", ifs_fr_tcpclosed); + TUNE_SET(ifs, "fr_tcphalfclosed", ifs_fr_tcphalfclosed); + TUNE_SET(ifs, "fr_udptimeout", ifs_fr_udptimeout); + TUNE_SET(ifs, "fr_udpacktimeout", ifs_fr_udpacktimeout); + TUNE_SET(ifs, "fr_icmptimeout", ifs_fr_icmptimeout); + TUNE_SET(ifs, "fr_icmpacktimeout", ifs_fr_icmpacktimeout); + TUNE_SET(ifs, "fr_iptimeout", ifs_fr_iptimeout); + TUNE_SET(ifs, "fr_statemax", ifs_fr_statemax); + TUNE_SET(ifs, "fr_statesize", ifs_fr_statesize); + TUNE_SET(ifs, "fr_state_lock", ifs_fr_state_lock); + TUNE_SET(ifs, "fr_state_maxbucket", ifs_fr_state_maxbucket); + TUNE_SET(ifs, "fr_state_maxbucket_reset", ifs_fr_state_maxbucket_reset); + TUNE_SET(ifs, "ipstate_logging", ifs_ipstate_logging); + TUNE_SET(ifs, "fr_nat_lock", ifs_fr_nat_lock); + TUNE_SET(ifs, "ipf_nattable_sz", ifs_ipf_nattable_sz); + TUNE_SET(ifs, "ipf_nattable_max", ifs_ipf_nattable_max); + TUNE_SET(ifs, "ipf_natrules_sz", ifs_ipf_natrules_sz); + TUNE_SET(ifs, "ipf_rdrrules_sz", ifs_ipf_rdrrules_sz); + TUNE_SET(ifs, "ipf_hostmap_sz", ifs_ipf_hostmap_sz); + TUNE_SET(ifs, "fr_nat_maxbucket", ifs_fr_nat_maxbucket); + TUNE_SET(ifs, "fr_nat_maxbucket_reset", ifs_fr_nat_maxbucket_reset); + TUNE_SET(ifs, "nat_logging", ifs_nat_logging); + TUNE_SET(ifs, "fr_defnatage", ifs_fr_defnatage); + TUNE_SET(ifs, "fr_defnaticmpage", ifs_fr_defnaticmpage); + TUNE_SET(ifs, "ipfr_size", ifs_ipfr_size); + TUNE_SET(ifs, "fr_ipfrttl", ifs_fr_ipfrttl); +#ifdef IPFILTER_LOG + TUNE_SET(ifs, "ipl_suppress", ifs_ipl_suppress); + TUNE_SET(ifs, "ipl_buffer_sz", ifs_ipl_buffer_sz); + TUNE_SET(ifs, "ipl_logmax", ifs_ipl_logmax); + TUNE_SET(ifs, "ipl_logall", ifs_ipl_logall); + TUNE_SET(ifs, "ipl_logsize", ifs_ipl_logsize); +#endif +#undef TUNE_SET + 
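ipftuneable_alloc() above copies the lcl_ipf_tuneables template for each stack instance and then uses TUNE_SET to point every row at the matching ifs_* field and record its size. A minimal user-space sketch of that wiring pattern; my_stack_t, my_tune_t and tune_alloc are illustrative names, not part of the patch:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Stand-ins for the per-stack values and the tuneable table rows. */
typedef struct my_stack {
	unsigned int ms_flags;
	unsigned int ms_tcpidletimeout;
} my_stack_t;

typedef struct my_tune {
	unsigned int	*mt_ptr;	/* patched in at allocation time */
	const char	*mt_name;
	size_t		mt_sz;
} my_tune_t;

/* Template with NULL pointers and zero sizes, like lcl_ipf_tuneables[]. */
static const my_tune_t tune_template[] = {
	{ NULL, "fr_flags",          0 },
	{ NULL, "fr_tcpidletimeout", 0 },
	{ NULL, NULL,                0 }
};

static my_tune_t *
tune_lookup(my_tune_t *tab, const char *name)
{
	for (int i = 0; tab[i].mt_name != NULL; i++)
		if (strcmp(tab[i].mt_name, name) == 0)
			return (&tab[i]);
	return (NULL);
}

/* Copy the template, then aim each row at this instance's field. */
static my_tune_t *
tune_alloc(my_stack_t *ms)
{
	my_tune_t *tab = malloc(sizeof (tune_template));

	if (tab == NULL)
		return (NULL);
	memcpy(tab, tune_template, sizeof (tune_template));

#define	TUNE_SET(_name, _field)					\
	do {							\
		my_tune_t *item = tune_lookup(tab, (_name));	\
		if (item != NULL) {				\
			item->mt_ptr = &ms->_field;		\
			item->mt_sz = sizeof (ms->_field);	\
		}						\
	} while (0)

	TUNE_SET("fr_flags", ms_flags);
	TUNE_SET("fr_tcpidletimeout", ms_tcpidletimeout);
#undef	TUNE_SET
	return (tab);
}

int
main(void)
{
	my_stack_t ms = { 0, 0 };
	my_tune_t *tab = tune_alloc(&ms);

	if (tab == NULL)
		return (1);
	*tune_lookup(tab, "fr_flags")->mt_ptr = 0x10;
	printf("ms_flags = %#x\n", ms.ms_flags);	/* 0x10: both views share storage */
	free(tab);
	return (0);
}

Writing through the table updates the per-stack field directly, which is why the SIOCIPFGET/SIOCIPFSET paths need no per-variable special casing.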
+#ifdef _KERNEL + (void) ipf_property_update(ipf_dev_info, ifs); +#endif +} + +void +ipftuneable_free(ipf_stack_t *ifs) +{ + KFREES(ifs->ifs_ipf_tuneables, sizeof (lcl_ipf_tuneables)); + ifs->ifs_ipf_tuneables = NULL; +} /* ------------------------------------------------------------------------ */ /* Function: fr_findtunebycookie */ @@ -5994,12 +6104,13 @@ static ipftuneable_t *ipf_tunelist = NULL; /* a matching value for "cookie" - ie its address. When returning a match, */ /* the next one to be found may be returned inside next. */ /* ------------------------------------------------------------------------ */ -static ipftuneable_t *fr_findtunebycookie(cookie, next) +static ipftuneable_t *fr_findtunebycookie(cookie, next, ifs) void *cookie, **next; +ipf_stack_t * ifs; { ipftuneable_t *ta, **tap; - for (ta = ipf_tuneables; ta->ipft_name != NULL; ta++) + for (ta = ifs->ifs_ipf_tuneables; ta->ipft_name != NULL; ta++) if (ta == cookie) { if (next != NULL) { /* @@ -6013,12 +6124,12 @@ void *cookie, **next; if ((ta + 1)->ipft_name != NULL) *next = ta + 1; else - *next = &ipf_tunelist; + *next = &ifs->ifs_ipf_tunelist; } return ta; } - for (tap = &ipf_tunelist; (ta = *tap) != NULL; tap = &ta->ipft_next) + for (tap = &ifs->ifs_ipf_tunelist; (ta = *tap) != NULL; tap = &ta->ipft_next) if (tap == cookie) { if (next != NULL) *next = &ta->ipft_next; @@ -6040,17 +6151,18 @@ void *cookie, **next; /* for an entry with a matching name. If we can find one, return a pointer */ /* to the matching structure. */ /* ------------------------------------------------------------------------ */ -static ipftuneable_t *fr_findtunebyname(name) +static ipftuneable_t *fr_findtunebyname(name, ifs) const char *name; +ipf_stack_t *ifs; { ipftuneable_t *ta; - for (ta = ipf_tuneables; ta->ipft_name != NULL; ta++) + for (ta = ifs->ifs_ipf_tuneables; ta->ipft_name != NULL; ta++) if (!strcmp(ta->ipft_name, name)) { return ta; } - for (ta = ipf_tunelist; ta != NULL; ta = ta->ipft_next) + for (ta = ifs->ifs_ipf_tunelist; ta != NULL; ta = ta->ipft_next) if (!strcmp(ta->ipft_name, name)) { return ta; } @@ -6068,16 +6180,17 @@ const char *name; /* current list of "dynamic" tuneable parameters. Once added, the owner */ /* of the object is not expected to ever change "ipft_next". */ /* ------------------------------------------------------------------------ */ -int fr_addipftune(newtune) +int fr_addipftune(newtune, ifs) ipftuneable_t *newtune; +ipf_stack_t *ifs; { ipftuneable_t *ta, **tap; - ta = fr_findtunebyname(newtune->ipft_name); + ta = fr_findtunebyname(newtune->ipft_name, ifs); if (ta != NULL) return EEXIST; - for (tap = &ipf_tunelist; *tap != NULL; tap = &(*tap)->ipft_next) + for (tap = &ifs->ifs_ipf_tunelist; *tap != NULL; tap = &(*tap)->ipft_next) ; newtune->ipft_next = NULL; @@ -6096,12 +6209,13 @@ ipftuneable_t *newtune; /* dynamically added at run time. If found, adjust the list so that this */ /* structure is no longer part of it. 
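fr_findtunebyname() consults the per-stack built-in array first and only then the dynamically registered list, and fr_addipftune() rejects duplicate names with EEXIST before appending at the tail. A self-contained sketch of that two-tier lookup; the type and function names here are illustrative only:

#include <errno.h>
#include <stdio.h>
#include <string.h>

typedef struct tune {
	const char	*t_name;
	struct tune	*t_next;	/* used only on the dynamic list */
} tune_t;

typedef struct tstack {
	tune_t	*ts_builtin;	/* array terminated by a NULL name */
	tune_t	*ts_dynlist;	/* singly linked, appended at the tail */
} tstack_t;

static tune_t *
findtunebyname(tstack_t *ts, const char *name)
{
	tune_t *t;

	/* Built-in entries shadow dynamically registered ones. */
	for (t = ts->ts_builtin; t->t_name != NULL; t++)
		if (strcmp(t->t_name, name) == 0)
			return (t);
	for (t = ts->ts_dynlist; t != NULL; t = t->t_next)
		if (strcmp(t->t_name, name) == 0)
			return (t);
	return (NULL);
}

static int
addtune(tstack_t *ts, tune_t *newt)
{
	tune_t **tap;

	if (findtunebyname(ts, newt->t_name) != NULL)
		return (EEXIST);		/* duplicate names refused */
	for (tap = &ts->ts_dynlist; *tap != NULL; tap = &(*tap)->t_next)
		;
	newt->t_next = NULL;
	*tap = newt;				/* append at the tail */
	return (0);
}

int
main(void)
{
	tune_t builtin[] = { { "fr_flags", NULL }, { NULL, NULL } };
	tune_t extra = { "my_new_tune", NULL };
	tstack_t ts = { builtin, NULL };

	printf("add new: %d, add duplicate: %d\n",
	    addtune(&ts, &extra), addtune(&ts, &extra));	/* 0, then EEXIST */
	return (0);
}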
*/ /* ------------------------------------------------------------------------ */ -int fr_delipftune(oldtune) +int fr_delipftune(oldtune, ifs) ipftuneable_t *oldtune; +ipf_stack_t *ifs; { ipftuneable_t *ta, **tap; - for (tap = &ipf_tunelist; (ta = *tap) != NULL; tap = &ta->ipft_next) + for (tap = &ifs->ifs_ipf_tunelist; (ta = *tap) != NULL; tap = &ta->ipft_next) if (ta == oldtune) { *tap = oldtune->ipft_next; oldtune->ipft_next = NULL; @@ -6125,9 +6239,10 @@ ipftuneable_t *oldtune; /* and 'destruction' routines of the various components of ipfilter are all */ /* each responsible for handling their own values being too big. */ /* ------------------------------------------------------------------------ */ -int fr_ipftune(cmd, data) +int fr_ipftune(cmd, data, ifs) ioctlcmd_t cmd; void *data; +ipf_stack_t *ifs; { ipftuneable_t *ta; ipftune_t tu; @@ -6155,9 +6270,9 @@ void *data; * at the front of the list. */ if (cookie != NULL) { - ta = fr_findtunebycookie(cookie, &tu.ipft_cookie); + ta = fr_findtunebycookie(cookie, &tu.ipft_cookie, ifs); } else { - ta = ipf_tuneables; + ta = ifs->ifs_ipf_tuneables; tu.ipft_cookie = ta + 1; } if (ta != NULL) { @@ -6197,11 +6312,11 @@ void *data; */ error = ESRCH; if (cookie != NULL) { - ta = fr_findtunebycookie(cookie, NULL); + ta = fr_findtunebycookie(cookie, NULL, ifs); if (ta != NULL) error = 0; } else if (tu.ipft_name[0] != '\0') { - ta = fr_findtunebyname(tu.ipft_name); + ta = fr_findtunebyname(tu.ipft_name, ifs); if (ta != NULL) error = 0; } @@ -6237,7 +6352,7 @@ void *data; u_long in; if (((ta->ipft_flags & IPFT_WRDISABLED) != 0) && - (fr_running > 0)) { + (ifs->ifs_fr_running > 0)) { error = EBUSY; break; } @@ -6283,52 +6398,53 @@ void *data; /* of IPFilter. If any of them should fail, return immeadiately a failure */ /* BUT do not try to recover from the error here. */ /* ------------------------------------------------------------------------ */ -int fr_initialise() +int fr_initialise(ifs) +ipf_stack_t *ifs; { int i; #ifdef IPFILTER_LOG - i = fr_loginit(); + i = fr_loginit(ifs); if (i < 0) return -10 + i; #endif - i = fr_natinit(); + i = fr_natinit(ifs); if (i < 0) return -20 + i; - i = fr_stateinit(); + i = fr_stateinit(ifs); if (i < 0) return -30 + i; - i = fr_authinit(); + i = fr_authinit(ifs); if (i < 0) return -40 + i; - i = fr_fraginit(); + i = fr_fraginit(ifs); if (i < 0) return -50 + i; - i = appr_init(); + i = appr_init(ifs); if (i < 0) return -60 + i; #ifdef IPFILTER_SYNC - i = ipfsync_init(); + i = ipfsync_init(ifs); if (i < 0) return -70 + i; #endif #ifdef IPFILTER_SCAN - i = ipsc_init(); + i = ipsc_init(ifs); if (i < 0) return -80 + i; #endif #ifdef IPFILTER_LOOKUP - i = ip_lookup_init(); + i = ip_lookup_init(ifs); if (i < 0) return -90 + i; #endif #ifdef IPFILTER_COMPILED - ipfrule_add(); + ipfrule_add(ifs); #endif return 0; } @@ -6344,32 +6460,33 @@ int fr_initialise() /* The order here IS important as there are some cross references of */ /* internal data structures. 
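fr_initialise() reports which subsystem failed by offsetting that subsystem's own return value into a distinct band of ten (-10 + i for the log code, -20 + i for NAT, and so on), and fr_deinitialise() unwinds in a fixed order. A small decoder for those banded values, purely as an illustration of the convention:

#include <stdio.h>

/*
 * fr_initialise() returns "-(band) + i" where the band identifies the
 * subsystem and i is that subsystem's own negative return value.
 */
static const char *stages[] = {
	NULL, "log", "nat", "state", "auth", "frag",
	"proxy", "sync", "scan", "lookup"
};

static void
decode_init_error(int rval)
{
	if (rval >= 0) {
		printf("initialisation succeeded\n");
		return;
	}
	int stage = (-rval) / 10;		/* -10..-19 -> 1, -20..-29 -> 2, ... */
	int suberr = rval + stage * 10;		/* the subsystem's own value */
	printf("%s init failed with %d\n",
	    (stage >= 1 && stage <= 9) ? stages[stage] : "unknown", suberr);
}

int
main(void)
{
	decode_init_error(-23);	/* e.g. fr_natinit() returned -3 */
	decode_init_error(0);
	return (0);
}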
*/ /* ------------------------------------------------------------------------ */ -void fr_deinitialise() +void fr_deinitialise(ifs) +ipf_stack_t *ifs; { - fr_fragunload(); - fr_authunload(); - fr_natunload(); - fr_stateunload(); + fr_fragunload(ifs); + fr_authunload(ifs); + fr_natunload(ifs); + fr_stateunload(ifs); #ifdef IPFILTER_SCAN - fr_scanunload(); + fr_scanunload(ifs); #endif - appr_unload(); + appr_unload(ifs); #ifdef IPFILTER_COMPILED - ipfrule_remove(); + ipfrule_remove(ifs); #endif - (void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE|FR_INACTIVE); - (void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE); - (void) frflush(IPL_LOGCOUNT, 0, FR_INQUE|FR_OUTQUE|FR_INACTIVE); - (void) frflush(IPL_LOGCOUNT, 0, FR_INQUE|FR_OUTQUE); + (void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE|FR_INACTIVE, ifs); + (void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE, ifs); + (void) frflush(IPL_LOGCOUNT, 0, FR_INQUE|FR_OUTQUE|FR_INACTIVE, ifs); + (void) frflush(IPL_LOGCOUNT, 0, FR_INQUE|FR_OUTQUE, ifs); #ifdef IPFILTER_LOOKUP - ip_lookup_unload(); + ip_lookup_unload(ifs); #endif #ifdef IPFILTER_LOG - fr_logunload(); + fr_logunload(ifs); #endif } @@ -6383,20 +6500,21 @@ void fr_deinitialise() /* current ones in the kernel. The lock is only held across the bzero() as */ /* the copyout may result in paging (ie network activity.) */ /* ------------------------------------------------------------------------ */ -int fr_zerostats(data) +int fr_zerostats(data, ifs) caddr_t data; +ipf_stack_t *ifs; { friostat_t fio; int error; - fr_getstat(&fio); + fr_getstat(&fio, ifs); error = copyoutptr(&fio, data, sizeof(fio)); if (error) return EFAULT; - WRITE_ENTER(&ipf_mutex); - bzero((char *)frstats, sizeof(*frstats) * 2); - RWLOCK_EXIT(&ipf_mutex); + WRITE_ENTER(&ifs->ifs_ipf_mutex); + bzero((char *)ifs->ifs_frstats, sizeof(*ifs->ifs_frstats) * 2); + RWLOCK_EXIT(&ifs->ifs_ipf_mutex); return 0; } @@ -6415,14 +6533,15 @@ caddr_t data; /* found, then set the interface pointer to be -1 as NULL is considered to */ /* indicate there is no information at all in the structure. */ /* ------------------------------------------------------------------------ */ -void fr_resolvedest(fdp, v) +void fr_resolvedest(fdp, v, ifs) frdest_t *fdp; int v; +ipf_stack_t *ifs; { fdp->fd_ifp = NULL; if (*fdp->fd_ifname != '\0') { - fdp->fd_ifp = GETIFP(fdp->fd_ifname, v); + fdp->fd_ifp = GETIFP(fdp->fd_ifname, v, ifs); if (fdp->fd_ifp == NULL) fdp->fd_ifp = (void *)-1; } @@ -6452,9 +6571,10 @@ int v; /* NOTE: This SHOULD ONLY be used with IPFilter structures that have an */ /* array for the name that is LIFNAMSIZ bytes (at least) in length. 
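fr_resolvedest() and fr_resolvenic() use a three-valued convention for the interface pointer: NULL means no name was supplied, (void *)-1 means a name was supplied but no such NIC currently exists, and anything else is the resolved interface. A user-space sketch of that convention; getifp_stub() merely stands in for GETIFP() and is not the real lookup:

#include <stdio.h>
#include <string.h>

static void *
getifp_stub(const char *name)
{
	static int fake_nic;	/* pretend only "net0" exists */
	return (strcmp(name, "net0") == 0 ? (void *)&fake_nic : NULL);
}

static void *
resolve_dest(const char *ifname)
{
	void *ifp = NULL;	/* NULL: no name given at all */

	if (ifname[0] != '\0') {
		ifp = getifp_stub(ifname);
		if (ifp == NULL)
			ifp = (void *)-1;	/* known name, unknown NIC */
	}
	return (ifp);
}

int
main(void)
{
	printf("%p %p %p\n", resolve_dest(""), resolve_dest("net0"),
	    resolve_dest("hme1"));
	return (0);
}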
*/ /* ------------------------------------------------------------------------ */ -void *fr_resolvenic(name, v) +void *fr_resolvenic(name, v, ifs) char *name; int v; +ipf_stack_t *ifs; { void *nic; @@ -6467,8 +6587,303 @@ int v; name[LIFNAMSIZ - 1] = '\0'; - nic = GETIFP(name, v); + nic = GETIFP(name, v, ifs); if (nic == NULL) nic = (void *)-1; return nic; } + +void ipf_expiretokens(ifs) +ipf_stack_t *ifs; +{ + ipftoken_t *it; + + WRITE_ENTER(&ifs->ifs_ipf_tokens); + while ((it = ifs->ifs_ipftokenhead) != NULL) { + if (it->ipt_die > ifs->ifs_fr_ticks) + break; + + ipf_freetoken(it, ifs); + } + RWLOCK_EXIT(&ifs->ifs_ipf_tokens); +} + + +int ipf_deltoken(type, uid, ptr, ifs) +int type, uid; +void *ptr; +ipf_stack_t *ifs; +{ + ipftoken_t *it; + int error = ESRCH; + + WRITE_ENTER(&ifs->ifs_ipf_tokens); + for (it = ifs->ifs_ipftokenhead; it != NULL; it = it->ipt_next) + if (ptr == it->ipt_ctx && type == it->ipt_type && + uid == it->ipt_uid) { + ipf_freetoken(it, ifs); + error = 0; + break; + } + RWLOCK_EXIT(&ifs->ifs_ipf_tokens); + + return error; +} + +static void ipf_unlinktoken(token, ifs) +ipftoken_t *token; +ipf_stack_t *ifs; +{ + + if (ifs->ifs_ipftokentail == &token->ipt_next) + ifs->ifs_ipftokentail = token->ipt_pnext; + + *token->ipt_pnext = token->ipt_next; + if (token->ipt_next != NULL) + token->ipt_next->ipt_pnext = token->ipt_pnext; +} + + + +ipftoken_t *ipf_findtoken(type, uid, ptr, ifs) +int type, uid; +void *ptr; +ipf_stack_t *ifs; +{ + ipftoken_t *it, *new; + + KMALLOC(new, ipftoken_t *); + + WRITE_ENTER(&ifs->ifs_ipf_tokens); + for (it = ifs->ifs_ipftokenhead; it != NULL; it = it->ipt_next) { + if (it->ipt_alive == 0) + continue; + if (ptr == it->ipt_ctx && type == it->ipt_type && + uid == it->ipt_uid) + break; + } + + if (it == NULL) { + it = new; + new = NULL; + if (it == NULL) + return NULL; + it->ipt_data = NULL; + it->ipt_ctx = ptr; + it->ipt_uid = uid; + it->ipt_type = type; + it->ipt_next = NULL; + it->ipt_alive = 1; + } else { + if (new != NULL) { + KFREE(new); + new = NULL; + } + + ipf_unlinktoken(it, ifs); + } + it->ipt_pnext = ifs->ifs_ipftokentail; + *ifs->ifs_ipftokentail = it; + ifs->ifs_ipftokentail = &it->ipt_next; + it->ipt_next = NULL; + + /* XXX: more needed */ + it->ipt_die = ifs->ifs_fr_ticks + 2; + + MUTEX_DOWNGRADE(&ifs->ifs_ipf_tokens); + + return it; +} + + +void ipf_freetoken(token, ifs) +ipftoken_t *token; +ipf_stack_t *ifs; +{ + void *data; + + ipf_unlinktoken(token, ifs); + + data = token->ipt_data; + + if ((data != NULL) && (data != (void *)-1)) { + switch (token->ipt_type) + { + case IPFGENITER_IPF : + (void)fr_derefrule((frentry_t **)&data, ifs); + break; + case IPFGENITER_IPNAT : + WRITE_ENTER(&ifs->ifs_ipf_nat); + fr_ipnatderef((ipnat_t **)&data, ifs); + RWLOCK_EXIT(&ifs->ifs_ipf_nat); + break; + case IPFGENITER_NAT : + fr_natderef((nat_t **)&data, ifs); + break; + case IPFGENITER_STATE : + fr_statederef(NULL, (ipstate_t **)&data, ifs); + break; + case IPFGENITER_FRAG : + fr_fragderef((ipfr_t **)&data, &ifs->ifs_ipf_frag, ifs); + break; + case IPFGENITER_NATFRAG : + fr_fragderef((ipfr_t **)&data, &ifs->ifs_ipf_natfrag, ifs); + break; + case IPFGENITER_HOSTMAP : + fr_hostmapderef((hostmap_t **)&data); + break; + default : + (void) ip_lookup_iterderef(token->ipt_type, data, ifs); + break; + } + } + + KFREE(token); +} + +int ipf_getnextrule(ipftoken_t *t, void *ptr, ipf_stack_t *ifs) +{ + frentry_t *fr, *next, zero; + ipfruleiter_t it; + frgroup_t *fg; + int error; + + if (t == NULL || ptr == NULL) + return EFAULT; + error = fr_inobj(ptr, &it, 
IPFOBJ_IPFITER); + if (error != 0) + return error; + if ((it.iri_ver != AF_INET) && (it.iri_ver != AF_INET6)) + return EINVAL; + if ((it.iri_inout != 0) && (it.iri_inout != 1)) + return EINVAL; + if ((it.iri_active != 0) && (it.iri_active != 1)) + return EINVAL; + if (it.iri_rule == NULL) + return EFAULT; + + fr = t->ipt_data; + READ_ENTER(&ifs->ifs_ipf_mutex); + if (fr == NULL) { + if (*it.iri_group == '\0') { + if (it.iri_ver == AF_INET) + next = ifs->ifs_ipfilter + [it.iri_inout][it.iri_active]; + else + next = ifs->ifs_ipfilter6 + [it.iri_inout][it.iri_active]; + } else { + fg = fr_findgroup(it.iri_group, IPL_LOGIPF, + it.iri_active, NULL, ifs); + if (fg != NULL) + next = fg->fg_start; + else + next = NULL; + } + } else { + next = fr->fr_next; + } + + if (next != NULL) { + if (next->fr_next == NULL) { + t->ipt_alive = 0; + /*ipf_freetoken(t, ifs); + fr = NULL; */ + } else { + MUTEX_ENTER(&next->fr_lock); + next->fr_ref++; + MUTEX_EXIT(&next->fr_lock); + } + t->ipt_data = next; + } else { + bzero(&zero, sizeof(zero)); + next = &zero; + ipf_freetoken(t, ifs); + fr = NULL; + } + RWLOCK_EXIT(&ifs->ifs_ipf_mutex); + + if (fr != NULL) { + (void)fr_derefrule(&fr, ifs); + } + + error = COPYOUT(next, it.iri_rule, sizeof(*next)); + if (error != 0) + return EFAULT; + + if (next->fr_data != NULL) { + error = COPYOUT(next->fr_data, + (char *)it.iri_rule + sizeof(*next), + next->fr_dsize); + if (error != 0) + error = EFAULT; + } + + return error; +} + + +int ipf_frruleiter(data, uid, ctx, ifs) +void *data, *ctx; +int uid; +ipf_stack_t *ifs; +{ + ipftoken_t *token; + int error; + + token = ipf_findtoken(IPFGENITER_IPF, uid, ctx, ifs); + if (token != NULL) + error = ipf_getnextrule(token, data, ifs); + else + error = EFAULT; + RWLOCK_EXIT(&ifs->ifs_ipf_tokens); + + return error; +} + + +int ipf_geniter(token, itp, ifs) +ipftoken_t *token; +ipfgeniter_t *itp; +ipf_stack_t *ifs; +{ + int error; + + switch (itp->igi_type) + { + case IPFGENITER_FRAG : + error = fr_nextfrag(token, itp, &ifs->ifs_ipfr_list, + &ifs->ifs_ipfr_tail, &ifs->ifs_ipf_frag, ifs); + break; + default : + error = EINVAL; + break; + } + + return error; +} + + +int ipf_genericiter(data, uid, ctx, ifs) +void *data, *ctx; +int uid; +ipf_stack_t *ifs; +{ + ipftoken_t *token; + ipfgeniter_t iter; + int error; + + error = fr_inobj(data, &iter, IPFOBJ_GENITER); + if (error != 0) + return error; + + token = ipf_findtoken(iter.igi_type, uid, ctx, ifs); + if (token != NULL) { + token->ipt_subtype = iter.igi_type; + error = ipf_geniter(token, &iter, ifs); + } else + error = EFAULT; + RWLOCK_EXIT(&ifs->ifs_ipf_tokens); + + return error; +} diff --git a/usr/src/uts/common/inet/ipf/ip_auth.c b/usr/src/uts/common/inet/ipf/ip_auth.c index bf2afb2ba4..f733c6b55a 100644 --- a/usr/src/uts/common/inet/ipf/ip_auth.c +++ b/usr/src/uts/common/inet/ipf/ip_auth.c @@ -2,10 +2,13 @@ * Copyright (C) 1998-2003 by Darren Reed & Guido van Rooij. * * See the IPFILTER.LICENCE file for details on licencing. + * + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
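The iterator support added above keeps ipftoken_t entries on a list with a tail pointer (ifs_ipftokentail) and a per-node back-pointer (ipt_pnext) to the link that references it, so ipf_unlinktoken() can splice a token out without walking the list. A stand-alone sketch of that structure; tok_t and toklist_t are illustrative names:

#include <stdio.h>

typedef struct tok {
	struct tok	**t_pnext;	/* address of the link pointing here */
	struct tok	*t_next;
	int		t_id;
} tok_t;

typedef struct toklist {
	tok_t	*tl_head;
	tok_t	**tl_tail;	/* points at tl_head or at the last t_next */
} toklist_t;

static void
tok_append(toklist_t *tl, tok_t *t)
{
	t->t_pnext = tl->tl_tail;
	*tl->tl_tail = t;
	tl->tl_tail = &t->t_next;
	t->t_next = NULL;
}

static void
tok_unlink(toklist_t *tl, tok_t *t)
{
	if (tl->tl_tail == &t->t_next)		/* last node: pull tail back */
		tl->tl_tail = t->t_pnext;
	*t->t_pnext = t->t_next;
	if (t->t_next != NULL)
		t->t_next->t_pnext = t->t_pnext;
}

int
main(void)
{
	toklist_t tl = { NULL, &tl.tl_head };
	tok_t a = { 0 }, b = { 0 };

	a.t_id = 1;
	b.t_id = 2;
	tok_append(&tl, &a);
	tok_append(&tl, &b);
	tok_unlink(&tl, &a);			/* O(1), no list walk needed */
	printf("head id = %d\n", tl.tl_head->t_id);	/* 2 */
	return (0);
}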
*/ @@ -108,6 +111,7 @@ extern struct ifqueue ipintrq; /* ip packet input queue */ #include <netinet/ip_icmp.h> #include "netinet/ip_compat.h" #include <netinet/tcpip.h> +#include "netinet/ipf_stack.h" #include "netinet/ip_fil.h" #include "netinet/ip_auth.h" #if !defined(MENTAT) && !defined(linux) @@ -129,56 +133,42 @@ extern struct ifqueue ipintrq; /* ip packet input queue */ static const char rcsid[] = "@(#)$Id: ip_auth.c,v 2.73.2.5 2005/06/12 07:18:14 darrenr Exp $"; #endif +void fr_authderef __P((frauthent_t **)); +int fr_authgeniter __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *)); -#if SOLARIS -extern kcondvar_t ipfauthwait; -#endif /* SOLARIS */ -#if defined(linux) && defined(_KERNEL) -wait_queue_head_t fr_authnext_linux; -#endif - -int fr_authsize = FR_NUMAUTH; -int fr_authused = 0; -int fr_defaultauthage = 600; -int fr_auth_lock = 0; -int fr_auth_init = 0; -fr_authstat_t fr_authstats; -static frauth_t *fr_auth = NULL; -mb_t **fr_authpkts = NULL; -int fr_authstart = 0, fr_authend = 0, fr_authnext = 0; -frauthent_t *fae_list = NULL; -frentry_t *ipauth = NULL, - *fr_authlist = NULL; - -#if SOLARIS2 >= 10 -extern net_data_t ipf_ipv4; -extern net_data_t ipf_ipv6; -#endif -int fr_authinit() +int fr_authinit(ifs) +ipf_stack_t *ifs; { - KMALLOCS(fr_auth, frauth_t *, fr_authsize * sizeof(*fr_auth)); - if (fr_auth != NULL) - bzero((char *)fr_auth, fr_authsize * sizeof(*fr_auth)); + ifs->ifs_fr_authsize = FR_NUMAUTH; + ifs->ifs_fr_defaultauthage = 600; + + KMALLOCS(ifs->ifs_fr_auth, frauth_t *, + ifs->ifs_fr_authsize * sizeof(*ifs->ifs_fr_auth)); + if (ifs->ifs_fr_auth != NULL) + bzero((char *)ifs->ifs_fr_auth, + ifs->ifs_fr_authsize * sizeof(*ifs->ifs_fr_auth)); else return -1; - KMALLOCS(fr_authpkts, mb_t **, fr_authsize * sizeof(*fr_authpkts)); - if (fr_authpkts != NULL) - bzero((char *)fr_authpkts, fr_authsize * sizeof(*fr_authpkts)); + KMALLOCS(ifs->ifs_fr_authpkts, mb_t **, + ifs->ifs_fr_authsize * sizeof(*ifs->ifs_fr_authpkts)); + if (ifs->ifs_fr_authpkts != NULL) + bzero((char *)ifs->ifs_fr_authpkts, + ifs->ifs_fr_authsize * sizeof(*ifs->ifs_fr_authpkts)); else return -2; - MUTEX_INIT(&ipf_authmx, "ipf auth log mutex"); - RWLOCK_INIT(&ipf_auth, "ipf IP User-Auth rwlock"); + MUTEX_INIT(&ifs->ifs_ipf_authmx, "ipf auth log mutex"); + RWLOCK_INIT(&ifs->ifs_ipf_auth, "ipf IP User-Auth rwlock"); #if SOLARIS && defined(_KERNEL) - cv_init(&ipfauthwait, "ipf auth condvar", CV_DRIVER, NULL); + cv_init(&ifs->ifs_ipfauthwait, "ipf auth condvar", CV_DRIVER, NULL); #endif #if defined(linux) && defined(_KERNEL) init_waitqueue_head(&fr_authnext_linux); #endif - fr_auth_init = 1; + ifs->ifs_fr_auth_init = 1; return 0; } @@ -199,21 +189,22 @@ u_32_t *passp; u_short id; ip_t *ip; int i; + ipf_stack_t *ifs = fin->fin_ifs; - if (fr_auth_lock || !fr_authused) + if (ifs->ifs_fr_auth_lock || !ifs->ifs_fr_authused) return NULL; ip = fin->fin_ip; id = ip->ip_id; - READ_ENTER(&ipf_auth); - for (i = fr_authstart; i != fr_authend; ) { + READ_ENTER(&ifs->ifs_ipf_auth); + for (i = ifs->ifs_fr_authstart; i != ifs->ifs_fr_authend; ) { /* * index becomes -2 only after an SIOCAUTHW. Check this in * case the same packet gets sent again and it hasn't yet been * auth'd. 
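fr_checkauth() and fr_newauth() below treat the per-stack fr_auth array as a ring: slots are claimed at fr_authend, may be released out of order, and fr_authstart only advances once the slot it points at is free, with all three indices reset when the ring drains. A simplified model of that index discipline; NSLOTS and ring_t are illustrative, not the kernel types:

#include <stdio.h>
#include <string.h>

#define	NSLOTS	8

typedef struct ring {
	int	r_busy[NSLOTS];	/* 1 while a packet is held in the slot */
	int	r_start, r_end, r_used;
} ring_t;

static int
ring_claim(ring_t *r)
{
	int i;

	if (r->r_used == NSLOTS)
		return (-1);			/* the fas_nospace case */
	i = r->r_end;
	r->r_end = (r->r_end + 1) % NSLOTS;
	r->r_busy[i] = 1;
	r->r_used++;
	return (i);
}

static void
ring_release(ring_t *r, int i)
{
	r->r_busy[i] = 0;
	r->r_used--;
	if (i == r->r_start) {
		/* Skip over any already-released holes. */
		while (r->r_start != r->r_end && !r->r_busy[r->r_start])
			r->r_start = (r->r_start + 1) % NSLOTS;
		if (r->r_start == r->r_end)
			r->r_start = r->r_end = 0;	/* fully drained */
	}
}

int
main(void)
{
	ring_t r;

	memset(&r, 0, sizeof (r));
	int a = ring_claim(&r), b = ring_claim(&r);
	ring_release(&r, b);		/* out-of-order release leaves a hole */
	ring_release(&r, a);		/* start catches up and the ring resets */
	printf("start=%d end=%d used=%d\n", r.r_start, r.r_end, r.r_used);
	return (0);
}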
*/ - fra = fr_auth + i; + fra = ifs->ifs_fr_auth + i; if ((fra->fra_index == -2) && (id == fra->fra_info.fin_id) && !bcmp((char *)fin, (char *)&fra->fra_info, FI_CSIZE)) { /* @@ -244,45 +235,46 @@ u_32_t *passp; } else fr = fra->fra_info.fin_fr; fin->fin_fr = fr; - RWLOCK_EXIT(&ipf_auth); - WRITE_ENTER(&ipf_auth); + RWLOCK_EXIT(&ifs->ifs_ipf_auth); + WRITE_ENTER(&ifs->ifs_ipf_auth); if ((fr != NULL) && (fr != fra->fra_info.fin_fr)) { - fr->fr_next = fr_authlist; - fr_authlist = fr; + fr->fr_next = ifs->ifs_fr_authlist; + ifs->ifs_fr_authlist = fr; } - fr_authstats.fas_hits++; + ifs->ifs_fr_authstats.fas_hits++; fra->fra_index = -1; - fr_authused--; - if (i == fr_authstart) { + ifs->ifs_fr_authused--; + if (i == ifs->ifs_fr_authstart) { while (fra->fra_index == -1) { i++; fra++; - if (i == fr_authsize) { + if (i == ifs->ifs_fr_authsize) { i = 0; - fra = fr_auth; + fra = ifs->ifs_fr_auth; } - fr_authstart = i; - if (i == fr_authend) + ifs->ifs_fr_authstart = i; + if (i == ifs->ifs_fr_authend) break; } - if (fr_authstart == fr_authend) { - fr_authnext = 0; - fr_authstart = fr_authend = 0; + if (ifs->ifs_fr_authstart == ifs->ifs_fr_authend) { + ifs->ifs_fr_authnext = 0; + ifs->ifs_fr_authstart = 0; + ifs->ifs_fr_authend = 0; } } - RWLOCK_EXIT(&ipf_auth); + RWLOCK_EXIT(&ifs->ifs_ipf_auth); if (passp != NULL) *passp = pass; - ATOMIC_INC64(fr_authstats.fas_hits); + ATOMIC_INC64(ifs->ifs_fr_authstats.fas_hits); return fr; } i++; - if (i == fr_authsize) + if (i == ifs->ifs_fr_authsize) i = 0; } - fr_authstats.fas_miss++; - RWLOCK_EXIT(&ipf_auth); - ATOMIC_INC64(fr_authstats.fas_miss); + ifs->ifs_fr_authstats.fas_miss++; + RWLOCK_EXIT(&ifs->ifs_ipf_auth); + ATOMIC_INC64(ifs->ifs_fr_authstats.fas_miss); return NULL; } @@ -304,34 +296,35 @@ fr_info_t *fin; ip_t *ip; #endif int i; + ipf_stack_t *ifs = fin->fin_ifs; - if (fr_auth_lock) + if (ifs->ifs_fr_auth_lock) return 0; - WRITE_ENTER(&ipf_auth); - if (fr_authstart > fr_authend) { - fr_authstats.fas_nospace++; - RWLOCK_EXIT(&ipf_auth); + WRITE_ENTER(&ifs->ifs_ipf_auth); + if (ifs->ifs_fr_authstart > ifs->ifs_fr_authend) { + ifs->ifs_fr_authstats.fas_nospace++; + RWLOCK_EXIT(&ifs->ifs_ipf_auth); return 0; } else { - if (fr_authused == fr_authsize) { - fr_authstats.fas_nospace++; - RWLOCK_EXIT(&ipf_auth); + if (ifs->ifs_fr_authused == ifs->ifs_fr_authsize) { + ifs->ifs_fr_authstats.fas_nospace++; + RWLOCK_EXIT(&ifs->ifs_ipf_auth); return 0; } } - fr_authstats.fas_added++; - fr_authused++; - i = fr_authend++; - if (fr_authend == fr_authsize) - fr_authend = 0; - RWLOCK_EXIT(&ipf_auth); + ifs->ifs_fr_authstats.fas_added++; + ifs->ifs_fr_authused++; + i = ifs->ifs_fr_authend++; + if (ifs->ifs_fr_authend == ifs->ifs_fr_authsize) + ifs->ifs_fr_authend = 0; + RWLOCK_EXIT(&ifs->ifs_ipf_auth); - fra = fr_auth + i; + fra = ifs->ifs_fr_auth + i; fra->fra_index = i; fra->fra_pass = 0; - fra->fra_age = fr_defaultauthage; + fra->fra_age = ifs->ifs_fr_defaultauthage; bcopy((char *)fin, (char *)&fra->fra_info, sizeof(*fin)); #if !defined(sparc) && !defined(m68k) /* @@ -353,8 +346,8 @@ fr_info_t *fin; #endif #if SOLARIS && defined(_KERNEL) m->b_rptr -= qpi->qpi_off; - fr_authpkts[i] = *(mblk_t **)fin->fin_mp; - cv_signal(&ipfauthwait); + ifs->ifs_fr_authpkts[i] = *(mblk_t **)fin->fin_mp; + cv_signal(&ifs->ifs_ipfauthwait); #else # if defined(BSD) && !defined(sparc) && (BSD >= 199306) if (!fin->fin_out) { @@ -362,17 +355,19 @@ fr_info_t *fin; ip->ip_off = htons(ip->ip_off); } # endif - fr_authpkts[i] = m; - WAKEUP(&fr_authnext,0); + ifs->ifs_fr_authpkts[i] = m; + 
WAKEUP(&ifs->ifs_fr_authnext, 0); #endif return 1; } -int fr_auth_ioctl(data, cmd, mode) +int fr_auth_ioctl(data, cmd, mode, uid, ctx, ifs) caddr_t data; ioctlcmd_t cmd; -int mode; +int mode,uid; +void *ctx; +ipf_stack_t *ifs; { mb_t *m; #if defined(_KERNEL) && !defined(MENTAT) && !defined(linux) && \ @@ -389,24 +384,44 @@ int mode; switch (cmd) { + case SIOCGENITER : + { + ipftoken_t *token; + ipfgeniter_t iter; + + error = fr_inobj(data, &iter, IPFOBJ_GENITER); + if (error != 0) + break; + + token = ipf_findtoken(IPFGENITER_AUTH, uid, ctx, ifs); + if (token != NULL) + error = fr_authgeniter(token, &iter, ifs); + else + error = ESRCH; + RWLOCK_EXIT(&ifs->ifs_ipf_tokens); + + break; + } + case SIOCSTLCK : if (!(mode & FWRITE)) { error = EPERM; break; } - fr_lock(data, &fr_auth_lock); + fr_lock(data, &ifs->ifs_fr_auth_lock); break; case SIOCATHST: - fr_authstats.fas_faelist = fae_list; - error = fr_outobj(data, &fr_authstats, IPFOBJ_AUTHSTAT); + ifs->ifs_fr_authstats.fas_faelist = ifs->ifs_fae_list; + error = fr_outobj(data, &ifs->ifs_fr_authstats, + IPFOBJ_AUTHSTAT); break; case SIOCIPFFL: SPL_NET(s); - WRITE_ENTER(&ipf_auth); - i = fr_authflush(); - RWLOCK_EXIT(&ipf_auth); + WRITE_ENTER(&ifs->ifs_ipf_auth); + i = fr_authflush(ifs); + RWLOCK_EXIT(&ifs->ifs_ipf_auth); SPL_X(s); error = copyoutptr((char *)&i, data, sizeof(i)); break; @@ -414,16 +429,18 @@ int mode; case SIOCAUTHW: fr_authioctlloop: error = fr_inobj(data, au, IPFOBJ_FRAUTH); - READ_ENTER(&ipf_auth); - if ((fr_authnext != fr_authend) && fr_authpkts[fr_authnext]) { - error = fr_outobj(data, &fr_auth[fr_authnext], + READ_ENTER(&ifs->ifs_ipf_auth); + if ((ifs->ifs_fr_authnext != ifs->ifs_fr_authend) && + ifs->ifs_fr_authpkts[ifs->ifs_fr_authnext]) { + error = fr_outobj(data, + &ifs->ifs_fr_auth[ifs->ifs_fr_authnext], IPFOBJ_FRAUTH); if (auth.fra_len != 0 && auth.fra_buf != NULL) { /* * Copy packet contents out to user space if * requested. Bail on an error. */ - m = fr_authpkts[fr_authnext]; + m = ifs->ifs_fr_authpkts[ifs->ifs_fr_authnext]; len = MSGDSIZE(m); if (len > auth.fra_len) len = auth.fra_len; @@ -438,19 +455,19 @@ fr_authioctlloop: break; } } - RWLOCK_EXIT(&ipf_auth); + RWLOCK_EXIT(&ifs->ifs_ipf_auth); if (error != 0) break; SPL_NET(s); - WRITE_ENTER(&ipf_auth); - fr_authnext++; - if (fr_authnext == fr_authsize) - fr_authnext = 0; - RWLOCK_EXIT(&ipf_auth); + WRITE_ENTER(&ifs->ifs_ipf_auth); + ifs->ifs_fr_authnext++; + if (ifs->ifs_fr_authnext == ifs->ifs_fr_authsize) + ifs->ifs_fr_authnext = 0; + RWLOCK_EXIT(&ifs->ifs_ipf_auth); SPL_X(s); return 0; } - RWLOCK_EXIT(&ipf_auth); + RWLOCK_EXIT(&ifs->ifs_ipf_auth); /* * We exit ipf_global here because a program that enters in * here will have a lock on it and goto sleep having this lock. @@ -459,37 +476,38 @@ fr_authioctlloop: * caller of this function expects it to be held when we * return so we have to reacquire it in here. 
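The wait that follows drops the outer lock before sleeping, exactly as the comment here describes, and takes it again before returning so the caller still sees it held. A user-space analogue of that discipline using POSIX primitives; outer, waitmx and waitcv are illustrative names, not the kernel locks:

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_rwlock_t outer = PTHREAD_RWLOCK_INITIALIZER;
static pthread_mutex_t waitmx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t waitcv = PTHREAD_COND_INITIALIZER;
static int work_ready;

static void
wait_for_work(void)
{
	/* Caller arrives holding "outer"; never sleep with it held. */
	pthread_rwlock_unlock(&outer);

	pthread_mutex_lock(&waitmx);
	while (!work_ready)
		pthread_cond_wait(&waitcv, &waitmx);
	pthread_mutex_unlock(&waitmx);

	pthread_rwlock_rdlock(&outer);		/* caller expects it held again */
}

static void *
producer(void *arg)
{
	(void) arg;
	sleep(1);
	pthread_mutex_lock(&waitmx);
	work_ready = 1;
	pthread_cond_signal(&waitcv);
	pthread_mutex_unlock(&waitmx);
	return (NULL);
}

int
main(void)
{
	pthread_t t;

	(void) pthread_create(&t, NULL, producer, NULL);
	(void) pthread_rwlock_rdlock(&outer);	/* as held on the ioctl path */
	wait_for_work();
	(void) pthread_rwlock_unlock(&outer);
	(void) pthread_join(t, NULL);
	printf("woken with the outer lock reacquired\n");
	return (0);
}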
*/ - RWLOCK_EXIT(&ipf_global); + RWLOCK_EXIT(&ifs->ifs_ipf_global); - MUTEX_ENTER(&ipf_authmx); + MUTEX_ENTER(&ifs->ifs_ipf_authmx); #ifdef _KERNEL # if SOLARIS error = 0; - if (!cv_wait_sig(&ipfauthwait, &ipf_authmx.ipf_lk)) + if (!cv_wait_sig(&ifs->ifs_ipfauthwait, &ifs->ifs_ipf_authmx.ipf_lk)) error = EINTR; # else /* SOLARIS */ # ifdef __hpux { lock_t *l; - l = get_sleep_lock(&fr_authnext); - error = sleep(&fr_authnext, PZERO+1); + l = get_sleep_lock(&ifs->ifs_fr_authnext); + error = sleep(&ifs->ifs_fr_authnext, PZERO+1); spinunlock(l); } # else # ifdef __osf__ - error = mpsleep(&fr_authnext, PSUSP|PCATCH, "fr_authnext", 0, - &ipf_authmx, MS_LOCK_SIMPLE); + error = mpsleep(&ifs->ifs_fr_authnext, PSUSP|PCATCH, + "fr_authnext", 0, + &ifs->ifs_ipf_authmx, MS_LOCK_SIMPLE); # else - error = SLEEP(&fr_authnext, "fr_authnext"); + error = SLEEP(&ifs->ifs_fr_authnext, "fr_authnext"); # endif /* __osf__ */ # endif /* __hpux */ # endif /* SOLARIS */ #endif - MUTEX_EXIT(&ipf_authmx); - READ_ENTER(&ipf_global); + MUTEX_EXIT(&ifs->ifs_ipf_authmx); + READ_ENTER(&ifs->ifs_ipf_global); if (error == 0) { - READ_ENTER(&ipf_auth); + READ_ENTER(&ifs->ifs_ipf_auth); goto fr_authioctlloop; } break; @@ -499,26 +517,25 @@ fr_authioctlloop: if (error != 0) return error; SPL_NET(s); - WRITE_ENTER(&ipf_auth); + WRITE_ENTER(&ifs->ifs_ipf_auth); i = au->fra_index; - fra = fr_auth + i; - if ((i < 0) || (i >= fr_authsize) || + fra = ifs->ifs_fr_auth + i; + if ((i < 0) || (i >= ifs->ifs_fr_authsize) || (fra->fra_info.fin_id != au->fra_info.fin_id)) { - RWLOCK_EXIT(&ipf_auth); + RWLOCK_EXIT(&ifs->ifs_ipf_auth); SPL_X(s); return ESRCH; } - m = fr_authpkts[i]; + m = ifs->ifs_fr_authpkts[i]; fra->fra_index = -2; fra->fra_pass = au->fra_pass; - fr_authpkts[i] = NULL; - RWLOCK_EXIT(&ipf_auth); - + ifs->ifs_fr_authpkts[i] = NULL; + RWLOCK_EXIT(&ifs->ifs_ipf_auth); #ifdef _KERNEL if (fra->fra_info.fin_v == 4) { - net_data_p = ipf_ipv4; + net_data_p = ifs->ifs_ipf_ipv4; } else if (fra->fra_info.fin_v == 6) { - net_data_p = ipf_ipv6; + net_data_p = ifs->ifs_ipf_ipv6; } else { return (-1); } @@ -537,9 +554,9 @@ fr_authioctlloop: ret = net_inject(net_data_p, NI_QUEUE_OUT, &inj_data); if (ret < 0) - fr_authstats.fas_sendfail++; + ifs->ifs_fr_authstats.fas_sendfail++; else - fr_authstats.fas_sendok++; + ifs->ifs_fr_authstats.fas_sendok++; # else /* MENTAT */ # if defined(linux) || defined(AIX) # else @@ -552,9 +569,9 @@ fr_authioctlloop: error = ip_output(m, NULL, NULL, IP_FORWARDING, NULL); # endif if (error != 0) - fr_authstats.fas_sendfail++; + ifs->ifs_fr_authstats.fas_sendfail++; else - fr_authstats.fas_sendok++; + ifs->ifs_fr_authstats.fas_sendok++; # endif /* Linux */ # endif /* MENTAT */ } else if (m) { @@ -586,9 +603,9 @@ fr_authioctlloop: # endif /* Linux */ # endif /* MENTAT */ if (error != 0) - fr_authstats.fas_quefail++; + ifs->ifs_fr_authstats.fas_quefail++; else - fr_authstats.fas_queok++; + ifs->ifs_fr_authstats.fas_queok++; } else error = EINVAL; # ifdef MENTAT @@ -600,21 +617,22 @@ fr_authioctlloop: * not being processed, make sure we advance to the next one. 
*/ if (error == ENOBUFS) { - fr_authused--; + ifs->ifs_fr_authused--; fra->fra_index = -1; fra->fra_pass = 0; - if (i == fr_authstart) { + if (i == ifs->ifs_fr_authstart) { while (fra->fra_index == -1) { i++; - if (i == fr_authsize) + if (i == ifs->ifs_fr_authsize) i = 0; - fr_authstart = i; - if (i == fr_authend) + ifs->ifs_fr_authstart = i; + if (i == ifs->ifs_fr_authend) break; } - if (fr_authstart == fr_authend) { - fr_authnext = 0; - fr_authstart = fr_authend = 0; + if (ifs->ifs_fr_authstart == ifs->ifs_fr_authend) { + ifs->ifs_fr_authnext = 0; + ifs->ifs_fr_authstart = 0; + ifs->ifs_fr_authend = 0; } } } @@ -634,39 +652,42 @@ fr_authioctlloop: /* * Free all network buffer memory used to keep saved packets. */ -void fr_authunload() +void fr_authunload(ifs) +ipf_stack_t *ifs; { register int i; register frauthent_t *fae, **faep; frentry_t *fr, **frp; mb_t *m; - if (fr_auth != NULL) { - KFREES(fr_auth, fr_authsize * sizeof(*fr_auth)); - fr_auth = NULL; + if (ifs->ifs_fr_auth != NULL) { + KFREES(ifs->ifs_fr_auth, + ifs->ifs_fr_authsize * sizeof(*ifs->ifs_fr_auth)); + ifs->ifs_fr_auth = NULL; } - if (fr_authpkts != NULL) { - for (i = 0; i < fr_authsize; i++) { - m = fr_authpkts[i]; + if (ifs->ifs_fr_authpkts != NULL) { + for (i = 0; i < ifs->ifs_fr_authsize; i++) { + m = ifs->ifs_fr_authpkts[i]; if (m != NULL) { FREE_MB_T(m); - fr_authpkts[i] = NULL; + ifs->ifs_fr_authpkts[i] = NULL; } } - KFREES(fr_authpkts, fr_authsize * sizeof(*fr_authpkts)); - fr_authpkts = NULL; + KFREES(ifs->ifs_fr_authpkts, + ifs->ifs_fr_authsize * sizeof(*ifs->ifs_fr_authpkts)); + ifs->ifs_fr_authpkts = NULL; } - faep = &fae_list; + faep = &ifs->ifs_fae_list; while ((fae = *faep) != NULL) { *faep = fae->fae_next; KFREE(fae); } - ipauth = NULL; + ifs->ifs_ipauth = NULL; - if (fr_authlist != NULL) { - for (frp = &fr_authlist; ((fr = *frp) != NULL); ) { + if (ifs->ifs_fr_authlist != NULL) { + for (frp = &ifs->ifs_fr_authlist; ((fr = *frp) != NULL); ) { if (fr->fr_ref == 1) { *frp = fr->fr_next; KFREE(fr); @@ -675,14 +696,14 @@ void fr_authunload() } } - if (fr_auth_init == 1) { + if (ifs->ifs_fr_auth_init == 1) { # if SOLARIS && defined(_KERNEL) - cv_destroy(&ipfauthwait); + cv_destroy(&ifs->ifs_ipfauthwait); # endif - MUTEX_DESTROY(&ipf_authmx); - RW_DESTROY(&ipf_auth); + MUTEX_DESTROY(&ifs->ifs_ipf_authmx); + RW_DESTROY(&ifs->ifs_ipf_auth); - fr_auth_init = 0; + ifs->ifs_fr_auth_init = 0; } } @@ -691,7 +712,8 @@ void fr_authunload() * Slowly expire held auth records. Timeouts are set * in expectation of this being called twice per second. 
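fr_authexpire() below decrements each entry's age on every call, made roughly twice per second, and unlinks expired list entries while walking by keeping a pointer to the previous link instead of a separate prev node. A minimal version of that sweep; entry_t is an illustrative type:

#include <stdio.h>
#include <stdlib.h>

typedef struct entry {
	struct entry	*e_next;
	int		e_age;		/* ticks left, decremented each sweep */
} entry_t;

static void
expire_sweep(entry_t **head)
{
	entry_t *e, **ep;

	for (ep = head; (e = *ep) != NULL; ) {
		if (--e->e_age == 0) {
			*ep = e->e_next;	/* unlink and free in place */
			free(e);
		} else {
			ep = &e->e_next;	/* only advance when keeping */
		}
	}
}

int
main(void)
{
	entry_t *head = NULL;

	for (int age = 1; age <= 3; age++) {
		entry_t *e = calloc(1, sizeof (*e));
		if (e == NULL)
			return (1);
		e->e_age = age;
		e->e_next = head;
		head = e;
	}
	expire_sweep(&head);	/* the age-1 entry is removed */

	int n = 0;
	for (entry_t *e = head; e != NULL; e = e->e_next)
		n++;
	printf("%d entries remain\n", n);	/* 2 */
	return (0);
}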
*/ -void fr_authexpire() +void fr_authexpire(ifs) +ipf_stack_t *ifs; { register int i; register frauth_t *fra; @@ -700,50 +722,51 @@ void fr_authexpire() mb_t *m; SPL_INT(s); - if (fr_auth_lock) + if (ifs->ifs_fr_auth_lock) return; SPL_NET(s); - WRITE_ENTER(&ipf_auth); - for (i = 0, fra = fr_auth; i < fr_authsize; i++, fra++) { + WRITE_ENTER(&ifs->ifs_ipf_auth); + for (i = 0, fra = ifs->ifs_fr_auth; i < ifs->ifs_fr_authsize; i++, fra++) { fra->fra_age--; - if ((fra->fra_age == 0) && (m = fr_authpkts[i])) { + if ((fra->fra_age == 0) && (m = ifs->ifs_fr_authpkts[i])) { FREE_MB_T(m); - fr_authpkts[i] = NULL; - fr_auth[i].fra_index = -1; - fr_authstats.fas_expire++; - fr_authused--; + ifs->ifs_fr_authpkts[i] = NULL; + ifs->ifs_fr_auth[i].fra_index = -1; + ifs->ifs_fr_authstats.fas_expire++; + ifs->ifs_fr_authused--; } } - for (faep = &fae_list; ((fae = *faep) != NULL); ) { + for (faep = &ifs->ifs_fae_list; ((fae = *faep) != NULL); ) { fae->fae_age--; if (fae->fae_age == 0) { *faep = fae->fae_next; KFREE(fae); - fr_authstats.fas_expire++; + ifs->ifs_fr_authstats.fas_expire++; } else faep = &fae->fae_next; } - if (fae_list != NULL) - ipauth = &fae_list->fae_fr; + if (ifs->ifs_fae_list != NULL) + ifs->ifs_ipauth = &ifs->ifs_fae_list->fae_fr; else - ipauth = NULL; + ifs->ifs_ipauth = NULL; - for (frp = &fr_authlist; ((fr = *frp) != NULL); ) { + for (frp = &ifs->ifs_fr_authlist; ((fr = *frp) != NULL); ) { if (fr->fr_ref == 1) { *frp = fr->fr_next; KFREE(fr); } else frp = &fr->fr_next; } - RWLOCK_EXIT(&ipf_auth); + RWLOCK_EXIT(&ifs->ifs_ipf_auth); SPL_X(s); } -int fr_preauthcmd(cmd, fr, frptr) +int fr_preauthcmd(cmd, fr, frptr, ifs) ioctlcmd_t cmd; frentry_t *fr, **frptr; +ipf_stack_t *ifs; { frauthent_t *fae, **faep; int error = 0; @@ -752,7 +775,7 @@ frentry_t *fr, **frptr; if ((cmd != SIOCADAFR) && (cmd != SIOCRMAFR)) return EIO; - for (faep = &fae_list; ((fae = *faep) != NULL); ) { + for (faep = &ifs->ifs_fae_list; ((fae = *faep) != NULL); ) { if (&fae->fae_fr == fr) break; else @@ -766,11 +789,12 @@ frentry_t *fr, **frptr; error = ESRCH; else { SPL_NET(s); - WRITE_ENTER(&ipf_auth); + WRITE_ENTER(&ifs->ifs_ipf_auth); *faep = fae->fae_next; - if (ipauth == &fae->fae_fr) - ipauth = fae_list ? &fae_list->fae_fr : NULL; - RWLOCK_EXIT(&ipf_auth); + if (ifs->ifs_ipauth == &fae->fae_fr) + ifs->ifs_ipauth = ifs->ifs_fae_list ? + &ifs->ifs_fae_list->fae_fr : NULL; + RWLOCK_EXIT(&ifs->ifs_ipf_auth); SPL_X(s); KFREE(fae); @@ -781,15 +805,16 @@ frentry_t *fr, **frptr; bcopy((char *)fr, (char *)&fae->fae_fr, sizeof(*fr)); SPL_NET(s); - WRITE_ENTER(&ipf_auth); - fae->fae_age = fr_defaultauthage; + WRITE_ENTER(&ifs->ifs_ipf_auth); + fae->fae_age = ifs->ifs_fr_defaultauthage; fae->fae_fr.fr_hits = 0; fae->fae_fr.fr_next = *frptr; + fae->fae_ref = 1; *frptr = &fae->fae_fr; fae->fae_next = *faep; *faep = fae; - ipauth = &fae_list->fae_fr; - RWLOCK_EXIT(&ipf_auth); + ifs->ifs_ipauth = &ifs->ifs_fae_list->fae_fr; + RWLOCK_EXIT(&ifs->ifs_ipf_auth); SPL_X(s); } else error = ENOMEM; @@ -804,32 +829,112 @@ frentry_t *fr, **frptr; * Must already be properly SPL'ed and Locked on &ipf_auth. 
* */ -int fr_authflush() +int fr_authflush(ifs) +ipf_stack_t *ifs; { register int i, num_flushed; mb_t *m; - if (fr_auth_lock) + if (ifs->ifs_fr_auth_lock) return -1; num_flushed = 0; - for (i = 0 ; i < fr_authsize; i++) { - m = fr_authpkts[i]; + for (i = 0 ; i < ifs->ifs_fr_authsize; i++) { + m = ifs->ifs_fr_authpkts[i]; if (m != NULL) { FREE_MB_T(m); - fr_authpkts[i] = NULL; - fr_auth[i].fra_index = -1; + ifs->ifs_fr_authpkts[i] = NULL; + ifs->ifs_fr_auth[i].fra_index = -1; /* perhaps add & use a flush counter inst.*/ - fr_authstats.fas_expire++; - fr_authused--; + ifs->ifs_fr_authstats.fas_expire++; + ifs->ifs_fr_authused--; num_flushed++; } } - fr_authstart = 0; - fr_authend = 0; - fr_authnext = 0; + ifs->ifs_fr_authstart = 0; + ifs->ifs_fr_authend = 0; + ifs->ifs_fr_authnext = 0; return num_flushed; } + +/* ------------------------------------------------------------------------ */ +/* Function: fr_authgeniter */ +/* Returns: int - 0 == success, else error */ +/* Parameters: token(I) - pointer to ipftoken structure */ +/* itp(I) - pointer to ipfgeniter structure */ +/* */ +/* ------------------------------------------------------------------------ */ +int fr_authgeniter(token, itp, ifs) +ipftoken_t *token; +ipfgeniter_t *itp; +ipf_stack_t *ifs; +{ + frauthent_t *fae, *next, zero; + int error; + + if (itp->igi_data == NULL) + return EFAULT; + + if (itp->igi_type != IPFGENITER_AUTH) + return EINVAL; + + fae = token->ipt_data; + READ_ENTER(&ifs->ifs_ipf_auth); + if (fae == NULL) { + next = ifs->ifs_fae_list; + } else { + next = fae->fae_next; + } + + if (next != NULL) { + /* + * If we find an auth entry to use, bump its reference count + * so that it can be used for is_next when we come back. + */ + ATOMIC_INC(next->fae_ref); + if (next->fae_next == NULL) + ipf_freetoken(token, ifs); + } else { + bzero(&zero, sizeof(zero)); + next = &zero; + } + RWLOCK_EXIT(&ifs->ifs_ipf_auth); + + /* + * If we had a prior pointer to an auth entry, release it. + */ + if (fae != NULL) { + WRITE_ENTER(&ifs->ifs_ipf_auth); + fr_authderef(&fae); + RWLOCK_EXIT(&ifs->ifs_ipf_auth); + } + token->ipt_data = next; + + /* + * This should arguably be via fr_outobj() so that the auth + * structure can (if required) be massaged going out. 
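The stepping pattern used here takes a reference on the next entry before the lock is dropped, copies it out to the caller, and only afterwards releases the entry held from the previous call, returning a zeroed record at the end of the list. A simplified, lock-free sketch of that pattern; ent_t and iter_step are illustrative names:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct ent {
	struct ent	*e_next;
	int		e_ref;
	int		e_val;
} ent_t;

static void
ent_deref(ent_t **ep)
{
	ent_t *e = *ep;

	*ep = NULL;
	if (--e->e_ref == 0)
		free(e);
}

/* Returns 1 and fills *out while entries remain, 0 at the end of the list. */
static int
iter_step(ent_t *head, ent_t **cursor, ent_t *out)
{
	ent_t *prev = *cursor;
	ent_t *next = (prev == NULL) ? head : prev->e_next;

	if (next != NULL) {
		next->e_ref++;			/* keep it alive for the next call */
		*cursor = next;
		*out = *next;			/* copy out to the caller */
	} else {
		memset(out, 0, sizeof (*out));	/* zeroed entry marks the end */
		*cursor = NULL;
	}
	if (prev != NULL)
		ent_deref(&prev);		/* release the previous hold */
	return (next != NULL);
}

int
main(void)
{
	ent_t *b = calloc(1, sizeof (*b));
	ent_t *a = calloc(1, sizeof (*a));
	ent_t *cursor = NULL, snap;

	if (a == NULL || b == NULL)
		return (1);
	b->e_ref = 1; b->e_val = 2;
	a->e_ref = 1; a->e_val = 1; a->e_next = b;

	while (iter_step(a, &cursor, &snap))
		printf("saw %d\n", snap.e_val);	/* 1, then 2 */
	return (0);
}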
+ */ + error = COPYOUT(next, itp->igi_data, sizeof(*next)); + if (error != 0) + error = EFAULT; + + return error; +} + + +void fr_authderef(faep) +frauthent_t **faep; +{ + frauthent_t *fae; + + fae = *faep; + *faep = NULL; + + fae->fae_ref--; + if (fae->fae_ref == 0) { + KFREE(fae); + } +} diff --git a/usr/src/uts/common/inet/ipf/ip_fil_solaris.c b/usr/src/uts/common/inet/ipf/ip_fil_solaris.c index 738f257ac7..8c6853969d 100644 --- a/usr/src/uts/common/inet/ipf/ip_fil_solaris.c +++ b/usr/src/uts/common/inet/ipf/ip_fil_solaris.c @@ -24,6 +24,7 @@ static const char rcsid[] = "@(#)$Id: ip_fil_solaris.c,v 2.62.2.19 2005/07/13 21 #include <sys/systm.h> #include <sys/strsubr.h> #include <sys/cred.h> +#include <sys/cred_impl.h> #include <sys/ddi.h> #include <sys/sunddi.h> #include <sys/ksynch.h> @@ -33,6 +34,7 @@ static const char rcsid[] = "@(#)$Id: ip_fil_solaris.c,v 2.62.2.19 2005/07/13 21 #include <sys/socket.h> #include <sys/dditypes.h> #include <sys/cmn_err.h> +#include <sys/zone.h> #include <net/if.h> #include <net/af.h> #include <net/route.h> @@ -54,6 +56,7 @@ static const char rcsid[] = "@(#)$Id: ip_fil_solaris.c,v 2.62.2.19 2005/07/13 21 #include "netinet/ip_state.h" #include "netinet/ip_auth.h" #include "netinet/ip_proxy.h" +#include "netinet/ipf_stack.h" #ifdef IPFILTER_LOOKUP # include "netinet/ip_lookup.h" #endif @@ -62,53 +65,27 @@ static const char rcsid[] = "@(#)$Id: ip_fil_solaris.c,v 2.62.2.19 2005/07/13 21 #include <sys/md5.h> #include <sys/neti.h> -extern int fr_flags, fr_active; -#if SOLARIS2 >= 7 -timeout_id_t fr_timer_id; -#else -int fr_timer_id; -#endif -#if SOLARIS2 >= 10 -extern int ipf_loopback; -#endif - - -static int fr_setipfloopback __P((int)); +static int frzerostats __P((caddr_t, ipf_stack_t *)); +static int fr_setipfloopback __P((int, ipf_stack_t *)); static int fr_send_ip __P((fr_info_t *fin, mblk_t *m, mblk_t **mp)); -static int ipf_nic_event_v4 __P((hook_event_token_t, hook_data_t)); -static int ipf_nic_event_v6 __P((hook_event_token_t, hook_data_t)); -static int ipf_hook_out __P((hook_event_token_t, hook_data_t)); -static int ipf_hook_in __P((hook_event_token_t, hook_data_t)); -static int ipf_hook_loop_out __P((hook_event_token_t, hook_data_t)); -static int ipf_hook_loop_in __P((hook_event_token_t, hook_data_t)); -static int ipf_hook __P((hook_data_t, int, int)); - -static hook_t ipfhook_in; -static hook_t ipfhook_out; -static hook_t ipfhook_loop_in; -static hook_t ipfhook_loop_out; -static hook_t ipfhook_nicevents; - -/* flags to indicate whether hooks are registered. 
*/ -static boolean_t hook4_physical_in = B_FALSE; -static boolean_t hook4_physical_out = B_FALSE; -static boolean_t hook4_nic_events = B_FALSE; -static boolean_t hook4_loopback_in = B_FALSE; -static boolean_t hook4_loopback_out = B_FALSE; -static boolean_t hook6_physical_in = B_FALSE; -static boolean_t hook6_physical_out = B_FALSE; -static boolean_t hook6_nic_events = B_FALSE; -static boolean_t hook6_loopback_in = B_FALSE; -static boolean_t hook6_loopback_out = B_FALSE; - -ipfmutex_t ipl_mutex, ipf_authmx, ipf_rw, ipf_stinsert; -ipfmutex_t ipf_nat_new, ipf_natio, ipf_timeoutlock; -ipfrwlock_t ipf_mutex, ipf_global, ipf_ipidfrag, ipf_frcache; -ipfrwlock_t ipf_frag, ipf_state, ipf_nat, ipf_natfrag, ipf_auth; -kcondvar_t iplwait, ipfauthwait; +static int ipf_nic_event_v4 __P((hook_event_token_t, hook_data_t, + netstack_t *)); +static int ipf_nic_event_v6 __P((hook_event_token_t, hook_data_t, + netstack_t *)); +static int ipf_hook_out __P((hook_event_token_t, hook_data_t, + netstack_t *)); +static int ipf_hook_in __P((hook_event_token_t, hook_data_t, + netstack_t *)); +static int ipf_hook_loop_out __P((hook_event_token_t, hook_data_t, + netstack_t *)); +static int ipf_hook_loop_in __P((hook_event_token_t, hook_data_t, + netstack_t *)); +static int ipf_hook __P((hook_data_t, int, int, netstack_t *)); +extern int ipf_geniter __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *)); +extern int ipf_frruleiter __P((void *, int, void *, ipf_stack_t *)); + #if SOLARIS2 < 10 #if SOLARIS2 >= 7 -timeout_id_t fr_timer_id; u_int *ip_ttl_ptr = NULL; u_int *ip_mtudisc = NULL; # if SOLARIS2 >= 8 @@ -118,17 +95,11 @@ u_int *ip6_forwarding = NULL; u_int *ip_forwarding = NULL; # endif #else -int fr_timer_id; u_long *ip_ttl_ptr = NULL; u_long *ip_mtudisc = NULL; u_long *ip_forwarding = NULL; #endif #endif -#if SOLARIS2 >= 10 -extern net_data_t ipf_ipv4; -extern net_data_t ipf_ipv6; -#endif -int ipf_locks_done = 0; /* ------------------------------------------------------------------------ */ @@ -142,14 +113,15 @@ int ipf_locks_done = 0; /* configures a table to be so large that we cannot allocate enough memory */ /* for it. 
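iplattach() and ipldetach() below record every successful hook registration in a per-stack flag and tear down only what was registered, failing the detach if any flag remains set. A compact model of that bookkeeping; register_hook() and unregister_hook() are stubs, not the real neti interfaces:

#include <stdio.h>

typedef struct inst {
	int	i_phys_in;
	int	i_phys_out;
	int	i_loop_in;
	int	i_loop_out;
} inst_t;

static int register_hook(const char *name)   { (void) name; return (0); }
static int unregister_hook(const char *name) { (void) name; return (0); }

static int
inst_attach(inst_t *ip, int want_loopback)
{
	ip->i_phys_in  = (register_hook("physical_in") == 0);
	ip->i_phys_out = (register_hook("physical_out") == 0);
	if (!ip->i_phys_in || !ip->i_phys_out)
		return (-1);
	if (want_loopback) {
		ip->i_loop_in  = (register_hook("loopback_in") == 0);
		ip->i_loop_out = (register_hook("loopback_out") == 0);
		if (!ip->i_loop_in || !ip->i_loop_out)
			return (-1);
	}
	return (0);
}

static int
inst_detach(inst_t *ip)
{
	if (ip->i_phys_in)
		ip->i_phys_in = (unregister_hook("physical_in") != 0);
	if (ip->i_phys_out)
		ip->i_phys_out = (unregister_hook("physical_out") != 0);
	if (ip->i_loop_in)
		ip->i_loop_in = (unregister_hook("loopback_in") != 0);
	if (ip->i_loop_out)
		ip->i_loop_out = (unregister_hook("loopback_out") != 0);

	/* Anything still set means an unregister failed. */
	return ((ip->i_phys_in | ip->i_phys_out |
	    ip->i_loop_in | ip->i_loop_out) ? -1 : 0);
}

int
main(void)
{
	inst_t in = { 0, 0, 0, 0 };

	if (inst_attach(&in, 1) != 0 || inst_detach(&in) != 0) {
		printf("hook setup/teardown failed\n");
		return (1);
	}
	printf("hooks registered and released cleanly\n");
	return (0);
}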
*/ /* ------------------------------------------------------------------------ */ -int ipldetach() +int ipldetach(ifs) +ipf_stack_t *ifs; { - ASSERT(rw_read_locked(&ipf_global.ipf_lk) == 0); + ASSERT(rw_read_locked(&ifs->ifs_ipf_global.ipf_lk) == 0); #if SOLARIS2 < 10 - if (fr_control_forwarding & 2) { + if (ifs->ifs_fr_control_forwarding & 2) { if (ip_forwarding != NULL) *ip_forwarding = 0; #if SOLARIS2 >= 8 @@ -165,100 +137,111 @@ int ipldetach() * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs * R(hook_family)->R(hei_lock)->R(ipf_global) (active hook running) */ - RWLOCK_EXIT(&ipf_global); + RWLOCK_EXIT(&ifs->ifs_ipf_global); /* * Remove IPv6 Hooks */ - if (ipf_ipv6 != NULL) { - if (hook6_physical_in) { - hook6_physical_in = (net_unregister_hook(ipf_ipv6, - NH_PHYSICAL_IN, &ipfhook_in) != 0); + if (ifs->ifs_ipf_ipv6 != NULL) { + if (ifs->ifs_hook6_physical_in) { + ifs->ifs_hook6_physical_in = (net_unregister_hook(ifs->ifs_ipf_ipv6, + NH_PHYSICAL_IN, &ifs->ifs_ipfhook_in) != 0); } - if (hook6_physical_out) { - hook6_physical_out = (net_unregister_hook(ipf_ipv6, - NH_PHYSICAL_OUT, &ipfhook_out) != 0); + if (ifs->ifs_hook6_physical_out) { + ifs->ifs_hook6_physical_out = + (net_unregister_hook(ifs->ifs_ipf_ipv6, + NH_PHYSICAL_OUT, &ifs->ifs_ipfhook_out) != 0); } - if (hook6_nic_events) { - hook6_nic_events = (net_unregister_hook(ipf_ipv6, - NH_NIC_EVENTS, &ipfhook_nicevents) != 0); + if (ifs->ifs_hook6_nic_events) { + ifs->ifs_hook6_nic_events = + (net_unregister_hook(ifs->ifs_ipf_ipv6, + NH_NIC_EVENTS, &ifs->ifs_ipfhook_nicevents) != 0); } - if (hook6_loopback_in) { - hook6_loopback_in = (net_unregister_hook(ipf_ipv6, - NH_LOOPBACK_IN, &ipfhook_loop_in) != 0); + if (ifs->ifs_hook6_loopback_in) { + ifs->ifs_hook6_loopback_in = + (net_unregister_hook(ifs->ifs_ipf_ipv6, + NH_LOOPBACK_IN, &ifs->ifs_ipfhook_loop_in) != 0); } - if (hook6_loopback_out) { - hook6_loopback_out = (net_unregister_hook(ipf_ipv6, - NH_LOOPBACK_OUT, &ipfhook_loop_out) != 0); + if (ifs->ifs_hook6_loopback_out) { + ifs->ifs_hook6_loopback_out = + (net_unregister_hook(ifs->ifs_ipf_ipv6, + NH_LOOPBACK_OUT, &ifs->ifs_ipfhook_loop_out) != 0); } - if (net_release(ipf_ipv6) != 0) + if (net_release(ifs->ifs_ipf_ipv6) != 0) goto detach_failed; - ipf_ipv6 = NULL; + ifs->ifs_ipf_ipv6 = NULL; } /* * Remove IPv4 Hooks */ - if (ipf_ipv4 != NULL) { - if (hook4_physical_in) { - hook4_physical_in = (net_unregister_hook(ipf_ipv4, - NH_PHYSICAL_IN, &ipfhook_in) != 0); + if (ifs->ifs_ipf_ipv4 != NULL) { + if (ifs->ifs_hook4_physical_in) { + ifs->ifs_hook4_physical_in = + (net_unregister_hook(ifs->ifs_ipf_ipv4, + NH_PHYSICAL_IN, &ifs->ifs_ipfhook_in) != 0); } - if (hook4_physical_out) { - hook4_physical_out = (net_unregister_hook(ipf_ipv4, - NH_PHYSICAL_OUT, &ipfhook_out) != 0); + if (ifs->ifs_hook4_physical_out) { + ifs->ifs_hook4_physical_out = + (net_unregister_hook(ifs->ifs_ipf_ipv4, + NH_PHYSICAL_OUT, &ifs->ifs_ipfhook_out) != 0); } - if (hook4_nic_events) { - hook4_nic_events = (net_unregister_hook(ipf_ipv4, - NH_NIC_EVENTS, &ipfhook_nicevents) != 0); + if (ifs->ifs_hook4_nic_events) { + ifs->ifs_hook4_nic_events = + (net_unregister_hook(ifs->ifs_ipf_ipv4, + NH_NIC_EVENTS, &ifs->ifs_ipfhook_nicevents) != 0); } - if (hook4_loopback_in) { - hook4_loopback_in = (net_unregister_hook(ipf_ipv4, - NH_LOOPBACK_IN, &ipfhook_loop_in) != 0); + if (ifs->ifs_hook4_loopback_in) { + ifs->ifs_hook4_loopback_in = + (net_unregister_hook(ifs->ifs_ipf_ipv4, + NH_LOOPBACK_IN, &ifs->ifs_ipfhook_loop_in) != 0); } - if (hook4_loopback_out) { - 
hook4_loopback_out = (net_unregister_hook(ipf_ipv4, - NH_LOOPBACK_OUT, &ipfhook_loop_out) != 0); + if (ifs->ifs_hook4_loopback_out) { + ifs->ifs_hook4_loopback_out = + (net_unregister_hook(ifs->ifs_ipf_ipv4, + NH_LOOPBACK_OUT, &ifs->ifs_ipfhook_loop_out) != 0); } - if (net_release(ipf_ipv4) != 0) + if (net_release(ifs->ifs_ipf_ipv4) != 0) goto detach_failed; - ipf_ipv4 = NULL; + ifs->ifs_ipf_ipv4 = NULL; } #ifdef IPFDEBUG cmn_err(CE_CONT, "ipldetach()\n"); #endif - WRITE_ENTER(&ipf_global); - fr_deinitialise(); + WRITE_ENTER(&ifs->ifs_ipf_global); + fr_deinitialise(ifs); - (void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE|FR_INACTIVE); - (void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE); + (void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE|FR_INACTIVE, ifs); + (void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE, ifs); - if (ipf_locks_done == 1) { - MUTEX_DESTROY(&ipf_timeoutlock); - MUTEX_DESTROY(&ipf_rw); - RW_DESTROY(&ipf_ipidfrag); - ipf_locks_done = 0; + if (ifs->ifs_ipf_locks_done == 1) { + MUTEX_DESTROY(&ifs->ifs_ipf_timeoutlock); + MUTEX_DESTROY(&ifs->ifs_ipf_rw); + RW_DESTROY(&ifs->ifs_ipf_tokens); + RW_DESTROY(&ifs->ifs_ipf_ipidfrag); + ifs->ifs_ipf_locks_done = 0; } - if (hook4_physical_in || hook4_physical_out || hook4_nic_events || - hook4_loopback_in || hook4_loopback_out || hook6_nic_events || - hook6_physical_in || hook6_physical_out || hook6_loopback_in || - hook6_loopback_out) + if (ifs->ifs_hook4_physical_in || ifs->ifs_hook4_physical_out || ifs->ifs_hook4_nic_events || + ifs->ifs_hook4_loopback_in || ifs->ifs_hook4_loopback_out || ifs->ifs_hook6_nic_events || + ifs->ifs_hook6_physical_in || ifs->ifs_hook6_physical_out || ifs->ifs_hook6_loopback_in || + ifs->ifs_hook6_loopback_out) return -1; return 0; detach_failed: - WRITE_ENTER(&ipf_global); + WRITE_ENTER(&ifs->ifs_ipf_global); return -1; } - -int iplattach __P((void)) +int iplattach(ifs, ns) +ipf_stack_t *ifs; +netstack_t *ns; { #if SOLARIS2 < 10 int i; @@ -268,23 +251,39 @@ int iplattach __P((void)) cmn_err(CE_CONT, "iplattach()\n"); #endif - ASSERT(rw_read_locked(&ipf_global.ipf_lk) == 0); + ASSERT(rw_read_locked(&ifs->ifs_ipf_global.ipf_lk) == 0); + ifs->ifs_fr_flags = IPF_LOGGING; +#ifdef _KERNEL + ifs->ifs_fr_update_ipid = 0; +#else + ifs->ifs_fr_update_ipid = 1; +#endif + ifs->ifs_fr_minttl = 4; + ifs->ifs_fr_icmpminfragmtu = 68; +#if defined(IPFILTER_DEFAULT_BLOCK) + ifs->ifs_fr_pass = FR_BLOCK|FR_NOMATCH; +#else + ifs->ifs_fr_pass = (IPF_DEFAULT_PASS)|FR_NOMATCH; +#endif + ifs->ifs_ipf_loopback = 0; - bzero((char *)frcache, sizeof(frcache)); - MUTEX_INIT(&ipf_rw, "ipf rw mutex"); - MUTEX_INIT(&ipf_timeoutlock, "ipf timeout lock mutex"); - RWLOCK_INIT(&ipf_ipidfrag, "ipf IP NAT-Frag rwlock"); - ipf_locks_done = 1; + bzero((char *)ifs->ifs_frcache, sizeof(ifs->ifs_frcache)); + MUTEX_INIT(&ifs->ifs_ipf_rw, "ipf rw mutex"); + MUTEX_INIT(&ifs->ifs_ipf_timeoutlock, "ipf timeout lock mutex"); + RWLOCK_INIT(&ifs->ifs_ipf_ipidfrag, "ipf IP NAT-Frag rwlock"); + RWLOCK_INIT(&ifs->ifs_ipf_tokens, "ipf token rwlock"); + ifs->ifs_ipf_locks_done = 1; - if (fr_initialise() < 0) + if (fr_initialise(ifs) < 0) return -1; - HOOK_INIT(&ipfhook_nicevents, ipf_nic_event_v4, + HOOK_INIT(&ifs->ifs_ipfhook_nicevents, ipf_nic_event_v4, "ipfilter_hook_nicevents"); - HOOK_INIT(&ipfhook_in, ipf_hook_in, "ipfilter_hook_in"); - HOOK_INIT(&ipfhook_out, ipf_hook_out, "ipfilter_hook_out"); - HOOK_INIT(&ipfhook_loop_in, ipf_hook_loop_in, "ipfilter_hook_loop_in"); - HOOK_INIT(&ipfhook_loop_out, ipf_hook_loop_out, + HOOK_INIT(&ifs->ifs_ipfhook_in, 
ipf_hook_in, "ipfilter_hook_in"); + HOOK_INIT(&ifs->ifs_ipfhook_out, ipf_hook_out, "ipfilter_hook_out"); + HOOK_INIT(&ifs->ifs_ipfhook_loop_in, ipf_hook_in, + "ipfilter_hook_loop_in"); + HOOK_INIT(&ifs->ifs_ipfhook_loop_out, ipf_hook_out, "ipfilter_hook_loop_out"); /* @@ -293,81 +292,85 @@ int iplattach __P((void)) * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs * R(hook_family)->R(hei_lock)->R(ipf_global) (packet path) */ - RWLOCK_EXIT(&ipf_global); + RWLOCK_EXIT(&ifs->ifs_ipf_global); /* * Add IPv4 hooks */ - ipf_ipv4 = net_lookup(NHF_INET); - if (ipf_ipv4 == NULL) + ifs->ifs_ipf_ipv4 = net_lookup_impl(NHF_INET, ns); + if (ifs->ifs_ipf_ipv4 == NULL) goto hookup_failed; - hook4_nic_events = (net_register_hook(ipf_ipv4, NH_NIC_EVENTS, - &ipfhook_nicevents) == 0); - if (!hook4_nic_events) + ifs->ifs_hook4_nic_events = (net_register_hook(ifs->ifs_ipf_ipv4, + NH_NIC_EVENTS, &ifs->ifs_ipfhook_nicevents) == 0); + if (!ifs->ifs_hook4_nic_events) goto hookup_failed; - hook4_physical_in = (net_register_hook(ipf_ipv4, NH_PHYSICAL_IN, - &ipfhook_in) == 0); - if (!hook4_physical_in) + ifs->ifs_hook4_physical_in = (net_register_hook(ifs->ifs_ipf_ipv4, + NH_PHYSICAL_IN, &ifs->ifs_ipfhook_in) == 0); + if (!ifs->ifs_hook4_physical_in) goto hookup_failed; - hook4_physical_out = (net_register_hook(ipf_ipv4, NH_PHYSICAL_OUT, - &ipfhook_out) == 0); - if (!hook4_physical_out) + ifs->ifs_hook4_physical_out = (net_register_hook(ifs->ifs_ipf_ipv4, + NH_PHYSICAL_OUT, &ifs->ifs_ipfhook_out) == 0); + if (!ifs->ifs_hook4_physical_out) goto hookup_failed; - if (ipf_loopback) { - hook4_loopback_in = (net_register_hook(ipf_ipv4, - NH_LOOPBACK_IN, &ipfhook_loop_in) == 0); - if (!hook4_loopback_in) + if (ifs->ifs_ipf_loopback) { + ifs->ifs_hook4_loopback_in = + (net_register_hook(ifs->ifs_ipf_ipv4, + NH_LOOPBACK_IN, &ifs->ifs_ipfhook_loop_in) == 0); + if (!ifs->ifs_hook4_loopback_in) goto hookup_failed; - hook4_loopback_out = (net_register_hook(ipf_ipv4, - NH_LOOPBACK_OUT, &ipfhook_loop_out) == 0); - if (!hook4_loopback_out) + ifs->ifs_hook4_loopback_out = + (net_register_hook(ifs->ifs_ipf_ipv4, + NH_LOOPBACK_OUT, &ifs->ifs_ipfhook_loop_out) == 0); + if (!ifs->ifs_hook4_loopback_out) goto hookup_failed; } /* * Add IPv6 hooks */ - ipf_ipv6 = net_lookup(NHF_INET6); - if (ipf_ipv6 == NULL) + ifs->ifs_ipf_ipv6 = net_lookup_impl(NHF_INET6, ns); + if (ifs->ifs_ipf_ipv6 == NULL) goto hookup_failed; - HOOK_INIT(&ipfhook_nicevents, ipf_nic_event_v6, + HOOK_INIT(&ifs->ifs_ipfhook_nicevents, ipf_nic_event_v6, "ipfilter_hook_nicevents"); - hook6_nic_events = (net_register_hook(ipf_ipv6, NH_NIC_EVENTS, - &ipfhook_nicevents) == 0); - if (!hook6_nic_events) + ifs->ifs_hook6_nic_events = (net_register_hook(ifs->ifs_ipf_ipv6, + NH_NIC_EVENTS, &ifs->ifs_ipfhook_nicevents) == 0); + if (!ifs->ifs_hook6_nic_events) goto hookup_failed; - hook6_physical_in = (net_register_hook(ipf_ipv6, NH_PHYSICAL_IN, - &ipfhook_in) == 0); - if (!hook6_physical_in) + ifs->ifs_hook6_physical_in = (net_register_hook(ifs->ifs_ipf_ipv6, + NH_PHYSICAL_IN, &ifs->ifs_ipfhook_in) == 0); + if (!ifs->ifs_hook6_physical_in) goto hookup_failed; - hook6_physical_out = (net_register_hook(ipf_ipv6, NH_PHYSICAL_OUT, - &ipfhook_out) == 0); - if (!hook6_physical_out) + ifs->ifs_hook6_physical_out = (net_register_hook(ifs->ifs_ipf_ipv6, + NH_PHYSICAL_OUT, &ifs->ifs_ipfhook_out) == 0); + if (!ifs->ifs_hook6_physical_out) goto hookup_failed; - if (ipf_loopback) { - hook6_loopback_in = (net_register_hook(ipf_ipv6, - NH_LOOPBACK_IN, &ipfhook_loop_in) == 0); - if 
(!hook6_loopback_in) + if (ifs->ifs_ipf_loopback) { + ifs->ifs_hook6_loopback_in = + (net_register_hook(ifs->ifs_ipf_ipv6, + NH_LOOPBACK_IN, &ifs->ifs_ipfhook_loop_in) == 0); + if (!ifs->ifs_hook6_loopback_in) goto hookup_failed; - hook6_loopback_out = (net_register_hook(ipf_ipv6, - NH_LOOPBACK_OUT, &ipfhook_loop_out) == 0); - if (!hook6_loopback_out) + ifs->ifs_hook6_loopback_out = + (net_register_hook(ifs->ifs_ipf_ipv6, + NH_LOOPBACK_OUT, &ifs->ifs_ipfhook_loop_out) == 0); + if (!ifs->ifs_hook6_loopback_out) goto hookup_failed; } /* * Reacquire ipf_global, now it is safe. */ - WRITE_ENTER(&ipf_global); + WRITE_ENTER(&ifs->ifs_ipf_global); /* Do not use private interface ip_params_arr[] in Solaris 10 */ #if SOLARIS2 < 10 @@ -410,7 +413,7 @@ int iplattach __P((void)) } #endif - if (fr_control_forwarding & 1) { + if (ifs->ifs_fr_control_forwarding & 1) { if (ip_forwarding != NULL) *ip_forwarding = 1; #if SOLARIS2 >= 8 @@ -423,60 +426,69 @@ int iplattach __P((void)) return 0; hookup_failed: - WRITE_ENTER(&ipf_global); + WRITE_ENTER(&ifs->ifs_ipf_global); return -1; } -static int fr_setipfloopback(set) +static int fr_setipfloopback(set, ifs) int set; +ipf_stack_t *ifs; { - if (ipf_ipv4 == NULL || ipf_ipv6 == NULL) + if (ifs->ifs_ipf_ipv4 == NULL || ifs->ifs_ipf_ipv6 == NULL) return EFAULT; - if (set && !ipf_loopback) { - ipf_loopback = 1; + if (set && !ifs->ifs_ipf_loopback) { + ifs->ifs_ipf_loopback = 1; - hook4_loopback_in = (net_register_hook(ipf_ipv4, - NH_LOOPBACK_IN, &ipfhook_loop_in) == 0); - if (!hook4_loopback_in) + ifs->ifs_hook4_loopback_in = + (net_register_hook(ifs->ifs_ipf_ipv4, + NH_LOOPBACK_IN, &ifs->ifs_ipfhook_loop_in) == 0); + if (!ifs->ifs_hook4_loopback_in) return EINVAL; - hook4_loopback_out = (net_register_hook(ipf_ipv4, - NH_LOOPBACK_OUT, &ipfhook_loop_out) == 0); - if (!hook4_loopback_out) + ifs->ifs_hook4_loopback_out = + (net_register_hook(ifs->ifs_ipf_ipv4, + NH_LOOPBACK_OUT, &ifs->ifs_ipfhook_loop_out) == 0); + if (!ifs->ifs_hook4_loopback_out) return EINVAL; - hook6_loopback_in = (net_register_hook(ipf_ipv6, - NH_LOOPBACK_IN, &ipfhook_loop_in) == 0); - if (!hook6_loopback_in) + ifs->ifs_hook6_loopback_in = + (net_register_hook(ifs->ifs_ipf_ipv6, + NH_LOOPBACK_IN, &ifs->ifs_ipfhook_loop_in) == 0); + if (!ifs->ifs_hook6_loopback_in) return EINVAL; - hook6_loopback_out = (net_register_hook(ipf_ipv6, - NH_LOOPBACK_OUT, &ipfhook_loop_out) == 0); - if (!hook6_loopback_out) + ifs->ifs_hook6_loopback_out = + (net_register_hook(ifs->ifs_ipf_ipv6, + NH_LOOPBACK_OUT, &ifs->ifs_ipfhook_loop_out) == 0); + if (!ifs->ifs_hook6_loopback_out) return EINVAL; - } else if (!set && ipf_loopback) { - ipf_loopback = 0; + } else if (!set && ifs->ifs_ipf_loopback) { + ifs->ifs_ipf_loopback = 0; - hook4_loopback_in = (net_unregister_hook(ipf_ipv4, - NH_LOOPBACK_IN, &ipfhook_loop_in) != 0); - if (hook4_loopback_in) + ifs->ifs_hook4_loopback_in = + (net_unregister_hook(ifs->ifs_ipf_ipv4, + NH_LOOPBACK_IN, &ifs->ifs_ipfhook_loop_in) != 0); + if (ifs->ifs_hook4_loopback_in) return EBUSY; - hook4_loopback_out = (net_unregister_hook(ipf_ipv4, - NH_LOOPBACK_OUT, &ipfhook_loop_out) != 0); - if (hook4_loopback_out) + ifs->ifs_hook4_loopback_out = + (net_unregister_hook(ifs->ifs_ipf_ipv4, + NH_LOOPBACK_OUT, &ifs->ifs_ipfhook_loop_out) != 0); + if (ifs->ifs_hook4_loopback_out) return EBUSY; - hook6_loopback_in = (net_unregister_hook(ipf_ipv6, - NH_LOOPBACK_IN, &ipfhook_loop_in) != 0); - if (hook6_loopback_in) + ifs->ifs_hook6_loopback_in = + (net_unregister_hook(ifs->ifs_ipf_ipv6, + NH_LOOPBACK_IN, 
&ifs->ifs_ipfhook_loop_in) != 0); + if (ifs->ifs_hook6_loopback_in) return EBUSY; - hook6_loopback_out = (net_unregister_hook(ipf_ipv6, - NH_LOOPBACK_OUT, &ipfhook_loop_out) != 0); - if (hook6_loopback_out) + ifs->ifs_hook6_loopback_out = + (net_unregister_hook(ifs->ifs_ipf_ipv6, + NH_LOOPBACK_OUT, &ifs->ifs_ipfhook_loop_out) != 0); + if (ifs->ifs_hook6_loopback_out) return EBUSY; } return 0; @@ -503,6 +515,8 @@ int *rp; friostat_t fio; minor_t unit; u_int enable; + netstack_t *ns; + ipf_stack_t *ifs; #ifdef IPFDEBUG cmn_err(CE_CONT, "iplioctl(%x,%x,%x,%d,%x,%d)\n", @@ -512,20 +526,30 @@ int *rp; if (IPL_LOGMAX < unit) return ENXIO; - if (fr_running <= 0) { - if (unit != IPL_LOGIPF) + ns = netstack_find_by_cred(cp); + ASSERT(ns != NULL); + ifs = ns->netstack_ipf; + ASSERT(ifs != NULL); + + if (ifs->ifs_fr_running <= 0) { + if (unit != IPL_LOGIPF) { + netstack_rele(ifs->ifs_netstack); return EIO; + } if (cmd != SIOCIPFGETNEXT && cmd != SIOCIPFGET && cmd != SIOCIPFSET && cmd != SIOCFRENB && - cmd != SIOCGETFS && cmd != SIOCGETFF) + cmd != SIOCGETFS && cmd != SIOCGETFF) { + netstack_rele(ifs->ifs_netstack); return EIO; + } } - READ_ENTER(&ipf_global); + READ_ENTER(&ifs->ifs_ipf_global); - error = fr_ioctlswitch(unit, (caddr_t)data, cmd, mode); + error = fr_ioctlswitch(unit, (caddr_t)data, cmd, mode, cp->cr_uid, curproc, ifs); if (error != -1) { - RWLOCK_EXIT(&ipf_global); + RWLOCK_EXIT(&ifs->ifs_ipf_global); + netstack_rele(ifs->ifs_netstack); return error; } error = 0; @@ -543,21 +567,21 @@ int *rp; break; } - RWLOCK_EXIT(&ipf_global); - WRITE_ENTER(&ipf_global); + RWLOCK_EXIT(&ifs->ifs_ipf_global); + WRITE_ENTER(&ifs->ifs_ipf_global); if (enable) { - if (fr_running > 0) + if (ifs->ifs_fr_running > 0) error = 0; else - error = iplattach(); + error = iplattach(ifs, ns); if (error == 0) - fr_running = 1; + ifs->ifs_fr_running = 1; else - (void) ipldetach(); + (void) ipldetach(ifs); } else { - error = ipldetach(); + error = ipldetach(ifs); if (error == 0) - fr_running = -1; + ifs->ifs_fr_running = -1; } } break; @@ -569,14 +593,14 @@ int *rp; /* FALLTHRU */ case SIOCIPFGETNEXT : case SIOCIPFGET : - error = fr_ipftune(cmd, (void *)data); + error = fr_ipftune(cmd, (void *)data, ifs); break; case SIOCSETFF : if (!(mode & FWRITE)) error = EPERM; else { - error = COPYIN((caddr_t)data, (caddr_t)&fr_flags, - sizeof(fr_flags)); + error = COPYIN((caddr_t)data, (caddr_t)&ifs->ifs_fr_flags, + sizeof(ifs->ifs_fr_flags)); if (error != 0) error = EFAULT; } @@ -587,11 +611,11 @@ int *rp; if (error != 0) error = EFAULT; else - error = fr_setipfloopback(tmp); + error = fr_setipfloopback(tmp, ifs); break; case SIOCGETFF : - error = COPYOUT((caddr_t)&fr_flags, (caddr_t)data, - sizeof(fr_flags)); + error = COPYOUT((caddr_t)&ifs->ifs_fr_flags, (caddr_t)data, + sizeof(ifs->ifs_fr_flags)); if (error != 0) error = EFAULT; break; @@ -606,7 +630,7 @@ int *rp; error = EPERM; else error = frrequest(unit, cmd, (caddr_t)data, - fr_active, 1); + ifs->ifs_fr_active, 1, ifs); break; case SIOCINIFR : case SIOCRMIFR : @@ -615,32 +639,35 @@ int *rp; error = EPERM; else error = frrequest(unit, cmd, (caddr_t)data, - 1 - fr_active, 1); + 1 - ifs->ifs_fr_active, 1, ifs); break; case SIOCSWAPA : if (!(mode & FWRITE)) error = EPERM; else { - WRITE_ENTER(&ipf_mutex); - bzero((char *)frcache, sizeof(frcache[0]) * 2); - error = COPYOUT((caddr_t)&fr_active, (caddr_t)data, - sizeof(fr_active)); + WRITE_ENTER(&ifs->ifs_ipf_mutex); + /* Clear one fourth of the table */ + bzero((char *)&ifs->ifs_frcache, + sizeof (ifs->ifs_frcache[0]) * 2); + 
error = COPYOUT((caddr_t)&ifs->ifs_fr_active, + (caddr_t)data, + sizeof(ifs->ifs_fr_active)); if (error != 0) error = EFAULT; else - fr_active = 1 - fr_active; - RWLOCK_EXIT(&ipf_mutex); + ifs->ifs_fr_active = 1 - ifs->ifs_fr_active; + RWLOCK_EXIT(&ifs->ifs_ipf_mutex); } break; case SIOCGETFS : - fr_getstat(&fio); + fr_getstat(&fio, ifs); error = fr_outobj((void *)data, &fio, IPFOBJ_IPFSTAT); break; case SIOCFRZST : if (!(mode & FWRITE)) error = EPERM; else - error = fr_zerostats((caddr_t)data); + error = fr_zerostats((caddr_t)data, ifs); break; case SIOCIPFFL : if (!(mode & FWRITE)) @@ -649,7 +676,7 @@ int *rp; error = COPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp)); if (!error) { - tmp = frflush(unit, 4, tmp); + tmp = frflush(unit, 4, tmp, ifs); error = COPYOUT((caddr_t)&tmp, (caddr_t)data, sizeof(tmp)); if (error != 0) @@ -666,7 +693,7 @@ int *rp; error = COPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp)); if (!error) { - tmp = frflush(unit, 6, tmp); + tmp = frflush(unit, 6, tmp, ifs); error = COPYOUT((caddr_t)&tmp, (caddr_t)data, sizeof(tmp)); if (error != 0) @@ -679,10 +706,10 @@ int *rp; case SIOCSTLCK : error = COPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp)); if (error == 0) { - fr_state_lock = tmp; - fr_nat_lock = tmp; - fr_frag_lock = tmp; - fr_auth_lock = tmp; + ifs->ifs_fr_state_lock = tmp; + ifs->ifs_fr_nat_lock = tmp; + ifs->ifs_fr_frag_lock = tmp; + ifs->ifs_fr_auth_lock = tmp; } else error = EFAULT; break; @@ -691,7 +718,7 @@ int *rp; if (!(mode & FWRITE)) error = EPERM; else { - tmp = ipflog_clear(unit); + tmp = ipflog_clear(unit, ifs); error = COPYOUT((caddr_t)&tmp, (caddr_t)data, sizeof(tmp)); if (error) @@ -703,56 +730,70 @@ int *rp; if (!(mode & FWRITE)) error = EPERM; else { - RWLOCK_EXIT(&ipf_global); - WRITE_ENTER(&ipf_global); + RWLOCK_EXIT(&ifs->ifs_ipf_global); + WRITE_ENTER(&ifs->ifs_ipf_global); - frsync(IPFSYNC_RESYNC, 0, NULL, NULL); - fr_natifpsync(IPFSYNC_RESYNC, NULL, NULL); - fr_nataddrsync(NULL, NULL); - fr_statesync(IPFSYNC_RESYNC, 0, NULL, NULL); + frsync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs); + fr_natifpsync(IPFSYNC_RESYNC, NULL, NULL, ifs); + fr_nataddrsync(NULL, NULL, ifs); + fr_statesync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs); error = 0; } break; case SIOCGFRST : - error = fr_outobj((void *)data, fr_fragstats(), + error = fr_outobj((void *)data, fr_fragstats(ifs), IPFOBJ_FRAGSTAT); break; case FIONREAD : #ifdef IPFILTER_LOG - tmp = (int)iplused[IPL_LOGIPF]; + tmp = (int)ifs->ifs_iplused[IPL_LOGIPF]; error = COPYOUT((caddr_t)&tmp, (caddr_t)data, sizeof(tmp)); if (error != 0) error = EFAULT; #endif break; + case SIOCIPFITER : + error = ipf_frruleiter((caddr_t)data, cp->cr_uid, curproc, ifs); + break; + + case SIOCGENITER : + error = ipf_genericiter((caddr_t)data, cp->cr_uid, curproc, ifs); + break; + + case SIOCIPFDELTOK : + (void)BCOPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp)); + error = ipf_deltoken(tmp, cp->cr_uid, curproc, ifs); + break; + default : cmn_err(CE_NOTE, "Unknown: cmd 0x%x data %p", cmd, (void *)data); error = EINVAL; break; } - RWLOCK_EXIT(&ipf_global); + RWLOCK_EXIT(&ifs->ifs_ipf_global); + netstack_rele(ifs->ifs_netstack); return error; } -phy_if_t get_unit(name, v) -char *name; -int v; +phy_if_t get_unit(name, v, ifs) +char *name; +int v; +ipf_stack_t *ifs; { - phy_if_t phy; net_data_t nif; if (v == 4) - nif = ipf_ipv4; + nif = ifs->ifs_ipf_ipv4; else if (v == 6) - nif = ipf_ipv6; + nif = ifs->ifs_ipf_ipv6; else return 0; - - phy = net_phylookup(nif, name); - return (phy); + nif->netd_netstack = ifs->ifs_netstack; + + return 
(net_phylookup(nif, name)); } /* @@ -806,19 +847,34 @@ dev_t dev; register struct uio *uio; cred_t *cp; { + netstack_t *ns; + ipf_stack_t *ifs; + int ret; + + ns = netstack_find_by_cred(cp); + ASSERT(ns != NULL); + ifs = ns->netstack_ipf; + ASSERT(ifs != NULL); + # ifdef IPFDEBUG cmn_err(CE_CONT, "iplread(%x,%x,%x)\n", dev, uio, cp); # endif - if (fr_running < 1) + if (ifs->ifs_fr_running < 1) { + netstack_rele(ifs->ifs_netstack); return EIO; + } # ifdef IPFILTER_SYNC - if (getminor(dev) == IPL_LOGSYNC) + if (getminor(dev) == IPL_LOGSYNC) { + netstack_rele(ifs->ifs_netstack); return ipfsync_read(uio); + } # endif - return ipflog_read(getminor(dev), uio); + ret = ipflog_read(getminor(dev), uio, ifs); + netstack_rele(ifs->ifs_netstack); + return ret; } #endif /* IPFILTER_LOG */ @@ -834,12 +890,22 @@ dev_t dev; register struct uio *uio; cred_t *cp; { + netstack_t *ns; + ipf_stack_t *ifs; + + ns = netstack_find_by_cred(cp); + ASSERT(ns != NULL); + ifs = ns->netstack_ipf; + ASSERT(ifs != NULL); + #ifdef IPFDEBUG cmn_err(CE_CONT, "iplwrite(%x,%x,%x)\n", dev, uio, cp); #endif - if (fr_running < 1) + if (ifs->ifs_fr_running < 1) { + netstack_rele(ifs->ifs_netstack); return EIO; + } #ifdef IPFILTER_SYNC if (getminor(dev) == IPL_LOGSYNC) @@ -848,6 +914,7 @@ cred_t *cp; dev = dev; /* LINT */ uio = uio; /* LINT */ cp = cp; /* LINT */ + netstack_rele(ifs->ifs_netstack); return ENXIO; } @@ -959,6 +1026,7 @@ mblk_t *m, **mpp; fr_info_t fnew; ip_t *ip; int i, hlen; + ipf_stack_t *ifs = fin->fin_ifs; ip = (ip_t *)m->b_rptr; bzero((char *)&fnew, sizeof(fnew)); @@ -979,7 +1047,7 @@ mblk_t *m, **mpp; fnew.fin_v = 4; #if SOLARIS2 >= 10 ip->ip_ttl = 255; - if (net_getpmtuenabled(ipf_ipv4) == 1) + if (net_getpmtuenabled(ifs->ifs_ipf_ipv4) == 1) ip->ip_off = htons(IP_DF); #else if (ip_ttl_ptr != NULL) @@ -1017,6 +1085,7 @@ mblk_t *m, **mpp; fnew.fin_mp = mpp; fnew.fin_hlen = hlen; fnew.fin_dp = (char *)ip + hlen; + fnew.fin_ifs = fin->fin_ifs; (void) fr_makefrip(hlen, ip, &fnew); i = fr_fastroute(m, mpp, &fnew, NULL); @@ -1043,6 +1112,7 @@ int dst; ip6_t *ip6; #endif ip_t *ip; + ipf_stack_t *ifs = fin->fin_ifs; if ((type < 0) || (type > ICMP_MAXTYPE)) return -1; @@ -1106,7 +1176,7 @@ int dst; phy = (phy_if_t)qpi->qpi_ill; if (type == ICMP_UNREACH && (phy != 0) && fin->fin_icode == ICMP_UNREACH_NEEDFRAG) - icmp->icmp_nextmtu = net_getmtu(ipf_ipv4, phy,0 ); + icmp->icmp_nextmtu = net_getmtu(ifs->ifs_ipf_ipv4, phy,0 ); #ifdef USE_INET6 if (fin->fin_v == 6) { @@ -1114,8 +1184,10 @@ int dst; int csz; if (dst == 0) { + ipf_stack_t *ifs = fin->fin_ifs; + if (fr_ifpaddr(6, FRI_NORMAL, (void *)phy, - (void *)&dst6, NULL) == -1) { + (void *)&dst6, NULL, ifs) == -1) { FREE_MB_T(m); return -1; } @@ -1142,8 +1214,10 @@ int dst; ip->ip_tos = fin->fin_ip->ip_tos; ip->ip_len = (u_short)sz; if (dst == 0) { + ipf_stack_t *ifs = fin->fin_ifs; + if (fr_ifpaddr(4, FRI_NORMAL, (void *)phy, - (void *)&dst4, NULL) == -1) { + (void *)&dst4, NULL, ifs) == -1) { FREE_MB_T(m); return -1; } @@ -1182,7 +1256,8 @@ int dst; /* * Print out warning message at rate-limited speed. */ -static void rate_limit_message(int rate, const char *message, ...) +static void rate_limit_message(ipf_stack_t *ifs, + int rate, const char *message, ...) { static time_t last_time = 0; time_t now; @@ -1193,13 +1268,13 @@ static void rate_limit_message(int rate, const char *message, ...) 
now = ddi_get_time(); /* make sure, no multiple entries */ - ASSERT(MUTEX_NOT_HELD(&(ipf_rw.ipf_lk))); - MUTEX_ENTER(&ipf_rw); + ASSERT(MUTEX_NOT_HELD(&(ifs->ifs_ipf_rw.ipf_lk))); + MUTEX_ENTER(&ifs->ifs_ipf_rw); if (now - last_time >= rate) { need_printed = 1; last_time = now; } - MUTEX_EXIT(&ipf_rw); + MUTEX_EXIT(&ifs->ifs_ipf_rw); if (need_printed) { va_start(args, message); @@ -1217,10 +1292,11 @@ static void rate_limit_message(int rate, const char *message, ...) * return the first IP Address associated with an interface */ /*ARGSUSED*/ -int fr_ifpaddr(v, atype, ifptr, inp, inpmask) +int fr_ifpaddr(v, atype, ifptr, inp, inpmask, ifs) int v, atype; void *ifptr; struct in_addr *inp, *inpmask; +ipf_stack_t *ifs; { struct sockaddr_in6 v6addr[2]; struct sockaddr_in v4addr[2]; @@ -1232,11 +1308,11 @@ struct in_addr *inp, *inpmask; switch (v) { case 4: - net_data = ipf_ipv4; + net_data = ifs->ifs_ipf_ipv4; array = v4addr; break; case 6: - net_data = ipf_ipv6; + net_data = ifs->ifs_ipf_ipv6; array = v6addr; break; default: @@ -1284,6 +1360,7 @@ fr_info_t *fin; u_char hash[16]; u_32_t newiss; MD5_CTX ctx; + ipf_stack_t *ifs = fin->fin_ifs; /* * Compute the base value of the ISS. It is a hash @@ -1297,7 +1374,7 @@ fr_info_t *fin; sizeof(fin->fin_fi.fi_dst)); MD5Update(&ctx, (u_char *) &fin->fin_dat, sizeof(fin->fin_dat)); - MD5Update(&ctx, ipf_iss_secret, sizeof(ipf_iss_secret)); + MD5Update(&ctx, ifs->ifs_ipf_iss_secret, sizeof(ifs->ifs_ipf_iss_secret)); MD5Final(hash, &ctx); @@ -1330,8 +1407,9 @@ fr_info_t *fin; ipstate_t *is; nat_t *nat; u_short id; + ipf_stack_t *ifs = fin->fin_ifs; - MUTEX_ENTER(&ipf_rw); + MUTEX_ENTER(&ifs->ifs_ipf_rw); if (fin->fin_state != NULL) { is = fin->fin_state; id = (u_short)(is->is_pkts[(fin->fin_rev << 1) + 1] & 0xffff); @@ -1340,7 +1418,7 @@ fr_info_t *fin; id = (u_short)(nat->nat_pkts[fin->fin_out] & 0xffff); } else id = ipid++; - MUTEX_EXIT(&ipf_rw); + MUTEX_EXIT(&ifs->ifs_ipf_rw); return id; } @@ -1378,28 +1456,30 @@ fr_info_t *fin; void fr_slowtimer() #else /*ARGSUSED*/ -void fr_slowtimer __P((void *ptr)) +void fr_slowtimer __P((void *arg)) #endif { + ipf_stack_t *ifs = arg; - WRITE_ENTER(&ipf_global); - if (fr_running == -1 || fr_running == 0) { - fr_timer_id = timeout(fr_slowtimer, NULL, drv_usectohz(500000)); - RWLOCK_EXIT(&ipf_global); + WRITE_ENTER(&ifs->ifs_ipf_global); + if (ifs->ifs_fr_running == -1 || ifs->ifs_fr_running == 0) { + ifs->ifs_fr_timer_id = timeout(fr_slowtimer, arg, drv_usectohz(500000)); + RWLOCK_EXIT(&ifs->ifs_ipf_global); return; } - MUTEX_DOWNGRADE(&ipf_global); - - fr_fragexpire(); - fr_timeoutstate(); - fr_natexpire(); - fr_authexpire(); - fr_ticks++; - if (fr_running == -1 || fr_running == 1) - fr_timer_id = timeout(fr_slowtimer, NULL, drv_usectohz(500000)); + MUTEX_DOWNGRADE(&ifs->ifs_ipf_global); + + fr_fragexpire(ifs); + fr_timeoutstate(ifs); + fr_natexpire(ifs); + fr_authexpire(ifs); + ifs->ifs_fr_ticks++; + if (ifs->ifs_fr_running == -1 || ifs->ifs_fr_running == 1) + ifs->ifs_fr_timer_id = timeout(fr_slowtimer, arg, + drv_usectohz(500000)); else - fr_timer_id = NULL; - RWLOCK_EXIT(&ipf_global); + ifs->ifs_fr_timer_id = NULL; + RWLOCK_EXIT(&ifs->ifs_ipf_global); } @@ -1430,6 +1510,7 @@ int len; mb_t *m = min, *m1, *m2; char *ip; uint32_t start, stuff, end, value, flags; + ipf_stack_t *ifs = fin->fin_ifs; if (m == NULL) return NULL; @@ -1487,7 +1568,7 @@ int len; &value, &flags); if (pullupmsg(m, len + ipoff + inc) == 0) { - ATOMIC_INCL(frstats[out].fr_pull[1]); + ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[1]); 
FREE_MB_T(*fin->fin_mp); *fin->fin_mp = NULL; fin->fin_m = NULL; @@ -1507,7 +1588,7 @@ int len; qpi->qpi_data = ip; } - ATOMIC_INCL(frstats[out].fr_pull[0]); + ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[0]); fin->fin_ip = (ip_t *)ip; if (fin->fin_dp != NULL) fin->fin_dp = (char *)fin->fin_ip + dpoff; @@ -1533,11 +1614,12 @@ fr_info_t *fin; net_data_t net_data_p; phy_if_t phy_ifdata_routeto; struct sockaddr sin; + ipf_stack_t *ifs = fin->fin_ifs; if (fin->fin_v == 4) { - net_data_p = ipf_ipv4; + net_data_p = ifs->ifs_ipf_ipv4; } else if (fin->fin_v == 6) { - net_data_p = ipf_ipv6; + net_data_p = ifs->ifs_ipf_ipv6; } else { return (0); } @@ -1588,14 +1670,15 @@ frdest_t *fdp; struct sockaddr_in *sin; struct sockaddr_in6 *sin6; struct sockaddr *sinp; + ipf_stack_t *ifs = fin->fin_ifs; #ifndef sparc u_short __iplen, __ipoff; #endif if (fin->fin_v == 4) { - net_data_p = ipf_ipv4; + net_data_p = ifs->ifs_ipf_ipv4; } else if (fin->fin_v == 6) { - net_data_p = ipf_ipv6; + net_data_p = ifs->ifs_ipf_ipv6; } else { return (-1); } @@ -1689,7 +1772,7 @@ frdest_t *fdp; fin->fin_ifp = saveifp; if (fin->fin_nat != NULL) - fr_natderef((nat_t **)&fin->fin_nat); + fr_natderef((nat_t **)&fin->fin_nat, ifs); } #ifndef sparc if (fin->fin_v == 4) { @@ -1707,11 +1790,11 @@ frdest_t *fdp; } } - fr_frouteok[0]++; + ifs->ifs_fr_frouteok[0]++; return 0; bad_fastroute: freemsg(mb); - fr_frouteok[1]++; + ifs->ifs_fr_frouteok[1]++; return -1; } @@ -1725,9 +1808,9 @@ bad_fastroute: /* Calling ipf_hook. */ /* ------------------------------------------------------------------------ */ /*ARGSUSED*/ -int ipf_hook_out(hook_event_token_t token, hook_data_t info) +int ipf_hook_out(hook_event_token_t token, hook_data_t info, netstack_t *ns) { - return ipf_hook(info, 1, 0); + return ipf_hook(info, 1, 0, ns); } /* ------------------------------------------------------------------------ */ @@ -1739,9 +1822,9 @@ int ipf_hook_out(hook_event_token_t token, hook_data_t info) /* Calling ipf_hook. */ /* ------------------------------------------------------------------------ */ /*ARGSUSED*/ -int ipf_hook_in(hook_event_token_t token, hook_data_t info) +int ipf_hook_in(hook_event_token_t token, hook_data_t info, netstack_t *ns) { - return ipf_hook(info, 0, 0); + return ipf_hook(info, 0, 0, ns); } @@ -1754,9 +1837,10 @@ int ipf_hook_in(hook_event_token_t token, hook_data_t info) /* Calling ipf_hook. */ /* ------------------------------------------------------------------------ */ /*ARGSUSED*/ -int ipf_hook_loop_out(hook_event_token_t token, hook_data_t info) +int ipf_hook_loop_out(hook_event_token_t token, hook_data_t info, + netstack_t *ns) { - return ipf_hook(info, 1, 1); + return ipf_hook(info, 1, 1, ns); } /* ------------------------------------------------------------------------ */ @@ -1768,9 +1852,10 @@ int ipf_hook_loop_out(hook_event_token_t token, hook_data_t info) /* Calling ipf_hook. */ /* ------------------------------------------------------------------------ */ /*ARGSUSED*/ -int ipf_hook_loop_in(hook_event_token_t token, hook_data_t info) +int ipf_hook_loop_in(hook_event_token_t token, hook_data_t info, + netstack_t *ns) { - return ipf_hook(info, 0, 1); + return ipf_hook(info, 0, 1, ns); } /* ------------------------------------------------------------------------ */ @@ -1784,7 +1869,7 @@ int ipf_hook_loop_in(hook_event_token_t token, hook_data_t info) /* parameters out of the info structure and forms them up to be useful for */ /* calling ipfilter. 
*/ /* ------------------------------------------------------------------------ */ -int ipf_hook(hook_data_t info, int out, int loopback) +int ipf_hook(hook_data_t info, int out, int loopback, netstack_t *ns) { hook_pkt_event_t *fw; int rval, v, hlen; @@ -1821,7 +1906,8 @@ int ipf_hook(hook_data_t info, int out, int loopback) else qpi.qpi_flags = 0; - rval = fr_check(fw->hpe_hdr, hlen, qpi.qpi_ill, out, &qpi, fw->hpe_mp); + rval = fr_check(fw->hpe_hdr, hlen, qpi.qpi_ill, out, + &qpi, fw->hpe_mp, ns->netstack_ipf); /* For fastroute cases, fr_check returns 0 with mp set to NULL */ if (rval == 0 && *(fw->hpe_mp) == NULL) @@ -1851,32 +1937,34 @@ int ipf_hook(hook_data_t info, int out, int loopback) /* Function to receive asynchronous NIC events from IP */ /* ------------------------------------------------------------------------ */ /*ARGSUSED*/ -int ipf_nic_event_v4(hook_event_token_t event, hook_data_t info) +int ipf_nic_event_v4(hook_event_token_t event, hook_data_t info, + netstack_t *ns) { struct sockaddr_in *sin; hook_nic_event_t *hn; + ipf_stack_t *ifs = ns->netstack_ipf; hn = (hook_nic_event_t *)info; switch (hn->hne_event) { case NE_PLUMB : - frsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic, hn->hne_data); + frsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic, hn->hne_data, ifs); fr_natifpsync(IPFSYNC_NEWIFP, (void *)hn->hne_nic, - hn->hne_data); + hn->hne_data, ifs); fr_statesync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic, - hn->hne_data); + hn->hne_data, ifs); break; case NE_UNPLUMB : - frsync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL); - fr_natifpsync(IPFSYNC_OLDIFP, (void *)hn->hne_nic, NULL); - fr_statesync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL); + frsync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs); + fr_natifpsync(IPFSYNC_OLDIFP, (void *)hn->hne_nic, NULL, ifs); + fr_statesync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs); break; case NE_ADDRESS_CHANGE : sin = hn->hne_data; - fr_nataddrsync((void *)hn->hne_nic, &sin->sin_addr); + fr_nataddrsync((void *)hn->hne_nic, &sin->sin_addr, ifs); break; default : @@ -1896,23 +1984,25 @@ int ipf_nic_event_v4(hook_event_token_t event, hook_data_t info) /* Function to receive asynchronous NIC events from IP */ /* ------------------------------------------------------------------------ */ /*ARGSUSED*/ -int ipf_nic_event_v6(hook_event_token_t event, hook_data_t info) +int ipf_nic_event_v6(hook_event_token_t event, hook_data_t info, + netstack_t *ns) { hook_nic_event_t *hn; + ipf_stack_t *ifs = ns->netstack_ipf; hn = (hook_nic_event_t *)info; switch (hn->hne_event) { case NE_PLUMB : - frsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic, hn->hne_data); + frsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic, hn->hne_data, ifs); fr_statesync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic, - hn->hne_data); + hn->hne_data, ifs); break; case NE_UNPLUMB : - frsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL); - fr_statesync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL); + frsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs); + fr_statesync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs); break; case NE_ADDRESS_CHANGE : diff --git a/usr/src/uts/common/inet/ipf/ip_frag.c b/usr/src/uts/common/inet/ipf/ip_frag.c index 29362c8a83..7748b6175c 100644 --- a/usr/src/uts/common/inet/ipf/ip_frag.c +++ b/usr/src/uts/common/inet/ipf/ip_frag.c @@ -3,7 +3,7 @@ * * See the IPFILTER.LICENCE file for details on licencing. * - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 
* Use is subject to license terms. */ @@ -83,7 +83,7 @@ struct file; #include "netinet/ip_frag.h" #include "netinet/ip_state.h" #include "netinet/ip_auth.h" -#include "netinet/ip_proxy.h" +#include "netinet/ipf_stack.h" #if (__FreeBSD_version >= 300000) # include <sys/malloc.h> # if defined(_KERNEL) @@ -109,34 +109,9 @@ static const char sccsid[] = "@(#)ip_frag.c 1.11 3/24/96 (C) 1993-2000 Darren Re static const char rcsid[] = "@(#)$Id: ip_frag.c,v 2.77.2.5 2005/08/11 14:33:10 darrenr Exp $"; #endif - -static ipfr_t *ipfr_list = NULL; -static ipfr_t **ipfr_tail = &ipfr_list; -static ipfr_t **ipfr_heads; - -static ipfr_t *ipfr_natlist = NULL; -static ipfr_t **ipfr_nattail = &ipfr_natlist; -static ipfr_t **ipfr_nattab; - -static ipfr_t *ipfr_ipidlist = NULL; -static ipfr_t **ipfr_ipidtail = &ipfr_ipidlist; -static ipfr_t **ipfr_ipidtab; - -static ipfrstat_t ipfr_stats; -static int ipfr_inuse = 0; -int ipfr_size = IPFT_SIZE; - -int fr_ipfrttl = 120; /* 60 seconds */ -int fr_frag_lock = 0; -int fr_frag_init = 0; -u_long fr_ticks = 0; - - static ipfr_t *ipfr_newfrag __P((fr_info_t *, u_32_t, ipfr_t **)); static ipfr_t *fr_fraglookup __P((fr_info_t *, ipfr_t **)); -static void fr_fragdelete __P((ipfr_t *, ipfr_t ***)); - -static frentry_t frblock; +static void fr_fragdelete __P((ipfr_t *, ipfr_t ***, ipf_stack_t *)); /* ------------------------------------------------------------------------ */ /* Function: fr_fraginit */ @@ -145,31 +120,44 @@ static frentry_t frblock; /* */ /* Initialise the hash tables for the fragment cache lookups. */ /* ------------------------------------------------------------------------ */ -int fr_fraginit() +int fr_fraginit(ifs) +ipf_stack_t *ifs; { - KMALLOCS(ipfr_heads, ipfr_t **, ipfr_size * sizeof(ipfr_t *)); - if (ipfr_heads == NULL) + ifs->ifs_ipfr_tail = &ifs->ifs_ipfr_list; + ifs->ifs_ipfr_nattail = &ifs->ifs_ipfr_natlist; + ifs->ifs_ipfr_ipidtail = &ifs->ifs_ipfr_ipidlist; + ifs->ifs_ipfr_size = IPFT_SIZE; + ifs->ifs_fr_ipfrttl = 120; /* 60 seconds */ + + KMALLOCS(ifs->ifs_ipfr_heads, ipfr_t **, + ifs->ifs_ipfr_size * sizeof(ipfr_t *)); + if (ifs->ifs_ipfr_heads == NULL) return -1; - bzero((char *)ipfr_heads, ipfr_size * sizeof(ipfr_t *)); + bzero((char *)ifs->ifs_ipfr_heads, + ifs->ifs_ipfr_size * sizeof(ipfr_t *)); - KMALLOCS(ipfr_nattab, ipfr_t **, ipfr_size * sizeof(ipfr_t *)); - if (ipfr_nattab == NULL) + KMALLOCS(ifs->ifs_ipfr_nattab, ipfr_t **, + ifs->ifs_ipfr_size * sizeof(ipfr_t *)); + if (ifs->ifs_ipfr_nattab == NULL) return -1; - bzero((char *)ipfr_nattab, ipfr_size * sizeof(ipfr_t *)); + bzero((char *)ifs->ifs_ipfr_nattab, + ifs->ifs_ipfr_size * sizeof(ipfr_t *)); - KMALLOCS(ipfr_ipidtab, ipfr_t **, ipfr_size * sizeof(ipfr_t *)); - if (ipfr_ipidtab == NULL) + KMALLOCS(ifs->ifs_ipfr_ipidtab, ipfr_t **, + ifs->ifs_ipfr_size * sizeof(ipfr_t *)); + if (ifs->ifs_ipfr_ipidtab == NULL) return -1; - bzero((char *)ipfr_ipidtab, ipfr_size * sizeof(ipfr_t *)); + bzero((char *)ifs->ifs_ipfr_ipidtab, + ifs->ifs_ipfr_size * sizeof(ipfr_t *)); - RWLOCK_INIT(&ipf_frag, "ipf fragment rwlock"); + RWLOCK_INIT(&ifs->ifs_ipf_frag, "ipf fragment rwlock"); /* Initialise frblock with "block in all" */ - bzero((char *)&frblock, sizeof(frblock)); - frblock.fr_flags = FR_BLOCK|FR_INQUE; /* block in */ - frblock.fr_ref = 1; + bzero((char *)&ifs->ifs_frblock, sizeof(ifs->ifs_frblock)); + ifs->ifs_frblock.fr_flags = FR_BLOCK|FR_INQUE; /* block in */ + ifs->ifs_frblock.fr_ref = 1; - fr_frag_init = 1; + ifs->ifs_fr_frag_init = 1; return 0; } @@ -182,26 +170,33 @@ int 
fr_fraginit() /* */ /* Free all memory allocated whilst running and from initialisation. */ /* ------------------------------------------------------------------------ */ -void fr_fragunload() +void fr_fragunload(ifs) +ipf_stack_t *ifs; { - if (fr_frag_init == 1) { - fr_fragclear(); + if (ifs->ifs_fr_frag_init == 1) { + fr_fragclear(ifs); - RW_DESTROY(&ipf_frag); - fr_frag_init = 0; + RW_DESTROY(&ifs->ifs_ipf_frag); + ifs->ifs_fr_frag_init = 0; } - if (ipfr_heads != NULL) - KFREES(ipfr_heads, ipfr_size * sizeof(ipfr_t *)); - ipfr_heads = NULL; + if (ifs->ifs_ipfr_heads != NULL) { + KFREES(ifs->ifs_ipfr_heads, + ifs->ifs_ipfr_size * sizeof(ipfr_t *)); + } + ifs->ifs_ipfr_heads = NULL; - if (ipfr_nattab != NULL) - KFREES(ipfr_nattab, ipfr_size * sizeof(ipfr_t *)); - ipfr_nattab = NULL; + if (ifs->ifs_ipfr_nattab != NULL) { + KFREES(ifs->ifs_ipfr_nattab, + ifs->ifs_ipfr_size * sizeof(ipfr_t *)); + } + ifs->ifs_ipfr_nattab = NULL; - if (ipfr_ipidtab != NULL) - KFREES(ipfr_ipidtab, ipfr_size * sizeof(ipfr_t *)); - ipfr_ipidtab = NULL; + if (ifs->ifs_ipfr_ipidtab != NULL) { + KFREES(ifs->ifs_ipfr_ipidtab, + ifs->ifs_ipfr_size * sizeof(ipfr_t *)); + } + ifs->ifs_ipfr_ipidtab = NULL; } @@ -212,12 +207,13 @@ void fr_fragunload() /* */ /* Updates ipfr_stats with current information and returns a pointer to it */ /* ------------------------------------------------------------------------ */ -ipfrstat_t *fr_fragstats() +ipfrstat_t *fr_fragstats(ifs) +ipf_stack_t *ifs; { - ipfr_stats.ifs_table = ipfr_heads; - ipfr_stats.ifs_nattab = ipfr_nattab; - ipfr_stats.ifs_inuse = ipfr_inuse; - return &ipfr_stats; + ifs->ifs_ipfr_stats.ifs_table = ifs->ifs_ipfr_heads; + ifs->ifs_ipfr_stats.ifs_nattab = ifs->ifs_ipfr_nattab; + ifs->ifs_ipfr_stats.ifs_inuse = ifs->ifs_ipfr_inuse; + return &ifs->ifs_ipfr_stats; } @@ -237,8 +233,9 @@ ipfr_t *table[]; { ipfr_t *fra, frag; u_int idx, off; + ipf_stack_t *ifs = fin->fin_ifs; - if (ipfr_inuse >= IPFT_SIZE) + if (ifs->ifs_ipfr_inuse >= IPFT_SIZE) return NULL; if ((fin->fin_flx & (FI_FRAG|FI_BAD)) != FI_FRAG) @@ -270,7 +267,7 @@ ipfr_t *table[]; for (fra = table[idx]; (fra != NULL); fra = fra->ipfr_hnext) if (!bcmp((char *)&frag.ipfr_ifp, (char *)&fra->ipfr_ifp, IPFR_CMPSZ)) { - ipfr_stats.ifs_exists++; + ifs->ifs_ipfr_stats.ifs_exists++; return NULL; } @@ -280,7 +277,7 @@ ipfr_t *table[]; */ KMALLOC(fra, ipfr_t *); if (fra == NULL) { - ipfr_stats.ifs_nomem++; + ifs->ifs_ipfr_stats.ifs_nomem++; return NULL; } @@ -306,7 +303,7 @@ ipfr_t *table[]; fra->ipfr_data = NULL; table[idx] = fra; bcopy((char *)&frag.ipfr_ifp, (char *)&fra->ipfr_ifp, IPFR_CMPSZ); - fra->ipfr_ttl = fr_ticks + fr_ipfrttl; + fra->ipfr_ttl = ifs->ifs_fr_ticks + ifs->ifs_fr_ipfrttl; /* * Compute the offset of the expected start of the next packet. 
@@ -321,8 +318,9 @@ ipfr_t *table[]; } fra->ipfr_off = off + fin->fin_dlen; fra->ipfr_pass = pass; - ipfr_stats.ifs_new++; - ipfr_inuse++; + fra->ipfr_ref = 1; + ifs->ifs_ipfr_stats.ifs_new++; + ifs->ifs_ipfr_inuse++; return fra; } @@ -339,21 +337,22 @@ u_32_t pass; fr_info_t *fin; { ipfr_t *fra; + ipf_stack_t *ifs = fin->fin_ifs; - if (fr_frag_lock != 0) + if (ifs->ifs_fr_frag_lock != 0) return -1; - WRITE_ENTER(&ipf_frag); - fra = ipfr_newfrag(fin, pass, ipfr_heads); + WRITE_ENTER(&ifs->ifs_ipf_frag); + fra = ipfr_newfrag(fin, pass, ifs->ifs_ipfr_heads); if (fra != NULL) { - *ipfr_tail = fra; - fra->ipfr_prev = ipfr_tail; - ipfr_tail = &fra->ipfr_next; - if (ipfr_list == NULL) - ipfr_list = fra; + *ifs->ifs_ipfr_tail = fra; + fra->ipfr_prev = ifs->ifs_ipfr_tail; + ifs->ifs_ipfr_tail = &fra->ipfr_next; + if (ifs->ifs_ipfr_list == NULL) + ifs->ifs_ipfr_list = fra; fra->ipfr_next = NULL; } - RWLOCK_EXIT(&ipf_frag); + RWLOCK_EXIT(&ifs->ifs_ipf_frag); return fra ? 0 : -1; } @@ -373,21 +372,22 @@ u_32_t pass; nat_t *nat; { ipfr_t *fra; + ipf_stack_t *ifs = fin->fin_ifs; - if ((fin->fin_v != 4) || (fr_frag_lock != 0)) + if ((fin->fin_v != 4) || (ifs->ifs_fr_frag_lock != 0)) return 0; - WRITE_ENTER(&ipf_natfrag); - fra = ipfr_newfrag(fin, pass, ipfr_nattab); + WRITE_ENTER(&ifs->ifs_ipf_natfrag); + fra = ipfr_newfrag(fin, pass, ifs->ifs_ipfr_nattab); if (fra != NULL) { fra->ipfr_data = nat; nat->nat_data = fra; - *ipfr_nattail = fra; - fra->ipfr_prev = ipfr_nattail; - ipfr_nattail = &fra->ipfr_next; + *ifs->ifs_ipfr_nattail = fra; + fra->ipfr_prev = ifs->ifs_ipfr_nattail; + ifs->ifs_ipfr_nattail = &fra->ipfr_next; fra->ipfr_next = NULL; } - RWLOCK_EXIT(&ipf_natfrag); + RWLOCK_EXIT(&ifs->ifs_ipf_natfrag); return fra ? 0 : -1; } @@ -406,20 +406,21 @@ fr_info_t *fin; u_32_t ipid; { ipfr_t *fra; + ipf_stack_t *ifs = fin->fin_ifs; - if (fr_frag_lock) + if (ifs->ifs_fr_frag_lock) return 0; - WRITE_ENTER(&ipf_ipidfrag); - fra = ipfr_newfrag(fin, 0, ipfr_ipidtab); + WRITE_ENTER(&ifs->ifs_ipf_ipidfrag); + fra = ipfr_newfrag(fin, 0, ifs->ifs_ipfr_ipidtab); if (fra != NULL) { fra->ipfr_data = (void *)(uintptr_t)ipid; - *ipfr_ipidtail = fra; - fra->ipfr_prev = ipfr_ipidtail; - ipfr_ipidtail = &fra->ipfr_next; + *ifs->ifs_ipfr_ipidtail = fra; + fra->ipfr_prev = ifs->ifs_ipfr_ipidtail; + ifs->ifs_ipfr_ipidtail = &fra->ipfr_next; fra->ipfr_next = NULL; } - RWLOCK_EXIT(&ipf_ipidfrag); + RWLOCK_EXIT(&ifs->ifs_ipf_ipidfrag); return fra ? 0 : -1; } @@ -440,6 +441,7 @@ ipfr_t *table[]; { ipfr_t *f, frag; u_int idx; + ipf_stack_t *ifs = fin->fin_ifs; if ((fin->fin_flx & (FI_FRAG|FI_BAD)) != FI_FRAG) return NULL; @@ -485,7 +487,7 @@ ipfr_t *table[]; * packets had been seen. 
*/ if (fin->fin_flx & FI_SHORT) { - ATOMIC_INCL(ipfr_stats.ifs_short); + ATOMIC_INCL(ifs->ifs_ipfr_stats.ifs_short); continue; } @@ -498,7 +500,7 @@ ipfr_t *table[]; off = fin->fin_off; /* same as in ipfr_newfrag() */ if (f->ipfr_seen0) { if (off == 0) { - ATOMIC_INCL(ipfr_stats.ifs_retrans0); + ATOMIC_INCL(ifs->ifs_ipfr_stats.ifs_retrans0); continue; } } else if (off == 0) { @@ -536,11 +538,11 @@ ipfr_t *table[]; */ if (off == f->ipfr_off) { if (!(fin->fin_ip->ip_off & IP_MF)) - f->ipfr_ttl = fr_ticks + 1; + f->ipfr_ttl = ifs->ifs_fr_ticks + 1; f->ipfr_off = fin->fin_dlen + off; } else if (f->ipfr_pass & FR_FRSTRICT) continue; - ATOMIC_INCL(ipfr_stats.ifs_hits); + ATOMIC_INCL(ifs->ifs_ipfr_stats.ifs_hits); return f; } return NULL; @@ -560,23 +562,24 @@ fr_info_t *fin; { nat_t *nat; ipfr_t *ipf; + ipf_stack_t *ifs = fin->fin_ifs; - if ((fin->fin_v != 4) || (fr_frag_lock) || !ipfr_natlist) + if ((fin->fin_v != 4) || (ifs->ifs_fr_frag_lock) || !ifs->ifs_ipfr_natlist) return NULL; - READ_ENTER(&ipf_natfrag); - ipf = fr_fraglookup(fin, ipfr_nattab); + READ_ENTER(&ifs->ifs_ipf_natfrag); + ipf = fr_fraglookup(fin, ifs->ifs_ipfr_nattab); if (ipf != NULL) { nat = ipf->ipfr_data; /* * This is the last fragment for this packet. */ - if ((ipf->ipfr_ttl == fr_ticks + 1) && (nat != NULL)) { + if ((ipf->ipfr_ttl == ifs->ifs_fr_ticks + 1) && (nat != NULL)) { nat->nat_data = NULL; ipf->ipfr_data = NULL; } } else nat = NULL; - RWLOCK_EXIT(&ipf_natfrag); + RWLOCK_EXIT(&ifs->ifs_ipf_natfrag); return nat; } @@ -594,17 +597,18 @@ fr_info_t *fin; { ipfr_t *ipf; u_32_t id; + ipf_stack_t *ifs = fin->fin_ifs; - if ((fin->fin_v != 4) || (fr_frag_lock) || !ipfr_ipidlist) + if ((fin->fin_v != 4) || (ifs->ifs_fr_frag_lock) || !ifs->ifs_ipfr_ipidlist) return 0xffffffff; - READ_ENTER(&ipf_ipidfrag); - ipf = fr_fraglookup(fin, ipfr_ipidtab); + READ_ENTER(&ifs->ifs_ipf_ipidfrag); + ipf = fr_fraglookup(fin, ifs->ifs_ipfr_ipidtab); if (ipf != NULL) id = (u_32_t)(uintptr_t)ipf->ipfr_data; else id = 0xffffffff; - RWLOCK_EXIT(&ipf_ipidfrag); + RWLOCK_EXIT(&ifs->ifs_ipf_ipidfrag); return id; } @@ -627,13 +631,14 @@ u_32_t *passp; frentry_t *fr = NULL; ipfr_t *fra; u_32_t pass, oflx; + ipf_stack_t *ifs = fin->fin_ifs; - if ((fr_frag_lock) || (ipfr_list == NULL)) + if ((ifs->ifs_fr_frag_lock) || (ifs->ifs_ipfr_list == NULL)) return NULL; - READ_ENTER(&ipf_frag); + READ_ENTER(&ifs->ifs_ipf_frag); oflx = fin->fin_flx; - fra = fr_fraglookup(fin, ipfr_heads); + fra = fr_fraglookup(fin, ifs->ifs_ipfr_heads); if (fra != NULL) { fr = fra->ipfr_rule; fin->fin_fr = fr; @@ -647,9 +652,9 @@ u_32_t *passp; if (!(oflx & FI_BAD) && (fin->fin_flx & FI_BAD)) { *passp &= ~FR_CMDMASK; *passp |= FR_BLOCK; - fr = &frblock; + fr = &ifs->ifs_frblock; } - RWLOCK_EXIT(&ipf_frag); + RWLOCK_EXIT(&ifs->ifs_ipf_frag); return fr; } @@ -662,16 +667,17 @@ u_32_t *passp; /* Search through all of the fragment cache entries and wherever a pointer */ /* is found to match ptr, reset it to NULL. 
*/ /* ------------------------------------------------------------------------ */ -void fr_forget(ptr) +void fr_forget(ptr, ifs) void *ptr; +ipf_stack_t *ifs; { ipfr_t *fr; - WRITE_ENTER(&ipf_frag); - for (fr = ipfr_list; fr; fr = fr->ipfr_next) + WRITE_ENTER(&ifs->ifs_ipf_frag); + for (fr = ifs->ifs_ipfr_list; fr; fr = fr->ipfr_next) if (fr->ipfr_data == ptr) fr->ipfr_data = NULL; - RWLOCK_EXIT(&ipf_frag); + RWLOCK_EXIT(&ifs->ifs_ipf_frag); } @@ -683,16 +689,17 @@ void *ptr; /* Search through all of the fragment cache entries for NAT and wherever a */ /* pointer is found to match ptr, reset it to NULL. */ /* ------------------------------------------------------------------------ */ -void fr_forgetnat(ptr) +void fr_forgetnat(ptr, ifs) void *ptr; +ipf_stack_t *ifs; { ipfr_t *fr; - WRITE_ENTER(&ipf_natfrag); - for (fr = ipfr_natlist; fr; fr = fr->ipfr_next) + WRITE_ENTER(&ifs->ifs_ipf_natfrag); + for (fr = ifs->ifs_ipfr_natlist; fr; fr = fr->ipfr_next) if (fr->ipfr_data == ptr) fr->ipfr_data = NULL; - RWLOCK_EXIT(&ipf_natfrag); + RWLOCK_EXIT(&ifs->ifs_ipf_natfrag); } @@ -707,14 +714,15 @@ void *ptr; /* the filter rule it is associated with it if it is no longer used as a */ /* result of decreasing the reference count. */ /* ------------------------------------------------------------------------ */ -static void fr_fragdelete(fra, tail) +static void fr_fragdelete(fra, tail, ifs) ipfr_t *fra, ***tail; +ipf_stack_t *ifs; { frentry_t *fr; fr = fra->ipfr_rule; if (fr != NULL) - (void)fr_derefrule(&fr); + (void)fr_derefrule(&fr, ifs); if (fra->ipfr_next) fra->ipfr_next->ipfr_prev = fra->ipfr_prev; @@ -725,7 +733,9 @@ ipfr_t *fra, ***tail; if (fra->ipfr_hnext) fra->ipfr_hnext->ipfr_hprev = fra->ipfr_hprev; *fra->ipfr_hprev = fra->ipfr_hnext; - KFREE(fra); + + if (fra->ipfr_ref <= 0) + KFREE(fra); } @@ -737,30 +747,34 @@ ipfr_t *fra, ***tail; /* Free memory in use by fragment state information kept. Do the normal */ /* fragment state stuff first and then the NAT-fragment table. 
*/ /* ------------------------------------------------------------------------ */ -void fr_fragclear() +void fr_fragclear(ifs) +ipf_stack_t *ifs; { ipfr_t *fra; nat_t *nat; - WRITE_ENTER(&ipf_frag); - while ((fra = ipfr_list) != NULL) - fr_fragdelete(fra, &ipfr_tail); - ipfr_tail = &ipfr_list; - RWLOCK_EXIT(&ipf_frag); + WRITE_ENTER(&ifs->ifs_ipf_frag); + while ((fra = ifs->ifs_ipfr_list) != NULL) { + fra->ipfr_ref--; + fr_fragdelete(fra, &ifs->ifs_ipfr_tail, ifs); + } + ifs->ifs_ipfr_tail = &ifs->ifs_ipfr_list; + RWLOCK_EXIT(&ifs->ifs_ipf_frag); - WRITE_ENTER(&ipf_nat); - WRITE_ENTER(&ipf_natfrag); - while ((fra = ipfr_natlist) != NULL) { + WRITE_ENTER(&ifs->ifs_ipf_nat); + WRITE_ENTER(&ifs->ifs_ipf_natfrag); + while ((fra = ifs->ifs_ipfr_natlist) != NULL) { nat = fra->ipfr_data; if (nat != NULL) { if (nat->nat_data == fra) nat->nat_data = NULL; } - fr_fragdelete(fra, &ipfr_nattail); + fra->ipfr_ref--; + fr_fragdelete(fra, &ifs->ifs_ipfr_nattail, ifs); } - ipfr_nattail = &ipfr_natlist; - RWLOCK_EXIT(&ipf_natfrag); - RWLOCK_EXIT(&ipf_nat); + ifs->ifs_ipfr_nattail = &ifs->ifs_ipfr_natlist; + RWLOCK_EXIT(&ifs->ifs_ipf_natfrag); + RWLOCK_EXIT(&ifs->ifs_ipf_nat); } @@ -771,39 +785,43 @@ void fr_fragclear() /* */ /* Expire entries in the fragment cache table that have been there too long */ /* ------------------------------------------------------------------------ */ -void fr_fragexpire() +void fr_fragexpire(ifs) +ipf_stack_t *ifs; { ipfr_t **fp, *fra; nat_t *nat; SPL_INT(s); - if (fr_frag_lock) + if (ifs->ifs_fr_frag_lock) return; SPL_NET(s); - WRITE_ENTER(&ipf_frag); + WRITE_ENTER(&ifs->ifs_ipf_frag); /* * Go through the entire table, looking for entries to expire, - * which is indicated by the ttl being less than or equal to fr_ticks. + * which is indicated by the ttl being less than or equal to + * ifs_fr_ticks. */ - for (fp = &ipfr_list; ((fra = *fp) != NULL); ) { - if (fra->ipfr_ttl > fr_ticks) + for (fp = &ifs->ifs_ipfr_list; ((fra = *fp) != NULL); ) { + if (fra->ipfr_ttl > ifs->ifs_fr_ticks) break; - fr_fragdelete(fra, &ipfr_tail); - ipfr_stats.ifs_expire++; - ipfr_inuse--; + fra->ipfr_ref--; + fr_fragdelete(fra, &ifs->ifs_ipfr_tail, ifs); + ifs->ifs_ipfr_stats.ifs_expire++; + ifs->ifs_ipfr_inuse--; } - RWLOCK_EXIT(&ipf_frag); + RWLOCK_EXIT(&ifs->ifs_ipf_frag); - WRITE_ENTER(&ipf_ipidfrag); - for (fp = &ipfr_ipidlist; ((fra = *fp) != NULL); ) { - if (fra->ipfr_ttl > fr_ticks) + WRITE_ENTER(&ifs->ifs_ipf_ipidfrag); + for (fp = &ifs->ifs_ipfr_ipidlist; ((fra = *fp) != NULL); ) { + if (fra->ipfr_ttl > ifs->ifs_fr_ticks) break; - fr_fragdelete(fra, &ipfr_ipidtail); - ipfr_stats.ifs_expire++; - ipfr_inuse--; + fra->ipfr_ref--; + fr_fragdelete(fra, &ifs->ifs_ipfr_ipidtail, ifs); + ifs->ifs_ipfr_stats.ifs_expire++; + ifs->ifs_ipfr_inuse--; } - RWLOCK_EXIT(&ipf_ipidfrag); + RWLOCK_EXIT(&ifs->ifs_ipf_ipidfrag); /* * Same again for the NAT table, except that if the structure also @@ -812,22 +830,23 @@ void fr_fragexpire() * NOTE: We need to grab both mutex's early, and in this order so as * to prevent a deadlock if both try to expire at the same time. 
*/ - WRITE_ENTER(&ipf_nat); - WRITE_ENTER(&ipf_natfrag); - for (fp = &ipfr_natlist; ((fra = *fp) != NULL); ) { - if (fra->ipfr_ttl > fr_ticks) + WRITE_ENTER(&ifs->ifs_ipf_nat); + WRITE_ENTER(&ifs->ifs_ipf_natfrag); + for (fp = &ifs->ifs_ipfr_natlist; ((fra = *fp) != NULL); ) { + if (fra->ipfr_ttl > ifs->ifs_fr_ticks) break; nat = fra->ipfr_data; if (nat != NULL) { if (nat->nat_data == fra) nat->nat_data = NULL; } - fr_fragdelete(fra, &ipfr_nattail); - ipfr_stats.ifs_expire++; - ipfr_inuse--; + fra->ipfr_ref--; + fr_fragdelete(fra, &ifs->ifs_ipfr_nattail, ifs); + ifs->ifs_ipfr_stats.ifs_expire++; + ifs->ifs_ipfr_inuse--; } - RWLOCK_EXIT(&ipf_natfrag); - RWLOCK_EXIT(&ipf_nat); + RWLOCK_EXIT(&ifs->ifs_ipf_natfrag); + RWLOCK_EXIT(&ifs->ifs_ipf_nat); SPL_X(s); } @@ -843,19 +862,21 @@ void fr_fragexpire() #if !defined(_KERNEL) || (!SOLARIS && !defined(__hpux) && !defined(__sgi) && \ !defined(__osf__) && !defined(linux)) # if defined(_KERNEL) && ((BSD >= 199103) || defined(__sgi)) -void fr_slowtimer __P((void *ptr)) +void fr_slowtimer __P((void *arg)) # else -int fr_slowtimer() +int fr_slowtimer(void *arg) # endif { - READ_ENTER(&ipf_global); - - fr_fragexpire(); - fr_timeoutstate(); - fr_natexpire(); - fr_authexpire(); - fr_ticks++; - if (fr_running <= 0) + ipf_stack_t *ifs = arg; + + READ_ENTER(&ifs->ifs_ipf_global); + + fr_fragexpire(ifs); + fr_timeoutstate(ifs); + fr_natexpire(ifs); + fr_authexpire(ifs); + ifs->ifs_fr_ticks++; + if (ifs->ifs_fr_running <= 0) goto done; # ifdef _KERNEL # if defined(__NetBSD__) && (__NetBSD_Version__ >= 104240000) @@ -877,9 +898,74 @@ int fr_slowtimer() # endif /* NetBSD */ # endif done: - RWLOCK_EXIT(&ipf_global); + RWLOCK_EXIT(&ifs->ifs_ipf_global); # if (BSD < 199103) || !defined(_KERNEL) return 0; # endif } #endif /* !SOLARIS && !defined(__hpux) && !defined(__sgi) */ + +/*ARGSUSED*/ +int fr_nextfrag(token, itp, top, tail, lock, ifs) +ipftoken_t *token; +ipfgeniter_t *itp; +ipfr_t **top, ***tail; +ipfrwlock_t *lock; +ipf_stack_t *ifs; +{ + ipfr_t *frag, *next, zero; + int error = 0; + + frag = token->ipt_data; + if (frag == (ipfr_t *)-1) { + ipf_freetoken(token, ifs); + return ESRCH; + } + + READ_ENTER(lock); + if (frag == NULL) + next = *top; + else + next = frag->ipfr_next; + + if (next != NULL) { + ATOMIC_INC(next->ipfr_ref); + token->ipt_data = next; + } else { + bzero(&zero, sizeof(zero)); + next = &zero; + token->ipt_data = (void *)-1; + } + RWLOCK_EXIT(lock); + + if (frag != NULL) { + fr_fragderef(&frag, lock, ifs); + } + + error = COPYOUT(next, itp->igi_data, sizeof(*next)); + if (error != 0) + error = EFAULT; + + return error; +} + + +void fr_fragderef(frp, lock, ifs) +ipfr_t **frp; +ipfrwlock_t *lock; +ipf_stack_t *ifs; +{ + ipfr_t *fra; + + fra = *frp; + *frp = NULL; + + WRITE_ENTER(lock); + fra->ipfr_ref--; + if (fra->ipfr_ref <= 0) { + KFREE(fra); + ifs->ifs_ipfr_stats.ifs_expire++; + ifs->ifs_ipfr_inuse--; + } + RWLOCK_EXIT(lock); +} diff --git a/usr/src/uts/common/inet/ipf/ip_htable.c b/usr/src/uts/common/inet/ipf/ip_htable.c index 4ce3cc411e..bbb10d93d9 100644 --- a/usr/src/uts/common/inet/ipf/ip_htable.c +++ b/usr/src/uts/common/inet/ipf/ip_htable.c @@ -3,7 +3,7 @@ * * See the IPFILTER.LICENCE file for details on licencing. * - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -54,6 +54,7 @@ struct file; #include "netinet/ip_fil.h" #include "netinet/ip_lookup.h" #include "netinet/ip_htable.h" +#include "netinet/ipf_stack.h" /* END OF INCLUDES */ #if !defined(lint) @@ -68,35 +69,29 @@ static uint32_t sum4(uint32_t *); static void left_shift_ipv6 __P((char *)); #endif -static u_long ipht_nomem[IPL_LOGSIZE] = { 0, 0, 0, 0, 0, 0, 0, 0 }; -static u_long ipf_nhtables[IPL_LOGSIZE] = { 0, 0, 0, 0, 0, 0, 0, 0 }; -static u_long ipf_nhtnodes[IPL_LOGSIZE] = { 0, 0, 0, 0, 0, 0, 0, 0 }; - -iphtable_t *ipf_htables[IPL_LOGSIZE] = { NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL }; - - -void fr_htable_unload() +void fr_htable_unload(ifs) +ipf_stack_t *ifs; { iplookupflush_t fop; fop.iplf_unit = IPL_LOGALL; - (void)fr_flushhtable(&fop); + (void)fr_flushhtable(&fop, ifs); } -int fr_gethtablestat(op) +int fr_gethtablestat(op, ifs) iplookupop_t *op; +ipf_stack_t *ifs; { iphtstat_t stats; if (op->iplo_size != sizeof(stats)) return EINVAL; - stats.iphs_tables = ipf_htables[op->iplo_unit]; - stats.iphs_numtables = ipf_nhtables[op->iplo_unit]; - stats.iphs_numnodes = ipf_nhtnodes[op->iplo_unit]; - stats.iphs_nomem = ipht_nomem[op->iplo_unit]; + stats.iphs_tables = ifs->ifs_ipf_htables[op->iplo_unit]; + stats.iphs_numtables = ifs->ifs_ipf_nhtables[op->iplo_unit]; + stats.iphs_numnodes = ifs->ifs_ipf_nhtnodes[op->iplo_unit]; + stats.iphs_nomem = ifs->ifs_ipht_nomem[op->iplo_unit]; return COPYOUT(&stats, op->iplo_struct, sizeof(stats)); @@ -106,8 +101,9 @@ iplookupop_t *op; /* * Create a new hash table using the template passed. */ -int fr_newhtable(op) +int fr_newhtable(op, ifs) iplookupop_t *op; +ipf_stack_t *ifs; { iphtable_t *iph, *oiph; char name[FR_GROUPLEN]; @@ -115,7 +111,7 @@ iplookupop_t *op; KMALLOC(iph, iphtable_t *); if (iph == NULL) { - ipht_nomem[op->iplo_unit]++; + ifs->ifs_ipht_nomem[op->iplo_unit]++; return ENOMEM; } @@ -132,7 +128,7 @@ iplookupop_t *op; } if ((op->iplo_arg & IPHASH_ANON) == 0) { - if (fr_findhtable(op->iplo_unit, op->iplo_name) != NULL) { + if (fr_findhtable(op->iplo_unit, op->iplo_name, ifs) != NULL) { KFREE(iph); return EEXIST; } @@ -145,7 +141,7 @@ iplookupop_t *op; #else (void)sprintf(name, "%u", i); #endif - for (oiph = ipf_htables[unit]; oiph != NULL; + for (oiph = ifs->ifs_ipf_htables[unit]; oiph != NULL; oiph = oiph->iph_next) if (strncmp(oiph->iph_name, name, sizeof(oiph->iph_name)) == 0) @@ -164,7 +160,7 @@ iplookupop_t *op; iph->iph_size * sizeof(*iph->iph_table)); if (iph->iph_table == NULL) { KFREE(iph); - ipht_nomem[unit]++; + ifs->ifs_ipht_nomem[unit]++; return ENOMEM; } @@ -173,14 +169,16 @@ iplookupop_t *op; iph->iph_masks[1] = 0; iph->iph_masks[2] = 0; iph->iph_masks[3] = 0; + iph->iph_list = NULL; - iph->iph_next = ipf_htables[unit]; - iph->iph_pnext = &ipf_htables[unit]; - if (ipf_htables[unit] != NULL) - ipf_htables[unit]->iph_pnext = &iph->iph_next; - ipf_htables[unit] = iph; + iph->iph_ref = 1; + iph->iph_next = ifs->ifs_ipf_htables[unit]; + iph->iph_pnext = &ifs->ifs_ipf_htables[unit]; + if (ifs->ifs_ipf_htables[unit] != NULL) + ifs->ifs_ipf_htables[unit]->iph_pnext = &iph->iph_next; + ifs->ifs_ipf_htables[unit] = iph; - ipf_nhtables[unit]++; + ifs->ifs_ipf_nhtables[unit]++; return 0; } @@ -188,78 +186,93 @@ iplookupop_t *op; /* */ -int fr_removehtable(op) +int fr_removehtable(op, ifs) iplookupop_t *op; +ipf_stack_t *ifs; { iphtable_t *iph; - iph = fr_findhtable(op->iplo_unit, op->iplo_name); + iph = fr_findhtable(op->iplo_unit, op->iplo_name, ifs); if (iph == NULL) return ESRCH; if (iph->iph_unit != op->iplo_unit) { 
return EINVAL; } - - if (iph->iph_ref != 0) { + + if (iph->iph_ref != 1) { return EBUSY; } - fr_delhtable(iph); + fr_delhtable(iph, ifs); return 0; } -void fr_delhtable(iph) +void fr_delhtable(iph, ifs) iphtable_t *iph; +ipf_stack_t *ifs; { iphtent_t *ipe; int i; for (i = 0; i < iph->iph_size; i++) while ((ipe = iph->iph_table[i]) != NULL) - if (fr_delhtent(iph, ipe) != 0) + if (fr_delhtent(iph, ipe, ifs) != 0) return; *iph->iph_pnext = iph->iph_next; if (iph->iph_next != NULL) iph->iph_next->iph_pnext = iph->iph_pnext; - ipf_nhtables[iph->iph_unit]--; + ifs->ifs_ipf_nhtables[iph->iph_unit]--; - if (iph->iph_ref == 0) { + if (iph->iph_ref == 1) { KFREES(iph->iph_table, iph->iph_size * sizeof(*iph->iph_table)); KFREE(iph); } } -void fr_derefhtable(iph) +void fr_derefhtable(iph, ifs) iphtable_t *iph; +ipf_stack_t *ifs; { iph->iph_ref--; if (iph->iph_ref == 0) - fr_delhtable(iph); + fr_delhtable(iph, ifs); +} + + +void fr_derefhtent(ipe) +iphtent_t *ipe; +{ + ipe->ipe_ref--; + if (ipe->ipe_ref == 0) { + KFREE(ipe); + } } -iphtable_t *fr_findhtable(unit, name) +iphtable_t *fr_findhtable(unit, name, ifs) int unit; char *name; +ipf_stack_t *ifs; { iphtable_t *iph; - for (iph = ipf_htables[unit]; iph != NULL; iph = iph->iph_next) + for (iph = ifs->ifs_ipf_htables[unit]; iph != NULL; iph = iph->iph_next) if (strncmp(iph->iph_name, name, sizeof(iph->iph_name)) == 0) break; return iph; } -size_t fr_flushhtable(op) +size_t fr_flushhtable(op, ifs) iplookupflush_t *op; +ipf_stack_t *ifs; { iphtable_t *iph; size_t freed; @@ -269,8 +282,8 @@ iplookupflush_t *op; for (i = 0; i <= IPL_LOGMAX; i++) { if (op->iplf_unit == i || op->iplf_unit == IPL_LOGALL) { - while ((iph = ipf_htables[i]) != NULL) { - fr_delhtable(iph); + while ((iph = ifs->ifs_ipf_htables[i]) != NULL) { + fr_delhtable(iph, ifs); freed++; } } @@ -283,9 +296,10 @@ iplookupflush_t *op; /* * Add an entry to a hash table. */ -int fr_addhtent(iph, ipeo) +int fr_addhtent(iph, ipeo, ifs) iphtable_t *iph; iphtent_t *ipeo; +ipf_stack_t *ifs; { iphtent_t *ipe; u_int hv; @@ -316,13 +330,20 @@ iphtent_t *ipeo; } else return -1; - ipe->ipe_ref = 0; + ipe->ipe_ref = 1; ipe->ipe_next = iph->iph_table[hv]; ipe->ipe_pnext = iph->iph_table + hv; if (iph->iph_table[hv] != NULL) iph->iph_table[hv]->ipe_pnext = &ipe->ipe_next; iph->iph_table[hv] = ipe; + + ipe->ipe_snext = iph->iph_list; + ipe->ipe_psnext = &iph->iph_list; + if (ipe->ipe_next != NULL) + ipe->ipe_next->ipe_psnext = &ipe->ipe_snext; + iph->iph_list = ipe; + #ifdef USE_INET6 if (ipe->ipe_family == AF_INET6) { if ((bits >= 0) && (bits != 128)) @@ -347,7 +368,7 @@ iphtent_t *ipeo; case IPHASH_GROUPMAP : ipe->ipe_ptr = fr_addgroup(ipe->ipe_group, NULL, iph->iph_flags, IPL_LOGIPF, - fr_active); + ifs->ifs_fr_active, ifs); break; default : @@ -356,7 +377,7 @@ iphtent_t *ipeo; break; } - ipf_nhtnodes[iph->iph_unit]++; + ifs->ifs_ipf_nhtnodes[iph->iph_unit]++; return 0; } @@ -365,12 +386,12 @@ iphtent_t *ipeo; /* * Delete an entry from a hash table. 
*/ -int fr_delhtent(iph, ipe) +int fr_delhtent(iph, ipe, ifs) iphtable_t *iph; iphtent_t *ipe; +ipf_stack_t *ifs; { - - if (ipe->ipe_ref != 0) + if (ipe->ipe_ref != 1) return EBUSY; @@ -382,7 +403,8 @@ iphtent_t *ipe; { case IPHASH_GROUPMAP : if (ipe->ipe_group != NULL) - fr_delgroup(ipe->ipe_group, IPL_LOGIPF, fr_active); + fr_delgroup(ipe->ipe_group, IPL_LOGIPF, + ifs->ifs_fr_active, ifs); break; default : @@ -393,16 +415,17 @@ iphtent_t *ipe; KFREE(ipe); - ipf_nhtnodes[iph->iph_unit]--; - + ifs->ifs_ipf_nhtnodes[iph->iph_unit]--; + return 0; } -void *fr_iphmfindgroup(tptr, version, aptr) +void *fr_iphmfindgroup(tptr, version, aptr, ifs) void *tptr; int version; void *aptr; +ipf_stack_t *ifs; { i6addr_t *addr; iphtable_t *iph; @@ -416,7 +439,7 @@ void *aptr; ) return NULL; - READ_ENTER(&ip_poolrw); + READ_ENTER(&ifs->ifs_ip_poolrw); iph = tptr; addr = aptr; @@ -433,7 +456,7 @@ void *aptr; rval = ipe->ipe_ptr; else rval = NULL; - RWLOCK_EXIT(&ip_poolrw); + RWLOCK_EXIT(&ifs->ifs_ip_poolrw); return rval; } @@ -447,9 +470,10 @@ void *aptr; /* */ /* Search the hash table for a given address and return a search result. */ /* ------------------------------------------------------------------------ */ -int fr_iphmfindip(tptr, version, aptr) +int fr_iphmfindip(tptr, version, aptr, ifs) void *tptr, *aptr; int version; +ipf_stack_t *ifs; { i6addr_t *addr; iphtable_t *iph; @@ -469,7 +493,7 @@ int version; iph = tptr; addr = aptr; - READ_ENTER(&ip_poolrw); + READ_ENTER(&ifs->ifs_ip_poolrw); #ifdef USE_INET6 if (version == 6) ipe = fr_iphmfind6(iph, &addr->in6); @@ -483,7 +507,7 @@ int version; rval = 0; else rval = 1; - RWLOCK_EXIT(&ip_poolrw); + RWLOCK_EXIT(&ifs->ifs_ip_poolrw); return rval; } @@ -615,4 +639,138 @@ char *data; sd[3] <<= 1; } #endif + +int fr_htable_getnext(token, ilp, ifs) +ipftoken_t *token; +ipflookupiter_t *ilp; +ipf_stack_t *ifs; +{ + iphtent_t *node, zn, *nextnode; + iphtable_t *iph, zp, *nextiph; + int err; + + err = 0; + iph = NULL; + node = NULL; + nextiph = NULL; + nextnode = NULL; + + READ_ENTER(&ifs->ifs_ip_poolrw); + + switch (ilp->ili_otype) + { + case IPFLOOKUPITER_LIST : + iph = token->ipt_data; + if (iph == NULL) { + nextiph = ifs->ifs_ipf_htables[(int)ilp->ili_unit]; + } else { + nextiph = iph->iph_next; + } + + if (nextiph != NULL) { + if (nextiph->iph_next == NULL) + token->ipt_alive = 0; + else { + ATOMIC_INC(nextiph->iph_ref); + } + } else { + bzero((char *)&zp, sizeof(zp)); + nextiph = &zp; + } + break; + + case IPFLOOKUPITER_NODE : + node = token->ipt_data; + if (node == NULL) { + iph = fr_findhtable(ilp->ili_unit, ilp->ili_name, ifs); + if (iph == NULL) + err = ESRCH; + else { + nextnode = iph->iph_list; + } + } else { + nextnode = node->ipe_snext; + } + + if (nextnode != NULL) { + if (nextnode->ipe_snext == NULL) + token->ipt_alive = 0; + else { + ATOMIC_INC(nextnode->ipe_ref); + } + } else { + bzero((char *)&zn, sizeof(zn)); + nextnode = &zn; + } + break; + default : + err = EINVAL; + break; + } + + RWLOCK_EXIT(&ifs->ifs_ip_poolrw); + if (err != 0) + return err; + + switch (ilp->ili_otype) + { + case IPFLOOKUPITER_LIST : + if (iph != NULL) { + WRITE_ENTER(&ifs->ifs_ip_poolrw); + fr_derefhtable(iph, ifs); + RWLOCK_EXIT(&ifs->ifs_ip_poolrw); + } + token->ipt_data = nextiph; + err = COPYOUT(nextiph, ilp->ili_data, sizeof(*nextiph)); + if (err != 0) + err = EFAULT; + break; + + case IPFLOOKUPITER_NODE : + if (node != NULL) { + WRITE_ENTER(&ifs->ifs_ip_poolrw); + fr_derefhtent(node); + RWLOCK_EXIT(&ifs->ifs_ip_poolrw); + } + token->ipt_data = nextnode; + err = 
COPYOUT(nextnode, ilp->ili_data, sizeof(*nextnode)); + if (err != 0) + err = EFAULT; + break; + } + + return err; +} + + +void fr_htable_iterderef(otype, unit, data, ifs) +u_int otype; +int unit; +void *data; +ipf_stack_t *ifs; +{ + + if (data == NULL) + return; + + if (unit < 0 || unit > IPL_LOGMAX) + return; + + switch (otype) + { + case IPFLOOKUPITER_LIST : + WRITE_ENTER(&ifs->ifs_ip_poolrw); + fr_derefhtable((iphtable_t *)data, ifs); + RWLOCK_EXIT(&ifs->ifs_ip_poolrw); + break; + + case IPFLOOKUPITER_NODE : + WRITE_ENTER(&ifs->ifs_ip_poolrw); + fr_derefhtent((iphtent_t *)data); + RWLOCK_EXIT(&ifs->ifs_ip_poolrw); + break; + default : + break; + } +} #endif /* IPFILTER_LOOKUP */ diff --git a/usr/src/uts/common/inet/ipf/ip_log.c b/usr/src/uts/common/inet/ipf/ip_log.c index 380bf597ba..8f5ed7be9d 100644 --- a/usr/src/uts/common/inet/ipf/ip_log.c +++ b/usr/src/uts/common/inet/ipf/ip_log.c @@ -5,7 +5,7 @@ * * $Id: ip_log.c,v 2.75.2.7 2005/06/11 07:47:44 darrenr Exp $ * - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -129,6 +129,7 @@ struct file; #include "netinet/ip_frag.h" #include "netinet/ip_state.h" #include "netinet/ip_auth.h" +#include "netinet/ipf_stack.h" #if (__FreeBSD_version >= 300000) || defined(__NetBSD__) # include <sys/malloc.h> #endif @@ -146,27 +147,11 @@ iplog_select_t iplog_ss[IPL_LOGMAX+1]; extern int selwait; # endif /* IPL_SELECT */ -# if defined(linux) && defined(_KERNEL) -wait_queue_head_t iplh_linux[IPL_LOGSIZE]; -# endif -# if SOLARIS -extern kcondvar_t iplwait; -# endif - -iplog_t **iplh[IPL_LOGSIZE], *iplt[IPL_LOGSIZE], *ipll[IPL_LOGSIZE]; -int iplused[IPL_LOGSIZE]; -static fr_info_t iplcrc[IPL_LOGSIZE]; -int ipl_suppress = 1; -int ipl_buffer_sz; -int ipl_logmax = IPL_LOGMAX; -int ipl_logall = 0; -int ipl_log_init = 0; -int ipl_logsize = IPFILTER_LOGSIZE; +/* ipl_magic never changes */ int ipl_magic[IPL_LOGSIZE] = { IPL_MAGIC, IPL_MAGIC_NAT, IPL_MAGIC_STATE, IPL_MAGIC, IPL_MAGIC, IPL_MAGIC, IPL_MAGIC, IPL_MAGIC }; - /* ------------------------------------------------------------------------ */ /* Function: fr_loginit */ /* Returns: int - 0 == success (always returned) */ @@ -175,16 +160,20 @@ int ipl_magic[IPL_LOGSIZE] = { IPL_MAGIC, IPL_MAGIC_NAT, IPL_MAGIC_STATE, /* Initialise log buffers & pointers. Also iniialised the CRC to a local */ /* secret for use in calculating the "last log checksum". 
*/ /* ------------------------------------------------------------------------ */ -int fr_loginit() +int fr_loginit(ifs) +ipf_stack_t *ifs; { int i; - + + ifs->ifs_ipl_suppress = 1; + ifs->ifs_ipl_logmax = IPL_LOGMAX; + ifs->ifs_ipl_logsize = IPFILTER_LOGSIZE; for (i = IPL_LOGMAX; i >= 0; i--) { - iplt[i] = NULL; - ipll[i] = NULL; - iplh[i] = &iplt[i]; - iplused[i] = 0; - bzero((char *)&iplcrc[i], sizeof(iplcrc[i])); + ifs->ifs_iplt[i] = NULL; + ifs->ifs_ipll[i] = NULL; + ifs->ifs_iplh[i] = &ifs->ifs_iplt[i]; + ifs->ifs_iplused[i] = 0; + bzero((char *)&ifs->ifs_iplcrc[i], sizeof(ifs->ifs_iplcrc[i])); # ifdef IPL_SELECT iplog_ss[i].read_waiter = 0; iplog_ss[i].state = 0; @@ -195,11 +184,11 @@ int fr_loginit() } # if SOLARIS && defined(_KERNEL) - cv_init(&iplwait, "ipl condvar", CV_DRIVER, NULL); + cv_init(&ifs->ifs_iplwait, "ipl condvar", CV_DRIVER, NULL); # endif - MUTEX_INIT(&ipl_mutex, "ipf log mutex"); + MUTEX_INIT(&ifs->ifs_ipl_mutex, "ipf log mutex"); - ipl_log_init = 1; + ifs->ifs_ipl_log_init = 1; return 0; } @@ -212,22 +201,23 @@ int fr_loginit() /* */ /* Clean up any log data that has accumulated without being read. */ /* ------------------------------------------------------------------------ */ -void fr_logunload() +void fr_logunload(ifs) +ipf_stack_t *ifs; { int i; - if (ipl_log_init == 0) + if (ifs->ifs_ipl_log_init == 0) return; for (i = IPL_LOGMAX; i >= 0; i--) - (void) ipflog_clear(i); + (void) ipflog_clear(i, ifs); # if SOLARIS && defined(_KERNEL) - cv_destroy(&iplwait); + cv_destroy(&ifs->ifs_iplwait); # endif - MUTEX_DESTROY(&ipl_mutex); + MUTEX_DESTROY(&ifs->ifs_ipl_mutex); - ipl_log_init = 0; + ifs->ifs_ipl_log_init = 0; } @@ -264,6 +254,7 @@ u_int flags; struct ifnet *ifp; # endif # endif /* SOLARIS */ + ipf_stack_t *ifs = fin->fin_ifs; ipfl.fl_nattag.ipt_num[0] = 0; m = fin->fin_m; @@ -333,9 +324,9 @@ u_int flags; ipfl.fl_unit = (u_int)0; nif = NULL; if (fin->fin_fi.fi_v == 4) - nif = ipf_ipv4; + nif = ifs->ifs_ipf_ipv4; else if (fin->fin_fi.fi_v == 6) - nif = ipf_ipv6; + nif = ifs->ifs_ipf_ipv6; if (nif != NULL) { if (net_getifname(nif, (phy_if_t)ifp, ipfl.fl_ifname, sizeof(ipfl.fl_ifname)) != 0) @@ -366,7 +357,7 @@ u_int flags; # endif /* __hpux */ # endif /* SOLARIS */ mlen = fin->fin_plen - hlen; - if (!ipl_logall) { + if (!ifs->ifs_ipl_logall) { mlen = (flags & FR_LOGBODY) ? MIN(mlen, 128) : 0; } else if ((flags & FR_LOGBODY) == 0) { mlen = 0; @@ -411,7 +402,7 @@ u_int flags; sizes[1] = hlen + mlen; types[1] = 1; # endif /* MENTAT */ - return ipllog(IPL_LOGIPF, fin, ptrs, sizes, types, 2); + return ipllog(IPL_LOGIPF, fin, ptrs, sizes, types, 2, fin->fin_ifs); } @@ -429,12 +420,13 @@ u_int flags; /* miscellaneous packet information, as well as packet data, for reading */ /* from the log device. */ /* ------------------------------------------------------------------------ */ -int ipllog(dev, fin, items, itemsz, types, cnt) +int ipllog(dev, fin, items, itemsz, types, cnt, ifs) int dev; fr_info_t *fin; void **items; size_t *itemsz; int *types, cnt; +ipf_stack_t *ifs; { caddr_t buf, ptr; iplog_t *ipl; @@ -447,20 +439,21 @@ int *types, cnt; * record logged. If it does, just up the count on the previous one * rather than create a new one. 
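The fr_loginit()/fr_logunload() hunks above show the transformation applied throughout this patch for IP Instances: state that used to live in file-scope globals (iplt, iplh, iplused, ipl_mutex and friends) moves into ipf_stack_t, and every consumer gains an ifs argument so each instance owns an independent copy. A stripped-down sketch of that pattern follows; the structure layout here is invented for illustration and is not the real ipf_stack_t definition.

#include <stdlib.h>

#define	LOGUNITS	8	/* stand-in for IPL_LOGSIZE */

/* Per-instance state replacing what used to be global variables. */
typedef struct ipf_stack {
	void	*ifs_iplt[LOGUNITS];	/* per-device log queues */
	int	ifs_iplused[LOGUNITS];	/* bytes queued per device */
	int	ifs_ipl_log_init;	/* set once loginit() has run */
} ipf_stack_t;

/* Every former global access now goes through the ifs argument. */
static int
loginit(ipf_stack_t *ifs)
{
	int i;

	for (i = 0; i < LOGUNITS; i++) {
		ifs->ifs_iplt[i] = NULL;
		ifs->ifs_iplused[i] = 0;
	}
	ifs->ifs_ipl_log_init = 1;
	return (0);
}

int
main(void)
{
	/* Two instances (e.g. two zones) initialise independent state. */
	ipf_stack_t *a = calloc(1, sizeof (*a));
	ipf_stack_t *b = calloc(1, sizeof (*b));

	if (a == NULL || b == NULL)
		return (1);
	(void) loginit(a);
	(void) loginit(b);
	free(a);
	free(b);
	return (0);
}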
*/ - if (ipl_suppress) { - MUTEX_ENTER(&ipl_mutex); + if (ifs->ifs_ipl_suppress) { + MUTEX_ENTER(&ifs->ifs_ipl_mutex); if ((fin != NULL) && (fin->fin_off == 0)) { - if ((ipll[dev] != NULL) && - bcmp((char *)fin, (char *)&iplcrc[dev], + if ((ifs->ifs_ipll[dev] != NULL) && + bcmp((char *)fin, (char *)&ifs->ifs_iplcrc[dev], FI_LCSIZE) == 0) { - ipll[dev]->ipl_count++; - MUTEX_EXIT(&ipl_mutex); + ifs->ifs_ipll[dev]->ipl_count++; + MUTEX_EXIT(&ifs->ifs_ipl_mutex); return 0; } - bcopy((char *)fin, (char *)&iplcrc[dev], FI_LCSIZE); + bcopy((char *)fin, (char *)&ifs->ifs_iplcrc[dev], + FI_LCSIZE); } else - bzero((char *)&iplcrc[dev], FI_CSIZE); - MUTEX_EXIT(&ipl_mutex); + bzero((char *)&ifs->ifs_iplcrc[dev], FI_CSIZE); + MUTEX_EXIT(&ifs->ifs_ipl_mutex); } /* @@ -477,15 +470,15 @@ int *types, cnt; if (buf == NULL) return -1; SPL_NET(s); - MUTEX_ENTER(&ipl_mutex); - if ((iplused[dev] + len) > ipl_logsize) { - MUTEX_EXIT(&ipl_mutex); + MUTEX_ENTER(&ifs->ifs_ipl_mutex); + if ((ifs->ifs_iplused[dev] + len) > IPFILTER_LOGSIZE) { + MUTEX_EXIT(&ifs->ifs_ipl_mutex); SPL_X(s); KFREES(buf, len); return -1; } - iplused[dev] += len; - MUTEX_EXIT(&ipl_mutex); + ifs->ifs_iplused[dev] += len; + MUTEX_EXIT(&ifs->ifs_ipl_mutex); SPL_X(s); /* @@ -517,21 +510,21 @@ int *types, cnt; ptr += itemsz[i]; } SPL_NET(s); - MUTEX_ENTER(&ipl_mutex); - ipll[dev] = ipl; - *iplh[dev] = ipl; - iplh[dev] = &ipl->ipl_next; + MUTEX_ENTER(&ifs->ifs_ipl_mutex); + ifs->ifs_ipll[dev] = ipl; + *ifs->ifs_iplh[dev] = ipl; + ifs->ifs_iplh[dev] = &ipl->ipl_next; /* * Now that the log record has been completed and added to the queue, * wake up any listeners who may want to read it. */ # if SOLARIS && defined(_KERNEL) - cv_signal(&iplwait); - MUTEX_EXIT(&ipl_mutex); + cv_signal(&ifs->ifs_iplwait); + MUTEX_EXIT(&ifs->ifs_ipl_mutex); # else - MUTEX_EXIT(&ipl_mutex); - WAKEUP(iplh,dev); + MUTEX_EXIT(&ifs->ifs_ipl_mutex); + WAKEUP(&ifs->ifs_iplh, dev); # endif SPL_X(s); # ifdef IPL_SELECT @@ -553,9 +546,10 @@ int *types, cnt; /* NOTE: This function will block and wait for a signal to return data if */ /* there is none present. Asynchronous I/O is not implemented. */ /* ------------------------------------------------------------------------ */ -int ipflog_read(unit, uio) +int ipflog_read(unit, uio, ifs) minor_t unit; struct uio *uio; +ipf_stack_t *ifs; { size_t dlen, copied; int error = 0; @@ -571,7 +565,7 @@ struct uio *uio; if (uio->uio_resid == 0) return 0; if ((uio->uio_resid < sizeof(iplog_t)) || - (uio->uio_resid > ipl_logsize)) + (uio->uio_resid > ifs->ifs_ipl_logsize)) return EINVAL; /* @@ -579,12 +573,12 @@ struct uio *uio; * if the log is empty. 
*/ SPL_NET(s); - MUTEX_ENTER(&ipl_mutex); + MUTEX_ENTER(&ifs->ifs_ipl_mutex); - while (iplt[unit] == NULL) { + while (ifs->ifs_iplt[unit] == NULL) { # if SOLARIS && defined(_KERNEL) - if (!cv_wait_sig(&iplwait, &ipl_mutex.ipf_lk)) { - MUTEX_EXIT(&ipl_mutex); + if (!cv_wait_sig(&ifs->ifs_iplwait, &ifs->ifs_ipl_mutex.ipf_lk)) { + MUTEX_EXIT(&ifs->ifs_ipl_mutex); return EINTR; } # else @@ -594,29 +588,29 @@ struct uio *uio; # ifdef IPL_SELECT if (uio->uio_fpflags & (FNBLOCK|FNDELAY)) { /* this is no blocking system call */ - MUTEX_EXIT(&ipl_mutex); + MUTEX_EXIT(&ifs->ifs_ipl_mutex); return 0; } # endif - MUTEX_EXIT(&ipl_mutex); - l = get_sleep_lock(&iplh[unit]); - error = sleep(&iplh[unit], PZERO+1); + MUTEX_EXIT(&ifs->ifs_ipl_mutex); + l = get_sleep_lock(&ifs->ifs_iplh[unit]); + error = sleep(&ifs->ifs_iplh[unit], PZERO+1); spinunlock(l); # else # if defined(__osf__) && defined(_KERNEL) - error = mpsleep(&iplh[unit], PSUSP|PCATCH, "iplread", 0, - &ipl_mutex, MS_LOCK_SIMPLE); + error = mpsleep(&ifs->ifs_iplh[unit], PSUSP|PCATCH, "iplread", 0, + &ifs->ifs_ipl_mutex, MS_LOCK_SIMPLE); # else - MUTEX_EXIT(&ipl_mutex); + MUTEX_EXIT(&ifs->ifs_ipl_mutex); SPL_X(s); - error = SLEEP(unit + iplh, "ipl sleep"); + error = SLEEP(&ifs->ifs_iplh[unit], "ipl sleep"); # endif /* __osf__ */ # endif /* __hpux */ if (error) return error; SPL_NET(s); - MUTEX_ENTER(&ipl_mutex); + MUTEX_ENTER(&ifs->ifs_ipl_mutex); # endif /* SOLARIS */ } @@ -624,41 +618,41 @@ struct uio *uio; uio->uio_rw = UIO_READ; # endif - for (copied = 0; (ipl = iplt[unit]) != NULL; copied += dlen) { + for (copied = 0; ((ipl = ifs->ifs_iplt[unit]) != NULL); copied += dlen) { dlen = ipl->ipl_dsize; if (dlen > uio->uio_resid) break; /* * Don't hold the mutex over the uiomove call. */ - iplt[unit] = ipl->ipl_next; - iplused[unit] -= dlen; - if (iplt[unit] == NULL) { - iplh[unit] = &iplt[unit]; - ipll[unit] = NULL; + ifs->ifs_iplt[unit] = ipl->ipl_next; + ifs->ifs_iplused[unit] -= dlen; + if (ifs->ifs_iplt[unit] == NULL) { + ifs->ifs_iplh[unit] = &ifs->ifs_iplt[unit]; + ifs->ifs_ipll[unit] = NULL; } - MUTEX_EXIT(&ipl_mutex); + MUTEX_EXIT(&ifs->ifs_ipl_mutex); SPL_X(s); error = UIOMOVE((caddr_t)ipl, dlen, UIO_READ, uio); if (error) { SPL_NET(s); - MUTEX_ENTER(&ipl_mutex); - iplused[unit] += dlen; - ipl->ipl_next = iplt[unit]; - iplt[unit] = ipl; - ipll[unit] = ipl; - if (iplh[unit] == &iplt[unit]) { - *iplh[unit] = ipl; - iplh[unit] = &ipl->ipl_next; + MUTEX_ENTER(&ifs->ifs_ipl_mutex); + ifs->ifs_iplused[unit] += dlen; + ipl->ipl_next = ifs->ifs_iplt[unit]; + ifs->ifs_iplt[unit] = ipl; + ifs->ifs_ipll[unit] = ipl; + if (ifs->ifs_iplh[unit] == &ifs->ifs_iplt[unit]) { + *ifs->ifs_iplh[unit] = ipl; + ifs->ifs_iplh[unit] = &ipl->ipl_next; } break; } - MUTEX_ENTER(&ipl_mutex); + MUTEX_ENTER(&ifs->ifs_ipl_mutex); KFREES((caddr_t)ipl, dlen); SPL_NET(s); } - MUTEX_EXIT(&ipl_mutex); + MUTEX_EXIT(&ifs->ifs_ipl_mutex); SPL_X(s); return error; } @@ -671,25 +665,26 @@ struct uio *uio; /* */ /* Deletes all queued up log records for a given output device. 
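A detail worth noting in ipflog_read() above is the queue representation it maintains: iplh[unit] (now ifs->ifs_iplh[unit]) is a pointer to the tail record's next-pointer, so appending a record and resetting an emptied queue are both constant-time operations. A small stand-alone illustration of that head/tail idiom, with an invented record type, is shown here.

#include <stdio.h>

typedef struct rec {
	struct rec	*next;
	int		val;
} rec_t;

/* head is the list; tail always points at the NULL next-pointer at the end. */
static rec_t	*head = NULL;
static rec_t	**tail = &head;

static void
append(rec_t *r)
{
	r->next = NULL;
	*tail = r;		/* link it in, whether or not the list is empty */
	tail = &r->next;	/* the new record now holds the terminating NULL */
}

static rec_t *
take(void)
{
	rec_t *r = head;

	if (r != NULL) {
		head = r->next;
		if (head == NULL)	/* emptied: point tail back at the head slot */
			tail = &head;
	}
	return (r);
}

int
main(void)
{
	rec_t a = { NULL, 1 }, b = { NULL, 2 };
	rec_t *r;

	append(&a);
	append(&b);
	while ((r = take()) != NULL)
		printf("%d\n", r->val);
	return (0);
}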
*/ /* ------------------------------------------------------------------------ */ -int ipflog_clear(unit) +int ipflog_clear(unit, ifs) minor_t unit; +ipf_stack_t *ifs; { iplog_t *ipl; int used; SPL_INT(s); SPL_NET(s); - MUTEX_ENTER(&ipl_mutex); - while ((ipl = iplt[unit]) != NULL) { - iplt[unit] = ipl->ipl_next; + MUTEX_ENTER(&ifs->ifs_ipl_mutex); + while ((ipl = ifs->ifs_iplt[unit]) != NULL) { + ifs->ifs_iplt[unit] = ipl->ipl_next; KFREES((caddr_t)ipl, ipl->ipl_dsize); } - iplh[unit] = &iplt[unit]; - ipll[unit] = NULL; - used = iplused[unit]; - iplused[unit] = 0; - bzero((char *)&iplcrc[unit], FI_CSIZE); - MUTEX_EXIT(&ipl_mutex); + ifs->ifs_iplh[unit] = &ifs->ifs_iplt[unit]; + ifs->ifs_ipll[unit] = NULL; + used = ifs->ifs_iplused[unit]; + ifs->ifs_iplused[unit] = 0; + bzero((char *)&ifs->ifs_iplcrc[unit], FI_CSIZE); + MUTEX_EXIT(&ifs->ifs_ipl_mutex); SPL_X(s); return used; } diff --git a/usr/src/uts/common/inet/ipf/ip_lookup.c b/usr/src/uts/common/inet/ipf/ip_lookup.c index 299dadf0bb..da8d6c5bd7 100644 --- a/usr/src/uts/common/inet/ipf/ip_lookup.c +++ b/usr/src/uts/common/inet/ipf/ip_lookup.c @@ -3,7 +3,7 @@ * * See the IPFILTER.LICENCE file for details on licencing. * - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -64,6 +64,7 @@ struct file; #include "netinet/ip_pool.h" #include "netinet/ip_htable.h" #include "netinet/ip_lookup.h" +#include "netinet/ipf_stack.h" /* END OF INCLUDES */ #if !defined(lint) @@ -71,14 +72,13 @@ static const char rcsid[] = "@(#)$Id: ip_lookup.c,v 2.35.2.7 2005/06/12 07:18:20 #endif #ifdef IPFILTER_LOOKUP -int ip_lookup_inited = 0; +static int iplookup_addnode __P((caddr_t, ipf_stack_t *)); +static int iplookup_delnode __P((caddr_t data, ipf_stack_t *)); +static int iplookup_addtable __P((caddr_t, ipf_stack_t *)); +static int iplookup_deltable __P((caddr_t, ipf_stack_t *)); +static int iplookup_stats __P((caddr_t, ipf_stack_t *)); +static int iplookup_flush __P((caddr_t, ipf_stack_t *)); -static int iplookup_addnode __P((caddr_t)); -static int iplookup_delnode __P((caddr_t data)); -static int iplookup_addtable __P((caddr_t)); -static int iplookup_deltable __P((caddr_t)); -static int iplookup_stats __P((caddr_t)); -static int iplookup_flush __P((caddr_t)); /* ------------------------------------------------------------------------ */ @@ -88,16 +88,18 @@ static int iplookup_flush __P((caddr_t)); /* */ /* Initialise all of the subcomponents of the lookup infrstructure. */ /* ------------------------------------------------------------------------ */ -int ip_lookup_init() +int ip_lookup_init(ifs) +ipf_stack_t *ifs; { - if (ip_pool_init() == -1) + if (ip_pool_init(ifs) == -1) return -1; - RWLOCK_INIT(&ip_poolrw, "ip pool rwlock"); - - ip_lookup_inited = 1; + RWLOCK_INIT(&ifs->ifs_ip_poolrw, "ip pool rwlock"); + ifs->ifs_ip_lookup_inited = 1; + ifs->ifs_ipftokenhead = NULL; + ifs->ifs_ipftokentail = &ifs->ifs_ipftokenhead; return 0; } @@ -111,14 +113,15 @@ int ip_lookup_init() /* has been running. Also, do any other deinitialisation required such */ /* ip_lookup_init() can be called again, safely. 
*/ /* ------------------------------------------------------------------------ */ -void ip_lookup_unload() +void ip_lookup_unload(ifs) +ipf_stack_t *ifs; { - ip_pool_fini(); - fr_htable_unload(); + ip_pool_fini(ifs); + fr_htable_unload(ifs); - if (ip_lookup_inited == 1) { - RW_DESTROY(&ip_poolrw); - ip_lookup_inited = 0; + if (ifs->ifs_ip_lookup_inited == 1) { + RW_DESTROY(&ifs->ifs_ip_poolrw); + ifs->ifs_ip_lookup_inited = 0; } } @@ -135,10 +138,12 @@ void ip_lookup_unload() /* involves just calling another function to handle the specifics of each */ /* command. */ /* ------------------------------------------------------------------------ */ -int ip_lookup_ioctl(data, cmd, mode) +int ip_lookup_ioctl(data, cmd, mode, uid, ctx, ifs) caddr_t data; ioctlcmd_t cmd; -int mode; +int mode, uid; +void *ctx; +ipf_stack_t *ifs; { int err; SPL_INT(s); @@ -151,41 +156,45 @@ int mode; { case SIOCLOOKUPADDNODE : case SIOCLOOKUPADDNODEW : - WRITE_ENTER(&ip_poolrw); - err = iplookup_addnode(data); - RWLOCK_EXIT(&ip_poolrw); + WRITE_ENTER(&ifs->ifs_ip_poolrw); + err = iplookup_addnode(data, ifs); + RWLOCK_EXIT(&ifs->ifs_ip_poolrw); break; case SIOCLOOKUPDELNODE : case SIOCLOOKUPDELNODEW : - WRITE_ENTER(&ip_poolrw); - err = iplookup_delnode(data); - RWLOCK_EXIT(&ip_poolrw); + WRITE_ENTER(&ifs->ifs_ip_poolrw); + err = iplookup_delnode(data, ifs); + RWLOCK_EXIT(&ifs->ifs_ip_poolrw); break; case SIOCLOOKUPADDTABLE : - WRITE_ENTER(&ip_poolrw); - err = iplookup_addtable(data); - RWLOCK_EXIT(&ip_poolrw); + WRITE_ENTER(&ifs->ifs_ip_poolrw); + err = iplookup_addtable(data, ifs); + RWLOCK_EXIT(&ifs->ifs_ip_poolrw); break; case SIOCLOOKUPDELTABLE : - WRITE_ENTER(&ip_poolrw); - err = iplookup_deltable(data); - RWLOCK_EXIT(&ip_poolrw); + WRITE_ENTER(&ifs->ifs_ip_poolrw); + err = iplookup_deltable(data, ifs); + RWLOCK_EXIT(&ifs->ifs_ip_poolrw); break; case SIOCLOOKUPSTAT : case SIOCLOOKUPSTATW : - WRITE_ENTER(&ip_poolrw); - err = iplookup_stats(data); - RWLOCK_EXIT(&ip_poolrw); + WRITE_ENTER(&ifs->ifs_ip_poolrw); + err = iplookup_stats(data, ifs); + RWLOCK_EXIT(&ifs->ifs_ip_poolrw); break; case SIOCLOOKUPFLUSH : - WRITE_ENTER(&ip_poolrw); - err = iplookup_flush(data); - RWLOCK_EXIT(&ip_poolrw); + WRITE_ENTER(&ifs->ifs_ip_poolrw); + err = iplookup_flush(data, ifs); + RWLOCK_EXIT(&ifs->ifs_ip_poolrw); + break; + + case SIOCLOOKUPITER : + err = ip_lookup_iterate(data, uid, ctx, ifs); break; default : @@ -206,8 +215,9 @@ int mode; /* parent structure refered to by name exists and if it does, then go on to */ /* add a node to it. 
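ip_lookup_ioctl() above keeps the same shape after the change: each sub-command is decoded and dispatched under the pool lock, only now that lock is the per-instance ifs->ifs_ip_poolrw rather than a global. The sketch below shows the guard-then-dispatch structure with a POSIX rwlock standing in for the kernel RWLOCK macros; the command names and handlers are made up for illustration.

#include <errno.h>
#include <pthread.h>

enum cmd { CMD_ADDNODE, CMD_DELNODE, CMD_STATS };

typedef struct instance {
	pthread_rwlock_t	poolrw;	/* per-instance, not a global */
	int			nodes;
} instance_t;

static int
do_ioctl(instance_t *ifs, enum cmd cmd)
{
	int err = 0;

	switch (cmd) {
	case CMD_ADDNODE:
		(void) pthread_rwlock_wrlock(&ifs->poolrw);
		ifs->nodes++;		/* stand-in for iplookup_addnode() */
		(void) pthread_rwlock_unlock(&ifs->poolrw);
		break;
	case CMD_DELNODE:
		(void) pthread_rwlock_wrlock(&ifs->poolrw);
		if (ifs->nodes > 0)
			ifs->nodes--;	/* stand-in for iplookup_delnode() */
		(void) pthread_rwlock_unlock(&ifs->poolrw);
		break;
	case CMD_STATS:
		(void) pthread_rwlock_rdlock(&ifs->poolrw);
		/* a read-only snapshot would be copied out here */
		(void) pthread_rwlock_unlock(&ifs->poolrw);
		break;
	default:
		err = EINVAL;
		break;
	}
	return (err);
}

int
main(void)
{
	instance_t ifs = { PTHREAD_RWLOCK_INITIALIZER, 0 };

	(void) do_ioctl(&ifs, CMD_ADDNODE);
	(void) do_ioctl(&ifs, CMD_STATS);
	return (do_ioctl(&ifs, CMD_DELNODE));
}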
*/ /* ------------------------------------------------------------------------ */ -static int iplookup_addnode(data) +static int iplookup_addnode(data, ifs) caddr_t data; +ipf_stack_t *ifs; { ip_pool_node_t node, *m; iplookupop_t op; @@ -230,7 +240,7 @@ caddr_t data; if (err != 0) return EFAULT; - p = ip_pool_find(op.iplo_unit, op.iplo_name); + p = ip_pool_find(op.iplo_unit, op.iplo_name, ifs); if (p == NULL) return ESRCH; @@ -243,7 +253,7 @@ caddr_t data; if (m) return EEXIST; err = ip_pool_insert(p, &node.ipn_addr, - &node.ipn_mask, node.ipn_info); + &node.ipn_mask, node.ipn_info, ifs); break; case IPLT_HASH : @@ -254,10 +264,10 @@ caddr_t data; if (err != 0) return EFAULT; - iph = fr_findhtable(op.iplo_unit, op.iplo_name); + iph = fr_findhtable(op.iplo_unit, op.iplo_name, ifs); if (iph == NULL) return ESRCH; - err = fr_addhtent(iph, &hte); + err = fr_addhtent(iph, &hte, ifs); break; default : @@ -276,8 +286,9 @@ caddr_t data; /* Delete a node from a lookup table by first looking for the table it is */ /* in and then deleting the entry that gets found. */ /* ------------------------------------------------------------------------ */ -static int iplookup_delnode(data) +static int iplookup_delnode(data, ifs) caddr_t data; +ipf_stack_t *ifs; { ip_pool_node_t node, *m; iplookupop_t op; @@ -301,14 +312,14 @@ caddr_t data; if (err != 0) return EFAULT; - p = ip_pool_find(op.iplo_unit, op.iplo_name); + p = ip_pool_find(op.iplo_unit, op.iplo_name, ifs); if (!p) return ESRCH; m = ip_pool_findeq(p, &node.ipn_addr, &node.ipn_mask); if (m == NULL) return ENOENT; - err = ip_pool_remove(p, m); + err = ip_pool_remove(p, m, ifs); break; case IPLT_HASH : @@ -319,10 +330,10 @@ caddr_t data; if (err != 0) return EFAULT; - iph = fr_findhtable(op.iplo_unit, op.iplo_name); + iph = fr_findhtable(op.iplo_unit, op.iplo_name, ifs); if (iph == NULL) return ESRCH; - err = fr_delhtent(iph, &hte); + err = fr_delhtent(iph, &hte, ifs); break; default : @@ -341,8 +352,9 @@ caddr_t data; /* Create a new lookup table, if one doesn't already exist using the name */ /* for this one. */ /* ------------------------------------------------------------------------ */ -static int iplookup_addtable(data) +static int iplookup_addtable(data, ifs) caddr_t data; +ipf_stack_t *ifs; { iplookupop_t op; int err; @@ -355,17 +367,17 @@ caddr_t data; switch (op.iplo_type) { case IPLT_POOL : - if (ip_pool_find(op.iplo_unit, op.iplo_name) != NULL) + if (ip_pool_find(op.iplo_unit, op.iplo_name, ifs) != NULL) err = EEXIST; else - err = ip_pool_create(&op); + err = ip_pool_create(&op, ifs); break; case IPLT_HASH : - if (fr_findhtable(op.iplo_unit, op.iplo_name) != NULL) + if (fr_findhtable(op.iplo_unit, op.iplo_name, ifs) != NULL) err = EEXIST; else - err = fr_newhtable(&op); + err = fr_newhtable(&op, ifs); break; default : @@ -384,8 +396,9 @@ caddr_t data; /* Decodes ioctl request to remove a particular hash table or pool and */ /* calls the relevant function to do the cleanup. 
*/ /* ------------------------------------------------------------------------ */ -static int iplookup_deltable(data) +static int iplookup_deltable(data, ifs) caddr_t data; +ipf_stack_t *ifs; { iplookupop_t op; int err; @@ -403,11 +416,11 @@ caddr_t data; switch (op.iplo_type) { case IPLT_POOL : - err = ip_pool_destroy(&op); + err = ip_pool_destroy(&op, ifs); break; case IPLT_HASH : - err = fr_removehtable(&op); + err = fr_removehtable(&op, ifs); break; default : @@ -425,8 +438,9 @@ caddr_t data; /* */ /* Copy statistical information from inside the kernel back to user space. */ /* ------------------------------------------------------------------------ */ -static int iplookup_stats(data) +static int iplookup_stats(data, ifs) caddr_t data; +ipf_stack_t *ifs; { iplookupop_t op; int err; @@ -437,11 +451,11 @@ caddr_t data; switch (op.iplo_type) { case IPLT_POOL : - err = ip_pool_statistics(&op); + err = ip_pool_statistics(&op, ifs); break; case IPLT_HASH : - err = fr_gethtablestat(&op); + err = fr_gethtablestat(&op, ifs); break; default : @@ -460,8 +474,9 @@ caddr_t data; /* A flush is called when we want to flush all the nodes from a particular */ /* entry in the hash table/pool or want to remove all groups from those. */ /* ------------------------------------------------------------------------ */ -static int iplookup_flush(data) +static int iplookup_flush(data, ifs) caddr_t data; +ipf_stack_t *ifs; { int err, unit, num, type; iplookupflush_t flush; @@ -481,12 +496,12 @@ caddr_t data; if (type == IPLT_POOL || type == IPLT_ALL) { err = 0; - num = ip_pool_flush(&flush); + num = ip_pool_flush(&flush, ifs); } if (type == IPLT_HASH || type == IPLT_ALL) { err = 0; - num += fr_flushhtable(&flush); + num += fr_flushhtable(&flush, ifs); } if (err == 0) { @@ -497,35 +512,123 @@ caddr_t data; } -void ip_lookup_deref(type, ptr) + +void ip_lookup_deref(type, ptr, ifs) int type; void *ptr; +ipf_stack_t *ifs; { if (ptr == NULL) return; - WRITE_ENTER(&ip_poolrw); + WRITE_ENTER(&ifs->ifs_ip_poolrw); switch (type) { case IPLT_POOL : - ip_pool_deref(ptr); + ip_pool_deref(ptr, ifs); + break; + + case IPLT_HASH : + fr_derefhtable(ptr, ifs); + break; + } + RWLOCK_EXIT(&ifs->ifs_ip_poolrw); +} + + +int ip_lookup_iterate(data, uid, ctx, ifs) +void *data; +int uid; +void *ctx; +ipf_stack_t *ifs; +{ + ipflookupiter_t iter; + ipftoken_t *token; + int err; + + err = fr_inobj(data, &iter, IPFOBJ_LOOKUPITER); + if (err != 0) { +#ifdef _KERNEL + (void) printf("fr_inobj\n"); +#endif + return err; + } + + if (iter.ili_unit < 0 || iter.ili_unit > IPL_LOGMAX) { +#ifdef _KERNEL + (void) printf("unit=%d\n", iter.ili_unit); +#endif + return EINVAL; + } + + if (iter.ili_ival != IPFGENITER_LOOKUP) { +#ifdef _KERNEL + (void) printf("ival=%d\n", iter.ili_ival); +#endif + return EINVAL; + } + + token = ipf_findtoken(iter.ili_key, uid, ctx, ifs); + if (token == NULL) { + RWLOCK_EXIT(&ifs->ifs_ipf_tokens); + return ESRCH; + } + + switch (iter.ili_type) + { + case IPLT_POOL : + err = ip_pool_getnext(token, &iter, ifs); + break; + case IPLT_HASH : + err = fr_htable_getnext(token, &iter, ifs); + break; + default : +#ifdef _KERNEL + (void) printf("type=%d\n", iter.ili_type); +#endif + err = EINVAL; break; + } + RWLOCK_EXIT(&ifs->ifs_ipf_tokens); + return err; +} + + +void ip_lookup_iterderef(type, data, ifs) +u_32_t type; +void *data; +ipf_stack_t *ifs; +{ + iplookupiterkey_t key; + + key.ilik_key = type; + + if (key.ilik_unstr.ilik_ival != IPFGENITER_LOOKUP) + return; + + switch (key.ilik_unstr.ilik_type) + { case IPLT_HASH : - 
fr_derefhtable(ptr); + fr_htable_iterderef((u_int)key.ilik_unstr.ilik_otype, + (int)key.ilik_unstr.ilik_unit, data, ifs); + break; + case IPLT_POOL : + ip_pool_iterderef((u_int)key.ilik_unstr.ilik_otype, + (int)key.ilik_unstr.ilik_unit, data, ifs); break; } - RWLOCK_EXIT(&ip_poolrw); } #else /* IPFILTER_LOOKUP */ /*ARGSUSED*/ -int ip_lookup_ioctl(data, cmd, mode) +int ip_lookup_ioctl(data, cmd, mode, uid, ifs) caddr_t data; ioctlcmd_t cmd; -int mode; +int mode, uid; +ipf_stack_t *ifs; { return EIO; } diff --git a/usr/src/uts/common/inet/ipf/ip_nat.c b/usr/src/uts/common/inet/ipf/ip_nat.c index a4e2cecf31..8eaa4fe899 100644 --- a/usr/src/uts/common/inet/ipf/ip_nat.c +++ b/usr/src/uts/common/inet/ipf/ip_nat.c @@ -3,7 +3,7 @@ * * See the IPFILTER.LICENCE file for details on licencing. * - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -100,6 +100,7 @@ extern struct ifnet vpnif; #include "netinet/ip_frag.h" #include "netinet/ip_state.h" #include "netinet/ip_proxy.h" +#include "netinet/ipf_stack.h" #ifdef IPFILTER_SYNC #include "netinet/ip_sync.h" #endif @@ -139,65 +140,39 @@ static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.195.2.42 2005/08/11 19:51:36 /* ======================================================================== */ -nat_t **nat_table[2] = { NULL, NULL }, - *nat_instances = NULL; -ipnat_t *nat_list = NULL; -u_int ipf_nattable_max = NAT_TABLE_MAX; -u_int ipf_nattable_sz = NAT_TABLE_SZ; -u_int ipf_natrules_sz = NAT_SIZE; -u_int ipf_rdrrules_sz = RDR_SIZE; -u_int ipf_hostmap_sz = HOSTMAP_SIZE; -u_int fr_nat_maxbucket = 0, - fr_nat_maxbucket_reset = 1; -u_32_t nat_masks = 0; -u_32_t rdr_masks = 0; -ipnat_t **nat_rules = NULL; -ipnat_t **rdr_rules = NULL; -hostmap_t **maptable = NULL; -ipftq_t nat_tqb[IPF_TCP_NSTATES]; -ipftq_t nat_udptq; -ipftq_t nat_icmptq; -ipftq_t nat_iptq; -ipftq_t *nat_utqe = NULL; -#ifdef IPFILTER_LOG -int nat_logging = 1; -#else -int nat_logging = 0; -#endif -u_long fr_defnatage = DEF_NAT_AGE, - fr_defnatipage = 120, /* 60 seconds */ - fr_defnaticmpage = 6; /* 3 seconds */ -natstat_t nat_stats; -int fr_nat_lock = 0; -int fr_nat_init = 0; - -static int nat_flushtable __P((void)); -static int nat_clearlist __P((void)); -static void nat_addnat __P((struct ipnat *)); -static void nat_addrdr __P((struct ipnat *)); -static void nat_delete __P((struct nat *, int)); +static int nat_flushtable __P((ipf_stack_t *)); +static int nat_clearlist __P((ipf_stack_t *)); +static void nat_addnat __P((struct ipnat *, ipf_stack_t *)); +static void nat_addrdr __P((struct ipnat *, ipf_stack_t *)); +static void nat_delete __P((struct nat *, int, ipf_stack_t *)); static void nat_delrdr __P((struct ipnat *)); static void nat_delnat __P((struct ipnat *)); -static int fr_natgetent __P((caddr_t)); -static int fr_natgetsz __P((caddr_t)); -static int fr_natputent __P((caddr_t, int)); -static void nat_tabmove __P((nat_t *)); +static int fr_natgetent __P((caddr_t, ipf_stack_t *)); +static int fr_natgetsz __P((caddr_t, ipf_stack_t *)); +static int fr_natputent __P((caddr_t, int, ipf_stack_t *)); +static void nat_tabmove __P((nat_t *, ipf_stack_t *)); static int nat_match __P((fr_info_t *, ipnat_t *)); static INLINE int nat_newmap __P((fr_info_t *, nat_t *, natinfo_t *)); static INLINE int nat_newrdr __P((fr_info_t *, nat_t *, natinfo_t *)); static hostmap_t *nat_hostmap __P((ipnat_t *, struct in_addr, - struct in_addr, struct in_addr, u_32_t)); + struct in_addr, struct 
in_addr, u_32_t, + ipf_stack_t *)); static void nat_hostmapdel __P((struct hostmap *)); static INLINE int nat_icmpquerytype4 __P((int)); -static int nat_siocaddnat __P((ipnat_t *, ipnat_t **, int)); -static void nat_siocdelnat __P((ipnat_t *, ipnat_t **, int)); +static int nat_siocaddnat __P((ipnat_t *, ipnat_t **, int, + ipf_stack_t *)); +static void nat_siocdelnat __P((ipnat_t *, ipnat_t **, int, + ipf_stack_t *)); +static INLINE int nat_icmperrortype4 __P((int)); static INLINE int nat_finalise __P((fr_info_t *, nat_t *, natinfo_t *, tcphdr_t *, nat_t **, int)); -static void nat_resolverule __P((ipnat_t *)); +static INLINE void nat_resolverule __P((ipnat_t *, ipf_stack_t *)); static nat_t *fr_natclone __P((fr_info_t *, nat_t *)); static void nat_mssclamp __P((tcphdr_t *, u_32_t, u_short *)); static INLINE int nat_wildok __P((nat_t *, int, int, int, int)); +static int nat_getnext __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *)); +static int nat_iterator __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *)); /* ------------------------------------------------------------------------ */ @@ -207,93 +182,120 @@ static INLINE int nat_wildok __P((nat_t *, int, int, int, int)); /* */ /* Initialise all of the NAT locks, tables and other structures. */ /* ------------------------------------------------------------------------ */ -int fr_natinit() +int fr_natinit(ifs) +ipf_stack_t *ifs; { int i; - KMALLOCS(nat_table[0], nat_t **, sizeof(nat_t *) * ipf_nattable_sz); - if (nat_table[0] != NULL) - bzero((char *)nat_table[0], ipf_nattable_sz * sizeof(nat_t *)); + ifs->ifs_ipf_nattable_sz = NAT_TABLE_SZ; + ifs->ifs_ipf_nattable_max = NAT_TABLE_MAX; + ifs->ifs_ipf_natrules_sz = NAT_SIZE; + ifs->ifs_ipf_rdrrules_sz = RDR_SIZE; + ifs->ifs_ipf_hostmap_sz = HOSTMAP_SIZE; + ifs->ifs_fr_nat_maxbucket_reset = 1; +#ifdef IPFILTER_LOG + ifs->ifs_nat_logging = 1; +#else + ifs->ifs_nat_logging = 0; +#endif + ifs->ifs_fr_defnatage = DEF_NAT_AGE; + ifs->ifs_fr_defnatipage = 120; /* 60 seconds */ + ifs->ifs_fr_defnaticmpage = 6; /* 3 seconds */ + + KMALLOCS(ifs->ifs_nat_table[0], nat_t **, + sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz); + if (ifs->ifs_nat_table[0] != NULL) + bzero((char *)ifs->ifs_nat_table[0], + ifs->ifs_ipf_nattable_sz * sizeof(nat_t *)); else return -1; - KMALLOCS(nat_table[1], nat_t **, sizeof(nat_t *) * ipf_nattable_sz); - if (nat_table[1] != NULL) - bzero((char *)nat_table[1], ipf_nattable_sz * sizeof(nat_t *)); + KMALLOCS(ifs->ifs_nat_table[1], nat_t **, + sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz); + if (ifs->ifs_nat_table[1] != NULL) + bzero((char *)ifs->ifs_nat_table[1], + ifs->ifs_ipf_nattable_sz * sizeof(nat_t *)); else return -2; - KMALLOCS(nat_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_natrules_sz); - if (nat_rules != NULL) - bzero((char *)nat_rules, ipf_natrules_sz * sizeof(ipnat_t *)); + KMALLOCS(ifs->ifs_nat_rules, ipnat_t **, + sizeof(ipnat_t *) * ifs->ifs_ipf_natrules_sz); + if (ifs->ifs_nat_rules != NULL) + bzero((char *)ifs->ifs_nat_rules, + ifs->ifs_ipf_natrules_sz * sizeof(ipnat_t *)); else return -3; - KMALLOCS(rdr_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_rdrrules_sz); - if (rdr_rules != NULL) - bzero((char *)rdr_rules, ipf_rdrrules_sz * sizeof(ipnat_t *)); + KMALLOCS(ifs->ifs_rdr_rules, ipnat_t **, + sizeof(ipnat_t *) * ifs->ifs_ipf_rdrrules_sz); + if (ifs->ifs_rdr_rules != NULL) + bzero((char *)ifs->ifs_rdr_rules, + ifs->ifs_ipf_rdrrules_sz * sizeof(ipnat_t *)); else return -4; - KMALLOCS(maptable, hostmap_t **, sizeof(hostmap_t *) * ipf_hostmap_sz); - if (maptable != NULL) 
- bzero((char *)maptable, sizeof(hostmap_t *) * ipf_hostmap_sz); + KMALLOCS(ifs->ifs_maptable, hostmap_t **, + sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz); + if (ifs->ifs_maptable != NULL) + bzero((char *)ifs->ifs_maptable, + sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz); else return -5; - KMALLOCS(nat_stats.ns_bucketlen[0], u_long *, - ipf_nattable_sz * sizeof(u_long)); - if (nat_stats.ns_bucketlen[0] == NULL) - return -6; - bzero((char *)nat_stats.ns_bucketlen[0], - ipf_nattable_sz * sizeof(u_long)); - - KMALLOCS(nat_stats.ns_bucketlen[1], u_long *, - ipf_nattable_sz * sizeof(u_long)); - if (nat_stats.ns_bucketlen[1] == NULL) - return -7; - - bzero((char *)nat_stats.ns_bucketlen[1], - ipf_nattable_sz * sizeof(u_long)); - - if (fr_nat_maxbucket == 0) { - for (i = ipf_nattable_sz; i > 0; i >>= 1) - fr_nat_maxbucket++; - fr_nat_maxbucket *= 2; + ifs->ifs_ipf_hm_maplist = NULL; + + KMALLOCS(ifs->ifs_nat_stats.ns_bucketlen[0], u_long *, + ifs->ifs_ipf_nattable_sz * sizeof(u_long)); + if (ifs->ifs_nat_stats.ns_bucketlen[0] == NULL) + return -1; + bzero((char *)ifs->ifs_nat_stats.ns_bucketlen[0], + ifs->ifs_ipf_nattable_sz * sizeof(u_long)); + + KMALLOCS(ifs->ifs_nat_stats.ns_bucketlen[1], u_long *, + ifs->ifs_ipf_nattable_sz * sizeof(u_long)); + if (ifs->ifs_nat_stats.ns_bucketlen[1] == NULL) + return -1; + bzero((char *)ifs->ifs_nat_stats.ns_bucketlen[1], + ifs->ifs_ipf_nattable_sz * sizeof(u_long)); + + if (ifs->ifs_fr_nat_maxbucket == 0) { + for (i = ifs->ifs_ipf_nattable_sz; i > 0; i >>= 1) + ifs->ifs_fr_nat_maxbucket++; + ifs->ifs_fr_nat_maxbucket *= 2; } - fr_sttab_init(nat_tqb); + fr_sttab_init(ifs->ifs_nat_tqb, ifs); /* * Increase this because we may have "keep state" following this too * and packet storms can occur if this is removed too quickly. 
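fr_natinit() above derives fr_nat_maxbucket, the per-bucket chain length limit, from the table size whenever it has not been tuned explicitly: it counts how many times the table size can be halved (roughly log2 of the size) and doubles the result. The short sketch below reproduces that default; the table size used is only an example value.

#include <stdio.h>

/* Default chain-length limit of about 2 * log2(table size), as fr_natinit() computes. */
static unsigned int
default_maxbucket(unsigned int table_sz)
{
	unsigned int i, maxbucket = 0;

	for (i = table_sz; i > 0; i >>= 1)
		maxbucket++;
	return (maxbucket * 2);
}

int
main(void)
{
	/* 2053 buckets (an example size) gives a limit of 24 entries per chain. */
	(void) printf("%u\n", default_maxbucket(2053));
	return (0);
}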
*/ - nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = fr_tcplastack; - nat_tqb[IPF_TCP_NSTATES - 1].ifq_next = &nat_udptq; - nat_udptq.ifq_ttl = fr_defnatage; - nat_udptq.ifq_ref = 1; - nat_udptq.ifq_head = NULL; - nat_udptq.ifq_tail = &nat_udptq.ifq_head; - MUTEX_INIT(&nat_udptq.ifq_lock, "nat ipftq udp tab"); - nat_udptq.ifq_next = &nat_icmptq; - nat_icmptq.ifq_ttl = fr_defnaticmpage; - nat_icmptq.ifq_ref = 1; - nat_icmptq.ifq_head = NULL; - nat_icmptq.ifq_tail = &nat_icmptq.ifq_head; - MUTEX_INIT(&nat_icmptq.ifq_lock, "nat icmp ipftq tab"); - nat_icmptq.ifq_next = &nat_iptq; - nat_iptq.ifq_ttl = fr_defnatipage; - nat_iptq.ifq_ref = 1; - nat_iptq.ifq_head = NULL; - nat_iptq.ifq_tail = &nat_iptq.ifq_head; - MUTEX_INIT(&nat_iptq.ifq_lock, "nat ip ipftq tab"); - nat_iptq.ifq_next = NULL; + ifs->ifs_nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = ifs->ifs_fr_tcplastack; + ifs->ifs_nat_tqb[IPF_TCP_NSTATES - 1].ifq_next = &ifs->ifs_nat_udptq; + ifs->ifs_nat_udptq.ifq_ttl = ifs->ifs_fr_defnatage; + ifs->ifs_nat_udptq.ifq_ref = 1; + ifs->ifs_nat_udptq.ifq_head = NULL; + ifs->ifs_nat_udptq.ifq_tail = &ifs->ifs_nat_udptq.ifq_head; + MUTEX_INIT(&ifs->ifs_nat_udptq.ifq_lock, "nat ipftq udp tab"); + ifs->ifs_nat_udptq.ifq_next = &ifs->ifs_nat_icmptq; + ifs->ifs_nat_icmptq.ifq_ttl = ifs->ifs_fr_defnaticmpage; + ifs->ifs_nat_icmptq.ifq_ref = 1; + ifs->ifs_nat_icmptq.ifq_head = NULL; + ifs->ifs_nat_icmptq.ifq_tail = &ifs->ifs_nat_icmptq.ifq_head; + MUTEX_INIT(&ifs->ifs_nat_icmptq.ifq_lock, "nat icmp ipftq tab"); + ifs->ifs_nat_icmptq.ifq_next = &ifs->ifs_nat_iptq; + ifs->ifs_nat_iptq.ifq_ttl = ifs->ifs_fr_defnatipage; + ifs->ifs_nat_iptq.ifq_ref = 1; + ifs->ifs_nat_iptq.ifq_head = NULL; + ifs->ifs_nat_iptq.ifq_tail = &ifs->ifs_nat_iptq.ifq_head; + MUTEX_INIT(&ifs->ifs_nat_iptq.ifq_lock, "nat ip ipftq tab"); + ifs->ifs_nat_iptq.ifq_next = NULL; for (i = 0; i < IPF_TCP_NSTATES; i++) { - if (nat_tqb[i].ifq_ttl < fr_defnaticmpage) - nat_tqb[i].ifq_ttl = fr_defnaticmpage; + if (ifs->ifs_nat_tqb[i].ifq_ttl < ifs->ifs_fr_defnaticmpage) + ifs->ifs_nat_tqb[i].ifq_ttl = ifs->ifs_fr_defnaticmpage; #ifdef LARGE_NAT - else if (nat_tqb[i].ifq_ttl > fr_defnatage) - nat_tqb[i].ifq_ttl = fr_defnatage; + else if (ifs->ifs_nat_tqb[i].ifq_ttl > ifs->ifs_fr_defnatage) + ifs->ifs_nat_tqb[i].ifq_ttl = ifs->ifs_fr_defnatage; #endif } @@ -302,14 +304,15 @@ int fr_natinit() * this too and packet storms can occur if this is removed * too quickly. */ - nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = nat_tqb[IPF_TCPS_LAST_ACK].ifq_ttl; + ifs->ifs_nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = + ifs->ifs_nat_tqb[IPF_TCPS_LAST_ACK].ifq_ttl; - RWLOCK_INIT(&ipf_nat, "ipf IP NAT rwlock"); - RWLOCK_INIT(&ipf_natfrag, "ipf IP NAT-Frag rwlock"); - MUTEX_INIT(&ipf_nat_new, "ipf nat new mutex"); - MUTEX_INIT(&ipf_natio, "ipf nat io mutex"); + RWLOCK_INIT(&ifs->ifs_ipf_nat, "ipf IP NAT rwlock"); + RWLOCK_INIT(&ifs->ifs_ipf_natfrag, "ipf IP NAT-Frag rwlock"); + MUTEX_INIT(&ifs->ifs_ipf_nat_new, "ipf nat new mutex"); + MUTEX_INIT(&ifs->ifs_ipf_natio, "ipf nat io mutex"); - fr_nat_init = 1; + ifs->ifs_fr_nat_init = 1; return 0; } @@ -324,8 +327,9 @@ int fr_natinit() /* loaded NAT rules. Updates the bitmask indicating which netmasks are in */ /* use by redirect rules. 
*/ /* ------------------------------------------------------------------------ */ -static void nat_addrdr(n) +static void nat_addrdr(n, ifs) ipnat_t *n; +ipf_stack_t *ifs; { ipnat_t **np; u_32_t j; @@ -334,10 +338,10 @@ ipnat_t *n; k = count4bits(n->in_outmsk); if ((k >= 0) && (k != 32)) - rdr_masks |= 1 << k; + ifs->ifs_rdr_masks |= 1 << k; j = (n->in_outip & n->in_outmsk); - hv = NAT_HASH_FN(j, 0, ipf_rdrrules_sz); - np = rdr_rules + hv; + hv = NAT_HASH_FN(j, 0, ifs->ifs_ipf_rdrrules_sz); + np = ifs->ifs_rdr_rules + hv; while (*np != NULL) np = &(*np)->in_rnext; n->in_rnext = NULL; @@ -356,8 +360,9 @@ ipnat_t *n; /* NAT rules. Updates the bitmask indicating which netmasks are in use by */ /* redirect rules. */ /* ------------------------------------------------------------------------ */ -static void nat_addnat(n) +static void nat_addnat(n, ifs) ipnat_t *n; +ipf_stack_t *ifs; { ipnat_t **np; u_32_t j; @@ -366,10 +371,10 @@ ipnat_t *n; k = count4bits(n->in_inmsk); if ((k >= 0) && (k != 32)) - nat_masks |= 1 << k; + ifs->ifs_nat_masks |= 1 << k; j = (n->in_inip & n->in_inmsk); - hv = NAT_HASH_FN(j, 0, ipf_natrules_sz); - np = nat_rules + hv; + hv = NAT_HASH_FN(j, 0, ifs->ifs_ipf_natrules_sz); + np = ifs->ifs_nat_rules + hv; while (*np != NULL) np = &(*np)->in_mnext; n->in_mnext = NULL; @@ -425,12 +430,13 @@ ipnat_t *n; /* that is not doing port based translation. If is not yet allocated, then */ /* create a new entry if a non-NULL NAT rule pointer has been supplied. */ /* ------------------------------------------------------------------------ */ -static struct hostmap *nat_hostmap(np, src, dst, map, port) +static struct hostmap *nat_hostmap(np, src, dst, map, port, ifs) ipnat_t *np; struct in_addr src; struct in_addr dst; struct in_addr map; u_32_t port; +ipf_stack_t *ifs; { hostmap_t *hm; u_int hv; @@ -439,7 +445,7 @@ u_32_t port; hv += src.s_addr; hv += dst.s_addr; hv %= HOSTMAP_SIZE; - for (hm = maptable[hv]; hm; hm = hm->hm_next) + for (hm = ifs->ifs_maptable[hv]; hm; hm = hm->hm_next) if ((hm->hm_srcip.s_addr == src.s_addr) && (hm->hm_dstip.s_addr == dst.s_addr) && ((np == NULL) || (np == hm->hm_ipnat)) && @@ -453,11 +459,17 @@ u_32_t port; KMALLOC(hm, hostmap_t *); if (hm) { - hm->hm_next = maptable[hv]; - hm->hm_pnext = maptable + hv; - if (maptable[hv] != NULL) - maptable[hv]->hm_pnext = &hm->hm_next; - maptable[hv] = hm; + hm->hm_hnext = ifs->ifs_ipf_hm_maplist; + hm->hm_phnext = &ifs->ifs_ipf_hm_maplist; + if (ifs->ifs_ipf_hm_maplist != NULL) + ifs->ifs_ipf_hm_maplist->hm_phnext = &hm->hm_hnext; + ifs->ifs_ipf_hm_maplist = hm; + + hm->hm_next = ifs->ifs_maptable[hv]; + hm->hm_pnext = ifs->ifs_maptable + hv; + if (ifs->ifs_maptable[hv] != NULL) + ifs->ifs_maptable[hv]->hm_pnext = &hm->hm_next; + ifs->ifs_maptable[hv] = hm; hm->hm_ipnat = np; hm->hm_srcip = src; hm->hm_dstip = dst; @@ -486,10 +498,25 @@ struct hostmap *hm; if (hm->hm_next) hm->hm_next->hm_pnext = hm->hm_pnext; *hm->hm_pnext = hm->hm_next; + if (hm->hm_hnext) + hm->hm_hnext->hm_phnext = hm->hm_phnext; + *hm->hm_phnext = hm->hm_hnext; KFREE(hm); } } +void fr_hostmapderef(hmp) +struct hostmap **hmp; +{ + struct hostmap *hm; + + hm = *hmp; + *hmp = NULL; + hm->hm_ref--; + if (hm->hm_ref == 0) + nat_hostmapdel(hm); +} + /* ------------------------------------------------------------------------ */ /* Function: fix_outcksum */ @@ -594,10 +621,12 @@ u_32_t n; /* */ /* Processes an ioctl call made to operate on the IP Filter NAT device. 
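nat_addrdr() and nat_addnat() above do two things when a rule is loaded: they record the rule's netmask width in a per-instance bitmask (rdr_masks/nat_masks, now held in ifs), so that lookups only probe prefix lengths actually in use, and they hash the masked address into the per-instance rule table. The following sketch reproduces that bookkeeping in user space; the hash function, table size and chaining order are placeholders, not the kernel's NAT_HASH_FN.

#include <stdio.h>
#include <stdint.h>

#define	RULETAB_SZ	127	/* placeholder table size */

typedef struct rule {
	struct rule	*next;
	uint32_t	addr;	/* network address, host byte order */
	uint32_t	mask;
} rule_t;

static rule_t	*ruletab[RULETAB_SZ];
static uint32_t	masks_in_use;		/* bit k set => a /k rule exists */

/* Count the set bits of a contiguous netmask to get its prefix length. */
static int
prefixlen(uint32_t mask)
{
	int k = 0;

	while (mask != 0) {
		k += mask & 1;
		mask >>= 1;
	}
	return (k);
}

static void
addrule(rule_t *r)
{
	int k = prefixlen(r->mask);
	uint32_t hv;

	if (k >= 0 && k != 32)
		masks_in_use |= 1u << k;	/* lookups will try this width */

	hv = (r->addr & r->mask) % RULETAB_SZ;	/* placeholder hash */
	r->next = ruletab[hv];
	ruletab[hv] = r;
}

int
main(void)
{
	rule_t r1 = { NULL, 0x0a000000, 0xff000000 };	/* 10.0.0.0/8 */
	rule_t r2 = { NULL, 0xc0a80100, 0xffffff00 };	/* 192.168.1.0/24 */

	addrule(&r1);
	addrule(&r2);
	printf("masks_in_use=%08x\n", masks_in_use);
	return (0);
}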
*/ /* ------------------------------------------------------------------------ */ -int fr_nat_ioctl(data, cmd, mode) +int fr_nat_ioctl(data, cmd, mode, uid, ctx, ifs) ioctlcmd_t cmd; caddr_t data; -int mode; +int mode, uid; +void *ctx; +ipf_stack_t *ifs; { ipnat_t *nat, *nt, *n = NULL, **np = NULL; int error = 0, ret, arg, getlock; @@ -650,8 +679,9 @@ int mode; if ((nat->in_flags & IPN_IPRANGE) == 0) nat->in_outip &= nat->in_outmsk; } - MUTEX_ENTER(&ipf_natio); - for (np = &nat_list; ((n = *np) != NULL); np = &n->in_next) + MUTEX_ENTER(&ifs->ifs_ipf_natio); + for (np = &ifs->ifs_nat_list; ((n = *np) != NULL); + np = &n->in_next) if (!bcmp((char *)&nat->in_flags, (char *)&n->in_flags, IPN_CMPSIZ)) break; @@ -659,6 +689,23 @@ int mode; switch (cmd) { + case SIOCGENITER : + { + ipfgeniter_t iter; + ipftoken_t *token; + + error = fr_inobj(data, &iter, IPFOBJ_GENITER); + if (error != 0) + break; + + token = ipf_findtoken(iter.igi_type, uid, ctx, ifs); + if (token != NULL) + error = nat_iterator(token, &iter, ifs); + else + error = ESRCH; + RWLOCK_EXIT(&ifs->ifs_ipf_tokens); + break; + } #ifdef IPFILTER_LOG case SIOCIPFFB : { @@ -667,7 +714,7 @@ int mode; if (!(mode & FWRITE)) error = EPERM; else { - tmp = ipflog_clear(IPL_LOGNAT); + tmp = ipflog_clear(IPL_LOGNAT, ifs); BCOPYOUT((char *)&tmp, (char *)data, sizeof(tmp)); } break; @@ -676,16 +723,17 @@ int mode; if (!(mode & FWRITE)) error = EPERM; else { - BCOPYIN((char *)data, (char *)&nat_logging, - sizeof(nat_logging)); + BCOPYIN((char *)data, + (char *)&ifs->ifs_nat_logging, + sizeof(ifs->ifs_nat_logging)); } break; case SIOCGETLG : - BCOPYOUT((char *)&nat_logging, (char *)data, - sizeof(nat_logging)); + BCOPYOUT((char *)&ifs->ifs_nat_logging, (char *)data, + sizeof(ifs->ifs_nat_logging)); break; case FIONREAD : - arg = iplused[IPL_LOGNAT]; + arg = ifs->ifs_iplused[IPL_LOGNAT]; BCOPYOUT(&arg, data, sizeof(arg)); break; #endif @@ -698,12 +746,12 @@ int mode; error = ENOMEM; } if (error != 0) { - MUTEX_EXIT(&ipf_natio); + MUTEX_EXIT(&ifs->ifs_ipf_natio); break; } bcopy((char *)nat, (char *)nt, sizeof(*n)); - error = nat_siocaddnat(nt, np, getlock); - MUTEX_EXIT(&ipf_natio); + error = nat_siocaddnat(nt, np, getlock, ifs); + MUTEX_EXIT(&ifs->ifs_ipf_natio); if (error == 0) nt = NULL; break; @@ -716,45 +764,46 @@ int mode; } if (error != 0) { - MUTEX_EXIT(&ipf_natio); + MUTEX_EXIT(&ifs->ifs_ipf_natio); break; } - nat_siocdelnat(n, np, getlock); + nat_siocdelnat(n, np, getlock, ifs); - MUTEX_EXIT(&ipf_natio); + MUTEX_EXIT(&ifs->ifs_ipf_natio); n = NULL; break; case SIOCGNATS : - nat_stats.ns_table[0] = nat_table[0]; - nat_stats.ns_table[1] = nat_table[1]; - nat_stats.ns_list = nat_list; - nat_stats.ns_maptable = maptable; - nat_stats.ns_nattab_sz = ipf_nattable_sz; - nat_stats.ns_nattab_max = ipf_nattable_max; - nat_stats.ns_rultab_sz = ipf_natrules_sz; - nat_stats.ns_rdrtab_sz = ipf_rdrrules_sz; - nat_stats.ns_hostmap_sz = ipf_hostmap_sz; - nat_stats.ns_instances = nat_instances; - nat_stats.ns_apslist = ap_sess_list; - error = fr_outobj(data, &nat_stats, IPFOBJ_NATSTAT); + ifs->ifs_nat_stats.ns_table[0] = ifs->ifs_nat_table[0]; + ifs->ifs_nat_stats.ns_table[1] = ifs->ifs_nat_table[1]; + ifs->ifs_nat_stats.ns_list = ifs->ifs_nat_list; + ifs->ifs_nat_stats.ns_maptable = ifs->ifs_maptable; + ifs->ifs_nat_stats.ns_maplist = ifs->ifs_ipf_hm_maplist; + ifs->ifs_nat_stats.ns_nattab_max = ifs->ifs_ipf_nattable_max; + ifs->ifs_nat_stats.ns_nattab_sz = ifs->ifs_ipf_nattable_sz; + ifs->ifs_nat_stats.ns_rultab_sz = ifs->ifs_ipf_natrules_sz; + 
ifs->ifs_nat_stats.ns_rdrtab_sz = ifs->ifs_ipf_rdrrules_sz; + ifs->ifs_nat_stats.ns_hostmap_sz = ifs->ifs_ipf_hostmap_sz; + ifs->ifs_nat_stats.ns_instances = ifs->ifs_nat_instances; + ifs->ifs_nat_stats.ns_apslist = ifs->ifs_ap_sess_list; + error = fr_outobj(data, &ifs->ifs_nat_stats, IPFOBJ_NATSTAT); break; case SIOCGNATL : { natlookup_t nl; if (getlock) { - READ_ENTER(&ipf_nat); + READ_ENTER(&ifs->ifs_ipf_nat); } error = fr_inobj(data, &nl, IPFOBJ_NATLOOKUP); if (error == 0) { - if (nat_lookupredir(&nl) != NULL) { + if (nat_lookupredir(&nl, ifs) != NULL) { error = fr_outobj(data, &nl, IPFOBJ_NATLOOKUP); } else { error = ESRCH; } } if (getlock) { - RWLOCK_EXIT(&ipf_nat); + RWLOCK_EXIT(&ifs->ifs_ipf_nat); } break; } @@ -764,63 +813,67 @@ int mode; break; } if (getlock) { - WRITE_ENTER(&ipf_nat); + WRITE_ENTER(&ifs->ifs_ipf_nat); } error = 0; if (arg == 0) - ret = nat_flushtable(); + ret = nat_flushtable(ifs); else if (arg == 1) - ret = nat_clearlist(); + ret = nat_clearlist(ifs); else error = EINVAL; if (getlock) { - RWLOCK_EXIT(&ipf_nat); + RWLOCK_EXIT(&ifs->ifs_ipf_nat); } if (error == 0) { BCOPYOUT(&ret, data, sizeof(ret)); } break; case SIOCPROXY : - error = appr_ioctl(data, cmd, mode); + error = appr_ioctl(data, cmd, mode, ifs); break; case SIOCSTLCK : if (!(mode & FWRITE)) { error = EPERM; } else { - fr_lock(data, &fr_nat_lock); + fr_lock(data, &ifs->ifs_fr_nat_lock); } break; case SIOCSTPUT : if ((mode & FWRITE) != 0) { - error = fr_natputent(data, getlock); + error = fr_natputent(data, getlock, ifs); } else { error = EACCES; } break; case SIOCSTGSZ : - if (fr_nat_lock) { + if (ifs->ifs_fr_nat_lock) { if (getlock) { - READ_ENTER(&ipf_nat); + READ_ENTER(&ifs->ifs_ipf_nat); } - error = fr_natgetsz(data); + error = fr_natgetsz(data, ifs); if (getlock) { - RWLOCK_EXIT(&ipf_nat); + RWLOCK_EXIT(&ifs->ifs_ipf_nat); } } else error = EACCES; break; case SIOCSTGET : - if (fr_nat_lock) { + if (ifs->ifs_fr_nat_lock) { if (getlock) { - READ_ENTER(&ipf_nat); + READ_ENTER(&ifs->ifs_ipf_nat); } - error = fr_natgetent(data); + error = fr_natgetent(data, ifs); if (getlock) { - RWLOCK_EXIT(&ipf_nat); + RWLOCK_EXIT(&ifs->ifs_ipf_nat); } } else error = EACCES; break; + case SIOCIPFDELTOK : + (void) BCOPYIN((caddr_t)data, (caddr_t)&arg, sizeof(arg)); + error = ipf_deltoken(arg, uid, ctx, ifs); + break; default : error = EINVAL; break; @@ -844,13 +897,14 @@ done: /* from information passed to the kernel, then add it to the appropriate */ /* NAT rule table(s). 
*/ /* ------------------------------------------------------------------------ */ -static int nat_siocaddnat(n, np, getlock) +static int nat_siocaddnat(n, np, getlock, ifs) ipnat_t *n, **np; int getlock; +ipf_stack_t *ifs; { int error = 0, i, j; - nat_resolverule(n); + nat_resolverule(n, ifs); if (n->in_plabel[0] != '\0') { if (n->in_apr == NULL) return ENOENT; @@ -938,29 +992,31 @@ int getlock; /* Otherwise, these fields are preset */ if (getlock) { - WRITE_ENTER(&ipf_nat); + WRITE_ENTER(&ifs->ifs_ipf_nat); } n->in_next = NULL; *np = n; if (n->in_age[0] != 0) - n->in_tqehead[0] = fr_addtimeoutqueue(&nat_utqe, n->in_age[0]); + n->in_tqehead[0] = fr_addtimeoutqueue(&ifs->ifs_nat_utqe, + n->in_age[0], ifs); if (n->in_age[1] != 0) - n->in_tqehead[1] = fr_addtimeoutqueue(&nat_utqe, n->in_age[1]); + n->in_tqehead[1] = fr_addtimeoutqueue(&ifs->ifs_nat_utqe, + n->in_age[1], ifs); if (n->in_redir & NAT_REDIRECT) { n->in_flags &= ~IPN_NOTDST; - nat_addrdr(n); + nat_addrdr(n, ifs); } if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) { n->in_flags &= ~IPN_NOTSRC; - nat_addnat(n); + nat_addnat(n, ifs); } n = NULL; - nat_stats.ns_rules++; + ifs->ifs_nat_stats.ns_rules++; if (getlock) { - RWLOCK_EXIT(&ipf_nat); /* WRITE */ + RWLOCK_EXIT(&ifs->ifs_ipf_nat); /* WRITE */ } return error; @@ -976,22 +1032,23 @@ int getlock; /* from information passed to the kernel, then add it to the appropriate */ /* NAT rule table(s). */ /* ------------------------------------------------------------------------ */ -static void nat_resolverule(n) +static void nat_resolverule(n, ifs) ipnat_t *n; +ipf_stack_t *ifs; { n->in_ifnames[0][LIFNAMSIZ - 1] = '\0'; - n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], 4); + n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], 4, ifs); n->in_ifnames[1][LIFNAMSIZ - 1] = '\0'; if (n->in_ifnames[1][0] == '\0') { (void) strncpy(n->in_ifnames[1], n->in_ifnames[0], LIFNAMSIZ); n->in_ifps[1] = n->in_ifps[0]; } else { - n->in_ifps[1] = fr_resolvenic(n->in_ifnames[0], 4); + n->in_ifps[1] = fr_resolvenic(n->in_ifnames[0], 4, ifs); } if (n->in_plabel[0] != '\0') { - n->in_apr = appr_lookup(n->in_p, n->in_plabel); + n->in_apr = appr_lookup(n->in_p, n->in_plabel, ifs); } } @@ -1008,31 +1065,32 @@ ipnat_t *n; /* from information passed to the kernel, then add it to the appropriate */ /* NAT rule table(s). 
*/ /* ------------------------------------------------------------------------ */ -static void nat_siocdelnat(n, np, getlock) +static void nat_siocdelnat(n, np, getlock, ifs) ipnat_t *n, **np; int getlock; +ipf_stack_t *ifs; { if (getlock) { - WRITE_ENTER(&ipf_nat); + WRITE_ENTER(&ifs->ifs_ipf_nat); } if (n->in_redir & NAT_REDIRECT) nat_delrdr(n); if (n->in_redir & (NAT_MAPBLK|NAT_MAP)) nat_delnat(n); - if (nat_list == NULL) { - nat_masks = 0; - rdr_masks = 0; + if (ifs->ifs_nat_list == NULL) { + ifs->ifs_nat_masks = 0; + ifs->ifs_rdr_masks = 0; } if (n->in_tqehead[0] != NULL) { if (fr_deletetimeoutqueue(n->in_tqehead[0]) == 0) { - fr_freetimeoutqueue(n->in_tqehead[1]); + fr_freetimeoutqueue(n->in_tqehead[1], ifs); } } if (n->in_tqehead[1] != NULL) { if (fr_deletetimeoutqueue(n->in_tqehead[1]) == 0) { - fr_freetimeoutqueue(n->in_tqehead[1]); + fr_freetimeoutqueue(n->in_tqehead[1], ifs); } } @@ -1042,13 +1100,13 @@ int getlock; if (n->in_apr) appr_free(n->in_apr); KFREE(n); - nat_stats.ns_rules--; + ifs->ifs_nat_stats.ns_rules--; } else { n->in_flags |= IPN_DELETE; n->in_next = NULL; } if (getlock) { - RWLOCK_EXIT(&ipf_nat); /* READ/WRITE */ + RWLOCK_EXIT(&ifs->ifs_ipf_nat); /* READ/WRITE */ } } @@ -1064,8 +1122,9 @@ int getlock; /* The size of the entry is stored in the ng_sz field and the enture natget */ /* structure is copied back to the user. */ /* ------------------------------------------------------------------------ */ -static int fr_natgetsz(data) +static int fr_natgetsz(data, ifs) caddr_t data; +ipf_stack_t *ifs; { ap_session_t *aps; nat_t *nat, *n; @@ -1075,7 +1134,7 @@ caddr_t data; nat = ng.ng_ptr; if (!nat) { - nat = nat_instances; + nat = ifs->ifs_nat_instances; ng.ng_sz = 0; /* * Empty list so the size returned is 0. Simple. @@ -1090,7 +1149,7 @@ caddr_t data; * current list of entries. Security precaution to prevent * copying of random kernel data. */ - for (n = nat_instances; n; n = n->nat_next) + for (n = ifs->ifs_nat_instances; n; n = n->nat_next) if (n == nat) break; if (!n) @@ -1123,8 +1182,9 @@ caddr_t data; /* Copies out NAT entry to user space. Any additional data held for a */ /* proxy is also copied, as to is the NAT rule which was responsible for it */ /* ------------------------------------------------------------------------ */ -static int fr_natgetent(data) +static int fr_natgetent(data, ifs) caddr_t data; +ipf_stack_t *ifs; { int error, outsize; ap_session_t *aps; @@ -1145,9 +1205,9 @@ caddr_t data; ipn->ipn_dsize = ipns.ipn_dsize; nat = ipns.ipn_next; if (nat == NULL) { - nat = nat_instances; + nat = ifs->ifs_nat_instances; if (nat == NULL) { - if (nat_instances == NULL) + if (ifs->ifs_nat_instances == NULL) error = ENOENT; goto finished; } @@ -1157,7 +1217,7 @@ caddr_t data; * current list of entries. Security precaution to prevent * copying of random kernel data. */ - for (n = nat_instances; n; n = n->nat_next) + for (n = ifs->ifs_nat_instances; n; n = n->nat_next) if (n == nat) break; if (n == NULL) { @@ -1235,9 +1295,10 @@ finished: /* Loads a NAT table entry from user space, including a NAT rule, proxy and */ /* firewall rule data structures, if pointers to them indicate so. 
*/ /* ------------------------------------------------------------------------ */ -static int fr_natputent(data, getlock) +static int fr_natputent(data, getlock, ifs) caddr_t data; int getlock; +ipf_stack_t *ifs; { nat_save_t ipn, *ipnn; ap_session_t *aps; @@ -1314,9 +1375,9 @@ int getlock; in->in_use = 1; in->in_flags |= IPN_DELETE; - ATOMIC_INC(nat_stats.ns_rules); + ATOMIC_INC(ifs->ifs_nat_stats.ns_rules); - nat_resolverule(in); + nat_resolverule(in, ifs); } /* @@ -1329,12 +1390,12 @@ int getlock; fin.fin_data[1] = ntohs(nat->nat_outport); fin.fin_ifp = nat->nat_ifps[0]; if (getlock) { - READ_ENTER(&ipf_nat); + READ_ENTER(&ifs->ifs_ipf_nat); } n = nat_inlookup(&fin, nat->nat_flags, fin.fin_p, nat->nat_oip, nat->nat_outip); if (getlock) { - RWLOCK_EXIT(&ipf_nat); + RWLOCK_EXIT(&ifs->ifs_ipf_nat); } if (n != NULL) { error = EEXIST; @@ -1345,12 +1406,12 @@ int getlock; fin.fin_data[1] = ntohs(nat->nat_oport); fin.fin_ifp = nat->nat_ifps[1]; if (getlock) { - READ_ENTER(&ipf_nat); + READ_ENTER(&ifs->ifs_ipf_nat); } n = nat_outlookup(&fin, nat->nat_flags, fin.fin_p, nat->nat_inip, nat->nat_oip); if (getlock) { - RWLOCK_EXIT(&ipf_nat); + RWLOCK_EXIT(&ifs->ifs_ipf_nat); } if (n != NULL) { error = EEXIST; @@ -1416,8 +1477,8 @@ int getlock; MUTEX_NUKE(&fr->fr_lock); MUTEX_INIT(&fr->fr_lock, "nat-filter rule lock"); } else { - READ_ENTER(&ipf_nat); - for (n = nat_instances; n; n = n->nat_next) + READ_ENTER(&ifs->ifs_ipf_nat); + for (n = ifs->ifs_nat_instances; n; n = n->nat_next) if (n->nat_fr == fr) break; @@ -1426,7 +1487,7 @@ int getlock; fr->fr_ref++; MUTEX_EXIT(&fr->fr_lock); } - RWLOCK_EXIT(&ipf_nat); + RWLOCK_EXIT(&ifs->ifs_ipf_nat); if (!n) { error = ESRCH; @@ -1441,15 +1502,15 @@ int getlock; } if (getlock) { - WRITE_ENTER(&ipf_nat); + WRITE_ENTER(&ifs->ifs_ipf_nat); } - error = nat_insert(nat, nat->nat_rev); + error = nat_insert(nat, nat->nat_rev, ifs); if ((error == 0) && (aps != NULL)) { - aps->aps_next = ap_sess_list; - ap_sess_list = aps; + aps->aps_next = ifs->ifs_ap_sess_list; + ifs->ifs_ap_sess_list = aps; } if (getlock) { - RWLOCK_EXIT(&ipf_nat); + RWLOCK_EXIT(&ifs->ifs_ipf_nat); } if (error == 0) @@ -1459,7 +1520,7 @@ int getlock; junkput: if (fr != NULL) - (void) fr_derefrule(&fr); + (void) fr_derefrule(&fr, ifs); if ((ipnn != NULL) && (ipnn != &ipn)) { KFREES(ipnn, ipn.ipn_dsize); @@ -1492,24 +1553,25 @@ junkput: /* Delete a nat entry from the various lists and table. If NAT logging is */ /* enabled then generate a NAT log record for this event. */ /* ------------------------------------------------------------------------ */ -static void nat_delete(nat, logtype) +static void nat_delete(nat, logtype, ifs) struct nat *nat; int logtype; +ipf_stack_t *ifs; { struct ipnat *ipn; - if (logtype != 0 && nat_logging != 0) - nat_log(nat, logtype); + if (logtype != 0 && ifs->ifs_nat_logging != 0) + nat_log(nat, logtype, ifs); - MUTEX_ENTER(&ipf_nat_new); + MUTEX_ENTER(&ifs->ifs_ipf_nat_new); /* * Take it as a general indication that all the pointers are set if * nat_pnext is set. 
*/ if (nat->nat_pnext != NULL) { - nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--; - nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--; + ifs->ifs_nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--; + ifs->ifs_nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--; *nat->nat_pnext = nat->nat_next; if (nat->nat_next != NULL) { @@ -1533,7 +1595,7 @@ int logtype; nat->nat_phnext[1] = NULL; if ((nat->nat_flags & SI_WILDP) != 0) - nat_stats.ns_wilds--; + ifs->ifs_nat_stats.ns_wilds--; } if (nat->nat_me != NULL) { @@ -1545,7 +1607,7 @@ int logtype; nat->nat_ref--; if (nat->nat_ref > 0) { - MUTEX_EXIT(&ipf_nat_new); + MUTEX_EXIT(&ifs->ifs_ipf_nat_new); return; } @@ -1555,7 +1617,7 @@ int logtype; #endif if (nat->nat_fr != NULL) - (void)fr_derefrule(&nat->nat_fr); + (void)fr_derefrule(&nat->nat_fr, ifs); if (nat->nat_hm != NULL) nat_hostmapdel(nat->nat_hm); @@ -1573,22 +1635,22 @@ int logtype; if (ipn->in_apr) appr_free(ipn->in_apr); KFREE(ipn); - nat_stats.ns_rules--; + ifs->ifs_nat_stats.ns_rules--; } } MUTEX_DESTROY(&nat->nat_lock); - aps_free(nat->nat_aps); - nat_stats.ns_inuse--; - MUTEX_EXIT(&ipf_nat_new); + aps_free(nat->nat_aps, ifs); + ifs->ifs_nat_stats.ns_inuse--; + MUTEX_EXIT(&ifs->ifs_ipf_nat_new); /* * If there's a fragment table entry too for this nat entry, then * dereference that as well. This is after nat_lock is released * because of Tru64. */ - fr_forgetnat((void *)nat); + fr_forgetnat((void *)nat, ifs); KFREE(nat); } @@ -1605,7 +1667,8 @@ int logtype; /* * nat_flushtable - clear the NAT table of all mapping entries. */ -static int nat_flushtable() +static int nat_flushtable(ifs) +ipf_stack_t *ifs; { nat_t *nat; int j = 0; @@ -1614,19 +1677,19 @@ static int nat_flushtable() * ALL NAT mappings deleted, so lets just make the deletions * quicker. */ - if (nat_table[0] != NULL) - bzero((char *)nat_table[0], - sizeof(nat_table[0]) * ipf_nattable_sz); - if (nat_table[1] != NULL) - bzero((char *)nat_table[1], - sizeof(nat_table[1]) * ipf_nattable_sz); - - while ((nat = nat_instances) != NULL) { - nat_delete(nat, NL_FLUSH); + if (ifs->ifs_nat_table[0] != NULL) + bzero((char *)ifs->ifs_nat_table[0], + sizeof(ifs->ifs_nat_table[0]) * ifs->ifs_ipf_nattable_sz); + if (ifs->ifs_nat_table[1] != NULL) + bzero((char *)ifs->ifs_nat_table[1], + sizeof(ifs->ifs_nat_table[1]) * ifs->ifs_ipf_nattable_sz); + + while ((nat = ifs->ifs_nat_instances) != NULL) { + nat_delete(nat, NL_FLUSH, ifs); j++; } - nat_stats.ns_inuse = 0; + ifs->ifs_nat_stats.ns_inuse = 0; return j; } @@ -1640,15 +1703,18 @@ static int nat_flushtable() /* about this cleanup: simply free all entries on the list of rules and */ /* clear out the tables used for hashed NAT rule lookups. 
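nat_delete() above unlinks the entry from both hash chains, drops its reference count under the (now per-instance) ipf_nat_new mutex, and only frees the entry and its associated rule, hostmap and proxy state once that count reaches zero, since other code paths may still hold a pointer to it. A compact illustration of that unlink-then-deref pattern, with a much simplified entry type, is given below.

#include <stdlib.h>
#include <stdio.h>

typedef struct entry {
	struct entry	*next;
	struct entry	**pnext;	/* where our next pointer is stored */
	int		ref;
} entry_t;

static entry_t *instances;

static entry_t *
entry_new(void)
{
	entry_t *e = calloc(1, sizeof (*e));

	if (e == NULL)
		return (NULL);
	e->ref = 1;
	e->next = instances;
	e->pnext = &instances;
	if (instances != NULL)
		instances->pnext = &e->next;
	instances = e;
	return (e);
}

/* Unlink unconditionally, but free only when the last reference goes away. */
static void
entry_delete(entry_t *e)
{
	if (e->pnext != NULL) {
		*e->pnext = e->next;
		if (e->next != NULL)
			e->next->pnext = e->pnext;
		e->pnext = NULL;
	}
	if (--e->ref > 0)
		return;		/* someone else still holds a pointer */
	free(e);
}

int
main(void)
{
	entry_t *e = entry_new();

	if (e == NULL)
		return (1);
	e->ref++;		/* a second holder, e.g. a pending packet */
	entry_delete(e);	/* unlinked, but not yet freed */
	printf("ref after delete: %d\n", e->ref);
	entry_delete(e);	/* last reference gone: actually freed now */
	return (0);
}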
*/ /* ------------------------------------------------------------------------ */ -static int nat_clearlist() +static int nat_clearlist(ifs) +ipf_stack_t *ifs; { - ipnat_t *n, **np = &nat_list; + ipnat_t *n, **np = &ifs->ifs_nat_list; int i = 0; - if (nat_rules != NULL) - bzero((char *)nat_rules, sizeof(*nat_rules) * ipf_natrules_sz); - if (rdr_rules != NULL) - bzero((char *)rdr_rules, sizeof(*rdr_rules) * ipf_rdrrules_sz); + if (ifs->ifs_nat_rules != NULL) + bzero((char *)ifs->ifs_nat_rules, + sizeof(*ifs->ifs_nat_rules) * ifs->ifs_ipf_natrules_sz); + if (ifs->ifs_rdr_rules != NULL) + bzero((char *)ifs->ifs_rdr_rules, + sizeof(*ifs->ifs_rdr_rules) * ifs->ifs_ipf_rdrrules_sz); while ((n = *np) != NULL) { *np = n->in_next; @@ -1656,15 +1722,15 @@ static int nat_clearlist() if (n->in_apr != NULL) appr_free(n->in_apr); KFREE(n); - nat_stats.ns_rules--; + ifs->ifs_nat_stats.ns_rules--; } else { n->in_flags |= IPN_DELETE; n->in_next = NULL; } i++; } - nat_masks = 0; - rdr_masks = 0; + ifs->ifs_nat_masks = 0; + ifs->ifs_rdr_masks = 0; return i; } @@ -1695,6 +1761,7 @@ natinfo_t *ni; ipnat_t *np; nat_t *natl; int l; + ipf_stack_t *ifs = fin->fin_ifs; /* * If it's an outbound packet which doesn't match any existing @@ -1723,7 +1790,7 @@ natinfo_t *ni; * setup for this IP address pair. */ hm = nat_hostmap(np, fin->fin_src, fin->fin_dst, - in, 0); + in, 0, ifs); if (hm != NULL) in.s_addr = hm->hm_mapip.s_addr; } else if ((l == 1) && (hm != NULL)) { @@ -1781,7 +1848,7 @@ natinfo_t *ni; */ if ((l > 0) || fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp, - &in, NULL) == -1) + &in, NULL, fin->fin_ifs) == -1) return -1; in.s_addr = ntohl(in.s_addr); @@ -1890,7 +1957,7 @@ natinfo_t *ni; nat->nat_oip = fin->fin_dst; if (nat->nat_hm == NULL) nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst, - nat->nat_outip, 0); + nat->nat_outip, 0, ifs); /* * The ICMP checksum does not have a pseudo header containing @@ -1944,6 +2011,7 @@ natinfo_t *ni; u_32_t flags; ipnat_t *np; int move; + ipf_stack_t *ifs = fin->fin_ifs; move = 1; hm = NULL; @@ -1963,7 +2031,7 @@ natinfo_t *ni; if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) == (IPN_ROUNDR|IPN_STICKY)) { hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst, in, - (u_32_t)dport); + (u_32_t)dport, ifs); if (hm != NULL) { in.s_addr = ntohl(hm->hm_mapip.s_addr); np = hm->hm_ipnat; @@ -1983,7 +2051,7 @@ natinfo_t *ni; if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) == IPN_STICKY) { hm = nat_hostmap(np, fin->fin_src, fin->fin_dst, - in, (u_32_t)dport); + in, (u_32_t)dport, ifs); if (hm != NULL) { in.s_addr = hm->hm_mapip.s_addr; move = 0; @@ -2003,7 +2071,8 @@ natinfo_t *ni; /* * 0/32 - use the interface's IP address. */ - if (fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp, &in, NULL) == -1) + if (fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp, &in, NULL, + fin->fin_ifs) == -1) return -1; in.s_addr = ntohl(in.s_addr); @@ -2112,9 +2181,10 @@ int direction; natinfo_t ni; u_32_t sumd; int move; + ipf_stack_t *ifs = fin->fin_ifs; - if (nat_stats.ns_inuse >= ipf_nattable_max) { - nat_stats.ns_memfail++; + if (ifs->ifs_nat_stats.ns_inuse >= ifs->ifs_ipf_nattable_max) { + ifs->ifs_nat_stats.ns_memfail++; return NULL; } @@ -2129,17 +2199,17 @@ int direction; /* Give me a new nat */ KMALLOC(nat, nat_t *); if (nat == NULL) { - nat_stats.ns_memfail++; + ifs->ifs_nat_stats.ns_memfail++; /* * Try to automatically tune the max # of entries in the * table allowed to be less than what will cause kmem_alloc() * to fail and try to eliminate panics due to out of memory * conditions arising. 
*/ - if (ipf_nattable_max > ipf_nattable_sz) { - ipf_nattable_max = nat_stats.ns_inuse - 100; + if (ifs->ifs_ipf_nattable_max > ifs->ifs_ipf_nattable_sz) { + ifs->ifs_ipf_nattable_max = ifs->ifs_nat_stats.ns_inuse - 100; printf("ipf_nattable_max reduced to %d\n", - ipf_nattable_max); + ifs->ifs_ipf_nattable_max); } return NULL; } @@ -2168,9 +2238,10 @@ int direction; bzero((char *)nat, sizeof(*nat)); nat->nat_flags = flags; + nat->nat_redir = np->in_redir; if ((flags & NAT_SLAVE) == 0) { - MUTEX_ENTER(&ipf_nat_new); + MUTEX_ENTER(&ifs->ifs_ipf_nat_new); } /* @@ -2219,10 +2290,10 @@ int direction; if ((move == 1) && (np->in_flags & IPN_ROUNDR)) { if (np->in_redir == NAT_REDIRECT) { nat_delrdr(np); - nat_addrdr(np); + nat_addrdr(np, ifs); } else if (np->in_redir == NAT_MAP) { nat_delnat(np); - nat_addnat(np); + nat_addnat(np, ifs); } } @@ -2278,17 +2349,17 @@ int direction; goto badnat; } if (flags & SI_WILDP) - nat_stats.ns_wilds++; + ifs->ifs_nat_stats.ns_wilds++; goto done; badnat: - nat_stats.ns_badnat++; + ifs->ifs_nat_stats.ns_badnat++; if ((hm = nat->nat_hm) != NULL) nat_hostmapdel(hm); KFREE(nat); nat = NULL; done: if ((flags & NAT_SLAVE) == 0) { - MUTEX_EXIT(&ipf_nat_new); + MUTEX_EXIT(&ifs->ifs_ipf_nat_new); } return nat; } @@ -2317,6 +2388,7 @@ int direction; { frentry_t *fr; ipnat_t *np; + ipf_stack_t *ifs = fin->fin_ifs; np = ni->nai_np; @@ -2341,9 +2413,9 @@ int direction; if (appr_new(fin, nat) == -1) return -1; - if (nat_insert(nat, fin->fin_rev) == 0) { - if (nat_logging) - nat_log(nat, (u_int)np->in_redir); + if (nat_insert(nat, fin->fin_rev, ifs) == 0) { + if (ifs->ifs_nat_logging) + nat_log(nat, (u_int)np->in_redir, ifs); np->in_use++; if (fr != NULL) { MUTEX_ENTER(&fr->fr_lock); @@ -2370,9 +2442,10 @@ int direction; /* Insert a NAT entry into the hash tables for searching and add it to the */ /* list of active NAT entries. Adjust global counters when complete. 
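nat_insert(), whose header comment closes above, files every mapping under two hash keys - one for the inbound tuple and one for the outbound tuple - and refuses the insert when either bucket already holds the configured maximum number of entries. A reduced, self-contained sketch of that head insertion with back-pointers (placeholder struct and hash, not the real nat_t):

#define	NBUCKETS	256
#define	MAXBUCKETLEN	16

typedef struct map {
	struct map	*m_hnext[2];	/* forward links, one per table */
	struct map	**m_phnext[2];	/* back-pointers for O(1) unlink */
	unsigned int	m_key[2];	/* inbound / outbound lookup keys */
} map_t;

static map_t		*table[2][NBUCKETS];
static unsigned int	bucketlen[2][NBUCKETS];

static unsigned int
hashkey(unsigned int k)
{
	return ((k * 2654435761u) % NBUCKETS);	/* placeholder hash */
}

/* Returns 0 on success, -1 if either bucket is already full. */
int
map_insert(map_t *m)
{
	unsigned int hv[2];
	int i;

	hv[0] = hashkey(m->m_key[0]);
	hv[1] = hashkey(m->m_key[1]);
	if (bucketlen[0][hv[0]] >= MAXBUCKETLEN ||
	    bucketlen[1][hv[1]] >= MAXBUCKETLEN)
		return (-1);

	for (i = 0; i < 2; i++) {
		map_t **head = &table[i][hv[i]];

		if (*head != NULL)
			(*head)->m_phnext[i] = &m->m_hnext[i];
		m->m_phnext[i] = head;
		m->m_hnext[i] = *head;
		*head = m;
		bucketlen[i][hv[i]]++;
	}
	return (0);
}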
*/ /* ------------------------------------------------------------------------ */ -int nat_insert(nat, rev) +int nat_insert(nat, rev, ifs) nat_t *nat; int rev; +ipf_stack_t *ifs; { u_int hv1, hv2; nat_t **natp; @@ -2385,20 +2458,22 @@ int rev; hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport, 0xffffffff); hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1 + nat->nat_oport, - ipf_nattable_sz); + ifs->ifs_ipf_nattable_sz); hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport, 0xffffffff); hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2 + nat->nat_oport, - ipf_nattable_sz); + ifs->ifs_ipf_nattable_sz); } else { hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, 0, 0xffffffff); - hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1, ipf_nattable_sz); + hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1, + ifs->ifs_ipf_nattable_sz); hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, 0, 0xffffffff); - hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2, ipf_nattable_sz); + hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2, + ifs->ifs_ipf_nattable_sz); } - if (nat_stats.ns_bucketlen[0][hv1] >= fr_nat_maxbucket || - nat_stats.ns_bucketlen[1][hv2] >= fr_nat_maxbucket) { + if (ifs->ifs_nat_stats.ns_bucketlen[0][hv1] >= ifs->ifs_fr_nat_maxbucket || + ifs->ifs_nat_stats.ns_bucketlen[1][hv2] >= ifs->ifs_fr_nat_maxbucket) { return -1; } @@ -2415,11 +2490,11 @@ int rev; nat->nat_pkts[1] = 0; nat->nat_ifnames[0][LIFNAMSIZ - 1] = '\0'; - nat->nat_ifps[0] = fr_resolvenic(nat->nat_ifnames[0], 4); + nat->nat_ifps[0] = fr_resolvenic(nat->nat_ifnames[0], 4, ifs); if (nat->nat_ifnames[1][0] !='\0') { nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0'; - nat->nat_ifps[1] = fr_resolvenic(nat->nat_ifnames[1], 4); + nat->nat_ifps[1] = fr_resolvenic(nat->nat_ifnames[1], 4, ifs); } else { (void) strncpy(nat->nat_ifnames[1], nat->nat_ifnames[0], LIFNAMSIZ); @@ -2427,32 +2502,32 @@ int rev; nat->nat_ifps[1] = nat->nat_ifps[0]; } - nat->nat_next = nat_instances; - nat->nat_pnext = &nat_instances; - if (nat_instances) - nat_instances->nat_pnext = &nat->nat_next; - nat_instances = nat; + nat->nat_next = ifs->ifs_nat_instances; + nat->nat_pnext = &ifs->ifs_nat_instances; + if (ifs->ifs_nat_instances) + ifs->ifs_nat_instances->nat_pnext = &nat->nat_next; + ifs->ifs_nat_instances = nat; - natp = &nat_table[0][hv1]; + natp = &ifs->ifs_nat_table[0][hv1]; if (*natp) (*natp)->nat_phnext[0] = &nat->nat_hnext[0]; nat->nat_phnext[0] = natp; nat->nat_hnext[0] = *natp; *natp = nat; - nat_stats.ns_bucketlen[0][hv1]++; + ifs->ifs_nat_stats.ns_bucketlen[0][hv1]++; - natp = &nat_table[1][hv2]; + natp = &ifs->ifs_nat_table[1][hv2]; if (*natp) (*natp)->nat_phnext[1] = &nat->nat_hnext[1]; nat->nat_phnext[1] = natp; nat->nat_hnext[1] = *natp; *natp = nat; - nat_stats.ns_bucketlen[1][hv2]++; + ifs->ifs_nat_stats.ns_bucketlen[1][hv2]++; - fr_setnatqueue(nat, rev); + fr_setnatqueue(nat, rev, ifs); - nat_stats.ns_added++; - nat_stats.ns_inuse++; + ifs->ifs_nat_stats.ns_added++; + ifs->ifs_nat_stats.ns_inuse++; return 0; } @@ -2885,6 +2960,7 @@ struct in_addr src , mapdst; u_32_t dst; void *ifp; u_int hv; + ipf_stack_t *ifs = fin->fin_ifs; if (fin != NULL) ifp = fin->fin_ifp; @@ -2917,8 +2993,8 @@ struct in_addr src , mapdst; goto find_in_wild_ports; hv = NAT_HASH_FN(dst, dport, 0xffffffff); - hv = NAT_HASH_FN(src.s_addr, hv + sport, ipf_nattable_sz); - nat = nat_table[1][hv]; + hv = NAT_HASH_FN(src.s_addr, hv + sport, ifs->ifs_ipf_nattable_sz); + nat = ifs->ifs_nat_table[1][hv]; for (; nat; nat = nat->nat_hnext[1]) { if (nat->nat_ifps[0] != NULL) { if ((ifp != NULL) && (ifp != nat->nat_ifps[0])) @@ -2979,17 
+3055,17 @@ struct in_addr src , mapdst; find_in_wild_ports: if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH)) return NULL; - if (nat_stats.ns_wilds == 0) + if (ifs->ifs_nat_stats.ns_wilds == 0) return NULL; - RWLOCK_EXIT(&ipf_nat); + RWLOCK_EXIT(&ifs->ifs_ipf_nat); hv = NAT_HASH_FN(dst, 0, 0xffffffff); - hv = NAT_HASH_FN(src.s_addr, hv, ipf_nattable_sz); + hv = NAT_HASH_FN(src.s_addr, hv, ifs->ifs_ipf_nattable_sz); - WRITE_ENTER(&ipf_nat); + WRITE_ENTER(&ifs->ifs_ipf_nat); - nat = nat_table[1][hv]; + nat = ifs->ifs_nat_table[1][hv]; for (; nat; nat = nat->nat_hnext[1]) { if (nat->nat_ifps[0] != NULL) { if ((ifp != NULL) && (ifp != nat->nat_ifps[0])) @@ -3016,19 +3092,19 @@ find_in_wild_ports: if (nat == NULL) break; } else { - MUTEX_ENTER(&ipf_nat_new); - nat_stats.ns_wilds--; - MUTEX_EXIT(&ipf_nat_new); + MUTEX_ENTER(&ifs->ifs_ipf_nat_new); + ifs->ifs_nat_stats.ns_wilds--; + MUTEX_EXIT(&ifs->ifs_ipf_nat_new); } nat->nat_oport = sport; nat->nat_outport = dport; nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT); - nat_tabmove(nat); + nat_tabmove(nat, ifs); break; } } - MUTEX_DOWNGRADE(&ipf_nat); + MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat); return nat; } @@ -3044,8 +3120,9 @@ find_in_wild_ports: /* original was placed in the table without hashing on the ports and we now */ /* want to include hashing on port numbers. */ /* ------------------------------------------------------------------------ */ -static void nat_tabmove(nat) +static void nat_tabmove(nat, ifs) nat_t *nat; +ipf_stack_t *ifs; { nat_t **natp; u_int hv; @@ -3059,39 +3136,39 @@ nat_t *nat; if (nat->nat_hnext[0]) nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0]; *nat->nat_phnext[0] = nat->nat_hnext[0]; - nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--; + ifs->ifs_nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--; if (nat->nat_hnext[1]) nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1]; *nat->nat_phnext[1] = nat->nat_hnext[1]; - nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--; + ifs->ifs_nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--; /* * Add into the NAT table in the new position */ hv = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport, 0xffffffff); hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport, - ipf_nattable_sz); + ifs->ifs_ipf_nattable_sz); nat->nat_hv[0] = hv; - natp = &nat_table[0][hv]; + natp = &ifs->ifs_nat_table[0][hv]; if (*natp) (*natp)->nat_phnext[0] = &nat->nat_hnext[0]; nat->nat_phnext[0] = natp; nat->nat_hnext[0] = *natp; *natp = nat; - nat_stats.ns_bucketlen[0][hv]++; + ifs->ifs_nat_stats.ns_bucketlen[0][hv]++; hv = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport, 0xffffffff); hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport, - ipf_nattable_sz); + ifs->ifs_ipf_nattable_sz); nat->nat_hv[1] = hv; - natp = &nat_table[1][hv]; + natp = &ifs->ifs_nat_table[1][hv]; if (*natp) (*natp)->nat_phnext[1] = &nat->nat_hnext[1]; nat->nat_phnext[1] = natp; nat->nat_hnext[1] = *natp; *natp = nat; - nat_stats.ns_bucketlen[1][hv]++; + ifs->ifs_nat_stats.ns_bucketlen[1][hv]++; } @@ -3132,11 +3209,12 @@ struct in_addr src , dst; void *ifp; u_int hv; frentry_t *fr; + ipf_stack_t *ifs = fin->fin_ifs; fr = fin->fin_fr; if ((fr != NULL) && !(fr->fr_flags & FR_DUP) && - fr->fr_tif.fd_ifp && fr->fr_tif.fd_ifp != (void *)-1) + fr->fr_tif.fd_ifp && fr->fr_tif.fd_ifp != (void *)-1) ifp = fr->fr_tif.fd_ifp; else ifp = fin->fin_ifp; @@ -3167,8 +3245,8 @@ struct in_addr src , dst; goto find_out_wild_ports; hv = NAT_HASH_FN(srcip, sport, 0xffffffff); - hv = NAT_HASH_FN(dst.s_addr, hv + dport, ipf_nattable_sz); - nat = nat_table[0][hv]; + hv = 
NAT_HASH_FN(dst.s_addr, hv + dport, ifs->ifs_ipf_nattable_sz); + nat = ifs->ifs_nat_table[0][hv]; for (; nat; nat = nat->nat_hnext[0]) { if (nat->nat_ifps[1] != NULL) { if ((ifp != NULL) && (ifp != nat->nat_ifps[1])) @@ -3219,21 +3297,21 @@ struct in_addr src , dst; find_out_wild_ports: if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH)) return NULL; - if (nat_stats.ns_wilds == 0) + if (ifs->ifs_nat_stats.ns_wilds == 0) return NULL; - RWLOCK_EXIT(&ipf_nat); + RWLOCK_EXIT(&ifs->ifs_ipf_nat); hv = NAT_HASH_FN(srcip, 0, 0xffffffff); - hv = NAT_HASH_FN(dst.s_addr, hv, ipf_nattable_sz); + hv = NAT_HASH_FN(dst.s_addr, hv, ifs->ifs_ipf_nattable_sz); - WRITE_ENTER(&ipf_nat); + WRITE_ENTER(&ifs->ifs_ipf_nat); - nat = nat_table[0][hv]; + nat = ifs->ifs_nat_table[0][hv]; for (; nat; nat = nat->nat_hnext[0]) { if (nat->nat_ifps[1] != NULL) { if ((ifp != NULL) && (ifp != nat->nat_ifps[1])) - continue; + continue; } else if (ifp != NULL) nat->nat_ifps[1] = ifp; @@ -3256,21 +3334,21 @@ find_out_wild_ports: if (nat == NULL) break; } else { - MUTEX_ENTER(&ipf_nat_new); - nat_stats.ns_wilds--; - MUTEX_EXIT(&ipf_nat_new); + MUTEX_ENTER(&ifs->ifs_ipf_nat_new); + ifs->ifs_nat_stats.ns_wilds--; + MUTEX_EXIT(&ifs->ifs_ipf_nat_new); } nat->nat_inport = sport; nat->nat_oport = dport; if (nat->nat_outport == 0) nat->nat_outport = sport; nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT); - nat_tabmove(nat); + nat_tabmove(nat, ifs); break; } } - MUTEX_DOWNGRADE(&ipf_nat); + MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat); return nat; } @@ -3285,8 +3363,9 @@ find_out_wild_ports: /* */ /* Lookup the NAT tables to search for a matching redirect */ /* ------------------------------------------------------------------------ */ -nat_t *nat_lookupredir(np) +nat_t *nat_lookupredir(np, ifs) natlookup_t *np; +ipf_stack_t *ifs; { fr_info_t fi; nat_t *nat; @@ -3306,6 +3385,7 @@ natlookup_t *np; else if (np->nl_flags & (IPN_ICMPERR|IPN_ICMPQUERY)) fi.fin_p = IPPROTO_ICMP; + fi.fin_ifs = ifs; /* * We can do two sorts of lookups: * - IPN_IN: we have the `real' and `out' address, look for `in'. 
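Once a wildcard entry learns its peer ports in the find_*_wild_ports paths above, nat_tabmove() re-files it: unlink from both current buckets, recompute the hashes with the ports mixed in, and head-insert into the new buckets. A sketch of that re-file step, assuming the map_t, table, bucketlen and hashkey definitions from the insertion sketch earlier (hashkey2 is a hypothetical hash that also covers a port):

static unsigned int
hashkey2(unsigned int addr, unsigned int port)
{
	return (((addr ^ (port * 31u)) * 2654435761u) % NBUCKETS);
}

void
map_rehash(map_t *m, unsigned int port_in, unsigned int port_out)
{
	unsigned int hv_old[2], hv_new[2];
	int i;

	hv_old[0] = hashkey(m->m_key[0]);		/* where it sits now */
	hv_old[1] = hashkey(m->m_key[1]);
	hv_new[0] = hashkey2(m->m_key[0], port_in);	/* ports now known */
	hv_new[1] = hashkey2(m->m_key[1], port_out);

	for (i = 0; i < 2; i++) {
		/* unlink from the old bucket via the back-pointer */
		if (m->m_hnext[i] != NULL)
			m->m_hnext[i]->m_phnext[i] = m->m_phnext[i];
		*m->m_phnext[i] = m->m_hnext[i];
		bucketlen[i][hv_old[i]]--;

		/* head-insert into the new bucket */
		m->m_phnext[i] = &table[i][hv_new[i]];
		m->m_hnext[i] = table[i][hv_new[i]];
		if (m->m_hnext[i] != NULL)
			m->m_hnext[i]->m_phnext[i] = &m->m_hnext[i];
		table[i][hv_new[i]] = m;
		bucketlen[i][hv_new[i]]++;
	}
}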
@@ -3331,6 +3411,7 @@ natlookup_t *np; fin.fin_p = nat->nat_p; fin.fin_data[0] = ntohs(nat->nat_outport); fin.fin_data[1] = ntohs(nat->nat_oport); + fin.fin_ifs = ifs; if (nat_inlookup(&fin, np->nl_flags, fin.fin_p, nat->nat_outip, nat->nat_oip) != NULL) { @@ -3416,6 +3497,7 @@ ipnat_t *np; { ipftq_t *ifq, *ifq2; ipftqent_t *tqe; + ipf_stack_t *ifs = fin->fin_ifs; MUTEX_ENTER(&nat->nat_lock); tqe = &nat->nat_tqe; @@ -3432,18 +3514,18 @@ ipnat_t *np; ifq2 = NULL; if (nat->nat_p == IPPROTO_TCP && ifq2 == NULL) { - (void) fr_tcp_age(&nat->nat_tqe, fin, nat_tqb, 0); + (void) fr_tcp_age(&nat->nat_tqe, fin, ifs->ifs_nat_tqb, 0); } else { if (ifq2 == NULL) { if (nat->nat_p == IPPROTO_UDP) - ifq2 = &nat_udptq; + ifq2 = &ifs->ifs_nat_udptq; else if (nat->nat_p == IPPROTO_ICMP) - ifq2 = &nat_icmptq; + ifq2 = &ifs->ifs_nat_icmptq; else - ifq2 = &nat_iptq; + ifq2 = &ifs->ifs_nat_iptq; } - fr_movequeue(tqe, ifq, ifq2); + fr_movequeue(tqe, ifq, ifq2, ifs); } MUTEX_EXIT(&nat->nat_lock); } @@ -3478,8 +3560,9 @@ u_32_t *passp; int natadd = 1; frentry_t *fr; nat_t *nat; + ipf_stack_t *ifs = fin->fin_ifs; - if (nat_stats.ns_rules == 0 || fr_nat_lock != 0) + if (ifs->ifs_nat_stats.ns_rules == 0 || ifs->ifs_fr_nat_lock != 0) return 0; natfailed = 0; @@ -3519,7 +3602,7 @@ u_32_t *passp; ipa = fin->fin_saddr; - READ_ENTER(&ipf_nat); + READ_ENTER(&ifs->ifs_ipf_nat); if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) && (nat = nat_icmperror(fin, &nflags, NAT_OUTBOUND))) @@ -3536,14 +3619,14 @@ u_32_t *passp; * If there is no current entry in the nat table for this IP#, * create one for it (if there is a matching rule). */ - RWLOCK_EXIT(&ipf_nat); + RWLOCK_EXIT(&ifs->ifs_ipf_nat); msk = 0xffffffff; - nmsk = nat_masks; - WRITE_ENTER(&ipf_nat); + nmsk = ifs->ifs_nat_masks; + WRITE_ENTER(&ifs->ifs_ipf_nat); maskloop: iph = ipa & htonl(msk); - hv = NAT_HASH_FN(iph, 0, ipf_natrules_sz); - for (np = nat_rules[hv]; np; np = np->in_mnext) + hv = NAT_HASH_FN(iph, 0, ifs->ifs_ipf_natrules_sz); + for (np = ifs->ifs_nat_rules[hv]; np; np = np->in_mnext) { if ((np->in_ifps[1] && (np->in_ifps[1] != ifp))) continue; @@ -3590,7 +3673,7 @@ maskloop: goto maskloop; } } - MUTEX_DOWNGRADE(&ipf_nat); + MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat); } if (nat != NULL) { @@ -3603,7 +3686,7 @@ maskloop: } } else rval = natfailed; - RWLOCK_EXIT(&ipf_nat); + RWLOCK_EXIT(&ifs->ifs_ipf_nat); if (rval == -1) { if (passp != NULL) @@ -3637,13 +3720,14 @@ u_32_t nflags; tcphdr_t *tcp; ipnat_t *np; int i; + ipf_stack_t *ifs = fin->fin_ifs; #if SOLARIS && defined(_KERNEL) net_data_t net_data_p; if (fin->fin_v == 4) - net_data_p = ipf_ipv4; + net_data_p = ifs->ifs_ipf_ipv4; else - net_data_p = ipf_ipv6; + net_data_p = ifs->ifs_ipf_ipv6; #endif tcp = NULL; @@ -3751,7 +3835,7 @@ u_32_t nflags; i = 1; } else i = 1; - ATOMIC_INCL(nat_stats.ns_mapped[1]); + ATOMIC_INCL(ifs->ifs_nat_stats.ns_mapped[1]); fin->fin_flx |= FI_NATED; return i; } @@ -3786,8 +3870,9 @@ u_32_t *passp; ipnat_t *np; nat_t *nat; u_32_t iph; + ipf_stack_t *ifs = fin->fin_ifs; - if (nat_stats.ns_rules == 0 || fr_nat_lock != 0) + if (ifs->ifs_nat_stats.ns_rules == 0 || ifs->ifs_fr_nat_lock != 0) return 0; tcp = NULL; @@ -3830,7 +3915,7 @@ u_32_t *passp; in = fin->fin_dst; - READ_ENTER(&ipf_nat); + READ_ENTER(&ifs->ifs_ipf_nat); if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) && (nat = nat_icmperror(fin, &nflags, NAT_INBOUND))) @@ -3843,18 +3928,18 @@ u_32_t *passp; } else { u_32_t hv, msk, rmsk; - RWLOCK_EXIT(&ipf_nat); - rmsk = rdr_masks; + 
RWLOCK_EXIT(&ifs->ifs_ipf_nat); + rmsk = ifs->ifs_rdr_masks; msk = 0xffffffff; - WRITE_ENTER(&ipf_nat); + WRITE_ENTER(&ifs->ifs_ipf_nat); /* * If there is no current entry in the nat table for this IP#, * create one for it (if there is a matching rule). */ maskloop: iph = in.s_addr & htonl(msk); - hv = NAT_HASH_FN(iph, 0, ipf_rdrrules_sz); - for (np = rdr_rules[hv]; np; np = np->in_rnext) { + hv = NAT_HASH_FN(iph, 0, ifs->ifs_ipf_rdrrules_sz); + for (np = ifs->ifs_rdr_rules[hv]; np; np = np->in_rnext) { if (np->in_ifps[0] && (np->in_ifps[0] != ifp)) continue; if (np->in_v != fin->fin_v) @@ -3901,7 +3986,7 @@ maskloop: goto maskloop; } } - MUTEX_DOWNGRADE(&ipf_nat); + MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat); } if (nat != NULL) { rval = fr_natin(fin, nat, natadd, nflags); @@ -3914,7 +3999,7 @@ maskloop: } } else rval = natfailed; - RWLOCK_EXIT(&ipf_nat); + RWLOCK_EXIT(&ifs->ifs_ipf_nat); if (rval == -1) { if (passp != NULL) @@ -3949,13 +4034,14 @@ u_32_t nflags; tcphdr_t *tcp; ipnat_t *np; int i; + ipf_stack_t *ifs = fin->fin_ifs; #if SOLARIS && defined(_KERNEL) net_data_t net_data_p; if (fin->fin_v == 4) - net_data_p = ipf_ipv4; + net_data_p = ifs->ifs_ipf_ipv4; else - net_data_p = ipf_ipv6; + net_data_p = ifs->ifs_ipf_ipv6; #endif tcp = NULL; @@ -4060,7 +4146,7 @@ u_32_t nflags; else fix_outcksum(csump, sumd); } - ATOMIC_INCL(nat_stats.ns_mapped[0]); + ATOMIC_INCL(ifs->ifs_nat_stats.ns_mapped[0]); fin->fin_flx |= FI_NATED; if (np != NULL && np->in_tag.ipt_num[0] != 0) fin->fin_nattag = &np->in_tag; @@ -4142,12 +4228,13 @@ u_int nflags; /* */ /* Free all memory used by NAT structures allocated at runtime. */ /* ------------------------------------------------------------------------ */ -void fr_natunload() +void fr_natunload(ifs) +ipf_stack_t *ifs; { ipftq_t *ifq, *ifqnext; - (void) nat_clearlist(); - (void) nat_flushtable(); + (void) nat_clearlist(ifs); + (void) nat_flushtable(ifs); /* * Proxy timeout queues are not cleaned here because although they @@ -4156,60 +4243,65 @@ void fr_natunload() * Should the proxy timeouts have their own list? There's no real * justification as this is the only complication. 
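The maskloop in fr_checknatin() above does not probe every possible prefix length: a per-stack bitmask (ifs_rdr_masks / ifs_nat_masks) records which mask widths actually have rules, and only those widths are hashed, most specific first. A standalone sketch of the idea with a simplified rule table (hypothetical names, host byte order for brevity):

#include <stdint.h>
#include <stddef.h>

#define	NRULEBUCKETS	127

typedef struct rule {
	struct rule	*r_next;
	uint32_t	r_addr;		/* already masked */
	uint32_t	r_mask;
} rule_t;

static rule_t	*rules[NRULEBUCKETS];	/* populated when rules are loaded */
static uint64_t	masks_in_use;		/* bit k set => some rule has a /k */

static unsigned int
rulehash(uint32_t a)
{
	return (a % NRULEBUCKETS);
}

/* Return the most specific rule matching addr, or NULL. */
rule_t *
rule_lookup(uint32_t addr)
{
	int k;

	for (k = 32; k >= 0; k--) {
		uint32_t mask, masked;
		rule_t *r;

		if ((masks_in_use & (1ULL << k)) == 0)
			continue;	/* no rule with this prefix length */

		mask = (k == 0) ? 0 : 0xffffffffu << (32 - k);
		masked = addr & mask;
		for (r = rules[rulehash(masked)]; r != NULL; r = r->r_next)
			if (r->r_mask == mask && r->r_addr == masked)
				return (r);
	}
	return (NULL);
}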
*/ - for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) { + for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) { ifqnext = ifq->ifq_next; if (((ifq->ifq_flags & IFQF_PROXY) == 0) && (fr_deletetimeoutqueue(ifq) == 0)) - fr_freetimeoutqueue(ifq); + fr_freetimeoutqueue(ifq, ifs); } - if (nat_table[0] != NULL) { - KFREES(nat_table[0], sizeof(nat_t *) * ipf_nattable_sz); - nat_table[0] = NULL; + if (ifs->ifs_nat_table[0] != NULL) { + KFREES(ifs->ifs_nat_table[0], + sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz); + ifs->ifs_nat_table[0] = NULL; } - if (nat_table[1] != NULL) { - KFREES(nat_table[1], sizeof(nat_t *) * ipf_nattable_sz); - nat_table[1] = NULL; + if (ifs->ifs_nat_table[1] != NULL) { + KFREES(ifs->ifs_nat_table[1], + sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz); + ifs->ifs_nat_table[1] = NULL; } - if (nat_rules != NULL) { - KFREES(nat_rules, sizeof(ipnat_t *) * ipf_natrules_sz); - nat_rules = NULL; + if (ifs->ifs_nat_rules != NULL) { + KFREES(ifs->ifs_nat_rules, + sizeof(ipnat_t *) * ifs->ifs_ipf_natrules_sz); + ifs->ifs_nat_rules = NULL; } - if (rdr_rules != NULL) { - KFREES(rdr_rules, sizeof(ipnat_t *) * ipf_rdrrules_sz); - rdr_rules = NULL; + if (ifs->ifs_rdr_rules != NULL) { + KFREES(ifs->ifs_rdr_rules, + sizeof(ipnat_t *) * ifs->ifs_ipf_rdrrules_sz); + ifs->ifs_rdr_rules = NULL; } - if (maptable != NULL) { - KFREES(maptable, sizeof(hostmap_t *) * ipf_hostmap_sz); - maptable = NULL; + if (ifs->ifs_maptable != NULL) { + KFREES(ifs->ifs_maptable, + sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz); + ifs->ifs_maptable = NULL; } - if (nat_stats.ns_bucketlen[0] != NULL) { - KFREES(nat_stats.ns_bucketlen[0], - sizeof(u_long *) * ipf_nattable_sz); - nat_stats.ns_bucketlen[0] = NULL; + if (ifs->ifs_nat_stats.ns_bucketlen[0] != NULL) { + KFREES(ifs->ifs_nat_stats.ns_bucketlen[0], + sizeof(u_long *) * ifs->ifs_ipf_nattable_sz); + ifs->ifs_nat_stats.ns_bucketlen[0] = NULL; } - if (nat_stats.ns_bucketlen[1] != NULL) { - KFREES(nat_stats.ns_bucketlen[1], - sizeof(u_long *) * ipf_nattable_sz); - nat_stats.ns_bucketlen[1] = NULL; + if (ifs->ifs_nat_stats.ns_bucketlen[1] != NULL) { + KFREES(ifs->ifs_nat_stats.ns_bucketlen[1], + sizeof(u_long *) * ifs->ifs_ipf_nattable_sz); + ifs->ifs_nat_stats.ns_bucketlen[1] = NULL; } - if (fr_nat_maxbucket_reset == 1) - fr_nat_maxbucket = 0; + if (ifs->ifs_fr_nat_maxbucket_reset == 1) + ifs->ifs_fr_nat_maxbucket = 0; - if (fr_nat_init == 1) { - fr_nat_init = 0; - fr_sttab_destroy(nat_tqb); + if (ifs->ifs_fr_nat_init == 1) { + ifs->ifs_fr_nat_init = 0; + fr_sttab_destroy(ifs->ifs_nat_tqb); - RW_DESTROY(&ipf_natfrag); - RW_DESTROY(&ipf_nat); + RW_DESTROY(&ifs->ifs_ipf_natfrag); + RW_DESTROY(&ifs->ifs_ipf_nat); - MUTEX_DESTROY(&ipf_nat_new); - MUTEX_DESTROY(&ipf_natio); + MUTEX_DESTROY(&ifs->ifs_ipf_nat_new); + MUTEX_DESTROY(&ifs->ifs_ipf_natio); - MUTEX_DESTROY(&nat_udptq.ifq_lock); - MUTEX_DESTROY(&nat_icmptq.ifq_lock); - MUTEX_DESTROY(&nat_iptq.ifq_lock); + MUTEX_DESTROY(&ifs->ifs_nat_udptq.ifq_lock); + MUTEX_DESTROY(&ifs->ifs_nat_icmptq.ifq_lock); + MUTEX_DESTROY(&ifs->ifs_nat_iptq.ifq_lock); } } @@ -4222,7 +4314,8 @@ void fr_natunload() /* Check all of the timeout queues for entries at the top which need to be */ /* expired. 
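fr_natexpire(), whose header closes above, relies on each timeout queue being kept in expiry order: it walks from the head, deletes entries whose deadline has passed, and stops at the first one still alive. A sketch of that walk; entry_delete() stands in for nat_delete(..., NL_EXPIRE, ifs) and is assumed to unlink the entry from its queue:

#include <stddef.h>

typedef struct tqent {
	struct tqent	*tqe_next;
	unsigned long	tqe_die;	/* tick at which the entry expires */
	void		*tqe_parent;	/* the mapping that owns this entry */
} tqent_t;

typedef struct tq {
	struct tq	*ifq_next;
	tqent_t		*ifq_head;
} tq_t;

extern void entry_delete(void *parent);	/* assumed: also unlinks the tqent */

/* Entries sit in expiry order, so each queue walk stops at the first
 * entry that has not yet reached its deadline. */
void
expire_queues(tq_t *queues, unsigned long now)
{
	tq_t *q;
	tqent_t *t, *next;

	for (q = queues; q != NULL; q = q->ifq_next) {
		for (next = q->ifq_head; (t = next) != NULL; ) {
			if (t->tqe_die > now)
				break;		/* rest of queue is newer */
			next = t->tqe_next;	/* grab next before delete */
			entry_delete(t->tqe_parent);
		}
	}
}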
*/ /* ------------------------------------------------------------------------ */ -void fr_natexpire() +void fr_natexpire(ifs) +ipf_stack_t *ifs; { ipftq_t *ifq, *ifqnext; ipftqent_t *tqe, *tqn; @@ -4230,37 +4323,37 @@ void fr_natexpire() SPL_INT(s); SPL_NET(s); - WRITE_ENTER(&ipf_nat); - for (ifq = nat_tqb, i = 0; ifq != NULL; ifq = ifq->ifq_next) { + WRITE_ENTER(&ifs->ifs_ipf_nat); + for (ifq = ifs->ifs_nat_tqb, i = 0; ifq != NULL; ifq = ifq->ifq_next) { for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) { - if (tqe->tqe_die > fr_ticks) + if (tqe->tqe_die > ifs->ifs_fr_ticks) break; tqn = tqe->tqe_next; - nat_delete(tqe->tqe_parent, NL_EXPIRE); + nat_delete(tqe->tqe_parent, NL_EXPIRE, ifs); } } - for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) { + for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) { ifqnext = ifq->ifq_next; for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) { - if (tqe->tqe_die > fr_ticks) + if (tqe->tqe_die > ifs->ifs_fr_ticks) break; tqn = tqe->tqe_next; - nat_delete(tqe->tqe_parent, NL_EXPIRE); + nat_delete(tqe->tqe_parent, NL_EXPIRE, ifs); } } - for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) { + for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) { ifqnext = ifq->ifq_next; if (((ifq->ifq_flags & IFQF_DELETE) != 0) && (ifq->ifq_ref == 0)) { - fr_freetimeoutqueue(ifq); + fr_freetimeoutqueue(ifq, ifs); } } - RWLOCK_EXIT(&ipf_nat); + RWLOCK_EXIT(&ifs->ifs_ipf_nat); SPL_X(s); } @@ -4276,23 +4369,24 @@ void fr_natexpire() /* matches the one passed in) and change it, recalculating the checksum sum */ /* difference too. */ /* ------------------------------------------------------------------------ */ -void fr_nataddrsync(ifp, addr) +void fr_nataddrsync(ifp, addr, ifs) void *ifp; struct in_addr *addr; +ipf_stack_t *ifs; { u_32_t sum1, sum2, sumd; nat_t *nat; ipnat_t *np; SPL_INT(s); - if (fr_running <= 0) + if (ifs->ifs_fr_running <= 0) return; SPL_NET(s); - WRITE_ENTER(&ipf_nat); + WRITE_ENTER(&ifs->ifs_ipf_nat); - if (fr_running <= 0) { - RWLOCK_EXIT(&ipf_nat); + if (ifs->ifs_fr_running <= 0) { + RWLOCK_EXIT(&ifs->ifs_ipf_nat); return; } @@ -4302,7 +4396,7 @@ struct in_addr *addr; * which will get changed are those which are "map ... -> 0/32", * where the rule specifies the address is taken from the interface. */ - for (nat = nat_instances; nat; nat = nat->nat_next) { + for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) { if (addr != NULL) { if (((ifp != NULL) && ifp != (nat->nat_ifps[0])) || ((nat->nat_flags & IPN_TCP) != 0)) @@ -4330,7 +4424,7 @@ struct in_addr *addr; */ sum1 = nat->nat_outip.s_addr; if (fr_ifpaddr(4, FRI_NORMAL, nat->nat_ifps[0], - &in, NULL) != -1) + &in, NULL, ifs) != -1) nat->nat_outip = in; sum2 = nat->nat_outip.s_addr; } else { @@ -4352,7 +4446,7 @@ struct in_addr *addr; nat->nat_sumd[1] = nat->nat_sumd[0]; } - RWLOCK_EXIT(&ipf_nat); + RWLOCK_EXIT(&ifs->ifs_ipf_nat); SPL_X(s); } @@ -4371,10 +4465,11 @@ struct in_addr *addr; /* "name" and for "action == IPFSYNC_OLDIFP", ifp is a pointer for which */ /* there is no longer any interface associated with it. 
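fr_nataddrsync() above keeps existing mappings usable after an interface address change by folding the difference between the old and new 32-bit addresses into the stored checksum delta instead of recomputing whole checksums. A hedged sketch of that one's-complement adjustment (standard RFC 1624-style folding, not the exact IPFilter macros):

#include <stdint.h>

/* Fold a 32-bit accumulator down to a 16-bit one's-complement sum. */
static uint16_t
csum_fold(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return ((uint16_t)sum);
}

/*
 * Adjust an existing one's-complement checksum when a 32-bit field in
 * the covered data changes from oldval to newval (treated as two
 * 16-bit words each).
 */
uint16_t
csum_adjust32(uint16_t csum, uint32_t oldval, uint32_t newval)
{
	uint32_t sum;

	/* ~csum is the sum over the data; remove old words, add new ones */
	sum = (uint16_t)~csum;
	sum += (uint16_t)~(oldval >> 16) + (uint16_t)~(oldval & 0xffff);
	sum += (newval >> 16) + (newval & 0xffff);
	return ((uint16_t)~csum_fold(sum));
}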
*/ /* ------------------------------------------------------------------------ */ -void fr_natifpsync(action, ifp, name) +void fr_natifpsync(action, ifp, name, ifs) int action; void *ifp; char *name; +ipf_stack_t *ifs; { #if defined(_KERNEL) && !defined(MENTAT) && defined(USE_SPL) int s; @@ -4382,49 +4477,49 @@ char *name; nat_t *nat; ipnat_t *n; - if (fr_running <= 0) + if (ifs->ifs_fr_running <= 0) return; SPL_NET(s); - WRITE_ENTER(&ipf_nat); + WRITE_ENTER(&ifs->ifs_ipf_nat); - if (fr_running <= 0) { - RWLOCK_EXIT(&ipf_nat); + if (ifs->ifs_fr_running <= 0) { + RWLOCK_EXIT(&ifs->ifs_ipf_nat); return; } switch (action) { case IPFSYNC_RESYNC : - for (nat = nat_instances; nat; nat = nat->nat_next) { + for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) { if ((ifp == nat->nat_ifps[0]) || (nat->nat_ifps[0] == (void *)-1)) { nat->nat_ifps[0] = - fr_resolvenic(nat->nat_ifnames[0], 4); + fr_resolvenic(nat->nat_ifnames[0], 4, ifs); } if ((ifp == nat->nat_ifps[1]) || (nat->nat_ifps[1] == (void *)-1)) { nat->nat_ifps[1] = - fr_resolvenic(nat->nat_ifnames[1], 4); + fr_resolvenic(nat->nat_ifnames[1], 4, ifs); } } - for (n = nat_list; (n != NULL); n = n->in_next) { + for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) { if (n->in_ifps[0] == ifp || n->in_ifps[0] == (void *)-1) { n->in_ifps[0] = - fr_resolvenic(n->in_ifnames[0], 4); + fr_resolvenic(n->in_ifnames[0], 4, ifs); } if (n->in_ifps[1] == ifp || n->in_ifps[1] == (void *)-1) { n->in_ifps[1] = - fr_resolvenic(n->in_ifnames[1], 4); + fr_resolvenic(n->in_ifnames[1], 4, ifs); } } break; case IPFSYNC_NEWIFP : - for (nat = nat_instances; nat; nat = nat->nat_next) { + for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) { if (!strncmp(name, nat->nat_ifnames[0], sizeof(nat->nat_ifnames[0]))) nat->nat_ifps[0] = ifp; @@ -4432,7 +4527,7 @@ char *name; sizeof(nat->nat_ifnames[1]))) nat->nat_ifps[1] = ifp; } - for (n = nat_list; (n != NULL); n = n->in_next) { + for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) { if (!strncmp(name, n->in_ifnames[0], sizeof(n->in_ifnames[0]))) n->in_ifps[0] = ifp; @@ -4442,13 +4537,13 @@ char *name; } break; case IPFSYNC_OLDIFP : - for (nat = nat_instances; nat; nat = nat->nat_next) { + for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) { if (ifp == nat->nat_ifps[0]) nat->nat_ifps[0] = (void *)-1; if (ifp == nat->nat_ifps[1]) nat->nat_ifps[1] = (void *)-1; } - for (n = nat_list; (n != NULL); n = n->in_next) { + for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) { if (n->in_ifps[0] == ifp) n->in_ifps[0] = (void *)-1; if (n->in_ifps[1] == ifp) @@ -4456,7 +4551,7 @@ char *name; } break; } - RWLOCK_EXIT(&ipf_nat); + RWLOCK_EXIT(&ifs->ifs_ipf_nat); SPL_X(s); } @@ -4511,9 +4606,10 @@ int icmptype; /* */ /* Creates a NAT log entry. 
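fr_natifpsync() above repairs cached interface pointers rather than tearing mappings down: on unplumb the stale pointer is poisoned with (void *)-1, and when an interface (re)appears every mapping and rule naming it gets the fresh pointer back. A simplified sketch of the two passes (illustrative types, not the real nat_t/ipnat_t):

#include <stddef.h>
#include <string.h>

#define	UNKNOWN_IFP	((void *)-1)	/* cached pointer no longer valid */

typedef struct mapping {
	struct mapping	*m_next;
	char		m_ifname[32];	/* the name survives replumbing */
	void		*m_ifp;		/* cached driver pointer */
} mapping_t;

/* An interface went away: poison every cached pointer to it. */
void
if_sync_detach(mapping_t *list, void *ifp)
{
	mapping_t *m;

	for (m = list; m != NULL; m = m->m_next)
		if (m->m_ifp == ifp)
			m->m_ifp = UNKNOWN_IFP;
}

/* An interface (re)appeared: refresh every mapping that names it. */
void
if_sync_attach(mapping_t *list, const char *name, void *ifp)
{
	mapping_t *m;

	for (m = list; m != NULL; m = m->m_next)
		if (strncmp(name, m->m_ifname, sizeof (m->m_ifname)) == 0)
			m->m_ifp = ifp;
}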
*/ /* ------------------------------------------------------------------------ */ -void nat_log(nat, type) +void nat_log(nat, type, ifs) struct nat *nat; u_int type; +ipf_stack_t *ifs; { #ifdef IPFILTER_LOG # ifndef LARGE_NAT @@ -4540,7 +4636,8 @@ u_int type; natl.nl_rule = -1; # ifndef LARGE_NAT if (nat->nat_ptr != NULL) { - for (rulen = 0, np = nat_list; np; np = np->in_next, rulen++) + for (rulen = 0, np = ifs->ifs_nat_list; np; + np = np->in_next, rulen++) if (np == nat->nat_ptr) { natl.nl_rule = rulen; break; @@ -4551,7 +4648,7 @@ u_int type; sizes[0] = sizeof(natl); types[0] = 0; - (void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1); + (void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1, ifs); #endif } @@ -4565,16 +4662,49 @@ u_int type; /* Compatibility interface for OpenBSD to trigger the correct updating of */ /* interface references within IPFilter. */ /* ------------------------------------------------------------------------ */ -void nat_ifdetach(ifp) +void nat_ifdetach(ifp, ifs) void *ifp; +ipf_stack_t *ifs; { - frsync(ifp); + frsync(ifp, ifs); return; } #endif /* ------------------------------------------------------------------------ */ +/* Function: fr_ipnatderef */ +/* Returns: Nil */ +/* Parameters: isp(I) - pointer to pointer to NAT rule */ +/* Write Locks: ipf_nat */ +/* */ +/* ------------------------------------------------------------------------ */ +void fr_ipnatderef(inp, ifs) +ipnat_t **inp; +ipf_stack_t *ifs; +{ + ipnat_t *in; + + in = *inp; + *inp = NULL; + in->in_space++; + in->in_use--; + if (in->in_use == 0 && (in->in_flags & IPN_DELETE)) { + if (in->in_apr) + appr_free(in->in_apr); + KFREE(in); + ifs->ifs_nat_stats.ns_rules--; +#ifdef notdef +#if SOLARIS + if (ifs->ifs_nat_stats.ns_rules == 0) + ifs->ifs_pfil_delayed_copy = 1; +#endif +#endif + } +} + + +/* ------------------------------------------------------------------------ */ /* Function: fr_natderef */ /* Returns: Nil */ /* Parameters: isp(I) - pointer to pointer to NAT table entry */ @@ -4582,18 +4712,19 @@ void *ifp; /* Decrement the reference counter for this NAT table entry and free it if */ /* there are no more things using it. */ /* ------------------------------------------------------------------------ */ -void fr_natderef(natp) +void fr_natderef(natp, ifs) nat_t **natp; +ipf_stack_t *ifs; { nat_t *nat; nat = *natp; *natp = NULL; - WRITE_ENTER(&ipf_nat); + WRITE_ENTER(&ifs->ifs_ipf_nat); nat->nat_ref--; if (nat->nat_ref == 0) - nat_delete(nat, NL_EXPIRE); - RWLOCK_EXIT(&ipf_nat); + nat_delete(nat, NL_EXPIRE, ifs); + RWLOCK_EXIT(&ifs->ifs_ipf_nat); } @@ -4614,6 +4745,7 @@ nat_t *nat; frentry_t *fr; nat_t *clone; ipnat_t *np; + ipf_stack_t *ifs = fin->fin_ifs; KMALLOC(clone, nat_t *); if (clone == NULL) @@ -4637,14 +4769,14 @@ nat_t *nat; if (clone->nat_hm) clone->nat_hm->hm_ref++; - if (nat_insert(clone, fin->fin_rev) == -1) { + if (nat_insert(clone, fin->fin_rev, ifs) == -1) { KFREE(clone); return NULL; } np = clone->nat_ptr; if (np != NULL) { - if (nat_logging) - nat_log(clone, (u_int)np->in_redir); + if (ifs->ifs_nat_logging) + nat_log(clone, (u_int)np->in_redir, ifs); np->in_use++; } fr = clone->nat_fr; @@ -4660,14 +4792,14 @@ nat_t *nat; * state of the new NAT from here. 
*/ if (clone->nat_p == IPPROTO_TCP) { - (void) fr_tcp_age(&clone->nat_tqe, fin, nat_tqb, + (void) fr_tcp_age(&clone->nat_tqe, fin, ifs->ifs_nat_tqb, clone->nat_flags); } #ifdef IPFILTER_SYNC clone->nat_sync = ipfsync_new(SMC_NAT, fin, clone); #endif - if (nat_logging) - nat_log(clone, NL_CLONE); + if (ifs->ifs_nat_logging) + nat_log(clone, NL_CLONE, ifs); return clone; } @@ -4813,9 +4945,10 @@ u_short *csump; /* Put the NAT entry on its default queue entry, using rev as a helped in */ /* determining which queue it should be placed on. */ /* ------------------------------------------------------------------------ */ -void fr_setnatqueue(nat, rev) +void fr_setnatqueue(nat, rev, ifs) nat_t *nat; int rev; +ipf_stack_t *ifs; { ipftq_t *oifq, *nifq; @@ -4828,16 +4961,16 @@ int rev; switch (nat->nat_p) { case IPPROTO_UDP : - nifq = &nat_udptq; + nifq = &ifs->ifs_nat_udptq; break; case IPPROTO_ICMP : - nifq = &nat_icmptq; + nifq = &ifs->ifs_nat_icmptq; break; case IPPROTO_TCP : - nifq = nat_tqb + nat->nat_tqe.tqe_state[rev]; + nifq = ifs->ifs_nat_tqb + nat->nat_tqe.tqe_state[rev]; break; default : - nifq = &nat_iptq; + nifq = &ifs->ifs_nat_iptq; break; } } @@ -4848,8 +4981,186 @@ int rev; * another, else put it on the end of the newly determined queue. */ if (oifq != NULL) - fr_movequeue(&nat->nat_tqe, oifq, nifq); + fr_movequeue(&nat->nat_tqe, oifq, nifq, ifs); else - fr_queueappend(&nat->nat_tqe, nifq, nat); + fr_queueappend(&nat->nat_tqe, nifq, nat, ifs); return; } + +/* Function: nat_getnext */ +/* Returns: int - 0 == ok, else error */ +/* Parameters: t(I) - pointer to ipftoken structure */ +/* itp(I) - pointer to ipfgeniter_t structure */ +/* */ +/* Fetch the next nat/ipnat structure pointer from the linked list and */ +/* copy it out to the storage space pointed to by itp_data. The next item */ +/* in the list to look at is put back in the ipftoken struture. */ +/* If we call ipf_freetoken, the accompanying pointer is set to NULL because*/ +/* ipf_freetoken will call a deref function for us and we dont want to call */ +/* that twice (second time would be in the second switch statement below. 
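nat_getnext(), introduced by the comment above, is a token-style iterator: the ipftoken remembers the last entry handed to the caller, the next one is pinned with a reference while the list lock is held so it survives the copy to userland, and only then is the previous entry released. A simplified sketch of that pattern (not the exact nat_getnext logic; the list head, lock and deref helpers are assumptions):

#include <stddef.h>
#include <string.h>

typedef struct item {
	struct item	*i_next;
	int		i_ref;
	int		i_value;
} item_t;

typedef struct iter_token {
	item_t	*t_data;	/* last item handed to this caller */
	int	t_alive;	/* cleared once the walk hits the end */
} iter_token_t;

extern item_t	*item_list;		/* head of the live list */
extern void	item_deref(item_t **);	/* drop a held reference */
extern void	list_lock(void), list_unlock(void);

int
iter_getnext(iter_token_t *t, item_t *out)
{
	item_t *prev = t->t_data, *next;
	item_t zero;

	list_lock();
	next = (prev == NULL) ? item_list : prev->i_next;
	if (next != NULL) {
		if (next->i_next == NULL)
			t->t_alive = 0;		/* last one: walk is done */
		next->i_ref++;			/* pin it for the copy-out */
	} else {
		memset(&zero, 0, sizeof (zero));
		next = &zero;			/* end marker for the caller */
	}
	list_unlock();

	if (prev != NULL)
		item_deref(&prev);		/* release the previous pin */
	t->t_data = (next == &zero) ? NULL : next;
	*out = *next;				/* stands in for COPYOUT() */
	return (0);
}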
*/ +/* ------------------------------------------------------------------------ */ +static int nat_getnext(t, itp, ifs) +ipftoken_t *t; +ipfgeniter_t *itp; +ipf_stack_t *ifs; +{ + hostmap_t *hm, *nexthm = NULL, zerohm; + ipnat_t *ipn, *nextipnat = NULL, zeroipn; + nat_t *nat, *nextnat = NULL, zeronat; + int error = 0; + + READ_ENTER(&ifs->ifs_ipf_nat); + switch (itp->igi_type) + { + case IPFGENITER_HOSTMAP : + hm = t->ipt_data; + if (hm == NULL) { + nexthm = ifs->ifs_ipf_hm_maplist; + } else { + nexthm = hm->hm_hnext; + } + if (nexthm != NULL) { + if (nexthm->hm_hnext == NULL) { + t->ipt_alive = 0; + /* ipf_freetoken(t, ifs); + hm = NULL; */ + } else { + /*MUTEX_ENTER(&nexthm->hm_lock);*/ + nexthm->hm_ref++; + /*MUTEX_EXIT(&nextipnat->hm_lock);*/ + } + + } else { + bzero(&zerohm, sizeof(zerohm)); + nexthm = &zerohm; + } + break; + + case IPFGENITER_IPNAT : + ipn = t->ipt_data; + if (ipn == NULL) { + nextipnat = ifs->ifs_nat_list; + } else { + nextipnat = ipn->in_next; + } + if (nextipnat != NULL) { + if (nextipnat->in_next == NULL) { + t->ipt_alive = 0; + /*ipf_freetoken(t, ifs); + ipn = NULL;*/ + } else { + /* MUTEX_ENTER(&nextipnat->in_lock); */ + nextipnat->in_use++; + /* MUTEX_EXIT(&nextipnat->in_lock); */ + } + } else { + bzero(&zeroipn, sizeof(zeroipn)); + nextipnat = &zeroipn; + } + break; + + case IPFGENITER_NAT : + nat = t->ipt_data; + if (nat == NULL) { + nextnat = ifs->ifs_nat_instances; + } else { + nextnat = nat->nat_next; + } + if (nextnat != NULL) { + if (nextnat->nat_next == NULL) { + t->ipt_alive = 0; + /*ipf_freetoken(t, ifs); + nat = NULL;*/ + } else { + MUTEX_ENTER(&nextnat->nat_lock); + nextnat->nat_ref++; + MUTEX_EXIT(&nextnat->nat_lock); + } + } else { + bzero(&zeronat, sizeof(zeronat)); + nextnat = &zeronat; + } + break; + } + + RWLOCK_EXIT(&ifs->ifs_ipf_nat); + + switch (itp->igi_type) + { + case IPFGENITER_HOSTMAP : + if (hm != NULL) { + WRITE_ENTER(&ifs->ifs_ipf_nat); + fr_hostmapderef(&hm); + RWLOCK_EXIT(&ifs->ifs_ipf_nat); + } + t->ipt_data = nexthm; + error = COPYOUT(nexthm, itp->igi_data, sizeof(*nexthm)); + if (error != 0) + error = EFAULT; + break; + + case IPFGENITER_IPNAT : + if (ipn != NULL) + fr_ipnatderef(&ipn, ifs); + t->ipt_data = nextipnat; + error = COPYOUT(nextipnat, itp->igi_data, sizeof(*nextipnat)); + if (error != 0) + error = EFAULT; + break; + + case IPFGENITER_NAT : + if (nat != NULL) + fr_natderef(&nat, ifs); + t->ipt_data = nextnat; + error = COPYOUT(nextnat, itp->igi_data, sizeof(*nextnat)); + if (error != 0) + error = EFAULT; + break; + } + + return error; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: nat_iterator */ +/* Returns: int - 0 == ok, else error */ +/* Parameters: token(I) - pointer to ipftoken structure */ +/* itp(I) - pointer to ipfgeniter_t structure */ +/* */ +/* This function acts as a handler for the SIOCGENITER ioctls that use a */ +/* generic structure to iterate through a list. There are three different */ +/* linked lists of NAT related information to go through: NAT rules, active */ +/* NAT mappings and the NAT fragment cache. 
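nat_iterator(), described above, is the fan-out point for the SIOCGENITER-style requests: validate the user buffer, then dispatch on the iterator type to the per-list walker. A minimal sketch of such a dispatcher (hypothetical request struct and walker names):

#include <errno.h>
#include <stddef.h>

enum iter_type { ITER_RULE = 1, ITER_MAPPING, ITER_FRAG };

typedef struct iter_req {
	enum iter_type	ir_type;
	void		*ir_data;	/* user buffer to copy into */
} iter_req_t;

extern int rule_getnext(void *token, iter_req_t *);
extern int mapping_getnext(void *token, iter_req_t *);
extern int frag_getnext(void *token, iter_req_t *);

/* One ioctl handler fans out to the per-list walkers. */
int
iter_dispatch(void *token, iter_req_t *req)
{
	if (req->ir_data == NULL)
		return (EFAULT);

	switch (req->ir_type) {
	case ITER_RULE:
		return (rule_getnext(token, req));
	case ITER_MAPPING:
		return (mapping_getnext(token, req));
	case ITER_FRAG:
		return (frag_getnext(token, req));
	default:
		return (EINVAL);
	}
}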
*/ +/* ------------------------------------------------------------------------ */ +static int nat_iterator(token, itp, ifs) +ipftoken_t *token; +ipfgeniter_t *itp; +ipf_stack_t *ifs; +{ + int error; + + if (itp->igi_data == NULL) + return EFAULT; + + token->ipt_subtype = itp->igi_type; + + switch (itp->igi_type) + { + case IPFGENITER_HOSTMAP : + case IPFGENITER_IPNAT : + case IPFGENITER_NAT : + error = nat_getnext(token, itp, ifs); + break; + case IPFGENITER_NATFRAG : + error = fr_nextfrag(token, itp, &ifs->ifs_ipfr_natlist, + &ifs->ifs_ipfr_nattail, + &ifs->ifs_ipf_natfrag, ifs); + break; + default : + error = EINVAL; + break; + } + + return error; +} diff --git a/usr/src/uts/common/inet/ipf/ip_pool.c b/usr/src/uts/common/inet/ipf/ip_pool.c index 7db79e790c..0bfbf088c2 100644 --- a/usr/src/uts/common/inet/ipf/ip_pool.c +++ b/usr/src/uts/common/inet/ipf/ip_pool.c @@ -3,7 +3,7 @@ * * See the IPFILTER.LICENCE file for details on licencing. * - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -70,6 +70,7 @@ struct file; #include <net/if.h> #include <netinet/in.h> +#include "netinet/ipf_stack.h" #include "netinet/ip_compat.h" #include "netinet/ip_fil.h" #include "netinet/ip_pool.h" @@ -89,9 +90,6 @@ static const char rcsid[] = "@(#)$Id: ip_pool.c,v 2.55.2.14 2005/06/12 07:18:26 #ifdef IPFILTER_LOOKUP -ip_pool_stat_t ipoolstat; -ipfrwlock_t ip_poolrw; - /* * Binary tree routines from Sedgewick and enhanced to do ranges of addresses. * NOTE: Insertion *MUST* be from greatest range to least for it to work! @@ -109,9 +107,6 @@ ipfrwlock_t ip_poolrw; * not make it worthwhile not using radix trees. For now the radix tree from * 4.4 BSD is used, but this is not viewed as a long term solution. 
*/ -ip_pool_t *ip_pool_list[IPL_LOGSIZE] = { NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL }; - #ifdef TEST_POOL void treeprint __P((ip_pool_t *)); @@ -126,8 +121,8 @@ main(argc, argv) ip_pool_t *ipo; i6addr_t ip; - RWLOCK_INIT(&ip_poolrw, "poolrw"); - ip_pool_init(); + RWLOCK_INIT(&ifs->ifs_ip_poolrw, "poolrw"); + ip_pool_init(ifs); bzero((char *)&a, sizeof(a)); bzero((char *)&b, sizeof(b)); @@ -135,82 +130,82 @@ main(argc, argv) bzero((char *)&op, sizeof(op)); strcpy(op.iplo_name, "0"); - if (ip_pool_create(&op) == 0) - ipo = ip_pool_find(0, "0"); + if (ip_pool_create(&op, ifs) == 0) + ipo = ip_pool_find(0, "0", ifs); a.adf_addr.in4.s_addr = 0x0a010203; b.adf_addr.in4.s_addr = 0xffffffff; - ip_pool_insert(ipo, &a, &b, 1); - ip_pool_insert(ipo, &a, &b, 1); + ip_pool_insert(ipo, &a, &b, 1, ifs); + ip_pool_insert(ipo, &a, &b, 1, ifs); a.adf_addr.in4.s_addr = 0x0a000000; b.adf_addr.in4.s_addr = 0xff000000; - ip_pool_insert(ipo, &a, &b, 0); - ip_pool_insert(ipo, &a, &b, 0); + ip_pool_insert(ipo, &a, &b, 0, ifs); + ip_pool_insert(ipo, &a, &b, 0, ifs); a.adf_addr.in4.s_addr = 0x0a010100; b.adf_addr.in4.s_addr = 0xffffff00; - ip_pool_insert(ipo, &a, &b, 1); - ip_pool_insert(ipo, &a, &b, 1); + ip_pool_insert(ipo, &a, &b, 1, ifs); + ip_pool_insert(ipo, &a, &b, 1, ifs); a.adf_addr.in4.s_addr = 0x0a010200; b.adf_addr.in4.s_addr = 0xffffff00; - ip_pool_insert(ipo, &a, &b, 0); - ip_pool_insert(ipo, &a, &b, 0); + ip_pool_insert(ipo, &a, &b, 0, ifs); + ip_pool_insert(ipo, &a, &b, 0, ifs); a.adf_addr.in4.s_addr = 0x0a010000; b.adf_addr.in4.s_addr = 0xffff0000; - ip_pool_insert(ipo, &a, &b, 1); - ip_pool_insert(ipo, &a, &b, 1); + ip_pool_insert(ipo, &a, &b, 1, ifs); + ip_pool_insert(ipo, &a, &b, 1, ifs); a.adf_addr.in4.s_addr = 0x0a01020f; b.adf_addr.in4.s_addr = 0xffffffff; - ip_pool_insert(ipo, &a, &b, 1); - ip_pool_insert(ipo, &a, &b, 1); + ip_pool_insert(ipo, &a, &b, 1, ifs); + ip_pool_insert(ipo, &a, &b, 1, ifs); #ifdef DEBUG_POOL treeprint(ipo); #endif ip.in4.s_addr = 0x0a00aabb; printf("search(%#x) = %d (0)\n", ip.in4.s_addr, - ip_pool_search(ipo, 4, &ip)); + ip_pool_search(ipo, 4, &ip, ifs)); ip.in4.s_addr = 0x0a000001; printf("search(%#x) = %d (0)\n", ip.in4.s_addr, - ip_pool_search(ipo, 4, &ip)); + ip_pool_search(ipo, 4, &ip, ifs)); ip.in4.s_addr = 0x0a000101; printf("search(%#x) = %d (0)\n", ip.in4.s_addr, - ip_pool_search(ipo, 4, &ip)); + ip_pool_search(ipo, 4, &ip, ifs)); ip.in4.s_addr = 0x0a010001; printf("search(%#x) = %d (1)\n", ip.in4.s_addr, - ip_pool_search(ipo, 4, &ip)); + ip_pool_search(ipo, 4, &ip, ifs)); ip.in4.s_addr = 0x0a010101; printf("search(%#x) = %d (1)\n", ip.in4.s_addr, - ip_pool_search(ipo, 4, &ip)); + ip_pool_search(ipo, 4, &ip, ifs)); ip.in4.s_addr = 0x0a010201; printf("search(%#x) = %d (0)\n", ip.in4.s_addr, - ip_pool_search(ipo, 4, &ip)); + ip_pool_search(ipo, 4, &ip, ifs)); ip.in4.s_addr = 0x0a010203; printf("search(%#x) = %d (1)\n", ip.in4.s_addr, - ip_pool_search(ipo, 4, &ip)); + ip_pool_search(ipo, 4, &ip, ifs)); ip.in4.s_addr = 0x0a01020f; printf("search(%#x) = %d (1)\n", ip.in4.s_addr, - ip_pool_search(ipo, 4, &ip)); + ip_pool_search(ipo, 4, &ip, ifs)); ip.in4.s_addr = 0x0b00aabb; printf("search(%#x) = %d (-1)\n", ip.in4.s_addr, - ip_pool_search(ipo, 4, &ip)); + ip_pool_search(ipo, 4, &ip, ifs)); #ifdef DEBUG_POOL treeprint(ipo); #endif - ip_pool_fini(); + ip_pool_fini(ifs); return 0; } @@ -237,10 +232,11 @@ ip_pool_t *ipo; /* */ /* Initialise the routing table data structures where required. 
*/ /* ------------------------------------------------------------------------ */ -int ip_pool_init() +int ip_pool_init(ifs) +ipf_stack_t *ifs; { - bzero((char *)&ipoolstat, sizeof(ipoolstat)); + bzero(&ifs->ifs_ipoolstat, sizeof (ip_pool_stat_t)); #if !defined(_KERNEL) || ((BSD < 199306) && (SOLARIS2 < 10)) rn_init(); @@ -258,21 +254,22 @@ int ip_pool_init() /* function for the radix tree that supports the pools. ip_pool_destroy() is*/ /* used to delete the pools one by one to ensure they're properly freed up. */ /* ------------------------------------------------------------------------ */ -void ip_pool_fini() +void ip_pool_fini(ifs) +ipf_stack_t *ifs; { ip_pool_t *p, *q; iplookupop_t op; int i; - ASSERT(rw_read_locked(&ipf_global.ipf_lk) == 0); + ASSERT(rw_read_locked(&ifs->ifs_ipf_global.ipf_lk) == 0); for (i = 0; i <= IPL_LOGMAX; i++) { - for (q = ip_pool_list[i]; (p = q) != NULL; ) { + for (q = ifs->ifs_ip_pool_list[i]; (p = q) != NULL; ) { op.iplo_unit = i; (void)strncpy(op.iplo_name, p->ipo_name, sizeof(op.iplo_name)); q = p->ipo_next; - (void) ip_pool_destroy(&op); + (void) ip_pool_destroy(&op, ifs); } } @@ -290,26 +287,27 @@ void ip_pool_fini() /* Copy the current statistics out into user space, collecting pool list */ /* pointers as appropriate for later use. */ /* ------------------------------------------------------------------------ */ -int ip_pool_statistics(op) +int ip_pool_statistics(op, ifs) iplookupop_t *op; +ipf_stack_t *ifs; { ip_pool_stat_t stats; int unit, i, err = 0; - if (op->iplo_size != sizeof(ipoolstat)) + if (op->iplo_size != sizeof(ip_pool_stat_t)) return EINVAL; - bcopy((char *)&ipoolstat, (char *)&stats, sizeof(stats)); + bcopy((char *)&ifs->ifs_ipoolstat, (char *)&stats, sizeof(stats)); unit = op->iplo_unit; if (unit == IPL_LOGALL) { for (i = 0; i < IPL_LOGSIZE; i++) - stats.ipls_list[i] = ip_pool_list[i]; + stats.ipls_list[i] = ifs->ifs_ip_pool_list[i]; } else if (unit >= 0 && unit < IPL_LOGSIZE) { if (op->iplo_name[0] != '\0') stats.ipls_list[unit] = ip_pool_find(unit, - op->iplo_name); + op->iplo_name, ifs); else - stats.ipls_list[unit] = ip_pool_list[unit]; + stats.ipls_list[unit] = ifs->ifs_ip_pool_list[unit]; } else err = EINVAL; if (err == 0) @@ -327,13 +325,14 @@ iplookupop_t *op; /* Find a matching pool inside the collection of pools for a particular */ /* device, indicated by the unit number. */ /* ------------------------------------------------------------------------ */ -void *ip_pool_find(unit, name) +void *ip_pool_find(unit, name, ifs) int unit; char *name; +ipf_stack_t *ifs; { ip_pool_t *p; - for (p = ip_pool_list[unit]; p != NULL; p = p->ipo_next) + for (p = ifs->ifs_ip_pool_list[unit]; p != NULL; p = p->ipo_next) if (strncmp(p->ipo_name, name, sizeof(p->ipo_name)) == 0) break; return p; @@ -372,10 +371,11 @@ addrfamily_t *addr, *mask; /* */ /* Search the pool for a given address and return a search result. 
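ip_pool_search(), whose header closes above, runs under the pool read lock, finds the most specific node covering the address, bumps that node's hit counter and returns its match/no-match info (or -1 when nothing covers the address). The real lookup goes through a radix tree; the sketch below stays self-contained with a linear longest-prefix scan (hypothetical names):

#include <stdint.h>
#include <stddef.h>

typedef struct pool_node {
	struct pool_node	*pn_next;
	uint32_t		pn_addr;	/* masked network address */
	uint32_t		pn_mask;
	int			pn_info;	/* 1 = match, 0 = negative */
	unsigned long		pn_hits;
} pool_node_t;

typedef struct pool {
	pool_node_t	*po_list;
} pool_t;

extern void pool_rdlock(void), pool_unlock(void);

int
pool_search(pool_t *po, uint32_t addr)
{
	pool_node_t *n, *best = NULL;
	int rv = -1;

	pool_rdlock();
	for (n = po->po_list; n != NULL; n = n->pn_next) {
		if ((addr & n->pn_mask) != n->pn_addr)
			continue;
		if (best == NULL || n->pn_mask > best->pn_mask)
			best = n;	/* prefer the longer prefix */
	}
	if (best != NULL) {
		best->pn_hits++;
		rv = best->pn_info;
	}
	pool_unlock();
	return (rv);
}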
*/ /* ------------------------------------------------------------------------ */ -int ip_pool_search(tptr, version, dptr) +int ip_pool_search(tptr, version, dptr, ifs) void *tptr; int version; void *dptr; +ipf_stack_t *ifs; { struct radix_node *rn; ip_pool_node_t *m; @@ -405,7 +405,7 @@ void *dptr; } else return -1; - READ_ENTER(&ip_poolrw); + READ_ENTER(&ifs->ifs_ip_poolrw); rn = ipo->ipo_head->rnh_matchaddr(&v, ipo->ipo_head); @@ -415,7 +415,7 @@ void *dptr; m->ipn_hits++; rv = m->ipn_info; } - RWLOCK_EXIT(&ip_poolrw); + RWLOCK_EXIT(&ifs->ifs_ip_poolrw); return rv; } @@ -432,15 +432,16 @@ void *dptr; /* Add another node to the pool given by ipo. The three parameters passed */ /* in (addr, mask, info) shold all be stored in the node. */ /* ------------------------------------------------------------------------ */ -int ip_pool_insert(ipo, addr, mask, info) +int ip_pool_insert(ipo, addr, mask, info, ifs) ip_pool_t *ipo; addrfamily_t *addr, *mask; int info; +ipf_stack_t *ifs; { struct radix_node *rn; ip_pool_node_t *x; - ASSERT(rw_read_locked(&ip_poolrw.ipf_lk) == 0); + ASSERT(rw_read_locked(&ifs->ifs_ip_poolrw.ipf_lk) == 0); KMALLOC(x, ip_pool_node_t *); if (x == NULL) { @@ -468,13 +469,14 @@ int info; return ENOMEM; } + x->ipn_ref = 1; x->ipn_next = ipo->ipo_list; x->ipn_pnext = &ipo->ipo_list; if (ipo->ipo_list != NULL) ipo->ipo_list->ipn_pnext = &x->ipn_next; ipo->ipo_list = x; - ipoolstat.ipls_nodes++; + ifs->ifs_ipoolstat.ipls_nodes++; return 0; } @@ -492,14 +494,15 @@ int info; /* marked as being anonymous, give it a new, unique, identifier. Call any */ /* other functions required to initialise the structure. */ /* ------------------------------------------------------------------------ */ -int ip_pool_create(op) +int ip_pool_create(op, ifs) iplookupop_t *op; +ipf_stack_t *ifs; { char name[FR_GROUPLEN]; int poolnum, unit; ip_pool_t *h; - ASSERT(rw_read_locked(&ip_poolrw.ipf_lk) == 0); + ASSERT(rw_read_locked(&ifs->ifs_ip_poolrw.ipf_lk) == 0); KMALLOC(h, ip_pool_t *); if (h == NULL) @@ -525,7 +528,7 @@ iplookupop_t *op; (void)sprintf(name, "%x", poolnum); #endif - for (p = ip_pool_list[unit]; p != NULL; ) { + for (p = ifs->ifs_ip_pool_list[unit]; p != NULL; ) { if (strncmp(name, p->ipo_name, sizeof(p->ipo_name)) == 0) { poolnum++; @@ -534,7 +537,7 @@ iplookupop_t *op; #else (void)sprintf(name, "%x", poolnum); #endif - p = ip_pool_list[unit]; + p = ifs->ifs_ip_pool_list[unit]; } else p = p->ipo_next; } @@ -547,13 +550,13 @@ iplookupop_t *op; h->ipo_ref = 1; h->ipo_list = NULL; h->ipo_unit = unit; - h->ipo_next = ip_pool_list[unit]; - if (ip_pool_list[unit] != NULL) - ip_pool_list[unit]->ipo_pnext = &h->ipo_next; - h->ipo_pnext = &ip_pool_list[unit]; - ip_pool_list[unit] = h; + h->ipo_next = ifs->ifs_ip_pool_list[unit]; + if (ifs->ifs_ip_pool_list[unit] != NULL) + ifs->ifs_ip_pool_list[unit]->ipo_pnext = &h->ipo_next; + h->ipo_pnext = &ifs->ifs_ip_pool_list[unit]; + ifs->ifs_ip_pool_list[unit] = h; - ipoolstat.ipls_pools++; + ifs->ifs_ipoolstat.ipls_pools++; return 0; } @@ -569,13 +572,14 @@ iplookupop_t *op; /* Add another node to the pool given by ipo. The three parameters passed */ /* in (addr, mask, info) shold all be stored in the node. 
*/ /* ------------------------------------------------------------------------ */ -int ip_pool_remove(ipo, ipe) +int ip_pool_remove(ipo, ipe, ifs) ip_pool_t *ipo; ip_pool_node_t *ipe; +ipf_stack_t *ifs; { ip_pool_node_t **ipp, *n; - ASSERT(rw_read_locked(&ip_poolrw.ipf_lk) == 0); + ASSERT(rw_read_locked(&ifs->ifs_ip_poolrw.ipf_lk) == 0); for (ipp = &ipo->ipo_list; (n = *ipp) != NULL; ipp = &n->ipn_next) { if (ipe == n) { @@ -593,7 +597,7 @@ ip_pool_node_t *ipe; ipo->ipo_head); KFREE(n); - ipoolstat.ipls_nodes--; + ifs->ifs_ipoolstat.ipls_nodes--; return 0; } @@ -612,19 +616,20 @@ ip_pool_node_t *ipe; /* may not be initialised, we can't use an ASSERT to enforce the locking */ /* assertion that one of the two (ip_poolrw,ipf_global) is held. */ /* ------------------------------------------------------------------------ */ -int ip_pool_destroy(op) +int ip_pool_destroy(op, ifs) iplookupop_t *op; +ipf_stack_t *ifs; { ip_pool_t *ipo; - ipo = ip_pool_find(op->iplo_unit, op->iplo_name); + ipo = ip_pool_find(op->iplo_unit, op->iplo_name, ifs); if (ipo == NULL) return ESRCH; if (ipo->ipo_ref != 1) return EBUSY; - ip_pool_free(ipo); + ip_pool_free(ipo, ifs); return 0; } @@ -642,8 +647,9 @@ iplookupop_t *op; /* may not be initialised, we can't use an ASSERT to enforce the locking */ /* assertion that one of the two (ip_poolrw,ipf_global) is held. */ /* ------------------------------------------------------------------------ */ -int ip_pool_flush(fp) +int ip_pool_flush(fp, ifs) iplookupflush_t *fp; +ipf_stack_t *ifs; { int i, num = 0, unit, err; ip_pool_t *p, *q; @@ -654,12 +660,12 @@ iplookupflush_t *fp; for (i = 0; i <= IPL_LOGMAX; i++) { if (unit != IPLT_ALL && i != unit) continue; - for (q = ip_pool_list[i]; (p = q) != NULL; ) { + for (q = ifs->ifs_ip_pool_list[i]; (p = q) != NULL; ) { op.iplo_unit = i; (void)strncpy(op.iplo_name, p->ipo_name, sizeof(op.iplo_name)); q = p->ipo_next; - err = ip_pool_destroy(&op); + err = ip_pool_destroy(&op, ifs); if (err == 0) num++; else @@ -684,8 +690,9 @@ iplookupflush_t *fp; /* may not be initialised, we can't use an ASSERT to enforce the locking */ /* assertion that one of the two (ip_poolrw,ipf_global) is held. */ /* ------------------------------------------------------------------------ */ -void ip_pool_free(ipo) +void ip_pool_free(ipo, ifs) ip_pool_t *ipo; +ipf_stack_t *ifs; { ip_pool_node_t *n; @@ -699,7 +706,7 @@ ip_pool_t *ipo; KFREE(n); - ipoolstat.ipls_nodes--; + ifs->ifs_ipoolstat.ipls_nodes--; } ipo->ipo_list = NULL; @@ -709,7 +716,7 @@ ip_pool_t *ipo; rn_freehead(ipo->ipo_head); KFREE(ipo); - ipoolstat.ipls_pools--; + ifs->ifs_ipoolstat.ipls_pools--; } @@ -722,29 +729,182 @@ ip_pool_t *ipo; /* Drop the number of known references to this pool structure by one and if */ /* we arrive at zero known references, free it. 
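ip_pool_deref() and the new ip_pool_node_deref() above follow the same reference-counting discipline: callers never free directly, they drop their reference, and the last one out frees the object and adjusts the per-stack statistic. A small sketch of that pattern (free() stands in for KFREE(); the caller is assumed to hold the pool write lock):

#include <stdlib.h>

typedef struct node {
	int	n_ref;
	/* ... payload ... */
} node_t;

void
node_deref(node_t **np, unsigned long *nodes_counter)
{
	node_t *n = *np;

	*np = NULL;			/* caller's pointer is now dead */
	n->n_ref--;
	if (n->n_ref == 0) {
		free(n);		/* KFREE() in the kernel code */
		(*nodes_counter)--;	/* per-stack node statistic */
	}
}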
*/ /* ------------------------------------------------------------------------ */ -void ip_pool_deref(ipo) +void ip_pool_deref(ipo, ifs) ip_pool_t *ipo; +ipf_stack_t *ifs; { - ASSERT(rw_read_locked(&ip_poolrw.ipf_lk) == 0); + ASSERT(rw_read_locked(&ifs->ifs_ip_poolrw.ipf_lk) == 0); ipo->ipo_ref--; if (ipo->ipo_ref == 0) - ip_pool_free(ipo); + ip_pool_free(ipo, ifs); +} + + + +void ip_pool_node_deref(ipn, ifs) +ip_pool_node_t *ipn; +ipf_stack_t *ifs; +{ + + ipn->ipn_ref--; + + if (ipn->ipn_ref == 0) { + KFREE(ipn); + ifs->ifs_ipoolstat.ipls_nodes--; + } +} + + +int ip_pool_getnext(token, ilp, ifs) +ipftoken_t *token; +ipflookupiter_t *ilp; +ipf_stack_t *ifs; +{ + ip_pool_node_t *node, zn, *nextnode; + ip_pool_t *ipo, zp, *nextipo; + int err; + + err = 0; + node = NULL; + nextnode = NULL; + ipo = NULL; + nextipo = NULL; + + READ_ENTER(&ifs->ifs_ip_poolrw); + + switch (ilp->ili_otype) + { + case IPFLOOKUPITER_LIST : + ipo = token->ipt_data; + if (ipo == NULL) { + nextipo = ifs->ifs_ip_pool_list[(int)ilp->ili_unit]; + } else { + nextipo = ipo->ipo_next; + } + + if (nextipo != NULL) { + if (nextipo->ipo_next == NULL) + token->ipt_alive = 0; + else { + ATOMIC_INC(nextipo->ipo_ref); + } + } else { + bzero((char *)&zp, sizeof(zp)); + nextipo = &zp; + } + break; + + case IPFLOOKUPITER_NODE : + node = token->ipt_data; + if (node == NULL) { + ipo = ip_pool_find(ilp->ili_unit, ilp->ili_name, ifs); + if (ipo == NULL) + err = ESRCH; + else { + nextnode = ipo->ipo_list; + ipo = NULL; + } + } else { + nextnode = node->ipn_next; + } + + if (nextnode != NULL) { + if (nextnode->ipn_next == NULL) + token->ipt_alive = 0; + else { + ATOMIC_INC(nextnode->ipn_ref); + } + } else { + bzero((char *)&zn, sizeof(zn)); + nextnode = &zn; + } + break; + default : + err = EINVAL; + break; + } + + RWLOCK_EXIT(&ifs->ifs_ip_poolrw); + + if (err != 0) + return err; + + switch (ilp->ili_otype) + { + case IPFLOOKUPITER_LIST : + if (ipo != NULL) { + WRITE_ENTER(&ifs->ifs_ip_poolrw); + ip_pool_deref(ipo, ifs); + RWLOCK_EXIT(&ifs->ifs_ip_poolrw); + } + token->ipt_data = nextipo; + err = COPYOUT(nextipo, ilp->ili_data, sizeof(*nextipo)); + if (err != 0) + err = EFAULT; + break; + + case IPFLOOKUPITER_NODE : + if (node != NULL) { + WRITE_ENTER(&ifs->ifs_ip_poolrw); + ip_pool_node_deref(node, ifs); + RWLOCK_EXIT(&ifs->ifs_ip_poolrw); + } + token->ipt_data = nextnode; + err = COPYOUT(nextnode, ilp->ili_data, sizeof(*nextnode)); + if (err != 0) + err = EFAULT; + break; + } + + return err; +} + + +void ip_pool_iterderef(otype, unit, data, ifs) +u_int otype; +int unit; +void *data; +ipf_stack_t *ifs; +{ + + if (data == NULL) + return; + + if (unit < 0 || unit > IPL_LOGMAX) + return; + + switch (otype) + { + case IPFLOOKUPITER_LIST : + WRITE_ENTER(&ifs->ifs_ip_poolrw); + ip_pool_deref((ip_pool_t *)data, ifs); + RWLOCK_EXIT(&ifs->ifs_ip_poolrw); + break; + + case IPFLOOKUPITER_NODE : + WRITE_ENTER(&ifs->ifs_ip_poolrw); + ip_pool_node_deref((ip_pool_node_t *)data, ifs); + RWLOCK_EXIT(&ifs->ifs_ip_poolrw); + break; + default : + break; + } } # if defined(_KERNEL) && ((BSD >= 198911) && !defined(__osf__) && \ !defined(__hpux) && !defined(__sgi)) static int -rn_freenode(struct radix_node *n, void *p) +rn_freenode(struct radix_node *n, void *p, ipf_stack_t *ifs) { struct radix_node_head *rnh = p; struct radix_node *d; d = rnh->rnh_deladdr(n->rn_key, NULL, rnh); if (d != NULL) { - FreeS(d, max_keylen + 2 * sizeof (*d)); + FreeS(d, ifs->ifs_max_keylen + 2 * sizeof (*d)); } return 0; } diff --git a/usr/src/uts/common/inet/ipf/ip_proxy.c 
b/usr/src/uts/common/inet/ipf/ip_proxy.c index d48e83ff74..e0eaec5f0b 100644 --- a/usr/src/uts/common/inet/ipf/ip_proxy.c +++ b/usr/src/uts/common/inet/ipf/ip_proxy.c @@ -3,7 +3,7 @@ * * See the IPFILTER.LICENCE file for details on licencing. * - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -86,6 +86,7 @@ struct file; #include <netinet/ip_icmp.h> #include "netinet/ip_compat.h" #include <netinet/tcpip.h> +#include "netinet/ipf_stack.h" #include "netinet/ip_fil.h" #include "netinet/ip_nat.h" #include "netinet/ip_state.h" @@ -121,73 +122,72 @@ int ipf_proxy_debug = 0; #else int ipf_proxy_debug = 2; #endif -ap_session_t *ap_sess_tab[AP_SESS_SIZE]; -ap_session_t *ap_sess_list = NULL; -aproxy_t *ap_proxylist = NULL; -aproxy_t ap_proxies[] = { + +static aproxy_t lcl_ap_proxies[] = { #ifdef IPF_FTP_PROXY - { NULL, "ftp", (char)IPPROTO_TCP, 0, 0, ippr_ftp_init, ippr_ftp_fini, + { NULL, "ftp", (char)IPPROTO_TCP, 0, 0, NULL, ippr_ftp_init, ippr_ftp_fini, ippr_ftp_new, NULL, ippr_ftp_in, ippr_ftp_out, NULL }, #endif #ifdef IPF_IRC_PROXY - { NULL, "irc", (char)IPPROTO_TCP, 0, 0, ippr_irc_init, ippr_irc_fini, + { NULL, "irc", (char)IPPROTO_TCP, 0, 0, NULL, ippr_irc_init, ippr_irc_fini, ippr_irc_new, NULL, NULL, ippr_irc_out, NULL, NULL }, #endif #ifdef IPF_RCMD_PROXY - { NULL, "rcmd", (char)IPPROTO_TCP, 0, 0, ippr_rcmd_init, ippr_rcmd_fini, + { NULL, "rcmd", (char)IPPROTO_TCP, 0, 0, NULL, ippr_rcmd_init, ippr_rcmd_fini, ippr_rcmd_new, NULL, ippr_rcmd_in, ippr_rcmd_out, NULL, NULL }, #endif #ifdef IPF_RAUDIO_PROXY - { NULL, "raudio", (char)IPPROTO_TCP, 0, 0, ippr_raudio_init, ippr_raudio_fini, + { NULL, "raudio", (char)IPPROTO_TCP, 0, 0, NULL, ippr_raudio_init, ippr_raudio_fini, ippr_raudio_new, NULL, ippr_raudio_in, ippr_raudio_out, NULL, NULL }, #endif #ifdef IPF_MSNRPC_PROXY - { NULL, "msnrpc", (char)IPPROTO_TCP, 0, 0, ippr_msnrpc_init, ippr_msnrpc_fini, + { NULL, "msnrpc", (char)IPPROTO_TCP, 0, 0, NULL, ippr_msnrpc_init, ippr_msnrpc_fini, ippr_msnrpc_new, NULL, ippr_msnrpc_in, ippr_msnrpc_out, NULL, NULL }, #endif #ifdef IPF_NETBIOS_PROXY - { NULL, "netbios", (char)IPPROTO_UDP, 0, 0, ippr_netbios_init, ippr_netbios_fini, + { NULL, "netbios", (char)IPPROTO_UDP, 0, 0, NULL, ippr_netbios_init, ippr_netbios_fini, NULL, NULL, NULL, ippr_netbios_out, NULL, NULL }, #endif #ifdef IPF_IPSEC_PROXY - { NULL, "ipsec", (char)IPPROTO_UDP, 0, 0, + { NULL, "ipsec", (char)IPPROTO_UDP, 0, 0, NULL, ippr_ipsec_init, ippr_ipsec_fini, ippr_ipsec_new, ippr_ipsec_del, ippr_ipsec_inout, ippr_ipsec_inout, ippr_ipsec_match, NULL }, #endif #ifdef IPF_PPTP_PROXY - { NULL, "pptp", (char)IPPROTO_TCP, 0, 0, + { NULL, "pptp", (char)IPPROTO_TCP, 0, 0, NULL, ippr_pptp_init, ippr_pptp_fini, ippr_pptp_new, ippr_pptp_del, ippr_pptp_inout, ippr_pptp_inout, NULL, NULL }, #endif #ifdef IPF_H323_PROXY - { NULL, "h323", (char)IPPROTO_TCP, 0, 0, ippr_h323_init, ippr_h323_fini, + { NULL, "h323", (char)IPPROTO_TCP, 0, 0, NULL, ippr_h323_init, ippr_h323_fini, ippr_h323_new, ippr_h323_del, ippr_h323_in, NULL, NULL }, - { NULL, "h245", (char)IPPROTO_TCP, 0, 0, NULL, NULL, + { NULL, "h245", (char)IPPROTO_TCP, 0, 0, NULL, NULL, NULL, ippr_h245_new, NULL, NULL, ippr_h245_out, NULL }, #endif #ifdef IPF_RPCB_PROXY # if 0 - { NULL, "rpcbt", (char)IPPROTO_TCP, 0, 0, + { NULL, "rpcbt", (char)IPPROTO_TCP, 0, 0, NULL, ippr_rpcb_init, ippr_rpcb_fini, ippr_rpcb_new, ippr_rpcb_del, ippr_rpcb_in, ippr_rpcb_out, NULL, NULL }, # endif - { NULL, 
"rpcbu", (char)IPPROTO_UDP, 0, 0, + { NULL, "rpcbu", (char)IPPROTO_UDP, 0, 0, NULL, ippr_rpcb_init, ippr_rpcb_fini, ippr_rpcb_new, ippr_rpcb_del, ippr_rpcb_in, ippr_rpcb_out, NULL, NULL }, #endif - { NULL, "", '\0', 0, 0, NULL, NULL, NULL, NULL } + { NULL, "", '\0', 0, 0, NULL, NULL, NULL, NULL, NULL } }; /* * Dynamically add a new kernel proxy. Ensure that it is unique in the * collection compiled in and dynamically added. */ -int appr_add(ap) +int appr_add(ap, ifs) aproxy_t *ap; +ipf_stack_t *ifs; { aproxy_t *a; - for (a = ap_proxies; a->apr_p; a++) + for (a = ifs->ifs_ap_proxies; a->apr_p; a++) if ((a->apr_p == ap->apr_p) && !strncmp(a->apr_label, ap->apr_label, sizeof(ap->apr_label))) { @@ -197,7 +197,7 @@ aproxy_t *ap; return -1; } - for (a = ap_proxylist; a->apr_p; a = a->apr_next) + for (a = ifs->ifs_ap_proxylist; a->apr_p; a = a->apr_next) if ((a->apr_p == ap->apr_p) && !strncmp(a->apr_label, ap->apr_label, sizeof(ap->apr_label))) { @@ -206,10 +206,11 @@ aproxy_t *ap; a->apr_label, a->apr_p); return -1; } - ap->apr_next = ap_proxylist; - ap_proxylist = ap; + ap->apr_next = ifs->ifs_ap_proxylist; + ifs->ifs_ap_proxylist = ap; if (ap->apr_init != NULL) - return (*ap->apr_init)(); + return (*ap->apr_init)(&ap->apr_private, ifs); + return 0; } @@ -219,13 +220,14 @@ aproxy_t *ap; * exists, and if it does and it has a control function then invoke that * control function. */ -int appr_ctl(ctl) +int appr_ctl(ctl, ifs) ap_ctl_t *ctl; +ipf_stack_t *ifs; { aproxy_t *a; int error; - a = appr_lookup(ctl->apc_p, ctl->apc_label); + a = appr_lookup(ctl->apc_p, ctl->apc_label, ifs); if (a == NULL) { if (ipf_proxy_debug > 1) printf("appr_ctl: can't find %s/%d\n", @@ -237,7 +239,7 @@ ap_ctl_t *ctl; ctl->apc_label, ctl->apc_p); error = ENXIO; } else { - error = (*a->apr_ctl)(a, ctl); + error = (*a->apr_ctl)(a, ctl, a->apr_private); if ((error != 0) && (ipf_proxy_debug > 1)) printf("appr_ctl: %s/%d ctl error %d\n", a->apr_label, a->apr_p, error); @@ -251,12 +253,14 @@ ap_ctl_t *ctl; * If it is in use, return 1 (do not destroy NOW), not in use 0 or -1 * if it cannot be matched. 
*/ -int appr_del(ap) +int appr_del(ap, ifs) aproxy_t *ap; +ipf_stack_t *ifs; { aproxy_t *a, **app; - for (app = &ap_proxylist; ((a = *app) != NULL); app = &a->apr_next) + for (app = &ifs->ifs_ap_proxylist; ((a = *app) != NULL); + app = &a->apr_next) if (a == ap) { a->apr_flags |= APR_DELETE; *app = a->apr_next; @@ -294,10 +298,11 @@ ipnat_t *nat; } -int appr_ioctl(data, cmd, mode) +int appr_ioctl(data, cmd, mode, ifs) caddr_t data; ioctlcmd_t cmd; int mode; +ipf_stack_t *ifs; { ap_ctl_t ctl; caddr_t ptr; @@ -327,7 +332,7 @@ int mode; } if (error == 0) - error = appr_ctl(&ctl); + error = appr_ctl(&ctl, ifs); if ((ctl.apc_dsize > 0) && (ptr != NULL) && (ctl.apc_data == ptr)) { @@ -376,7 +381,7 @@ nat_t *nat; } if (apr->apr_match != NULL) { - result = (*apr->apr_match)(fin, nat->nat_aps, nat); + result = (*apr->apr_match)(fin, nat->nat_aps, nat, apr->apr_private); if (result != 0) { if (ipf_proxy_debug > 4) printf("appr_match: result %d\n", result); @@ -398,6 +403,7 @@ nat_t *nat; { register ap_session_t *aps; aproxy_t *apr; + ipf_stack_t *ifs = fin->fin_ifs; if (ipf_proxy_debug > 8) printf("appr_new(%lx,%lx) \n", (u_long)fin, (u_long)nat); @@ -433,7 +439,7 @@ nat_t *nat; aps->aps_apr = apr; aps->aps_psiz = 0; if (apr->apr_new != NULL) - if ((*apr->apr_new)(fin, aps, nat) == -1) { + if ((*apr->apr_new)(fin, aps, nat, apr->apr_private) == -1) { if ((aps->aps_data != NULL) && (aps->aps_psiz != 0)) { KFREES(aps->aps_data, aps->aps_psiz); } @@ -444,8 +450,8 @@ nat_t *nat; return -1; } aps->aps_nat = nat; - aps->aps_next = ap_sess_list; - ap_sess_list = aps; + aps->aps_next = ifs->ifs_ap_sess_list; + ifs->ifs_ap_sess_list = aps; nat->nat_aps = aps; return 0; @@ -476,12 +482,14 @@ nat_t *nat; #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) u_32_t s1, s2, sd; #endif + ipf_stack_t *ifs = fin->fin_ifs; + #if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) net_data_t net_data_p; if (fin->fin_v == 4) - net_data_p = ipf_ipv4; + net_data_p = ifs->ifs_ipf_ipv4; else - net_data_p = ipf_ipv6; + net_data_p = ifs->ifs_ipf_ipv6; #endif if (fin->fin_flx & FI_BAD) { @@ -496,7 +504,7 @@ nat_t *nat; printf("appr_check: l4 checksum failure %d\n", fin->fin_p); if (fin->fin_p == IPPROTO_TCP) - frstats[fin->fin_out].fr_tcpbad++; + ifs->ifs_frstats[fin->fin_out].fr_tcpbad++; return -1; } #endif @@ -545,10 +553,10 @@ nat_t *nat; err = 0; if (fin->fin_out != 0) { if (apr->apr_outpkt != NULL) - err = (*apr->apr_outpkt)(fin, aps, nat); + err = (*apr->apr_outpkt)(fin, aps, nat, apr->apr_private); } else { if (apr->apr_inpkt != NULL) - err = (*apr->apr_inpkt)(fin, aps, nat); + err = (*apr->apr_inpkt)(fin, aps, nat, apr->apr_private); } rv = APR_EXIT(err); @@ -627,23 +635,24 @@ nat_t *nat; /* * Search for an proxy by the protocol it is being used with and its name. 
*/ -aproxy_t *appr_lookup(pr, name) +aproxy_t *appr_lookup(pr, name, ifs) u_int pr; char *name; +ipf_stack_t *ifs; { aproxy_t *ap; if (ipf_proxy_debug > 8) printf("appr_lookup(%d,%s)\n", pr, name); - for (ap = ap_proxies; ap->apr_p; ap++) + for (ap = ifs->ifs_ap_proxies; ap->apr_p; ap++) if ((ap->apr_p == pr) && !strncmp(name, ap->apr_label, sizeof(ap->apr_label))) { ap->apr_ref++; return ap; } - for (ap = ap_proxylist; ap; ap = ap->apr_next) + for (ap = ifs->ifs_ap_proxylist; ap; ap = ap->apr_next) if ((ap->apr_p == pr) && !strncmp(name, ap->apr_label, sizeof(ap->apr_label))) { ap->apr_ref++; @@ -662,8 +671,9 @@ aproxy_t *ap; } -void aps_free(aps) +void aps_free(aps, ifs) ap_session_t *aps; +ipf_stack_t *ifs; { ap_session_t *a, **ap; aproxy_t *apr; @@ -671,7 +681,7 @@ ap_session_t *aps; if (!aps) return; - for (ap = &ap_sess_list; ((a = *ap) != NULL); ap = &a->aps_next) + for (ap = &ifs->ifs_ap_sess_list; ((a = *ap) != NULL); ap = &a->aps_next) if (a == aps) { *ap = a->aps_next; break; @@ -679,7 +689,7 @@ ap_session_t *aps; apr = aps->aps_apr; if ((apr != NULL) && (apr->apr_del != NULL)) - (*apr->apr_del)(aps); + (*apr->apr_del)(aps, apr->apr_private, ifs); if ((aps->aps_data != NULL) && (aps->aps_psiz != 0)) KFREES(aps->aps_data, aps->aps_psiz); @@ -836,19 +846,24 @@ int inc; * Initialise hook for kernel application proxies. * Call the initialise routine for all the compiled in kernel proxies. */ -int appr_init() +int appr_init(ifs) +ipf_stack_t *ifs; { aproxy_t *ap; - int err = 0; + int err = 0; + + /* Since the refcnt is used we make a copy of lcl_ap_proxies */ + KMALLOCS(ifs->ifs_ap_proxies, aproxy_t *, sizeof (lcl_ap_proxies)); + bcopy(lcl_ap_proxies, ifs->ifs_ap_proxies, sizeof (lcl_ap_proxies)); - for (ap = ap_proxies; ap->apr_p; ap++) { + for (ap = ifs->ifs_ap_proxies; ap->apr_p; ap++) { if (ap->apr_init != NULL) { - err = (*ap->apr_init)(); + err = (*ap->apr_init)(&ap->apr_private, ifs); if (err != 0) break; } } - return err; + return 0; } @@ -856,14 +871,20 @@ int appr_init() * Unload hook for kernel application proxies. * Call the finialise routine for all the compiled in kernel proxies. */ -void appr_unload() +void appr_unload(ifs) +ipf_stack_t *ifs; { aproxy_t *ap; + if(ifs->ifs_ap_proxies == NULL) + return; - for (ap = ap_proxies; ap->apr_p; ap++) + for (ap = ifs->ifs_ap_proxies; ap->apr_p; ap++) if (ap->apr_fini != NULL) - (*ap->apr_fini)(); - for (ap = ap_proxylist; ap; ap = ap->apr_next) + (*ap->apr_fini)(&ap->apr_private, ifs); + for (ap = ifs->ifs_ap_proxylist; ap; ap = ap->apr_next) if (ap->apr_fini != NULL) - (*ap->apr_fini)(); + (*ap->apr_fini)(&ap->apr_private, ifs); + + KFREES(ifs->ifs_ap_proxies, sizeof (lcl_ap_proxies)); + ifs->ifs_ap_proxies = NULL; } diff --git a/usr/src/uts/common/inet/ipf/ip_state.c b/usr/src/uts/common/inet/ipf/ip_state.c index f08b2c1087..15e3b7ac8a 100644 --- a/usr/src/uts/common/inet/ipf/ip_state.c +++ b/usr/src/uts/common/inet/ipf/ip_state.c @@ -3,7 +3,7 @@ * * See the IPFILTER.LICENCE file for details on licencing. * - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
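The appr_init()/appr_unload() pair above captures the core of the ip_proxy.c change: the compiled-in proxy table is now only a read-only template (lcl_ap_proxies), and each IP instance receives its own writable copy in ifs->ifs_ap_proxies, so reference counts and the new apr_private slots stay private to the stack. The following user-space sketch shows the same template-copy pattern with standard C; the struct and function names here are simplified stand-ins, not the real aproxy_t/ipf_stack_t.

#include <stdlib.h>
#include <string.h>

/* Simplified stand-in for aproxy_t; only the per-instance fields matter. */
struct proxy {
	const char *label;
	int proto;
	int ref;	/* per-instance reference count */
	void *priv;	/* per-instance data, cf. apr_private */
};

/* Compiled-in, read-only template, cf. lcl_ap_proxies. */
static const struct proxy proxy_template[] = {
	{ "ftp",  6, 0, NULL },
	{ "rcmd", 6, 0, NULL },
	{ NULL,   0, 0, NULL }	/* terminator */
};

/* Simplified stand-in for the proxy fields of ipf_stack_t. */
struct stack {
	struct proxy *proxies;	/* cf. ifs_ap_proxies */
};

/* cf. appr_init(): give each stack its own writable copy of the template. */
int
stack_proxy_init(struct stack *s)
{
	s->proxies = malloc(sizeof (proxy_template));
	if (s->proxies == NULL)
		return (-1);
	memcpy(s->proxies, proxy_template, sizeof (proxy_template));
	return (0);
}

/* cf. appr_unload(): release the per-stack copy and mark it gone. */
void
stack_proxy_fini(struct stack *s)
{
	free(s->proxies);
	s->proxies = NULL;
}

The design point is the same as in the diff: the template itself is never written, so two IP instances cannot race on apr_ref or leak one zone's proxy state into another.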
*/ @@ -92,6 +92,7 @@ struct file; #include "netinet/ip_frag.h" #include "netinet/ip_state.h" #include "netinet/ip_proxy.h" +#include "netinet/ipf_stack.h" #ifdef IPFILTER_SYNC #include "netinet/ip_sync.h" #endif @@ -116,68 +117,33 @@ static const char sccsid[] = "@(#)ip_state.c 1.8 6/5/96 (C) 1993-2000 Darren Ree static const char rcsid[] = "@(#)$Id: ip_state.c,v 2.186.2.36 2005/08/11 19:58:03 darrenr Exp $"; #endif -static ipstate_t **ips_table = NULL; -static u_long *ips_seed = NULL; -static int ips_num = 0; -static u_long ips_last_force_flush = 0; -ips_stat_t ips_stats; - #ifdef USE_INET6 static ipstate_t *fr_checkicmp6matchingstate __P((fr_info_t *)); #endif static ipstate_t *fr_matchsrcdst __P((fr_info_t *, ipstate_t *, i6addr_t *, i6addr_t *, tcphdr_t *, u_32_t)); static ipstate_t *fr_checkicmpmatchingstate __P((fr_info_t *)); -static int fr_state_flush __P((int, int)); -static ips_stat_t *fr_statetstats __P((void)); -static void fr_delstate __P((ipstate_t *, int)); -static int fr_state_remove __P((caddr_t)); -static void fr_ipsmove __P((ipstate_t *, u_int)); +static int fr_state_flush __P((int, int, ipf_stack_t *)); +static ips_stat_t *fr_statetstats __P((ipf_stack_t *)); +static void fr_delstate __P((ipstate_t *, int, ipf_stack_t *)); +static int fr_state_remove __P((caddr_t, ipf_stack_t *)); +static void fr_ipsmove __P((ipstate_t *, u_int, ipf_stack_t *)); static int fr_tcpstate __P((fr_info_t *, tcphdr_t *, ipstate_t *)); static int fr_tcpoptions __P((fr_info_t *, tcphdr_t *, tcpdata_t *)); static ipstate_t *fr_stclone __P((fr_info_t *, tcphdr_t *, ipstate_t *)); static void fr_fixinisn __P((fr_info_t *, ipstate_t *)); static void fr_fixoutisn __P((fr_info_t *, ipstate_t *)); static void fr_checknewisn __P((fr_info_t *, ipstate_t *)); +static int fr_stateiter __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *)); -int fr_stputent __P((caddr_t)); -int fr_stgetent __P((caddr_t)); +int fr_stputent __P((caddr_t, ipf_stack_t *)); +int fr_stgetent __P((caddr_t, ipf_stack_t *)); #define ONE_DAY IPF_TTLVAL(1 * 86400) /* 1 day */ #define FIVE_DAYS (5 * ONE_DAY) -#define DOUBLE_HASH(x) (((x) + ips_seed[(x) % fr_statesize]) % fr_statesize) - -u_long fr_tcpidletimeout = FIVE_DAYS, - fr_tcpclosewait = IPF_TTLVAL(2 * TCP_MSL), - fr_tcplastack = IPF_TTLVAL(2 * TCP_MSL), - fr_tcptimeout = IPF_TTLVAL(2 * TCP_MSL), - fr_tcpclosed = IPF_TTLVAL(60), - fr_tcphalfclosed = IPF_TTLVAL(2 * 3600), /* 2 hours */ - fr_udptimeout = IPF_TTLVAL(120), - fr_udpacktimeout = IPF_TTLVAL(12), - fr_icmptimeout = IPF_TTLVAL(60), - fr_icmpacktimeout = IPF_TTLVAL(6), - fr_iptimeout = IPF_TTLVAL(60); -int fr_statemax = IPSTATE_MAX, - fr_statesize = IPSTATE_SIZE; -int fr_state_doflush = 0, - fr_state_lock = 0, - fr_state_maxbucket = 0, - fr_state_maxbucket_reset = 1, - fr_state_init = 0; -ipftq_t ips_tqtqb[IPF_TCP_NSTATES], - ips_udptq, - ips_udpacktq, - ips_iptq, - ips_icmptq, - ips_icmpacktq, - *ips_utqe = NULL; -#ifdef IPFILTER_LOG -int ipstate_logging = 1; -#else -int ipstate_logging = 0; -#endif -ipstate_t *ips_list = NULL; +#define DOUBLE_HASH(x, ifs) \ + (((x) + ifs->ifs_ips_seed[(x) % ifs->ifs_fr_statesize]) % ifs->ifs_fr_statesize) + /* ------------------------------------------------------------------------ */ @@ -188,30 +154,55 @@ ipstate_t *ips_list = NULL; /* Initialise all the global variables used within the state code. */ /* This action also includes initiailising locks. 
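The deleted block above (ips_table, ips_seed, ips_num, the fr_* tunables, the ipstate_logging flag) shows the recurring shape of this patch: every file-scope variable becomes a field of ipf_stack_t, and every function that used to reach for a global grows an ipf_stack_t * argument. A tiny before/after sketch of that conversion, with invented names standing in for the IPFilter ones:

/* Before: one implicit instance; state lives in file-scope globals. */
static unsigned int table_size = 256;

int
lookup_global(unsigned long key)
{
	return ((int)(key % table_size));	/* touches the global directly */
}

/* After: state lives in a per-instance structure handed to every caller. */
struct inst {				/* cf. ipf_stack_t */
	unsigned int inst_table_size;	/* cf. ifs_fr_statesize */
};

int
lookup_instance(const struct inst *ip, unsigned long key)
{
	return ((int)(key % ip->inst_table_size));
}

Nothing about the lookup itself changes; what changes is that two zones can hold two struct inst values without sharing or corrupting each other's tables.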
*/ /* ------------------------------------------------------------------------ */ -int fr_stateinit() +int fr_stateinit(ifs) +ipf_stack_t *ifs; { int i; - KMALLOCS(ips_table, ipstate_t **, fr_statesize * sizeof(ipstate_t *)); - if (ips_table == NULL) + ifs->ifs_fr_tcpidletimeout = FIVE_DAYS; + ifs->ifs_fr_tcpclosewait = IPF_TTLVAL(TCP_MSL); + ifs->ifs_fr_tcplastack = IPF_TTLVAL(TCP_MSL); + ifs->ifs_fr_tcptimeout = IPF_TTLVAL(TCP_MSL); + ifs->ifs_fr_tcpclosed = IPF_TTLVAL(60); + ifs->ifs_fr_tcphalfclosed = IPF_TTLVAL(2 * 3600); /* 2 hours */ + ifs->ifs_fr_udptimeout = IPF_TTLVAL(120); + ifs->ifs_fr_udpacktimeout = IPF_TTLVAL(12); + ifs->ifs_fr_icmptimeout = IPF_TTLVAL(60); + ifs->ifs_fr_icmpacktimeout = IPF_TTLVAL(6); + ifs->ifs_fr_iptimeout = IPF_TTLVAL(60); + ifs->ifs_fr_statemax = IPSTATE_MAX; + ifs->ifs_fr_statesize = IPSTATE_SIZE; + ifs->ifs_fr_state_maxbucket_reset = 1; +#ifdef IPFILTER_LOG + ifs->ifs_ipstate_logging = 1; +#else + ifs->ifs_ipstate_logging = 0; +#endif + + KMALLOCS(ifs->ifs_ips_table, ipstate_t **, + ifs->ifs_fr_statesize * sizeof(ipstate_t *)); + if (ifs->ifs_ips_table == NULL) return -1; - bzero((char *)ips_table, fr_statesize * sizeof(ipstate_t *)); + bzero((char *)ifs->ifs_ips_table, + ifs->ifs_fr_statesize * sizeof(ipstate_t *)); - KMALLOCS(ips_seed, u_long *, fr_statesize * sizeof(*ips_seed)); - if (ips_seed == NULL) + KMALLOCS(ifs->ifs_ips_seed, u_long *, + ifs->ifs_fr_statesize * sizeof(*ifs->ifs_ips_seed)); + if (ifs->ifs_ips_seed == NULL) return -2; - for (i = 0; i < fr_statesize; i++) { + for (i = 0; i < ifs->ifs_fr_statesize; i++) { /* * XXX - ips_seed[X] should be a random number of sorts. */ #if (__FreeBSD_version >= 400000) - ips_seed[i] = arc4random(); + ifs->ifs_ips_seed[i] = arc4random(); #else - ips_seed[i] = ((u_long)ips_seed + i) * fr_statesize; - ips_seed[i] ^= 0xa5a55a5a; - ips_seed[i] *= (u_long)ips_seed; - ips_seed[i] ^= 0x5a5aa5a5; - ips_seed[i] *= fr_statemax; + ifs->ifs_ips_seed[i] = ((u_long)ifs->ifs_ips_seed + i) * + ifs->ifs_fr_statesize; + ifs->ifs_ips_seed[i] ^= 0xa5a55a5a; + ifs->ifs_ips_seed[i] *= (u_long)ifs->ifs_ips_seed; + ifs->ifs_ips_seed[i] ^= 0x5a5aa5a5; + ifs->ifs_ips_seed[i] *= ifs->ifs_fr_statemax; #endif } @@ -233,56 +224,57 @@ int fr_stateinit() icmpreplytype6[ND_NEIGHBOR_SOLICIT] = ND_NEIGHBOR_ADVERT; #endif - KMALLOCS(ips_stats.iss_bucketlen, u_long *, - fr_statesize * sizeof(u_long)); - if (ips_stats.iss_bucketlen == NULL) + KMALLOCS(ifs->ifs_ips_stats.iss_bucketlen, u_long *, + ifs->ifs_fr_statesize * sizeof(u_long)); + if (ifs->ifs_ips_stats.iss_bucketlen == NULL) return -1; - bzero((char *)ips_stats.iss_bucketlen, fr_statesize * sizeof(u_long)); - - if (fr_state_maxbucket == 0) { - for (i = fr_statesize; i > 0; i >>= 1) - fr_state_maxbucket++; - fr_state_maxbucket *= 2; - } - - fr_sttab_init(ips_tqtqb); - ips_tqtqb[IPF_TCP_NSTATES - 1].ifq_next = &ips_udptq; - ips_udptq.ifq_ttl = (u_long)fr_udptimeout; - ips_udptq.ifq_ref = 1; - ips_udptq.ifq_head = NULL; - ips_udptq.ifq_tail = &ips_udptq.ifq_head; - MUTEX_INIT(&ips_udptq.ifq_lock, "ipftq udp tab"); - ips_udptq.ifq_next = &ips_udpacktq; - ips_udpacktq.ifq_ttl = (u_long)fr_udpacktimeout; - ips_udpacktq.ifq_ref = 1; - ips_udpacktq.ifq_head = NULL; - ips_udpacktq.ifq_tail = &ips_udpacktq.ifq_head; - MUTEX_INIT(&ips_udpacktq.ifq_lock, "ipftq udpack tab"); - ips_udpacktq.ifq_next = &ips_icmptq; - ips_icmptq.ifq_ttl = (u_long)fr_icmptimeout; - ips_icmptq.ifq_ref = 1; - ips_icmptq.ifq_head = NULL; - ips_icmptq.ifq_tail = &ips_icmptq.ifq_head; - 
MUTEX_INIT(&ips_icmptq.ifq_lock, "ipftq icmp tab"); - ips_icmptq.ifq_next = &ips_icmpacktq; - ips_icmpacktq.ifq_ttl = (u_long)fr_icmpacktimeout; - ips_icmpacktq.ifq_ref = 1; - ips_icmpacktq.ifq_head = NULL; - ips_icmpacktq.ifq_tail = &ips_icmpacktq.ifq_head; - MUTEX_INIT(&ips_icmpacktq.ifq_lock, "ipftq icmpack tab"); - ips_icmpacktq.ifq_next = &ips_iptq; - ips_iptq.ifq_ttl = (u_long)fr_iptimeout; - ips_iptq.ifq_ref = 1; - ips_iptq.ifq_head = NULL; - ips_iptq.ifq_tail = &ips_iptq.ifq_head; - MUTEX_INIT(&ips_iptq.ifq_lock, "ipftq ip tab"); - ips_iptq.ifq_next = NULL; - - RWLOCK_INIT(&ipf_state, "ipf IP state rwlock"); - MUTEX_INIT(&ipf_stinsert, "ipf state insert mutex"); - fr_state_init = 1; - - ips_last_force_flush = fr_ticks; + bzero((char *)ifs->ifs_ips_stats.iss_bucketlen, + ifs->ifs_fr_statesize * sizeof(u_long)); + + if (ifs->ifs_fr_state_maxbucket == 0) { + for (i = ifs->ifs_fr_statesize; i > 0; i >>= 1) + ifs->ifs_fr_state_maxbucket++; + ifs->ifs_fr_state_maxbucket *= 2; + } + + fr_sttab_init(ifs->ifs_ips_tqtqb, ifs); + ifs->ifs_ips_tqtqb[IPF_TCP_NSTATES - 1].ifq_next = &ifs->ifs_ips_udptq; + ifs->ifs_ips_udptq.ifq_ttl = (u_long)ifs->ifs_fr_udptimeout; + ifs->ifs_ips_udptq.ifq_ref = 1; + ifs->ifs_ips_udptq.ifq_head = NULL; + ifs->ifs_ips_udptq.ifq_tail = &ifs->ifs_ips_udptq.ifq_head; + MUTEX_INIT(&ifs->ifs_ips_udptq.ifq_lock, "ipftq udp tab"); + ifs->ifs_ips_udptq.ifq_next = &ifs->ifs_ips_udpacktq; + ifs->ifs_ips_udpacktq.ifq_ttl = (u_long)ifs->ifs_fr_udpacktimeout; + ifs->ifs_ips_udpacktq.ifq_ref = 1; + ifs->ifs_ips_udpacktq.ifq_head = NULL; + ifs->ifs_ips_udpacktq.ifq_tail = &ifs->ifs_ips_udpacktq.ifq_head; + MUTEX_INIT(&ifs->ifs_ips_udpacktq.ifq_lock, "ipftq udpack tab"); + ifs->ifs_ips_udpacktq.ifq_next = &ifs->ifs_ips_icmptq; + ifs->ifs_ips_icmptq.ifq_ttl = (u_long)ifs->ifs_fr_icmptimeout; + ifs->ifs_ips_icmptq.ifq_ref = 1; + ifs->ifs_ips_icmptq.ifq_head = NULL; + ifs->ifs_ips_icmptq.ifq_tail = &ifs->ifs_ips_icmptq.ifq_head; + MUTEX_INIT(&ifs->ifs_ips_icmptq.ifq_lock, "ipftq icmp tab"); + ifs->ifs_ips_icmptq.ifq_next = &ifs->ifs_ips_icmpacktq; + ifs->ifs_ips_icmpacktq.ifq_ttl = (u_long)ifs->ifs_fr_icmpacktimeout; + ifs->ifs_ips_icmpacktq.ifq_ref = 1; + ifs->ifs_ips_icmpacktq.ifq_head = NULL; + ifs->ifs_ips_icmpacktq.ifq_tail = &ifs->ifs_ips_icmpacktq.ifq_head; + MUTEX_INIT(&ifs->ifs_ips_icmpacktq.ifq_lock, "ipftq icmpack tab"); + ifs->ifs_ips_icmpacktq.ifq_next = &ifs->ifs_ips_iptq; + ifs->ifs_ips_iptq.ifq_ttl = (u_long)ifs->ifs_fr_iptimeout; + ifs->ifs_ips_iptq.ifq_ref = 1; + ifs->ifs_ips_iptq.ifq_head = NULL; + ifs->ifs_ips_iptq.ifq_tail = &ifs->ifs_ips_iptq.ifq_head; + MUTEX_INIT(&ifs->ifs_ips_iptq.ifq_lock, "ipftq ip tab"); + ifs->ifs_ips_iptq.ifq_next = NULL; + + RWLOCK_INIT(&ifs->ifs_ipf_state, "ipf IP state rwlock"); + MUTEX_INIT(&ifs->ifs_ipf_stinsert, "ipf state insert mutex"); + ifs->ifs_fr_state_init = 1; + + ifs->ifs_ips_last_force_flush = ifs->ifs_fr_ticks; return 0; } @@ -295,13 +287,14 @@ int fr_stateinit() /* Release and destroy any resources acquired or initialised so that */ /* IPFilter can be unloaded or re-initialised. 
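fr_stateinit() above now allocates a per-stack ips_seed array, and the two-argument DOUBLE_HASH(x, ifs) folds that seed into every bucket index, so identical flows hash to different buckets in different IP instances. A stand-alone sketch of that seeded double hash follows; the type and function names are illustrative, not the IPFilter ones.

#include <stdint.h>
#include <stdlib.h>

struct state_tab {		/* cf. the state fields of ipf_stack_t */
	unsigned int size;	/* cf. ifs_fr_statesize */
	unsigned long *seed;	/* cf. ifs_ips_seed */
};

/* cf. DOUBLE_HASH(): perturb the raw hash value with a per-instance seed. */
unsigned int
double_hash(const struct state_tab *t, unsigned long hv)
{
	return ((unsigned int)((hv + t->seed[hv % t->size]) % t->size));
}

/*
 * cf. the seed loop in fr_stateinit(); the seed values only need to differ
 * between instances, which the allocation address conveniently provides.
 */
int
state_tab_init(struct state_tab *t, unsigned int size)
{
	unsigned int i;

	t->size = size;
	t->seed = malloc(size * sizeof (*t->seed));
	if (t->seed == NULL)
		return (-1);
	for (i = 0; i < size; i++)
		t->seed[i] = ((unsigned long)(uintptr_t)t->seed + i) * size;
	return (0);
}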
*/ /* ------------------------------------------------------------------------ */ -void fr_stateunload() +void fr_stateunload(ifs) +ipf_stack_t *ifs; { ipftq_t *ifq, *ifqnext; ipstate_t *is; - while ((is = ips_list) != NULL) - fr_delstate(is, 0); + while ((is = ifs->ifs_ips_list) != NULL) + fr_delstate(is, 0, ifs); /* * Proxy timeout queues are not cleaned here because although they @@ -310,47 +303,50 @@ void fr_stateunload() * Should the proxy timeouts have their own list? There's no real * justification as this is the only complicationA */ - for (ifq = ips_utqe; ifq != NULL; ifq = ifqnext) { + for (ifq = ifs->ifs_ips_utqe; ifq != NULL; ifq = ifqnext) { ifqnext = ifq->ifq_next; if (((ifq->ifq_flags & IFQF_PROXY) == 0) && (fr_deletetimeoutqueue(ifq) == 0)) - fr_freetimeoutqueue(ifq); + fr_freetimeoutqueue(ifq, ifs); } - ips_stats.iss_inuse = 0; - ips_num = 0; + ifs->ifs_ips_stats.iss_inuse = 0; + ifs->ifs_ips_num = 0; - if (fr_state_init == 1) { - fr_sttab_destroy(ips_tqtqb); - MUTEX_DESTROY(&ips_udptq.ifq_lock); - MUTEX_DESTROY(&ips_icmptq.ifq_lock); - MUTEX_DESTROY(&ips_udpacktq.ifq_lock); - MUTEX_DESTROY(&ips_icmpacktq.ifq_lock); - MUTEX_DESTROY(&ips_iptq.ifq_lock); + if (ifs->ifs_fr_state_init == 1) { + fr_sttab_destroy(ifs->ifs_ips_tqtqb); + MUTEX_DESTROY(&ifs->ifs_ips_udptq.ifq_lock); + MUTEX_DESTROY(&ifs->ifs_ips_icmptq.ifq_lock); + MUTEX_DESTROY(&ifs->ifs_ips_udpacktq.ifq_lock); + MUTEX_DESTROY(&ifs->ifs_ips_icmpacktq.ifq_lock); + MUTEX_DESTROY(&ifs->ifs_ips_iptq.ifq_lock); } - if (ips_table != NULL) { - KFREES(ips_table, fr_statesize * sizeof(*ips_table)); - ips_table = NULL; + if (ifs->ifs_ips_table != NULL) { + KFREES(ifs->ifs_ips_table, + ifs->ifs_fr_statesize * sizeof(*ifs->ifs_ips_table)); + ifs->ifs_ips_table = NULL; } - if (ips_seed != NULL) { - KFREES(ips_seed, fr_statesize * sizeof(*ips_seed)); - ips_seed = NULL; + if (ifs->ifs_ips_seed != NULL) { + KFREES(ifs->ifs_ips_seed, + ifs->ifs_fr_statesize * sizeof(*ifs->ifs_ips_seed)); + ifs->ifs_ips_seed = NULL; } - if (ips_stats.iss_bucketlen != NULL) { - KFREES(ips_stats.iss_bucketlen, fr_statesize * sizeof(u_long)); - ips_stats.iss_bucketlen = NULL; + if (ifs->ifs_ips_stats.iss_bucketlen != NULL) { + KFREES(ifs->ifs_ips_stats.iss_bucketlen, + ifs->ifs_fr_statesize * sizeof(u_long)); + ifs->ifs_ips_stats.iss_bucketlen = NULL; } - if (fr_state_maxbucket_reset == 1) - fr_state_maxbucket = 0; + if (ifs->ifs_fr_state_maxbucket_reset == 1) + ifs->ifs_fr_state_maxbucket = 0; - if (fr_state_init == 1) { - fr_state_init = 0; - RW_DESTROY(&ipf_state); - MUTEX_DESTROY(&ipf_stinsert); + if (ifs->ifs_fr_state_init == 1) { + ifs->ifs_fr_state_init = 0; + RW_DESTROY(&ifs->ifs_ipf_state); + MUTEX_DESTROY(&ifs->ifs_ipf_stinsert); } } @@ -363,15 +359,16 @@ void fr_stateunload() /* Put all the current numbers and pointers into a single struct and return */ /* a pointer to it. 
*/ /* ------------------------------------------------------------------------ */ -static ips_stat_t *fr_statetstats() +static ips_stat_t *fr_statetstats(ifs) +ipf_stack_t *ifs; { - ips_stats.iss_active = ips_num; - ips_stats.iss_statesize = fr_statesize; - ips_stats.iss_statemax = fr_statemax; - ips_stats.iss_table = ips_table; - ips_stats.iss_list = ips_list; - ips_stats.iss_ticks = fr_ticks; - return &ips_stats; + ifs->ifs_ips_stats.iss_active = ifs->ifs_ips_num; + ifs->ifs_ips_stats.iss_statesize = ifs->ifs_fr_statesize; + ifs->ifs_ips_stats.iss_statemax = ifs->ifs_fr_statemax; + ifs->ifs_ips_stats.iss_table = ifs->ifs_ips_table; + ifs->ifs_ips_stats.iss_list = ifs->ifs_ips_list; + ifs->ifs_ips_stats.iss_ticks = ifs->ifs_fr_ticks; + return &ifs->ifs_ips_stats; } /* ------------------------------------------------------------------------ */ @@ -382,8 +379,9 @@ static ips_stat_t *fr_statetstats() /* Search for a state structure that matches the one passed, according to */ /* the IP addresses and other protocol specific information. */ /* ------------------------------------------------------------------------ */ -static int fr_state_remove(data) +static int fr_state_remove(data, ifs) caddr_t data; +ipf_stack_t *ifs; { ipstate_t *sp, st; int error; @@ -393,8 +391,8 @@ caddr_t data; if (error) return EFAULT; - WRITE_ENTER(&ipf_state); - for (sp = ips_list; sp; sp = sp->is_next) + WRITE_ENTER(&ifs->ifs_ipf_state); + for (sp = ifs->ifs_ips_list; sp; sp = sp->is_next) if ((sp->is_p == st.is_p) && (sp->is_v == st.is_v) && !bcmp((caddr_t)&sp->is_src, (caddr_t)&st.is_src, sizeof(st.is_src)) && @@ -402,11 +400,11 @@ caddr_t data; sizeof(st.is_dst)) && !bcmp((caddr_t)&sp->is_ps, (caddr_t)&st.is_ps, sizeof(st.is_ps))) { - fr_delstate(sp, ISL_REMOVE); - RWLOCK_EXIT(&ipf_state); + fr_delstate(sp, ISL_REMOVE, ifs); + RWLOCK_EXIT(&ifs->ifs_ipf_state); return 0; } - RWLOCK_EXIT(&ipf_state); + RWLOCK_EXIT(&ifs->ifs_ipf_state); return ESRCH; } @@ -420,10 +418,12 @@ caddr_t data; /* */ /* Processes an ioctl call made to operate on the IP Filter state device. */ /* ------------------------------------------------------------------------ */ -int fr_state_ioctl(data, cmd, mode) +int fr_state_ioctl(data, cmd, mode, uid, ctx, ifs) caddr_t data; ioctlcmd_t cmd; -int mode; +int mode, uid; +void *ctx; +ipf_stack_t *ifs; { int arg, ret, error = 0; @@ -433,7 +433,7 @@ int mode; * Delete an entry from the state table. 
*/ case SIOCDELST : - error = fr_state_remove(data); + error = fr_state_remove(data, ifs); break; /* * Flush the state table @@ -441,9 +441,9 @@ int mode; case SIOCIPFFL : BCOPYIN(data, (char *)&arg, sizeof(arg)); if (arg == 0 || arg == 1) { - WRITE_ENTER(&ipf_state); - ret = fr_state_flush(arg, 4); - RWLOCK_EXIT(&ipf_state); + WRITE_ENTER(&ifs->ifs_ipf_state); + ret = fr_state_flush(arg, 4, ifs); + RWLOCK_EXIT(&ifs->ifs_ipf_state); BCOPYOUT((char *)&ret, data, sizeof(ret)); } else error = EINVAL; @@ -452,9 +452,9 @@ int mode; case SIOCIPFL6 : BCOPYIN(data, (char *)&arg, sizeof(arg)); if (arg == 0 || arg == 1) { - WRITE_ENTER(&ipf_state); - ret = fr_state_flush(arg, 6); - RWLOCK_EXIT(&ipf_state); + WRITE_ENTER(&ifs->ifs_ipf_state); + ret = fr_state_flush(arg, 6, ifs); + RWLOCK_EXIT(&ifs->ifs_ipf_state); BCOPYOUT((char *)&ret, data, sizeof(ret)); } else error = EINVAL; @@ -470,7 +470,7 @@ int mode; else { int tmp; - tmp = ipflog_clear(IPL_LOGSTATE); + tmp = ipflog_clear(IPL_LOGSTATE, ifs); BCOPYOUT((char *)&tmp, data, sizeof(tmp)); } break; @@ -481,22 +481,23 @@ int mode; if (!(mode & FWRITE)) error = EPERM; else { - BCOPYIN((char *)data, (char *)&ipstate_logging, - sizeof(ipstate_logging)); + BCOPYIN((char *)data, + (char *)&ifs->ifs_ipstate_logging, + sizeof(ifs->ifs_ipstate_logging)); } break; /* * Return the current state of logging. */ case SIOCGETLG : - BCOPYOUT((char *)&ipstate_logging, (char *)data, - sizeof(ipstate_logging)); + BCOPYOUT((char *)&ifs->ifs_ipstate_logging, (char *)data, + sizeof(ifs->ifs_ipstate_logging)); break; /* * Return the number of bytes currently waiting to be read. */ case FIONREAD : - arg = iplused[IPL_LOGSTATE]; /* returned in an int */ + arg = ifs->ifs_iplused[IPL_LOGSTATE]; /* returned in an int */ BCOPYOUT((char *)&arg, data, sizeof(arg)); break; #endif @@ -504,7 +505,7 @@ int mode; * Get the current state statistics. */ case SIOCGETFS : - error = fr_outobj(data, fr_statetstats(), IPFOBJ_STATESTAT); + error = fr_outobj(data, fr_statetstats(ifs), IPFOBJ_STATESTAT); break; /* * Lock/Unlock the state table. (Locking prevents any changes, which @@ -514,29 +515,53 @@ int mode; if (!(mode & FWRITE)) { error = EPERM; } else { - fr_lock(data, &fr_state_lock); + fr_lock(data, &ifs->ifs_fr_state_lock); } break; /* * Add an entry to the current state table. */ case SIOCSTPUT : - if (!fr_state_lock || !(mode &FWRITE)) { + if (!ifs->ifs_fr_state_lock || !(mode &FWRITE)) { error = EACCES; break; } - error = fr_stputent(data); + error = fr_stputent(data, ifs); break; /* * Get a state table entry. */ case SIOCSTGET : - if (!fr_state_lock) { + if (!ifs->ifs_fr_state_lock) { error = EACCES; break; } - error = fr_stgetent(data); + error = fr_stgetent(data, ifs); + break; + + case SIOCGENITER : + { + ipftoken_t *token; + ipfgeniter_t iter; + + error = fr_inobj(data, &iter, IPFOBJ_GENITER); + if (error != 0) + break; + + token = ipf_findtoken(IPFGENITER_STATE, uid, ctx, ifs); + if (token != NULL) + error = fr_stateiter(token, &iter, ifs); + else + error = ESRCH; + RWLOCK_EXIT(&ifs->ifs_ipf_tokens); + break; + } + + case SIOCIPFDELTOK : + (void) BCOPYIN(data, (char *)&arg, sizeof(arg)); + error = ipf_deltoken(arg, uid, ctx, ifs); break; + default : error = EINVAL; break; @@ -556,8 +581,9 @@ int mode; /* the struct passed in and if not null and not found in the list of current*/ /* state entries, the retrieval fails. 
*/ /* ------------------------------------------------------------------------ */ -int fr_stgetent(data) +int fr_stgetent(data, ifs) caddr_t data; +ipf_stack_t *ifs; { ipstate_t *is, *isn; ipstate_save_t ips; @@ -569,7 +595,7 @@ caddr_t data; isn = ips.ips_next; if (isn == NULL) { - isn = ips_list; + isn = ifs->ifs_ips_list; if (isn == NULL) { if (ips.ips_next == NULL) return ENOENT; @@ -581,7 +607,7 @@ caddr_t data; * current list of entries. Security precaution to prevent * copying of random kernel data. */ - for (is = ips_list; is; is = is->is_next) + for (is = ifs->ifs_ips_list; is; is = is->is_next) if (is == isn) break; if (!is) @@ -610,8 +636,9 @@ caddr_t data; /* then also add in an orphaned rule (will not show up in any "ipfstat -io" */ /* output. */ /* ------------------------------------------------------------------------ */ -int fr_stputent(data) +int fr_stputent(data, ifs) caddr_t data; +ipf_stack_t *ifs; { ipstate_t *is, *isn; ipstate_save_t ips; @@ -641,10 +668,10 @@ caddr_t data; fr = ips.ips_rule; if (fr == NULL) { - READ_ENTER(&ipf_state); - fr_stinsert(isn, 0); + READ_ENTER(&ifs->ifs_ipf_state); + fr_stinsert(isn, 0, ifs); MUTEX_EXIT(&isn->is_lock); - RWLOCK_EXIT(&ipf_state); + RWLOCK_EXIT(&ifs->ifs_ipf_state); return 0; } @@ -665,17 +692,17 @@ caddr_t data; */ for (i = 0; i < 4; i++) { name = fr->fr_ifnames[i]; - fr->fr_ifas[i] = fr_resolvenic(name, fr->fr_v); + fr->fr_ifas[i] = fr_resolvenic(name, fr->fr_v, ifs); name = isn->is_ifname[i]; - isn->is_ifp[i] = fr_resolvenic(name, isn->is_v); + isn->is_ifp[i] = fr_resolvenic(name, isn->is_v, ifs); } fr->fr_ref = 0; fr->fr_dsize = 0; fr->fr_data = NULL; - fr_resolvedest(&fr->fr_tif, fr->fr_v); - fr_resolvedest(&fr->fr_dif, fr->fr_v); + fr_resolvedest(&fr->fr_tif, fr->fr_v, ifs); + fr_resolvedest(&fr->fr_dif, fr->fr_v, ifs); /* * send a copy back to userland of what we ended up @@ -688,16 +715,16 @@ caddr_t data; KFREE(fr); return EFAULT; } - READ_ENTER(&ipf_state); - fr_stinsert(isn, 0); + READ_ENTER(&ifs->ifs_ipf_state); + fr_stinsert(isn, 0, ifs); MUTEX_EXIT(&isn->is_lock); - RWLOCK_EXIT(&ipf_state); + RWLOCK_EXIT(&ifs->ifs_ipf_state); } else { - READ_ENTER(&ipf_state); - for (is = ips_list; is; is = is->is_next) + READ_ENTER(&ifs->ifs_ipf_state); + for (is = ifs->ifs_ips_list; is; is = is->is_next) if (is->is_rule == fr) { - fr_stinsert(isn, 0); + fr_stinsert(isn, 0, ifs); MUTEX_EXIT(&isn->is_lock); break; } @@ -706,7 +733,7 @@ caddr_t data; KFREE(isn); isn = NULL; } - RWLOCK_EXIT(&ipf_state); + RWLOCK_EXIT(&ifs->ifs_ipf_state); return (isn == NULL) ? ESRCH : 0; } @@ -728,9 +755,10 @@ caddr_t data; /* Locking: it is assumed that some kind of lock on ipf_state is held. */ /* Exits with is_lock initialised and held. */ /* ------------------------------------------------------------------------ */ -void fr_stinsert(is, rev) +void fr_stinsert(is, rev, ifs) ipstate_t *is; int rev; +ipf_stack_t *ifs; { frentry_t *fr; u_int hv; @@ -752,14 +780,14 @@ int rev; for (i = 0; i < 4; i++) { if (is->is_ifp[i] != NULL) continue; - is->is_ifp[i] = fr_resolvenic(is->is_ifname[i], is->is_v); + is->is_ifp[i] = fr_resolvenic(is->is_ifname[i], is->is_v, ifs); } /* * If we could trust is_hv, then the modulous would not be needed, but * when running with IPFILTER_SYNC, this stops bad values. */ - hv = is->is_hv % fr_statesize; + hv = is->is_hv % ifs->ifs_fr_statesize; is->is_hv = hv; /* @@ -768,29 +796,29 @@ int rev; * come along, match the entry and want to update it. 
*/ MUTEX_ENTER(&is->is_lock); - MUTEX_ENTER(&ipf_stinsert); + MUTEX_ENTER(&ifs->ifs_ipf_stinsert); /* * add into list table. */ - if (ips_list != NULL) - ips_list->is_pnext = &is->is_next; - is->is_pnext = &ips_list; - is->is_next = ips_list; - ips_list = is; - - if (ips_table[hv] != NULL) - ips_table[hv]->is_phnext = &is->is_hnext; + if (ifs->ifs_ips_list != NULL) + ifs->ifs_ips_list->is_pnext = &is->is_next; + is->is_pnext = &ifs->ifs_ips_list; + is->is_next = ifs->ifs_ips_list; + ifs->ifs_ips_list = is; + + if (ifs->ifs_ips_table[hv] != NULL) + ifs->ifs_ips_table[hv]->is_phnext = &is->is_hnext; else - ips_stats.iss_inuse++; - is->is_phnext = ips_table + hv; - is->is_hnext = ips_table[hv]; - ips_table[hv] = is; - ips_stats.iss_bucketlen[hv]++; - ips_num++; - MUTEX_EXIT(&ipf_stinsert); - - fr_setstatequeue(is, rev); + ifs->ifs_ips_stats.iss_inuse++; + is->is_phnext = ifs->ifs_ips_table + hv; + is->is_hnext = ifs->ifs_ips_table[hv]; + ifs->ifs_ips_table[hv] = is; + ifs->ifs_ips_stats.iss_bucketlen[hv]++; + ifs->ifs_ips_num++; + MUTEX_EXIT(&ifs->ifs_ipf_stinsert); + + fr_setstatequeue(is, rev, ifs); } @@ -820,8 +848,9 @@ u_int flags; grehdr_t *gre; void *ifp; int out; + ipf_stack_t *ifs = fin->fin_ifs; - if (fr_state_lock || + if (ifs->ifs_fr_state_lock || (fin->fin_flx & (FI_SHORT|FI_STATE|FI_FRAGBODY|FI_BAD))) return NULL; @@ -840,15 +869,15 @@ u_int flags; */ fr = fin->fin_fr; if (fr != NULL) { - if ((ips_num == fr_statemax) && (fr->fr_statemax == 0)) { - ATOMIC_INCL(ips_stats.iss_max); - fr_state_doflush = 1; + if ((ifs->ifs_ips_num == ifs->ifs_fr_statemax) && (fr->fr_statemax == 0)) { + ATOMIC_INCL(ifs->ifs_ips_stats.iss_max); + ifs->ifs_fr_state_doflush = 1; return NULL; } if ((fr->fr_statemax != 0) && (fr->fr_statecnt >= fr->fr_statemax)) { - ATOMIC_INCL(ips_stats.iss_maxref); - fr_state_doflush = 1; + ATOMIC_INCL(ifs->ifs_ips_stats.iss_maxref); + ifs->ifs_fr_state_doflush = 1; return NULL; } } @@ -860,7 +889,7 @@ u_int flags; out = fin->fin_out; is = &ips; bzero((char *)is, sizeof(*is)); - is->is_die = 1 + fr_ticks; + is->is_die = 1 + ifs->ifs_fr_ticks; /* * Copy and calculate... @@ -921,7 +950,7 @@ u_int flags; default : return NULL; } - ATOMIC_INCL(ips_stats.iss_icmp); + ATOMIC_INCL(ifs->ifs_ips_stats.iss_icmp); break; #endif case IPPROTO_ICMP : @@ -939,7 +968,7 @@ u_int flags; default : return NULL; } - ATOMIC_INCL(ips_stats.iss_icmp); + ATOMIC_INCL(ifs->ifs_ips_stats.iss_icmp); break; case IPPROTO_GRE : @@ -1019,7 +1048,7 @@ u_int flags; * timer on it as we'll never see an error if it fails to * connect. */ - ATOMIC_INCL(ips_stats.iss_tcp); + ATOMIC_INCL(ifs->ifs_ips_stats.iss_tcp); break; case IPPROTO_UDP : @@ -1031,13 +1060,13 @@ u_int flags; hv += tcp->th_dport; hv += tcp->th_sport; } - ATOMIC_INCL(ips_stats.iss_udp); + ATOMIC_INCL(ifs->ifs_ips_stats.iss_udp); break; default : break; } - hv = DOUBLE_HASH(hv); + hv = DOUBLE_HASH(hv, ifs); is->is_hv = hv; is->is_rule = fr; is->is_flags = flags & IS_INHERITED; @@ -1045,7 +1074,8 @@ u_int flags; /* * Look for identical state. 
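fr_stinsert() above (and its mirror image in fr_delstate() further down) maintains the per-stack hash table with back-pointers: each entry records the address of whatever points at it (is_pnext/is_phnext), iss_inuse counts only buckets that are non-empty, and iss_bucketlen feeds the max-bucket check in fr_addstate(). A compact sketch of that head-insert and unlink bookkeeping, using generic names rather than the ipstate_t fields:

#include <stddef.h>

struct entry {
	struct entry *next;
	struct entry **pnext;	/* address of the pointer to us */
};

struct tab_stats {
	unsigned long inuse;	/* non-empty buckets, cf. iss_inuse */
	unsigned long *len;	/* per-bucket length, cf. iss_bucketlen */
};

/* Head-insert into bucket hv; mirrors the table half of fr_stinsert(). */
void
bucket_insert(struct entry **table, struct tab_stats *st, unsigned int hv,
    struct entry *e)
{
	if (table[hv] != NULL)
		table[hv]->pnext = &e->next;	/* old head now hangs off us */
	else
		st->inuse++;			/* bucket was empty until now */
	e->pnext = &table[hv];
	e->next = table[hv];
	table[hv] = e;
	st->len[hv]++;
}

/* Unlink needs only the entry itself, thanks to the back-pointer. */
void
bucket_remove(struct entry **table, struct tab_stats *st, unsigned int hv,
    struct entry *e)
{
	*e->pnext = e->next;
	if (e->next != NULL)
		e->next->pnext = e->pnext;
	if (table[hv] == NULL)
		st->inuse--;			/* bucket just became empty */
	st->len[hv]--;
}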
*/ - for (is = ips_table[is->is_hv % fr_statesize]; is != NULL; + for (is = ifs->ifs_ips_table[is->is_hv % ifs->ifs_fr_statesize]; + is != NULL; is = is->is_hnext) { if (bcmp(&ips.is_src, &is->is_src, offsetof(struct ipstate, is_ps) - @@ -1055,13 +1085,13 @@ u_int flags; if (is != NULL) return NULL; - if (ips_stats.iss_bucketlen[hv] >= fr_state_maxbucket) { - ATOMIC_INCL(ips_stats.iss_bucketfull); + if (ifs->ifs_ips_stats.iss_bucketlen[hv] >= ifs->ifs_fr_state_maxbucket) { + ATOMIC_INCL(ifs->ifs_ips_stats.iss_bucketfull); return NULL; } KMALLOC(is, ipstate_t *); if (is == NULL) { - ATOMIC_INCL(ips_stats.iss_nomem); + ATOMIC_INCL(ifs->ifs_ips_stats.iss_nomem); return NULL; } bcopy((char *)&ips, (char *)is, sizeof(*is)); @@ -1071,16 +1101,17 @@ u_int flags; if (fr != NULL) { (void) strncpy(is->is_group, fr->fr_group, FR_GROUPLEN); if (fr->fr_age[0] != 0) { - is->is_tqehead[0] = fr_addtimeoutqueue(&ips_utqe, - fr->fr_age[0]); + is->is_tqehead[0] = + fr_addtimeoutqueue(&ifs->ifs_ips_utqe, + fr->fr_age[0], ifs); is->is_sti.tqe_flags |= TQE_RULEBASED; } if (fr->fr_age[1] != 0) { - is->is_tqehead[1] = fr_addtimeoutqueue(&ips_utqe, - fr->fr_age[1]); + is->is_tqehead[1] = + fr_addtimeoutqueue(&ifs->ifs_ips_utqe, + fr->fr_age[1], ifs); is->is_sti.tqe_flags |= TQE_RULEBASED; } - is->is_tag = fr->fr_logtag; is->is_ifp[(out << 1) + 1] = fr->fr_ifas[1]; @@ -1100,7 +1131,7 @@ u_int flags; COPYIFNAME(ifp, is->is_ifname[((1 - out) << 1) + 1], fr->fr_v); } } else { - pass = fr_flags; + pass = ifs->ifs_fr_flags; is->is_tag = FR_NOLOGTAG; } @@ -1152,7 +1183,7 @@ u_int flags; is->is_auth = fin->fin_auth; is->is_authmsk = 0xffff; if (flags & (SI_WILDP|SI_WILDA)) { - ATOMIC_INCL(ips_stats.iss_wild); + ATOMIC_INCL(ifs->ifs_ips_stats.iss_wild); } is->is_rulen = fin->fin_rule; @@ -1160,10 +1191,10 @@ u_int flags; if (pass & FR_LOGFIRST) is->is_pass &= ~(FR_LOGFIRST|FR_LOG); - READ_ENTER(&ipf_state); + READ_ENTER(&ifs->ifs_ipf_state); is->is_me = stsave; - fr_stinsert(is, fin->fin_rev); + fr_stinsert(is, fin->fin_rev, ifs); if (fin->fin_p == IPPROTO_TCP) { /* @@ -1171,7 +1202,8 @@ u_int flags; * timer on it as we'll never see an error if it fails to * connect. */ - (void) fr_tcp_age(&is->is_sti, fin, ips_tqtqb, is->is_flags); + (void) fr_tcp_age(&is->is_sti, fin, ifs->ifs_ips_tqtqb, + is->is_flags); MUTEX_EXIT(&is->is_lock); #ifdef IPFILTER_SCAN if ((is->is_flags & SI_CLONE) == 0) @@ -1184,10 +1216,10 @@ u_int flags; if ((is->is_flags & IS_STATESYNC) && ((is->is_flags & SI_CLONE) == 0)) is->is_sync = ipfsync_new(SMC_STATE, fin, is); #endif - if (ipstate_logging) - ipstate_log(is, ISL_NEW); + if (ifs->ifs_ipstate_logging) + ipstate_log(is, ISL_NEW, ifs); - RWLOCK_EXIT(&ipf_state); + RWLOCK_EXIT(&ifs->ifs_ipf_state); fin->fin_state = is; fin->fin_rev = IP6_NEQ(&is->is_dst, &fin->fin_daddr); fin->fin_flx |= FI_STATE; @@ -1303,6 +1335,7 @@ ipstate_t *is; { int source, ret = 0, flags; tcpdata_t *fdata, *tdata; + ipf_stack_t *ifs = fin->fin_ifs; source = !fin->fin_rev; if (((is->is_flags & IS_TCPFSM) != 0) && (source == 1) && @@ -1326,7 +1359,8 @@ ipstate_t *is; /* * Nearing end of connection, start timeout. 
*/ - ret = fr_tcp_age(&is->is_sti, fin, ips_tqtqb, is->is_flags); + ret = fr_tcp_age(&is->is_sti, fin, ifs->ifs_ips_tqtqb, + is->is_flags); if (ret == 0) { MUTEX_EXIT(&is->is_lock); return 0; @@ -1566,10 +1600,11 @@ ipstate_t *is; { ipstate_t *clone; u_32_t send; + ipf_stack_t *ifs = fin->fin_ifs; - if (ips_num == fr_statemax) { - ATOMIC_INCL(ips_stats.iss_max); - fr_state_doflush = 1; + if (ifs->ifs_ips_num == ifs->ifs_fr_statemax) { + ATOMIC_INCL(ifs->ifs_ips_stats.iss_max); + ifs->ifs_fr_state_doflush = 1; return NULL; } KMALLOC(clone, ipstate_t *); @@ -1579,7 +1614,7 @@ ipstate_t *is; MUTEX_NUKE(&clone->is_lock); - clone->is_die = ONE_DAY + fr_ticks; + clone->is_die = ONE_DAY + ifs->ifs_fr_ticks; clone->is_state[0] = 0; clone->is_state[1] = 0; send = ntohl(tcp->th_seq) + fin->fin_dlen - (TCP_OFF(tcp) << 2) + @@ -1606,10 +1641,10 @@ ipstate_t *is; clone->is_flags &= ~SI_CLONE; clone->is_flags |= SI_CLONED; - fr_stinsert(clone, fin->fin_rev); + fr_stinsert(clone, fin->fin_rev, ifs); clone->is_ref = 2; if (clone->is_p == IPPROTO_TCP) { - (void) fr_tcp_age(&clone->is_sti, fin, ips_tqtqb, + (void) fr_tcp_age(&clone->is_sti, fin, ifs->ifs_ips_tqtqb, clone->is_flags); } MUTEX_EXIT(&clone->is_lock); @@ -1648,6 +1683,7 @@ u_32_t cmask; u_short sp, dp; u_32_t cflx; void *ifp; + ipf_stack_t *ifs = fin->fin_ifs; rev = IP6_NEQ(&is->is_dst, dst); ifp = fin->fin_ifp; @@ -1781,7 +1817,7 @@ u_32_t cmask; } } if ((is->is_flags & (SI_WILDA|SI_WILDP)) == 0) { - ATOMIC_DECL(ips_stats.iss_wild); + ATOMIC_DECL(ifs->ifs_ips_stats.iss_wild); } } @@ -1816,7 +1852,7 @@ u_32_t cmask; return NULL; is = clone; } else { - ATOMIC_DECL(ips_stats.iss_wild); + ATOMIC_DECL(ifs->ifs_ips_stats.iss_wild); } if ((flags & SI_W_SPORT) != 0) { @@ -1839,8 +1875,8 @@ u_32_t cmask; is->is_maxdend = is->is_dend + 1; } is->is_flags &= ~(SI_W_SPORT|SI_W_DPORT); - if ((flags & SI_CLONED) && ipstate_logging) - ipstate_log(is, ISL_CLONE); + if ((flags & SI_CLONED) && ifs->ifs_ipstate_logging) + ipstate_log(is, ISL_CLONE, ifs); } ret = -1; @@ -1895,6 +1931,7 @@ fr_info_t *fin; int len; ip_t *oip; u_int hv; + ipf_stack_t *ifs = fin->fin_ifs; /* * Does it at least have the return (basic) IP header ? 
@@ -2017,10 +2054,10 @@ fr_info_t *fin; dst.in4 = oip->ip_dst; hv += dst.in4.s_addr; hv += icmp->icmp_id; - hv = DOUBLE_HASH(hv); + hv = DOUBLE_HASH(hv, ifs); - READ_ENTER(&ipf_state); - for (isp = &ips_table[hv]; ((is = *isp) != NULL); ) { + READ_ENTER(&ifs->ifs_ipf_state); + for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) { isp = &is->is_hnext; if ((is->is_p != pr) || (is->is_v != 4)) continue; @@ -2030,7 +2067,7 @@ fr_info_t *fin; NULL, FI_ICMPCMP); if (is != NULL) { if ((is->is_pass & FR_NOICMPERR) != 0) { - RWLOCK_EXIT(&ipf_state); + RWLOCK_EXIT(&ifs->ifs_ipf_state); return NULL; } /* @@ -2048,12 +2085,12 @@ fr_info_t *fin; oi = (backward << 1) + ofin.fin_out; if (is->is_icmppkts[i] > is->is_pkts[oi]) continue; - ips_stats.iss_hits++; + ifs->ifs_ips_stats.iss_hits++; is->is_icmppkts[i]++; return is; } } - RWLOCK_EXIT(&ipf_state); + RWLOCK_EXIT(&ifs->ifs_ipf_state); return NULL; case IPPROTO_TCP : case IPPROTO_UDP : @@ -2073,10 +2110,10 @@ fr_info_t *fin; hv += dst.in4.s_addr; hv += dport; hv += sport; - hv = DOUBLE_HASH(hv); + hv = DOUBLE_HASH(hv, ifs); - READ_ENTER(&ipf_state); - for (isp = &ips_table[hv]; ((is = *isp) != NULL); ) { + READ_ENTER(&ifs->ifs_ipf_state); + for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) { isp = &is->is_hnext; /* * Only allow this icmp though if the @@ -2105,7 +2142,7 @@ fr_info_t *fin; if (((is->is_pass & FR_NOICMPERR) != 0) || (is->is_icmppkts[i] > is->is_pkts[oi])) break; - ips_stats.iss_hits++; + ifs->ifs_ips_stats.iss_hits++; is->is_icmppkts[i]++; /* * we deliberately do not touch the timeouts @@ -2115,7 +2152,7 @@ fr_info_t *fin; return is; } } - RWLOCK_EXIT(&ipf_state); + RWLOCK_EXIT(&ifs->ifs_ipf_state); return NULL; } @@ -2129,14 +2166,15 @@ fr_info_t *fin; /* */ /* Move a state entry from one position in the hash table to another. */ /* ------------------------------------------------------------------------ */ -static void fr_ipsmove(is, hv) +static void fr_ipsmove(is, hv, ifs) ipstate_t *is; u_int hv; +ipf_stack_t *ifs; { ipstate_t **isp; u_int hvm; - ASSERT(rw_read_locked(&ipf_state.ipf_lk) == 0); + ASSERT(rw_read_locked(&ifs->ifs_ipf_state.ipf_lk) == 0); hvm = is->is_hv; /* @@ -2146,21 +2184,21 @@ u_int hv; if (is->is_hnext) is->is_hnext->is_phnext = isp; *isp = is->is_hnext; - if (ips_table[hvm] == NULL) - ips_stats.iss_inuse--; - ips_stats.iss_bucketlen[hvm]--; + if (ifs->ifs_ips_table[hvm] == NULL) + ifs->ifs_ips_stats.iss_inuse--; + ifs->ifs_ips_stats.iss_bucketlen[hvm]--; /* * ...and put the hash in the new one. 
*/ - hvm = DOUBLE_HASH(hv); + hvm = DOUBLE_HASH(hv, ifs); is->is_hv = hvm; - isp = &ips_table[hvm]; + isp = &ifs->ifs_ips_table[hvm]; if (*isp) (*isp)->is_phnext = &is->is_hnext; else - ips_stats.iss_inuse++; - ips_stats.iss_bucketlen[hvm]++; + ifs->ifs_ips_stats.iss_inuse++; + ifs->ifs_ips_stats.iss_bucketlen[hvm]++; is->is_phnext = isp; is->is_hnext = *isp; *isp = is; @@ -2192,6 +2230,7 @@ ipftq_t **ifqp; struct icmp *ic; ipftq_t *ifq; int oow; + ipf_stack_t *ifs = fin->fin_ifs; is = NULL; ifq = NULL; @@ -2235,10 +2274,10 @@ ipftq_t **ifqp; hv += ic->icmp_id; } } - READ_ENTER(&ipf_state); + READ_ENTER(&ifs->ifs_ipf_state); icmp6again: - hvm = DOUBLE_HASH(hv); - for (isp = &ips_table[hvm]; ((is = *isp) != NULL); ) { + hvm = DOUBLE_HASH(hv, ifs); + for (isp = &ifs->ifs_ips_table[hvm]; ((is = *isp) != NULL); ) { isp = &is->is_hnext; if ((is->is_p != pr) || (is->is_v != v)) continue; @@ -2247,9 +2286,9 @@ icmp6again: fr_matchicmpqueryreply(v, &is->is_icmp, ic, fin->fin_rev)) { if (fin->fin_rev) - ifq = &ips_icmpacktq; + ifq = &ifs->ifs_ips_icmpacktq; else - ifq = &ips_icmptq; + ifq = &ifs->ifs_ips_icmptq; break; } } @@ -2260,12 +2299,12 @@ icmp6again: hv += fin->fin_fi.fi_src.i6[1]; hv += fin->fin_fi.fi_src.i6[2]; hv += fin->fin_fi.fi_src.i6[3]; - fr_ipsmove(is, hv); - MUTEX_DOWNGRADE(&ipf_state); + fr_ipsmove(is, hv, ifs); + MUTEX_DOWNGRADE(&ifs->ifs_ipf_state); } break; } - RWLOCK_EXIT(&ipf_state); + RWLOCK_EXIT(&ifs->ifs_ipf_state); /* * No matching icmp state entry. Perhaps this is a @@ -2277,14 +2316,14 @@ icmp6again: * advantage of this requires some significant code changes * to handle the specific types where that is the case. */ - if ((ips_stats.iss_wild != 0) && (v == 6) && (tryagain == 0) && + if ((ifs->ifs_ips_stats.iss_wild != 0) && (v == 6) && (tryagain == 0) && !IN6_IS_ADDR_MULTICAST(&fin->fin_fi.fi_src.in6)) { hv -= fin->fin_fi.fi_src.i6[0]; hv -= fin->fin_fi.fi_src.i6[1]; hv -= fin->fin_fi.fi_src.i6[2]; hv -= fin->fin_fi.fi_src.i6[3]; tryagain = 1; - WRITE_ENTER(&ipf_state); + WRITE_ENTER(&ifs->ifs_ipf_state); goto icmp6again; } @@ -2298,9 +2337,9 @@ icmp6again: if (v == 4) { hv += ic->icmp_id; } - hv = DOUBLE_HASH(hv); - READ_ENTER(&ipf_state); - for (isp = &ips_table[hv]; ((is = *isp) != NULL); ) { + hv = DOUBLE_HASH(hv, ifs); + READ_ENTER(&ifs->ifs_ipf_state); + for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) { isp = &is->is_hnext; if ((is->is_p != pr) || (is->is_v != v)) continue; @@ -2309,14 +2348,14 @@ icmp6again: fr_matchicmpqueryreply(v, &is->is_icmp, ic, fin->fin_rev)) { if (fin->fin_rev) - ifq = &ips_icmpacktq; + ifq = &ifs->ifs_ips_icmpacktq; else - ifq = &ips_icmptq; + ifq = &ifs->ifs_ips_icmptq; break; } } if (is == NULL) { - RWLOCK_EXIT(&ipf_state); + RWLOCK_EXIT(&ifs->ifs_ipf_state); } break; @@ -2329,10 +2368,10 @@ icmp6again: hv += dport; oow = 0; tryagain = 0; - READ_ENTER(&ipf_state); + READ_ENTER(&ifs->ifs_ipf_state); retry_tcpudp: - hvm = DOUBLE_HASH(hv); - for (isp = &ips_table[hvm]; ((is = *isp) != NULL); ) { + hvm = DOUBLE_HASH(hv, ifs); + for (isp = &ifs->ifs_ips_table[hvm]; ((is = *isp) != NULL); ) { isp = &is->is_hnext; if ((is->is_p != pr) || (is->is_v != v)) continue; @@ -2353,18 +2392,18 @@ retry_tcpudp: !(is->is_flags & (SI_CLONE|SI_WILDP|SI_WILDA))) { hv += dport; hv += sport; - fr_ipsmove(is, hv); - MUTEX_DOWNGRADE(&ipf_state); + fr_ipsmove(is, hv, ifs); + MUTEX_DOWNGRADE(&ifs->ifs_ipf_state); } break; } - RWLOCK_EXIT(&ipf_state); + RWLOCK_EXIT(&ifs->ifs_ipf_state); - if (!tryagain && ips_stats.iss_wild) { + if (!tryagain && 
ifs->ifs_ips_stats.iss_wild) { hv -= dport; hv -= sport; tryagain = 1; - WRITE_ENTER(&ipf_state); + WRITE_ENTER(&ifs->ifs_ipf_state); goto retry_tcpudp; } fin->fin_flx |= oow; @@ -2380,20 +2419,20 @@ retry_tcpudp: #endif default : ifqp = NULL; - hvm = DOUBLE_HASH(hv); - READ_ENTER(&ipf_state); - for (isp = &ips_table[hvm]; ((is = *isp) != NULL); ) { + hvm = DOUBLE_HASH(hv, ifs); + READ_ENTER(&ifs->ifs_ipf_state); + for (isp = &ifs->ifs_ips_table[hvm]; ((is = *isp) != NULL); ) { isp = &is->is_hnext; if ((is->is_p != pr) || (is->is_v != v)) continue; is = fr_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP); if (is != NULL) { - ifq = &ips_iptq; + ifq = &ifs->ifs_ips_iptq; break; } } if (is == NULL) { - RWLOCK_EXIT(&ipf_state); + RWLOCK_EXIT(&ifs->ifs_ipf_state); } break; } @@ -2424,6 +2463,7 @@ ipftq_t *ifq; { ipftqent_t *tqe; int i, pass; + ipf_stack_t *ifs = fin->fin_ifs; i = (fin->fin_rev << 1) + fin->fin_out; @@ -2437,7 +2477,7 @@ ipftq_t *ifq; ifq = is->is_tqehead[fin->fin_rev]; if (ifq != NULL) - fr_movequeue(tqe, tqe->tqe_ifq, ifq); + fr_movequeue(tqe, tqe->tqe_ifq, ifq, ifs); is->is_pkts[i]++; is->is_bytes[i] += fin->fin_plen; @@ -2448,7 +2488,7 @@ ipftq_t *ifq; ipfsync_update(SMC_STATE, fin, is->is_sync); #endif - ATOMIC_INCL(ips_stats.iss_hits); + ATOMIC_INCL(ifs->ifs_ips_stats.iss_hits); fin->fin_fr = is->is_rule; @@ -2480,8 +2520,9 @@ u_32_t *passp; tcphdr_t *tcp; ipftq_t *ifq; u_int pass; + ipf_stack_t *ifs = fin->fin_ifs; - if (fr_state_lock || (ips_list == NULL) || + if (ifs->ifs_fr_state_lock || (ifs->ifs_ips_list == NULL) || (fin->fin_flx & (FI_SHORT|FI_STATE|FI_FRAGBODY|FI_BAD))) return NULL; @@ -2540,13 +2581,13 @@ u_32_t *passp; break; default : if (fin->fin_rev) - ifq = &ips_udpacktq; + ifq = &ifs->ifs_ips_udpacktq; else - ifq = &ips_udptq; + ifq = &ifs->ifs_ips_udptq; break; } if (is == NULL) { - ATOMIC_INCL(ips_stats.iss_miss); + ATOMIC_INCL(ifs->ifs_ips_stats.iss_miss); return NULL; } @@ -2570,11 +2611,11 @@ matched: fin->fin_nat = is->is_nat[fin->fin_rev]; fin->fin_state = is; - is->is_touched = fr_ticks; + is->is_touched = ifs->ifs_fr_ticks; MUTEX_ENTER(&is->is_lock); is->is_ref++; MUTEX_EXIT(&is->is_lock); - RWLOCK_EXIT(&ipf_state); + RWLOCK_EXIT(&ifs->ifs_ipf_state); fin->fin_flx |= FI_STATE; if ((pass & FR_LOGFIRST) != 0) pass &= ~(FR_LOGFIRST|FR_LOG); @@ -2674,28 +2715,29 @@ ipstate_t *is; /* If ifp is passed in as being non-null then we are only doing updates for */ /* existing, matching, uses of it. 
*/ /* ------------------------------------------------------------------------ */ -void fr_statesync(action, v, ifp, name) +void fr_statesync(action, v, ifp, name, ifs) int action, v; void *ifp; char *name; +ipf_stack_t *ifs; { ipstate_t *is; int i; - if (fr_running <= 0) + if (ifs->ifs_fr_running <= 0) return; - WRITE_ENTER(&ipf_state); + WRITE_ENTER(&ifs->ifs_ipf_state); - if (fr_running <= 0) { - RWLOCK_EXIT(&ipf_state); + if (ifs->ifs_fr_running <= 0) { + RWLOCK_EXIT(&ifs->ifs_ipf_state); return; } switch (action) { case IPFSYNC_RESYNC : - for (is = ips_list; is; is = is->is_next) { + for (is = ifs->ifs_ips_list; is; is = is->is_next) { if (v != 0 && is->is_v != v) continue; /* @@ -2703,12 +2745,12 @@ char *name; */ for (i = 0; i < 4; i++) { is->is_ifp[i] = fr_resolvenic(is->is_ifname[i], - is->is_v); + is->is_v, ifs); } } break; case IPFSYNC_NEWIFP : - for (is = ips_list; is; is = is->is_next) { + for (is = ifs->ifs_ips_list; is; is = is->is_next) { if (v != 0 && is->is_v != v) continue; /* @@ -2722,7 +2764,7 @@ char *name; } break; case IPFSYNC_OLDIFP : - for (is = ips_list; is; is = is->is_next) { + for (is = ifs->ifs_ips_list; is; is = is->is_next) { if (v != 0 && is->is_v != v) continue; /* @@ -2735,7 +2777,7 @@ char *name; } break; } - RWLOCK_EXIT(&ipf_state); + RWLOCK_EXIT(&ifs->ifs_ipf_state); } @@ -2750,13 +2792,14 @@ char *name; /* and timeout queue lists. Make adjustments to hash table statistics and */ /* global counters as required. */ /* ------------------------------------------------------------------------ */ -static void fr_delstate(is, why) +static void fr_delstate(is, why, ifs) ipstate_t *is; int why; +ipf_stack_t *ifs; { - ASSERT(rw_write_held(&ipf_global.ipf_lk) == 0 || - rw_write_held(&ipf_state.ipf_lk) == 0); + ASSERT(rw_write_held(&ifs->ifs_ipf_global.ipf_lk) == 0 || + rw_write_held(&ifs->ifs_ipf_state.ipf_lk) == 0); /* * Since we want to delete this, remove it from the state table, @@ -2776,22 +2819,22 @@ int why; *is->is_phnext = is->is_hnext; if (is->is_hnext != NULL) is->is_hnext->is_phnext = is->is_phnext; - if (ips_table[is->is_hv] == NULL) - ips_stats.iss_inuse--; - ips_stats.iss_bucketlen[is->is_hv]--; + if (ifs->ifs_ips_table[is->is_hv] == NULL) + ifs->ifs_ips_stats.iss_inuse--; + ifs->ifs_ips_stats.iss_bucketlen[is->is_hv]--; is->is_phnext = NULL; is->is_hnext = NULL; } /* - * Because ips_stats.iss_wild is a count of entries in the state + * Because ifs->ifs_ips_stats.iss_wild is a count of entries in the state * table that have wildcard flags set, only decerement it once * and do it here. 
*/ if (is->is_flags & (SI_WILDP|SI_WILDA)) { if (!(is->is_flags & SI_CLONED)) { - ATOMIC_DECL(ips_stats.iss_wild); + ATOMIC_DECL(ifs->ifs_ips_stats.iss_wild); } is->is_flags &= ~(SI_WILDP|SI_WILDA); } @@ -2813,11 +2856,11 @@ int why; if (is->is_tqehead[0] != NULL) { if (fr_deletetimeoutqueue(is->is_tqehead[0]) == 0) - fr_freetimeoutqueue(is->is_tqehead[0]); + fr_freetimeoutqueue(is->is_tqehead[0], ifs); } if (is->is_tqehead[1] != NULL) { if (fr_deletetimeoutqueue(is->is_tqehead[1]) == 0) - fr_freetimeoutqueue(is->is_tqehead[1]); + fr_freetimeoutqueue(is->is_tqehead[1], ifs); } #ifdef IPFILTER_SYNC @@ -2828,17 +2871,17 @@ int why; (void) ipsc_detachis(is); #endif - if (ipstate_logging != 0 && why != 0) - ipstate_log(is, why); + if (ifs->ifs_ipstate_logging != 0 && why != 0) + ipstate_log(is, why, ifs); if (is->is_rule != NULL) { is->is_rule->fr_statecnt--; - (void)fr_derefrule(&is->is_rule); + (void)fr_derefrule(&is->is_rule, ifs); } MUTEX_DESTROY(&is->is_lock); KFREE(is); - ips_num--; + ifs->ifs_ips_num--; } @@ -2852,7 +2895,8 @@ int why; /* and the youngest at the bottom. So if the top one doesn't need to be */ /* expired then neither will any under it. */ /* ------------------------------------------------------------------------ */ -void fr_timeoutstate() +void fr_timeoutstate(ifs) +ipf_stack_t *ifs; { ipftq_t *ifq, *ifqnext; ipftqent_t *tqe, *tqn; @@ -2860,43 +2904,42 @@ void fr_timeoutstate() SPL_INT(s); SPL_NET(s); - WRITE_ENTER(&ipf_state); - for (ifq = ips_tqtqb; ifq != NULL; ifq = ifq->ifq_next) + WRITE_ENTER(&ifs->ifs_ipf_state); + for (ifq = ifs->ifs_ips_tqtqb; ifq != NULL; ifq = ifq->ifq_next) for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) { - if (tqe->tqe_die > fr_ticks) + if (tqe->tqe_die > ifs->ifs_fr_ticks) break; tqn = tqe->tqe_next; is = tqe->tqe_parent; - fr_delstate(is, ISL_EXPIRE); + fr_delstate(is, ISL_EXPIRE, ifs); } - for (ifq = ips_utqe; ifq != NULL; ifq = ifqnext) { + for (ifq = ifs->ifs_ips_utqe; ifq != NULL; ifq = ifqnext) { ifqnext = ifq->ifq_next; for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) { - if (tqe->tqe_die > fr_ticks) + if (tqe->tqe_die > ifs->ifs_fr_ticks) break; tqn = tqe->tqe_next; is = tqe->tqe_parent; - fr_delstate(is, ISL_EXPIRE); + fr_delstate(is, ISL_EXPIRE, ifs); } } - for (ifq = ips_utqe; ifq != NULL; ifq = ifqnext) { + for (ifq = ifs->ifs_ips_utqe; ifq != NULL; ifq = ifqnext) { ifqnext = ifq->ifq_next; if (((ifq->ifq_flags & IFQF_DELETE) != 0) && (ifq->ifq_ref == 0)) { - fr_freetimeoutqueue(ifq); + fr_freetimeoutqueue(ifq, ifs); } } - if (fr_state_doflush) { - (void) fr_state_flush(2, 0); - fr_state_doflush = 0; + if (ifs->ifs_fr_state_doflush) { + (void) fr_state_flush(2, 0, ifs); + ifs->ifs_fr_state_doflush = 0; } - - RWLOCK_EXIT(&ipf_state); + RWLOCK_EXIT(&ifs->ifs_ipf_state); SPL_X(s); } @@ -2918,8 +2961,9 @@ void fr_timeoutstate() /* If that too fails, then work backwards in 30 second intervals */ /* for the last 30 minutes to at worst 30 seconds idle. 
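The fr_timeoutstate() walk above depends on each timeout queue being kept oldest-first: the scan down a queue stops at the first entry whose deadline is still in the future, because everything after it is younger. A sketch of that early-exit expiry loop follows; the types and names are illustrative, and the real code additionally holds the per-stack ipf_state write lock and unlinks each entry inside fr_delstate().

#include <stddef.h>

struct tqent {
	struct tqent *next;
	unsigned long die;	/* tick at which this entry expires */
	void *parent;		/* owning state entry, cf. tqe_parent */
};

struct tq {
	struct tq *next;	/* next timeout queue */
	struct tqent *head;	/* entries ordered oldest-first */
};

/*
 * Expire everything whose deadline has passed.  Because each queue is
 * ordered oldest-first, the inner loop stops at the first live entry,
 * cf. the "tqe_die > fr_ticks" break in fr_timeoutstate().  The delete
 * callback is expected to unlink the entry, as fr_delstate() does.
 */
void
expire_queues(struct tq *queues, unsigned long now, void (*del)(void *))
{
	struct tq *q;
	struct tqent *te, *tn;

	for (q = queues; q != NULL; q = q->next) {
		for (tn = q->head; (te = tn) != NULL; ) {
			if (te->die > now)
				break;		/* the rest are younger */
			tn = te->next;
			del(te->parent);
		}
	}
}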
*/ /* ------------------------------------------------------------------------ */ -static int fr_state_flush(which, proto) +static int fr_state_flush(which, proto, ifs) int which, proto; +ipf_stack_t *ifs; { ipftq_t *ifq, *ifqnext; ipftqent_t *tqe, *tqn; @@ -2932,7 +2976,7 @@ int which, proto; removed = 0; SPL_NET(s); - for (isp = &ips_list; ((is = *isp) != NULL); ) { + for (isp = &ifs->ifs_ips_list; ((is = *isp) != NULL); ) { delete = 0; if ((proto != 0) && (is->is_v != proto)) { @@ -2957,10 +3001,10 @@ int which, proto; if (delete) { if (is->is_p == IPPROTO_TCP) - ips_stats.iss_fin++; + ifs->ifs_ips_stats.iss_fin++; else - ips_stats.iss_expire++; - fr_delstate(is, ISL_FLUSH); + ifs->ifs_ips_stats.iss_expire++; + fr_delstate(is, ISL_FLUSH, ifs); removed++; } else isp = &is->is_next; @@ -2978,29 +3022,29 @@ int which, proto; * Another alternative is to implement random drop and drop N entries * at random until N have been freed up. */ - if (fr_ticks - ips_last_force_flush < IPF_TTLVAL(5)) + if (ifs->ifs_fr_ticks - ifs->ifs_ips_last_force_flush < IPF_TTLVAL(5)) goto force_flush_skipped; - ips_last_force_flush = fr_ticks; + ifs->ifs_ips_last_force_flush = ifs->ifs_fr_ticks; - if (fr_ticks > IPF_TTLVAL(43200)) + if (ifs->ifs_fr_ticks > IPF_TTLVAL(43200)) interval = IPF_TTLVAL(43200); - else if (fr_ticks > IPF_TTLVAL(1800)) + else if (ifs->ifs_fr_ticks > IPF_TTLVAL(1800)) interval = IPF_TTLVAL(1800); - else if (fr_ticks > IPF_TTLVAL(30)) + else if (ifs->ifs_fr_ticks > IPF_TTLVAL(30)) interval = IPF_TTLVAL(30); else interval = IPF_TTLVAL(10); - try = fr_ticks - (fr_ticks - interval); + try = ifs->ifs_fr_ticks - (ifs->ifs_fr_ticks - interval); if (try < 0) goto force_flush_skipped; while (removed == 0) { - maxtick = fr_ticks - interval; + maxtick = ifs->ifs_fr_ticks - interval; if (maxtick < 0) break; while (try < maxtick) { - for (ifq = ips_tqtqb; ifq != NULL; + for (ifq = ifs->ifs_ips_tqtqb; ifq != NULL; ifq = ifq->ifq_next) { for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) { @@ -3008,12 +3052,12 @@ int which, proto; break; tqn = tqe->tqe_next; is = tqe->tqe_parent; - fr_delstate(is, ISL_EXPIRE); + fr_delstate(is, ISL_EXPIRE, ifs); removed++; } } - for (ifq = ips_utqe; ifq != NULL; ifq = ifqnext) { + for (ifq = ifs->ifs_ips_utqe; ifq != NULL; ifq = ifqnext) { ifqnext = ifq->ifq_next; for (tqn = ifq->ifq_head; @@ -3022,7 +3066,7 @@ int which, proto; break; tqn = tqe->tqe_next; is = tqe->tqe_parent; - fr_delstate(is, ISL_EXPIRE); + fr_delstate(is, ISL_EXPIRE, ifs); removed++; } } @@ -3089,6 +3133,7 @@ int flags; int dlen, ostate, nstate, rval, dir; u_char tcpflags; tcphdr_t *tcp; + ipf_stack_t *ifs = fin->fin_ifs; tcp = fin->fin_dp; @@ -3385,7 +3430,7 @@ int flags; else if (rval == 1) { tqe->tqe_state[dir] = nstate; if ((tqe->tqe_flags & TQE_RULEBASED) == 0) - fr_movequeue(tqe, tqe->tqe_ifq, tqtab + nstate); + fr_movequeue(tqe, tqe->tqe_ifq, tqtab + nstate, ifs); } return rval; @@ -3402,9 +3447,10 @@ int flags; /* passed in. Log packet/byte counts, source/destination address and other */ /* protocol specific information. 
*/ /* ------------------------------------------------------------------------ */ -void ipstate_log(is, type) +void ipstate_log(is, type, ifs) struct ipstate *is; u_int type; +ipf_stack_t *ifs; { #ifdef IPFILTER_LOG struct ipslog ipsl; @@ -3454,10 +3500,10 @@ u_int type; sizes[0] = sizeof(ipsl); types[0] = 0; - if (ipllog(IPL_LOGSTATE, NULL, items, sizes, types, 1)) { - ATOMIC_INCL(ips_stats.iss_logged); + if (ipllog(IPL_LOGSTATE, NULL, items, sizes, types, 1, ifs)) { + ATOMIC_INCL(ifs->ifs_ips_stats.iss_logged); } else { - ATOMIC_INCL(ips_stats.iss_logfail); + ATOMIC_INCL(ifs->ifs_ips_stats.iss_logfail); } #endif } @@ -3489,6 +3535,7 @@ fr_info_t *fin; ip6_t *oip6; u_char pr; u_int hv; + ipf_stack_t *ifs = fin->fin_ifs; /* * Does it at least have the return (basic) IP header ? @@ -3554,10 +3601,10 @@ fr_info_t *fin; hv += dst.in4.s_addr; hv += oic->icmp6_id; hv += oic->icmp6_seq; - hv = DOUBLE_HASH(hv); + hv = DOUBLE_HASH(hv, ifs); - READ_ENTER(&ipf_state); - for (isp = &ips_table[hv]; ((is = *isp) != NULL); ) { + READ_ENTER(&ifs->ifs_ipf_state); + for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) { ic = &is->is_icmp; isp = &is->is_hnext; if ((is->is_p == pr) && @@ -3574,7 +3621,7 @@ fr_info_t *fin; if (((ic->ici_type == ICMP6_ECHO_REPLY) && (oic->icmp6_type == ICMP6_ECHO_REQUEST)) || (ic->ici_type - 1 == oic->icmp6_type )) { - ips_stats.iss_hits++; + ifs->ifs_ips_stats.iss_hits++; backward = IP6_NEQ(&is->is_dst, &src); fin->fin_rev = !backward; i = (backward << 1) + fin->fin_out; @@ -3583,7 +3630,7 @@ fr_info_t *fin; } } } - RWLOCK_EXIT(&ipf_state); + RWLOCK_EXIT(&ifs->ifs_ipf_state); return NULL; } @@ -3607,10 +3654,10 @@ fr_info_t *fin; hv += sport; } else tcp = NULL; - hv = DOUBLE_HASH(hv); + hv = DOUBLE_HASH(hv, ifs); - READ_ENTER(&ipf_state); - for (isp = &ips_table[hv]; ((is = *isp) != NULL); ) { + READ_ENTER(&ifs->ifs_ipf_state); + for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) { isp = &is->is_hnext; /* * Only allow this icmp though if the @@ -3624,7 +3671,7 @@ fr_info_t *fin; continue; is = fr_matchsrcdst(&ofin, is, &src, &dst, tcp, FI_ICMPCMP); if (is != NULL) { - ips_stats.iss_hits++; + ifs->ifs_ips_stats.iss_hits++; backward = IP6_NEQ(&is->is_dst, &src); fin->fin_rev = !backward; i = (backward << 1) + fin->fin_out; @@ -3637,7 +3684,7 @@ fr_info_t *fin; return is; } } - RWLOCK_EXIT(&ipf_state); + RWLOCK_EXIT(&ifs->ifs_ipf_state); return NULL; } #endif @@ -3650,8 +3697,9 @@ fr_info_t *fin; /* */ /* Initialise the array of timeout queues for TCP. 
*/ /* ------------------------------------------------------------------------ */ -void fr_sttab_init(tqp) +void fr_sttab_init(tqp, ifs) ipftq_t *tqp; +ipf_stack_t *ifs; { int i; @@ -3664,18 +3712,18 @@ ipftq_t *tqp; MUTEX_INIT(&tqp[i].ifq_lock, "ipftq tcp tab"); } tqp[IPF_TCP_NSTATES - 1].ifq_next = NULL; - tqp[IPF_TCPS_CLOSED].ifq_ttl = fr_tcpclosed; - tqp[IPF_TCPS_LISTEN].ifq_ttl = fr_tcptimeout; - tqp[IPF_TCPS_SYN_SENT].ifq_ttl = fr_tcptimeout; - tqp[IPF_TCPS_SYN_RECEIVED].ifq_ttl = fr_tcptimeout; - tqp[IPF_TCPS_ESTABLISHED].ifq_ttl = fr_tcpidletimeout; - tqp[IPF_TCPS_CLOSE_WAIT].ifq_ttl = fr_tcphalfclosed; - tqp[IPF_TCPS_FIN_WAIT_1].ifq_ttl = fr_tcphalfclosed; - tqp[IPF_TCPS_CLOSING].ifq_ttl = fr_tcptimeout; - tqp[IPF_TCPS_LAST_ACK].ifq_ttl = fr_tcplastack; - tqp[IPF_TCPS_FIN_WAIT_2].ifq_ttl = fr_tcpclosewait; - tqp[IPF_TCPS_TIME_WAIT].ifq_ttl = fr_tcptimeout; - tqp[IPF_TCPS_HALF_ESTAB].ifq_ttl = fr_tcptimeout; + tqp[IPF_TCPS_CLOSED].ifq_ttl = ifs->ifs_fr_tcpclosed; + tqp[IPF_TCPS_LISTEN].ifq_ttl = ifs->ifs_fr_tcptimeout; + tqp[IPF_TCPS_SYN_SENT].ifq_ttl = ifs->ifs_fr_tcptimeout; + tqp[IPF_TCPS_SYN_RECEIVED].ifq_ttl = ifs->ifs_fr_tcptimeout; + tqp[IPF_TCPS_ESTABLISHED].ifq_ttl = ifs->ifs_fr_tcpidletimeout; + tqp[IPF_TCPS_CLOSE_WAIT].ifq_ttl = ifs->ifs_fr_tcphalfclosed; + tqp[IPF_TCPS_FIN_WAIT_1].ifq_ttl = ifs->ifs_fr_tcphalfclosed; + tqp[IPF_TCPS_CLOSING].ifq_ttl = ifs->ifs_fr_tcptimeout; + tqp[IPF_TCPS_LAST_ACK].ifq_ttl = ifs->ifs_fr_tcplastack; + tqp[IPF_TCPS_FIN_WAIT_2].ifq_ttl = ifs->ifs_fr_tcpclosewait; + tqp[IPF_TCPS_TIME_WAIT].ifq_ttl = ifs->ifs_fr_tcptimeout; + tqp[IPF_TCPS_HALF_ESTAB].ifq_ttl = ifs->ifs_fr_tcptimeout; } @@ -3720,9 +3768,10 @@ ipftq_t *tqp; /* dir == 0 : a packet from source to dest */ /* dir == 1 : a packet from dest to source */ /* ------------------------------------------------------------------------ */ -void fr_statederef(fin, isp) +void fr_statederef(fin, isp, ifs) fr_info_t *fin; ipstate_t **isp; +ipf_stack_t *ifs; { ipstate_t *is = *isp; #if 0 @@ -3756,11 +3805,11 @@ ipstate_t **isp; fin = fin; /* LINT */ is = *isp; *isp = NULL; - WRITE_ENTER(&ipf_state); + WRITE_ENTER(&ifs->ifs_ipf_state); is->is_ref--; if (is->is_ref == 0) { is->is_ref++; /* To counter ref-- in fr_delstate() */ - fr_delstate(is, ISL_EXPIRE); + fr_delstate(is, ISL_EXPIRE, ifs); #ifndef _KERNEL #if 0 } else if (((fin->fin_out == 1) || (eol == 1)) && @@ -3771,10 +3820,10 @@ ipstate_t **isp; } else if ((is->is_sti.tqe_state[0] > IPF_TCPS_ESTABLISHED) || (is->is_sti.tqe_state[1] > IPF_TCPS_ESTABLISHED)) { #endif - fr_delstate(is, ISL_ORPHAN); + fr_delstate(is, ISL_ORPHAN, ifs); #endif } - RWLOCK_EXIT(&ipf_state); + RWLOCK_EXIT(&ifs->ifs_ipf_state); } @@ -3788,9 +3837,10 @@ ipstate_t **isp; /* Put the state entry on its default queue entry, using rev as a helped in */ /* determining which queue it should be placed on. 
*/ /* ------------------------------------------------------------------------ */ -void fr_setstatequeue(is, rev) +void fr_setstatequeue(is, rev, ifs) ipstate_t *is; int rev; +ipf_stack_t *ifs; { ipftq_t *oifq, *nifq; @@ -3806,30 +3856,30 @@ int rev; #ifdef USE_INET6 case IPPROTO_ICMPV6 : if (rev == 1) - nifq = &ips_icmpacktq; + nifq = &ifs->ifs_ips_icmpacktq; else - nifq = &ips_icmptq; + nifq = &ifs->ifs_ips_icmptq; break; #endif case IPPROTO_ICMP : if (rev == 1) - nifq = &ips_icmpacktq; + nifq = &ifs->ifs_ips_icmpacktq; else - nifq = &ips_icmptq; + nifq = &ifs->ifs_ips_icmptq; break; case IPPROTO_TCP : - nifq = ips_tqtqb + is->is_state[rev]; + nifq = ifs->ifs_ips_tqtqb + is->is_state[rev]; break; case IPPROTO_UDP : if (rev == 1) - nifq = &ips_udpacktq; + nifq = &ifs->ifs_ips_udpacktq; else - nifq = &ips_udptq; + nifq = &ifs->ifs_ips_udptq; break; default : - nifq = &ips_iptq; + nifq = &ifs->ifs_ips_iptq; break; } } @@ -3840,8 +3890,80 @@ int rev; * another, else put it on the end of the newly determined queue. */ if (oifq != NULL) - fr_movequeue(&is->is_sti, oifq, nifq); + fr_movequeue(&is->is_sti, oifq, nifq, ifs); else - fr_queueappend(&is->is_sti, nifq, is); + fr_queueappend(&is->is_sti, nifq, is, ifs); return; } + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_stateiter */ +/* Returns: int - 0 == success, else error */ +/* Parameters: token(I) - pointer to ipftoken structure */ +/* itp(I) - pointer to ipfgeniter structure */ +/* */ +/* This function handles the SIOCGENITER ioctl for the state tables and */ +/* walks through the list of entries in the state table list (ips_list.) */ +/* ------------------------------------------------------------------------ */ +static int fr_stateiter(token, itp, ifs) +ipftoken_t *token; +ipfgeniter_t *itp; +ipf_stack_t *ifs; +{ + ipstate_t *is, *next, zero; + int error; + + if (itp->igi_data == NULL) + return EFAULT; + + if (itp->igi_type != IPFGENITER_STATE) + return EINVAL; + + is = token->ipt_data; + if (is == (void *)-1) { + ipf_freetoken(token, ifs); + return ESRCH; + } + + READ_ENTER(&ifs->ifs_ipf_state); + if (is == NULL) { + next = ifs->ifs_ips_list; + } else { + next = is->is_next; + } + + if (next != NULL) { + /* + * If we find a state entry to use, bump its reference count + * so that it can be used for is_next when we come back. + */ + MUTEX_ENTER(&next->is_lock); + next->is_ref++; + MUTEX_EXIT(&next->is_lock); + token->ipt_data = next; + } else { + bzero(&zero, sizeof(zero)); + next = &zero; + token->ipt_data = (void *)-1; + } + RWLOCK_EXIT(&ifs->ifs_ipf_state); + + /* + * If we had a prior pointer to a state entry, release it. + */ + if (is != NULL) { + fr_statederef(NULL, &is, ifs); + } + + /* + * This should arguably be via fr_outobj() so that the state + * structure can (if required) be massaged going out. + */ + error = COPYOUT(next, itp->igi_data, sizeof(*next)); + if (error != 0) + error = EFAULT; + + return error; +} + diff --git a/usr/src/uts/common/inet/ipf/ipf.h b/usr/src/uts/common/inet/ipf/ipf.h index 25c75b13a6..d60d89b70c 100644 --- a/usr/src/uts/common/inet/ipf/ipf.h +++ b/usr/src/uts/common/inet/ipf/ipf.h @@ -6,7 +6,7 @@ * @(#)ipf.h 1.12 6/5/96 * $Id: ipf.h,v 2.71.2.7 2005/06/12 07:18:31 darrenr Exp $ * - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
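fr_stateiter() backs the new SIOCGENITER walk over ips_list: the ipftoken remembers the last entry handed to userland and keeps a reference on it, so the walk stays valid even if that entry is deleted between ioctl calls, and a zeroed record plus a token value of (void *)-1 marks the end of the list. A self-contained sketch of that reference-counted cursor, with hypothetical types rather than the kernel structures:

    #include <stddef.h>

    typedef struct entry {
        struct entry *next;
        int           refcnt;
        int           payload;
    } entry_t;

    typedef struct cursor {
        entry_t *last;     /* entry returned by the previous call, still held */
    } cursor_t;

    static void
    entry_release(entry_t *e)
    {
        if (--e->refcnt == 0) {
            /* the real code frees or expires the entry here */
        }
    }

    /* Copy the entry after the cursor into *out; return 0 when the list ends. */
    static int
    iter_next(cursor_t *c, entry_t *list, entry_t *out)
    {
        entry_t *next = (c->last == NULL) ? list : c->last->next;

        if (next != NULL)
            next->refcnt++;           /* keep it alive until the next call */
        if (c->last != NULL)
            entry_release(c->last);   /* drop the previous cursor position */
        c->last = next;

        if (next == NULL)
            return 0;
        *out = *next;                 /* snapshot copied out to the caller */
        return 1;
    }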
*/ @@ -71,6 +71,7 @@ struct file; #include <string.h> #include <unistd.h> +#include "netinet/ipf_stack.h" #include "netinet/ip_compat.h" #include "netinet/ip_fil.h" #include "netinet/ip_nat.h" @@ -209,7 +210,7 @@ extern int getportproto __P((char *, int)); extern int getproto __P((char *)); extern char *getline __P((char *, size_t, FILE *, int *)); extern int genmask __P((char *, u_32_t *)); -extern char *getnattype __P((struct ipnat *)); +extern char *getnattype __P((struct nat *, int)); extern char *getsumd __P((u_32_t)); extern u_32_t getoptbyname __P((char *)); extern u_32_t getoptbyvalue __P((int)); @@ -252,6 +253,8 @@ extern void print_toif __P((char *, struct frdest *)); extern void printaps __P((ap_session_t *, int)); extern void printbuf __P((char *, int, int)); extern void printfr __P((struct frentry *, ioctlfunc_t)); +extern struct iphtable_s *printhash_live __P((struct iphtable_s *, int, char*, int)); +extern void printhashdata __P((struct iphtable_s *, int)); extern void printtunable __P((ipftune_t *)); extern struct iphtable_s *printhash __P((struct iphtable_s *, copyfunc_t, char *, int)); @@ -267,6 +270,8 @@ extern void printpacket __P((struct ip *)); extern void printpacket6 __P((struct ip *)); extern struct ip_pool_s *printpool __P((struct ip_pool_s *, copyfunc_t, char *, int)); +extern struct ip_pool_s *printpool_live __P((struct ip_pool_s *, int, char*, int)); +extern void printpooldata __P((struct ip_pool_s *, int)); extern struct ip_pool_node *printpoolnode __P((struct ip_pool_node *, int)); extern void printproto __P((struct protoent *, int, struct ipnat *)); extern void printportcmp __P((int, struct frpcmp *)); @@ -293,7 +298,7 @@ extern char *hostname __P((int, void *)); extern struct ipstate *printstate __P((struct ipstate *, int, u_long)); extern void printsbuf __P((char *)); extern void printnat __P((struct ipnat *, int)); -extern void printactivenat __P((struct nat *, int)); +extern void printactivenat __P((struct nat *, int, int)); extern void printhostmap __P((struct hostmap *, u_int)); extern void printpacket __P((struct ip *)); diff --git a/usr/src/uts/common/inet/ipf/netinet/Makefile b/usr/src/uts/common/inet/ipf/netinet/Makefile index e64d612f4c..6e66a1c892 100644 --- a/usr/src/uts/common/inet/ipf/netinet/Makefile +++ b/usr/src/uts/common/inet/ipf/netinet/Makefile @@ -1,7 +1,7 @@ # #ident "%Z%%M% %I% %E% SMI" # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # uts/common/inet/ipf/netinet/Makefile @@ -9,7 +9,8 @@ # include global definitions include ../../../../../Makefile.master -HDRS= ipl.h ip_compat.h ip_fil.h ip_icmp.h ip_nat.h ip_proxy.h ip_state.h +HDRS= ipl.h ip_compat.h ip_fil.h ip_icmp.h ip_nat.h ip_proxy.h ip_state.h \ + ip_frag.h ip_auth.h ip_lookup.h ip_pool.h ip_htable.h ipf_stack.h ROOTDIRS= $(ROOT)/usr/include/netinet @@ -29,4 +30,4 @@ install_h: $(ROOTDIRS) $(ROOTHDRS) $(ROOTDIRS): $(INS.dir) -check: $(CHECKHDRS) +check: diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_auth.h b/usr/src/uts/common/inet/ipf/netinet/ip_auth.h index 3892778270..2e6f0c4155 100644 --- a/usr/src/uts/common/inet/ipf/netinet/ip_auth.h +++ b/usr/src/uts/common/inet/ipf/netinet/ip_auth.h @@ -5,7 +5,12 @@ * * $Id: ip_auth.h,v 2.16 2003/07/25 12:29:56 darrenr Exp $ * + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
*/ + +#pragma ident "%Z%%M% %I% %E% SMI" + #ifndef __IP_AUTH_H__ #define __IP_AUTH_H__ @@ -27,6 +32,7 @@ typedef struct frauthent { struct frentry fae_fr; struct frauthent *fae_next; u_long fae_age; + int fae_ref; } frauthent_t; typedef struct fr_authstat { @@ -43,22 +49,13 @@ typedef struct fr_authstat { } fr_authstat_t; -extern frentry_t *ipauth; -extern struct fr_authstat fr_authstats; -extern int fr_defaultauthage; -extern int fr_authstart; -extern int fr_authend; -extern int fr_authsize; -extern int fr_authused; -extern int fr_auth_lock; extern frentry_t *fr_checkauth __P((fr_info_t *, u_32_t *)); -extern void fr_authexpire __P((void)); -extern int fr_authinit __P((void)); -extern void fr_authunload __P((void)); -extern int fr_authflush __P((void)); -extern mb_t **fr_authpkts; +extern void fr_authexpire __P((ipf_stack_t *)); +extern int fr_authinit __P((ipf_stack_t *)); +extern void fr_authunload __P((ipf_stack_t *)); +extern int fr_authflush __P((ipf_stack_t *)); extern int fr_newauth __P((mb_t *, fr_info_t *)); -extern int fr_preauthcmd __P((ioctlcmd_t, frentry_t *, frentry_t **)); -extern int fr_auth_ioctl __P((caddr_t, ioctlcmd_t, int)); +extern int fr_preauthcmd __P((ioctlcmd_t, frentry_t *, frentry_t **, ipf_stack_t *)); +extern int fr_auth_ioctl __P((caddr_t, int, int, int, void *, ipf_stack_t *)); #endif /* __IP_AUTH_H__ */ diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_compat.h b/usr/src/uts/common/inet/ipf/netinet/ip_compat.h index 365e3b010e..6dd9213ece 100644 --- a/usr/src/uts/common/inet/ipf/netinet/ip_compat.h +++ b/usr/src/uts/common/inet/ipf/netinet/ip_compat.h @@ -6,7 +6,7 @@ * @(#)ip_compat.h 1.8 1/14/96 * $Id: ip_compat.h,v 2.142.2.30 2005/08/11 15:13:49 darrenr Exp $ * - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -149,6 +149,7 @@ struct file; # include <sys/proc.h> # include <sys/devops.h> # include <sys/ddi_impldefs.h> +# include <sys/neti.h> # endif /* @@ -162,6 +163,7 @@ struct file; * because Solaris 2 defines these in two places :-/ */ # ifndef KERNEL +# define ADD_KERNEL # define _KERNEL # undef RES_INIT # endif /* _KERNEL */ @@ -181,6 +183,9 @@ struct file; # ifdef i386 # define _SYS_PROMIF_H # endif +# ifdef ADD_KERNEL +# undef _KERNEL +# endif # include <inet/ip.h> # undef COPYOUT # include <inet/ip_ire.h> @@ -217,9 +222,6 @@ typedef struct qpktinfo { #define QPI_NOCKSUM 0x01 -extern net_data_t ipf_ipv4; -extern net_data_t ipf_ipv6; - extern void mb_copydata __P((mblk_t *, size_t , size_t, char *)); extern void mb_copyback __P((mblk_t *, size_t , size_t, char *)); # endif @@ -289,12 +291,12 @@ typedef unsigned int u_32_t; # define KMALLOC(a,b) (a) = (b)kmem_alloc(sizeof(*(a)), KM_NOSLEEP) # define KMALLOCS(a,b,c) (a) = (b)kmem_alloc((c), KM_NOSLEEP) # define GET_MINOR(x) getminor(x) -extern phy_if_t get_unit __P((char *, int)); -# define GETIFP(n, v) (void *)get_unit(n, v) +/*extern phy_if_t get_unit __P((char *, int, ipf_stack_t *));*/ +# define GETIFP(n, v, ifs) (void *)get_unit(n, v, ifs) # define IFNAME(x) ((ill_t *)x)->ill_name # define COPYIFNAME(x, b, v) (void) net_getifname(((v) == 4) ? 
\ - ipf_ipv4 : ipf_ipv6, \ - (phy_if_t)(x), (b), sizeof(b)) + ifs->ifs_ipf_ipv4 : ifs->ifs_ipf_ipv6,\ + (phy_if_t)(x), (b), sizeof(b)) # define GETKTIME(x) uniqtime((struct timeval *)x) # define MSGDSIZE(x) msgdsize(x) # define M_LEN(x) ((x)->b_wptr - (x)->b_rptr) @@ -459,8 +461,8 @@ typedef struct iplog_select_s { # define SPL_IMP(x) ; # undef SPL_X # define SPL_X(x) ; -extern void *get_unit __P((char *, int)); -# define GETIFP(n, v) get_unit(n, v) +/*extern void *get_unit __P((char *, int, ipf_stack_t *));*/ +# define GETIFP(n, v, ifs) get_unit(n, v, ifs) # define IFNAME(x, b) ((ill_t *)x)->ill_name # define COPYIFNAME(x, b, v) \ strncpy(b, ((ifinfo_t *)x)->ifi_name, \ @@ -613,7 +615,7 @@ typedef struct { # define WAKEUP(id,x) wakeup(id+x) # define KFREE(x) kmem_free((char *)(x), sizeof(*(x))) # define KFREES(x,s) kmem_free((char *)(x), (s)) -# define GETIFP(n,v) ifunit(n) +# define GETIFP(n,v, ifs) ifunit(n) # include <sys/kmem.h> # include <sys/ddi.h> # define KMALLOC(a,b) (a) = (b)kmem_alloc(sizeof(*(a)), KM_NOSLEEP) @@ -689,7 +691,7 @@ typedef struct mbuf mb_t; # define UIOMOVE(a,b,c,d) uiomove((caddr_t)a, b, d) # define FREE_MB_T(m) m_freem(m) # define MTOD(m,t) mtod(m,t) -# define GETIFP(n, v) ifunit(n) +# define GETIFP(n, v, ifs) ifunit(n) # define GET_MINOR getminor # define WAKEUP(id,x) wakeup(id + x) # define COPYIN(a,b,c) copyin((caddr_t)(a), (caddr_t)(b), (c)) @@ -1057,7 +1059,7 @@ typedef u_int32_t u_32_t; # define M_DUPLICATE(x) m_copy((x), 0, M_COPYALL) # define CACHE_HASH(x) ((IFNAME(fin->fin_ifp)[0] + \ ((struct ifnet *)fin->fin_ifp)->if_unit) & 7) -# define GETIFP(n, v) ifunit(n, IFNAMSIZ) +# define GETIFP(n, v, ifs) ifunit(n, IFNAMSIZ) # define KFREE(x) kmem_free((char *)(x), sizeof(*(x))) # define KFREES(x,s) kmem_free((char *)(x), (s)) # define SLEEP(id, n) sleep((id), PZERO+1) @@ -1467,7 +1469,7 @@ typedef struct mb_s { # define KMALLOCS(a,b,c) (a) = (b)malloc(c) # define KFREE(x) free(x) # define KFREES(x,s) free(x) -# define GETIFP(x, v) get_unit(x,v) +# define GETIFP(x, v, ifs) get_unit(x,v, ifs) # define COPYIN(a,b,c) (bcopy((a), (b), (c)), 0) # define COPYOUT(a,b,c) (bcopy((a), (b), (c)), 0) # define BCOPYIN(a,b,c) (bcopy((a), (b), (c)), 0) @@ -1616,7 +1618,7 @@ MALLOC_DECLARE(M_IPFILTER); # define UIOMOVE(a,b,c,d) uiomove(a,b,d) # define SLEEP(id, n) tsleep((id), PPAUSE|PCATCH, n, 0) # define WAKEUP(id,x) wakeup(id+x) -# define GETIFP(n, v) ifunit(n) +# define GETIFP(n, v, ifs) ifunit(n) # endif /* (Free)BSD */ # if !defined(USE_MUTEXES) && !defined(SPL_NET) diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_fil.h b/usr/src/uts/common/inet/ipf/netinet/ip_fil.h index 7fc1712dd0..2e40abe061 100644 --- a/usr/src/uts/common/inet/ipf/netinet/ip_fil.h +++ b/usr/src/uts/common/inet/ipf/netinet/ip_fil.h @@ -6,7 +6,7 @@ * @(#)ip_fil.h 1.35 6/5/96 * $Id: ip_fil.h,v 2.170.2.22 2005/07/16 05:55:35 darrenr Exp $ * - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
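The interface-resolution macros pick up the ifs argument as well: with IP Instances an interface name only has meaning inside a particular stack, so GETIFP (via get_unit()) and COPYIFNAME (via the per-stack ifs_ipf_ipv4/ifs_ipf_ipv6 net handles) are told which instance to search instead of consulting a machine-wide list. A toy version of that per-instance lookup, with invented types:

    #include <string.h>

    typedef struct toy_ifnet {
        struct toy_ifnet *if_next;
        char              if_name[32];
    } toy_ifnet_t;

    typedef struct toy_stack {
        toy_ifnet_t *ifs_iflist;   /* interfaces visible to this instance only */
    } toy_stack_t;

    static toy_ifnet_t *
    toy_get_unit(const char *name, toy_stack_t *ifs)
    {
        toy_ifnet_t *ifp;

        for (ifp = ifs->ifs_iflist; ifp != NULL; ifp = ifp->if_next) {
            if (strcmp(ifp->if_name, name) == 0)
                return ifp;
        }
        return NULL;   /* the name may exist in another instance, but not here */
    }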
*/ @@ -62,6 +62,11 @@ # define SIOCIPFSET _IOWR('r', 89, struct ipfobj) # define SIOCIPFL6 _IOWR('r', 90, int) # define SIOCIPFLP _IOWR('r', 91, int) +# define SIOCIPFITER _IOWR('r', 92, struct ipfobj) +# define SIOCGENITER _IOWR('r', 93, struct ipfobj) +# define SIOCGTABL _IOWR('r', 94, struct ipfobj) +# define SIOCIPFDELTOK _IOWR('r', 95, int) +# define SIOCLOOKUPITER _IOWR('r', 96, struct ipfobj) #else # define SIOCADAFR _IOW(r, 60, struct ipfobj) # define SIOCRMAFR _IOW(r, 61, struct ipfobj) @@ -95,6 +100,11 @@ # define SIOCIPFSET _IOWR(r, 89, struct ipfobj) # define SIOCIPFL6 _IOWR(r, 90, int) # define SIOCIPFLP _IOWR(r, 91, int) +# define SIOCIPFITER _IOWR(r, 92, struct ipfobj) +# define SIOCGENITER _IOWR(r, 93, struct ipfobj) +# define SIOCGTABL _IOWR(r, 94, struct ipfobj) +# define SIOCIPFDELTOK _IOWR(r, 95, int) +# define SIOCLOOKUPITER _IOWR(r, 96, struct ipfobj) #endif #define SIOCADDFR SIOCADAFR #define SIOCDELFR SIOCRMAFR @@ -104,8 +114,10 @@ struct ipscan; struct ifnet; +typedef struct ipf_stack ipf_stack_t; + +typedef int (* lookupfunc_t) __P((void *, int, void *, ipf_stack_t *)); -typedef int (* lookupfunc_t) __P((void *, int, void *)); /* * i6addr is used as a container for both IPv4 and IPv6 addresses, as well @@ -281,6 +293,8 @@ typedef struct fr_ip { #define SI_CLONED 0x00004000 + + typedef struct fr_info { void *fin_ifp; /* interface packet is `on' */ fr_ip_t fin_fi; /* IP Packet summary */ @@ -315,6 +329,7 @@ typedef struct fr_info { #ifdef MENTAT mb_t *fin_qfm; /* pointer to mblk where pkt starts */ void *fin_qpi; + ipf_stack_t *fin_ifs; #endif #ifdef __sgi void *fin_hbuf; @@ -342,7 +357,8 @@ typedef struct fr_info { #define IPF_OUT 1 typedef struct frentry *(*ipfunc_t) __P((fr_info_t *, u_32_t *)); -typedef int (*ipfuncinit_t) __P((struct frentry *)); +typedef int (*ipfuncinit_t) __P((struct frentry *, + ipf_stack_t *)); typedef struct ipfunc_resolve { char ipfu_name[32]; @@ -852,7 +868,7 @@ typedef struct ipflog { #define IPL_LOGLOOKUP 6 #define IPL_LOGCOUNT 7 #define IPL_LOGMAX 7 -#define IPL_LOGSIZE IPL_LOGMAX + 1 +#define IPL_LOGSIZE (IPL_LOGMAX + 1) #define IPL_LOGALL -1 #define IPL_LOGNONE -2 @@ -1096,6 +1112,12 @@ typedef struct ipfobj { #define IPFOBJ_STATESTAT 11 /* struct ips_stat */ #define IPFOBJ_FRAUTH 12 /* struct frauth */ #define IPFOBJ_TUNEABLE 13 /* struct ipftune */ +#define IPFOBJ_NAT 14 /* struct nat */ +#define IPFOBJ_IPFITER 15 /* struct ipfruleiter */ +#define IPFOBJ_GENITER 16 /* struct ipfgeniter */ +#define IPFOBJ_GTABLE 17 /* struct ipftable */ +#define IPFOBJ_LOOKUPITER 18 /* struct ipflookupiter */ +#define IPFOBJ_COUNT 19 /* How many #defines are above this? 
*/ typedef union ipftunevalptr { @@ -1148,6 +1170,47 @@ typedef struct ipftune { #define ipft_vchar ipft_un.ipftu_char +typedef struct ipfruleiter { + int iri_ver; + int iri_inout; + char iri_group[FR_GROUPLEN]; + int iri_active; + frentry_t *iri_rule; +} ipfruleiter_t; + +typedef struct ipfgeniter { + int igi_type; + void *igi_data; +} ipfgeniter_t; + +#define IPFGENITER_IPF 0 +#define IPFGENITER_NAT 1 +#define IPFGENITER_IPNAT 2 +#define IPFGENITER_FRAG 3 +#define IPFGENITER_AUTH 4 +#define IPFGENITER_STATE 5 +#define IPFGENITER_NATFRAG 6 +#define IPFGENITER_HOSTMAP 7 +#define IPFGENITER_LOOKUP 8 + +typedef struct ipftable { + int ita_type; + void *ita_table; +} ipftable_t; + +typedef struct ipftoken { + struct ipftoken *ipt_next; + struct ipftoken **ipt_pnext; + void *ipt_ctx; + void *ipt_data; + u_long ipt_die; + int ipt_type; + int ipt_uid; + int ipt_subtype; + int ipt_alive; +} ipftoken_t; + + /* * sync commands */ @@ -1186,14 +1249,14 @@ typedef struct ipftune { #endif #ifndef _KERNEL -extern int fr_check __P((struct ip *, int, void *, int, mb_t **)); -extern int (*fr_checkp) __P((ip_t *, int, void *, int, mb_t **)); +extern int fr_check __P((struct ip *, int, void *, int, mb_t **, ipf_stack_t *)); +extern int (*fr_checkp) __P((ip_t *, int, void *, int, mb_t **, ipf_stack_t *)); extern int ipf_log __P((void)); -extern struct ifnet *get_unit __P((char *, int)); +extern struct ifnet *get_unit __P((char *, int, ipf_stack_t *)); extern char *get_ifname __P((struct ifnet *)); # if defined(__NetBSD__) || defined(__OpenBSD__) || \ (_BSDI_VERSION >= 199701) || (__FreeBSD_version >= 300000) -extern int iplioctl __P((int, ioctlcmd_t, caddr_t, int)); +extern int frrequest __P((int, u_long, caddr_t, int, int, ipf_stack_t *)); # else extern int iplioctl __P((int, ioctlcmd_t, caddr_t, int)); # endif @@ -1201,6 +1264,7 @@ extern int iplopen __P((dev_t, int)); extern int iplclose __P((dev_t, int)); extern void m_freem __P((mb_t *)); #else /* #ifndef _KERNEL */ +extern phy_if_t get_unit __P((char *, int, ipf_stack_t *)); # if defined(__NetBSD__) && defined(PFIL_HOOKS) extern void ipfilterattach __P((int)); # endif @@ -1208,7 +1272,7 @@ extern int ipl_enable __P((void)); extern int ipl_disable __P((void)); # ifdef MENTAT extern int fr_check __P((struct ip *, int, void *, int, void *, - mblk_t **)); + mblk_t **, ipf_stack_t *)); # if SOLARIS # if SOLARIS2 >= 7 extern int iplioctl __P((dev_t, int, intptr_t, int, cred_t *, int *)); @@ -1228,11 +1292,11 @@ extern int iplread __P((dev_t, uio_t *)); extern int iplwrite __P((dev_t, uio_t *)); extern int iplselect __P((dev_t, int)); # endif -extern int ipfsync __P((void)); +extern int ipfsync __P((ipf_stack_t *)); extern int fr_qout __P((queue_t *, mblk_t *)); # else /* MENTAT */ -extern int fr_check __P((struct ip *, int, void *, int, mb_t **)); -extern int (*fr_checkp) __P((ip_t *, int, void *, int, mb_t **)); +extern int fr_check __P((struct ip *, int, void *, int, mb_t **, ipf_stack_t *)); +extern int (*fr_checkp) __P((ip_t *, int, void *, int, mb_t **, ipf_stack_t *)); extern size_t mbufchainlen __P((mb_t *)); # ifdef __sgi # include <sys/cred.h> @@ -1241,7 +1305,7 @@ extern int iplopen __P((dev_t *, int, int, cred_t *)); extern int iplclose __P((dev_t, int, int, cred_t *)); extern int iplread __P((dev_t, uio_t *, cred_t *)); extern int iplwrite __P((dev_t, uio_t *, cred_t *)); -extern int ipfsync __P((void)); +extern int ipfsync __P((ipf_stack_t *)); extern int ipfilter_sgi_attach __P((void)); extern void ipfilter_sgi_detach __P((void)); extern void 
ipfilter_sgi_intfsync __P((void)); @@ -1305,45 +1369,39 @@ extern int iplwrite __P((dev_t, struct uio *)); #endif /* #ifndef _KERNEL */ -extern ipfmutex_t ipl_mutex, ipf_authmx, ipf_rw, ipf_hostmap; -extern ipfmutex_t ipf_timeoutlock, ipf_stinsert, ipf_natio, ipf_nat_new; -extern ipfrwlock_t ipf_mutex, ipf_global, ip_poolrw, ipf_ipidfrag; -extern ipfrwlock_t ipf_frag, ipf_state, ipf_nat, ipf_natfrag, ipf_auth; -extern ipfrwlock_t ipf_frcache; - extern char *memstr __P((char *, char *, int, int)); extern int count4bits __P((u_32_t)); extern int count6bits __P((u_32_t *)); -extern int frrequest __P((int, ioctlcmd_t, caddr_t, int, int)); +extern int frrequest __P((int, ioctlcmd_t, caddr_t, int, int, ipf_stack_t *)); extern char *getifname __P((struct ifnet *)); -extern int iplattach __P((void)); -extern int ipldetach __P((void)); +extern int iplattach __P((ipf_stack_t *, netstack_t *)); +extern int ipldetach __P((ipf_stack_t *)); extern u_short ipf_cksum __P((u_short *, int)); extern int copyinptr __P((void *, void *, size_t)); extern int copyoutptr __P((void *, void *, size_t)); extern int fr_fastroute __P((mb_t *, mb_t **, fr_info_t *, frdest_t *)); extern int fr_inobj __P((void *, void *, int)); extern int fr_inobjsz __P((void *, void *, int, int)); -extern int fr_ioctlswitch __P((int, void *, ioctlcmd_t, int)); -extern int fr_ipftune __P((ioctlcmd_t, void *)); +extern int fr_ioctlswitch __P((int, void *, ioctlcmd_t, int, int, void *, ipf_stack_t *)); +extern int fr_ipftune __P((ioctlcmd_t, void *, ipf_stack_t *)); extern int fr_outobj __P((void *, void *, int)); extern int fr_outobjsz __P((void *, void *, int, int)); extern void *fr_pullup __P((mb_t *, fr_info_t *, int)); -extern void fr_resolvedest __P((struct frdest *, int)); +extern void fr_resolvedest __P((struct frdest *, int, ipf_stack_t *)); extern int fr_resolvefunc __P((void *)); -extern void *fr_resolvenic __P((char *, int)); +extern void *fr_resolvenic __P((char *, int, ipf_stack_t *)); extern int fr_send_icmp_err __P((int, fr_info_t *, int)); extern int fr_send_reset __P((fr_info_t *)); #if (__FreeBSD_version < 490000) || !defined(_KERNEL) extern int ppsratecheck __P((struct timeval *, int *, int)); #endif -extern ipftq_t *fr_addtimeoutqueue __P((ipftq_t **, u_int)); +extern ipftq_t *fr_addtimeoutqueue __P((ipftq_t **, u_int, ipf_stack_t *)); extern void fr_deletequeueentry __P((ipftqent_t *)); extern int fr_deletetimeoutqueue __P((ipftq_t *)); -extern void fr_freetimeoutqueue __P((ipftq_t *)); -extern void fr_movequeue __P((ipftqent_t *, ipftq_t *, ipftq_t *)); -extern void fr_queueappend __P((ipftqent_t *, ipftq_t *, void *)); -extern void fr_queueback __P((ipftqent_t *)); +extern void fr_freetimeoutqueue __P((ipftq_t *, ipf_stack_t *)); +extern void fr_movequeue __P((ipftqent_t *, ipftq_t *, ipftq_t *, ipf_stack_t *)); +extern void fr_queueappend __P((ipftqent_t *, ipftq_t *, void *, ipf_stack_t *)); +extern void fr_queueback __P((ipftqent_t *, ipf_stack_t *)); extern void fr_queuefront __P((ipftqent_t *)); extern void fr_checkv4sum __P((fr_info_t *)); extern int fr_checkl4sum __P((fr_info_t *)); @@ -1358,36 +1416,41 @@ extern int fr_ifpfillv6addr __P((int, struct sockaddr_in6 *, struct in_addr *)); #endif -extern int fr_addipftune __P((ipftuneable_t *)); -extern int fr_delipftune __P((ipftuneable_t *)); - -extern int frflush __P((minor_t, int, int)); -extern void frsync __P((int, int, void *, char *)); -extern frgroup_t *fr_addgroup __P((char *, void *, u_32_t, minor_t, int)); -extern int fr_derefrule __P((frentry_t **)); 
-extern void fr_delgroup __P((char *, minor_t, int)); -extern frgroup_t *fr_findgroup __P((char *, minor_t, int, frgroup_t ***)); - -extern int fr_loginit __P((void)); -extern int ipflog_clear __P((minor_t)); -extern int ipflog_read __P((minor_t, uio_t *)); +extern int fr_addipftune __P((ipftuneable_t *, ipf_stack_t *)); +extern int fr_delipftune __P((ipftuneable_t *, ipf_stack_t *)); + +extern int frflush __P((minor_t, int, int, ipf_stack_t *)); +extern void frsync __P((int, int, void *, char *, ipf_stack_t *)); +extern frgroup_t *fr_addgroup __P((char *, void *, u_32_t, minor_t, int, + ipf_stack_t *)); +extern int fr_derefrule __P((frentry_t **, ipf_stack_t *)); +extern void fr_delgroup __P((char *, minor_t, int, ipf_stack_t *)); +extern frgroup_t *fr_findgroup __P((char *, minor_t, int, frgroup_t ***, + ipf_stack_t *)); + +extern int fr_loginit __P((ipf_stack_t *)); +extern int ipflog_clear __P((minor_t, ipf_stack_t *)); +extern int ipflog_read __P((minor_t, struct uio *, ipf_stack_t *)); extern int ipflog __P((fr_info_t *, u_int)); -extern int ipllog __P((int, fr_info_t *, void **, size_t *, int *, int)); -extern void fr_logunload __P((void)); +extern int ipllog __P((int, fr_info_t *, void **, size_t *, int *, int, + ipf_stack_t *)); +extern void fr_logunload __P((ipf_stack_t *)); extern frentry_t *fr_acctpkt __P((fr_info_t *, u_32_t *)); extern int fr_copytolog __P((int, char *, int)); extern u_short fr_cksum __P((mb_t *, ip_t *, int, void *)); -extern void fr_deinitialise __P((void)); +extern void fr_deinitialise __P((ipf_stack_t *)); extern frentry_t *fr_dolog __P((fr_info_t *, u_32_t *)); extern frentry_t *fr_dstgrpmap __P((fr_info_t *, u_32_t *)); extern void fr_fixskip __P((frentry_t **, frentry_t *, int)); -extern void fr_forgetifp __P((void *)); -extern frentry_t *fr_getrulen __P((int, char *, u_32_t)); -extern void fr_getstat __P((struct friostat *)); +extern void fr_forgetifp __P((void *, ipf_stack_t *)); +extern frentry_t *fr_getrulen __P((int, char *, u_32_t, + ipf_stack_t *)); +extern void fr_getstat __P((struct friostat *, ipf_stack_t *)); extern int fr_ifpaddr __P((int, int, void *, - struct in_addr *, struct in_addr *)); -extern int fr_initialise __P((void)); + struct in_addr *, struct in_addr *, + ipf_stack_t *)); +extern int fr_initialise __P((ipf_stack_t *)); extern void fr_lock __P((caddr_t, int *)); extern int fr_makefrip __P((int, ip_t *, fr_info_t *)); extern int fr_matchtag __P((ipftag_t *, ipftag_t *)); @@ -1395,47 +1458,29 @@ extern int fr_matchicmpqueryreply __P((int, icmpinfo_t *, struct icmp *, int)); extern u_32_t fr_newisn __P((fr_info_t *)); extern u_short fr_nextipid __P((fr_info_t *)); -extern int fr_rulen __P((int, frentry_t *)); +extern int fr_rulen __P((int, frentry_t *, ipf_stack_t *)); extern int fr_scanlist __P((fr_info_t *, u_32_t)); extern frentry_t *fr_srcgrpmap __P((fr_info_t *, u_32_t *)); extern int fr_tcpudpchk __P((fr_info_t *, frtuc_t *)); extern int fr_verifysrc __P((fr_info_t *fin)); -extern int fr_zerostats __P((char *)); - -extern int fr_running; -extern u_long fr_frouteok[2]; -extern int fr_pass; -extern int fr_flags; -extern int fr_active; -extern int fr_chksrc; -extern int fr_minttl; -extern int fr_refcnt; -extern int fr_control_forwarding; -extern int fr_update_ipid; -extern int nat_logging; -extern int ipstate_logging; -extern int ipl_suppress; -extern int ipl_buffer_sz; -extern int ipl_logmax; -extern int ipl_logall; -extern int ipl_logsize; -extern u_long fr_ticks; -extern fr_info_t frcache[2][8]; +extern int fr_zerostats 
__P((char *, ipf_stack_t *)); +extern ipftoken_t *ipf_findtoken __P((int, int, void *, ipf_stack_t *)); +extern int ipf_getnextrule __P((ipftoken_t *, void *, ipf_stack_t *)); +extern void ipf_expiretokens __P((ipf_stack_t *)); +extern void ipf_freetoken __P((ipftoken_t *, ipf_stack_t *)); +extern int ipf_deltoken __P((int,int, void *, ipf_stack_t *)); +extern int ipf_genericiter __P((void *, int, void *, ipf_stack_t *)); + extern char ipfilter_version[]; -extern iplog_t **iplh[IPL_LOGMAX+1], *iplt[IPL_LOGMAX+1]; -extern int iplused[IPL_LOGMAX + 1]; -extern struct frentry *ipfilter[2][2], *ipacct[2][2]; #ifdef USE_INET6 -extern struct frentry *ipfilter6[2][2], *ipacct6[2][2]; extern int icmptoicmp6types[ICMP_MAXTYPE+1]; extern int icmptoicmp6unreach[ICMP_MAX_UNREACH]; extern int icmpreplytype6[ICMP6_MAXTYPE + 1]; #endif extern int icmpreplytype4[ICMP_MAXTYPE + 1]; -extern struct frgroup *ipfgroups[IPL_LOGSIZE][2]; -extern struct filterstats frstats[]; extern frentry_t *ipfrule_match __P((fr_info_t *)); -extern u_char ipf_iss_secret[32]; -extern ipftuneable_t ipf_tuneables[]; + +extern void ipftuneable_alloc(ipf_stack_t *); +extern void ipftuneable_free(ipf_stack_t *); #endif /* __IP_FIL_H__ */ diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_frag.h b/usr/src/uts/common/inet/ipf/netinet/ip_frag.h index 7eb7399010..1632fdb0fe 100644 --- a/usr/src/uts/common/inet/ipf/netinet/ip_frag.h +++ b/usr/src/uts/common/inet/ipf/netinet/ip_frag.h @@ -6,7 +6,7 @@ * @(#)ip_frag.h 1.5 3/24/96 * $Id: ip_frag.h,v 2.23.2.2 2005/06/10 18:02:37 darrenr Exp $ * - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -32,10 +32,11 @@ typedef struct ipfr { u_char ipfr_tos; u_32_t ipfr_pass; u_short ipfr_off; - u_char ipfr_ttl; + u_long ipfr_ttl; u_char ipfr_seen0; u_short ipfr_firstend; frentry_t *ipfr_rule; + int ipfr_ref; } ipfr_t; #define ipfr_src ipfr_source.in4 @@ -57,12 +58,9 @@ typedef struct ipfrstat { #define IPFR_CMPSZ (offsetof(ipfr_t, ipfr_tos) - \ offsetof(ipfr_t, ipfr_ifp)) -extern int ipfr_size; -extern int fr_ipfrttl; -extern int fr_frag_lock; -extern int fr_fraginit __P((void)); -extern void fr_fragunload __P((void)); -extern ipfrstat_t *fr_fragstats __P((void)); +extern int fr_fraginit __P((ipf_stack_t *)); +extern void fr_fragunload __P((ipf_stack_t *)); +extern ipfrstat_t *fr_fragstats __P((ipf_stack_t *)); extern int fr_newfrag __P((fr_info_t *, u_32_t)); extern frentry_t *fr_knownfrag __P((fr_info_t *, u_32_t *)); @@ -72,16 +70,19 @@ extern nat_t *fr_nat_knownfrag __P((fr_info_t *)); extern int fr_ipid_newfrag __P((fr_info_t *, u_32_t)); extern u_32_t fr_ipid_knownfrag __P((fr_info_t *)); +extern void fr_fragderef __P((ipfr_t **, ipfrwlock_t *, ipf_stack_t *)); -extern void fr_forget __P((void *)); -extern void fr_forgetnat __P((void *)); -extern void fr_fragclear __P((void)); -extern void fr_fragexpire __P((void)); +extern void fr_forget __P((void *, ipf_stack_t *)); +extern void fr_forgetnat __P((void *, ipf_stack_t *)); +extern void fr_fragclear __P((ipf_stack_t *)); +extern void fr_fragexpire __P((ipf_stack_t *)); +extern int fr_nextfrag __P((ipftoken_t *, ipfgeniter_t *, ipfr_t **, \ + ipfr_t ***, ipfrwlock_t *, ipf_stack_t *)); #if defined(_KERNEL) && ((BSD >= 199306) || SOLARIS || defined(__sgi) \ || defined(__osf__) || (defined(__sgi) && (IRIX >= 60500))) # if defined(SOLARIS2) && (SOLARIS2 < 7) -extern void fr_slowtimer __P((void)); +extern void fr_slowtimer __P((void 
*)); # else extern void fr_slowtimer __P((void *)); # endif @@ -89,7 +90,7 @@ extern void fr_slowtimer __P((void *)); # if defined(linux) && defined(_KERNEL) extern void fr_slowtimer __P((long)); # else -extern int fr_slowtimer __P((void)); +extern int fr_slowtimer __P((void *)); # endif #endif diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_ftp_pxy.c b/usr/src/uts/common/inet/ipf/netinet/ip_ftp_pxy.c index 7d1ed33c96..44fdf1d25f 100644 --- a/usr/src/uts/common/inet/ipf/netinet/ip_ftp_pxy.c +++ b/usr/src/uts/common/inet/ipf/netinet/ip_ftp_pxy.c @@ -5,7 +5,7 @@ * * $Id: ip_ftp_pxy.c,v 2.88.2.15 2005/03/19 19:38:10 darrenr Exp $ * - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. * * Simple FTP transparent proxy for in-kernel use. For use with the NAT @@ -14,7 +14,6 @@ #pragma ident "%Z%%M% %I% %E% SMI" - #define IPF_FTP_PROXY #define IPF_MINPORTLEN 18 @@ -46,88 +45,118 @@ */ #define FTPXY_C_PASV 1000 -int ippr_ftp_client __P((fr_info_t *, ip_t *, nat_t *, ftpinfo_t *, int)); +typedef struct ifs_ftppxy { + frentry_t ftppxyfr; + int ftp_proxy_init; + int ippr_ftp_pasvonly; + int ippr_ftp_insecure; + /* Do not require logins before transfers */ + int ippr_ftp_pasvrdr; + int ippr_ftp_forcepasv; + /* PASV must be last command prior to 227 */ + /* + * 1 - security + * 2 - errors + * 3 - error debugging + * 4 - parsing errors + * 5 - parsing info + * 6 - parsing debug + */ + int ippr_ftp_debug; + ipftuneable_t ftptune; +} ifs_ftppxy_t; + +int ippr_ftp_client __P((fr_info_t *, ip_t *, nat_t *, ftpinfo_t *, int, + ifs_ftppxy_t *)); int ippr_ftp_complete __P((char *, size_t)); -int ippr_ftp_in __P((fr_info_t *, ap_session_t *, nat_t *)); -int ippr_ftp_init __P((void)); -void ippr_ftp_fini __P((void)); -int ippr_ftp_new __P((fr_info_t *, ap_session_t *, nat_t *)); -int ippr_ftp_out __P((fr_info_t *, ap_session_t *, nat_t *)); -int ippr_ftp_pasv __P((fr_info_t *, ip_t *, nat_t *, ftpinfo_t *, int)); -int ippr_ftp_epsv __P((fr_info_t *, ip_t *, nat_t *, ftpside_t *, int)); -int ippr_ftp_port __P((fr_info_t *, ip_t *, nat_t *, ftpside_t *, int)); -int ippr_ftp_process __P((fr_info_t *, nat_t *, ftpinfo_t *, int)); -int ippr_ftp_server __P((fr_info_t *, ip_t *, nat_t *, ftpinfo_t *, int)); -int ippr_ftp_valid __P((ftpinfo_t *, int, char *, size_t)); -int ippr_ftp_server_valid __P((ftpside_t *, char *, size_t)); -int ippr_ftp_client_valid __P((ftpside_t *, char *, size_t)); +int ippr_ftp_in __P((fr_info_t *, ap_session_t *, nat_t *, void *)); +int ippr_ftp_init __P((void **, ipf_stack_t *)); +void ippr_ftp_fini __P((void **, ipf_stack_t *)); +int ippr_ftp_new __P((fr_info_t *, ap_session_t *, nat_t *, void *)); +int ippr_ftp_out __P((fr_info_t *, ap_session_t *, nat_t *, void *)); +int ippr_ftp_pasv __P((fr_info_t *, ip_t *, nat_t *, ftpinfo_t *, int, + ifs_ftppxy_t *)); +int ippr_ftp_epsv __P((fr_info_t *, ip_t *, nat_t *, ftpside_t *, int, + ifs_ftppxy_t *)); +int ippr_ftp_port __P((fr_info_t *, ip_t *, nat_t *, ftpside_t *, int, + ifs_ftppxy_t *)); +int ippr_ftp_process __P((fr_info_t *, nat_t *, ftpinfo_t *, int, + ifs_ftppxy_t *)); +int ippr_ftp_server __P((fr_info_t *, ip_t *, nat_t *, ftpinfo_t *, int, + ifs_ftppxy_t *)); +int ippr_ftp_valid __P((ftpinfo_t *, int, char *, size_t, ifs_ftppxy_t *)); +int ippr_ftp_server_valid __P((ftpside_t *, char *, size_t, ifs_ftppxy_t *)); +int ippr_ftp_client_valid __P((ftpside_t *, char *, size_t, ifs_ftppxy_t *)); u_short ippr_ftp_atoi __P((char 
**)); int ippr_ftp_pasvreply __P((fr_info_t *, ip_t *, nat_t *, ftpside_t *, - u_int, char *, char *, u_int)); + u_int, char *, char *, u_int, ifs_ftppxy_t *)); + +/* + * Initialize local structures. + */ +int ippr_ftp_init(private, ifs) +void **private; +ipf_stack_t *ifs; +{ + ifs_ftppxy_t *ifsftp; + KMALLOC(ifsftp, ifs_ftppxy_t *); + if (ifsftp == NULL) + return -1; -int ftp_proxy_init = 0; -int ippr_ftp_pasvonly = 0; -int ippr_ftp_insecure = 0; /* Do not require logins before transfers */ -int ippr_ftp_pasvrdr = 0; -int ippr_ftp_forcepasv = 0; /* PASV must be last command prior to 227 */ + bzero((char *)&ifsftp->ftppxyfr, sizeof(ifsftp->ftppxyfr)); + ifsftp->ftppxyfr.fr_ref = 1; + ifsftp->ftppxyfr.fr_flags = FR_INQUE|FR_PASS|FR_QUICK|FR_KEEPSTATE; + MUTEX_INIT(&ifsftp->ftppxyfr.fr_lock, "FTP Proxy Mutex"); + ifsftp->ftp_proxy_init = 1; + ifsftp->ippr_ftp_pasvonly = 0; + ifsftp->ippr_ftp_insecure = 0; + ifsftp->ippr_ftp_pasvrdr = 0; + ifsftp->ippr_ftp_forcepasv = 0; #if defined(_KERNEL) -int ippr_ftp_debug = 0; + ifsftp->ippr_ftp_debug = 0; #else -int ippr_ftp_debug = 2; + ifsftp->ippr_ftp_debug = 2; #endif -/* - * 1 - security - * 2 - errors - * 3 - error debugging - * 4 - parsing errors - * 5 - parsing info - * 6 - parsing debug - */ + bzero((char *)&ifsftp->ftptune, sizeof(ifsftp->ftptune)); + ifsftp->ftptune.ipft_pint = (uint_t *)&ifsftp->ippr_ftp_debug; + ifsftp->ftptune.ipft_name = "ippr_ftp_debug"; + ifsftp->ftptune.ipft_max = 10; + ifsftp->ftptune.ipft_sz = sizeof(ifsftp->ippr_ftp_debug); + ifsftp->ftptune.ipft_next = NULL; -static frentry_t ftppxyfr; -static ipftuneable_t ftptune = { - { &ippr_ftp_debug }, - "ippr_ftp_debug", - 0, - 10, - sizeof(ippr_ftp_debug), - 0, - NULL -}; + (void) fr_addipftune(&ifsftp->ftptune, ifs); - -/* - * Initialize local structures. - */ -int ippr_ftp_init() -{ - bzero((char *)&ftppxyfr, sizeof(ftppxyfr)); - ftppxyfr.fr_ref = 1; - ftppxyfr.fr_flags = FR_INQUE|FR_PASS|FR_QUICK|FR_KEEPSTATE; - MUTEX_INIT(&ftppxyfr.fr_lock, "FTP Proxy Mutex"); - ftp_proxy_init = 1; - (void) fr_addipftune(&ftptune); + *private = (void *)ifsftp; return 0; } -void ippr_ftp_fini() +void ippr_ftp_fini(private, ifs) +void **private; +ipf_stack_t *ifs; { - (void) fr_delipftune(&ftptune); + ifs_ftppxy_t *ifsftp = *((ifs_ftppxy_t **)private); - if (ftp_proxy_init == 1) { - MUTEX_DESTROY(&ftppxyfr.fr_lock); - ftp_proxy_init = 0; + (void) fr_delipftune(&ifsftp->ftptune, ifs); + + if (ifsftp->ftp_proxy_init == 1) { + MUTEX_DESTROY(&ifsftp->ftppxyfr.fr_lock); + ifsftp->ftp_proxy_init = 0; } + + KFREE(ifsftp); + *private = NULL; } -int ippr_ftp_new(fin, aps, nat) +/*ARGSUSED*/ +int ippr_ftp_new(fin, aps, nat, private) fr_info_t *fin; ap_session_t *aps; nat_t *nat; +void *private; { ftpinfo_t *ftp; ftpside_t *f; @@ -155,12 +184,13 @@ nat_t *nat; } -int ippr_ftp_port(fin, ip, nat, f, dlen) +int ippr_ftp_port(fin, ip, nat, f, dlen, ifsftp) fr_info_t *fin; ip_t *ip; nat_t *nat; ftpside_t *f; int dlen; +ifs_ftppxy_t *ifsftp; { tcphdr_t *tcp, tcph, *tcp2 = &tcph; char newbuf[IPF_FTPBUFSZ], *s; @@ -172,6 +202,7 @@ int dlen; fr_info_t fi; nat_t *nat2; mb_t *m; + ipf_stack_t *ifs = fin->fin_ifs; m = fin->fin_m; tcp = (tcphdr_t *)fin->fin_dp; @@ -181,7 +212,7 @@ int dlen; * Check for client sending out PORT message. 
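The FTP proxy gives up its file-scope globals (ftppxyfr, ftptune, the ippr_ftp_* switches): ippr_ftp_init() allocates an ifs_ftppxy_t, registers the debug tunable against the owning stack with fr_addipftune(), and hands the structure back through *private; every later callback receives that pointer and casts it, and ippr_ftp_fini() unwinds exactly what init built. A bare-bones sketch of that init/fini contract, with hypothetical names:

    #include <stdlib.h>

    typedef struct pxy_private {
        int debug;                 /* was a global ippr_*_debug knob */
    } pxy_private_t;

    /* init: allocate per-instance state, hand it back via *private */
    static int
    pxy_init(void **private)
    {
        pxy_private_t *p = calloc(1, sizeof (*p));

        if (p == NULL)
            return -1;
        *private = p;
        return 0;
    }

    /* packet callbacks get the same pointer back and cast it */
    static int
    pxy_packet(void *private)
    {
        pxy_private_t *p = private;

        return (p->debug > 0) ? 1 : 0;
    }

    /* fini: tear down exactly what init built; nothing shared remains */
    static void
    pxy_fini(void **private)
    {
        free(*private);
        *private = NULL;
    }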
*/ if (dlen < IPF_MINPORTLEN) { - if (ippr_ftp_debug > 1) + if (ifsftp->ippr_ftp_debug > 1) printf("ippr_ftp_port:dlen(%d) < IPF_MINPORTLEN\n", dlen); return 0; @@ -195,13 +226,13 @@ int dlen; */ a1 = ippr_ftp_atoi(&s); if (s == NULL) { - if (ippr_ftp_debug > 1) + if (ifsftp->ippr_ftp_debug > 1) printf("ippr_ftp_port:ippr_ftp_atoi(%d) failed\n", 1); return 0; } a2 = ippr_ftp_atoi(&s); if (s == NULL) { - if (ippr_ftp_debug > 1) + if (ifsftp->ippr_ftp_debug > 1) printf("ippr_ftp_port:ippr_ftp_atoi(%d) failed\n", 2); return 0; } @@ -216,14 +247,14 @@ int dlen; (a1 != ntohl(nat->nat_inip.s_addr))) || ((nat->nat_dir == NAT_INBOUND) && (a1 != ntohl(nat->nat_oip.s_addr)))) { - if (ippr_ftp_debug > 0) + if (ifsftp->ippr_ftp_debug > 0) printf("ippr_ftp_port:%s != nat->nat_inip\n", "a1"); return APR_ERR(1); } a5 = ippr_ftp_atoi(&s); if (s == NULL) { - if (ippr_ftp_debug > 1) + if (ifsftp->ippr_ftp_debug > 1) printf("ippr_ftp_port:ippr_ftp_atoi(%d) failed\n", 3); return 0; } @@ -239,7 +270,7 @@ int dlen; s += 2; a6 = a5 & 0xff; } else { - if (ippr_ftp_debug > 1) + if (ifsftp->ippr_ftp_debug > 1) printf("ippr_ftp_port:missing %s\n", "cr-lf"); return 0; } @@ -252,7 +283,7 @@ int dlen; * security crap. */ if (sp < 1024) { - if (ippr_ftp_debug > 0) + if (ifsftp->ippr_ftp_debug > 0) printf("ippr_ftp_port:sp(%d) < 1024\n", sp); return 0; } @@ -280,7 +311,7 @@ int dlen; nlen = strlen(newbuf); inc = nlen - olen; if ((inc + ip->ip_len) > 65535) { - if (ippr_ftp_debug > 0) + if (ifsftp->ippr_ftp_debug > 0) printf("ippr_ftp_port:inc(%d) + ip->ip_len > 65535\n", inc); return 0; @@ -346,7 +377,7 @@ int dlen; fi.fin_dlen = sizeof(*tcp2); fi.fin_plen = fi.fin_hlen + sizeof(*tcp2); fi.fin_dp = (char *)tcp2; - fi.fin_fr = &ftppxyfr; + fi.fin_fr = &ifsftp->ftppxyfr; fi.fin_out = nat->nat_dir; fi.fin_flx &= FI_LOWTTL|FI_FRAG|FI_TCPUDP|FI_OPTIONS|FI_IGNORE; swip = ip->ip_src; @@ -374,7 +405,8 @@ int dlen; } (void) fr_addstate(&fi, &nat2->nat_state, SI_W_DPORT); if (fi.fin_state != NULL) - fr_statederef(&fi, (ipstate_t **)&fi.fin_state); + fr_statederef(&fi, (ipstate_t **)&fi.fin_state, + ifs); } ip->ip_len = slen; ip->ip_src = swip; @@ -383,26 +415,27 @@ int dlen; ipstate_t *is; nat_update(&fi, nat2, nat->nat_ptr); - READ_ENTER(&ipf_state); + READ_ENTER(&ifs->ifs_ipf_state); is = nat2->nat_state; if (is != NULL) { MUTEX_ENTER(&is->is_lock); - (void)fr_tcp_age(&is->is_sti, &fi, ips_tqtqb, - is->is_flags); + (void)fr_tcp_age(&is->is_sti, &fi, ifs->ifs_ips_tqtqb, + is->is_flags); MUTEX_EXIT(&is->is_lock); } - RWLOCK_EXIT(&ipf_state); + RWLOCK_EXIT(&ifs->ifs_ipf_state); } return APR_INC(inc); } -int ippr_ftp_client(fin, ip, nat, ftp, dlen) +int ippr_ftp_client(fin, ip, nat, ftp, dlen, ifsftp) fr_info_t *fin; nat_t *nat; ftpinfo_t *ftp; ip_t *ip; int dlen; +ifs_ftppxy_t *ifsftp; { char *rptr, *wptr, cmd[6], c; ftpside_t *f; @@ -453,12 +486,13 @@ int dlen; !strncmp(cmd, "ACCT ", 5)) { ftp->ftp_passok = FTPXY_ACCT_1; ftp->ftp_incok = 1; - } else if ((ftp->ftp_passok == FTPXY_GO) && !ippr_ftp_pasvonly && + } else if ((ftp->ftp_passok == FTPXY_GO) && + !ifsftp->ippr_ftp_pasvonly && !strncmp(cmd, "PORT ", 5)) { - inc = ippr_ftp_port(fin, ip, nat, f, dlen); - } else if (ippr_ftp_insecure && !ippr_ftp_pasvonly && + inc = ippr_ftp_port(fin, ip, nat, f, dlen, ifsftp); + } else if (ifsftp->ippr_ftp_insecure && !ifsftp->ippr_ftp_pasvonly && !strncmp(cmd, "PORT ", 5)) { - inc = ippr_ftp_port(fin, ip, nat, f, dlen); + inc = ippr_ftp_port(fin, ip, nat, f, dlen, ifsftp); } while ((*rptr++ != '\n') && (rptr < wptr)) @@ -468,12 +502,13 @@ 
int dlen; } -int ippr_ftp_pasv(fin, ip, nat, ftp, dlen) +int ippr_ftp_pasv(fin, ip, nat, ftp, dlen, ifsftp) fr_info_t *fin; ip_t *ip; nat_t *nat; ftpinfo_t *ftp; int dlen; +ifs_ftppxy_t *ifsftp; { u_int a1, a2, a3, a4, data_ip; char newbuf[IPF_FTPBUFSZ]; @@ -481,9 +516,9 @@ int dlen; u_short a5, a6; ftpside_t *f; - if (ippr_ftp_forcepasv != 0 && + if (ifsftp->ippr_ftp_forcepasv != 0 && ftp->ftp_side[0].ftps_cmds != FTPXY_C_PASV) { - if (ippr_ftp_debug > 0) + if (ifsftp->ippr_ftp_debug > 0) printf("ippr_ftp_pasv:ftps_cmds(%d) != FTPXY_C_PASV\n", ftp->ftp_side[0].ftps_cmds); return 0; @@ -496,13 +531,13 @@ int dlen; * Check for PASV reply message. */ if (dlen < IPF_MIN227LEN) { - if (ippr_ftp_debug > 1) + if (ifsftp->ippr_ftp_debug > 1) printf("ippr_ftp_pasv:dlen(%d) < IPF_MIN227LEN\n", dlen); return 0; } else if (strncmp(f->ftps_rptr, "227 Entering Passive Mod", PASV_REPLEN)) { - if (ippr_ftp_debug > 0) + if (ifsftp->ippr_ftp_debug > 0) printf("ippr_ftp_pasv:%d reply wrong\n", 227); return 0; } @@ -526,13 +561,13 @@ int dlen; */ a1 = ippr_ftp_atoi(&s); if (s == NULL) { - if (ippr_ftp_debug > 1) + if (ifsftp->ippr_ftp_debug > 1) printf("ippr_ftp_pasv:ippr_ftp_atoi(%d) failed\n", 1); return 0; } a2 = ippr_ftp_atoi(&s); if (s == NULL) { - if (ippr_ftp_debug > 1) + if (ifsftp->ippr_ftp_debug > 1) printf("ippr_ftp_pasv:ippr_ftp_atoi(%d) failed\n", 2); return 0; } @@ -548,14 +583,14 @@ int dlen; (a1 != ntohl(nat->nat_inip.s_addr))) || ((nat->nat_dir == NAT_OUTBOUND) && (a1 != ntohl(nat->nat_oip.s_addr)))) { - if (ippr_ftp_debug > 0) + if (ifsftp->ippr_ftp_debug > 0) printf("ippr_ftp_pasv:%s != nat->nat_oip\n", "a1"); return 0; } a5 = ippr_ftp_atoi(&s); if (s == NULL) { - if (ippr_ftp_debug > 1) + if (ifsftp->ippr_ftp_debug > 1) printf("ippr_ftp_pasv:ippr_ftp_atoi(%d) failed\n", 3); return 0; } @@ -572,7 +607,7 @@ int dlen; if ((*s == '\r') && (*(s + 1) == '\n')) { s += 2; } else { - if (ippr_ftp_debug > 1) + if (ifsftp->ippr_ftp_debug > 1) printf("ippr_ftp_pasv:missing %s", "cr-lf\n"); return 0; } @@ -603,10 +638,10 @@ int dlen; a5, a6, brackets[1]); #endif return ippr_ftp_pasvreply(fin, ip, nat, f, (a5 << 8 | a6), - newbuf, s, data_ip); + newbuf, s, data_ip, ifsftp); } -int ippr_ftp_pasvreply(fin, ip, nat, f, port, newmsg, s, data_ip) +int ippr_ftp_pasvreply(fin, ip, nat, f, port, newmsg, s, data_ip, ifsftp) fr_info_t *fin; ip_t *ip; nat_t *nat; @@ -615,6 +650,7 @@ u_int port; char *newmsg; char *s; u_int data_ip; +ifs_ftppxy_t *ifsftp; { int inc, off, nflags, sflags; tcphdr_t *tcp, tcph, *tcp2; @@ -624,6 +660,7 @@ u_int data_ip; fr_info_t fi; nat_t *nat2; mb_t *m; + ipf_stack_t *ifs = fin->fin_ifs; m = fin->fin_m; tcp = (tcphdr_t *)fin->fin_dp; @@ -638,7 +675,7 @@ u_int data_ip; nlen = strlen(newmsg); inc = nlen - olen; if ((inc + ip->ip_len) > 65535) { - if (ippr_ftp_debug > 0) + if (ifsftp->ippr_ftp_debug > 0) printf("ippr_ftp_pasv:inc(%d) + ip->ip_len > 65535\n", inc); return 0; @@ -679,7 +716,7 @@ u_int data_ip; fi.fin_data[0] = 0; fi.fin_data[1] = port; nflags = IPN_TCP|SI_W_SPORT; - if (ippr_ftp_pasvrdr && f->ftps_ifp) + if (ifsftp->ippr_ftp_pasvrdr && f->ftps_ifp) nflags |= SI_W_DPORT; if (nat->nat_dir == NAT_OUTBOUND) nat2 = nat_outlookup(&fi, nflags|NAT_SEARCH, @@ -703,7 +740,7 @@ u_int data_ip; fi.fin_data[0] = 0; fi.fin_dp = (char *)tcp2; fi.fin_plen = fi.fin_hlen + sizeof(*tcp); - fi.fin_fr = &ftppxyfr; + fi.fin_fr = &ifsftp->ftppxyfr; fi.fin_out = nat->nat_dir; fi.fin_flx &= FI_LOWTTL|FI_FRAG|FI_TCPUDP|FI_OPTIONS|FI_IGNORE; swip = ip->ip_src; @@ -735,7 +772,8 @@ u_int data_ip; } 
(void) fr_addstate(&fi, &nat2->nat_state, sflags); if (fi.fin_state != NULL) - fr_statederef(&fi, (ipstate_t **)&fi.fin_state); + fr_statederef(&fi, (ipstate_t **)&fi.fin_state, + ifs); } ip->ip_len = slen; @@ -745,26 +783,27 @@ u_int data_ip; ipstate_t *is; nat_update(&fi, nat2, nat->nat_ptr); - READ_ENTER(&ipf_state); + READ_ENTER(&ifs->ifs_ipf_state); is = nat2->nat_state; if (is != NULL) { MUTEX_ENTER(&is->is_lock); - (void)fr_tcp_age(&is->is_sti, &fi, ips_tqtqb, - is->is_flags); + (void) fr_tcp_age(&is->is_sti, &fi, ifs->ifs_ips_tqtqb, + is->is_flags); MUTEX_EXIT(&is->is_lock); } - RWLOCK_EXIT(&ipf_state); + RWLOCK_EXIT(&ifs->ifs_ipf_state); } return inc; } -int ippr_ftp_server(fin, ip, nat, ftp, dlen) +int ippr_ftp_server(fin, ip, nat, ftp, dlen, ifsftp) fr_info_t *fin; ip_t *ip; nat_t *nat; ftpinfo_t *ftp; int dlen; +ifs_ftppxy_t *ifsftp; { char *rptr, *wptr; ftpside_t *f; @@ -781,13 +820,13 @@ int dlen; return 0; if (ftp->ftp_passok == FTPXY_GO) { if (!strncmp(rptr, "227 ", 4)) - inc = ippr_ftp_pasv(fin, ip, nat, ftp, dlen); + inc = ippr_ftp_pasv(fin, ip, nat, ftp, dlen, ifsftp); else if (!strncmp(rptr, "229 ", 4)) - inc = ippr_ftp_epsv(fin, ip, nat, f, dlen); - } else if (ippr_ftp_insecure && !strncmp(rptr, "227 ", 4)) { - inc = ippr_ftp_pasv(fin, ip, nat, ftp, dlen); - } else if (ippr_ftp_insecure && !strncmp(rptr, "229 ", 4)) { - inc = ippr_ftp_epsv(fin, ip, nat, f, dlen); + inc = ippr_ftp_epsv(fin, ip, nat, f, dlen, ifsftp); + } else if (ifsftp->ippr_ftp_insecure && !strncmp(rptr, "227 ", 4)) { + inc = ippr_ftp_pasv(fin, ip, nat, ftp, dlen, ifsftp); + } else if (ifsftp->ippr_ftp_insecure && !strncmp(rptr, "229 ", 4)) { + inc = ippr_ftp_epsv(fin, ip, nat, f, dlen, ifsftp); } else if (*rptr == '5' || *rptr == '4') ftp->ftp_passok = FTPXY_INIT; else if (ftp->ftp_incok) { @@ -826,10 +865,11 @@ server_cmd_ok: * Look to see if the buffer starts with something which we recognise as * being the correct syntax for the FTP protocol. 
*/ -int ippr_ftp_client_valid(ftps, buf, len) +int ippr_ftp_client_valid(ftps, buf, len, ifsftp) ftpside_t *ftps; char *buf; size_t len; +ifs_ftppxy_t *ifsftp; { register char *s, c, pc; register size_t i = len; @@ -841,7 +881,7 @@ size_t len; return 1; if (i < 5) { - if (ippr_ftp_debug > 3) + if (ifsftp->ippr_ftp_debug > 3) printf("ippr_ftp_client_valid:i(%d) < 5\n", (int)i); return 2; } @@ -875,7 +915,7 @@ size_t len; goto bad_client_command; } else { bad_client_command: - if (ippr_ftp_debug > 3) + if (ifsftp->ippr_ftp_debug > 3) printf("%s:bad:junk %d len %d/%d c 0x%x buf [%*s]\n", "ippr_ftp_client_valid", ftps->ftps_junk, (int)len, (int)i, c, @@ -903,10 +943,11 @@ bad_client_command: } -int ippr_ftp_server_valid(ftps, buf, len) +int ippr_ftp_server_valid(ftps, buf, len, ifsftp) ftpside_t *ftps; char *buf; size_t len; +ifs_ftppxy_t *ifsftp; { register char *s, c, pc; register size_t i = len; @@ -919,7 +960,7 @@ size_t len; return 1; if (i < 5) { - if (ippr_ftp_debug > 3) + if (ifsftp->ippr_ftp_debug > 3) printf("ippr_ftp_servert_valid:i(%d) < 5\n", (int)i); return 2; } @@ -949,7 +990,7 @@ size_t len; goto bad_server_command; } else { bad_server_command: - if (ippr_ftp_debug > 3) + if (ifsftp->ippr_ftp_debug > 3) printf("%s:bad:junk %d len %d/%d c 0x%x buf [%*s]\n", "ippr_ftp_server_valid", ftps->ftps_junk, (int)len, (int)i, @@ -965,18 +1006,19 @@ search_eol: return 0; } } - if (ippr_ftp_debug > 3) + if (ifsftp->ippr_ftp_debug > 3) printf("ippr_ftp_server_valid:junk after cmd[%*s]\n", (int)len, buf); return 2; } -int ippr_ftp_valid(ftp, side, buf, len) +int ippr_ftp_valid(ftp, side, buf, len, ifsftp) ftpinfo_t *ftp; int side; char *buf; size_t len; +ifs_ftppxy_t *ifsftp; { ftpside_t *ftps; int ret; @@ -984,9 +1026,9 @@ size_t len; ftps = &ftp->ftp_side[side]; if (side == 0) - ret = ippr_ftp_client_valid(ftps, buf, len); + ret = ippr_ftp_client_valid(ftps, buf, len, ifsftp); else - ret = ippr_ftp_server_valid(ftps, buf, len); + ret = ippr_ftp_server_valid(ftps, buf, len, ifsftp); return ret; } @@ -999,11 +1041,12 @@ size_t len; * rv == 0 for inbound processing, * rv == 1 for outbound processing. */ -int ippr_ftp_process(fin, nat, ftp, rv) +int ippr_ftp_process(fin, nat, ftp, rv, ifsftp) fr_info_t *fin; nat_t *nat; ftpinfo_t *ftp; int rv; +ifs_ftppxy_t *ifsftp; { int mlen, len, off, inc, i, sel, sel2, ok, ackoff, seqoff; char *rptr, *wptr, *s; @@ -1029,7 +1072,7 @@ int rv; #else mlen = MSGDSIZE(m) - off; #endif - if (ippr_ftp_debug > 4) + if (ifsftp->ippr_ftp_debug > 4) printf("ippr_ftp_process: mlen %d\n", mlen); if (mlen <= 0) { @@ -1052,14 +1095,14 @@ int rv; ackoff = aps->aps_ackoff[!sel2]; } else { seqoff = aps->aps_ackoff[sel]; - if (ippr_ftp_debug > 2) + if (ifsftp->ippr_ftp_debug > 2) printf("seqoff %d thseq %x ackmin %x\n", seqoff, thseq, aps->aps_ackmin[sel]); if (aps->aps_ackmin[sel] > seqoff + thseq) seqoff = aps->aps_ackoff[!sel]; ackoff = aps->aps_seqoff[sel2]; - if (ippr_ftp_debug > 2) + if (ifsftp->ippr_ftp_debug > 2) printf("ackoff %d thack %x seqmin %x\n", ackoff, thack, aps->aps_seqmin[sel2]); if (ackoff > 0) { @@ -1070,7 +1113,7 @@ int rv; ackoff = aps->aps_seqoff[!sel2]; } } - if (ippr_ftp_debug > 2) { + if (ifsftp->ippr_ftp_debug > 2) { printf("%s: %x seq %x/%d ack %x/%d len %d/%d off %d\n", rv ? "IN" : "OUT", tcp->th_flags, thseq, seqoff, thack, ackoff, mlen, fin->fin_plen, off); @@ -1087,7 +1130,7 @@ int rv; * that it is out of order (and there is no real danger in doing so * apart from causing packets to go through here ordered). 
*/ - if (ippr_ftp_debug > 2) { + if (ifsftp->ippr_ftp_debug > 2) { printf("rv %d t:seq[0] %x seq[1] %x %d/%d\n", rv, t->ftps_seq[0], t->ftps_seq[1], seqoff, ackoff); } @@ -1119,14 +1162,14 @@ int rv; } } - if (ippr_ftp_debug > 2) { + if (ifsftp->ippr_ftp_debug > 2) { if (!ok) printf("%s ok\n", "not"); } if (!mlen) { if (t->ftps_seq[0] + ackoff != thack) { - if (ippr_ftp_debug > 1) { + if (ifsftp->ippr_ftp_debug > 1) { printf("%s:seq[0](%x) + (%x) != (%x)\n", "ippr_ftp_process", t->ftps_seq[0], ackoff, thack); @@ -1134,7 +1177,7 @@ int rv; return APR_ERR(1); } - if (ippr_ftp_debug > 2) { + if (ifsftp->ippr_ftp_debug > 2) { printf("ippr_ftp_process:f:seq[0] %x seq[1] %x\n", f->ftps_seq[0], f->ftps_seq[1]); } @@ -1144,7 +1187,7 @@ int rv; f->ftps_seq[0] = f->ftps_seq[1] - seqoff; f->ftps_seq[1] = thseq + 1 - seqoff; } else { - if (ippr_ftp_debug > 1) { + if (ifsftp->ippr_ftp_debug > 1) { printf("FIN: thseq %x seqoff %d ftps_seq %x\n", thseq, seqoff, f->ftps_seq[0]); } @@ -1168,7 +1211,7 @@ int rv; if (ok == 0) { inc = thseq - f->ftps_seq[0]; - if (ippr_ftp_debug > 1) { + if (ifsftp->ippr_ftp_debug > 1) { printf("inc %d sel %d rv %d\n", inc, sel, rv); printf("th_seq %x ftps_seq %x/%x\n", thseq, f->ftps_seq[0], f->ftps_seq[1]); @@ -1195,7 +1238,7 @@ int rv; off += len; wptr += len; - if (ippr_ftp_debug > 3) + if (ifsftp->ippr_ftp_debug > 3) printf("%s:len %d/%d off %d wptr %lx junk %d [%*s]\n", "ippr_ftp_process", len, mlen, off, (u_long)wptr, f->ftps_junk, @@ -1205,15 +1248,15 @@ int rv; if (f->ftps_junk != 0) { i = f->ftps_junk; f->ftps_junk = ippr_ftp_valid(ftp, rv, rptr, - wptr - rptr); + wptr - rptr, ifsftp); - if (ippr_ftp_debug > 5) + if (ifsftp->ippr_ftp_debug > 5) printf("%s:junk %d -> %d\n", "ippr_ftp_process", i, f->ftps_junk); if (f->ftps_junk != 0) { if (wptr - rptr == sizeof(f->ftps_buf)) { - if (ippr_ftp_debug > 4) + if (ifsftp->ippr_ftp_debug > 4) printf("%s:full buffer\n", "ippr_ftp_process"); f->ftps_rptr = f->ftps_buf; @@ -1234,9 +1277,9 @@ int rv; while ((f->ftps_junk == 0) && (wptr > rptr)) { len = wptr - rptr; - f->ftps_junk = ippr_ftp_valid(ftp, rv, rptr, len); + f->ftps_junk = ippr_ftp_valid(ftp, rv, rptr, len, ifsftp); - if (ippr_ftp_debug > 3) { + if (ifsftp->ippr_ftp_debug > 3) { printf("%s=%d len %d rv %d ptr %lx/%lx ", "ippr_ftp_valid", f->ftps_junk, len, rv, (u_long)rptr, @@ -1248,10 +1291,10 @@ int rv; f->ftps_rptr = rptr; if (rv) inc += ippr_ftp_server(fin, ip, nat, - ftp, len); + ftp, len, ifsftp); else inc += ippr_ftp_client(fin, ip, nat, - ftp, len); + ftp, len, ifsftp); rptr = f->ftps_rptr; wptr = f->ftps_wptr; } @@ -1264,7 +1307,7 @@ int rv; if ((f->ftps_cmds == 0) && (f->ftps_junk == 1)) { /* f->ftps_seq[1] += inc; */ - if (ippr_ftp_debug > 1) + if (ifsftp->ippr_ftp_debug > 1) printf("%s:cmds == 0 junk == 1\n", "ippr_ftp_process"); return APR_ERR(2); @@ -1303,7 +1346,7 @@ int rv; /* f->ftps_seq[1] += inc; */ if (tcp->th_flags & TH_FIN) f->ftps_seq[1]++; - if (ippr_ftp_debug > 3) { + if (ifsftp->ippr_ftp_debug > 3) { #ifdef __sgi mlen = fin->fin_plen; #else @@ -1320,10 +1363,11 @@ int rv; } -int ippr_ftp_out(fin, aps, nat) +int ippr_ftp_out(fin, aps, nat, private) fr_info_t *fin; ap_session_t *aps; nat_t *nat; +void *private; { ftpinfo_t *ftp; int rev; @@ -1336,14 +1380,15 @@ nat_t *nat; if (ftp->ftp_side[1 - rev].ftps_ifp == NULL) ftp->ftp_side[1 - rev].ftps_ifp = fin->fin_ifp; - return ippr_ftp_process(fin, nat, ftp, rev); + return ippr_ftp_process(fin, nat, ftp, rev, (ifs_ftppxy_t *)private); } -int ippr_ftp_in(fin, aps, nat) +int ippr_ftp_in(fin, aps, 
nat, private) fr_info_t *fin; ap_session_t *aps; nat_t *nat; +void *private; { ftpinfo_t *ftp; int rev; @@ -1356,7 +1401,7 @@ nat_t *nat; if (ftp->ftp_side[rev].ftps_ifp == NULL) ftp->ftp_side[rev].ftps_ifp = fin->fin_ifp; - return ippr_ftp_process(fin, nat, ftp, 1 - rev); + return ippr_ftp_process(fin, nat, ftp, 1 - rev, (ifs_ftppxy_t *)private); } @@ -1391,12 +1436,13 @@ char **ptr; } -int ippr_ftp_epsv(fin, ip, nat, f, dlen) +int ippr_ftp_epsv(fin, ip, nat, f, dlen, ifsftp) fr_info_t *fin; ip_t *ip; nat_t *nat; ftpside_t *f; int dlen; +ifs_ftppxy_t *ifsftp; { char newbuf[IPF_FTPBUFSZ]; char *s; @@ -1454,5 +1500,5 @@ int dlen; #endif return ippr_ftp_pasvreply(fin, ip, nat, f, (u_int)ap, newbuf, s, - ip->ip_src.s_addr); + ip->ip_src.s_addr, ifsftp); } diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_h323_pxy.c b/usr/src/uts/common/inet/ipf/netinet/ip_h323_pxy.c index 7fc62276c5..82b2b2d2ed 100644 --- a/usr/src/uts/common/inet/ipf/netinet/ip_h323_pxy.c +++ b/usr/src/uts/common/inet/ipf/netinet/ip_h323_pxy.c @@ -10,6 +10,9 @@ * please email licensing@qnx.com. * * For more details, see QNX_OCL.txt provided with this distribution. + * + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. */ /* @@ -19,6 +22,8 @@ * ported to ipfilter 3.4.20 by Michael Grant mg-ipf@grant.org */ +#pragma ident "%Z%%M% %I% %E% SMI" + #if __FreeBSD_version >= 220000 && defined(_KERNEL) # include <sys/fcntl.h> # include <sys/filio.h> @@ -30,20 +35,21 @@ #define IPF_H323_PROXY -int ippr_h323_init __P((void)); -void ippr_h323_fini __P((void)); -int ippr_h323_new __P((fr_info_t *, ap_session_t *, nat_t *)); -void ippr_h323_del __P((ap_session_t *)); -int ippr_h323_out __P((fr_info_t *, ap_session_t *, nat_t *)); -int ippr_h323_in __P((fr_info_t *, ap_session_t *, nat_t *)); - -int ippr_h245_new __P((fr_info_t *, ap_session_t *, nat_t *)); -int ippr_h245_out __P((fr_info_t *, ap_session_t *, nat_t *)); -int ippr_h245_in __P((fr_info_t *, ap_session_t *, nat_t *)); +typedef struct ifs_h323pxy { + frentry_t h323_fr; + int h323_proxy_init; +} ifs_h323pxy_t; -static frentry_t h323_fr; +int ippr_h323_init __P((void **, ipf_stack_t *)); +void ippr_h323_fini __P((void **, ipf_stack_t *)); +int ippr_h323_new __P((fr_info_t *, ap_session_t *, nat_t *, void *)); +void ippr_h323_del __P((ap_session_t *, void *, ipf_stack_t *)); +int ippr_h323_out __P((fr_info_t *, ap_session_t *, nat_t *, void *)); +int ippr_h323_in __P((fr_info_t *, ap_session_t *, nat_t *, void *)); -int h323_proxy_init = 0; +int ippr_h245_new __P((fr_info_t *, ap_session_t *, nat_t *, void *)); +int ippr_h245_out __P((fr_info_t *, ap_session_t *, nat_t *, void *)); +int ippr_h245_in __P((fr_info_t *, ap_session_t *, nat_t *, void *)); static int find_port __P((int, caddr_t, int datlen, int *, u_short *)); @@ -81,31 +87,50 @@ unsigned short *port; /* * Initialize local structures. 
*/ -int ippr_h323_init() +/*ARGSUSED*/ +int ippr_h323_init(private, ifs) +void **private; +ipf_stack_t *ifs; { - bzero((char *)&h323_fr, sizeof(h323_fr)); - h323_fr.fr_ref = 1; - h323_fr.fr_flags = FR_INQUE|FR_PASS|FR_QUICK|FR_KEEPSTATE; - MUTEX_INIT(&h323_fr.fr_lock, "H323 proxy rule lock"); - h323_proxy_init = 1; + ifs_h323pxy_t *ifsh323; + + KMALLOC(ifsh323, ifs_h323pxy_t *); + if (ifsh323 == NULL) + return -1; + + ifsh323->h323_fr.fr_ref = 1; + ifsh323->h323_fr.fr_flags = FR_INQUE|FR_PASS|FR_QUICK|FR_KEEPSTATE; + MUTEX_INIT(&ifsh323->h323_fr.fr_lock, "H323 proxy rule lock"); + ifsh323->h323_proxy_init = 1; + + *private = (void *)ifsh323; return 0; } -void ippr_h323_fini() +/*ARGSUSED*/ +void ippr_h323_fini(private, ifs) +void **private; +ipf_stack_t *ifs; { - if (h323_proxy_init == 1) { - MUTEX_DESTROY(&h323_fr.fr_lock); - h323_proxy_init = 0; + ifs_h323pxy_t *ifsh323 = *((ifs_h323pxy_t **)private); + + if (ifsh323->h323_proxy_init == 1) { + MUTEX_DESTROY(&ifsh323->h323_fr.fr_lock); + ifsh323->h323_proxy_init = 0; } -} + KFREE(ifsh323); + *private = NULL; +} -int ippr_h323_new(fin, aps, nat) +/*ARGSUSED*/ +int ippr_h323_new(fin, aps, nat, private) fr_info_t *fin; ap_session_t *aps; nat_t *nat; +void *private; { fin = fin; /* LINT */ nat = nat; /* LINT */ @@ -116,9 +141,11 @@ nat_t *nat; return 0; } - -void ippr_h323_del(aps) +/*ARGSUSED*/ +void ippr_h323_del(aps, private, ifs) ap_session_t *aps; +void *private; +ipf_stack_t *ifs; { int i; ipnat_t *ipn; @@ -135,7 +162,7 @@ ap_session_t *aps; * called with ipf_nat locked. */ if (fr_nat_ioctl((caddr_t)ipn, SIOCRMNAT, NAT_SYSSPACE| - NAT_LOCKHELD|FWRITE) == -1) { + NAT_LOCKHELD|FWRITE, 0, NULL, ifs) == -1) { /*EMPTY*/; /* log the error */ } @@ -149,16 +176,19 @@ ap_session_t *aps; } -int ippr_h323_in(fin, aps, nat) +/*ARGSUSED*/ +int ippr_h323_in(fin, aps, nat, private) fr_info_t *fin; ap_session_t *aps; nat_t *nat; +void *private; { int ipaddr, off, datlen; unsigned short port; caddr_t data; tcphdr_t *tcp; ip_t *ip; + ipf_stack_t *ifs = fin->fin_ifs; ip = fin->fin_ip; tcp = (tcphdr_t *)fin->fin_dp; @@ -197,13 +227,13 @@ nat_t *nat; * A (maybe better) solution is do a UPGRADE(), and instead * of calling fr_nat_ioctl(), we add the nat rule ourself. 
*/ - RWLOCK_EXIT(&ipf_nat); + RWLOCK_EXIT(&ifs->ifs_ipf_nat); if (fr_nat_ioctl((caddr_t)ipn, SIOCADNAT, - NAT_SYSSPACE|FWRITE) == -1) { - READ_ENTER(&ipf_nat); + NAT_SYSSPACE|FWRITE, 0, NULL, ifs) == -1) { + READ_ENTER(&ifs->ifs_ipf_nat); return -1; } - READ_ENTER(&ipf_nat); + READ_ENTER(&ifs->ifs_ipf_nat); if (aps->aps_data != NULL && aps->aps_psiz > 0) { bcopy(aps->aps_data, newarray, aps->aps_psiz); KFREES(aps->aps_data, aps->aps_psiz); @@ -215,10 +245,12 @@ nat_t *nat; } -int ippr_h245_new(fin, aps, nat) +/*ARGSUSED*/ +int ippr_h245_new(fin, aps, nat, private) fr_info_t *fin; ap_session_t *aps; nat_t *nat; +void *private; { fin = fin; /* LINT */ nat = nat; /* LINT */ @@ -229,16 +261,19 @@ nat_t *nat; } -int ippr_h245_out(fin, aps, nat) +/*ARGSUSED*/ +int ippr_h245_out(fin, aps, nat, private) fr_info_t *fin; ap_session_t *aps; nat_t *nat; +void *private; { int ipaddr, off, datlen; tcphdr_t *tcp; caddr_t data; u_short port; ip_t *ip; + ipf_stack_t *ifs = fin->fin_ifs; aps = aps; /* LINT */ @@ -281,7 +316,8 @@ nat_t *nat; nat2->nat_ptr->in_hits++; #ifdef IPFILTER_LOG - nat_log(nat2, (u_int)(nat->nat_ptr->in_redir)); + nat_log(nat2, (u_int)(nat->nat_ptr->in_redir), + ifs); #endif bcopy((caddr_t)&ip->ip_src.s_addr, data + off, 4); diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_htable.h b/usr/src/uts/common/inet/ipf/netinet/ip_htable.h index 082959a9c5..32f204afac 100644 --- a/usr/src/uts/common/inet/ipf/netinet/ip_htable.h +++ b/usr/src/uts/common/inet/ipf/netinet/ip_htable.h @@ -3,7 +3,7 @@ * * See the IPFILTER.LICENCE file for details on licencing. * - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -16,6 +16,7 @@ typedef struct iphtent_s { struct iphtent_s *ipe_next, **ipe_pnext; + struct iphtent_s *ipe_snext, **ipe_psnext; void *ipe_ptr; sa_family_t ipe_family; i6addr_t ipe_addr; @@ -38,6 +39,7 @@ typedef struct iphtable_s { ipfrwlock_t iph_rwlock; struct iphtable_s *iph_next, **iph_pnext; struct iphtent_s **iph_table; + struct iphtent_s *iph_list; size_t iph_size; /* size of hash table */ u_long iph_seed; /* hashing seed */ u_32_t iph_flags; @@ -64,19 +66,20 @@ typedef struct iphtstat_s { } iphtstat_t; -extern iphtable_t *ipf_htables[IPL_LOGSIZE]; - -extern void fr_htable_unload __P((void)); -extern int fr_newhtable __P((iplookupop_t *)); -extern iphtable_t *fr_findhtable __P((int, char *)); -extern int fr_removehtable __P((iplookupop_t *)); -extern size_t fr_flushhtable __P((iplookupflush_t *)); -extern int fr_addhtent __P((iphtable_t *, iphtent_t *)); -extern int fr_delhtent __P((iphtable_t *, iphtent_t *)); -extern void fr_derefhtable __P((iphtable_t *)); -extern void fr_delhtable __P((iphtable_t *)); -extern void *fr_iphmfindgroup __P((void *, int, void *)); -extern int fr_iphmfindip __P((void *, int, void *)); -extern int fr_gethtablestat __P((iplookupop_t *)); +extern void fr_htable_unload __P((ipf_stack_t *)); +extern int fr_newhtable __P((iplookupop_t *, ipf_stack_t *)); +extern iphtable_t *fr_findhtable __P((int, char *, ipf_stack_t *)); +extern int fr_removehtable __P((iplookupop_t *, ipf_stack_t *)); +extern size_t fr_flushhtable __P((iplookupflush_t *, ipf_stack_t *)); +extern int fr_addhtent __P((iphtable_t *, iphtent_t *, ipf_stack_t *)); +extern int fr_delhtent __P((iphtable_t *, iphtent_t *, ipf_stack_t *)); +extern void fr_derefhtable __P((iphtable_t *, ipf_stack_t *)); +extern void fr_derefhtent __P((iphtent_t *)); +extern void fr_delhtable 
__P((iphtable_t *, ipf_stack_t *)); +extern void *fr_iphmfindgroup __P((void *, int, void *, ipf_stack_t *)); +extern int fr_iphmfindip __P((void *, int, void *, ipf_stack_t *)); +extern int fr_gethtablestat __P((iplookupop_t *, ipf_stack_t *)); +extern int fr_htable_getnext __P((ipftoken_t *, ipflookupiter_t *, ipf_stack_t *)); +extern void fr_htable_iterderef __P((u_int, int, void *, ipf_stack_t *)); #endif /* __IP_HTABLE_H__ */ diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_ipsec_pxy.c b/usr/src/uts/common/inet/ipf/netinet/ip_ipsec_pxy.c index aa7e1afd4d..0733c50876 100644 --- a/usr/src/uts/common/inet/ipf/netinet/ip_ipsec_pxy.c +++ b/usr/src/uts/common/inet/ipf/netinet/ip_ipsec_pxy.c @@ -8,7 +8,7 @@ * * $Id: ip_ipsec_pxy.c,v 2.20.2.7 2005/07/15 21:56:50 darrenr Exp $ * - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -16,80 +16,105 @@ #define IPF_IPSEC_PROXY - -int ippr_ipsec_init __P((void)); -void ippr_ipsec_fini __P((void)); -int ippr_ipsec_new __P((fr_info_t *, ap_session_t *, nat_t *)); -void ippr_ipsec_del __P((ap_session_t *)); -int ippr_ipsec_inout __P((fr_info_t *, ap_session_t *, nat_t *)); -int ippr_ipsec_match __P((fr_info_t *, ap_session_t *, nat_t *)); - -static frentry_t ipsecfr; -static ipftq_t *ipsecnattqe; -static ipftq_t *ipsecstatetqe; -static char ipsec_buffer[1500]; - -int ipsec_proxy_init = 0; -int ipsec_proxy_ttl = 60; +typedef struct ifs_ipsecpxy { + frentry_t ipsecfr; + ipftq_t *ipsecnattqe; + ipftq_t *ipsecstatetqe; + char ipsec_buffer[1500]; + int ipsec_proxy_init; + int ipsec_proxy_ttl; +} ifs_ipsecpxy_t; + +int ippr_ipsec_init __P((void **, ipf_stack_t *)); +void ippr_ipsec_fini __P((void **, ipf_stack_t *)); +int ippr_ipsec_new __P((fr_info_t *, ap_session_t *, nat_t *, void *)); +void ippr_ipsec_del __P((ap_session_t *, void *, ipf_stack_t *)); +int ippr_ipsec_inout __P((fr_info_t *, ap_session_t *, nat_t *, void *)); +int ippr_ipsec_match __P((fr_info_t *, ap_session_t *, nat_t *, void *)); /* * IPSec application proxy initialization. 
*/ -int ippr_ipsec_init() +int ippr_ipsec_init(private, ifs) +void **private; +ipf_stack_t *ifs; { - bzero((char *)&ipsecfr, sizeof(ipsecfr)); - ipsecfr.fr_ref = 1; - ipsecfr.fr_flags = FR_OUTQUE|FR_PASS|FR_QUICK|FR_KEEPSTATE; - MUTEX_INIT(&ipsecfr.fr_lock, "IPsec proxy rule lock"); - ipsec_proxy_init = 1; - - ipsecnattqe = fr_addtimeoutqueue(&nat_utqe, ipsec_proxy_ttl); - if (ipsecnattqe == NULL) + ifs_ipsecpxy_t *ifsipsec; + + KMALLOC(ifsipsec, ifs_ipsecpxy_t *); + if (ifsipsec == NULL) return -1; - ipsecstatetqe = fr_addtimeoutqueue(&ips_utqe, ipsec_proxy_ttl); - if (ipsecstatetqe == NULL) { - if (fr_deletetimeoutqueue(ipsecnattqe) == 0) - fr_freetimeoutqueue(ipsecnattqe); - ipsecnattqe = NULL; + + bzero((char *)&ifsipsec->ipsecfr, sizeof(ifsipsec->ipsecfr)); + ifsipsec->ipsecfr.fr_ref = 1; + ifsipsec->ipsecfr.fr_flags = FR_OUTQUE|FR_PASS|FR_QUICK|FR_KEEPSTATE; + MUTEX_INIT(&ifsipsec->ipsecfr.fr_lock, "IPsec proxy rule lock"); + ifsipsec->ipsec_proxy_init = 1; + ifsipsec->ipsec_proxy_ttl = 60; + + ifsipsec->ipsecnattqe = fr_addtimeoutqueue(&ifs->ifs_nat_utqe, ifsipsec->ipsec_proxy_ttl, ifs); + if (ifsipsec->ipsecnattqe == NULL) { + MUTEX_DESTROY(&ifsipsec->ipsecfr.fr_lock); + KFREE(ifsipsec); return -1; } + ifsipsec->ipsecstatetqe = fr_addtimeoutqueue(&ifs->ifs_ips_utqe, ifsipsec->ipsec_proxy_ttl, ifs); + if (ifsipsec->ipsecstatetqe == NULL) { + if (fr_deletetimeoutqueue(ifsipsec->ipsecnattqe) == 0) + fr_freetimeoutqueue(ifsipsec->ipsecnattqe, ifs); + ifsipsec->ipsecnattqe = NULL; + MUTEX_DESTROY(&ifsipsec->ipsecfr.fr_lock); + KFREE(ifsipsec); + return -1; + } + + ifsipsec->ipsecnattqe->ifq_flags |= IFQF_PROXY; + ifsipsec->ipsecstatetqe->ifq_flags |= IFQF_PROXY; + + ifsipsec->ipsecfr.fr_age[0] = ifsipsec->ipsec_proxy_ttl; + ifsipsec->ipsecfr.fr_age[1] = ifsipsec->ipsec_proxy_ttl; - ipsecnattqe->ifq_flags |= IFQF_PROXY; - ipsecstatetqe->ifq_flags |= IFQF_PROXY; + *private = (void *)ifsipsec; - ipsecfr.fr_age[0] = ipsec_proxy_ttl; - ipsecfr.fr_age[1] = ipsec_proxy_ttl; return 0; } -void ippr_ipsec_fini() +void ippr_ipsec_fini(private, ifs) +void **private; +ipf_stack_t *ifs; { - if (ipsecnattqe != NULL) { - if (fr_deletetimeoutqueue(ipsecnattqe) == 0) - fr_freetimeoutqueue(ipsecnattqe); + ifs_ipsecpxy_t *ifsipsec = *((ifs_ipsecpxy_t **)private); + + if (ifsipsec->ipsecnattqe != NULL) { + if (fr_deletetimeoutqueue(ifsipsec->ipsecnattqe) == 0) + fr_freetimeoutqueue(ifsipsec->ipsecnattqe, ifs); } - ipsecnattqe = NULL; - if (ipsecstatetqe != NULL) { - if (fr_deletetimeoutqueue(ipsecstatetqe) == 0) - fr_freetimeoutqueue(ipsecstatetqe); + ifsipsec->ipsecnattqe = NULL; + if (ifsipsec->ipsecstatetqe != NULL) { + if (fr_deletetimeoutqueue(ifsipsec->ipsecstatetqe) == 0) + fr_freetimeoutqueue(ifsipsec->ipsecstatetqe, ifs); } - ipsecstatetqe = NULL; + ifsipsec->ipsecstatetqe = NULL; - if (ipsec_proxy_init == 1) { - MUTEX_DESTROY(&ipsecfr.fr_lock); - ipsec_proxy_init = 0; + if (ifsipsec->ipsec_proxy_init == 1) { + MUTEX_DESTROY(&ifsipsec->ipsecfr.fr_lock); + ifsipsec->ipsec_proxy_init = 0; } + + KFREE(ifsipsec); + *private = NULL; } /* * Setup for a new IPSEC proxy. 
*/ -int ippr_ipsec_new(fin, aps, nat) +int ippr_ipsec_new(fin, aps, nat, private) fr_info_t *fin; ap_session_t *aps; nat_t *nat; +void *private; { ipsec_pxy_t *ipsec; fr_info_t fi; @@ -98,16 +123,19 @@ nat_t *nat; int p, off, dlen, ttl; mb_t *m; ip_t *ip; + ipf_stack_t *ifs = fin->fin_ifs; + ifs_ipsecpxy_t *ifsipsec = (ifs_ipsecpxy_t *)private; off = fin->fin_plen - fin->fin_dlen + fin->fin_ipoff; - bzero(ipsec_buffer, sizeof(ipsec_buffer)); + bzero(ifsipsec->ipsec_buffer, sizeof(ifsipsec->ipsec_buffer)); ip = fin->fin_ip; m = fin->fin_m; dlen = M_LEN(m) - off; if (dlen < 16) return -1; - COPYDATA(m, off, MIN(sizeof(ipsec_buffer), dlen), ipsec_buffer); + COPYDATA(m, off, MIN(sizeof(ifsipsec->ipsec_buffer), dlen), + ifsipsec->ipsec_buffer); if (nat_outlookup(fin, 0, IPPROTO_ESP, nat->nat_inip, ip->ip_dst) != NULL) @@ -127,9 +155,9 @@ nat_t *nat; * describe ESP but UDP instead. */ ipn = &ipsec->ipsc_rule; - ttl = IPF_TTLVAL(ipsecnattqe->ifq_ttl); - ipn->in_tqehead[0] = fr_addtimeoutqueue(&nat_utqe, ttl); - ipn->in_tqehead[1] = fr_addtimeoutqueue(&nat_utqe, ttl); + ttl = IPF_TTLVAL(ifsipsec->ipsecnattqe->ifq_ttl); + ipn->in_tqehead[0] = fr_addtimeoutqueue(&ifs->ifs_nat_utqe, ttl, ifs); + ipn->in_tqehead[1] = fr_addtimeoutqueue(&ifs->ifs_nat_utqe, ttl, ifs); ipn->in_ifps[0] = fin->fin_ifp; ipn->in_apr = NULL; ipn->in_use = 1; @@ -151,7 +179,7 @@ nat_t *nat; fi.fin_state = NULL; fi.fin_nat = NULL; fi.fin_fi.fi_p = IPPROTO_ESP; - fi.fin_fr = &ipsecfr; + fi.fin_fr = &ifsipsec->ipsecfr; fi.fin_data[0] = 0; fi.fin_data[1] = 0; p = ip->ip_p; @@ -159,7 +187,7 @@ nat_t *nat; fi.fin_flx &= ~(FI_TCPUDP|FI_STATE|FI_FRAG); fi.fin_flx |= FI_IGNORE; - ptr = ipsec_buffer; + ptr = ifsipsec->ipsec_buffer; bcopy(ptr, (char *)ipsec->ipsc_icookie, sizeof(ipsec_cookie_t)); ptr += sizeof(ipsec_cookie_t); bcopy(ptr, (char *)ipsec->ipsc_rcookie, sizeof(ipsec_cookie_t)); @@ -182,7 +210,7 @@ nat_t *nat; ipsec->ipsc_state = fr_addstate(&fi, &ipsec->ipsc_state, SI_WILDP); if (fi.fin_state != NULL) - fr_statederef(&fi, (ipstate_t **)&fi.fin_state); + fr_statederef(&fi, (ipstate_t **)&fi.fin_state, ifs); } ip->ip_p = p & 0xff; return 0; @@ -193,15 +221,18 @@ nat_t *nat; * For outgoing IKE packets. refresh timeouts for NAT & state entries, if * we can. If they have disappeared, recreate them. */ -int ippr_ipsec_inout(fin, aps, nat) +int ippr_ipsec_inout(fin, aps, nat, private) fr_info_t *fin; ap_session_t *aps; nat_t *nat; +void *private; { ipsec_pxy_t *ipsec; fr_info_t fi; ip_t *ip; int p; + ipf_stack_t *ifs = fin->fin_ifs; + ifs_ipsecpxy_t *ifsipsec = (ifs_ipsecpxy_t *)private; if ((fin->fin_out == 1) && (nat->nat_dir == NAT_INBOUND)) return 0; @@ -220,7 +251,7 @@ nat_t *nat; fi.fin_state = NULL; fi.fin_nat = NULL; fi.fin_fi.fi_p = IPPROTO_ESP; - fi.fin_fr = &ipsecfr; + fi.fin_fr = &ifsipsec->ipsecfr; fi.fin_data[0] = 0; fi.fin_data[1] = 0; ip->ip_p = IPPROTO_ESP; @@ -232,7 +263,7 @@ nat_t *nat; * Update NAT timeout/create NAT if missing. */ if (ipsec->ipsc_nat != NULL) - fr_queueback(&ipsec->ipsc_nat->nat_tqe); + fr_queueback(&ipsec->ipsc_nat->nat_tqe, ifs); else { ipsec->ipsc_nat = nat_new(&fi, &ipsec->ipsc_rule, &ipsec->ipsc_nat, @@ -248,20 +279,20 @@ nat_t *nat; /* * Update state timeout/create state if missing. 
*/ - READ_ENTER(&ipf_state); + READ_ENTER(&ifs->ifs_ipf_state); if (ipsec->ipsc_state != NULL) { - fr_queueback(&ipsec->ipsc_state->is_sti); + fr_queueback(&ipsec->ipsc_state->is_sti, ifs); ipsec->ipsc_state->is_die = nat->nat_age; - RWLOCK_EXIT(&ipf_state); + RWLOCK_EXIT(&ifs->ifs_ipf_state); } else { - RWLOCK_EXIT(&ipf_state); + RWLOCK_EXIT(&ifs->ifs_ipf_state); fi.fin_data[0] = 0; fi.fin_data[1] = 0; ipsec->ipsc_state = fr_addstate(&fi, &ipsec->ipsc_state, SI_WILDP); if (fi.fin_state != NULL) - fr_statederef(&fi, (ipstate_t **)&fi.fin_state); + fr_statederef(&fi, (ipstate_t **)&fi.fin_state, ifs); } ip->ip_p = p; } @@ -275,10 +306,12 @@ nat_t *nat; * in the same order (not reversed depending on packet flow direction as with * UDP/TCP port numbers). */ -int ippr_ipsec_match(fin, aps, nat) +/*ARGSUSED*/ +int ippr_ipsec_match(fin, aps, nat, private) fr_info_t *fin; ap_session_t *aps; nat_t *nat; +void *private; { ipsec_pxy_t *ipsec; u_32_t cookies[4]; @@ -319,8 +352,11 @@ nat_t *nat; /* * clean up after ourselves. */ -void ippr_ipsec_del(aps) +/*ARGSUSED*/ +void ippr_ipsec_del(aps, private, ifs) ap_session_t *aps; +void *private; +ipf_stack_t *ifs; { ipsec_pxy_t *ipsec; @@ -332,13 +368,13 @@ ap_session_t *aps; * *_del() is on a callback from aps_free(), from nat_delete() */ - READ_ENTER(&ipf_state); + READ_ENTER(&ifs->ifs_ipf_state); if (ipsec->ipsc_state != NULL) { - ipsec->ipsc_state->is_die = fr_ticks + 1; + ipsec->ipsc_state->is_die = ifs->ifs_fr_ticks + 1; ipsec->ipsc_state->is_me = NULL; fr_queuefront(&ipsec->ipsc_state->is_sti); } - RWLOCK_EXIT(&ipf_state); + RWLOCK_EXIT(&ifs->ifs_ipf_state); ipsec->ipsc_state = NULL; ipsec->ipsc_nat = NULL; diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_irc_pxy.c b/usr/src/uts/common/inet/ipf/netinet/ip_irc_pxy.c index a0b784053a..07a7e2aa8f 100644 --- a/usr/src/uts/common/inet/ipf/netinet/ip_irc_pxy.c +++ b/usr/src/uts/common/inet/ipf/netinet/ip_irc_pxy.c @@ -5,7 +5,7 @@ * * $Id: ip_irc_pxy.c,v 2.39.2.4 2005/02/04 10:22:55 darrenr Exp $ * - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -15,45 +15,64 @@ #define IPF_IRCBUFSZ 96 /* This *MUST* be >= 64! */ +typedef struct ifs_ircpxy { + frentry_t ircnatfr; + int irc_proxy_init; +} ifs_ircpxy_t; -int ippr_irc_init __P((void)); -void ippr_irc_fini __P((void)); -int ippr_irc_new __P((fr_info_t *, ap_session_t *, nat_t *)); -int ippr_irc_out __P((fr_info_t *, ap_session_t *, nat_t *)); -int ippr_irc_send __P((fr_info_t *, nat_t *)); + +int ippr_irc_init __P((void **, ipf_stack_t *)); +void ippr_irc_fini __P((void **, ipf_stack_t *)); +int ippr_irc_new __P((fr_info_t *, ap_session_t *, nat_t *, void *)); +int ippr_irc_out __P((fr_info_t *, ap_session_t *, nat_t *, void *)); +int ippr_irc_send __P((fr_info_t *, nat_t *, ifs_ircpxy_t *)); int ippr_irc_complete __P((ircinfo_t *, char *, size_t)); u_short ipf_irc_atoi __P((char **)); -static frentry_t ircnatfr; - -int irc_proxy_init = 0; - - /* * Initialize local structures. 
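Alongside the private proxy data, the ip_ipsec_pxy.c hunks above show the second recurring change in this patch: formerly global locks, tick counters and timeout queues (ipf_state, ipf_nat, fr_ticks, nat_utqe, ips_utqe) are now reached through the packet's ipf_stack_t, and helpers such as fr_queueback() and fr_statederef() take that stack pointer. A small hedged sketch of the access pattern; refresh_state() is an illustrative name, not a function in the patch:

/*
 * Sketch only: refresh a kept state entry using the per-instance
 * state lock instead of the old global ipf_state lock.
 */
static void
refresh_state(fin, is)
fr_info_t *fin;
ipstate_t *is;
{
	ipf_stack_t *ifs = fin->fin_ifs;	/* instance owning this packet */

	READ_ENTER(&ifs->ifs_ipf_state);	/* was READ_ENTER(&ipf_state) */
	if (is != NULL)
		fr_queueback(&is->is_sti, ifs);	/* timeout queues are per stack */
	RWLOCK_EXIT(&ifs->ifs_ipf_state);
}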
*/ -int ippr_irc_init() +/*ARGSUSED*/ +int ippr_irc_init(private, ifs) +void **private; +ipf_stack_t *ifs; { - bzero((char *)&ircnatfr, sizeof(ircnatfr)); - ircnatfr.fr_ref = 1; - ircnatfr.fr_flags = FR_INQUE|FR_PASS|FR_QUICK|FR_KEEPSTATE; - MUTEX_INIT(&ircnatfr.fr_lock, "IRC proxy rule lock"); - irc_proxy_init = 1; + ifs_ircpxy_t *ifsirc; + + KMALLOC(ifsirc, ifs_ircpxy_t *); + if (ifsirc == NULL) + return -1; + + bzero((char *)&ifsirc->ircnatfr, sizeof(ifsirc->ircnatfr)); + ifsirc->ircnatfr.fr_ref = 1; + ifsirc->ircnatfr.fr_flags = FR_INQUE|FR_PASS|FR_QUICK|FR_KEEPSTATE; + MUTEX_INIT(&ifsirc->ircnatfr.fr_lock, "IRC proxy rule lock"); + ifsirc->irc_proxy_init = 1; + + *private = (void *)ifsirc; return 0; } -void ippr_irc_fini() +/*ARGSUSED*/ +void ippr_irc_fini(private, ifs) +void **private; +ipf_stack_t *ifs; { - if (irc_proxy_init == 1) { - MUTEX_DESTROY(&ircnatfr.fr_lock); - irc_proxy_init = 0; + ifs_ircpxy_t *ifsirc = *((ifs_ircpxy_t **)private); + + if (ifsirc->irc_proxy_init == 1) { + MUTEX_DESTROY(&ifsirc->ircnatfr.fr_lock); + ifsirc->irc_proxy_init = 0; } + + KFREE(ifsirc); + *private = NULL; } -char *ippr_irc_dcctypes[] = { +static char *ippr_irc_dcctypes[] = { "CHAT ", /* CHAT chat ipnumber portnumber */ "SEND ", /* SEND filename ipnumber portnumber */ "MOVE ", @@ -227,10 +246,12 @@ size_t len; } -int ippr_irc_new(fin, aps, nat) +/*ARGSUSED*/ +int ippr_irc_new(fin, aps, nat, private) fr_info_t *fin; ap_session_t *aps; nat_t *nat; +void *private; { ircinfo_t *irc; @@ -249,9 +270,10 @@ nat_t *nat; } -int ippr_irc_send(fin, nat) +int ippr_irc_send(fin, nat, ifsirc) fr_info_t *fin; nat_t *nat; +ifs_ircpxy_t *ifsirc; { char ctcpbuf[IPF_IRCBUFSZ], newbuf[IPF_IRCBUFSZ]; tcphdr_t *tcp, tcph, *tcp2 = &tcph; @@ -268,6 +290,7 @@ nat_t *nat; #ifdef MENTAT mb_t *m1; #endif + ipf_stack_t *ifs = fin->fin_ifs; m = fin->fin_m; ip = fin->fin_ip; @@ -407,7 +430,7 @@ nat_t *nat; fi.fin_data[0] = ntohs(sp); fi.fin_data[1] = 0; fi.fin_dp = (char *)tcp2; - fi.fin_fr = &ircnatfr; + fi.fin_fr = &ifsirc->ircnatfr; fi.fin_dlen = sizeof(*tcp2); fi.fin_plen = fi.fin_hlen + sizeof(*tcp2); swip = ip->ip_src; @@ -420,7 +443,7 @@ nat_t *nat; (void) fr_addstate(&fi, NULL, SI_W_DPORT); if (fi.fin_state != NULL) - fr_statederef(&fi, (ipstate_t **)&fi.fin_state); + fr_statederef(&fi, (ipstate_t **)&fi.fin_state, ifs); } ip->ip_src = swip; } @@ -428,11 +451,12 @@ nat_t *nat; } -int ippr_irc_out(fin, aps, nat) +int ippr_irc_out(fin, aps, nat, private) fr_info_t *fin; ap_session_t *aps; nat_t *nat; +void *private; { aps = aps; /* LINT */ - return ippr_irc_send(fin, nat); + return ippr_irc_send(fin, nat, (ifs_ircpxy_t *)private); } diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_lookup.h b/usr/src/uts/common/inet/ipf/netinet/ip_lookup.h index 76cdd8fd0a..2f4311bc7c 100644 --- a/usr/src/uts/common/inet/ipf/netinet/ip_lookup.h +++ b/usr/src/uts/common/inet/ipf/netinet/ip_lookup.h @@ -1,7 +1,12 @@ /* * Copyright (C) 1993-2005 by Darren Reed. * See the IPFILTER.LICENCE file for details on licencing. - */ + * + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" #ifndef __IP_LOOKUP_H__ #define __IP_LOOKUP_H__ @@ -59,9 +64,38 @@ typedef struct iplookuplink { #define IPLT_ANON 0x80000000 -extern int ip_lookup_init __P((void)); -extern int ip_lookup_ioctl __P((caddr_t, ioctlcmd_t, int)); -extern void ip_lookup_unload __P((void)); -extern void ip_lookup_deref __P((int, void *)); + +typedef union { + struct iplookupiterkey { + char ilik_ival; + u_char ilik_type; /* IPLT_* */ + u_char ilik_otype; + char ilik_unit; /* IPL_LOG* */ + } ilik_unstr; + u_32_t ilik_key; +} iplookupiterkey_t; + +typedef struct ipflookupiter { + void *ili_data; + iplookupiterkey_t ili_lkey; + char ili_name[FR_GROUPLEN]; +} ipflookupiter_t; + +#define ili_key ili_lkey.ilik_key +#define ili_ival ili_lkey.ilik_unstr.ilik_ival +#define ili_unit ili_lkey.ilik_unstr.ilik_unit +#define ili_type ili_lkey.ilik_unstr.ilik_type +#define ili_otype ili_lkey.ilik_unstr.ilik_otype + +#define IPFLOOKUPITER_LIST 0 +#define IPFLOOKUPITER_NODE 1 + + +extern int ip_lookup_init __P((ipf_stack_t *)); +extern int ip_lookup_ioctl __P((caddr_t, ioctlcmd_t, int, int, void *, ipf_stack_t *)); +extern void ip_lookup_unload __P((ipf_stack_t *)); +extern void ip_lookup_deref __P((int, void *, ipf_stack_t *)); +extern int ip_lookup_iterate __P((void *, int, void *, ipf_stack_t *)); +extern void ip_lookup_iterderef __P((u_32_t, void *, ipf_stack_t *)); #endif /* __IP_LOOKUP_H__ */ diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_nat.h b/usr/src/uts/common/inet/ipf/netinet/ip_nat.h index 9b47591591..2d5a3106cf 100644 --- a/usr/src/uts/common/inet/ipf/netinet/ip_nat.h +++ b/usr/src/uts/common/inet/ipf/netinet/ip_nat.h @@ -6,7 +6,7 @@ * @(#)ip_nat.h 1.5 2/4/96 * $Id: ip_nat.h,v 2.90.2.11 2005/06/18 02:41:32 darrenr Exp $ * - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
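The iplookupiterkey_t and ipflookupiter_t additions to ip_lookup.h above back the new pool and hash-table iterators (ip_pool_getnext(), fr_htable_getnext()): the caller describes what it wants field by field, while the kernel keys its iteration token on the packed 32-bit value. A short illustrative sketch; the function name is hypothetical, and IPL_LOGIPF/IPLT_POOL are assumed from the existing ipf headers that the ilik_unit/ilik_type comments refer to:

/*
 * Sketch: fill in an iterator request and return the packed key that
 * the ili_* convenience macros overlay on ilik_key.
 */
static u_32_t
want_pool_nodes(iter)
ipflookupiter_t *iter;
{
	iter->ili_ival = 0;
	iter->ili_unit = IPL_LOGIPF;		/* which ipf device (IPL_LOG*) */
	iter->ili_type = IPLT_POOL;		/* iterate address pools (IPLT_*) */
	iter->ili_otype = IPFLOOKUPITER_NODE;	/* walk nodes rather than lists */
	return iter->ili_key;			/* all of the above, packed */
}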
*/ #pragma ident "%Z%%M% %I% %E% SMI" @@ -124,6 +124,7 @@ typedef struct nat { int nat_hv[2]; char nat_ifnames[2][LIFNAMSIZ]; int nat_rev; /* 0 = forward, 1 = reverse */ + int nat_redir; /* copy of in_redir */ } nat_t; #define nat_inip nat_inip6.in4 @@ -324,6 +325,8 @@ typedef struct nattrpnt { typedef struct hostmap { struct hostmap *hm_next; struct hostmap **hm_pnext; + struct hostmap *hm_hnext; + struct hostmap **hm_phnext; struct ipnat *hm_ipnat; struct in_addr hm_srcip; struct in_addr hm_dstip; @@ -374,6 +377,7 @@ typedef struct natstat { u_int ns_hostmap_sz; nat_t *ns_instances; nattrpnt_t *ns_trpntlist; + hostmap_t *ns_maplist; u_long *ns_bucketlen[2]; } natstat_t; @@ -419,34 +423,15 @@ typedef struct natlog { #define NAT_LOCKHELD 0x40000000 -extern u_int ipf_nattable_sz; -extern u_int ipf_nattable_max; -extern u_int ipf_natrules_sz; -extern u_int ipf_rdrrules_sz; -extern u_int ipf_hostmap_sz; -extern u_int fr_nat_maxbucket; -extern u_int fr_nat_maxbucket_reset; -extern int fr_nat_lock; -extern void fr_nataddrsync __P((void *, struct in_addr *)); -extern void fr_natifpsync __P((int, void *, char *)); -extern u_long fr_defnatage; -extern u_long fr_defnaticmpage; -extern u_long fr_defnatipage; - /* nat_table[0] -> hashed list sorted by inside (ip, port) */ - /* nat_table[1] -> hashed list sorted by outside (ip, port) */ -extern nat_t **nat_table[2]; -extern nat_t *nat_instances; -extern ipnat_t *nat_list; -extern ipnat_t **nat_rules; -extern ipnat_t **rdr_rules; -extern ipftq_t *nat_utqe; -extern natstat_t nat_stats; +extern void fr_natsync __P((void *, ipf_stack_t *)); +extern void fr_nataddrsync __P((void *, struct in_addr *, ipf_stack_t *)); +extern void fr_natifpsync __P((int, void *, char *, ipf_stack_t *)); #if defined(__OpenBSD__) -extern void nat_ifdetach __P((void *)); +extern void nat_ifdetach __P((void *, ipf_stack_t *)); #endif -extern int fr_nat_ioctl __P((caddr_t, ioctlcmd_t, int)); -extern int fr_natinit __P((void)); +extern int fr_nat_ioctl __P((caddr_t, ioctlcmd_t, int, int, void *, ipf_stack_t *)); +extern int fr_natinit __P((ipf_stack_t *)); extern nat_t *nat_new __P((fr_info_t *, ipnat_t *, nat_t **, u_int, int)); extern nat_t *nat_outlookup __P((fr_info_t *, u_int, u_int, struct in_addr, struct in_addr)); @@ -456,23 +441,25 @@ extern nat_t *nat_inlookup __P((fr_info_t *, u_int, u_int, struct in_addr, extern nat_t *nat_tnlookup __P((fr_info_t *, int)); extern nat_t *nat_maplookup __P((void *, u_int, struct in_addr, struct in_addr)); -extern nat_t *nat_lookupredir __P((natlookup_t *)); +extern nat_t *nat_lookupredir __P((natlookup_t *, ipf_stack_t *)); extern nat_t *nat_icmperrorlookup __P((fr_info_t *, int)); extern nat_t *nat_icmperror __P((fr_info_t *, u_int *, int)); -extern int nat_insert __P((nat_t *, int)); +extern int nat_insert __P((nat_t *, int, ipf_stack_t *)); extern int fr_checknatout __P((fr_info_t *, u_32_t *)); extern int fr_natout __P((fr_info_t *, nat_t *, int, u_32_t)); extern int fr_checknatin __P((fr_info_t *, u_32_t *)); extern int fr_natin __P((fr_info_t *, nat_t *, int, u_32_t)); -extern void fr_natunload __P((void)); -extern void fr_natexpire __P((void)); -extern void nat_log __P((struct nat *, u_int)); +extern void fr_natunload __P((ipf_stack_t *)); +extern void fr_natexpire __P((ipf_stack_t *)); +extern void nat_log __P((struct nat *, u_int, ipf_stack_t *)); extern void fix_incksum __P((u_short *, u_32_t)); extern void fix_outcksum __P((u_short *, u_32_t)); -extern void fr_natderef __P((nat_t **)); +extern void fr_ipnatderef __P((ipnat_t 
**, ipf_stack_t *)); +extern void fr_natderef __P((nat_t **, ipf_stack_t *)); extern u_short *nat_proto __P((fr_info_t *, nat_t *, u_int)); extern void nat_update __P((fr_info_t *, nat_t *, ipnat_t *)); -extern void fr_setnatqueue __P((nat_t *, int)); +extern void fr_setnatqueue __P((nat_t *, int, ipf_stack_t *)); +extern void fr_hostmapderef __P((hostmap_t **)); #endif /* __IP_NAT_H__ */ diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_netbios_pxy.c b/usr/src/uts/common/inet/ipf/netinet/ip_netbios_pxy.c index 8bfa8fac47..ba29cd89a2 100644 --- a/usr/src/uts/common/inet/ipf/netinet/ip_netbios_pxy.c +++ b/usr/src/uts/common/inet/ipf/netinet/ip_netbios_pxy.c @@ -30,46 +30,73 @@ * SUCH DAMAGE. * * $Id: ip_netbios_pxy.c,v 2.8.2.1 2005/07/15 21:56:51 darrenr Exp $ + * + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. */ -#define IPF_NETBIOS_PROXY +#pragma ident "%Z%%M% %I% %E% SMI" -int ippr_netbios_init __P((void)); -void ippr_netbios_fini __P((void)); -int ippr_netbios_out __P((fr_info_t *, ap_session_t *, nat_t *)); +#define IPF_NETBIOS_PROXY -static frentry_t netbiosfr; +typedef struct ifs_netbiospxy { + frentry_t netbiosfr; + int netbios_proxy_init; +} ifs_netbiospxy_t; -int netbios_proxy_init = 0; +int ippr_netbios_init __P((void **, ipf_stack_t *)); +void ippr_netbios_fini __P((void **, ipf_stack_t *)); +int ippr_netbios_out __P((fr_info_t *, ap_session_t *, nat_t *, void *)); /* * Initialize local structures. */ -int ippr_netbios_init() +/*ARGSUSED*/ +int ippr_netbios_init(private, ifs) +void **private; +ipf_stack_t *ifs; { - bzero((char *)&netbiosfr, sizeof(netbiosfr)); - netbiosfr.fr_ref = 1; - netbiosfr.fr_flags = FR_INQUE|FR_PASS|FR_QUICK|FR_KEEPSTATE; - MUTEX_INIT(&netbiosfr.fr_lock, "NETBIOS proxy rule lock"); - netbios_proxy_init = 1; + ifs_netbiospxy_t *ifsnetbios; + + KMALLOC(ifsnetbios, ifs_netbiospxy_t *); + if (ifsnetbios == NULL) + return -1; + + bzero((char *)&ifsnetbios->netbiosfr, sizeof(ifsnetbios->netbiosfr)); + ifsnetbios->netbiosfr.fr_ref = 1; + ifsnetbios->netbiosfr.fr_flags = FR_INQUE|FR_PASS|FR_QUICK|FR_KEEPSTATE; + MUTEX_INIT(&ifsnetbios->netbiosfr.fr_lock, "NETBIOS proxy rule lock"); + ifsnetbios->netbios_proxy_init = 1; + + *private = (void *)ifsnetbios; return 0; } -void ippr_netbios_fini() +/*ARGSUSED*/ +void ippr_netbios_fini(private, ifs) +void **private; +ipf_stack_t *ifs; { - if (netbios_proxy_init == 1) { - MUTEX_DESTROY(&netbiosfr.fr_lock); - netbios_proxy_init = 0; + ifs_netbiospxy_t *ifsnetbios = *((ifs_netbiospxy_t **)private); + + if (ifsnetbios->netbios_proxy_init == 1) { + MUTEX_DESTROY(&ifsnetbios->netbiosfr.fr_lock); + ifsnetbios->netbios_proxy_init = 0; } + + KFREE(ifsnetbios); + *private = NULL; } -int ippr_netbios_out(fin, aps, nat) +/*ARGSUSED*/ +int ippr_netbios_out(fin, aps, nat, private) fr_info_t *fin; ap_session_t *aps; nat_t *nat; +void *private; { char dgmbuf[6]; int off, dlen; diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_pool.h b/usr/src/uts/common/inet/ipf/netinet/ip_pool.h index b40ba2b0fb..04f12712f7 100644 --- a/usr/src/uts/common/inet/ipf/netinet/ip_pool.h +++ b/usr/src/uts/common/inet/ipf/netinet/ip_pool.h @@ -5,7 +5,7 @@ * * $Id: ip_pool.h,v 2.26.2.3 2005/06/12 07:18:27 darrenr Exp $ * - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -40,6 +40,7 @@ typedef struct ip_pool_node { addrfamily_t ipn_addr; addrfamily_t ipn_mask; int ipn_info; + int ipn_ref; char ipn_name[FR_GROUPLEN]; u_long ipn_hits; struct ip_pool_node *ipn_next, **ipn_pnext; @@ -68,24 +69,22 @@ typedef struct ip_pool_stat { ip_pool_t *ipls_list[IPL_LOGSIZE]; } ip_pool_stat_t; - -extern ip_pool_stat_t ipoolstat; -extern ip_pool_t *ip_pool_list[IPL_LOGSIZE]; - -extern int ip_pool_search __P((void *, int, void *)); -extern int ip_pool_init __P((void)); -extern void ip_pool_fini __P((void)); -extern int ip_pool_create __P((iplookupop_t *)); +extern int ip_pool_search __P((void *, int, void *, ipf_stack_t *)); +extern int ip_pool_init __P((ipf_stack_t *)); +extern void ip_pool_fini __P((ipf_stack_t *)); +extern int ip_pool_create __P((iplookupop_t *, ipf_stack_t *)); extern int ip_pool_insert __P((ip_pool_t *, addrfamily_t *, - addrfamily_t *, int)); -extern int ip_pool_remove __P((ip_pool_t *, ip_pool_node_t *)); -extern int ip_pool_destroy __P((iplookupop_t *)); -extern void ip_pool_free __P((ip_pool_t *)); -extern void ip_pool_deref __P((ip_pool_t *)); -extern void *ip_pool_find __P((int, char *)); + addrfamily_t *, int, ipf_stack_t *)); +extern int ip_pool_remove __P((ip_pool_t *, ip_pool_node_t *, + ipf_stack_t *)); +extern int ip_pool_destroy __P((iplookupop_t *, ipf_stack_t *)); +extern void ip_pool_free __P((ip_pool_t *, ipf_stack_t *)); +extern void ip_pool_deref __P((ip_pool_t *, ipf_stack_t *)); +extern void *ip_pool_find __P((int, char *, ipf_stack_t *)); extern ip_pool_node_t *ip_pool_findeq __P((ip_pool_t *, addrfamily_t *, addrfamily_t *)); -extern int ip_pool_flush __P((iplookupflush_t *)); -extern int ip_pool_statistics __P((iplookupop_t *)); - +extern int ip_pool_flush __P((iplookupflush_t *, ipf_stack_t *)); +extern int ip_pool_statistics __P((iplookupop_t *, ipf_stack_t *)); +extern int ip_pool_getnext __P((ipftoken_t *, ipflookupiter_t *, ipf_stack_t *)); +extern void ip_pool_iterderef __P((u_int, int, void *, ipf_stack_t *)); #endif /* __IP_POOL_H__ */ diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_pptp_pxy.c b/usr/src/uts/common/inet/ipf/netinet/ip_pptp_pxy.c index 480edf1a00..00303f729b 100644 --- a/usr/src/uts/common/inet/ipf/netinet/ip_pptp_pxy.c +++ b/usr/src/uts/common/inet/ipf/netinet/ip_pptp_pxy.c @@ -6,7 +6,7 @@ * * $Id: ip_pptp_pxy.c,v 2.10.2.10 2005/07/15 21:56:52 darrenr Exp $ * - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -14,6 +14,11 @@ #define IPF_PPTP_PROXY +typedef struct ifs_pptppxy { + frentry_t pptpfr; + int pptp_proxy_init; +} ifs_pptppxy_t; + typedef struct pptp_hdr { u_short pptph_len; u_short pptph_type; @@ -38,56 +43,73 @@ typedef struct pptp_hdr { #define PPTP_MTCTL_LINKINFO 15 -int ippr_pptp_init __P((void)); -void ippr_pptp_fini __P((void)); -int ippr_pptp_new __P((fr_info_t *, ap_session_t *, nat_t *)); -void ippr_pptp_del __P((ap_session_t *)); -int ippr_pptp_inout __P((fr_info_t *, ap_session_t *, nat_t *)); -void ippr_pptp_donatstate __P((fr_info_t *, nat_t *, pptp_pxy_t *)); -int ippr_pptp_message __P((fr_info_t *, nat_t *, pptp_pxy_t *, pptp_side_t *)); -int ippr_pptp_nextmessage __P((fr_info_t *, nat_t *, pptp_pxy_t *, int)); -int ippr_pptp_mctl __P((fr_info_t *, nat_t *, pptp_pxy_t *, pptp_side_t *)); - -static frentry_t pptpfr; - -int pptp_proxy_init = 0; -int ippr_pptp_debug = 0; -int ippr_pptp_gretimeout = IPF_TTLVAL(120); /* 2 minutes */ +int ippr_pptp_init __P((void **, ipf_stack_t *)); +void ippr_pptp_fini __P((void **, ipf_stack_t *)); +int ippr_pptp_new __P((fr_info_t *, ap_session_t *, nat_t *, void *)); +void ippr_pptp_del __P((ap_session_t *, void *, ipf_stack_t *)); +int ippr_pptp_inout __P((fr_info_t *, ap_session_t *, nat_t *, void *)); +void ippr_pptp_donatstate __P((fr_info_t *, nat_t *, pptp_pxy_t *, ifs_pptppxy_t *)); +int ippr_pptp_message __P((fr_info_t *, nat_t *, pptp_pxy_t *, pptp_side_t *, ifs_pptppxy_t *)); +int ippr_pptp_nextmessage __P((fr_info_t *, nat_t *, pptp_pxy_t *, int, ifs_pptppxy_t *)); +int ippr_pptp_mctl __P((fr_info_t *, nat_t *, pptp_pxy_t *, pptp_side_t *, ifs_pptppxy_t *)); +int ippr_pptp_debug = 0; /* It never changes */ +int ippr_pptp_gretimeout = IPF_TTLVAL(120); /* 2 minutes, never changes */ /* * PPTP application proxy initialization. */ -int ippr_pptp_init() +/*ARGSUSED*/ +int ippr_pptp_init(private, ifs) +void **private; +ipf_stack_t *ifs; { - bzero((char *)&pptpfr, sizeof(pptpfr)); - pptpfr.fr_ref = 1; - pptpfr.fr_age[0] = ippr_pptp_gretimeout; - pptpfr.fr_age[1] = ippr_pptp_gretimeout; - pptpfr.fr_flags = FR_OUTQUE|FR_PASS|FR_QUICK|FR_KEEPSTATE; - MUTEX_INIT(&pptpfr.fr_lock, "PPTP proxy rule lock"); - pptp_proxy_init = 1; + ifs_pptppxy_t *ifspptp; + + KMALLOC(ifspptp, ifs_pptppxy_t *); + if (ifspptp == NULL) + return -1; + + bzero((char *)&ifspptp->pptpfr, sizeof(ifspptp->pptpfr)); + ifspptp->pptpfr.fr_ref = 1; + ifspptp->pptpfr.fr_age[0] = ippr_pptp_gretimeout; + ifspptp->pptpfr.fr_age[1] = ippr_pptp_gretimeout; + ifspptp->pptpfr.fr_flags = FR_OUTQUE|FR_PASS|FR_QUICK|FR_KEEPSTATE; + MUTEX_INIT(&ifspptp->pptpfr.fr_lock, "PPTP proxy rule lock"); + ifspptp->pptp_proxy_init = 1; + + *private = (void *)ifspptp; return 0; } -void ippr_pptp_fini() +/*ARGSUSED*/ +void ippr_pptp_fini(private, ifs) +void **private; +ipf_stack_t *ifs; { - if (pptp_proxy_init == 1) { - MUTEX_DESTROY(&pptpfr.fr_lock); - pptp_proxy_init = 0; + ifs_pptppxy_t *ifspptp = *((ifs_pptppxy_t **)private); + + if (ifspptp->pptp_proxy_init == 1) { + MUTEX_DESTROY(&ifspptp->pptpfr.fr_lock); + ifspptp->pptp_proxy_init = 0; } + + KFREE(ifspptp); + *private = NULL; } /* * Setup for a new PPTP proxy. 
*/ -int ippr_pptp_new(fin, aps, nat) +/*ARGSUSED*/ +int ippr_pptp_new(fin, aps, nat, private) fr_info_t *fin; ap_session_t *aps; nat_t *nat; +void *private; { pptp_pxy_t *pptp; ipnat_t *ipn; @@ -147,16 +169,18 @@ nat_t *nat; } -void ippr_pptp_donatstate(fin, nat, pptp) +void ippr_pptp_donatstate(fin, nat, pptp, ifspptp) fr_info_t *fin; nat_t *nat; pptp_pxy_t *pptp; +ifs_pptppxy_t *ifspptp; { fr_info_t fi; grehdr_t gre; nat_t *nat2; u_char p; ip_t *ip; + ipf_stack_t *ifs = fin->fin_ifs; ip = fin->fin_ip; p = ip->ip_p; @@ -168,7 +192,7 @@ pptp_pxy_t *pptp; fi.fin_state = NULL; fi.fin_nat = NULL; fi.fin_fi.fi_p = IPPROTO_GRE; - fi.fin_fr = &pptpfr; + fi.fin_fr = &ifspptp->pptpfr; if ((nat->nat_dir == NAT_OUTBOUND && fin->fin_out) || (nat->nat_dir == NAT_INBOUND && !fin->fin_out)) { fi.fin_data[0] = pptp->pptp_call[0]; @@ -196,7 +220,7 @@ pptp_pxy_t *pptp; * Update NAT timeout/create NAT if missing. */ if (nat2 != NULL) - fr_queueback(&nat2->nat_tqe); + fr_queueback(&nat2->nat_tqe, ifs); else { nat2 = nat_new(&fi, &pptp->pptp_rule, &pptp->pptp_nat, NAT_SLAVE, nat->nat_dir); @@ -207,12 +231,12 @@ pptp_pxy_t *pptp; } } - READ_ENTER(&ipf_state); + READ_ENTER(&ifs->ifs_ipf_state); if (pptp->pptp_state != NULL) { - fr_queueback(&pptp->pptp_state->is_sti); - RWLOCK_EXIT(&ipf_state); + fr_queueback(&pptp->pptp_state->is_sti, ifs); + RWLOCK_EXIT(&ifs->ifs_ipf_state); } else { - RWLOCK_EXIT(&ipf_state); + RWLOCK_EXIT(&ifs->ifs_ipf_state); if (nat->nat_dir == NAT_INBOUND) fi.fin_fi.fi_daddr = nat2->nat_inip.s_addr; else @@ -221,7 +245,7 @@ pptp_pxy_t *pptp; pptp->pptp_state = fr_addstate(&fi, &pptp->pptp_state, 0); if (fi.fin_state != NULL) - fr_statederef(&fi, (ipstate_t **)&fi.fin_state); + fr_statederef(&fi, (ipstate_t **)&fi.fin_state, ifs); } ip->ip_p = p; return; @@ -233,11 +257,12 @@ pptp_pxy_t *pptp; * build it up completely (fits in our buffer) then pass it off to the message * parsing function. 
*/ -int ippr_pptp_nextmessage(fin, nat, pptp, rev) +int ippr_pptp_nextmessage(fin, nat, pptp, rev, ifspptp) fr_info_t *fin; nat_t *nat; pptp_pxy_t *pptp; int rev; +ifs_pptppxy_t *ifspptp; { static char *funcname = "ippr_pptp_nextmessage"; pptp_side_t *pptps; @@ -339,7 +364,7 @@ int rev; if (pptps->pptps_len > pptps->pptps_bytes) break; - (void) ippr_pptp_message(fin, nat, pptp, pptps); + (void) ippr_pptp_message(fin, nat, pptp, pptps, ifspptp); pptps->pptps_wptr = pptps->pptps_buffer; pptps->pptps_gothdr = 0; pptps->pptps_bytes = 0; @@ -357,18 +382,19 @@ int rev; /* * handle a complete PPTP message */ -int ippr_pptp_message(fin, nat, pptp, pptps) +int ippr_pptp_message(fin, nat, pptp, pptps, ifspptp) fr_info_t *fin; nat_t *nat; pptp_pxy_t *pptp; pptp_side_t *pptps; +ifs_pptppxy_t *ifspptp; { pptp_hdr_t *hdr = (pptp_hdr_t *)pptps->pptps_buffer; switch (ntohs(hdr->pptph_type)) { case PPTP_MSGTYPE_CTL : - (void) ippr_pptp_mctl(fin, nat, pptp, pptps); + (void) ippr_pptp_mctl(fin, nat, pptp, pptps, ifspptp); break; default : @@ -381,11 +407,12 @@ pptp_side_t *pptps; /* * handle a complete PPTP control message */ -int ippr_pptp_mctl(fin, nat, pptp, pptps) +int ippr_pptp_mctl(fin, nat, pptp, pptps, ifspptp) fr_info_t *fin; nat_t *nat; pptp_pxy_t *pptp; pptp_side_t *pptps; +ifs_pptppxy_t *ifspptp; { u_short *buffer = (u_short *)(pptps->pptps_buffer); pptp_side_t *pptpo; @@ -430,7 +457,7 @@ pptp_side_t *pptps; pptps->pptps_state = PPTP_MTCTL_OUTREP; pptp->pptp_call[0] = buffer[7]; pptp->pptp_call[1] = buffer[6]; - ippr_pptp_donatstate(fin, nat, pptp); + ippr_pptp_donatstate(fin, nat, pptp, ifspptp); } break; case PPTP_MTCTL_INREQ : @@ -441,7 +468,7 @@ pptp_side_t *pptps; pptps->pptps_state = PPTP_MTCTL_INREP; pptp->pptp_call[0] = buffer[7]; pptp->pptp_call[1] = buffer[6]; - ippr_pptp_donatstate(fin, nat, pptp); + ippr_pptp_donatstate(fin, nat, pptp, ifspptp); } break; case PPTP_MTCTL_INCONNECT : @@ -469,10 +496,11 @@ pptp_side_t *pptps; * For outgoing PPTP packets. refresh timeouts for NAT & state entries, if * we can. If they have disappeared, recreate them. */ -int ippr_pptp_inout(fin, aps, nat) +int ippr_pptp_inout(fin, aps, nat, private) fr_info_t *fin; ap_session_t *aps; nat_t *nat; +void *private; { pptp_pxy_t *pptp; tcphdr_t *tcp; @@ -494,15 +522,18 @@ nat_t *nat; pptp->pptp_side[rev].pptps_nexthdr = ntohl(tcp->th_seq) + 1; } return ippr_pptp_nextmessage(fin, nat, (pptp_pxy_t *)aps->aps_data, - rev); + rev, (ifs_pptppxy_t *)private); } /* * clean up after ourselves. */ -void ippr_pptp_del(aps) +/*ARGSUSED*/ +void ippr_pptp_del(aps, private, ifs) ap_session_t *aps; +void *private; +ipf_stack_t *ifs; { pptp_pxy_t *pptp; @@ -514,15 +545,16 @@ ap_session_t *aps; * *_del() is on a callback from aps_free(), from nat_delete() */ - READ_ENTER(&ipf_state); + READ_ENTER(&ifs->ifs_ipf_state); if (pptp->pptp_state != NULL) { - pptp->pptp_state->is_die = fr_ticks + 1; + pptp->pptp_state->is_die = ifs->ifs_fr_ticks + 1; pptp->pptp_state->is_me = NULL; fr_queuefront(&pptp->pptp_state->is_sti); } - RWLOCK_EXIT(&ipf_state); + RWLOCK_EXIT(&ifs->ifs_ipf_state); pptp->pptp_state = NULL; pptp->pptp_nat = NULL; + } } diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_proxy.h b/usr/src/uts/common/inet/ipf/netinet/ip_proxy.h index 1e0bedef64..7c490673e0 100644 --- a/usr/src/uts/common/inet/ipf/netinet/ip_proxy.h +++ b/usr/src/uts/common/inet/ipf/netinet/ip_proxy.h @@ -4,8 +4,13 @@ * See the IPFILTER.LICENCE file for details on licencing. 
* * $Id: ip_proxy.h,v 2.31.2.3 2005/06/18 02:41:33 darrenr Exp $ + * + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. */ +#pragma ident "%Z%%M% %I% %E% SMI" + #ifndef __IP_PROXY_H__ #define __IP_PROXY_H__ @@ -97,14 +102,15 @@ typedef struct aproxy { u_char apr_p; /* protocol */ int apr_ref; /* +1 per rule referencing it */ int apr_flags; - int (* apr_init) __P((void)); - void (* apr_fini) __P((void)); - int (* apr_new) __P((fr_info_t *, ap_session_t *, struct nat *)); - void (* apr_del) __P((ap_session_t *)); - int (* apr_inpkt) __P((fr_info_t *, ap_session_t *, struct nat *)); - int (* apr_outpkt) __P((fr_info_t *, ap_session_t *, struct nat *)); - int (* apr_match) __P((fr_info_t *, ap_session_t *, struct nat *)); - int (* apr_ctl) __P((struct aproxy *, struct ap_control *)); + void *apr_private; /* proxy private data */ + int (* apr_init) __P((void **, ipf_stack_t *)); + void (* apr_fini) __P((void **, ipf_stack_t *)); + int (* apr_new) __P((fr_info_t *, ap_session_t *, struct nat *, void *)); + void (* apr_del) __P((ap_session_t *, void *, ipf_stack_t *)); + int (* apr_inpkt) __P((fr_info_t *, ap_session_t *, struct nat *, void *)); + int (* apr_outpkt) __P((fr_info_t *, ap_session_t *, struct nat *, void *)); + int (* apr_match) __P((fr_info_t *, ap_session_t *, struct nat *, void *)); + int (* apr_ctl) __P((struct aproxy *, struct ap_control *, void *)); } aproxy_t; #define APR_DELETE 1 @@ -436,23 +442,18 @@ typedef struct rpcb_session { */ #define XDRALIGN(x) ((((x) % 4) != 0) ? ((((x) + 3) / 4) * 4) : (x)) -extern ap_session_t *ap_sess_tab[AP_SESS_SIZE]; -extern ap_session_t *ap_sess_list; -extern aproxy_t ap_proxies[]; -extern int ippr_ftp_pasvonly; - -extern int appr_add __P((aproxy_t *)); -extern int appr_ctl __P((ap_ctl_t *)); -extern int appr_del __P((aproxy_t *)); -extern int appr_init __P((void)); -extern void appr_unload __P((void)); +extern int appr_add __P((aproxy_t *, ipf_stack_t *)); +extern int appr_ctl __P((ap_ctl_t *, ipf_stack_t *)); +extern int appr_del __P((aproxy_t *, ipf_stack_t *)); +extern int appr_init __P((ipf_stack_t *)); +extern void appr_unload __P((ipf_stack_t *)); extern int appr_ok __P((fr_info_t *, tcphdr_t *, struct ipnat *)); extern int appr_match __P((fr_info_t *, struct nat *)); extern void appr_free __P((aproxy_t *)); -extern void aps_free __P((ap_session_t *)); +extern void aps_free __P((ap_session_t *, ipf_stack_t *)); extern int appr_check __P((fr_info_t *, struct nat *)); -extern aproxy_t *appr_lookup __P((u_int, char *)); +extern aproxy_t *appr_lookup __P((u_int, char *, ipf_stack_t *)); extern int appr_new __P((fr_info_t *, struct nat *)); -extern int appr_ioctl __P((caddr_t, ioctlcmd_t, int)); +extern int appr_ioctl __P((caddr_t, ioctlcmd_t, int, ipf_stack_t *)); #endif /* __IP_PROXY_H__ */ diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_raudio_pxy.c b/usr/src/uts/common/inet/ipf/netinet/ip_raudio_pxy.c index a9abc5809b..0f63f273e7 100644 --- a/usr/src/uts/common/inet/ipf/netinet/ip_raudio_pxy.c +++ b/usr/src/uts/common/inet/ipf/netinet/ip_raudio_pxy.c @@ -5,7 +5,7 @@ * * $Id: ip_raudio_pxy.c,v 1.40.2.3 2005/02/04 10:22:55 darrenr Exp $ * - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
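The ip_proxy.h changes above give each aproxy_t an apr_private slot and widen every callback so it receives that pointer (and, for init/fini/del, the ipf_stack_t). Since per-stack data hangs off apr_private, each IP instance presumably carries its own copy of the proxy table; the loop below is only a sketch of how the new hooks would be driven, and the real walk lives in ip_proxy.c, outside the hunks shown here:

/*
 * Sketch (assumption): initialise every proxy for one IP instance by
 * handing each its own private-data slot.  proxy_setup_all() is an
 * illustrative name, not a function added by this patch.
 */
static int
proxy_setup_all(ap, ifs)
aproxy_t *ap;
ipf_stack_t *ifs;
{
	for (; ap != NULL; ap = ap->apr_next) {
		if (ap->apr_init == NULL)
			continue;
		if ((*ap->apr_init)(&ap->apr_private, ifs) == -1)
			return -1;	/* proxy could not allocate its state */
	}
	return 0;
}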
*/ @@ -13,49 +13,69 @@ #define IPF_RAUDIO_PROXY +typedef struct ifs_raudiopxy { + frentry_t raudiofr; + int raudio_proxy_init; +} ifs_raudiopxy_t; -int ippr_raudio_init __P((void)); -void ippr_raudio_fini __P((void)); -int ippr_raudio_new __P((fr_info_t *, ap_session_t *, nat_t *)); -int ippr_raudio_in __P((fr_info_t *, ap_session_t *, nat_t *)); -int ippr_raudio_out __P((fr_info_t *, ap_session_t *, nat_t *)); - -static frentry_t raudiofr; - -int raudio_proxy_init = 0; - +int ippr_raudio_init __P((void **, ipf_stack_t *)); +void ippr_raudio_fini __P((void **, ipf_stack_t *)); +int ippr_raudio_new __P((fr_info_t *, ap_session_t *, nat_t *, void *)); +int ippr_raudio_in __P((fr_info_t *, ap_session_t *, nat_t *, void *)); +int ippr_raudio_out __P((fr_info_t *, ap_session_t *, nat_t *, void *)); /* * Real Audio application proxy initialization. */ -int ippr_raudio_init() +/*ARGSUSED*/ +int ippr_raudio_init(private, ifs) +void **private; +ipf_stack_t *ifs; { - bzero((char *)&raudiofr, sizeof(raudiofr)); - raudiofr.fr_ref = 1; - raudiofr.fr_flags = FR_INQUE|FR_PASS|FR_QUICK|FR_KEEPSTATE; - MUTEX_INIT(&raudiofr.fr_lock, "Real Audio proxy rule lock"); - raudio_proxy_init = 1; + ifs_raudiopxy_t *ifsraudio; + + KMALLOC(ifsraudio, ifs_raudiopxy_t *); + if (ifsraudio == NULL) + return -1; + + bzero((char *)&ifsraudio->raudiofr, sizeof(ifsraudio->raudiofr)); + ifsraudio->raudiofr.fr_ref = 1; + ifsraudio->raudiofr.fr_flags = FR_INQUE|FR_PASS|FR_QUICK|FR_KEEPSTATE; + MUTEX_INIT(&ifsraudio->raudiofr.fr_lock, "Real Audio proxy rule lock"); + ifsraudio->raudio_proxy_init = 1; + + *private = (void *)ifsraudio; return 0; } -void ippr_raudio_fini() +/*ARGSUSED*/ +void ippr_raudio_fini(private, ifs) +void **private; +ipf_stack_t *ifs; { - if (raudio_proxy_init == 1) { - MUTEX_DESTROY(&raudiofr.fr_lock); - raudio_proxy_init = 0; + ifs_raudiopxy_t *ifsraudio = *((ifs_raudiopxy_t **)private); + + if (ifsraudio->raudio_proxy_init == 1) { + MUTEX_DESTROY(&ifsraudio->raudiofr.fr_lock); + ifsraudio->raudio_proxy_init = 0; } + + KFREE(ifsraudio); + *private = NULL; } /* * Setup for a new proxy to handle Real Audio. */ -int ippr_raudio_new(fin, aps, nat) +/*ARGSUSED*/ +int ippr_raudio_new(fin, aps, nat, private) fr_info_t *fin; ap_session_t *aps; nat_t *nat; +void *private; { raudio_t *rap; @@ -74,11 +94,12 @@ nat_t *nat; } - -int ippr_raudio_out(fin, aps, nat) +/*ARGSUSED*/ +int ippr_raudio_out(fin, aps, nat, private) fr_info_t *fin; ap_session_t *aps; nat_t *nat; +void *private; { raudio_t *rap = aps->aps_data; unsigned char membuf[512 + 1], *s; @@ -181,10 +202,11 @@ nat_t *nat; } -int ippr_raudio_in(fin, aps, nat) +int ippr_raudio_in(fin, aps, nat, private) fr_info_t *fin; ap_session_t *aps; nat_t *nat; +void *private; { unsigned char membuf[IPF_MAXPORTLEN + 1], *s; tcphdr_t *tcp, tcph, *tcp2 = &tcph; @@ -199,6 +221,8 @@ nat_t *nat; u_char swp; ip_t *ip; mb_t *m; + ipf_stack_t *ifs = fin->fin_ifs; + ifs_raudiopxy_t *ifsraudio = (ifs_raudiopxy_t *)private; /* * Wait until we've seen the end of the start messages and even then @@ -284,7 +308,7 @@ nat_t *nat; fi.fin_nat = NULL; fi.fin_flx |= FI_IGNORE; fi.fin_dp = (char *)tcp2; - fi.fin_fr = &raudiofr; + fi.fin_fr = &ifsraudio->raudiofr; fi.fin_dlen = sizeof(*tcp2); fi.fin_plen = fi.fin_hlen + sizeof(*tcp2); tcp2->th_win = htons(8192); @@ -309,7 +333,7 @@ nat_t *nat; (void) fr_addstate(&fi, NULL, (sp ? 
0 : SI_W_SPORT)); if (fi.fin_state != NULL) - fr_statederef(&fi, (ipstate_t **)&fi.fin_state); + fr_statederef(&fi, (ipstate_t **)&fi.fin_state, ifs); } } @@ -329,7 +353,7 @@ nat_t *nat; (void) fr_addstate(&fi, NULL, SI_W_DPORT); if (fi.fin_state != NULL) - fr_statederef(&fi, (ipstate_t **)&fi.fin_state); + fr_statederef(&fi, (ipstate_t **)&fi.fin_state, ifs); } } diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_rcmd_pxy.c b/usr/src/uts/common/inet/ipf/netinet/ip_rcmd_pxy.c index 919c47cb90..770c5c5d97 100644 --- a/usr/src/uts/common/inet/ipf/netinet/ip_rcmd_pxy.c +++ b/usr/src/uts/common/inet/ipf/netinet/ip_rcmd_pxy.c @@ -5,7 +5,7 @@ * * $Id: ip_rcmd_pxy.c,v 1.41.2.4 2005/02/04 10:22:55 darrenr Exp $ * - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. * * Simple RCMD transparent proxy for in-kernel use. For use with the NAT @@ -16,51 +16,71 @@ #define IPF_RCMD_PROXY +typedef struct ifs_rcmdpxy { + frentry_t rcmdfr; + int rcmd_proxy_init; +} ifs_rcmdpxy_t; -int ippr_rcmd_init __P((void)); -void ippr_rcmd_fini __P((void)); -int ippr_rcmd_new __P((fr_info_t *, ap_session_t *, nat_t *)); -int ippr_rcmd_out __P((fr_info_t *, ap_session_t *, nat_t *)); -int ippr_rcmd_in __P((fr_info_t *, ap_session_t *, nat_t *)); +int ippr_rcmd_init __P((void **, ipf_stack_t *)); +void ippr_rcmd_fini __P((void **, ipf_stack_t *)); +int ippr_rcmd_new __P((fr_info_t *, ap_session_t *, nat_t *, void *)); +int ippr_rcmd_out __P((fr_info_t *, ap_session_t *, nat_t *, void *)); +int ippr_rcmd_in __P((fr_info_t *, ap_session_t *, nat_t *, void *)); u_short ipf_rcmd_atoi __P((char *)); -int ippr_rcmd_portmsg __P((fr_info_t *, ap_session_t *, nat_t *)); - -static frentry_t rcmdfr; - -int rcmd_proxy_init = 0; - +int ippr_rcmd_portmsg __P((fr_info_t *, ap_session_t *, nat_t *, ifs_rcmdpxy_t *)); /* * RCMD application proxy initialization. */ -int ippr_rcmd_init() +/*ARGSUSED*/ +int ippr_rcmd_init(private, ifs) +void **private; +ipf_stack_t *ifs; { - bzero((char *)&rcmdfr, sizeof(rcmdfr)); - rcmdfr.fr_ref = 1; - rcmdfr.fr_flags = FR_INQUE|FR_PASS|FR_QUICK|FR_KEEPSTATE; - MUTEX_INIT(&rcmdfr.fr_lock, "RCMD proxy rule lock"); - rcmd_proxy_init = 1; + ifs_rcmdpxy_t *ifsrcmd; + + KMALLOC(ifsrcmd, ifs_rcmdpxy_t *); + if (ifsrcmd == NULL) + return -1; + + bzero((char *)&ifsrcmd->rcmdfr, sizeof(ifsrcmd->rcmdfr)); + ifsrcmd->rcmdfr.fr_ref = 1; + ifsrcmd->rcmdfr.fr_flags = FR_INQUE|FR_PASS|FR_QUICK|FR_KEEPSTATE; + MUTEX_INIT(&ifsrcmd->rcmdfr.fr_lock, "RCMD proxy rule lock"); + ifsrcmd->rcmd_proxy_init = 1; + + *private = (void *)ifsrcmd; return 0; } -void ippr_rcmd_fini() +/*ARGSUSED*/ +void ippr_rcmd_fini(private, ifs) +void **private; +ipf_stack_t *ifs; { - if (rcmd_proxy_init == 1) { - MUTEX_DESTROY(&rcmdfr.fr_lock); - rcmd_proxy_init = 0; + ifs_rcmdpxy_t *ifsrcmd = *((ifs_rcmdpxy_t **)private); + + if (ifsrcmd->rcmd_proxy_init == 1) { + MUTEX_DESTROY(&ifsrcmd->rcmdfr.fr_lock); + ifsrcmd->rcmd_proxy_init = 0; } + + KFREE(ifsrcmd); + *private = NULL; } /* * Setup for a new RCMD proxy. 
*/ -int ippr_rcmd_new(fin, aps, nat) +/*ARGSUSED*/ +int ippr_rcmd_new(fin, aps, nat, private) fr_info_t *fin; ap_session_t *aps; nat_t *nat; +void *private; { tcphdr_t *tcp = (tcphdr_t *)fin->fin_dp; @@ -99,10 +119,11 @@ char *ptr; } -int ippr_rcmd_portmsg(fin, aps, nat) +int ippr_rcmd_portmsg(fin, aps, nat, ifsrcmd) fr_info_t *fin; ap_session_t *aps; nat_t *nat; +ifs_rcmdpxy_t *ifsrcmd; { tcphdr_t *tcp, tcph, *tcp2 = &tcph; struct in_addr swip, swip2; @@ -113,6 +134,7 @@ nat_t *nat; nat_t *nat2; ip_t *ip; mb_t *m; + ipf_stack_t *ifs = fin->fin_ifs; tcp = (tcphdr_t *)fin->fin_dp; @@ -177,7 +199,7 @@ nat_t *nat; TCP_OFF_A(tcp2, 5); tcp2->th_flags = TH_SYN; fi.fin_dp = (char *)tcp2; - fi.fin_fr = &rcmdfr; + fi.fin_fr = &ifsrcmd->rcmdfr; fi.fin_dlen = sizeof(*tcp2); fi.fin_plen = fi.fin_hlen + sizeof(*tcp2); fi.fin_flx &= FI_LOWTTL|FI_FRAG|FI_TCPUDP|FI_OPTIONS|FI_IGNORE; @@ -207,7 +229,7 @@ nat_t *nat; } (void) fr_addstate(&fi, &nat2->nat_state, SI_W_DPORT); if (fi.fin_state != NULL) - fr_statederef(&fi, (ipstate_t **)&fi.fin_state); + fr_statederef(&fi, (ipstate_t **)&fi.fin_state, ifs); } ip->ip_len = slen; ip->ip_src = swip; @@ -217,23 +239,25 @@ nat_t *nat; } -int ippr_rcmd_out(fin, aps, nat) +int ippr_rcmd_out(fin, aps, nat, private) fr_info_t *fin; ap_session_t *aps; nat_t *nat; +void *private; { if (nat->nat_dir == NAT_OUTBOUND) - return ippr_rcmd_portmsg(fin, aps, nat); + return ippr_rcmd_portmsg(fin, aps, nat, (ifs_rcmdpxy_t *)private); return 0; } -int ippr_rcmd_in(fin, aps, nat) +int ippr_rcmd_in(fin, aps, nat, private) fr_info_t *fin; ap_session_t *aps; nat_t *nat; +void *private; { if (nat->nat_dir == NAT_INBOUND) - return ippr_rcmd_portmsg(fin, aps, nat); + return ippr_rcmd_portmsg(fin, aps, nat, (ifs_rcmdpxy_t *)private); return 0; } diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_rpcb_pxy.c b/usr/src/uts/common/inet/ipf/netinet/ip_rpcb_pxy.c index f67c01a232..9e6a27c30c 100644 --- a/usr/src/uts/common/inet/ipf/netinet/ip_rpcb_pxy.c +++ b/usr/src/uts/common/inet/ipf/netinet/ip_rpcb_pxy.c @@ -2,6 +2,9 @@ * Copyright (C) 2002-2003 by Ryan Beasley <ryanb@goddamnbastard.org> * * See the IPFILTER.LICENCE file for details on licencing. + * + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. */ /* * Overview: @@ -40,23 +43,34 @@ * $Id: ip_rpcb_pxy.c,v 2.25.2.3 2005/02/04 10:22:56 darrenr Exp $ */ +#pragma ident "%Z%%M% %I% %E% SMI" + #define IPF_RPCB_PROXY +typedef struct ifs_rpcbpxy { + frentry_t rpcbfr; /* Skeleton rule for reference by entities + this proxy creates. */ + int rpcbcnt;/* Upper bound of allocated RPCB sessions. */ + /* XXX rpcbcnt still requires locking. 
*/ + int rpcb_proxy_init; +} ifs_rpcbpxy_t; + /* * Function prototypes */ -int ippr_rpcb_init __P((void)); -void ippr_rpcb_fini __P((void)); -int ippr_rpcb_new __P((fr_info_t *, ap_session_t *, nat_t *)); -void ippr_rpcb_del __P((ap_session_t *)); -int ippr_rpcb_in __P((fr_info_t *, ap_session_t *, nat_t *)); -int ippr_rpcb_out __P((fr_info_t *, ap_session_t *, nat_t *)); +int ippr_rpcb_init __P((void **, ipf_stack_t *)); +void ippr_rpcb_fini __P((void **, ipf_stack_t *)); +int ippr_rpcb_new __P((fr_info_t *, ap_session_t *, nat_t *, void *)); +void ippr_rpcb_del __P((ap_session_t *, void *, ipf_stack_t *)); +int ippr_rpcb_in __P((fr_info_t *, ap_session_t *, nat_t *, void *)); +int ippr_rpcb_out __P((fr_info_t *, ap_session_t *, nat_t *, void *)); static void ippr_rpcb_flush __P((rpcb_session_t *)); static int ippr_rpcb_decodereq __P((fr_info_t *, nat_t *, - rpcb_session_t *, rpc_msg_t *)); + rpcb_session_t *, rpc_msg_t *, ifs_rpcbpxy_t *)); static int ippr_rpcb_skipauth __P((rpc_msg_t *, xdr_auth_t *, u_32_t **)); -static int ippr_rpcb_insert __P((rpcb_session_t *, rpcb_xact_t *)); +static int ippr_rpcb_insert __P((rpcb_session_t *, rpcb_xact_t *, + ifs_rpcbpxy_t *)); static int ippr_rpcb_xdrrpcb __P((rpc_msg_t *, u_32_t *, rpcb_args_t *)); static int ippr_rpcb_getuaddr __P((rpc_msg_t *, xdr_uaddr_t *, u_32_t **)); @@ -64,12 +78,13 @@ static u_int ippr_rpcb_atoi __P((char *)); static int ippr_rpcb_modreq __P((fr_info_t *, nat_t *, rpc_msg_t *, mb_t *, u_int)); static int ippr_rpcb_decoderep __P((fr_info_t *, nat_t *, - rpcb_session_t *, rpc_msg_t *, rpcb_xact_t **)); + rpcb_session_t *, rpc_msg_t *, rpcb_xact_t **, ifs_rpcbpxy_t *)); static rpcb_xact_t * ippr_rpcb_lookup __P((rpcb_session_t *, u_32_t)); -static void ippr_rpcb_deref __P((rpcb_session_t *, rpcb_xact_t *)); +static void ippr_rpcb_deref __P((rpcb_session_t *, rpcb_xact_t *, + ifs_rpcbpxy_t *)); static int ippr_rpcb_getproto __P((rpc_msg_t *, xdr_proto_t *, u_32_t **)); -static int ippr_rpcb_getnat __P((fr_info_t *, nat_t *, u_int, u_int)); +static int ippr_rpcb_getnat __P((fr_info_t *, nat_t *, u_int, u_int, ifs_rpcbpxy_t *)); static int ippr_rpcb_modv3 __P((fr_info_t *, nat_t *, rpc_msg_t *, mb_t *, u_int)); static int ippr_rpcb_modv4 __P((fr_info_t *, nat_t *, rpc_msg_t *, @@ -77,17 +92,6 @@ static int ippr_rpcb_modv4 __P((fr_info_t *, nat_t *, rpc_msg_t *, static void ippr_rpcb_fixlen __P((fr_info_t *, int)); /* - * Global variables - */ -static frentry_t rpcbfr; /* Skeleton rule for reference by entities - this proxy creates. */ -static int rpcbcnt; /* Upper bound of allocated RPCB sessions. */ - /* XXX rpcbcnt still requires locking. */ - -int rpcb_proxy_init = 0; - - -/* * Since rpc_msg contains only pointers, one should use this macro as a * handy way to get to the goods. (In case you're wondering about the name, * this started as BYTEREF -> BREF -> B.) @@ -105,16 +109,27 @@ int rpcb_proxy_init = 0; /* */ /* Initialize the filter rule entry and session limiter. 
*/ /* -------------------------------------------------------------------- */ +/*ARGSUSED*/ int -ippr_rpcb_init() +ippr_rpcb_init(private, ifs) +void **private; +ipf_stack_t *ifs; { - rpcbcnt = 0; + ifs_rpcbpxy_t *ifsrpcb; + + KMALLOC(ifsrpcb, ifs_rpcbpxy_t *); + if (ifsrpcb == NULL) + return -1; + + ifsrpcb->rpcbcnt = 0; - bzero((char *)&rpcbfr, sizeof(rpcbfr)); - rpcbfr.fr_ref = 1; - rpcbfr.fr_flags = FR_PASS|FR_QUICK|FR_KEEPSTATE; - MUTEX_INIT(&rpcbfr.fr_lock, "ipf Sun RPCB proxy rule lock"); - rpcb_proxy_init = 1; + bzero((char *)&ifsrpcb->rpcbfr, sizeof(ifsrpcb->rpcbfr)); + ifsrpcb->rpcbfr.fr_ref = 1; + ifsrpcb->rpcbfr.fr_flags = FR_PASS|FR_QUICK|FR_KEEPSTATE; + MUTEX_INIT(&ifsrpcb->rpcbfr.fr_lock, "ipf Sun RPCB proxy rule lock"); + ifsrpcb->rpcb_proxy_init = 1; + + *private = (void *)ifsrpcb; return(0); } @@ -126,13 +141,21 @@ ippr_rpcb_init() /* */ /* Destroy rpcbfr's mutex to avoid a lock leak. */ /* -------------------------------------------------------------------- */ +/*ARGSUSED*/ void -ippr_rpcb_fini() +ippr_rpcb_fini(private, ifs) +void **private; +ipf_stack_t *ifs; { - if (rpcb_proxy_init == 1) { - MUTEX_DESTROY(&rpcbfr.fr_lock); - rpcb_proxy_init = 0; + ifs_rpcbpxy_t *ifsrpcb = *((ifs_rpcbpxy_t **)private); + + if (ifsrpcb->rpcb_proxy_init == 1) { + MUTEX_DESTROY(&ifsrpcb->rpcbfr.fr_lock); + ifsrpcb->rpcb_proxy_init = 0; } + + KFREE(ifsrpcb); + *private = NULL; } /* -------------------------------------------------------------------- */ @@ -144,11 +167,13 @@ ippr_rpcb_fini() /* */ /* Allocate resources for per-session proxy structures. */ /* -------------------------------------------------------------------- */ +/*ARGSUSED*/ int -ippr_rpcb_new(fin, aps, nat) +ippr_rpcb_new(fin, aps, nat, private) fr_info_t *fin; ap_session_t *aps; nat_t *nat; + void *private; { rpcb_session_t *rs; @@ -174,13 +199,18 @@ ippr_rpcb_new(fin, aps, nat) /* */ /* Free up a session's list of RPCB requests. */ /* -------------------------------------------------------------------- */ +/*ARGSUSED*/ void -ippr_rpcb_del(aps) +ippr_rpcb_del(aps, private, ifs) ap_session_t *aps; + void *private; + ipf_stack_t *ifs; { rpcb_session_t *rs; rs = (rpcb_session_t *)aps->aps_data; + ifs = ifs; /* LINT */ + MUTEX_ENTER(&rs->rs_rxlock); ippr_rpcb_flush(rs); MUTEX_EXIT(&rs->rs_rxlock); @@ -201,10 +231,11 @@ ippr_rpcb_del(aps) /* for decoding. Also pass packet off for a rewrite if necessary. */ /* -------------------------------------------------------------------- */ int -ippr_rpcb_in(fin, aps, nat) +ippr_rpcb_in(fin, aps, nat, private) fr_info_t *fin; ap_session_t *aps; nat_t *nat; + void *private; { rpc_msg_t rpcmsg, *rm; rpcb_session_t *rs; @@ -235,7 +266,7 @@ ippr_rpcb_in(fin, aps, nat) rm->rm_buflen = dlen; /* Send off to decode request. */ - rv = ippr_rpcb_decodereq(fin, nat, rs, rm); + rv = ippr_rpcb_decodereq(fin, nat, rs, rm, (ifs_rpcbpxy_t *)private); switch(rv) { @@ -270,10 +301,11 @@ ippr_rpcb_in(fin, aps, nat) /* allow direct communication between RPC client and server. */ /* -------------------------------------------------------------------- */ int -ippr_rpcb_out(fin, aps, nat) +ippr_rpcb_out(fin, aps, nat, private) fr_info_t *fin; ap_session_t *aps; nat_t *nat; + void *private; { rpc_msg_t rpcmsg, *rm; rpcb_session_t *rs; @@ -281,6 +313,7 @@ ippr_rpcb_out(fin, aps, nat) u_int off, dlen; int rv, diff; mb_t *m; + ifs_rpcbpxy_t *ifsrpcb = (ifs_rpcbpxy_t *)private; /* Disallow fragmented or illegally short packets. 
*/ if ((fin->fin_flx & (FI_FRAG|FI_SHORT)) != 0) @@ -306,14 +339,14 @@ ippr_rpcb_out(fin, aps, nat) rm->rm_buflen = dlen; /* Send off to decode reply. */ - rv = ippr_rpcb_decoderep(fin, nat, rs, rm, &rx); + rv = ippr_rpcb_decoderep(fin, nat, rs, rm, &rx, ifsrpcb); switch(rv) { case -1: /* Bad packet */ if (rx != NULL) { MUTEX_ENTER(&rs->rs_rxlock); - ippr_rpcb_deref(rs, rx); + ippr_rpcb_deref(rs, rx, ifsrpcb); MUTEX_EXIT(&rs->rs_rxlock); } return(APR_ERR(1)); @@ -347,8 +380,8 @@ ippr_rpcb_out(fin, aps, nat) * finished with rx, and the other signals that we've * processed its reply. */ - ippr_rpcb_deref(rs, rx); - ippr_rpcb_deref(rs, rx); + ippr_rpcb_deref(rs, rx, ifsrpcb); + ippr_rpcb_deref(rs, rx, ifsrpcb); MUTEX_EXIT(&rs->rs_rxlock); } @@ -401,11 +434,12 @@ ippr_rpcb_flush(rs) /* is enough room in rs_buf for the basic RPC message "preamble". */ /* -------------------------------------------------------------------- */ static int -ippr_rpcb_decodereq(fin, nat, rs, rm) +ippr_rpcb_decodereq(fin, nat, rs, rm, ifsrpcb) fr_info_t *fin; nat_t *nat; rpcb_session_t *rs; rpc_msg_t *rm; + ifs_rpcbpxy_t *ifsrpcb; { rpcb_args_t *ra; u_32_t xdr, *p; @@ -499,7 +533,7 @@ ippr_rpcb_decodereq(fin, nat, rs, rm) } MUTEX_ENTER(&rs->rs_rxlock); - if (ippr_rpcb_insert(rs, &rx) != 0) { + if (ippr_rpcb_insert(rs, &rx, ifsrpcb) != 0) { MUTEX_EXIT(&rs->rs_rxlock); return(-1); } @@ -559,9 +593,10 @@ ippr_rpcb_skipauth(rm, auth, buf) /* rx(I) - pointer to RPCB transaction structure */ /* -------------------------------------------------------------------- */ static int -ippr_rpcb_insert(rs, rx) +ippr_rpcb_insert(rs, rx, ifsrpcb) rpcb_session_t *rs; rpcb_xact_t *rx; + ifs_rpcbpxy_t *ifsrpcb; { rpcb_xact_t *rxp; @@ -571,7 +606,7 @@ ippr_rpcb_insert(rs, rx) return(0); } - if (rpcbcnt == RPCB_MAXREQS) + if (ifsrpcb->rpcbcnt == RPCB_MAXREQS) return(-1); KMALLOC(rxp, rpcb_xact_t *); @@ -589,7 +624,7 @@ ippr_rpcb_insert(rs, rx) rxp->rx_ref = 1; - ++rpcbcnt; + ++ifsrpcb->rpcbcnt; return(0); } @@ -844,12 +879,13 @@ ippr_rpcb_modreq(fin, nat, rm, m, off) /* is enough room in rs_buf for the basic RPC message "preamble". */ /* -------------------------------------------------------------------- */ static int -ippr_rpcb_decoderep(fin, nat, rs, rm, rxp) +ippr_rpcb_decoderep(fin, nat, rs, rm, rxp, ifsrpcb) fr_info_t *fin; nat_t *nat; rpcb_session_t *rs; rpc_msg_t *rm; rpcb_xact_t **rxp; + ifs_rpcbpxy_t *ifsrpcb; { rpcb_listp_t *rl; rpcb_entry_t *re; @@ -922,7 +958,7 @@ ippr_rpcb_decoderep(fin, nat, rs, rm, rxp) return(-1); /* Create NAT & state table entries. */ - if (ippr_rpcb_getnat(fin, nat, rx->rx_proto, (u_int)xdr) != 0) + if (ippr_rpcb_getnat(fin, nat, rx->rx_proto, (u_int)xdr, ifsrpcb) != 0) return(-1); break; case RPCB_RES_STRING: @@ -949,7 +985,7 @@ ippr_rpcb_decoderep(fin, nat, rs, rm, rxp) /* Create NAT & state table entries. */ if (ippr_rpcb_getnat(fin, nat, rx->rx_proto, - (u_int)rr->rr_v3.xu_port) != 0) + (u_int)rr->rr_v3.xu_port, ifsrpcb) != 0) return(-1); break; case RPCB_RES_LIST: @@ -1004,7 +1040,8 @@ ippr_rpcb_decoderep(fin, nat, rs, rm, rxp) re = &rl->rl_entries[rl->rl_cnt]; rv = ippr_rpcb_getnat(fin, nat, re->re_proto.xp_proto, - (u_int)re->re_maddr.xu_port); + (u_int)re->re_maddr.xu_port, + ifsrpcb); if (rv != 0) return(-1); } @@ -1053,9 +1090,10 @@ ippr_rpcb_lookup(rs, xid) /* Free the RPCB transaction record rx from the chain of entries. 
*/ /* -------------------------------------------------------------------- */ static void -ippr_rpcb_deref(rs, rx) +ippr_rpcb_deref(rs, rx, ifsrpcb) rpcb_session_t *rs; rpcb_xact_t *rx; + ifs_rpcbpxy_t *ifsrpcb; { rs = rs; /* LINT */ @@ -1072,7 +1110,7 @@ ippr_rpcb_deref(rs, rx) KFREE(rx); - --rpcbcnt; + --ifsrpcb->rpcbcnt; } /* -------------------------------------------------------------------- */ @@ -1133,11 +1171,12 @@ ippr_rpcb_getproto(rm, xp, p) /* attempt between RPC client and server. */ /* -------------------------------------------------------------------- */ static int -ippr_rpcb_getnat(fin, nat, proto, port) +ippr_rpcb_getnat(fin, nat, proto, port, ifsrpcb) fr_info_t *fin; nat_t *nat; u_int proto; u_int port; + ifs_rpcbpxy_t *ifsrpcb; { ipnat_t *ipn, ipnat; tcphdr_t tcp; @@ -1145,6 +1184,7 @@ ippr_rpcb_getnat(fin, nat, proto, port) fr_info_t fi; nat_t *natl; int nflags; + ipf_stack_t *ifs = fin->fin_ifs; ipn = nat->nat_ptr; @@ -1186,15 +1226,15 @@ ippr_rpcb_getnat(fin, nat, proto, port) */ is = fr_stlookup(&fi, &tcp, NULL); if (is != NULL) - RWLOCK_EXIT(&ipf_state); + RWLOCK_EXIT(&ifs->ifs_ipf_state); - RWLOCK_EXIT(&ipf_nat); + RWLOCK_EXIT(&ifs->ifs_ipf_nat); - WRITE_ENTER(&ipf_nat); + WRITE_ENTER(&ifs->ifs_ipf_nat); natl = nat_inlookup(&fi, nflags, proto, fi.fin_src, fi.fin_dst); if ((natl != NULL) && (is != NULL)) { - MUTEX_DOWNGRADE(&ipf_nat); + MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat); return(0); } @@ -1203,7 +1243,7 @@ ippr_rpcb_getnat(fin, nat, proto, port) * flags that may be detrimental to the creation process or simply * shouldn't be associated with a table entry. */ - fi.fin_fr = &rpcbfr; + fi.fin_fr = &ifsrpcb->rpcbfr; fi.fin_flx &= ~FI_IGNORE; nflags &= ~NAT_SEARCH; @@ -1235,7 +1275,7 @@ ippr_rpcb_getnat(fin, nat, proto, port) bcopy((char *)&ipnat, (char *)ipn, sizeof(ipnat)); if (natl == NULL) { - MUTEX_DOWNGRADE(&ipf_nat); + MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat); return(-1); } @@ -1243,7 +1283,7 @@ ippr_rpcb_getnat(fin, nat, proto, port) (void) nat_proto(&fi, natl, nflags); nat_update(&fi, natl, natl->nat_ptr); } - MUTEX_DOWNGRADE(&ipf_nat); + MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat); if (is == NULL) { /* Create state entry. Return NULL if this fails. */ @@ -1260,12 +1300,12 @@ ippr_rpcb_getnat(fin, nat, proto, port) * XXX nat_delete is private to ip_nat.c. Should * check w/ Darren about this one. * - * nat_delete(natl, NL_EXPIRE); + * nat_delete(natl, NL_EXPIRE, ifs); */ return(-1); } if (fi.fin_state != NULL) - fr_statederef(&fi, (ipstate_t **)&fi.fin_state); + fr_statederef(&fi, (ipstate_t **)&fi.fin_state, ifs); } return(0); diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_state.h b/usr/src/uts/common/inet/ipf/netinet/ip_state.h index 5f35878d89..b12db33ef5 100644 --- a/usr/src/uts/common/inet/ipf/netinet/ip_state.h +++ b/usr/src/uts/common/inet/ipf/netinet/ip_state.h @@ -6,7 +6,7 @@ * @(#)ip_state.h 1.3 1/12/96 (C) 1995 Darren Reed * $Id: ip_state.h,v 2.68.2.5 2005/08/11 19:58:04 darrenr Exp $ * - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -223,45 +223,24 @@ typedef struct ips_stat { u_long *iss_bucketlen; } ips_stat_t; - -extern u_long fr_tcpidletimeout; -extern u_long fr_tcpclosewait; -extern u_long fr_tcplastack; -extern u_long fr_tcptimeout; -extern u_long fr_tcpclosed; -extern u_long fr_tcphalfclosed; -extern u_long fr_udptimeout; -extern u_long fr_udpacktimeout; -extern u_long fr_icmptimeout; -extern u_long fr_icmpacktimeout; -extern u_long fr_iptimeout; -extern int fr_statemax; -extern int fr_statesize; -extern int fr_state_lock; -extern int fr_state_maxbucket; -extern int fr_state_maxbucket_reset; -extern ipstate_t *ips_list; -extern ipftq_t *ips_utqe; -extern ipftq_t ips_tqtqb[IPF_TCP_NSTATES]; - -extern int fr_stateinit __P((void)); +extern int fr_stateinit __P((ipf_stack_t *)); extern ipstate_t *fr_addstate __P((fr_info_t *, ipstate_t **, u_int)); extern frentry_t *fr_checkstate __P((struct fr_info *, u_32_t *)); extern ipstate_t *fr_stlookup __P((fr_info_t *, tcphdr_t *, ipftq_t **)); -extern void fr_statesync __P((int, int, void *, char *)); -extern void fr_timeoutstate __P((void)); +extern void fr_statesync __P((int, int, void *, char *, ipf_stack_t *)); +extern void fr_timeoutstate __P((ipf_stack_t *)); extern int fr_tcp_age __P((struct ipftqent *, struct fr_info *, struct ipftq *, int)); extern int fr_tcpinwindow __P((struct fr_info *, struct tcpdata *, struct tcpdata *, tcphdr_t *, int)); -extern void fr_stateunload __P((void)); -extern void ipstate_log __P((struct ipstate *, u_int)); -extern int fr_state_ioctl __P((caddr_t, ioctlcmd_t, int)); -extern void fr_stinsert __P((struct ipstate *, int)); -extern void fr_sttab_init __P((struct ipftq *)); +extern void fr_stateunload __P((ipf_stack_t *)); +extern void ipstate_log __P((struct ipstate *, u_int, ipf_stack_t *)); +extern int fr_state_ioctl __P((caddr_t, ioctlcmd_t, int, int, void *, ipf_stack_t *)); +extern void fr_stinsert __P((struct ipstate *, int, ipf_stack_t *)); +extern void fr_sttab_init __P((struct ipftq *, ipf_stack_t *)); extern void fr_sttab_destroy __P((struct ipftq *)); extern void fr_updatestate __P((fr_info_t *, ipstate_t *, ipftq_t *)); -extern void fr_statederef __P((fr_info_t *, ipstate_t **)); -extern void fr_setstatequeue __P((ipstate_t *, int)); +extern void fr_statederef __P((fr_info_t *, ipstate_t **, ipf_stack_t *)); +extern void fr_setstatequeue __P((ipstate_t *, int, ipf_stack_t *)); #endif /* __IP_STATE_H__ */ diff --git a/usr/src/uts/common/inet/ipf/netinet/ipf_stack.h b/usr/src/uts/common/inet/ipf/netinet/ipf_stack.h new file mode 100644 index 0000000000..7c53a6f65d --- /dev/null +++ b/usr/src/uts/common/inet/ipf/netinet/ipf_stack.h @@ -0,0 +1,288 @@ +/* + * Copyright (C) 1993-2001, 2003 by Darren Reed. + * + * See the IPFILTER.LICENCE file for details on licencing. + * + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifndef __IPF_STACK_H__ +#define __IPF_STACK_H__ + +/* FIXME: appears needed for ip_proxy.h - tcpseq */ +#include <net/route.h> +#include <netinet/in.h> +#include <netinet/in_systm.h> +#include <netinet/ip.h> +#include <netinet/ip_var.h> +#include <netinet/tcp.h> +#include <netinet/udp.h> +#include <netinet/ip_icmp.h> +#include <netinet/tcpip.h> + +#include "ip_compat.h" +#include "ip_fil.h" +#include "ip_nat.h" +#include "ip_frag.h" +#include "ip_state.h" +#include "ip_proxy.h" +#include "ip_auth.h" +#include "ip_lookup.h" +#include "ip_pool.h" +#include "ip_htable.h" +#include <net/radix.h> +#include <sys/neti.h> +#include <sys/hook.h> + +/* + * IPF stack instances + */ +struct ipf_stack { + netstack_t *ifs_netstack; + + /* ipf module */ + fr_info_t ifs_frcache[2][8]; + + filterstats_t ifs_frstats[2]; + frentry_t *ifs_ipfilter[2][2]; + frentry_t *ifs_ipfilter6[2][2]; + frentry_t *ifs_ipacct6[2][2]; + frentry_t *ifs_ipacct[2][2]; +#if 0 /* not used */ + frentry_t *ifs_ipnatrules[2][2]; +#endif + frgroup_t *ifs_ipfgroups[IPL_LOGSIZE][2]; + int ifs_fr_refcnt; + /* + * For fr_running: + * 0 == loading, 1 = running, -1 = disabled, -2 = unloading + */ + int ifs_fr_running; + int ifs_fr_flags; + int ifs_fr_active; + int ifs_fr_control_forwarding; + int ifs_fr_update_ipid; +#if 0 + ushort_t ifs_fr_ip_id; +#endif + int ifs_fr_chksrc; + int ifs_fr_minttl; + int ifs_fr_icmpminfragmtu; + int ifs_fr_pass; + ulong_t ifs_fr_frouteok[2]; + ulong_t ifs_fr_userifqs; + ulong_t ifs_fr_badcoalesces[2]; + uchar_t ifs_ipf_iss_secret[32]; + timeout_id_t ifs_fr_timer_id; +#if 0 + timeout_id_t ifs_synctimeoutid; +#endif + int ifs_ipf_locks_done; + + ipftoken_t *ifs_ipftokenhead; + ipftoken_t **ifs_ipftokentail; + + ipfmutex_t ifs_ipl_mutex; + ipfmutex_t ifs_ipf_authmx; + ipfmutex_t ifs_ipf_rw; + ipfmutex_t ifs_ipf_timeoutlock; + ipfrwlock_t ifs_ipf_mutex; + ipfrwlock_t ifs_ipf_global; + ipfrwlock_t ifs_ipf_frcache; + ipfrwlock_t ifs_ip_poolrw; + ipfrwlock_t ifs_ipf_frag; + ipfrwlock_t ifs_ipf_state; + ipfrwlock_t ifs_ipf_nat; + ipfrwlock_t ifs_ipf_natfrag; + ipfmutex_t ifs_ipf_nat_new; + ipfmutex_t ifs_ipf_natio; + ipfrwlock_t ifs_ipf_auth; + ipfmutex_t ifs_ipf_stinsert; + ipfrwlock_t ifs_ipf_ipidfrag; + ipfrwlock_t ifs_ipf_tokens; + kcondvar_t ifs_iplwait; + kcondvar_t ifs_ipfauthwait; + + ipftuneable_t *ifs_ipf_tuneables; + ipftuneable_t *ifs_ipf_tunelist; + + /* ip_fil_solaris.c */ + hook_t ifs_ipfhook_in; + hook_t ifs_ipfhook_out; + hook_t ifs_ipfhook_loop_in; + hook_t ifs_ipfhook_loop_out; + hook_t ifs_ipfhook_nicevents; + + /* flags to indicate whether hooks are registered. 
*/ + boolean_t ifs_hook4_physical_in; + boolean_t ifs_hook4_physical_out; + boolean_t ifs_hook4_nic_events; + boolean_t ifs_hook4_loopback_in; + boolean_t ifs_hook4_loopback_out; + boolean_t ifs_hook6_physical_in; + boolean_t ifs_hook6_physical_out; + boolean_t ifs_hook6_nic_events; + boolean_t ifs_hook6_loopback_in; + boolean_t ifs_hook6_loopback_out; + + int ifs_ipf_loopback; + net_data_t ifs_ipf_ipv4; + net_data_t ifs_ipf_ipv6; + + /* ip_auth.c */ + int ifs_fr_authsize; + int ifs_fr_authused; + int ifs_fr_defaultauthage; + int ifs_fr_auth_lock; + int ifs_fr_auth_init; + fr_authstat_t ifs_fr_authstats; + frauth_t *ifs_fr_auth; + mb_t **ifs_fr_authpkts; + int ifs_fr_authstart; + int ifs_fr_authend; + int ifs_fr_authnext; + frauthent_t *ifs_fae_list; + frentry_t *ifs_ipauth; + frentry_t *ifs_fr_authlist; + + /* ip_frag.c */ + ipfr_t *ifs_ipfr_list; + ipfr_t **ifs_ipfr_tail; + ipfr_t **ifs_ipfr_heads; + + ipfr_t *ifs_ipfr_natlist; + ipfr_t **ifs_ipfr_nattail; + ipfr_t **ifs_ipfr_nattab; + + ipfr_t *ifs_ipfr_ipidlist; + ipfr_t **ifs_ipfr_ipidtail; + ipfr_t **ifs_ipfr_ipidtab; + + ipfrstat_t ifs_ipfr_stats; + int ifs_ipfr_inuse; + int ifs_ipfr_size; + + int ifs_fr_ipfrttl; + int ifs_fr_frag_lock; + int ifs_fr_frag_init; + ulong_t ifs_fr_ticks; + + frentry_t ifs_frblock; + + /* ip_htable.c */ + iphtable_t *ifs_ipf_htables[IPL_LOGSIZE]; + ulong_t ifs_ipht_nomem[IPL_LOGSIZE]; + ulong_t ifs_ipf_nhtables[IPL_LOGSIZE]; + ulong_t ifs_ipf_nhtnodes[IPL_LOGSIZE]; + + /* ip_log.c */ + iplog_t **ifs_iplh[IPL_LOGSIZE]; + iplog_t *ifs_iplt[IPL_LOGSIZE]; + iplog_t *ifs_ipll[IPL_LOGSIZE]; + int ifs_iplused[IPL_LOGSIZE]; + fr_info_t ifs_iplcrc[IPL_LOGSIZE]; + int ifs_ipl_suppress; + int ifs_ipl_buffer_sz; + int ifs_ipl_logmax; + int ifs_ipl_logall; + int ifs_ipl_log_init; + int ifs_ipl_logsize; + + /* ip_lookup.c */ + ip_pool_stat_t ifs_ippoolstat; + int ifs_ip_lookup_inited; + + /* ip_nat.c */ + /* nat_table[0] -> hashed list sorted by inside (ip, port) */ + /* nat_table[1] -> hashed list sorted by outside (ip, port) */ + nat_t **ifs_nat_table[2]; + nat_t *ifs_nat_instances; + ipnat_t *ifs_nat_list; + uint_t ifs_ipf_nattable_sz; + uint_t ifs_ipf_nattable_max; + uint_t ifs_ipf_natrules_sz; + uint_t ifs_ipf_rdrrules_sz; + uint_t ifs_ipf_hostmap_sz; + uint_t ifs_fr_nat_maxbucket; + uint_t ifs_fr_nat_maxbucket_reset; + uint32_t ifs_nat_masks; + uint32_t ifs_rdr_masks; + ipnat_t **ifs_nat_rules; + ipnat_t **ifs_rdr_rules; + hostmap_t **ifs_maptable; + hostmap_t *ifs_ipf_hm_maplist; + + ipftq_t ifs_nat_tqb[IPF_TCP_NSTATES]; + ipftq_t ifs_nat_udptq; + ipftq_t ifs_nat_icmptq; + ipftq_t ifs_nat_iptq; + ipftq_t *ifs_nat_utqe; + int ifs_nat_logging; + ulong_t ifs_fr_defnatage; + ulong_t ifs_fr_defnatipage; + ulong_t ifs_fr_defnaticmpage; + natstat_t ifs_nat_stats; + int ifs_fr_nat_lock; + int ifs_fr_nat_init; + + /* ip_pool.c */ + ip_pool_stat_t ifs_ipoolstat; + ip_pool_t *ifs_ip_pool_list[IPL_LOGSIZE]; + + /* ip_proxy.c */ + ap_session_t *ifs_ap_sess_list; + aproxy_t *ifs_ap_proxylist; + aproxy_t *ifs_ap_proxies; /* copy of lcl_ap_proxies */ + + /* ip_state.c */ + ipstate_t **ifs_ips_table; + ulong_t *ifs_ips_seed; + int ifs_ips_num; + ulong_t ifs_ips_last_force_flush; + ips_stat_t ifs_ips_stats; + + ulong_t ifs_fr_tcpidletimeout; + ulong_t ifs_fr_tcpclosewait; + ulong_t ifs_fr_tcplastack; + ulong_t ifs_fr_tcptimeout; + ulong_t ifs_fr_tcpclosed; + ulong_t ifs_fr_tcphalfclosed; + ulong_t ifs_fr_udptimeout; + ulong_t ifs_fr_udpacktimeout; + ulong_t ifs_fr_icmptimeout; + ulong_t ifs_fr_icmpacktimeout; + int 
ifs_fr_statemax; + int ifs_fr_statesize; + int ifs_fr_state_doflush; + int ifs_fr_state_lock; + int ifs_fr_state_maxbucket; + int ifs_fr_state_maxbucket_reset; + int ifs_fr_state_init; + ipftq_t ifs_ips_tqtqb[IPF_TCP_NSTATES]; + ipftq_t ifs_ips_udptq; + ipftq_t ifs_ips_udpacktq; + ipftq_t ifs_ips_iptq; + ipftq_t ifs_ips_icmptq; + ipftq_t ifs_ips_icmpacktq; + ipftq_t *ifs_ips_utqe; + int ifs_ipstate_logging; + ipstate_t *ifs_ips_list; + ulong_t ifs_fr_iptimeout; + + /* radix.c */ + int ifs_max_keylen; + struct radix_mask *ifs_rn_mkfreelist; + struct radix_node_head *ifs_mask_rnhead; + char *ifs_addmask_key; + char *ifs_rn_zeros; + char *ifs_rn_ones; +#ifdef KERNEL + /* kstats for inbound and outbound */ + kstat_t *ifs_kstatp[2]; +#endif +}; + +#endif /* __IPF_STACK_H__ */ diff --git a/usr/src/uts/common/inet/ipf/solaris.c b/usr/src/uts/common/inet/ipf/solaris.c index b1fa886c5e..b691d3f2ea 100644 --- a/usr/src/uts/common/inet/ipf/solaris.c +++ b/usr/src/uts/common/inet/ipf/solaris.c @@ -3,7 +3,7 @@ * * See the IPFILTER.LICENCE file for details on licencing. * - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* #pragma ident "@(#)solaris.c 1.12 6/5/96 (C) 1995 Darren Reed"*/ @@ -39,6 +39,7 @@ #if SOLARIS2 >= 6 # include <net/if_types.h> #endif +#include <sys/netstack.h> #include <net/af.h> #include <net/route.h> #include <netinet/in.h> @@ -59,14 +60,10 @@ #include "netinet/ip_frag.h" #include "netinet/ip_auth.h" #include "netinet/ip_state.h" +#include "netinet/ipf_stack.h" -extern struct filterstats frstats[]; -extern int fr_running; -extern int fr_flags; extern int iplwrite __P((dev_t, struct uio *, cred_t *)); -extern ipnat_t *nat_list; - static int ipf_getinfo __P((dev_info_t *, ddi_info_cmd_t, void *, void **)); #if SOLARIS2 < 10 @@ -74,18 +71,12 @@ static int ipf_identify __P((dev_info_t *)); #endif static int ipf_attach __P((dev_info_t *, ddi_attach_cmd_t)); static int ipf_detach __P((dev_info_t *, ddi_detach_cmd_t)); -static int ipf_property_update __P((dev_info_t *)); +static int ipf_property_g_update __P((dev_info_t *)); static char *ipf_devfiles[] = { IPL_NAME, IPNAT_NAME, IPSTATE_NAME, IPAUTH_NAME, IPSYNC_NAME, IPSCAN_NAME, IPLOOKUP_NAME, NULL }; -#if SOLARIS2 >= 7 -extern timeout_id_t fr_timer_id; -#else -extern int fr_timer_id; -#endif - static struct cb_ops ipf_cb_ops = { iplopen, iplclose, @@ -191,7 +182,7 @@ static size_t hdrsizes[57][2] = { }; #endif /* SOLARIS2 >= 6 */ -static dev_info_t *ipf_dev_info = NULL; +dev_info_t *ipf_dev_info = NULL; static const filter_kstats_t ipf_kstat_tmp = { { "pass", KSTAT_DATA_ULONG }, @@ -222,47 +213,45 @@ static const filter_kstats_t ipf_kstat_tmp = { { "ip upd. 
fail", KSTAT_DATA_ULONG } }; -net_data_t ipf_ipv4; -net_data_t ipf_ipv6; -kstat_t *ipf_kstatp[2] = {NULL, NULL}; static int ipf_kstat_update(kstat_t *ksp, int rwflag); static void -ipf_kstat_init(void) +ipf_kstat_init(ipf_stack_t *ifs, netstackid_t stackid) { int i; for (i = 0; i < 2; i++) { - ipf_kstatp[i] = kstat_create("ipf", 0, + ifs->ifs_kstatp[i] = kstat_create_netstack("ipf", 0, (i==0)?"inbound":"outbound", "net", KSTAT_TYPE_NAMED, sizeof (filter_kstats_t) / sizeof (kstat_named_t), - 0); - if (ipf_kstatp[i] != NULL) { - bcopy(&ipf_kstat_tmp, ipf_kstatp[i]->ks_data, + 0, stackid); + if (ifs->ifs_kstatp[i] != NULL) { + bcopy(&ipf_kstat_tmp, ifs->ifs_kstatp[i]->ks_data, sizeof (filter_kstats_t)); - ipf_kstatp[i]->ks_update = ipf_kstat_update; - ipf_kstatp[i]->ks_private = &frstats[i]; - kstat_install(ipf_kstatp[i]); + ifs->ifs_kstatp[i]->ks_update = ipf_kstat_update; + ifs->ifs_kstatp[i]->ks_private = &ifs->ifs_frstats[i]; + kstat_install(ifs->ifs_kstatp[i]); } } #ifdef IPFDEBUG cmn_err(CE_NOTE, "IP Filter: ipf_kstat_init() installed 0x%x, 0x%x", - ipf_kstatp[0], ipf_kstatp[1]); + ifs->ifs_kstatp[0], ifs->ifs_kstatp[1]); #endif } static void -ipf_kstat_fini(void) +ipf_kstat_fini(ipf_stack_t *ifs, netstackid_t stackid) { int i; + for (i = 0; i < 2; i++) { - if (ipf_kstatp[i] != NULL) { - kstat_delete(ipf_kstatp[i]); - ipf_kstatp[i] = NULL; + if (ifs->ifs_kstatp[i] != NULL) { + kstat_delete_netstack(ifs->ifs_kstatp[i], stackid); + ifs->ifs_kstatp[i] = NULL; } } } @@ -273,6 +262,9 @@ ipf_kstat_update(kstat_t *ksp, int rwflag) filter_kstats_t *fkp; filterstats_t *fsp; + if (ksp == NULL || ksp->ks_data == NULL) + return (EIO); + if (rwflag == KSTAT_WRITE) return (EACCES); @@ -313,12 +305,7 @@ int _init() { int ipfinst; - ipf_kstat_init(); - ipfinst = mod_install(&modlink1); - - if (ipfinst != 0) - ipf_kstat_fini(); #ifdef IPFDEBUG cmn_err(CE_NOTE, "IP Filter: _init() = %d", ipfinst); #endif @@ -334,9 +321,6 @@ int _fini(void) #ifdef IPFDEBUG cmn_err(CE_NOTE, "IP Filter: _fini() = %d", ipfinst); #endif - if (ipfinst == 0) - ipf_kstat_fini(); - return ipfinst; } @@ -367,6 +351,154 @@ dev_info_t *dip; } #endif +/* + * Initialize things for IPF for each stack instance + */ +static void * +ipf_stack_init(netstackid_t stackid, netstack_t *ns) +{ + ipf_stack_t *ifs; + +#ifdef NS_DEBUG + (void) printf("ipf_stack_init(%d)\n", stackid); +#endif + + KMALLOCS(ifs, ipf_stack_t *, sizeof (*ifs)); + bzero(ifs, sizeof (*ifs)); + + ifs->ifs_netstack = ns; + + ifs->ifs_hook4_physical_in = B_FALSE; + ifs->ifs_hook4_physical_out = B_FALSE; + ifs->ifs_hook4_nic_events = B_FALSE; + ifs->ifs_hook4_loopback_in = B_FALSE; + ifs->ifs_hook4_loopback_out = B_FALSE; + ifs->ifs_hook6_physical_in = B_FALSE; + ifs->ifs_hook6_physical_out = B_FALSE; + ifs->ifs_hook6_nic_events = B_FALSE; + ifs->ifs_hook6_loopback_in = B_FALSE; + ifs->ifs_hook6_loopback_out = B_FALSE; + + /* + * Initialize mutex's + */ + RWLOCK_INIT(&ifs->ifs_ipf_global, "ipf filter load/unload mutex"); + RWLOCK_INIT(&ifs->ifs_ipf_mutex, "ipf filter rwlock"); + RWLOCK_INIT(&ifs->ifs_ipf_frcache, "ipf cache rwlock"); +#ifdef KERNEL + ipf_kstat_init(ifs, stackid); +#endif + + /* + * Lock people out while we set things up. 
+ */ + WRITE_ENTER(&ifs->ifs_ipf_global); + ipftuneable_alloc(ifs); + ifs->ifs_fr_timer_id = timeout(fr_slowtimer, (void *)ifs, + drv_usectohz(500000)); + + RWLOCK_EXIT(&ifs->ifs_ipf_global); + + cmn_err(CE_CONT, "!%s, running.\n", ipfilter_version); + return (ifs); +} + +static int ipf_detach_check_zone(ipf_stack_t *ifs) +{ + /* + * Make sure we're the only one's modifying things. With + * this lock others should just fall out of the loop. + */ + READ_ENTER(&ifs->ifs_ipf_global); + if (ifs->ifs_fr_running == 1) { + RWLOCK_EXIT(&ifs->ifs_ipf_global); + return (-1); + } + + /* + * Make sure there is no active filter rule. + */ + if (ifs->ifs_ipfilter[0][ifs->ifs_fr_active] || + ifs->ifs_ipfilter[1][ifs->ifs_fr_active] || + ifs->ifs_ipfilter6[0][ifs->ifs_fr_active] || + ifs->ifs_ipfilter6[1][ifs->ifs_fr_active]) { + RWLOCK_EXIT(&ifs->ifs_ipf_global); + return (-1); + } + + RWLOCK_EXIT(&ifs->ifs_ipf_global); + + return (0); +} + +static int ipf_detach_check_all() +{ + netstack_handle_t nh; + netstack_t *ns; + int ret; + + netstack_next_init(&nh); + while ((ns = netstack_next(&nh)) != NULL) { + ret = ipf_detach_check_zone(ns->netstack_ipf); + netstack_rele(ns); + if (ret != 0) { + netstack_next_fini(&nh); + return (-1); + } + } + + netstack_next_fini(&nh); + return (0); +} + +/* + * Destroy things for ipf for one stack. + */ +/* ARGSUSED */ +static void +ipf_stack_fini(netstackid_t stackid, void *arg) +{ + ipf_stack_t *ifs = (ipf_stack_t *)arg; + +#ifdef NS_DEBUG + (void) printf("ipf_stack_destroy(%p, stackid %d)\n", + (void *)ifs, stackid); +#endif + + /* + * Make sure we're the only one's modifying things. With + * this lock others should just fall out of the loop. + */ + WRITE_ENTER(&ifs->ifs_ipf_global); + if (ifs->ifs_fr_running == -2) { + RWLOCK_EXIT(&ifs->ifs_ipf_global); + return; + } + ifs->ifs_fr_running = -2; + RWLOCK_EXIT(&ifs->ifs_ipf_global); + +#ifdef KERNEL + ipf_kstat_fini(ifs, stackid); +#endif + if (ifs->ifs_fr_timer_id != 0) { + (void) untimeout(ifs->ifs_fr_timer_id); + ifs->ifs_fr_timer_id = 0; + } + + WRITE_ENTER(&ifs->ifs_ipf_global); + if (ipldetach(ifs) != 0) { + printf("ipf_stack_fini: ipldetach failed\n"); + } + + ipftuneable_free(ifs); + + RWLOCK_EXIT(&ifs->ifs_ipf_global); + RW_DESTROY(&ifs->ifs_ipf_mutex); + RW_DESTROY(&ifs->ifs_ipf_frcache); + RW_DESTROY(&ifs->ifs_ipf_global); + + KFREE(ifs); +} static int ipf_attach(dip, cmd) dev_info_t *dip; @@ -387,14 +519,12 @@ ddi_attach_cmd_t cmd; /* Only one instance of ipf (instance 0) can be attached. */ if (instance > 0) return DDI_FAILURE; - if (fr_running != 0) - return DDI_FAILURE; #ifdef IPFDEBUG cmn_err(CE_NOTE, "IP Filter: attach ipf instance %d", instance); #endif - (void) ipf_property_update(dip); + (void) ipf_property_g_update(dip); for (i = 0; ((s = ipf_devfiles[i]) != NULL); i++) { s = strrchr(s, '/'); @@ -410,25 +540,8 @@ ddi_attach_cmd_t cmd; } ipf_dev_info = dip; - /* - * Initialize mutex's - */ - RWLOCK_INIT(&ipf_global, "ipf filter load/unload mutex"); - RWLOCK_INIT(&ipf_mutex, "ipf filter rwlock"); - RWLOCK_INIT(&ipf_frcache, "ipf cache rwlock"); - - /* - * Lock people out while we set things up. 
- */ - WRITE_ENTER(&ipf_global); - - fr_timer_id = timeout(fr_slowtimer, NULL, - drv_usectohz(500000)); - - RWLOCK_EXIT(&ipf_global); - - cmn_err(CE_CONT, "!%s, running.\n", ipfilter_version); - + netstack_register(NS_IPF, ipf_stack_init, NULL, + ipf_stack_fini); return DDI_SUCCESS; /* NOTREACHED */ default: @@ -436,14 +549,7 @@ ddi_attach_cmd_t cmd; } attach_failed: -#ifdef IPFDEBUG - cmn_err(CE_NOTE, "IP Filter: failed to attach\n"); -#endif - /* - * Use our own detach routine to toss - * away any stuff we allocated above. - */ - (void) ipf_detach(dip, DDI_DETACH); + ddi_prop_remove_all(dip); return DDI_FAILURE; } @@ -459,37 +565,10 @@ ddi_detach_cmd_t cmd; #endif switch (cmd) { case DDI_DETACH: - if (fr_refcnt != 0) + if (ipf_detach_check_all() != 0) return DDI_FAILURE; - /* - * Make sure we're the only one's modifying things. With - * this lock others should just fall out of the loop. - */ - WRITE_ENTER(&ipf_global); - if (fr_running == -2) { - RWLOCK_EXIT(&ipf_global); - return DDI_FAILURE; - } - /* - * Make sure there is no active filter rule. - */ - if (ipfilter[0][fr_active] || ipfilter[1][fr_active] || - ipfilter6[0][fr_active] || ipfilter6[1][fr_active]) { - RWLOCK_EXIT(&ipf_global); - return DDI_FAILURE; - } - fr_running = -2; - - RWLOCK_EXIT(&ipf_global); - - if (fr_timer_id != 0) { - (void) untimeout(fr_timer_id); - fr_timer_id = 0; - } - - /* - * Undo what we did in ipf_attach, freeing resources + /* Undo what we did in ipf_attach, freeing resources * and removing things we installed. The system * framework guarantees we are not active with this devinfo * node in any other entry points at this time. @@ -499,26 +578,16 @@ ddi_detach_cmd_t cmd; ddi_remove_minor_node(dip, NULL); if (i > 0) { cmn_err(CE_CONT, "IP Filter: still attached (%d)\n", i); - fr_running = -1; return DDI_FAILURE; } - WRITE_ENTER(&ipf_global); - if (!ipldetach()) { - RWLOCK_EXIT(&ipf_global); - RW_DESTROY(&ipf_mutex); - RW_DESTROY(&ipf_frcache); - RW_DESTROY(&ipf_global); - cmn_err(CE_CONT, "!%s detached.\n", ipfilter_version); - return (DDI_SUCCESS); - } - RWLOCK_EXIT(&ipf_global); - break; + netstack_unregister(NS_IPF); + return DDI_SUCCESS; + /* NOTREACHED */ default: break; } cmn_err(CE_NOTE, "IP Filter: failed to detach\n"); - fr_running = -1; return DDI_FAILURE; } @@ -531,8 +600,6 @@ void *arg, **result; { int error; - if (fr_running <= 0) - return DDI_FAILURE; error = DDI_FAILURE; #ifdef IPFDEBUG cmn_err(CE_NOTE, "IP Filter: ipf_getinfo(%x,%x,%x)", dip, infocmd, arg); @@ -557,16 +624,9 @@ void *arg, **result; * Fetch configuration file values that have been entered into the ipf.conf * driver file. 
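With ipf_stack_init() and ipf_stack_fini() doing the real per-instance work, DDI attach shrinks to registering them via netstack_register(NS_IPF, ipf_stack_init, NULL, ipf_stack_fini), and detach must first confirm that no instance is still running or holding rules before calling netstack_unregister(). A condensed sketch of the detach side (minor-node handling elided; the function name is hypothetical):

/* Hedged sketch: detach is refused while any IP instance is still live. */
static int
example_detach(void)
{
	/*
	 * ipf_detach_check_all() walks every netstack with netstack_next()
	 * and fails if any instance has fr_running == 1 or active rules.
	 */
	if (ipf_detach_check_all() != 0)
		return (DDI_FAILURE);

	/* Stop receiving per-instance create/destroy callbacks. */
	netstack_unregister(NS_IPF);
	return (DDI_SUCCESS);
}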
*/ -static int ipf_property_update(dip) +static int ipf_property_g_update(dip) dev_info_t *dip; { - ipftuneable_t *ipft; - int64_t *i64p; - char *name; - u_int one; - int *i32p; - int err; - #ifdef DDI_NO_AUTODETACH if (ddi_prop_update_int(DDI_DEV_T_NONE, dip, DDI_NO_AUTODETACH, 1) != DDI_PROP_SUCCESS) { @@ -581,9 +641,21 @@ dev_info_t *dip; } #endif - err = DDI_SUCCESS; - ipft = ipf_tuneables; - for (ipft = ipf_tuneables; (name = ipft->ipft_name) != NULL; ipft++) { + return DDI_SUCCESS; +} + +int ipf_property_update(dip, ifs) +dev_info_t *dip; +ipf_stack_t *ifs; +{ + ipftuneable_t *ipft; + int64_t *i64p; + char *name; + u_int one; + int *i32p; + int err; + + for (ipft = ifs->ifs_ipf_tuneables; (name = ipft->ipft_name) != NULL; ipft++) { one = 1; switch (ipft->ipft_sz) { @@ -626,13 +698,11 @@ dev_info_t *dip; ddi_prop_free(i64p); break; #endif - default : break; } if (err != DDI_SUCCESS) break; } - return err; } diff --git a/usr/src/uts/common/inet/ipp_common.h b/usr/src/uts/common/inet/ipp_common.h index 5703f29d48..9ac9837f66 100644 --- a/usr/src/uts/common/inet/ipp_common.h +++ b/usr/src/uts/common/inet/ipp_common.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -47,16 +46,16 @@ extern "C" { extern uint32_t ipp_action_count; /* Whether ip policy is enabled at callout position proc */ -#define IPP_ENABLED(proc) ((ipp_action_count != 0) && \ - (~(ip_policy_mask) & (proc))) +#define IPP_ENABLED(proc, ipst) ((ipp_action_count != 0) && \ + (~((ipst)->ips_ip_policy_mask) & (proc))) /* Apply IPQoS policies for inbound traffic? */ -#define IP6_IN_IPP(flags) (IPP_ENABLED(IPP_LOCAL_IN) && \ +#define IP6_IN_IPP(flags, ipst) (IPP_ENABLED(IPP_LOCAL_IN, ipst) && \ (!((flags) & IP6_NO_IPPOLICY))) /* Apply IPQoS policies for oubound traffic? */ -#define IP6_OUT_IPP(flags) \ - (IPP_ENABLED(IPP_LOCAL_OUT) && (!((flags) & IP6_NO_IPPOLICY))) +#define IP6_OUT_IPP(flags, ipst) \ + (IPP_ENABLED(IPP_LOCAL_OUT, ipst) && (!((flags) & IP6_NO_IPPOLICY))) /* Extracts 8 bit traffic class from IPV6 flow label field */ #ifdef _BIG_ENDIAN diff --git a/usr/src/uts/common/inet/ipsec_impl.h b/usr/src/uts/common/inet/ipsec_impl.h index 6e4764a89c..bc576a8deb 100644 --- a/usr/src/uts/common/inet/ipsec_impl.h +++ b/usr/src/uts/common/inet/ipsec_impl.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -28,6 +28,9 @@ #pragma ident "%Z%%M% %I% %E% SMI" +#include <inet/ip.h> +#include <inet/ipdrop.h> + #ifdef __cplusplus extern "C" { #endif @@ -99,8 +102,8 @@ extern "C" { /* * So we can access IPsec global variables that live in keysock.c. 
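The ipp_common.h hunk shows the lighter-weight half of the conversion: macros that used to read a global (ip_policy_mask) now take the ip_stack_t and read that instance's copy, so IPQoS policy can be enabled in one zone and not another. A small usage sketch, assuming the caller already has the instance pointer (the callout helper is hypothetical):

/* Hedged sketch: per-instance IPQoS check after the IPP_ENABLED() change. */
static void
example_local_in(mblk_t *mp, uint_t flags, ip_stack_t *ipst)
{
	if (IP6_IN_IPP(flags, ipst))
		example_ipp_callout(mp);	/* hypothetical IPQoS hand-off */
}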
*/ -extern boolean_t keysock_extended_reg(void); -extern uint32_t keysock_next_seq(void); +extern boolean_t keysock_extended_reg(netstack_t *); +extern uint32_t keysock_next_seq(netstack_t *); /* * Locking for ipsec policy rules: @@ -195,6 +198,7 @@ extern uint32_t keysock_next_seq(void); kmutex_t hash_lock; \ } + typedef struct ipsec_policy_s ipsec_policy_t; typedef HASH_HEAD(ipsec_policy_s) ipsec_policy_hash_t; @@ -302,6 +306,13 @@ typedef struct ipsec_action_s } /* + * For now, use a trivially sized hash table for actions. + * In the future we can add the structure canonicalization necessary + * to get the hash function to behave correctly.. + */ +#define IPSEC_ACTION_HASH_SIZE 1 + +/* * Merged address structure, for cheezy address-family independent * matches in policy code. */ @@ -401,19 +412,18 @@ struct ipsec_policy_s atomic_add_32(&(ipp)->ipsp_refs, 1); \ ASSERT((ipp)->ipsp_refs != 0); \ } -#define IPPOL_REFRELE(ipp) { \ +#define IPPOL_REFRELE(ipp, ns) { \ ASSERT((ipp)->ipsp_refs != 0); \ membar_exit(); \ if (atomic_add_32_nv(&(ipp)->ipsp_refs, -1) == 0) \ - ipsec_policy_free(ipp); \ + ipsec_policy_free(ipp, ns); \ (ipp) = 0; \ } -#define IPPOL_UNCHAIN(php, ip) \ +#define IPPOL_UNCHAIN(php, ip, ns) \ HASHLIST_UNCHAIN((ip), ipsp_hash); \ avl_remove(&(php)->iph_rulebyid, (ip)); \ - IPPOL_REFRELE(ip); - + IPPOL_REFRELE(ip, ns); /* * Policy ruleset. One per (protocol * direction) for system policy. @@ -449,11 +459,11 @@ typedef struct ipsec_policy_head_s atomic_add_32(&(iph)->iph_refs, 1); \ ASSERT((iph)->iph_refs != 0); \ } -#define IPPH_REFRELE(iph) { \ +#define IPPH_REFRELE(iph, ns) { \ ASSERT((iph)->iph_refs != 0); \ membar_exit(); \ if (atomic_add_32_nv(&(iph)->iph_refs, -1) == 0) \ - ipsec_polhead_free(iph); \ + ipsec_polhead_free(iph, ns); \ (iph) = 0; \ } @@ -537,11 +547,11 @@ typedef struct ipsec_tun_pol_s { ASSERT((itp)->itp_refcnt != 0); \ } -#define ITP_REFRELE(itp) { \ +#define ITP_REFRELE(itp, ns) { \ ASSERT((itp)->itp_refcnt != 0); \ membar_exit(); \ if (atomic_add_32_nv(&((itp)->itp_refcnt), -1) == 0) \ - itp_free(itp); \ + itp_free(itp, ns); \ } /* @@ -576,11 +586,6 @@ typedef struct ipsid_s atomic_add_32(&(ipsid)->ipsid_refcnt, -1); \ } -extern boolean_t ipsec_inbound_v4_policy_present; -extern boolean_t ipsec_outbound_v4_policy_present; -extern boolean_t ipsec_inbound_v6_policy_present; -extern boolean_t ipsec_outbound_v6_policy_present; - struct ipsec_out_s; /* @@ -610,55 +615,181 @@ struct ipsec_out_s; #define IPSEC_DEF_BLOCKSIZE (8) /* safe default */ /* + * Identity hash table. + * + * Identities are refcounted and "interned" into the hash table. + * Only references coming from other objects (SA's, latching state) + * are counted in ipsid_refcnt. + * + * Locking: IPSID_REFHOLD is safe only when (a) the object's hash bucket + * is locked, (b) we know that the refcount must be > 0. + * + * The ipsid_next and ipsid_ptpn fields are only to be referenced or + * modified when the bucket lock is held; in particular, we only + * delete objects while holding the bucket lock, and we only increase + * the refcount from 0 to 1 while the bucket lock is held. + */ + +#define IPSID_HASHSIZE 64 + +typedef struct ipsif_s +{ + ipsid_t *ipsif_head; + kmutex_t ipsif_lock; +} ipsif_t; + + +/* + * IPSEC stack instances + */ +struct ipsec_stack { + netstack_t *ipsec_netstack; /* Common netstack */ + + /* Packet dropper for IP IPsec processing failures */ + ipdropper_t ipsec_dropper; + +/* From spd.c */ + /* + * Policy rule index generator. 
We assume this won't wrap in the + * lifetime of a system. If we make 2^20 policy changes per second, + * this will last 2^44 seconds, or roughly 500,000 years, so we don't + * have to worry about reusing policy index values. + */ + uint64_t ipsec_next_policy_index; + + HASH_HEAD(ipsec_action_s) ipsec_action_hash[IPSEC_ACTION_HASH_SIZE]; + HASH_HEAD(ipsec_sel) *ipsec_sel_hash; + uint32_t ipsec_spd_hashsize; + + ipsif_t ipsec_ipsid_buckets[IPSID_HASHSIZE]; + + /* + * Active & Inactive system policy roots + */ + ipsec_policy_head_t ipsec_system_policy; + ipsec_policy_head_t ipsec_inactive_policy; + + /* Packet dropper for generic SPD drops. */ + ipdropper_t ipsec_spd_dropper; + krwlock_t ipsec_itp_get_byaddr_rw_lock; + ipsec_tun_pol_t *(*ipsec_itp_get_byaddr) + (uint32_t *, uint32_t *, int, netstack_t *); + +/* ipdrop.c */ + kstat_t *ipsec_ip_drop_kstat; + struct ip_dropstats *ipsec_ip_drop_types; + +/* spd.c */ + /* + * Have a counter for every possible policy message in + * ipsec_policy_failure_msgs + */ + uint32_t ipsec_policy_failure_count[IPSEC_POLICY_MAX]; + /* Time since last ipsec policy failure that printed a message. */ + hrtime_t ipsec_policy_failure_last; + +/* ip_spd.c */ + /* stats */ + kstat_t *ipsec_ksp; + struct ipsec_kstats_s *ipsec_kstats; + +/* sadb.c */ + /* Packet dropper for generic SADB drops. */ + ipdropper_t ipsec_sadb_dropper; + +/* spd.c */ + boolean_t ipsec_inbound_v4_policy_present; + boolean_t ipsec_outbound_v4_policy_present; + boolean_t ipsec_inbound_v6_policy_present; + boolean_t ipsec_outbound_v6_policy_present; + +/* spd.c */ + /* + * Because policy needs to know what algorithms are supported, keep the + * lists of algorithms here. + */ + kmutex_t ipsec_alg_lock; + + uint8_t ipsec_nalgs[IPSEC_NALGTYPES]; + ipsec_alginfo_t *ipsec_alglists[IPSEC_NALGTYPES][IPSEC_MAX_ALGS]; + + uint8_t ipsec_sortlist[IPSEC_NALGTYPES][IPSEC_MAX_ALGS]; + + int ipsec_algs_exec_mode[IPSEC_NALGTYPES]; + + uint32_t ipsec_tun_spd_hashsize; + /* + * Tunnel policies - AVL tree indexed by tunnel name. + */ + krwlock_t ipsec_tunnel_policy_lock; + uint64_t ipsec_tunnel_policy_gen; + avl_tree_t ipsec_tunnel_policies; + +/* ipsec_loader.c */ + kmutex_t ipsec_loader_lock; + int ipsec_loader_state; + int ipsec_loader_sig; + kt_did_t ipsec_loader_tid; + kcondvar_t ipsec_loader_sig_cv; /* For loader_sig conditions. */ + +}; +typedef struct ipsec_stack ipsec_stack_t; + +/* Handle the kstat_create in ip_drop_init() failing */ +#define DROPPER(_ipss, _dropper) \ + (((_ipss)->ipsec_ip_drop_types == NULL) ? NULL : \ + &((_ipss)->ipsec_ip_drop_types->_dropper)) + +/* * Loader states.. */ #define IPSEC_LOADER_WAIT 0 #define IPSEC_LOADER_FAILED -1 #define IPSEC_LOADER_SUCCEEDED 1 -extern kmutex_t ipsec_loader_lock; -extern int ipsec_loader_state; - /* * ipsec_loader entrypoints. 
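The DROPPER() macro above exists because ip_drop_init() creates its kstat per stack instance and that creation can fail; rather than making every drop path test ipsec_ip_drop_types, DROPPER() simply yields NULL in that case and the drop machinery is expected to tolerate a NULL counter. A hedged sketch of the idea (ipds_example is a hypothetical struct ip_dropstats member):

/* Hedged sketch: counting a drop without assuming the kstat exists. */
static void
example_count_drop(ipsec_stack_t *ipss)
{
	kstat_named_t *counter;

	counter = DROPPER(ipss, ipds_example);	/* NULL if ip_drop_init() failed */
	if (counter != NULL)
		counter->value.ui64++;
}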
*/ -extern void ipsec_loader_init(void); -extern void ipsec_loader_start(void); -extern void ipsec_loader_destroy(void); -extern void ipsec_loader_loadnow(void); -extern boolean_t ipsec_loader_wait(queue_t *q); -extern boolean_t ipsec_loaded(void); -extern boolean_t ipsec_failed(void); +extern void ipsec_loader_init(ipsec_stack_t *); +extern void ipsec_loader_start(ipsec_stack_t *); +extern void ipsec_loader_destroy(ipsec_stack_t *); +extern void ipsec_loader_loadnow(ipsec_stack_t *); +extern boolean_t ipsec_loader_wait(queue_t *q, ipsec_stack_t *); +extern boolean_t ipsec_loaded(ipsec_stack_t *); +extern boolean_t ipsec_failed(ipsec_stack_t *); /* * callback from ipsec_loader to ip */ -extern void ip_ipsec_load_complete(); +extern void ip_ipsec_load_complete(ipsec_stack_t *); /* * ipsec policy entrypoints (spd.c) */ -extern void ipsec_policy_destroy(void); -extern void ipsec_policy_init(void); -extern int ipsec_alloc_table(ipsec_policy_head_t *, int, int, boolean_t); +extern void ipsec_policy_g_destroy(void); +extern void ipsec_policy_g_init(void); + +extern int ipsec_alloc_table(ipsec_policy_head_t *, int, int, boolean_t, + netstack_t *); extern void ipsec_polhead_init(ipsec_policy_head_t *, int); extern void ipsec_polhead_destroy(ipsec_policy_head_t *); extern void ipsec_polhead_free_table(ipsec_policy_head_t *); extern mblk_t *ipsec_check_global_policy(mblk_t *, conn_t *, ipha_t *, - ip6_t *, boolean_t); + ip6_t *, boolean_t, netstack_t *); extern mblk_t *ipsec_check_inbound_policy(mblk_t *, conn_t *, ipha_t *, ip6_t *, boolean_t); extern boolean_t ipsec_in_to_out(mblk_t *, ipha_t *, ip6_t *); -extern void ipsec_log_policy_failure(int, char *, ipha_t *, ip6_t *, boolean_t); +extern void ipsec_log_policy_failure(int, char *, ipha_t *, ip6_t *, boolean_t, + netstack_t *); extern boolean_t ipsec_inbound_accept_clear(mblk_t *, ipha_t *, ip6_t *); extern int ipsec_conn_cache_policy(conn_t *, boolean_t); -extern mblk_t *ipsec_alloc_ipsec_out(void); +extern mblk_t *ipsec_alloc_ipsec_out(netstack_t *); extern mblk_t *ipsec_attach_ipsec_out(mblk_t *, conn_t *, ipsec_policy_t *, - uint8_t); + uint8_t, netstack_t *); extern mblk_t *ipsec_init_ipsec_out(mblk_t *, conn_t *, ipsec_policy_t *, - uint8_t); + uint8_t, netstack_t *); struct ipsec_in_s; extern ipsec_action_t *ipsec_in_to_out_action(struct ipsec_in_s *); extern boolean_t ipsec_check_ipsecin_latch(struct ipsec_in_s *, mblk_t *, @@ -666,50 +797,57 @@ extern boolean_t ipsec_check_ipsecin_latch(struct ipsec_in_s *, mblk_t *, conn_t *); extern void ipsec_latch_inbound(ipsec_latch_t *ipl, struct ipsec_in_s *ii); -extern void ipsec_policy_free(ipsec_policy_t *); +extern void ipsec_policy_free(ipsec_policy_t *, netstack_t *); extern void ipsec_action_free(ipsec_action_t *); -extern void ipsec_polhead_free(ipsec_policy_head_t *); -extern ipsec_policy_head_t *ipsec_polhead_split(ipsec_policy_head_t *); +extern void ipsec_polhead_free(ipsec_policy_head_t *, netstack_t *); +extern ipsec_policy_head_t *ipsec_polhead_split(ipsec_policy_head_t *, + netstack_t *); extern ipsec_policy_head_t *ipsec_polhead_create(void); -extern ipsec_policy_head_t *ipsec_system_policy(void); -extern ipsec_policy_head_t *ipsec_inactive_policy(void); -extern void ipsec_swap_policy(ipsec_policy_head_t *, ipsec_policy_head_t *); -extern void ipsec_swap_global_policy(void); +extern ipsec_policy_head_t *ipsec_system_policy(netstack_t *); +extern ipsec_policy_head_t *ipsec_inactive_policy(netstack_t *); +extern void ipsec_swap_policy(ipsec_policy_head_t *, ipsec_policy_head_t *, 
+ netstack_t *); +extern void ipsec_swap_global_policy(netstack_t *); -extern int ipsec_clone_system_policy(void); +extern int ipsec_clone_system_policy(netstack_t *); extern ipsec_policy_t *ipsec_policy_create(ipsec_selkey_t *, - const ipsec_act_t *, int, int, uint64_t *); + const ipsec_act_t *, int, int, uint64_t *, netstack_t *); extern boolean_t ipsec_policy_delete(ipsec_policy_head_t *, - ipsec_selkey_t *, int); -extern int ipsec_policy_delete_index(ipsec_policy_head_t *, uint64_t); -extern void ipsec_polhead_flush(ipsec_policy_head_t *); -extern int ipsec_copy_polhead(ipsec_policy_head_t *, ipsec_policy_head_t *); -extern void ipsec_actvec_from_req(ipsec_req_t *, ipsec_act_t **, uint_t *); + ipsec_selkey_t *, int, netstack_t *); +extern int ipsec_policy_delete_index(ipsec_policy_head_t *, uint64_t, + netstack_t *); +extern void ipsec_polhead_flush(ipsec_policy_head_t *, netstack_t *); +extern int ipsec_copy_polhead(ipsec_policy_head_t *, ipsec_policy_head_t *, + netstack_t *); +extern void ipsec_actvec_from_req(ipsec_req_t *, ipsec_act_t **, uint_t *, + netstack_t *); extern void ipsec_actvec_free(ipsec_act_t *, uint_t); extern int ipsec_req_from_head(ipsec_policy_head_t *, ipsec_req_t *, int); -extern mblk_t *ipsec_construct_inverse_acquire(sadb_msg_t *, sadb_ext_t **); +extern mblk_t *ipsec_construct_inverse_acquire(sadb_msg_t *, sadb_ext_t **, + netstack_t *); extern mblk_t *ip_wput_attach_policy(mblk_t *, ipha_t *, ip6_t *, ire_t *, conn_t *, boolean_t, zoneid_t); extern mblk_t *ip_wput_ire_parse_ipsec_out(mblk_t *, ipha_t *, ip6_t *, ire_t *, conn_t *, boolean_t, zoneid_t); extern ipsec_policy_t *ipsec_find_policy(int, conn_t *, - struct ipsec_out_s *, ipsec_selector_t *); -extern ipsid_t *ipsid_lookup(int, char *); + struct ipsec_out_s *, ipsec_selector_t *, netstack_t *); +extern ipsid_t *ipsid_lookup(int, char *, netstack_t *); extern boolean_t ipsid_equal(ipsid_t *, ipsid_t *); -extern void ipsid_gc(void); +extern void ipsid_gc(netstack_t *); extern void ipsec_latch_ids(ipsec_latch_t *, ipsid_t *, ipsid_t *); -extern void ipsec_config_flush(void); +extern void ipsec_config_flush(netstack_t *); extern boolean_t ipsec_check_policy(ipsec_policy_head_t *, ipsec_policy_t *, int); -extern void ipsec_enter_policy(ipsec_policy_head_t *, ipsec_policy_t *, int); -extern boolean_t ipsec_check_action(ipsec_act_t *, int *); +extern void ipsec_enter_policy(ipsec_policy_head_t *, ipsec_policy_t *, int, + netstack_t *); +extern boolean_t ipsec_check_action(ipsec_act_t *, int *, netstack_t *); -extern mblk_t *ipsec_out_tag(mblk_t *, mblk_t *); -extern mblk_t *ipsec_in_tag(mblk_t *, mblk_t *); +extern mblk_t *ipsec_out_tag(mblk_t *, mblk_t *, netstack_t *); +extern mblk_t *ipsec_in_tag(mblk_t *, mblk_t *, netstack_t *); extern mblk_t *ip_copymsg(mblk_t *mp); -extern void iplatch_free(ipsec_latch_t *); +extern void iplatch_free(ipsec_latch_t *, netstack_t *); extern ipsec_latch_t *iplatch_create(void); extern int ipsec_set_req(cred_t *, conn_t *, ipsec_req_t *); @@ -725,33 +863,28 @@ extern boolean_t iph_ipvN(ipsec_policy_head_t *, boolean_t); */ struct tun_s; /* Defined in inet/tun.h. 
*/ extern boolean_t ipsec_tun_inbound(mblk_t *, mblk_t **, ipsec_tun_pol_t *, - ipha_t *, ip6_t *, ipha_t *, ip6_t *, int); + ipha_t *, ip6_t *, ipha_t *, ip6_t *, int, netstack_t *); extern mblk_t *ipsec_tun_outbound(mblk_t *, struct tun_s *, ipha_t *, - ip6_t *, ipha_t *, ip6_t *, int); -extern void itp_free(ipsec_tun_pol_t *); -extern ipsec_tun_pol_t *create_tunnel_policy(char *, int *, uint64_t *); -extern ipsec_tun_pol_t *get_tunnel_policy(char *); -extern void itp_unlink(ipsec_tun_pol_t *); -extern void itp_free(ipsec_tun_pol_t *node); -extern void itp_walk(void (*)(ipsec_tun_pol_t *, void *), void *); - -extern ipsec_tun_pol_t *(*itp_get_byaddr)(uint32_t *, uint32_t *, int); -extern ipsec_tun_pol_t *itp_get_byaddr_dummy(uint32_t *, uint32_t *, - int); -extern krwlock_t itp_get_byaddr_rw_lock; + ip6_t *, ipha_t *, ip6_t *, int, netstack_t *); +extern void itp_free(ipsec_tun_pol_t *, netstack_t *); +extern ipsec_tun_pol_t *create_tunnel_policy(char *, int *, uint64_t *, + netstack_t *); +extern ipsec_tun_pol_t *get_tunnel_policy(char *, netstack_t *); +extern void itp_unlink(ipsec_tun_pol_t *, netstack_t *); +extern void itp_walk(void (*)(ipsec_tun_pol_t *, void *, netstack_t *), + void *, netstack_t *); -extern krwlock_t tunnel_policy_lock; -extern uint64_t tunnel_policy_gen; -extern avl_tree_t tunnel_policies; +extern ipsec_tun_pol_t *itp_get_byaddr_dummy(uint32_t *, uint32_t *, + int, netstack_t *); /* * IPsec AH/ESP functions called from IP. */ extern void ipsecah_in_assocfailure(mblk_t *, char, ushort_t, char *, - uint32_t, void *, int); + uint32_t, void *, int, ipsecah_stack_t *); extern void ipsecesp_in_assocfailure(mblk_t *, char, ushort_t, char *, - uint32_t, void *, int); + uint32_t, void *, int, ipsecesp_stack_t *); /* * Algorithm management helper functions. @@ -778,10 +911,10 @@ extern void spdsock_ddi_destroy(void); /* * AH- and ESP-specific functions that are called directly by other modules. */ -extern void ipsecah_fill_defs(struct sadb_x_ecomb *); -extern void ipsecesp_fill_defs(struct sadb_x_ecomb *); -extern void ipsecah_algs_changed(void); -extern void ipsecesp_algs_changed(void); +extern void ipsecah_fill_defs(struct sadb_x_ecomb *, netstack_t *); +extern void ipsecesp_fill_defs(struct sadb_x_ecomb *, netstack_t *); +extern void ipsecah_algs_changed(netstack_t *); +extern void ipsecesp_algs_changed(netstack_t *); extern void ipsecesp_init_funcs(ipsa_t *); extern void ipsecah_init_funcs(ipsa_t *); extern ipsec_status_t ipsecah_icmp_error(mblk_t *); @@ -795,17 +928,22 @@ extern void ipsec_hw_putnext(queue_t *, mblk_t *); /* * spdsock functions that are called directly by IP. */ -extern void spdsock_update_pending_algs(void); +extern void spdsock_update_pending_algs(netstack_t *); /* * IP functions that are called from AH and ESP. */ extern boolean_t ipsec_outbound_sa(mblk_t *, uint_t); -extern esph_t *ipsec_inbound_esp_sa(mblk_t *); -extern ah_t *ipsec_inbound_ah_sa(mblk_t *); +extern esph_t *ipsec_inbound_esp_sa(mblk_t *, netstack_t *); +extern ah_t *ipsec_inbound_ah_sa(mblk_t *, netstack_t *); extern ipsec_policy_t *ipsec_find_policy_head(ipsec_policy_t *, - ipsec_policy_head_t *, int, ipsec_selector_t *); + ipsec_policy_head_t *, int, ipsec_selector_t *, netstack_t *); +/* + * IP dropper init/destroy. 
+ */ +void ip_drop_init(ipsec_stack_t *); +void ip_drop_destroy(ipsec_stack_t *); /* * NAT-Traversal cleanup diff --git a/usr/src/uts/common/inet/ipsec_info.h b/usr/src/uts/common/inet/ipsec_info.h index 60505edf8c..7bcda1da26 100644 --- a/usr/src/uts/common/inet/ipsec_info.h +++ b/usr/src/uts/common/inet/ipsec_info.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -45,6 +45,13 @@ extern "C" { * * * Keysock consumer interface - These messages are wrappers for * PF_KEY messages. They flow between AH/ESP and keysock. + * + * Some of these messages include pointers such as a netstack_t pointer. + * We do not explicitly reference count those with netstack_hold/rele, + * since we depend on IP's ability to discard all of the IPSEC_{IN,OUT} + * messages in order to handle the ipsa pointers. + * We have special logic when doing asynch callouts to kEF for which we + * verify netstack_t pointer using the netstackid_t. */ /* @@ -83,6 +90,12 @@ extern "C" { * attributes of the security are reflected in <foo>_done fields below. * The code in policy check infers that it is a loopback case and * would not try to get the associations. + * + * The comment below (and for other netstack_t references) refers + * to the fact that we only do netstack_hold in particular cases, + * such as the references from open streams (ill_t and conn_t's + * pointers). Internally within IP we rely on IP's ability to cleanup e.g. + * ire_t's when an ill goes away. */ typedef struct ipsec_in_s { uint32_t ipsec_in_type; @@ -125,6 +138,8 @@ typedef struct ipsec_in_s { crypto_data_t ipsec_in_crypto_mac; /* to store the MAC */ zoneid_t ipsec_in_zoneid; /* target zone for the datagram */ + netstack_t *ipsec_in_ns; /* Does not have a netstack_hold */ + netstackid_t ipsec_in_stackid; /* Used while waing for kEF callback */ } ipsec_in_t; #define IPSECOUT_MAX_ADDRLEN 4 /* Max addr len. (in 32-bit words) */ @@ -233,6 +248,8 @@ typedef struct ipsec_out_s { zoneid_t ipsec_out_zoneid; /* source zone for the datagram */ in6_addr_t ipsec_out_nexthop_v6; /* nexthop IP address */ #define ipsec_out_nexthop_addr V4_PART_OF_V6(ipsec_out_nexthop_v6) + netstack_t *ipsec_out_ns; /* Does not have a netstack_hold */ + netstackid_t ipsec_out_stackid; /* Used while waing for kEF callback */ } ipsec_out_t; /* diff --git a/usr/src/uts/common/inet/ipsecah.h b/usr/src/uts/common/inet/ipsecah.h index eaad020037..c389664164 100644 --- a/usr/src/uts/common/inet/ipsecah.h +++ b/usr/src/uts/common/inet/ipsecah.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 1998-2003 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
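The two new ipsec_in_t fields encode the rule spelled out in the ipsec_info.h comments: ipsec_in_ns is cached without a netstack_hold, so after an asynchronous kEF (crypto framework) completion the saved stack id must be used to look the netstack up again and confirm it still names the same instance. One plausible shape of that check, assuming the standard netstack_find_by_stackid() and netstack_rele() interfaces:

/* Hedged sketch: revalidate an un-held netstack pointer after an async callback. */
static netstack_t *
example_verify_netstack(ipsec_in_t *ii)
{
	netstack_t *ns;

	ns = netstack_find_by_stackid(ii->ipsec_in_stackid);
	if (ns != NULL && ns != ii->ipsec_in_ns) {
		/* The stack id now belongs to a different (new) instance. */
		netstack_rele(ns);
		ns = NULL;
	}
	return (ns);	/* caller must netstack_rele() a non-NULL result */
}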
*/ @@ -29,19 +28,99 @@ #pragma ident "%Z%%M% %I% %E% SMI" +#include <inet/ip.h> +#include <inet/ipdrop.h> + #ifdef __cplusplus extern "C" { #endif +#include <sys/note.h> + #ifdef _KERNEL /* Named Dispatch Parameter Management Structure */ -typedef struct ipsecahpparam_s { +typedef struct ipsecahparam_s { uint_t ipsecah_param_min; uint_t ipsecah_param_max; uint_t ipsecah_param_value; char *ipsecah_param_name; } ipsecahparam_t; +/* + * Stats. This may eventually become a full-blown SNMP MIB once that spec + * stabilizes. + */ +typedef struct ah_kstats_s +{ + kstat_named_t ah_stat_num_aalgs; + kstat_named_t ah_stat_good_auth; + kstat_named_t ah_stat_bad_auth; + kstat_named_t ah_stat_replay_failures; + kstat_named_t ah_stat_replay_early_failures; + kstat_named_t ah_stat_keysock_in; + kstat_named_t ah_stat_out_requests; + kstat_named_t ah_stat_acquire_requests; + kstat_named_t ah_stat_bytes_expired; + kstat_named_t ah_stat_out_discards; + kstat_named_t ah_stat_in_accelerated; + kstat_named_t ah_stat_out_accelerated; + kstat_named_t ah_stat_noaccel; + kstat_named_t ah_stat_crypto_sync; + kstat_named_t ah_stat_crypto_async; + kstat_named_t ah_stat_crypto_failures; +} ah_kstats_t; + +/* + * ahstack->ah_kstats is equal to ahstack->ah_ksp->ks_data if + * kstat_create_netstack for ahstack->ah_ksp succeeds, but when it + * fails, it will be NULL. Note this is done for all stack instances, + * so it *could* fail. hence a non-NULL checking is done for + * AH_BUMP_STAT and AH_DEBUMP_STAT + */ +#define AH_BUMP_STAT(ahstack, x) \ +do { \ + if (ahstack->ah_kstats != NULL) \ + (ahstack->ah_kstats->ah_stat_ ## x).value.ui64++; \ +_NOTE(CONSTCOND) \ +} while (0) +#define AH_DEBUMP_STAT(ahstack, x) \ +do { \ + if (ahstack->ah_kstats != NULL) \ + (ahstack->ah_kstats->ah_stat_ ## x).value.ui64--; \ +_NOTE(CONSTCOND) \ +} while (0) + +/* + * IPSECAH stack instances + */ +struct ipsecah_stack { + netstack_t *ipsecah_netstack; /* Common netstack */ + + caddr_t ipsecah_g_nd; + ipsecahparam_t *ipsecah_params; + kmutex_t ipsecah_param_lock; /* Protects params */ + + sadbp_t ah_sadb; + + /* Packet dropper for AH drops. */ + ipdropper_t ah_dropper; + + kstat_t *ah_ksp; + ah_kstats_t *ah_kstats; + + /* + * Keysock instance of AH. There can be only one per stack instance. + * Use casptr() on this because I don't set it until KEYSOCK_HELLO + * comes down. + * Paired up with the ah_pfkey_q is the ah_event, which will age SAs. + */ + queue_t *ah_pfkey_q; + timeout_id_t ah_event; + + mblk_t *ah_ip_unbind; +}; +typedef struct ipsecah_stack ipsecah_stack_t; + #endif /* _KERNEL */ /* diff --git a/usr/src/uts/common/inet/ipsecesp.h b/usr/src/uts/common/inet/ipsecesp.h index 219ba03eb6..e0d22ec388 100644 --- a/usr/src/uts/common/inet/ipsecesp.h +++ b/usr/src/uts/common/inet/ipsecesp.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 1996-2003 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
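Since kstat_create_netstack() is attempted for every stack instance and any one attempt may fail, ah_kstats can legitimately be NULL; the guarded AH_BUMP_STAT/AH_DEBUMP_STAT macros above turn counter updates into no-ops in that case, so call sites remain single statements:

/* Hedged usage sketch: ahstack is the instance's ipsecah_stack_t pointer. */
AH_BUMP_STAT(ahstack, good_auth);	/* increments ah_stat_good_auth if kstats exist */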
*/ @@ -29,6 +28,9 @@ #pragma ident "%Z%%M% %I% %E% SMI" +#include <inet/ip.h> +#include <inet/ipdrop.h> + #ifdef __cplusplus extern "C" { #endif @@ -36,13 +38,46 @@ extern "C" { #ifdef _KERNEL /* Named Dispatch Parameter Management Structure */ -typedef struct ipsecesppparam_s { +typedef struct ipsecespparam_s { uint_t ipsecesp_param_min; uint_t ipsecesp_param_max; uint_t ipsecesp_param_value; char *ipsecesp_param_name; } ipsecespparam_t; +/* + * IPSECESP stack instances + */ +struct ipsecesp_stack { + netstack_t *ipsecesp_netstack; /* Common netstack */ + + caddr_t ipsecesp_g_nd; + struct ipsecespparam_s *ipsecesp_params; + kmutex_t ipsecesp_param_lock; /* Protects params */ + + /* Packet dropper for ESP drops. */ + ipdropper_t esp_dropper; + + kstat_t *esp_ksp; + struct esp_kstats_s *esp_kstats; + + /* + * Keysock instance of ESP. There can be only one per stack instance. + * Use casptr() on this because I don't set it until KEYSOCK_HELLO + * comes down. + * Paired up with the esp_pfkey_q is the esp_event, which will age SAs. + */ + queue_t *esp_pfkey_q; + timeout_id_t esp_event; + + mblk_t *esp_ip_unbind; + + sadbp_t esp_sadb; + +}; +typedef struct ipsecesp_stack ipsecesp_stack_t; + + #endif /* _KERNEL */ /* diff --git a/usr/src/uts/common/inet/keysock.h b/usr/src/uts/common/inet/keysock.h index 450900e4c1..50189666c7 100644 --- a/usr/src/uts/common/inet/keysock.h +++ b/usr/src/uts/common/inet/keysock.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 1998,2001-2003 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -47,6 +46,55 @@ extern optdb_obj_t keysock_opt_obj; extern uint_t keysock_max_optsize; /* + * KEYSOCK stack instances + */ +struct keysock_stack { + netstack_t *keystack_netstack; /* Common netstack */ + /* + * keysock_plumbed: zero if plumb not attempted, positive if it + * succeeded, negative if it failed. + */ + int keystack_plumbed; + caddr_t keystack_g_nd; + struct keysockparam_s *keystack_params; + + kmutex_t keystack_param_lock; + /* Protects the NDD variables. */ + + /* List of open PF_KEY sockets, protected by keysock_list_lock. */ + kmutex_t keystack_list_lock; + struct keysock_s *keystack_list; + + /* + * Consumers table. If an entry is NULL, keysock maintains + * the table. + */ + kmutex_t keystack_consumers_lock; + +#define KEYSOCK_MAX_CONSUMERS 256 + struct keysock_consumer_s *keystack_consumers[KEYSOCK_MAX_CONSUMERS]; + + /* + * State for flush/dump. This would normally be a boolean_t, but + * cas32() works best for a known 32-bit quantity. + */ + uint32_t keystack_flushdump; + int keystack_flushdump_errno; + + /* + * This integer counts the number of extended REGISTERed sockets. This + * determines if we should send extended REGISTERs. + */ + uint32_t keystack_num_extended; + + /* + * Global sequence space for SADB_ACQUIRE messages of any sort. 
+ */ + uint32_t keystack_acquire_seq; +}; +typedef struct keysock_stack keysock_stack_t; + +/* * keysock session state (one per open PF_KEY socket (i.e. as a driver)) * * I keep these in a linked list, and assign a monotonically increasing @@ -69,6 +117,7 @@ typedef struct keysock_s { /* Also protected by keysock_list_lock. */ minor_t keysock_serial; /* Serial number of this socket. */ + keysock_stack_t *keysock_keystack; } keysock_t; #define KEYSOCK_NOLOOP 0x1 /* Don't loopback messages (no replies). */ @@ -95,14 +144,17 @@ typedef struct keysock_consumer_s { queue_t *kc_wq; /* Write queue, putnext down */ /* Other goodies as a need them. */ - uint8_t kc_sa_type; /* What sort of SA am I? */ - uint_t kc_flags; + uint8_t kc_sa_type; /* What sort of SA am I? */ + uint_t kc_flags; + keysock_stack_t *kc_keystack; } keysock_consumer_t; /* Can only set flags when keysock_consumer_lock is held. */ #define KC_INTERNAL 0x1 /* Consumer maintained by keysock itself. */ #define KC_FLUSHING 0x2 /* SADB_FLUSH pending on this consumer. */ +extern int keysock_plumb_ipsec(netstack_t *); + #ifdef __cplusplus } #endif diff --git a/usr/src/uts/common/inet/mi.c b/usr/src/uts/common/inet/mi.c index 217bb5fc66..084e02cbcb 100644 --- a/usr/src/uts/common/inet/mi.c +++ b/usr/src/uts/common/inet/mi.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* Copyright (c) 1990 Mentat Inc. */ @@ -817,10 +816,12 @@ mi_open_link(void **mi_headp, IDP ptr, dev_t *devp, int flag, int sflag, ulong_t offset; head_name = kobj_getsymname((uintptr_t)mi_headp, &offset); - if (head_name != NULL && offset == 0) + if (head_name != NULL && offset == 0) { (void) sprintf(arena_name, "%s_", head_name); - else - (void) sprintf(arena_name, "0x%p_", (void *)mi_headp); + } else { + (void) sprintf(arena_name, "Hex0x%p_", + (void *)mi_headp); + } (void) sprintf(strchr(arena_name, '_') + 1, "minor"); mi_head = (mi_head_t *)mi_zalloc_sleep(sizeof (mi_head_t)); *mi_headp = (void *)mi_head; diff --git a/usr/src/uts/common/inet/nca/nca.h b/usr/src/uts/common/inet/nca/nca.h index fe146b501a..dc1a80a716 100644 --- a/usr/src/uts/common/inet/nca/nca.h +++ b/usr/src/uts/common/inet/nca/nca.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -1725,12 +1724,6 @@ extern boolean_t nca_reclaim_vlru(void); extern boolean_t nca_reclaim_plru(boolean_t, boolean_t); /* - * We want to use the tcp_mib in NCA. The ip_mib is already made extern - * in ip.h so we don't need to declare it here. - */ -extern mib2_tcp_t tcp_mib; - -/* * NCA_COUNTER() is used to add a signed long value to a unsigned long * counter, in general these counters are used to maintain NCA state. * diff --git a/usr/src/uts/common/inet/nd.c b/usr/src/uts/common/inet/nd.c index b866e939b9..cf903733b7 100644 --- a/usr/src/uts/common/inet/nd.c +++ b/usr/src/uts/common/inet/nd.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* Copyright (c) 1990 Mentat Inc. */ @@ -141,7 +140,7 @@ nd_getset(queue_t *q, caddr_t nd_param, MBLKP mp) case ND_SET: if (valp) { if ((iocp->ioc_cr != NULL) && - ((err = secpolicy_net_config(iocp->ioc_cr, B_FALSE)) + ((err = secpolicy_ip_config(iocp->ioc_cr, B_FALSE)) == 0)) { err = (*nde->nde_set_pfi)(q, mp1, valp, nde->nde_data, iocp->ioc_cr); @@ -221,6 +220,7 @@ nd_get_names(queue_t *q, MBLKP mp, caddr_t nd_param, cred_t *ioc_cr) * does not exist (*ndp == 0), a new table is allocated and 'ndp' * is stuffed. If there is not enough space in the table for a new * entry, more space is allocated. + * Never fails due to memory allocation failures. */ boolean_t nd_load(caddr_t *nd_pparam, char *name, ndgetf_t get_pfi, ndsetf_t set_pfi, @@ -232,8 +232,7 @@ nd_load(caddr_t *nd_pparam, char *name, ndgetf_t get_pfi, ndsetf_t set_pfi, if (!nd_pparam) return (B_FALSE); if ((nd = (ND *)(*nd_pparam)) == NULL) { - if ((nd = (ND *)mi_alloc(sizeof (ND), BPRI_MED)) == NULL) - return (B_FALSE); + nd = (ND *)mi_alloc_sleep(sizeof (ND), BPRI_MED); bzero((caddr_t)nd, sizeof (ND)); *nd_pparam = (caddr_t)nd; } @@ -244,9 +243,8 @@ nd_load(caddr_t *nd_pparam, char *name, ndgetf_t get_pfi, ndsetf_t set_pfi, } } if (nd->nd_free_count <= 1) { - if ((nde = (NDE *)mi_alloc(nd->nd_size + - NDE_ALLOC_SIZE, BPRI_MED)) == NULL) - return (B_FALSE); + nde = (NDE *)mi_alloc_sleep(nd->nd_size + + NDE_ALLOC_SIZE, BPRI_MED); bzero((char *)nde, nd->nd_size + NDE_ALLOC_SIZE); nd->nd_free_count += NDE_ALLOC_COUNT; if (nd->nd_tbl) { diff --git a/usr/src/uts/common/inet/optcom.h b/usr/src/uts/common/inet/optcom.h index 1e93497939..89cc75c6fd 100644 --- a/usr/src/uts/common/inet/optcom.h +++ b/usr/src/uts/common/inet/optcom.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* Copyright (c) 1990 Mentat Inc. */ @@ -113,10 +113,10 @@ typedef struct opdes_s { * are used to determine permissions. 
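In the nd.c hunk above, nd_load() switches from mi_alloc() to mi_alloc_sleep(), which blocks until memory is available, so the allocation-failure branches are removed and the function is documented as never failing for memory reasons. The sketch below is only a user-space analogy of trading an error path for a blocking allocator; xalloc_sleep() is a hypothetical helper, not a Solaris interface:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/* Hypothetical blocking allocator: retries until memory is available. */
static void *
xalloc_sleep(size_t len)
{
	void *p;

	while ((p = malloc(len)) == NULL)
		sleep(1);	/* in the kernel this would block, not spin */
	return (p);
}

/* Table loader in the spirit of nd_load(): no allocation error path needed. */
static char **
table_load(char **tbl, size_t *size, const char *name)
{
	if (tbl == NULL) {
		*size = 4;
		tbl = xalloc_sleep(*size * sizeof (char *));
		memset(tbl, 0, *size * sizeof (char *));
	}
	tbl[0] = strdup(name);	/* cannot be reached with tbl == NULL */
	return (tbl);
}

int
main(void)
{
	size_t size = 0;
	char **tbl = table_load(NULL, &size, "ip_forwarding");

	printf("loaded %s into a table of %zu slots\n", tbl[0], size);
	free(tbl[0]);
	free(tbl);
	return (0);
}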
*/ #define OA_POLICY_OK(x, c) \ - (secpolicy_net((c), (x)->opdes_access_req_priv, B_FALSE) == 0) + (secpolicy_ip((c), (x)->opdes_access_req_priv, B_FALSE) == 0) #define OA_POLICY_ONLY_OK(x, c) \ - (secpolicy_net((c), (x)->opdes_access_req_priv, B_TRUE) == 0) + (secpolicy_ip((c), (x)->opdes_access_req_priv, B_TRUE) == 0) #define OA_MATCHED_PRIV(x, c) ((x)->opdes_access_req_priv != OP_NP && \ OA_POLICY_ONLY_OK((x), (c))) diff --git a/usr/src/uts/common/inet/rawip_impl.h b/usr/src/uts/common/inet/rawip_impl.h index b0029b923a..d323b66517 100644 --- a/usr/src/uts/common/inet/rawip_impl.h +++ b/usr/src/uts/common/inet/rawip_impl.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* Copyright (c) 1990 Mentat Inc. */ @@ -36,6 +36,7 @@ extern "C" { #ifdef _KERNEL #include <sys/types.h> +#include <sys/netstack.h> #include <netinet/in.h> #include <netinet/icmp6.h> @@ -44,6 +45,27 @@ extern "C" { #include <inet/common.h> #include <inet/ip.h> +/* Named Dispatch Parameter Management Structure */ +typedef struct icmpparam_s { + uint_t icmp_param_min; + uint_t icmp_param_max; + uint_t icmp_param_value; + char *icmp_param_name; +} icmpparam_t; + +/* + * ICMP stack instances + */ +struct icmp_stack { + netstack_t *is_netstack; /* Common netstack */ + void *is_head; /* Head for list of open icmps */ + IDP is_nd; /* Points to table of ICMP ND variables. */ + icmpparam_t *is_param_arr; /* ndd variable table */ + kstat_t *is_ksp; /* kstats */ + mib2_rawip_t is_rawip_mib; /* SNMP fixed size info */ +}; +typedef struct icmp_stack icmp_stack_t; + /* Internal icmp control structure, one per open stream */ typedef struct icmp_s { uint_t icmp_state; /* TPI state */ @@ -116,7 +138,9 @@ typedef struct icmp_s { uint_t icmp_label_len_v6; /* sec. part of sticky opt */ in6_addr_t icmp_v6lastdst; /* most recent destination */ mblk_t *icmp_delabel; /* send this on close */ + icmp_stack_t *icmp_is; /* Stack instance */ } icmp_t; +#define icmp_rawip_mib icmp_is->is_rawip_mib #endif /* _KERNEL */ diff --git a/usr/src/uts/common/inet/sadb.h b/usr/src/uts/common/inet/sadb.h index b5e35166ac..86204cd5bd 100644 --- a/usr/src/uts/common/inet/sadb.h +++ b/usr/src/uts/common/inet/sadb.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -35,6 +35,7 @@ extern "C" { #include <inet/ipsec_info.h> #include <sys/crypto/common.h> #include <sys/crypto/api.h> +#include <sys/note.h> #define IPSA_MAX_ADDRLEN 4 /* Max address len. (in 32-bits) for an SA. */ @@ -232,6 +233,7 @@ typedef struct ipsa_s { /* MLS boxen will probably need more fields in here. */ + netstack_t *ipsa_netstack; /* Does not have a netstack_hold */ } ipsa_t; /* @@ -452,16 +454,11 @@ typedef struct sadbp_s uint32_t s_satype; queue_t *s_ip_q; uint32_t *s_acquire_timeout; - void (*s_acqfn)(ipsacq_t *, mblk_t *); + void (*s_acqfn)(ipsacq_t *, mblk_t *, netstack_t *); sadb_t s_v4; sadb_t s_v6; } sadbp_t; -/* - * Global IPsec security association databases (and all that go with them). - */ -extern sadbp_t ah_sadb, esp_sadb; - /* Pointer to an all-zeroes IPv6 address. 
*/ #define ALL_ZEROES_PTR ((uint32_t *)&ipv6_all_zeros) @@ -533,9 +530,9 @@ ipsa_t *ipsec_getassocbyconn(isaf_t *, ipsec_out_t *, uint32_t *, uint32_t *, int sadb_insertassoc(ipsa_t *, isaf_t *); /* SA table construction and destruction. */ -void sadbp_init(const char *name, sadbp_t *, int, int); -void sadbp_flush(sadbp_t *); -void sadbp_destroy(sadbp_t *); +void sadbp_init(const char *name, sadbp_t *, int, int, netstack_t *); +void sadbp_flush(sadbp_t *, netstack_t *); +void sadbp_destroy(sadbp_t *, netstack_t *); /* SA insertion and deletion. */ int sadb_insertassoc(ipsa_t *, isaf_t *); @@ -548,9 +545,9 @@ void sadb_pfkey_echo(queue_t *, mblk_t *, sadb_msg_t *, struct keysock_in_s *, ipsa_t *); void sadb_pfkey_error(queue_t *, mblk_t *, int, int, uint_t); void sadb_keysock_hello(queue_t **, queue_t *, mblk_t *, void (*)(void *), - timeout_id_t *, int); -int sadb_addrcheck(queue_t *, mblk_t *, sadb_ext_t *, uint_t); -boolean_t sadb_addrfix(keysock_in_t *, queue_t *, mblk_t *); + void *, timeout_id_t *, int); +int sadb_addrcheck(queue_t *, mblk_t *, sadb_ext_t *, uint_t, netstack_t *); +boolean_t sadb_addrfix(keysock_in_t *, queue_t *, mblk_t *, netstack_t *); int sadb_addrset(ire_t *); int sadb_delget_sa(mblk_t *, keysock_in_t *, sadbp_t *, int *, queue_t *, boolean_t); @@ -559,27 +556,30 @@ int sadb_delget_sa(mblk_t *, keysock_in_t *, sadbp_t *, int *, queue_t *, int sadb_purge_sa(mblk_t *, keysock_in_t *, sadb_t *, queue_t *, queue_t *); int sadb_common_add(queue_t *, queue_t *, mblk_t *, sadb_msg_t *, - keysock_in_t *, isaf_t *, isaf_t *, ipsa_t *, boolean_t, boolean_t, int *); + keysock_in_t *, isaf_t *, isaf_t *, ipsa_t *, boolean_t, boolean_t, int *, + netstack_t *); void sadb_set_usetime(ipsa_t *); boolean_t sadb_age_bytes(queue_t *, ipsa_t *, uint64_t, boolean_t); int sadb_update_sa(mblk_t *, keysock_in_t *, sadb_t *, - int *, queue_t *, int (*)(mblk_t *, keysock_in_t *, int *)); + int *, queue_t *, int (*)(mblk_t *, keysock_in_t *, int *, netstack_t *), + netstack_t *); void sadb_acquire(mblk_t *, ipsec_out_t *, boolean_t, boolean_t); -void sadb_destroy_acquire(ipsacq_t *); -mblk_t *sadb_setup_acquire(ipsacq_t *, uint8_t); -ipsa_t *sadb_getspi(keysock_in_t *, uint32_t, int *); -void sadb_in_acquire(sadb_msg_t *, sadbp_t *, queue_t *); +void sadb_destroy_acquire(ipsacq_t *, netstack_t *); +struct ipsec_stack; +mblk_t *sadb_setup_acquire(ipsacq_t *, uint8_t, struct ipsec_stack *); +ipsa_t *sadb_getspi(keysock_in_t *, uint32_t, int *, netstack_t *); +void sadb_in_acquire(sadb_msg_t *, sadbp_t *, queue_t *, netstack_t *); boolean_t sadb_replay_check(ipsa_t *, uint32_t); boolean_t sadb_replay_peek(ipsa_t *, uint32_t); int sadb_dump(queue_t *, mblk_t *, minor_t, sadb_t *); void sadb_replay_delete(ipsa_t *); -void sadb_ager(sadb_t *, queue_t *, queue_t *, int); +void sadb_ager(sadb_t *, queue_t *, queue_t *, int, netstack_t *); -timeout_id_t sadb_retimeout(hrtime_t, queue_t *, void (*)(void *), +timeout_id_t sadb_retimeout(hrtime_t, queue_t *, void (*)(void *), void *, uint_t *, uint_t, short); void sadb_sa_refrele(void *target); -void sadb_set_lpkt(ipsa_t *, mblk_t *); +void sadb_set_lpkt(ipsa_t *, mblk_t *, netstack_t *); mblk_t *sadb_clear_lpkt(ipsa_t *); /* @@ -591,19 +591,22 @@ mblk_t *sadb_fmt_sa_req(uint_t, uint_t, ipsa_t *, boolean_t); * Sub-set of the IPsec hardware acceleration capabilities functions * implemented by ip_if.c */ -extern boolean_t ipsec_capab_match(ill_t *, uint_t, boolean_t, ipsa_t *); -extern void ill_ipsec_capab_send_all(uint_t, mblk_t *, ipsa_t *); +extern 
boolean_t ipsec_capab_match(ill_t *, uint_t, boolean_t, ipsa_t *, + netstack_t *); +extern void ill_ipsec_capab_send_all(uint_t, mblk_t *, ipsa_t *, + netstack_t *); /* * One IPsec -> IP linking routine, and two IPsec rate-limiting routines. */ extern boolean_t sadb_t_bind_req(queue_t *, int); -/*PRINTFLIKE5*/ -extern void ipsec_rl_strlog(short, short, char, ushort_t, char *, ...) - __KPRINTFLIKE(5); +/*PRINTFLIKE6*/ +extern void ipsec_rl_strlog(netstack_t *, short, short, char, + ushort_t, char *, ...) + __KPRINTFLIKE(6); extern void ipsec_assocfailure(short, short, char, ushort_t, char *, uint32_t, - void *, int); + void *, int, netstack_t *); /* * Algorithm types. @@ -671,19 +674,14 @@ typedef enum { IPSEC_ALGS_EXEC_ASYNC = 1 } ipsec_algs_exec_mode_t; -extern uint8_t ipsec_nalgs[IPSEC_NALGTYPES]; -extern ipsec_alginfo_t *ipsec_alglists[IPSEC_NALGTYPES][IPSEC_MAX_ALGS]; -extern uint8_t ipsec_sortlist[IPSEC_NALGTYPES][IPSEC_MAX_ALGS]; -extern ipsec_algs_exec_mode_t ipsec_algs_exec_mode[IPSEC_NALGTYPES]; - -extern kmutex_t alg_lock; - -extern void ipsec_alg_reg(ipsec_algtype_t, ipsec_alginfo_t *); -extern void ipsec_alg_unreg(ipsec_algtype_t, uint8_t); -extern void ipsec_alg_fix_min_max(ipsec_alginfo_t *, ipsec_algtype_t); +extern void ipsec_alg_reg(ipsec_algtype_t, ipsec_alginfo_t *, netstack_t *); +extern void ipsec_alg_unreg(ipsec_algtype_t, uint8_t, netstack_t *); +extern void ipsec_alg_fix_min_max(ipsec_alginfo_t *, ipsec_algtype_t, + netstack_t *ns); extern void ipsec_alg_free(ipsec_alginfo_t *); extern void ipsec_register_prov_update(void); -extern void sadb_alg_update(ipsec_algtype_t, uint8_t, boolean_t); +extern void sadb_alg_update(ipsec_algtype_t, uint8_t, boolean_t, + netstack_t *); /* * Context templates management. @@ -694,9 +692,12 @@ extern void sadb_alg_update(ipsec_algtype_t, uint8_t, boolean_t); if ((_tmpl = (_sa)->_which) == IPSEC_CTX_TMPL_ALLOC) { \ mutex_enter(&assoc->ipsa_lock); \ if ((_sa)->_which == IPSEC_CTX_TMPL_ALLOC) { \ - mutex_enter(&alg_lock); \ + ipsec_stack_t *ipss; \ + \ + ipss = assoc->ipsa_netstack->netstack_ipsec; \ + mutex_enter(&ipss->ipsec_alg_lock); \ (void) ipsec_create_ctx_tmpl(_sa, _type); \ - mutex_exit(&alg_lock); \ + mutex_exit(&ipss->ipsec_alg_lock); \ } \ mutex_exit(&assoc->ipsa_lock); \ if ((_tmpl = (_sa)->_which) == IPSEC_CTX_TMPL_ALLOC) \ @@ -711,9 +712,9 @@ extern void ipsec_destroy_ctx_tmpl(ipsa_t *, ipsec_algtype_t); extern int ipsec_check_key(crypto_mech_type_t, sadb_key_t *, boolean_t, int *); /* natt cleanup */ -extern void sadb_clear_timeouts(queue_t *); +extern void sadb_clear_timeouts(queue_t *, netstack_t *); -typedef struct { +typedef struct ipsec_kstats_s { kstat_named_t esp_stat_in_requests; kstat_named_t esp_stat_in_discards; kstat_named_t esp_stat_lookup_failure; @@ -724,15 +725,36 @@ typedef struct { kstat_named_t sadb_acquire_qhiwater; } ipsec_kstats_t; -extern ipsec_kstats_t *ipsec_kstats; -extern void ipsec_kstat_init(void); -extern void ipsec_kstat_destroy(void); - -#define IP_ESP_BUMP_STAT(x) (ipsec_kstats->esp_stat_ ## x).value.ui64++ -#define IP_AH_BUMP_STAT(x) (ipsec_kstats->ah_stat_ ## x).value.ui64++ -#define IP_ACQUIRE_STAT(val, new) \ -if (((uint64_t)(new)) > (ipsec_kstats->sadb_acquire_ ## val).value.ui64) \ - (ipsec_kstats->sadb_acquire_ ## val).value.ui64 = ((uint64_t)(new)) +/* + * (ipss)->ipsec_kstats is equal to (ipss)->ipsec_ksp->ks_data if + * kstat_create_netstack for (ipss)->ipsec_ksp succeeds, but when it + * fails, it will be NULL. Note this is done for all stack instances, + * so it *could* fail. 
hence a non-NULL checking is done for + * IP_ESP_BUMP_STAT, IP_AH_BUMP_STAT and IP_ACQUIRE_STAT + */ +#define IP_ESP_BUMP_STAT(ipss, x) \ +do { \ + if ((ipss)->ipsec_kstats != NULL) \ + ((ipss)->ipsec_kstats->esp_stat_ ## x).value.ui64++; \ +_NOTE(CONSTCOND) \ +} while (0) + +#define IP_AH_BUMP_STAT(ipss, x) \ +do { \ + if ((ipss)->ipsec_kstats != NULL) \ + ((ipss)->ipsec_kstats->ah_stat_ ## x).value.ui64++; \ +_NOTE(CONSTCOND) \ +} while (0) + +#define IP_ACQUIRE_STAT(ipss, val, new) \ +do { \ + if ((ipss)->ipsec_kstats != NULL && \ + ((uint64_t)(new)) > \ + ((ipss)->ipsec_kstats->sadb_acquire_ ## val).value.ui64) \ + ((ipss)->ipsec_kstats->sadb_acquire_ ## val).value.ui64 = \ + ((uint64_t)(new)); \ +_NOTE(CONSTCOND) \ +} while (0) #ifdef __cplusplus } diff --git a/usr/src/uts/common/inet/sctp/sctp.c b/usr/src/uts/common/inet/sctp/sctp.c index 436acd01fb..92b9d7407d 100644 --- a/usr/src/uts/common/inet/sctp/sctp.c +++ b/usr/src/uts/common/inet/sctp/sctp.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -43,6 +43,7 @@ #include <sys/cpuvar.h> #include <sys/random.h> #include <sys/priv.h> +#include <sys/sunldi.h> #include <sys/errno.h> #include <sys/signal.h> @@ -60,6 +61,7 @@ #include <inet/ip6.h> #include <inet/mi.h> #include <inet/mib2.h> +#include <inet/kstatcom.h> #include <inet/nd.h> #include <inet/optcom.h> #include <inet/ipclassifier.h> @@ -77,18 +79,29 @@ extern major_t SCTP_MAJ; int sctpdebug; sin6_t sctp_sin6_null; /* Zero address for quick clears */ -extern mblk_t *sctp_pad_mp; /* pad unaligned data chunks */ +/* + * Have to ensure that sctp_g_q_close is not done by an + * interrupt thread. + */ +static taskq_t *sctp_taskq; static void sctp_closei_local(sctp_t *sctp); static int sctp_init_values(sctp_t *, sctp_t *, int); static void sctp_icmp_error_ipv6(sctp_t *sctp, mblk_t *mp); static void sctp_process_recvq(void *); -static void sctp_rq_tq_init(void); -static void sctp_rq_tq_fini(void); +static void sctp_rq_tq_init(sctp_stack_t *); +static void sctp_rq_tq_fini(sctp_stack_t *); static void sctp_conn_cache_init(); static void sctp_conn_cache_fini(); static int sctp_conn_cache_constructor(); static void sctp_conn_cache_destructor(); +void sctp_g_q_setup(sctp_stack_t *); +void sctp_g_q_create(sctp_stack_t *); +void sctp_g_q_destroy(sctp_stack_t *); + +static void *sctp_stack_init(netstackid_t stackid, netstack_t *ns); +static void sctp_stack_shutdown(netstackid_t stackid, void *arg); +static void sctp_stack_fini(netstackid_t stackid, void *arg); /* * SCTP receive queue taskq @@ -106,15 +119,7 @@ static void sctp_conn_cache_destructor(); * determined by recvq_tq_list_cur. */ -/* This lock protects the SCTP recvq_tq_list array and recvq_tq_list_cur_sz. */ -static kmutex_t sctp_rq_tq_lock; -int sctp_recvq_tq_list_max_sz = 16; -static taskq_t **recvq_tq_list; - -/* Current number of recvq taskq. At least 1 for the default taskq. */ -static uint32_t recvq_tq_list_cur_sz = 1; -static uint32_t recvq_tq_list_cur = 0; - +/* /etc/system variables */ /* The minimum number of threads for each taskq. */ int sctp_recvq_tq_thr_min = 4; /* The maximum number of threads for each taskq. */ @@ -124,19 +129,6 @@ int sctp_recvq_tq_task_min = 5; /* The maxiimum number of tasks for each taskq. */ int sctp_recvq_tq_task_max = 50; -/* - * Default queue used for sending packets. No need to have lock for it - * as it should never be changed. 
- */ -queue_t *sctp_g_q; -int sctp_g_q_fd; -/* The default sctp_t for responding out of the blue packets. */ -sctp_t *gsctp; - -/* Protected by sctp_g_lock */ -list_t sctp_g_list; /* SCTP instance data chain */ -kmutex_t sctp_g_lock; - /* sctp_t/conn_t kmem cache */ struct kmem_cache *sctp_conn_cache; @@ -146,16 +138,16 @@ struct kmem_cache *sctp_conn_cache; mutex_exit(&(sctp)->sctp_reflock); /* Link/unlink a sctp_t to/from the global list. */ -#define SCTP_LINK(sctp) \ - mutex_enter(&sctp_g_lock); \ - list_insert_tail(&sctp_g_list, (sctp)); \ - mutex_exit(&sctp_g_lock); +#define SCTP_LINK(sctp, sctps) \ + mutex_enter(&(sctps)->sctps_g_lock); \ + list_insert_tail(&sctps->sctps_g_list, (sctp)); \ + mutex_exit(&(sctps)->sctps_g_lock); -#define SCTP_UNLINK(sctp) \ - mutex_enter(&sctp_g_lock); \ +#define SCTP_UNLINK(sctp, sctps) \ + mutex_enter(&(sctps)->sctps_g_lock); \ ASSERT((sctp)->sctp_condemned); \ - list_remove(&sctp_g_list, (sctp)); \ - mutex_exit(&sctp_g_lock); + list_remove(&(sctps)->sctps_g_list, (sctp)); \ + mutex_exit(&(sctps)->sctps_g_lock); /* * Hooks for Sun Cluster. On non-clustered nodes these will remain NULL. @@ -192,18 +184,26 @@ sctp_create_eager(sctp_t *psctp) mblk_t *ack_mp, *hb_mp; conn_t *connp, *pconnp; cred_t *credp; + sctp_stack_t *sctps = psctp->sctp_sctps; - if ((connp = ipcl_conn_create(IPCL_SCTPCONN, KM_NOSLEEP)) == NULL) + if ((connp = ipcl_conn_create(IPCL_SCTPCONN, KM_NOSLEEP, + sctps->sctps_netstack)) == NULL) { return (NULL); + } connp->conn_ulp_labeled = is_system_labeled(); sctp = CONN2SCTP(connp); + sctp->sctp_sctps = sctps; if ((ack_mp = sctp_timer_alloc(sctp, sctp_ack_timer)) == NULL || (hb_mp = sctp_timer_alloc(sctp, sctp_heartbeat_timer)) == NULL) { if (ack_mp != NULL) freeb(ack_mp); + netstack_rele(sctps->sctps_netstack); + connp->conn_netstack = NULL; + sctp->sctp_sctps = NULL; + SCTP_G_Q_REFRELE(sctps); kmem_cache_free(sctp_conn_cache, connp); return (NULL); } @@ -221,6 +221,10 @@ sctp_create_eager(sctp_t *psctp) if (sctp_init_values(sctp, psctp, KM_NOSLEEP) != 0) { freeb(ack_mp); freeb(hb_mp); + netstack_rele(sctps->sctps_netstack); + connp->conn_netstack = NULL; + sctp->sctp_sctps = NULL; + SCTP_G_Q_REFRELE(sctps); kmem_cache_free(sctp_conn_cache, connp); return (NULL); } @@ -249,7 +253,7 @@ sctp_create_eager(sctp_t *psctp) * Link to the global as soon as possible so that this sctp_t * can be found. */ - SCTP_LINK(sctp); + SCTP_LINK(sctp, sctps); return (sctp); } @@ -649,9 +653,12 @@ sctp_free(conn_t *connp) { sctp_t *sctp = CONN2SCTP(connp); int cnt; + sctp_stack_t *sctps = sctp->sctp_sctps; + netstack_t *ns; + ASSERT(sctps != NULL); /* Unlink it from the global list */ - SCTP_UNLINK(sctp); + SCTP_UNLINK(sctp, sctps); ASSERT(connp->conn_ref == 0); ASSERT(connp->conn_ulp == IPPROTO_SCTP); @@ -743,17 +750,19 @@ sctp_free(conn_t *connp) bzero(&sctp->sctp_bits, sizeof (sctp->sctp_bits)); /* It is time to update the global statistics. 
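The reworked SCTP_LINK/SCTP_UNLINK macros above attach each sctp_t to the instance list of its own stack under that stack's sctps_g_lock, replacing the single global list. Below is a small pthread-based sketch of a per-stack, mutex-protected instance list; the types and function names are illustrative only:

#include <pthread.h>
#include <stdio.h>

typedef struct conn {
	struct conn	*c_next;
	int		c_id;
} conn_t;

/* Per-stack state: its own list head and its own lock. */
typedef struct net_stack {
	pthread_mutex_t	ns_lock;
	conn_t		*ns_list;
} net_stack_t;

static void
stack_link(net_stack_t *ns, conn_t *c)		/* cf. SCTP_LINK(sctp, sctps) */
{
	pthread_mutex_lock(&ns->ns_lock);
	c->c_next = ns->ns_list;
	ns->ns_list = c;
	pthread_mutex_unlock(&ns->ns_lock);
}

static void
stack_unlink(net_stack_t *ns, conn_t *c)	/* cf. SCTP_UNLINK(sctp, sctps) */
{
	conn_t **pp;

	pthread_mutex_lock(&ns->ns_lock);
	for (pp = &ns->ns_list; *pp != NULL; pp = &(*pp)->c_next) {
		if (*pp == c) {
			*pp = c->c_next;
			break;
		}
	}
	pthread_mutex_unlock(&ns->ns_lock);
}

int
main(void)
{
	net_stack_t ns = { PTHREAD_MUTEX_INITIALIZER, NULL };
	conn_t a = { NULL, 1 }, b = { NULL, 2 };

	stack_link(&ns, &a);
	stack_link(&ns, &b);
	stack_unlink(&ns, &a);
	printf("head of list: conn %d\n", ns.ns_list->c_id);
	return (0);
}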
*/ - UPDATE_MIB(&sctp_mib, sctpOutSCTPPkts, sctp->sctp_opkts); - UPDATE_MIB(&sctp_mib, sctpOutCtrlChunks, sctp->sctp_obchunks); - UPDATE_MIB(&sctp_mib, sctpOutOrderChunks, sctp->sctp_odchunks); - UPDATE_MIB(&sctp_mib, sctpOutUnorderChunks, sctp->sctp_oudchunks); - UPDATE_MIB(&sctp_mib, sctpRetransChunks, sctp->sctp_rxtchunks); - UPDATE_MIB(&sctp_mib, sctpInSCTPPkts, sctp->sctp_ipkts); - UPDATE_MIB(&sctp_mib, sctpInCtrlChunks, sctp->sctp_ibchunks); - UPDATE_MIB(&sctp_mib, sctpInOrderChunks, sctp->sctp_idchunks); - UPDATE_MIB(&sctp_mib, sctpInUnorderChunks, sctp->sctp_iudchunks); - UPDATE_MIB(&sctp_mib, sctpFragUsrMsgs, sctp->sctp_fragdmsgs); - UPDATE_MIB(&sctp_mib, sctpReasmUsrMsgs, sctp->sctp_reassmsgs); + UPDATE_MIB(&sctps->sctps_mib, sctpOutSCTPPkts, sctp->sctp_opkts); + UPDATE_MIB(&sctps->sctps_mib, sctpOutCtrlChunks, sctp->sctp_obchunks); + UPDATE_MIB(&sctps->sctps_mib, sctpOutOrderChunks, sctp->sctp_odchunks); + UPDATE_MIB(&sctps->sctps_mib, + sctpOutUnorderChunks, sctp->sctp_oudchunks); + UPDATE_MIB(&sctps->sctps_mib, sctpRetransChunks, sctp->sctp_rxtchunks); + UPDATE_MIB(&sctps->sctps_mib, sctpInSCTPPkts, sctp->sctp_ipkts); + UPDATE_MIB(&sctps->sctps_mib, sctpInCtrlChunks, sctp->sctp_ibchunks); + UPDATE_MIB(&sctps->sctps_mib, sctpInOrderChunks, sctp->sctp_idchunks); + UPDATE_MIB(&sctps->sctps_mib, + sctpInUnorderChunks, sctp->sctp_iudchunks); + UPDATE_MIB(&sctps->sctps_mib, sctpFragUsrMsgs, sctp->sctp_fragdmsgs); + UPDATE_MIB(&sctps->sctps_mib, sctpReasmUsrMsgs, sctp->sctp_reassmsgs); sctp->sctp_opkts = 0; sctp->sctp_obchunks = 0; sctp->sctp_odchunks = 0; @@ -775,11 +784,11 @@ sctp_free(conn_t *connp) /* Clean up conn_t stuff */ connp->conn_policy_cached = B_FALSE; if (connp->conn_latch != NULL) { - IPLATCH_REFRELE(connp->conn_latch); + IPLATCH_REFRELE(connp->conn_latch, connp->conn_netstack); connp->conn_latch = NULL; } if (connp->conn_policy != NULL) { - IPPH_REFRELE(connp->conn_policy); + IPPH_REFRELE(connp->conn_policy, connp->conn_netstack); connp->conn_policy = NULL; } if (connp->conn_ipsec_opt_mp != NULL) { @@ -791,6 +800,13 @@ sctp_free(conn_t *connp) connp->conn_cred = NULL; } + /* Every sctp_t holds one reference on the default queue */ + sctp->sctp_sctps = NULL; + SCTP_G_Q_REFRELE(sctps); + + ns = connp->conn_netstack; + connp->conn_netstack = NULL; + netstack_rele(ns); kmem_cache_free(sctp_conn_cache, connp); } @@ -859,6 +875,7 @@ sctp_init_values(sctp_t *sctp, sctp_t *psctp, int sleep) { int err; int cnt; + sctp_stack_t *sctps = sctp->sctp_sctps; conn_t *connp, *pconnp; ASSERT((sctp->sctp_family == AF_INET && @@ -883,7 +900,7 @@ sctp_init_values(sctp_t *sctp, sctp_t *psctp, int sleep) sctp->sctp_strikes = 0; sctp->sctp_last_mtu_probe = lbolt64; - sctp->sctp_mtu_probe_intvl = sctp_mtu_probe_interval; + sctp->sctp_mtu_probe_intvl = sctps->sctps_mtu_probe_interval; sctp->sctp_sack_gaps = 0; sctp->sctp_sack_toggle = 2; @@ -991,24 +1008,27 @@ sctp_init_values(sctp_t *sctp, sctp_t *psctp, int sleep) /* * Set to system defaults */ - sctp->sctp_cookie_lifetime = MSEC_TO_TICK(sctp_cookie_life); - sctp->sctp_xmit_lowater = sctp_xmit_lowat; - sctp->sctp_xmit_hiwater = sctp_xmit_hiwat; - sctp->sctp_cwnd_max = sctp_cwnd_max_; - sctp->sctp_rwnd = sctp_recv_hiwat; + sctp->sctp_cookie_lifetime = + MSEC_TO_TICK(sctps->sctps_cookie_life); + sctp->sctp_xmit_lowater = sctps->sctps_xmit_lowat; + sctp->sctp_xmit_hiwater = sctps->sctps_xmit_hiwat; + sctp->sctp_cwnd_max = sctps->sctps_cwnd_max_; + sctp->sctp_rwnd = sctps->sctps_recv_hiwat; sctp->sctp_irwnd = sctp->sctp_rwnd; - 
sctp->sctp_rto_max = MSEC_TO_TICK(sctp_rto_maxg); + sctp->sctp_rto_max = MSEC_TO_TICK(sctps->sctps_rto_maxg); sctp->sctp_init_rto_max = sctp->sctp_rto_max; - sctp->sctp_rto_min = MSEC_TO_TICK(sctp_rto_ming); - sctp->sctp_rto_initial = MSEC_TO_TICK(sctp_rto_initialg); - sctp->sctp_pa_max_rxt = sctp_pa_max_retr; - sctp->sctp_pp_max_rxt = sctp_pp_max_retr; - sctp->sctp_max_init_rxt = sctp_max_init_retr; - - sctp->sctp_num_istr = sctp_max_in_streams; - sctp->sctp_num_ostr = sctp_initial_out_streams; - - sctp->sctp_hb_interval = MSEC_TO_TICK(sctp_heartbeat_interval); + sctp->sctp_rto_min = MSEC_TO_TICK(sctps->sctps_rto_ming); + sctp->sctp_rto_initial = MSEC_TO_TICK( + sctps->sctps_rto_initialg); + sctp->sctp_pa_max_rxt = sctps->sctps_pa_max_retr; + sctp->sctp_pp_max_rxt = sctps->sctps_pp_max_retr; + sctp->sctp_max_init_rxt = sctps->sctps_max_init_retr; + + sctp->sctp_num_istr = sctps->sctps_max_in_streams; + sctp->sctp_num_ostr = sctps->sctps_initial_out_streams; + + sctp->sctp_hb_interval = + MSEC_TO_TICK(sctps->sctps_heartbeat_interval); } sctp->sctp_understands_asconf = B_TRUE; sctp->sctp_understands_addip = B_TRUE; @@ -1069,6 +1089,7 @@ sctp_icmp_error(sctp_t *sctp, mblk_t *mp) uint32_t new_mtu; in6_addr_t dst; sctp_faddr_t *fp; + sctp_stack_t *sctps = sctp->sctp_sctps; dprint(1, ("sctp_icmp_error: sctp=%p, mp=%p\n", (void *)sctp, (void *)mp)); @@ -1111,7 +1132,7 @@ sctp_icmp_error(sctp_t *sctp, mblk_t *mp) * sctp_wput_slow(). Need to adjust all those * params to make sure sctp_wput_slow() work properly. */ - if (sctp_ignore_path_mtu) + if (sctps->sctps_ignore_path_mtu) break; /* find the offending faddr */ @@ -1144,7 +1165,7 @@ sctp_icmp_error(sctp_t *sctp, mblk_t *mp) if (!sctp_icmp_verf(sctp, sctph, mp)) { break; } - BUMP_MIB(&sctp_mib, sctpAborted); + BUMP_MIB(&sctps->sctps_mib, sctpAborted); sctp_assoc_event(sctp, SCTP_CANT_STR_ASSOC, 0, NULL); sctp_clean_death(sctp, ECONNREFUSED); @@ -1185,6 +1206,7 @@ sctp_icmp_error_ipv6(sctp_t *sctp, mblk_t *mp) uint8_t *nexthdrp; uint32_t new_mtu; sctp_faddr_t *fp; + sctp_stack_t *sctps = sctp->sctp_sctps; ip6h = (ip6_t *)mp->b_rptr; iph_hdr_length = (ip6h->ip6_nxt != IPPROTO_SCTP) ? @@ -1221,7 +1243,7 @@ sctp_icmp_error_ipv6(sctp_t *sctp, mblk_t *mp) * sctp_wput_slow(). Need to adjust all those * params to make sure sctp_wput_slow() work properly. */ - if (sctp_ignore_path_mtu) + if (sctps->sctps_ignore_path_mtu) break; /* find the offending faddr */ @@ -1251,7 +1273,7 @@ sctp_icmp_error_ipv6(sctp_t *sctp, mblk_t *mp) } if (sctp->sctp_state == SCTPS_COOKIE_WAIT || sctp->sctp_state == SCTPS_COOKIE_ECHOED) { - BUMP_MIB(&sctp_mib, sctpAborted); + BUMP_MIB(&sctps->sctps_mib, sctpAborted); sctp_assoc_event(sctp, SCTP_CANT_STR_ASSOC, 0, NULL); sctp_clean_death(sctp, ECONNREFUSED); @@ -1280,7 +1302,7 @@ sctp_icmp_error_ipv6(sctp_t *sctp, mblk_t *mp) break; } if (sctp->sctp_state == SCTPS_COOKIE_WAIT) { - BUMP_MIB(&sctp_mib, sctpAborted); + BUMP_MIB(&sctps->sctps_mib, sctpAborted); sctp_assoc_event(sctp, SCTP_CANT_STR_ASSOC, 0, NULL); sctp_clean_death(sctp, ECONNREFUSED); @@ -1310,25 +1332,69 @@ sctp_create(void *sctp_ulpd, sctp_t *parent, int family, int flags, conn_t *sctp_connp; mblk_t *ack_mp, *hb_mp; int sleep = flags & SCTP_CAN_BLOCK ? KM_SLEEP : KM_NOSLEEP; + zoneid_t zoneid; + sctp_stack_t *sctps; /* User must supply a credential. 
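In sctp_init_values() above, a new association is now seeded from its stack's tunables (sctps_rto_maxg, sctps_xmit_hiwat, and so on) rather than from global defaults, so two zones with separate stacks can run different settings. A toy sketch of copying per-stack defaults at creation time; the names and numbers here are invented:

#include <stdio.h>

/* Hypothetical per-stack tunables (set via ndd in the real code). */
typedef struct stack_tunables {
	int	st_rto_initial_ms;
	int	st_rto_max_ms;
	int	st_xmit_hiwat;
} stack_tunables_t;

typedef struct assoc {
	int	a_rto_initial_ms;
	int	a_rto_max_ms;
	int	a_xmit_hiwat;
} assoc_t;

/* Every new association snapshots the defaults of *its* stack. */
static void
assoc_init(assoc_t *a, const stack_tunables_t *st)
{
	a->a_rto_initial_ms = st->st_rto_initial_ms;
	a->a_rto_max_ms = st->st_rto_max_ms;
	a->a_xmit_hiwat = st->st_xmit_hiwat;
}

int
main(void)
{
	stack_tunables_t global_zone = { 3000, 60000, 102400 };
	stack_tunables_t my_zone = { 1000, 30000, 65536 };
	assoc_t a, b;

	assoc_init(&a, &global_zone);
	assoc_init(&b, &my_zone);
	printf("a rto_max=%dms, b rto_max=%dms\n",
	    a.a_rto_max_ms, b.a_rto_max_ms);
	return (0);
}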
*/ if (credp == NULL) return (NULL); - if ((sctp_connp = ipcl_conn_create(IPCL_SCTPCONN, sleep)) == NULL) { - SCTP_KSTAT(sctp_conn_create); - return (NULL); - } - sctp_connp->conn_ulp_labeled = is_system_labeled(); - psctp = (sctp_t *)parent; + if (psctp != NULL) { + sctps = psctp->sctp_sctps; + /* Increase here to have common decrease at end */ + netstack_hold(sctps->sctps_netstack); + } else { + netstack_t *ns; + + ns = netstack_find_by_cred(credp); + ASSERT(ns != NULL); + sctps = ns->netstack_sctp; + ASSERT(sctps != NULL); + /* + * For exclusive stacks we set the zoneid to zero + * to make SCTP operate as if in the global zone. + */ + if (sctps->sctps_netstack->netstack_stackid != + GLOBAL_NETSTACKID) + zoneid = GLOBAL_ZONEID; + else + zoneid = crgetzoneid(credp); + + /* + * For stackid zero this is done from strplumb.c, but + * non-zero stackids are handled here. + */ + if (sctps->sctps_g_q == NULL && + sctps->sctps_netstack->netstack_stackid != + GLOBAL_NETSTACKID) { + sctp_g_q_setup(sctps); + } + } + if ((sctp_connp = ipcl_conn_create(IPCL_SCTPCONN, sleep, + sctps->sctps_netstack)) == NULL) { + netstack_rele(sctps->sctps_netstack); + SCTP_KSTAT(sctps, sctp_conn_create); + return (NULL); + } + /* + * ipcl_conn_create did a netstack_hold. Undo the hold that was + * done at top of sctp_create. + */ + netstack_rele(sctps->sctps_netstack); sctp = CONN2SCTP(sctp_connp); + sctp->sctp_sctps = sctps; + sctp_connp->conn_ulp_labeled = is_system_labeled(); if ((ack_mp = sctp_timer_alloc(sctp, sctp_ack_timer)) == NULL || (hb_mp = sctp_timer_alloc(sctp, sctp_heartbeat_timer)) == NULL) { if (ack_mp != NULL) freeb(ack_mp); + netstack_rele(sctp_connp->conn_netstack); + sctp_connp->conn_netstack = NULL; + sctp->sctp_sctps = NULL; + SCTP_G_Q_REFRELE(sctps); kmem_cache_free(sctp_conn_cache, sctp_connp); return (NULL); } @@ -1356,12 +1422,16 @@ sctp_create(void *sctp_ulpd, sctp_t *parent, int family, int flags, if (sctp_init_values(sctp, psctp, sleep) != 0) { freeb(ack_mp); freeb(hb_mp); + netstack_rele(sctp_connp->conn_netstack); + sctp_connp->conn_netstack = NULL; + sctp->sctp_sctps = NULL; + SCTP_G_Q_REFRELE(sctps); kmem_cache_free(sctp_conn_cache, sctp_connp); return (NULL); } sctp->sctp_cansleep = ((flags & SCTP_CAN_BLOCK) == SCTP_CAN_BLOCK); - sctp->sctp_mss = sctp_initial_mtu - ((family == AF_INET6) ? + sctp->sctp_mss = sctps->sctps_initial_mtu - ((family == AF_INET6) ? sctp->sctp_hdr6_len : sctp->sctp_hdr_len); if (psctp != NULL) { @@ -1377,6 +1447,10 @@ sctp_create(void *sctp_ulpd, sctp_t *parent, int family, int flags, freeb(ack_mp); freeb(hb_mp); sctp_headers_free(sctp); + netstack_rele(sctps->sctps_netstack); + sctp_connp->conn_netstack = NULL; + sctp->sctp_sctps = NULL; + SCTP_G_Q_REFRELE(sctps); kmem_cache_free(sctp_conn_cache, sctp_connp); return (NULL); } @@ -1392,7 +1466,7 @@ sctp_create(void *sctp_ulpd, sctp_t *parent, int family, int flags, sctp->sctp_zoneid = psctp->sctp_zoneid; WAKE_SCTP(psctp); } else { - sctp->sctp_zoneid = getzoneid(); + sctp->sctp_zoneid = zoneid; } sctp_connp->conn_cred = credp; @@ -1432,31 +1506,240 @@ sctp_create(void *sctp_ulpd, sctp_t *parent, int family, int flags, sbl->sbl_rxlowat = SCTP_RECV_LOWATER; } /* If no sctp_ulpd, must be creating the default sctp */ - ASSERT(sctp_ulpd != NULL || gsctp == NULL); + ASSERT(sctp_ulpd != NULL || sctps->sctps_gsctp == NULL); /* Insert this in the global list. 
*/ - SCTP_LINK(sctp); + SCTP_LINK(sctp, sctps); return (sctp); } +/* + * Make sure we wait until the default queue is setup, yet allow + * sctp_g_q_create() to open a SCTP stream. + * We need to allow sctp_g_q_create() do do an open + * of sctp, hence we compare curhread. + * All others have to wait until the sctps_g_q has been + * setup. + */ void -sctp_ddi_init(void) +sctp_g_q_setup(sctp_stack_t *sctps) { - /* Initialize locks */ - mutex_init(&sctp_g_lock, NULL, MUTEX_DEFAULT, NULL); - mutex_init(&sctp_epriv_port_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_enter(&sctps->sctps_g_q_lock); + if (sctps->sctps_g_q != NULL) { + mutex_exit(&sctps->sctps_g_q_lock); + return; + } + if (sctps->sctps_g_q_creator == NULL) { + /* This thread will set it up */ + sctps->sctps_g_q_creator = curthread; + mutex_exit(&sctps->sctps_g_q_lock); + sctp_g_q_create(sctps); + mutex_enter(&sctps->sctps_g_q_lock); + ASSERT(sctps->sctps_g_q_creator == curthread); + sctps->sctps_g_q_creator = NULL; + cv_signal(&sctps->sctps_g_q_cv); + ASSERT(sctps->sctps_g_q != NULL); + mutex_exit(&sctps->sctps_g_q_lock); + return; + } + /* Everybody but the creator has to wait */ + if (sctps->sctps_g_q_creator != curthread) { + while (sctps->sctps_g_q == NULL) + cv_wait(&sctps->sctps_g_q_cv, &sctps->sctps_g_q_lock); + } + mutex_exit(&sctps->sctps_g_q_lock); +} - /* Initialize SCTP hash arrays. */ - sctp_hash_init(); +major_t IP_MAJ; +#define IP "ip" + +#define SCTP6DEV "/devices/pseudo/sctp6@0:sctp6" + +/* + * Create a default sctp queue here instead of in strplumb + */ +void +sctp_g_q_create(sctp_stack_t *sctps) +{ + int error; + ldi_handle_t lh = NULL; + ldi_ident_t li = NULL; + int rval; + cred_t *cr; + +#ifdef NS_DEBUG + (void) printf("sctp_g_q_create()for stack %d\n", + sctps->sctps_netstack->netstack_stackid); +#endif + + ASSERT(sctps->sctps_g_q_creator == curthread); + + error = ldi_ident_from_major(IP_MAJ, &li); + if (error) { +#ifdef DEBUG + printf("sctp_g_q_create: lyr ident get failed error %d\n", + error); +#endif + return; + } + + cr = zone_get_kcred(netstackid_to_zoneid( + sctps->sctps_netstack->netstack_stackid)); + ASSERT(cr != NULL); + /* + * We set the sctp default queue to IPv6 because IPv4 falls + * back to IPv6 when it can't find a client, but + * IPv6 does not fall back to IPv4. + */ + error = ldi_open_by_name(SCTP6DEV, FREAD|FWRITE, cr, &lh, li); + if (error) { +#ifdef DEBUG + printf("sctp_g_q_create: open of SCTP6DEV failed error %d\n", + error); +#endif + goto out; + } + + /* + * This ioctl causes the sctp framework to cache a pointer to + * this stream, so we don't want to close the stream after + * this operation. + * Use the kernel credentials that are for the zone we're in. + */ + error = ldi_ioctl(lh, SCTP_IOC_DEFAULT_Q, + (intptr_t)0, FKIOCTL, cr, &rval); + if (error) { +#ifdef DEBUG + printf("sctp_g_q_create: ioctl SCTP_IOC_DEFAULT_Q failed " + "error %d\n", error); +#endif + goto out; + } + sctps->sctps_g_q_lh = lh; /* For sctp_g_q_inactive */ + lh = NULL; +out: + /* Close layered handles */ + if (li) + ldi_ident_release(li); + /* Keep cred around until _inactive needs it */ + sctps->sctps_g_q_cr = cr; +} + +/* + * Remove the sctp_default queue so that new connections will not find it. + * SCTP uses sctp_g_q for all transmission, so all sctp'ts implicitly + * refer to it. Hence have each one have a reference on sctp_g_q_ref! + * + * We decrement the refcnt added in sctp_g_q_create. Once all the + * sctp_t's which use the default go away, sctp_g_q_close will be called + * and close the sctp_g_q. 
Once sctp_g_q is closed, sctp_close() will drop the + * last reference count on the stack by calling netstack_rele(). + */ +void +sctp_g_q_destroy(sctp_stack_t *sctps) +{ + if (sctps->sctps_g_q == NULL) { + return; /* Nothing to cleanup */ + } + /* + * Keep sctps_g_q and sctps_gsctp until the last reference has + * dropped, since the output is always done using those. + * Need to decrement twice to take sctp_g_q_create and + * the gsctp reference into account so that sctp_g_q_inactive is called + * when all but the default queue remains. + */ +#ifdef NS_DEBUG + (void) printf("sctp_g_q_destroy: ref %d\n", + sctps->sctps_g_q_ref); +#endif + SCTP_G_Q_REFRELE(sctps); +} + +/* + * Called when last user (could be sctp_g_q_destroy) drops reference count + * using SCTP_G_Q_REFRELE. + * Run by sctp_q_q_inactive using a taskq. + */ +static void +sctp_g_q_close(void *arg) +{ + sctp_stack_t *sctps = arg; + int error; + ldi_handle_t lh = NULL; + ldi_ident_t li = NULL; + cred_t *cr; + + lh = sctps->sctps_g_q_lh; + if (lh == NULL) + return; /* Nothing to cleanup */ + + error = ldi_ident_from_major(IP_MAJ, &li); + if (error) { +#ifdef NS_DEBUG + printf("sctp_g_q_inactive: lyr ident get failed error %d\n", + error); +#endif + return; + } + + cr = sctps->sctps_g_q_cr; + sctps->sctps_g_q_cr = NULL; + ASSERT(cr != NULL); + + /* + * Make sure we can break the recursion when sctp_close decrements + * the reference count causing g_q_inactive to be called again. + */ + sctps->sctps_g_q_lh = NULL; - sctp_pad_mp = allocb(SCTP_ALIGN, BPRI_MED); - bzero(sctp_pad_mp->b_rptr, SCTP_ALIGN); - ASSERT(sctp_pad_mp); + /* close the default queue */ + (void) ldi_close(lh, FREAD|FWRITE, cr); + + /* Close layered handles */ + ldi_ident_release(li); + crfree(cr); + + ASSERT(sctps->sctps_g_q != NULL); + sctps->sctps_g_q = NULL; + /* + * Now free sctps_gsctp. + */ + ASSERT(sctps->sctps_gsctp != NULL); + sctp_closei_local(sctps->sctps_gsctp); + SCTP_CONDEMNED(sctps->sctps_gsctp); + SCTP_REFRELE(sctps->sctps_gsctp); + sctps->sctps_gsctp = NULL; +} - if (!sctp_nd_init()) { - sctp_nd_free(); +/* + * Called when last sctp_t drops reference count using SCTP_G_Q_REFRELE. + * + * Have to ensure that the ldi routines are not used by an + * interrupt thread by using a taskq. + */ +void +sctp_g_q_inactive(sctp_stack_t *sctps) +{ + if (sctps->sctps_g_q_lh == NULL) + return; /* Nothing to cleanup */ + + ASSERT(sctps->sctps_g_q_ref == 0); + SCTP_G_Q_REFHOLD(sctps); /* Compensate for what g_q_destroy did */ + + if (servicing_interrupt()) { + (void) taskq_dispatch(sctp_taskq, sctp_g_q_close, + (void *) sctps, TQ_SLEEP); + } else { + sctp_g_q_close(sctps); } +} + +/* Run at module load time */ +void +sctp_ddi_g_init(void) +{ + IP_MAJ = ddi_name_to_major(IP); /* Create sctp_t/conn_t cache */ sctp_conn_cache_init(); @@ -1470,28 +1753,73 @@ sctp_ddi_init(void) /* Create the PR-SCTP sets cache */ sctp_ftsn_sets_init(); + /* Initialize tables used for CRC calculation */ + sctp_crc32_init(); + + sctp_taskq = taskq_create("sctp_taskq", 1, minclsyspri, 1, 1, + TASKQ_PREPOPULATE); + + /* + * We want to be informed each time a stack is created or + * destroyed in the kernel, so we can maintain the + * set of sctp_stack_t's. 
+ */ + netstack_register(NS_SCTP, sctp_stack_init, sctp_stack_shutdown, + sctp_stack_fini); +} + +static void * +sctp_stack_init(netstackid_t stackid, netstack_t *ns) +{ + sctp_stack_t *sctps; + + sctps = kmem_zalloc(sizeof (*sctps), KM_SLEEP); + sctps->sctps_netstack = ns; + + /* Initialize locks */ + mutex_init(&sctps->sctps_g_q_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&sctps->sctps_g_q_cv, NULL, CV_DEFAULT, NULL); + mutex_init(&sctps->sctps_g_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&sctps->sctps_epriv_port_lock, NULL, MUTEX_DEFAULT, NULL); + sctps->sctps_g_num_epriv_ports = SCTP_NUM_EPRIV_PORTS; + sctps->sctps_g_epriv_ports[0] = 2049; + sctps->sctps_g_epriv_ports[1] = 4045; + + /* Initialize SCTP hash arrays. */ + sctp_hash_init(sctps); + + sctps->sctps_pad_mp = allocb(SCTP_ALIGN, BPRI_MED); + bzero(sctps->sctps_pad_mp->b_rptr, SCTP_ALIGN); + ASSERT(sctps->sctps_pad_mp); + + if (!sctp_nd_init(sctps)) { + sctp_nd_free(sctps); + } + /* Initialize the recvq taskq. */ - sctp_rq_tq_init(); + sctp_rq_tq_init(sctps); /* saddr init */ - sctp_saddr_init(); + sctp_saddr_init(sctps); /* Global SCTP PCB list. */ - list_create(&sctp_g_list, sizeof (sctp_t), + list_create(&sctps->sctps_g_list, sizeof (sctp_t), offsetof(sctp_t, sctp_list)); - /* Initialize tables used for CRC calculation */ - sctp_crc32_init(); - /* Initialize sctp kernel stats. */ - sctp_kstat_init(); + sctps->sctps_mibkp = sctp_kstat_init(stackid); + sctps->sctps_kstat = + sctp_kstat2_init(stackid, &sctps->sctps_statistics); + + return (sctps); } +/* + * Called when the module is about to be unloaded. + */ void -sctp_ddi_destroy(void) +sctp_ddi_g_destroy(void) { - sctp_nd_free(); - /* Destroy sctp_t/conn_t caches */ sctp_conn_cache_fini(); @@ -1504,96 +1832,141 @@ sctp_ddi_destroy(void) /* Destroy the PR-SCTP sets cache */ sctp_ftsn_sets_fini(); + netstack_unregister(NS_SCTP); + taskq_destroy(sctp_taskq); +} + +/* + * Shut down the SCTP stack instance. + */ +/* ARGSUSED */ +static void +sctp_stack_shutdown(netstackid_t stackid, void *arg) +{ + sctp_stack_t *sctps = (sctp_stack_t *)arg; + + sctp_g_q_destroy(sctps); +} + +/* + * Free the SCTP stack instance. + */ +static void +sctp_stack_fini(netstackid_t stackid, void *arg) +{ + sctp_stack_t *sctps = (sctp_stack_t *)arg; + + sctp_nd_free(sctps); + /* Destroy the recvq taskqs. */ - sctp_rq_tq_fini(); + sctp_rq_tq_fini(sctps); /* Destroy saddr */ - sctp_saddr_fini(); + sctp_saddr_fini(sctps); /* Global SCTP PCB list. */ - list_destroy(&sctp_g_list); + list_destroy(&sctps->sctps_g_list); /* Destroy SCTP hash arrays. */ - sctp_hash_destroy(); + sctp_hash_destroy(sctps); + + /* Destroy SCTP kernel stats. */ + sctp_kstat2_fini(stackid, sctps->sctps_kstat); + sctps->sctps_kstat = NULL; + bzero(&sctps->sctps_statistics, sizeof (sctps->sctps_statistics)); + + sctp_kstat_fini(stackid, sctps->sctps_mibkp); + sctps->sctps_mibkp = NULL; + + freeb(sctps->sctps_pad_mp); + sctps->sctps_pad_mp = NULL; - /* Destroy SCTP kenrel stats. 
*/ - sctp_kstat_fini(); + mutex_destroy(&sctps->sctps_g_lock); + mutex_destroy(&sctps->sctps_epriv_port_lock); + mutex_destroy(&sctps->sctps_g_q_lock); + cv_destroy(&sctps->sctps_g_q_cv); - mutex_destroy(&sctp_g_lock); - mutex_destroy(&sctp_epriv_port_lock); + kmem_free(sctps, sizeof (*sctps)); } void -sctp_display_all() +sctp_display_all(sctp_stack_t *sctps) { sctp_t *sctp_walker; - mutex_enter(&sctp_g_lock); - for (sctp_walker = gsctp; sctp_walker != NULL; - sctp_walker = (sctp_t *)list_next(&sctp_g_list, sctp_walker)) { + mutex_enter(&sctps->sctps_g_lock); + for (sctp_walker = sctps->sctps_gsctp; sctp_walker != NULL; + sctp_walker = (sctp_t *)list_next(&sctps->sctps_g_list, + sctp_walker)) { (void) sctp_display(sctp_walker, NULL); } - mutex_exit(&sctp_g_lock); + mutex_exit(&sctps->sctps_g_lock); } static void -sctp_rq_tq_init(void) +sctp_rq_tq_init(sctp_stack_t *sctps) { + sctps->sctps_recvq_tq_list_max_sz = 16; + sctps->sctps_recvq_tq_list_cur_sz = 1; /* * Initialize the recvq_tq_list and create the first recvq taskq. * What to do if it fails? */ - recvq_tq_list = kmem_zalloc(sctp_recvq_tq_list_max_sz * - sizeof (taskq_t *), KM_SLEEP); - recvq_tq_list[0] = taskq_create("sctp_def_recvq_taskq", + sctps->sctps_recvq_tq_list = + kmem_zalloc(sctps->sctps_recvq_tq_list_max_sz * sizeof (taskq_t *), + KM_SLEEP); + sctps->sctps_recvq_tq_list[0] = taskq_create("sctp_def_recvq_taskq", MIN(sctp_recvq_tq_thr_max, MAX(sctp_recvq_tq_thr_min, ncpus)), minclsyspri, sctp_recvq_tq_task_min, sctp_recvq_tq_task_max, TASKQ_PREPOPULATE); - mutex_init(&sctp_rq_tq_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&sctps->sctps_rq_tq_lock, NULL, MUTEX_DEFAULT, NULL); } static void -sctp_rq_tq_fini(void) +sctp_rq_tq_fini(sctp_stack_t *sctps) { int i; - for (i = 0; i < recvq_tq_list_cur_sz; i++) { - ASSERT(recvq_tq_list[i] != NULL); - taskq_destroy(recvq_tq_list[i]); + for (i = 0; i < sctps->sctps_recvq_tq_list_cur_sz; i++) { + ASSERT(sctps->sctps_recvq_tq_list[i] != NULL); + taskq_destroy(sctps->sctps_recvq_tq_list[i]); } - kmem_free(recvq_tq_list, sctp_recvq_tq_list_max_sz * - sizeof (taskq_t *)); + kmem_free(sctps->sctps_recvq_tq_list, + sctps->sctps_recvq_tq_list_max_sz * sizeof (taskq_t *)); + sctps->sctps_recvq_tq_list = NULL; } /* Add another taskq for a new ill. 
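sctp_inc_taskq(), which follows, grows the per-stack array of receive taskqs under sctps_rq_tq_lock and refuses once sctps_recvq_tq_list_max_sz is reached. A compact user-space sketch of growing a bounded pool array under a lock; the pool type and the cap of 4 are placeholders:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#define	POOL_LIST_MAX	4		/* cf. sctps_recvq_tq_list_max_sz */

typedef struct pool { int p_id; } pool_t;	/* stand-in for taskq_t */

typedef struct rq_state {
	pthread_mutex_t	rq_lock;
	pool_t		*rq_list[POOL_LIST_MAX];
	unsigned	rq_cur;		/* number of pools created so far */
} rq_state_t;

/* Add one more pool, failing politely when the cap is reached. */
static int
rq_inc_pool(rq_state_t *rq)
{
	pool_t *p;

	pthread_mutex_lock(&rq->rq_lock);
	if (rq->rq_cur + 1 > POOL_LIST_MAX) {
		pthread_mutex_unlock(&rq->rq_lock);
		fprintf(stderr, "cannot create more pools\n");
		return (-1);
	}
	p = malloc(sizeof (*p));
	if (p == NULL) {
		pthread_mutex_unlock(&rq->rq_lock);
		return (-1);
	}
	p->p_id = (int)rq->rq_cur;
	rq->rq_list[rq->rq_cur] = p;
	rq->rq_cur++;		/* publish only after the slot is filled */
	pthread_mutex_unlock(&rq->rq_lock);
	return (0);
}

int
main(void)
{
	rq_state_t rq = { PTHREAD_MUTEX_INITIALIZER, { NULL }, 0 };

	for (int i = 0; i < 6; i++)	/* two attempts past the cap */
		(void) rq_inc_pool(&rq);
	printf("pools created: %u\n", rq.rq_cur);
	for (unsigned i = 0; i < rq.rq_cur; i++)
		free(rq.rq_list[i]);
	return (0);
}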
*/ void -sctp_inc_taskq(void) +sctp_inc_taskq(sctp_stack_t *sctps) { taskq_t *tq; char tq_name[TASKQ_NAMELEN]; - mutex_enter(&sctp_rq_tq_lock); - if (recvq_tq_list_cur_sz + 1 > sctp_recvq_tq_list_max_sz) { - mutex_exit(&sctp_rq_tq_lock); + mutex_enter(&sctps->sctps_rq_tq_lock); + if (sctps->sctps_recvq_tq_list_cur_sz + 1 > + sctps->sctps_recvq_tq_list_max_sz) { + mutex_exit(&sctps->sctps_rq_tq_lock); cmn_err(CE_NOTE, "Cannot create more SCTP recvq taskq"); return; } (void) snprintf(tq_name, sizeof (tq_name), "sctp_recvq_taskq_%u", - recvq_tq_list_cur_sz); + sctps->sctps_recvq_tq_list_cur_sz); tq = taskq_create(tq_name, MIN(sctp_recvq_tq_thr_max, MAX(sctp_recvq_tq_thr_min, ncpus)), minclsyspri, sctp_recvq_tq_task_min, sctp_recvq_tq_task_max, TASKQ_PREPOPULATE); if (tq == NULL) { - mutex_exit(&sctp_rq_tq_lock); + mutex_exit(&sctps->sctps_rq_tq_lock); cmn_err(CE_NOTE, "SCTP recvq taskq creation failed"); return; } - ASSERT(recvq_tq_list[recvq_tq_list_cur_sz] == NULL); - recvq_tq_list[recvq_tq_list_cur_sz] = tq; - atomic_add_32(&recvq_tq_list_cur_sz, 1); - mutex_exit(&sctp_rq_tq_lock); + ASSERT(sctps->sctps_recvq_tq_list[ + sctps->sctps_recvq_tq_list_cur_sz] == NULL); + sctps->sctps_recvq_tq_list[sctps->sctps_recvq_tq_list_cur_sz] = tq; + atomic_add_32(&sctps->sctps_recvq_tq_list_cur_sz, 1); + mutex_exit(&sctps->sctps_rq_tq_lock); } #ifdef DEBUG @@ -1686,6 +2059,7 @@ sctp_find_next_tq(sctp_t *sctp) { int next_tq, try; taskq_t *tq; + sctp_stack_t *sctps = sctp->sctp_sctps; /* * Note that since we don't hold a lock on sctp_rq_tq_lock for @@ -1693,16 +2067,16 @@ sctp_find_next_tq(sctp_t *sctp) * this loop. The problem this will create is that the loop may * not have tried all the recvq_tq. This should be OK. */ - next_tq = atomic_add_32_nv(&recvq_tq_list_cur, 1) % - recvq_tq_list_cur_sz; - for (try = 0; try < recvq_tq_list_cur_sz; - try++, next_tq = (next_tq + 1) % recvq_tq_list_cur_sz) { - tq = recvq_tq_list[next_tq]; + next_tq = atomic_add_32_nv(&sctps->sctps_recvq_tq_list_cur, 1) % + sctps->sctps_recvq_tq_list_cur_sz; + for (try = 0; try < sctps->sctps_recvq_tq_list_cur_sz; try++) { + tq = sctps->sctps_recvq_tq_list[next_tq]; if (taskq_dispatch(tq, sctp_process_recvq, sctp, TQ_NOSLEEP) != NULL) { sctp->sctp_recvq_tq = tq; return (B_TRUE); } + next_tq = (next_tq + 1) % sctps->sctps_recvq_tq_list_cur_sz; } /* @@ -1711,13 +2085,13 @@ sctp_find_next_tq(sctp_t *sctp) * taskqs is at the maximum. We are probably in a pretty bad * shape if this actually happens... */ - sctp_inc_taskq(); - tq = recvq_tq_list[recvq_tq_list_cur_sz - 1]; + sctp_inc_taskq(sctps); + tq = sctps->sctps_recvq_tq_list[sctps->sctps_recvq_tq_list_cur_sz - 1]; if (taskq_dispatch(tq, sctp_process_recvq, sctp, TQ_NOSLEEP) != NULL) { sctp->sctp_recvq_tq = tq; return (B_TRUE); } - SCTP_KSTAT(sctp_find_next_tq); + SCTP_KSTAT(sctps, sctp_find_next_tq); return (B_FALSE); } @@ -1830,7 +2204,6 @@ sctp_conn_cache_constructor(void *buf, void *cdrarg, int kmflags) bzero(buf, (char *)&sctp[1] - (char *)buf); - ASSERT(sctp_g_q != NULL); sctp->sctp_connp = sctp_connp; mutex_init(&sctp->sctp_reflock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&sctp->sctp_lock, NULL, MUTEX_DEFAULT, NULL); diff --git a/usr/src/uts/common/inet/sctp/sctp_addr.c b/usr/src/uts/common/inet/sctp/sctp_addr.c index a4bef74443..946d85b3b1 100644 --- a/usr/src/uts/common/inet/sctp/sctp_addr.c +++ b/usr/src/uts/common/inet/sctp/sctp_addr.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -73,8 +73,8 @@ void sctp_del_saddr_list(sctp_t *, const void *, int, sctp_saddr_ipif_t *sctp_saddr_lookup(sctp_t *, in6_addr_t *, uint_t); in6_addr_t sctp_get_valid_addr(sctp_t *, boolean_t); int sctp_getmyaddrs(void *, void *, int *); -void sctp_saddr_init(); -void sctp_saddr_fini(); +void sctp_saddr_init(sctp_stack_t *); +void sctp_saddr_fini(sctp_stack_t *); #define SCTP_IPIF_USABLE(sctp_ipif_state) \ ((sctp_ipif_state) == SCTP_IPIFS_UP || \ @@ -101,13 +101,6 @@ void sctp_saddr_fini(); #define SCTP_IPIF_HASH_FN(seqid) ((seqid) % SCTP_IPIF_HASH) #define SCTP_ILL_TO_PHYINDEX(ill) ((ill)->ill_phyint->phyint_ifindex) -/* Global list of SCTP ILLs */ -sctp_ill_hash_t sctp_g_ills[SCTP_ILL_HASH]; -uint32_t sctp_ills_count = 0; - -/* Global list of SCTP IPIFs */ -sctp_ipif_hash_t sctp_g_ipifs[SCTP_IPIF_HASH]; -uint32_t sctp_g_ipifs_count = 0; /* * * @@ -127,9 +120,11 @@ sctp_ipif_inactive(sctp_ipif_t *sctp_ipif) sctp_ill_t *sctp_ill; uint_t ipif_index; uint_t ill_index; + sctp_stack_t *sctps = sctp_ipif->sctp_ipif_ill-> + sctp_ill_netstack->netstack_sctp; - rw_enter(&sctp_g_ills_lock, RW_READER); - rw_enter(&sctp_g_ipifs_lock, RW_WRITER); + rw_enter(&sctps->sctps_g_ills_lock, RW_READER); + rw_enter(&sctps->sctps_g_ipifs_lock, RW_WRITER); ipif_index = SCTP_IPIF_HASH_FN(sctp_ipif->sctp_ipif_id); sctp_ill = sctp_ipif->sctp_ipif_ill; @@ -137,32 +132,33 @@ sctp_ipif_inactive(sctp_ipif_t *sctp_ipif) ill_index = SCTP_ILL_HASH_FN(sctp_ill->sctp_ill_index); if (sctp_ipif->sctp_ipif_state != SCTP_IPIFS_CONDEMNED || sctp_ipif->sctp_ipif_refcnt != 0) { - rw_exit(&sctp_g_ipifs_lock); - rw_exit(&sctp_g_ills_lock); + rw_exit(&sctps->sctps_g_ipifs_lock); + rw_exit(&sctps->sctps_g_ills_lock); return; } - list_remove(&sctp_g_ipifs[ipif_index].sctp_ipif_list, sctp_ipif); - sctp_g_ipifs[ipif_index].ipif_count--; - sctp_g_ipifs_count--; + list_remove(&sctps->sctps_g_ipifs[ipif_index].sctp_ipif_list, + sctp_ipif); + sctps->sctps_g_ipifs[ipif_index].ipif_count--; + sctps->sctps_g_ipifs_count--; rw_destroy(&sctp_ipif->sctp_ipif_lock); kmem_free(sctp_ipif, sizeof (sctp_ipif_t)); (void) atomic_add_32_nv(&sctp_ill->sctp_ill_ipifcnt, -1); - if (rw_tryupgrade(&sctp_g_ills_lock) != 0) { - rw_downgrade(&sctp_g_ipifs_lock); + if (rw_tryupgrade(&sctps->sctps_g_ills_lock) != 0) { + rw_downgrade(&sctps->sctps_g_ipifs_lock); if (sctp_ill->sctp_ill_ipifcnt == 0 && sctp_ill->sctp_ill_state == SCTP_ILLS_CONDEMNED) { - list_remove(&sctp_g_ills[ill_index].sctp_ill_list, - (void *)sctp_ill); - sctp_g_ills[ill_index].ill_count--; - sctp_ills_count--; + list_remove(&sctps->sctps_g_ills[ill_index]. 
+ sctp_ill_list, (void *)sctp_ill); + sctps->sctps_g_ills[ill_index].ill_count--; + sctps->sctps_ills_count--; kmem_free(sctp_ill->sctp_ill_name, sctp_ill->sctp_ill_name_length); kmem_free(sctp_ill, sizeof (sctp_ill_t)); } } - rw_exit(&sctp_g_ipifs_lock); - rw_exit(&sctp_g_ills_lock); + rw_exit(&sctps->sctps_g_ipifs_lock); + rw_exit(&sctps->sctps_g_ills_lock); } /* @@ -176,14 +172,15 @@ sctp_lookup_ipif_addr(in6_addr_t *addr, boolean_t refhold, sctp_t *sctp, int i; int j; sctp_ipif_t *sctp_ipif; + sctp_stack_t *sctps = sctp->sctp_sctps; ASSERT(sctp->sctp_zoneid != ALL_ZONES); - rw_enter(&sctp_g_ipifs_lock, RW_READER); + rw_enter(&sctps->sctps_g_ipifs_lock, RW_READER); for (i = 0; i < SCTP_IPIF_HASH; i++) { - if (sctp_g_ipifs[i].ipif_count == 0) + if (sctps->sctps_g_ipifs[i].ipif_count == 0) continue; - sctp_ipif = list_head(&sctp_g_ipifs[i].sctp_ipif_list); - for (j = 0; j < sctp_g_ipifs[i].ipif_count; j++) { + sctp_ipif = list_head(&sctps->sctps_g_ipifs[i].sctp_ipif_list); + for (j = 0; j < sctps->sctps_g_ipifs[i].ipif_count; j++) { rw_enter(&sctp_ipif->sctp_ipif_lock, RW_READER); if (SCTP_IPIF_ZONE_MATCH(sctp, sctp_ipif) && SCTP_IPIF_USABLE(sctp_ipif->sctp_ipif_state) && @@ -194,15 +191,15 @@ sctp_lookup_ipif_addr(in6_addr_t *addr, boolean_t refhold, sctp_t *sctp, rw_exit(&sctp_ipif->sctp_ipif_lock); if (refhold) SCTP_IPIF_REFHOLD(sctp_ipif); - rw_exit(&sctp_g_ipifs_lock); + rw_exit(&sctps->sctps_g_ipifs_lock); return (sctp_ipif); } rw_exit(&sctp_ipif->sctp_ipif_lock); - sctp_ipif = list_next(&sctp_g_ipifs[i].sctp_ipif_list, - sctp_ipif); + sctp_ipif = list_next( + &sctps->sctps_g_ipifs[i].sctp_ipif_list, sctp_ipif); } } - rw_exit(&sctp_g_ipifs_lock); + rw_exit(&sctps->sctps_g_ipifs_lock); return (NULL); } @@ -218,13 +215,14 @@ sctp_get_all_ipifs(sctp_t *sctp, int sleep) int i; int j; int error = 0; + sctp_stack_t *sctps = sctp->sctp_sctps; - rw_enter(&sctp_g_ipifs_lock, RW_READER); + rw_enter(&sctps->sctps_g_ipifs_lock, RW_READER); for (i = 0; i < SCTP_IPIF_HASH; i++) { - if (sctp_g_ipifs[i].ipif_count == 0) + if (sctps->sctps_g_ipifs[i].ipif_count == 0) continue; - sctp_ipif = list_head(&sctp_g_ipifs[i].sctp_ipif_list); - for (j = 0; j < sctp_g_ipifs[i].ipif_count; j++) { + sctp_ipif = list_head(&sctps->sctps_g_ipifs[i].sctp_ipif_list); + for (j = 0; j < sctps->sctps_g_ipifs[i].ipif_count; j++) { rw_enter(&sctp_ipif->sctp_ipif_lock, RW_READER); if (SCTP_IPIF_DISCARD(sctp_ipif->sctp_ipif_flags) || !SCTP_IPIF_USABLE(sctp_ipif->sctp_ipif_state) || @@ -235,7 +233,8 @@ sctp_get_all_ipifs(sctp_t *sctp, int sleep) !sctp_ipif->sctp_ipif_isv6)) { rw_exit(&sctp_ipif->sctp_ipif_lock); sctp_ipif = list_next( - &sctp_g_ipifs[i].sctp_ipif_list, sctp_ipif); + &sctps->sctps_g_ipifs[i].sctp_ipif_list, + sctp_ipif); continue; } rw_exit(&sctp_ipif->sctp_ipif_lock); @@ -244,14 +243,15 @@ sctp_get_all_ipifs(sctp_t *sctp, int sleep) B_FALSE); if (error != 0) goto free_stuff; - sctp_ipif = list_next(&sctp_g_ipifs[i].sctp_ipif_list, + sctp_ipif = list_next( + &sctps->sctps_g_ipifs[i].sctp_ipif_list, sctp_ipif); } } - rw_exit(&sctp_g_ipifs_lock); + rw_exit(&sctps->sctps_g_ipifs_lock); return (0); free_stuff: - rw_exit(&sctp_g_ipifs_lock); + rw_exit(&sctps->sctps_g_ipifs_lock); sctp_free_saddrs(sctp); return (ENOMEM); } @@ -625,17 +625,19 @@ sctp_update_ill(ill_t *ill, int op) int i; sctp_ill_t *sctp_ill = NULL; uint_t index; + netstack_t *ns = ill->ill_ipst->ips_netstack; + sctp_stack_t *sctps = ns->netstack_sctp; ip2dbg(("sctp_update_ill: %s\n", ill->ill_name)); - rw_enter(&sctp_g_ills_lock, RW_WRITER); + 
rw_enter(&sctps->sctps_g_ills_lock, RW_WRITER); index = SCTP_ILL_HASH_FN(SCTP_ILL_TO_PHYINDEX(ill)); - sctp_ill = list_head(&sctp_g_ills[index].sctp_ill_list); - for (i = 0; i < sctp_g_ills[index].ill_count; i++) { + sctp_ill = list_head(&sctps->sctps_g_ills[index].sctp_ill_list); + for (i = 0; i < sctps->sctps_g_ills[index].ill_count; i++) { if (sctp_ill->sctp_ill_index == SCTP_ILL_TO_PHYINDEX(ill)) break; - sctp_ill = list_next(&sctp_g_ills[index].sctp_ill_list, + sctp_ill = list_next(&sctps->sctps_g_ills[index].sctp_ill_list, sctp_ill); } @@ -645,14 +647,14 @@ sctp_update_ill(ill_t *ill, int op) /* Unmark it if it is condemned */ if (sctp_ill->sctp_ill_state == SCTP_ILLS_CONDEMNED) sctp_ill->sctp_ill_state = 0; - rw_exit(&sctp_g_ills_lock); + rw_exit(&sctps->sctps_g_ills_lock); return; } sctp_ill = kmem_zalloc(sizeof (sctp_ill_t), KM_NOSLEEP); /* Need to re-try? */ if (sctp_ill == NULL) { ip1dbg(("sctp_ill_insert: mem error..\n")); - rw_exit(&sctp_g_ills_lock); + rw_exit(&sctps->sctps_g_ills_lock); return; } sctp_ill->sctp_ill_name = @@ -660,7 +662,7 @@ sctp_update_ill(ill_t *ill, int op) if (sctp_ill->sctp_ill_name == NULL) { ip1dbg(("sctp_ill_insert: mem error..\n")); kmem_free(sctp_ill, sizeof (sctp_ill_t)); - rw_exit(&sctp_g_ills_lock); + rw_exit(&sctps->sctps_g_ills_lock); return; } bcopy(ill->ill_name, sctp_ill->sctp_ill_name, @@ -668,24 +670,25 @@ sctp_update_ill(ill_t *ill, int op) sctp_ill->sctp_ill_name_length = ill->ill_name_length; sctp_ill->sctp_ill_index = SCTP_ILL_TO_PHYINDEX(ill); sctp_ill->sctp_ill_flags = ill->ill_phyint->phyint_flags; - list_insert_tail(&sctp_g_ills[index].sctp_ill_list, + sctp_ill->sctp_ill_netstack = ns; /* No netstack_hold */ + list_insert_tail(&sctps->sctps_g_ills[index].sctp_ill_list, (void *)sctp_ill); - sctp_g_ills[index].ill_count++; - sctp_ills_count++; + sctps->sctps_g_ills[index].ill_count++; + sctps->sctps_ills_count++; break; case SCTP_ILL_REMOVE: if (sctp_ill == NULL) { - rw_exit(&sctp_g_ills_lock); + rw_exit(&sctps->sctps_g_ills_lock); return; } if (sctp_ill->sctp_ill_ipifcnt == 0) { - list_remove(&sctp_g_ills[index].sctp_ill_list, + list_remove(&sctps->sctps_g_ills[index].sctp_ill_list, (void *)sctp_ill); - sctp_g_ills[index].ill_count--; - sctp_ills_count--; + sctps->sctps_g_ills[index].ill_count--; + sctps->sctps_ills_count--; kmem_free(sctp_ill->sctp_ill_name, ill->ill_name_length); kmem_free(sctp_ill, sizeof (sctp_ill_t)); @@ -695,7 +698,7 @@ sctp_update_ill(ill_t *ill, int op) break; } - rw_exit(&sctp_g_ills_lock); + rw_exit(&sctps->sctps_g_ills_lock); } /* move ipif from f_ill to t_ill */ @@ -707,42 +710,44 @@ sctp_move_ipif(ipif_t *ipif, ill_t *f_ill, ill_t *t_ill) sctp_ipif_t *sctp_ipif; uint_t index; int i; + netstack_t *ns = ipif->ipif_ill->ill_ipst->ips_netstack; + sctp_stack_t *sctps = ns->netstack_sctp; - rw_enter(&sctp_g_ills_lock, RW_READER); - rw_enter(&sctp_g_ipifs_lock, RW_READER); + rw_enter(&sctps->sctps_g_ills_lock, RW_READER); + rw_enter(&sctps->sctps_g_ipifs_lock, RW_READER); index = SCTP_ILL_HASH_FN(SCTP_ILL_TO_PHYINDEX(f_ill)); - fsctp_ill = list_head(&sctp_g_ills[index].sctp_ill_list); - for (i = 0; i < sctp_g_ills[index].ill_count; i++) { + fsctp_ill = list_head(&sctps->sctps_g_ills[index].sctp_ill_list); + for (i = 0; i < sctps->sctps_g_ills[index].ill_count; i++) { if (fsctp_ill->sctp_ill_index == SCTP_ILL_TO_PHYINDEX(f_ill)) break; - fsctp_ill = list_next(&sctp_g_ills[index].sctp_ill_list, + fsctp_ill = list_next(&sctps->sctps_g_ills[index].sctp_ill_list, fsctp_ill); } index = 
SCTP_ILL_HASH_FN(SCTP_ILL_TO_PHYINDEX(t_ill)); - tsctp_ill = list_head(&sctp_g_ills[index].sctp_ill_list); - for (i = 0; i < sctp_g_ills[index].ill_count; i++) { + tsctp_ill = list_head(&sctps->sctps_g_ills[index].sctp_ill_list); + for (i = 0; i < sctps->sctps_g_ills[index].ill_count; i++) { if (tsctp_ill->sctp_ill_index == SCTP_ILL_TO_PHYINDEX(t_ill)) break; - tsctp_ill = list_next(&sctp_g_ills[index].sctp_ill_list, + tsctp_ill = list_next(&sctps->sctps_g_ills[index].sctp_ill_list, tsctp_ill); } index = SCTP_IPIF_HASH_FN(ipif->ipif_seqid); - sctp_ipif = list_head(&sctp_g_ipifs[index].sctp_ipif_list); - for (i = 0; i < sctp_g_ipifs[index].ipif_count; i++) { + sctp_ipif = list_head(&sctps->sctps_g_ipifs[index].sctp_ipif_list); + for (i = 0; i < sctps->sctps_g_ipifs[index].ipif_count; i++) { if (sctp_ipif->sctp_ipif_id == ipif->ipif_seqid) break; - sctp_ipif = list_next(&sctp_g_ipifs[index].sctp_ipif_list, - sctp_ipif); + sctp_ipif = list_next( + &sctps->sctps_g_ipifs[index].sctp_ipif_list, sctp_ipif); } /* Should be an ASSERT? */ if (fsctp_ill == NULL || tsctp_ill == NULL || sctp_ipif == NULL) { ip1dbg(("sctp_move_ipif: error moving ipif %p from %p to %p\n", (void *)ipif, (void *)f_ill, (void *)t_ill)); - rw_exit(&sctp_g_ipifs_lock); - rw_exit(&sctp_g_ills_lock); + rw_exit(&sctps->sctps_g_ipifs_lock); + rw_exit(&sctps->sctps_g_ills_lock); return; } rw_enter(&sctp_ipif->sctp_ipif_lock, RW_WRITER); @@ -751,8 +756,8 @@ sctp_move_ipif(ipif_t *ipif, ill_t *f_ill, ill_t *t_ill) rw_exit(&sctp_ipif->sctp_ipif_lock); (void) atomic_add_32_nv(&fsctp_ill->sctp_ill_ipifcnt, -1); atomic_add_32(&tsctp_ill->sctp_ill_ipifcnt, 1); - rw_exit(&sctp_g_ipifs_lock); - rw_exit(&sctp_g_ills_lock); + rw_exit(&sctps->sctps_g_ipifs_lock); + rw_exit(&sctps->sctps_g_ills_lock); } /* Insert, Remove, Mark up or Mark down the ipif */ @@ -765,38 +770,41 @@ sctp_update_ipif(ipif_t *ipif, int op) sctp_ipif_t *sctp_ipif; uint_t ill_index; uint_t ipif_index; + netstack_t *ns = ipif->ipif_ill->ill_ipst->ips_netstack; + sctp_stack_t *sctps = ns->netstack_sctp; ip2dbg(("sctp_update_ipif: %s %d\n", ill->ill_name, ipif->ipif_seqid)); - rw_enter(&sctp_g_ills_lock, RW_READER); - rw_enter(&sctp_g_ipifs_lock, RW_WRITER); + rw_enter(&sctps->sctps_g_ills_lock, RW_READER); + rw_enter(&sctps->sctps_g_ipifs_lock, RW_WRITER); ill_index = SCTP_ILL_HASH_FN(SCTP_ILL_TO_PHYINDEX(ill)); - sctp_ill = list_head(&sctp_g_ills[ill_index].sctp_ill_list); - for (i = 0; i < sctp_g_ills[ill_index].ill_count; i++) { + sctp_ill = list_head(&sctps->sctps_g_ills[ill_index].sctp_ill_list); + for (i = 0; i < sctps->sctps_g_ills[ill_index].ill_count; i++) { if (sctp_ill->sctp_ill_index == SCTP_ILL_TO_PHYINDEX(ill)) break; - sctp_ill = list_next(&sctp_g_ills[ill_index].sctp_ill_list, - sctp_ill); + sctp_ill = list_next( + &sctps->sctps_g_ills[ill_index].sctp_ill_list, sctp_ill); } if (sctp_ill == NULL) { - rw_exit(&sctp_g_ipifs_lock); - rw_exit(&sctp_g_ills_lock); + rw_exit(&sctps->sctps_g_ipifs_lock); + rw_exit(&sctps->sctps_g_ills_lock); return; } ipif_index = SCTP_IPIF_HASH_FN(ipif->ipif_seqid); - sctp_ipif = list_head(&sctp_g_ipifs[ipif_index].sctp_ipif_list); - for (i = 0; i < sctp_g_ipifs[ipif_index].ipif_count; i++) { + sctp_ipif = list_head(&sctps->sctps_g_ipifs[ipif_index].sctp_ipif_list); + for (i = 0; i < sctps->sctps_g_ipifs[ipif_index].ipif_count; i++) { if (sctp_ipif->sctp_ipif_id == ipif->ipif_seqid) break; - sctp_ipif = list_next(&sctp_g_ipifs[ipif_index].sctp_ipif_list, + sctp_ipif = list_next( + &sctps->sctps_g_ipifs[ipif_index].sctp_ipif_list, 
sctp_ipif); } if (op != SCTP_IPIF_INSERT && sctp_ipif == NULL) { ip1dbg(("sctp_update_ipif: null sctp_ipif for %d\n", op)); - rw_exit(&sctp_g_ipifs_lock); - rw_exit(&sctp_g_ills_lock); + rw_exit(&sctps->sctps_g_ipifs_lock); + rw_exit(&sctps->sctps_g_ills_lock); return; } #ifdef DEBUG @@ -808,16 +816,16 @@ sctp_update_ipif(ipif_t *ipif, int op) if (sctp_ipif != NULL) { if (sctp_ipif->sctp_ipif_state == SCTP_IPIFS_CONDEMNED) sctp_ipif->sctp_ipif_state = SCTP_IPIFS_INVALID; - rw_exit(&sctp_g_ipifs_lock); - rw_exit(&sctp_g_ills_lock); + rw_exit(&sctps->sctps_g_ipifs_lock); + rw_exit(&sctps->sctps_g_ills_lock); return; } sctp_ipif = kmem_zalloc(sizeof (sctp_ipif_t), KM_NOSLEEP); /* Try again? */ if (sctp_ipif == NULL) { ip1dbg(("sctp_ipif_insert: mem failure..\n")); - rw_exit(&sctp_g_ipifs_lock); - rw_exit(&sctp_g_ills_lock); + rw_exit(&sctps->sctps_g_ipifs_lock); + rw_exit(&sctps->sctps_g_ills_lock); return; } sctp_ipif->sctp_ipif_id = ipif->ipif_seqid; @@ -828,10 +836,11 @@ sctp_update_ipif(ipif_t *ipif, int op) sctp_ipif->sctp_ipif_isv6 = ill->ill_isv6; sctp_ipif->sctp_ipif_flags = ipif->ipif_flags; rw_init(&sctp_ipif->sctp_ipif_lock, NULL, RW_DEFAULT, NULL); - list_insert_tail(&sctp_g_ipifs[ipif_index].sctp_ipif_list, + list_insert_tail( + &sctps->sctps_g_ipifs[ipif_index].sctp_ipif_list, (void *)sctp_ipif); - sctp_g_ipifs[ipif_index].ipif_count++; - sctp_g_ipifs_count++; + sctps->sctps_g_ipifs[ipif_index].ipif_count++; + sctps->sctps_g_ipifs_count++; atomic_add_32(&sctp_ill->sctp_ill_ipifcnt, 1); break; @@ -841,27 +850,27 @@ sctp_update_ipif(ipif_t *ipif, int op) list_t *ipif_list; list_t *ill_list; - ill_list = &sctp_g_ills[ill_index].sctp_ill_list; - ipif_list = &sctp_g_ipifs[ipif_index].sctp_ipif_list; + ill_list = &sctps->sctps_g_ills[ill_index].sctp_ill_list; + ipif_list = &sctps->sctps_g_ipifs[ipif_index].sctp_ipif_list; if (sctp_ipif->sctp_ipif_refcnt != 0) { sctp_ipif->sctp_ipif_state = SCTP_IPIFS_CONDEMNED; - rw_exit(&sctp_g_ipifs_lock); - rw_exit(&sctp_g_ills_lock); + rw_exit(&sctps->sctps_g_ipifs_lock); + rw_exit(&sctps->sctps_g_ills_lock); return; } list_remove(ipif_list, (void *)sctp_ipif); - sctp_g_ipifs[ipif_index].ipif_count--; - sctp_g_ipifs_count--; + sctps->sctps_g_ipifs[ipif_index].ipif_count--; + sctps->sctps_g_ipifs_count--; rw_destroy(&sctp_ipif->sctp_ipif_lock); kmem_free(sctp_ipif, sizeof (sctp_ipif_t)); (void) atomic_add_32_nv(&sctp_ill->sctp_ill_ipifcnt, -1); - if (rw_tryupgrade(&sctp_g_ills_lock) != 0) { - rw_downgrade(&sctp_g_ipifs_lock); + if (rw_tryupgrade(&sctps->sctps_g_ills_lock) != 0) { + rw_downgrade(&sctps->sctps_g_ipifs_lock); if (sctp_ill->sctp_ill_ipifcnt == 0 && sctp_ill->sctp_ill_state == SCTP_ILLS_CONDEMNED) { list_remove(ill_list, (void *)sctp_ill); - sctp_ills_count--; - sctp_g_ills[ill_index].ill_count--; + sctps->sctps_ills_count--; + sctps->sctps_g_ills[ill_index].ill_count--; kmem_free(sctp_ill->sctp_ill_name, sctp_ill->sctp_ill_name_length); kmem_free(sctp_ill, sizeof (sctp_ill_t)); @@ -872,7 +881,7 @@ sctp_update_ipif(ipif_t *ipif, int op) case SCTP_IPIF_UP: - rw_downgrade(&sctp_g_ipifs_lock); + rw_downgrade(&sctps->sctps_g_ipifs_lock); rw_enter(&sctp_ipif->sctp_ipif_lock, RW_WRITER); sctp_ipif->sctp_ipif_state = SCTP_IPIFS_UP; sctp_ipif->sctp_ipif_saddr = ipif->ipif_v6lcl_addr; @@ -884,7 +893,7 @@ sctp_update_ipif(ipif_t *ipif, int op) case SCTP_IPIF_UPDATE: - rw_downgrade(&sctp_g_ipifs_lock); + rw_downgrade(&sctps->sctps_g_ipifs_lock); rw_enter(&sctp_ipif->sctp_ipif_lock, RW_WRITER); sctp_ipif->sctp_ipif_mtu = ipif->ipif_mtu; 
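
Every saddr-list routine touched above repeats one pattern: derive the per-stack SCTP state from the interface it was handed, then take that stack's ill-list lock before its ipif-list lock. A minimal sketch of that discipline follows; the field names are the ones the diff introduces, the function name and body are illustrative only and compile only in the kernel context these files live in.

	/*
	 * Sketch only, not part of the patch: mirrors the lookup and lock
	 * order used by sctp_update_ipif() and sctp_move_ipif() above.
	 */
	static void
	sctp_walk_ipifs_of(ipif_t *ipif)
	{
		netstack_t	*ns = ipif->ipif_ill->ill_ipst->ips_netstack;
		sctp_stack_t	*sctps = ns->netstack_sctp; /* no netstack_hold */
		int		i;

		/* The ills lock is always acquired before the ipifs lock. */
		rw_enter(&sctps->sctps_g_ills_lock, RW_READER);
		rw_enter(&sctps->sctps_g_ipifs_lock, RW_WRITER);
		for (i = 0; i < SCTP_IPIF_HASH; i++) {
			/* walk sctps->sctps_g_ipifs[i].sctp_ipif_list here */
		}
		rw_exit(&sctps->sctps_g_ipifs_lock);
		rw_exit(&sctps->sctps_g_ills_lock);
	}
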
sctp_ipif->sctp_ipif_saddr = ipif->ipif_v6lcl_addr; @@ -896,15 +905,15 @@ sctp_update_ipif(ipif_t *ipif, int op) case SCTP_IPIF_DOWN: - rw_downgrade(&sctp_g_ipifs_lock); + rw_downgrade(&sctps->sctps_g_ipifs_lock); rw_enter(&sctp_ipif->sctp_ipif_lock, RW_WRITER); sctp_ipif->sctp_ipif_state = SCTP_IPIFS_DOWN; rw_exit(&sctp_ipif->sctp_ipif_lock); break; } - rw_exit(&sctp_g_ipifs_lock); - rw_exit(&sctp_g_ills_lock); + rw_exit(&sctps->sctps_g_ipifs_lock); + rw_exit(&sctps->sctps_g_ills_lock); } /* @@ -1347,6 +1356,7 @@ sctp_get_addrlist(sctp_t *sctp, const void *addrs, uint32_t *addrcnt, struct sockaddr_in6 *s6; uchar_t *p; int err = 0; + sctp_stack_t *sctps = sctp->sctp_sctps; *addrlist = NULL; *size = 0; @@ -1419,25 +1429,30 @@ get_all_addrs: * We allocate upfront so that the clustering module need to bother * re-sizing the list. */ - if (sctp->sctp_family == AF_INET) - *size = sizeof (struct sockaddr_in) * sctp_g_ipifs_count; - else - *size = sizeof (struct sockaddr_in6) * sctp_g_ipifs_count; - + if (sctp->sctp_family == AF_INET) { + *size = sizeof (struct sockaddr_in) * + sctps->sctps_g_ipifs_count; + } else { + *size = sizeof (struct sockaddr_in6) * + sctps->sctps_g_ipifs_count; + } *addrlist = kmem_zalloc(*size, KM_SLEEP); *addrcnt = 0; p = *addrlist; - rw_enter(&sctp_g_ipifs_lock, RW_READER); + rw_enter(&sctps->sctps_g_ipifs_lock, RW_READER); /* * Walk through the global interface list and add all addresses, * except those that are hosted on loopback interfaces. */ for (cnt = 0; cnt < SCTP_IPIF_HASH; cnt++) { - if (sctp_g_ipifs[cnt].ipif_count == 0) + if (sctps->sctps_g_ipifs[cnt].ipif_count == 0) continue; - sctp_ipif = list_head(&sctp_g_ipifs[cnt].sctp_ipif_list); - for (icnt = 0; icnt < sctp_g_ipifs[cnt].ipif_count; icnt++) { + sctp_ipif = list_head( + &sctps->sctps_g_ipifs[cnt].sctp_ipif_list); + for (icnt = 0; + icnt < sctps->sctps_g_ipifs[cnt].ipif_count; + icnt++) { in6_addr_t addr; rw_enter(&sctp_ipif->sctp_ipif_lock, RW_READER); @@ -1453,7 +1468,7 @@ get_all_addrs: !sctp_ipif->sctp_ipif_isv6)) { rw_exit(&sctp_ipif->sctp_ipif_lock); sctp_ipif = list_next( - &sctp_g_ipifs[cnt].sctp_ipif_list, + &sctps->sctps_g_ipifs[cnt].sctp_ipif_list, sctp_ipif); continue; } @@ -1472,11 +1487,12 @@ get_all_addrs: p += sizeof (*s6); } (*addrcnt)++; - sctp_ipif = list_next(&sctp_g_ipifs[cnt].sctp_ipif_list, + sctp_ipif = list_next( + &sctps->sctps_g_ipifs[cnt].sctp_ipif_list, sctp_ipif); } } - rw_exit(&sctp_g_ipifs_lock); + rw_exit(&sctps->sctps_g_ipifs_lock); return (err); } @@ -1512,7 +1528,8 @@ sctp_get_saddr_list(sctp_t *sctp, uchar_t *p, size_t psize) psize -= sizeof (ipif->sctp_ipif_saddr); if (scanned >= sctp->sctp_nsaddrs) return; - obj = list_next(&sctp->sctp_saddrs[icnt].sctp_ipif_list, + obj = list_next( + &sctp->sctp_saddrs[icnt].sctp_ipif_list, obj); } } @@ -1536,37 +1553,112 @@ sctp_get_faddr_list(sctp_t *sctp, uchar_t *p, size_t psize) } } +static void +sctp_free_ills(sctp_stack_t *sctps) +{ + int i; + int l; + sctp_ill_t *sctp_ill; + + if (sctps->sctps_ills_count == 0) + return; + + for (i = 0; i < SCTP_ILL_HASH; i++) { + sctp_ill = list_tail(&sctps->sctps_g_ills[i].sctp_ill_list); + for (l = 0; l < sctps->sctps_g_ills[i].ill_count; l++) { + ASSERT(sctp_ill->sctp_ill_ipifcnt == 0); + list_remove(&sctps->sctps_g_ills[i].sctp_ill_list, + sctp_ill); + sctps->sctps_ills_count--; + kmem_free(sctp_ill->sctp_ill_name, + sctp_ill->sctp_ill_name_length); + kmem_free(sctp_ill, sizeof (sctp_ill_t)); + sctp_ill = + list_tail(&sctps->sctps_g_ills[i].sctp_ill_list); + } + 
sctps->sctps_g_ills[i].ill_count = 0; + } + ASSERT(sctps->sctps_ills_count == 0); +} + +static void +sctp_free_ipifs(sctp_stack_t *sctps) +{ + int i; + int l; + sctp_ipif_t *sctp_ipif; + sctp_ill_t *sctp_ill; + + if (sctps->sctps_g_ipifs_count == 0) + return; + + for (i = 0; i < SCTP_IPIF_HASH; i++) { + sctp_ipif = list_tail(&sctps->sctps_g_ipifs[i].sctp_ipif_list); + for (l = 0; l < sctps->sctps_g_ipifs[i].ipif_count; l++) { + sctp_ill = sctp_ipif->sctp_ipif_ill; + + list_remove(&sctps->sctps_g_ipifs[i].sctp_ipif_list, + sctp_ipif); + sctps->sctps_g_ipifs_count--; + (void) atomic_add_32_nv(&sctp_ill->sctp_ill_ipifcnt, + -1); + kmem_free(sctp_ipif, sizeof (sctp_ipif_t)); + sctp_ipif = + list_tail(&sctps->sctps_g_ipifs[i].sctp_ipif_list); + } + sctps->sctps_g_ipifs[i].ipif_count = 0; + } + ASSERT(sctps->sctps_g_ipifs_count == 0); +} + + /* Initialize the SCTP ILL list and lock */ void -sctp_saddr_init() +sctp_saddr_init(sctp_stack_t *sctps) { int i; - rw_init(&sctp_g_ills_lock, NULL, RW_DEFAULT, NULL); - rw_init(&sctp_g_ipifs_lock, NULL, RW_DEFAULT, NULL); + sctps->sctps_g_ills = kmem_zalloc(sizeof (sctp_ill_hash_t) * + SCTP_ILL_HASH, KM_SLEEP); + sctps->sctps_g_ipifs = kmem_zalloc(sizeof (sctp_ipif_hash_t) * + SCTP_IPIF_HASH, KM_SLEEP); + + rw_init(&sctps->sctps_g_ills_lock, NULL, RW_DEFAULT, NULL); + rw_init(&sctps->sctps_g_ipifs_lock, NULL, RW_DEFAULT, NULL); for (i = 0; i < SCTP_ILL_HASH; i++) { - sctp_g_ills[i].ill_count = 0; - list_create(&sctp_g_ills[i].sctp_ill_list, sizeof (sctp_ill_t), + sctps->sctps_g_ills[i].ill_count = 0; + list_create(&sctps->sctps_g_ills[i].sctp_ill_list, + sizeof (sctp_ill_t), offsetof(sctp_ill_t, sctp_ills)); } for (i = 0; i < SCTP_IPIF_HASH; i++) { - sctp_g_ipifs[i].ipif_count = 0; - list_create(&sctp_g_ipifs[i].sctp_ipif_list, + sctps->sctps_g_ipifs[i].ipif_count = 0; + list_create(&sctps->sctps_g_ipifs[i].sctp_ipif_list, sizeof (sctp_ipif_t), offsetof(sctp_ipif_t, sctp_ipifs)); } } void -sctp_saddr_fini() +sctp_saddr_fini(sctp_stack_t *sctps) { int i; - rw_destroy(&sctp_g_ills_lock); - rw_destroy(&sctp_g_ipifs_lock); - ASSERT(sctp_ills_count == 0 && sctp_g_ipifs_count == 0); + sctp_free_ipifs(sctps); + sctp_free_ills(sctps); + for (i = 0; i < SCTP_ILL_HASH; i++) - list_destroy(&sctp_g_ills[i].sctp_ill_list); + list_destroy(&sctps->sctps_g_ills[i].sctp_ill_list); for (i = 0; i < SCTP_IPIF_HASH; i++) - list_destroy(&sctp_g_ipifs[i].sctp_ipif_list); + list_destroy(&sctps->sctps_g_ipifs[i].sctp_ipif_list); + + ASSERT(sctps->sctps_ills_count == 0 && sctps->sctps_g_ipifs_count == 0); + kmem_free(sctps->sctps_g_ills, sizeof (sctp_ill_hash_t) * + SCTP_ILL_HASH); + sctps->sctps_g_ills = NULL; + kmem_free(sctps->sctps_g_ipifs, sizeof (sctp_ipif_hash_t) * + SCTP_IPIF_HASH); + sctps->sctps_g_ipifs = NULL; + rw_destroy(&sctps->sctps_g_ills_lock); + rw_destroy(&sctps->sctps_g_ipifs_lock); } diff --git a/usr/src/uts/common/inet/sctp/sctp_addr.h b/usr/src/uts/common/inet/sctp/sctp_addr.h index bbd8509588..4e1eaf78e2 100644 --- a/usr/src/uts/common/inet/sctp/sctp_addr.h +++ b/usr/src/uts/common/inet/sctp/sctp_addr.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
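
sctp_saddr_fini() above unwinds in the opposite order from sctp_saddr_init(): ipifs are drained before ills (sctp_free_ipifs() drops each ill's sctp_ill_ipifcnt, and sctp_free_ills() asserts that count has reached zero), and only then are the arrays freed and the locks destroyed. A compressed restatement of that ordering, as a sketch; the function name is hypothetical and the per-bucket list_destroy() calls are omitted for brevity.

	/* Sketch only: the teardown order sctp_saddr_fini() above relies on. */
	static void
	sctp_saddr_teardown(sctp_stack_t *sctps)
	{
		/* ipifs first: each one drops its ill's sctp_ill_ipifcnt */
		sctp_free_ipifs(sctps);
		/* then ills: sctp_free_ills() ASSERTs every ipifcnt is now 0 */
		sctp_free_ills(sctps);
		/* (list_destroy of each hash bucket omitted here) */
		kmem_free(sctps->sctps_g_ills,
		    sizeof (sctp_ill_hash_t) * SCTP_ILL_HASH);
		kmem_free(sctps->sctps_g_ipifs,
		    sizeof (sctp_ipif_hash_t) * SCTP_IPIF_HASH);
		rw_destroy(&sctps->sctps_g_ills_lock);
		rw_destroy(&sctps->sctps_g_ipifs_lock);
	}
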
* * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -102,15 +101,22 @@ typedef struct sctp_saddrs_ipif_s { * sctp_ill_ipifcnt gives the number of IPIFs for this ILL, * sctp_ill_index is phyint_ifindex in the actual ILL structure (in IP) * and sctp_ill_flags is ill_flags from the ILL structure. + * + * The comment below (and for other netstack_t references) refers + * to the fact that we only do netstack_hold in particular cases, + * such as the references from open streams (ill_t and conn_t's + * pointers). Internally within IP we rely on IP's ability to cleanup e.g. + * ire_t's when an ill goes away. */ typedef struct sctp_ill_s { - list_node_t sctp_ills; - int sctp_ill_name_length; - char *sctp_ill_name; - int sctp_ill_state; - uint32_t sctp_ill_ipifcnt; - uint_t sctp_ill_index; - uint64_t sctp_ill_flags; + list_node_t sctp_ills; + int sctp_ill_name_length; + char *sctp_ill_name; + int sctp_ill_state; + uint32_t sctp_ill_ipifcnt; + uint_t sctp_ill_index; + uint64_t sctp_ill_flags; + netstack_t *sctp_ill_netstack; /* Does not have a netstack_hold */ } sctp_ill_t; /* ill_state */ @@ -123,17 +129,6 @@ typedef struct sctp_ill_hash_s { int ill_count; } sctp_ill_hash_t; -/* Global list of SCTP ILLs */ -extern sctp_ill_hash_t sctp_g_ills[SCTP_ILL_HASH]; -krwlock_t sctp_g_ills_lock; -extern uint32_t sctp_ills_count; -extern uint32_t sctp_ills_min_mtu; - -/* Global list of SCTP ipifs */ -extern sctp_ipif_hash_t sctp_g_ipifs[SCTP_IPIF_HASH]; -extern uint32_t sctp_g_ipifs_count; -krwlock_t sctp_g_ipifs_lock; - #define SCTP_IPIF_REFHOLD(sctp_ipif) { \ atomic_add_32(&(sctp_ipif)->sctp_ipif_refcnt, 1); \ @@ -167,8 +162,8 @@ extern void sctp_del_saddr_list(sctp_t *, const void *, int, boolean_t); extern void sctp_del_saddr(sctp_t *, sctp_saddr_ipif_t *); extern void sctp_free_saddrs(sctp_t *); -extern void sctp_saddr_init(); -extern void sctp_saddr_fini(); +extern void sctp_saddr_init(sctp_stack_t *); +extern void sctp_saddr_fini(sctp_stack_t *); extern sctp_saddr_ipif_t *sctp_ipif_lookup(sctp_t *, uint_t); extern int sctp_getmyaddrs(void *, void *, int *); extern int sctp_saddr_add_addr(sctp_t *, in6_addr_t *, uint_t); diff --git a/usr/src/uts/common/inet/sctp/sctp_asconf.c b/usr/src/uts/common/inet/sctp/sctp_asconf.c index a242564f14..22d53c57e6 100644 --- a/usr/src/uts/common/inet/sctp/sctp_asconf.c +++ b/usr/src/uts/common/inet/sctp/sctp_asconf.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -43,6 +43,7 @@ #include <inet/ip.h> #include <inet/ip6.h> #include <inet/mib2.h> +#include <inet/ipclassifier.h> #include "sctp_impl.h" #include "sctp_asconf.h" #include "sctp_addr.h" @@ -391,6 +392,7 @@ sctp_input_asconf(sctp_t *sctp, sctp_chunk_hdr_t *ch, sctp_faddr_t *fp) uchar_t *dptr = NULL; int acount = 0; int dcount = 0; + sctp_stack_t *sctps = sctp->sctp_sctps; ASSERT(ch->sch_id == CHUNK_ASCONF); @@ -417,7 +419,7 @@ sctp_input_asconf(sctp_t *sctp, sctp_chunk_hdr_t *ch, sctp_faddr_t *fp) hmp = sctp_make_mp(sctp, fp, sizeof (*ach) + sizeof (*idp)); if (hmp == NULL) { /* Let the peer retransmit */ - SCTP_KSTAT(sctp_send_asconf_ack_failed); + SCTP_KSTAT(sctps, sctp_send_asconf_ack_failed); return; } ach = (sctp_chunk_hdr_t *)hmp->b_wptr; @@ -480,7 +482,7 @@ sctp_input_asconf(sctp_t *sctp, sctp_chunk_hdr_t *ch, sctp_faddr_t *fp) alist = kmem_alloc(asize, KM_NOSLEEP); if (alist == NULL) { freeb(hmp); - SCTP_KSTAT(sctp_cl_assoc_change); + SCTP_KSTAT(sctps, sctp_cl_assoc_change); return; } } @@ -491,7 +493,7 @@ sctp_input_asconf(sctp_t *sctp, sctp_chunk_hdr_t *ch, sctp_faddr_t *fp) if (acount > 0) kmem_free(alist, asize); freeb(hmp); - SCTP_KSTAT(sctp_cl_assoc_change); + SCTP_KSTAT(sctps, sctp_cl_assoc_change); return; } } @@ -839,6 +841,7 @@ sctp_rc_timer(sctp_t *sctp, sctp_faddr_t *fp) #define SCTP_CLR_SENT_FLAG(mp) ((mp)->b_flag &= ~SCTP_CHUNK_FLAG_SENT) sctp_faddr_t *nfp; sctp_faddr_t *ofp; + sctp_stack_t *sctps = sctp->sctp_sctps; ASSERT(fp != NULL); @@ -863,7 +866,7 @@ sctp_rc_timer(sctp_t *sctp, sctp_faddr_t *fp) /* Retransmission */ if (sctp->sctp_strikes >= sctp->sctp_pa_max_rxt) { /* time to give up */ - BUMP_MIB(&sctp_mib, sctpAborted); + BUMP_MIB(&sctps->sctps_mib, sctpAborted); sctp_assoc_event(sctp, SCTP_COMM_LOST, 0, NULL); sctp_clean_death(sctp, ETIMEDOUT); return; @@ -913,6 +916,7 @@ sctp_wput_asconf(sctp_t *sctp, sctp_faddr_t *fp) uint32_t *snp; sctp_parm_hdr_t *ph; boolean_t isv4; + sctp_stack_t *sctps = sctp->sctp_sctps; if (sctp->sctp_cchunk_pend || sctp->sctp_cxmit_list == NULL || /* Queue it for later transmission if not yet established */ @@ -931,7 +935,7 @@ sctp_wput_asconf(sctp_t *sctp, sctp_faddr_t *fp) ipmp = sctp_make_mp(sctp, fp, 0); if (ipmp == NULL) { SCTP_FADDR_RC_TIMER_RESTART(sctp, fp, fp->rto); - SCTP_KSTAT(sctp_send_asconf_failed); + SCTP_KSTAT(sctps, sctp_send_asconf_failed); return; } mp = sctp->sctp_cxmit_list; @@ -1144,11 +1148,12 @@ sctp_addip_req(sctp_t *sctp, sctp_parm_hdr_t *ph, uint32_t cid, sctp_faddr_t *nfp; sctp_parm_hdr_t *oph = ph; int err; + sctp_stack_t *sctps = sctp->sctp_sctps; *cont = 1; /* Send back an authorization error if addip is disabled */ - if (!sctp_addip_enabled) { + if (!sctps->sctps_addip_enabled) { err = SCTP_ERR_UNAUTHORIZED; goto error_handler; } @@ -1528,9 +1533,10 @@ sctp_del_ip(sctp_t *sctp, const void *addrs, uint32_t cnt, uchar_t *ulist, sctp_cl_ainfo_t *ainfo = NULL; uchar_t *p = ulist; boolean_t check_lport = B_FALSE; + sctp_stack_t *sctps = sctp->sctp_sctps; /* Does the peer understand ASCONF and Add-IP? 
*/ - if (sctp->sctp_state <= SCTPS_LISTEN || !sctp_addip_enabled || + if (sctp->sctp_state <= SCTPS_LISTEN || !sctps->sctps_addip_enabled || !sctp->sctp_understands_asconf || !sctp->sctp_understands_addip) { asconf = B_FALSE; } diff --git a/usr/src/uts/common/inet/sctp/sctp_bind.c b/usr/src/uts/common/inet/sctp/sctp_bind.c index 2cbb7c21ee..62b44f5ebf 100644 --- a/usr/src/uts/common/inet/sctp/sctp_bind.c +++ b/usr/src/uts/common/inet/sctp/sctp_bind.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -51,14 +51,14 @@ #include "sctp_asconf.h" #include "sctp_addr.h" -uint_t sctp_next_port_to_try; - /* * Returns 0 on success, EACCES on permission failure. */ static int sctp_select_port(sctp_t *sctp, in_port_t *requested_port, int *user_specified) { + sctp_stack_t *sctps = sctp->sctp_sctps; + /* * Get a valid port (within the anonymous range and should not * be a privileged one) to use if the user has not given a port. @@ -68,8 +68,9 @@ sctp_select_port(sctp_t *sctp, in_port_t *requested_port, int *user_specified) * the same port. */ if (*requested_port == 0) { - *requested_port = sctp_update_next_port(sctp_next_port_to_try, - crgetzone(sctp->sctp_credp)); + *requested_port = sctp_update_next_port( + sctps->sctps_next_port_to_try, + crgetzone(sctp->sctp_credp), sctps); if (*requested_port == 0) return (EACCES); *user_specified = 0; @@ -86,11 +87,12 @@ sctp_select_port(sctp_t *sctp, in_port_t *requested_port, int *user_specified) * changes * - the atomic assignment of the elements of the array */ - if (*requested_port < sctp_smallest_nonpriv_port) { + if (*requested_port < sctps->sctps_smallest_nonpriv_port) { priv = B_TRUE; } else { - for (i = 0; i < sctp_g_num_epriv_ports; i++) { - if (*requested_port == sctp_g_epriv_ports[i]) { + for (i = 0; i < sctps->sctps_g_num_epriv_ports; i++) { + if (*requested_port == + sctps->sctps_g_epriv_ports[i]) { priv = B_TRUE; break; } @@ -119,6 +121,7 @@ int sctp_listen(sctp_t *sctp) { sctp_tf_t *tf; + sctp_stack_t *sctps = sctp->sctp_sctps; RUN_SCTP(sctp); /* @@ -149,7 +152,8 @@ sctp_listen(sctp_t *sctp) (void) random_get_pseudo_bytes(sctp->sctp_secret, SCTP_SECRET_LEN); sctp->sctp_last_secret_update = lbolt64; bzero(sctp->sctp_old_secret, SCTP_SECRET_LEN); - tf = &sctp_listen_fanout[SCTP_LISTEN_HASH(ntohs(sctp->sctp_lport))]; + tf = &sctps->sctps_listen_fanout[SCTP_LISTEN_HASH( + ntohs(sctp->sctp_lport))]; sctp_listen_hash_insert(tf, sctp); WAKE_SCTP(sctp); return (0); @@ -256,6 +260,7 @@ sctp_bind_add(sctp_t *sctp, const void *addrs, uint32_t addrcnt, { int err = 0; boolean_t do_asconf = B_FALSE; + sctp_stack_t *sctps = sctp->sctp_sctps; if (!caller_hold_lock) RUN_SCTP(sctp); @@ -271,7 +276,8 @@ sctp_bind_add(sctp_t *sctp, const void *addrs, uint32_t addrcnt, * Let's do some checking here rather than undoing the * add later (for these reasons). 
*/ - if (!sctp_addip_enabled || !sctp->sctp_understands_asconf || + if (!sctps->sctps_addip_enabled || + !sctp->sctp_understands_asconf || !sctp->sctp_understands_addip) { if (!caller_hold_lock) WAKE_SCTP(sctp); @@ -314,7 +320,7 @@ sctp_bind_add(sctp_t *sctp, const void *addrs, uint32_t addrcnt, ASSERT(size == 0); if (!caller_hold_lock) WAKE_SCTP(sctp); - SCTP_KSTAT(sctp_cl_check_addrs); + SCTP_KSTAT(sctps, sctp_cl_check_addrs); return (err); } ASSERT(addrlist != NULL); @@ -378,6 +384,7 @@ sctp_bind_del(sctp_t *sctp, const void *addrs, uint32_t addrcnt, boolean_t do_asconf = B_FALSE; uchar_t *ulist = NULL; size_t usize = 0; + sctp_stack_t *sctps = sctp->sctp_sctps; if (!caller_hold_lock) RUN_SCTP(sctp); @@ -392,7 +399,8 @@ sctp_bind_del(sctp_t *sctp, const void *addrs, uint32_t addrcnt, * to the peer. */ if (sctp->sctp_state > SCTPS_LISTEN) { - if (!sctp_addip_enabled || !sctp->sctp_understands_asconf || + if (!sctps->sctps_addip_enabled || + !sctp->sctp_understands_asconf || !sctp->sctp_understands_addip) { if (!caller_hold_lock) WAKE_SCTP(sctp); @@ -461,6 +469,7 @@ sctp_bindi(sctp_t *sctp, in_port_t port, boolean_t bind_to_req_port_only, int loopmax; zoneid_t zoneid = sctp->sctp_zoneid; zone_t *zone = crgetzone(sctp->sctp_credp); + sctp_stack_t *sctps = sctp->sctp_sctps; /* * Lookup for free addresses is done in a loop and "loopmax" @@ -480,8 +489,8 @@ sctp_bindi(sctp_t *sctp, in_port_t port, boolean_t bind_to_req_port_only, * Set loopmax appropriately so that one does not look * forever in the case all of the anonymous ports are in use. */ - loopmax = (sctp_largest_anon_port - - sctp_smallest_anon_port + 1); + loopmax = (sctps->sctps_largest_anon_port - + sctps->sctps_smallest_anon_port + 1); } do { uint16_t lport; @@ -504,7 +513,7 @@ sctp_bindi(sctp_t *sctp, in_port_t port, boolean_t bind_to_req_port_only, * in sctp_compress() */ sctp_bind_hash_remove(sctp); - tbf = &sctp_bind_fanout[SCTP_BIND_HASH(port)]; + tbf = &sctps->sctps_bind_fanout[SCTP_BIND_HASH(port)]; mutex_enter(&tbf->tf_lock); for (lsctp = tbf->tf_sctp; lsctp != NULL; lsctp = lsctp->sctp_bind_hash) { @@ -563,7 +572,8 @@ sctp_bindi(sctp_t *sctp, in_port_t port, boolean_t bind_to_req_port_only, sctp->sctp_ipversion, sctp->sctp_ipversion == IPV4_VERSION ? (void *)&sctp->sctp_ipha->ipha_src : - (void *)&sctp->sctp_ip6h->ip6_src); + (void *)&sctp->sctp_ip6h->ip6_src, + sctps->sctps_netstack->netstack_ip); /* * tsol_mlp_addr_type returns the possibilities @@ -589,6 +599,10 @@ sctp_bindi(sctp_t *sctp, in_port_t port, boolean_t bind_to_req_port_only, * make sure that this zone is the one * that owns that MLP. Shared MLPs can * be owned by at most one zone. + * + * No need to handle exclusive-stack + * zones since ALL_ZONES only applies + * to the shared stack. */ if (mlptype == mlptShared && @@ -611,7 +625,8 @@ sctp_bindi(sctp_t *sctp, in_port_t port, boolean_t bind_to_req_port_only, sctp->sctp_lport = lport; sctp->sctp_sctph->sh_sport = lport; - ASSERT(&sctp_bind_fanout[SCTP_BIND_HASH(port)] == tbf); + ASSERT(&sctps->sctps_bind_fanout[ + SCTP_BIND_HASH(port)] == tbf); sctp_bind_hash_insert(tbf, sctp, 1); mutex_exit(&tbf->tf_lock); @@ -625,7 +640,7 @@ sctp_bindi(sctp_t *sctp, in_port_t port, boolean_t bind_to_req_port_only, * be in the valid range. */ if (user_specified == 0) - sctp_next_port_to_try = port + 1; + sctps->sctps_next_port_to_try = port + 1; *allocated_port = port; @@ -637,11 +652,12 @@ sctp_bindi(sctp_t *sctp, in_port_t port, boolean_t bind_to_req_port_only, * We may have to return an anonymous port. 
So * get one to start with. */ - port = sctp_update_next_port(sctp_next_port_to_try, - zone); + port = sctp_update_next_port( + sctps->sctps_next_port_to_try, + zone, sctps); user_specified = 0; } else { - port = sctp_update_next_port(port + 1, zone); + port = sctp_update_next_port(port + 1, zone, sctps); } if (port == 0) break; @@ -668,27 +684,27 @@ sctp_bindi(sctp_t *sctp, in_port_t port, boolean_t bind_to_req_port_only, * - the atomic assignment of the elements of the array */ in_port_t -sctp_update_next_port(in_port_t port, zone_t *zone) +sctp_update_next_port(in_port_t port, zone_t *zone, sctp_stack_t *sctps) { int i; boolean_t restart = B_FALSE; retry: - if (port < sctp_smallest_anon_port) - port = sctp_smallest_anon_port; + if (port < sctps->sctps_smallest_anon_port) + port = sctps->sctps_smallest_anon_port; - if (port > sctp_largest_anon_port) { + if (port > sctps->sctps_largest_anon_port) { if (restart) return (0); restart = B_TRUE; - port = sctp_smallest_anon_port; + port = sctps->sctps_smallest_anon_port; } - if (port < sctp_smallest_nonpriv_port) - port = sctp_smallest_nonpriv_port; + if (port < sctps->sctps_smallest_nonpriv_port) + port = sctps->sctps_smallest_nonpriv_port; - for (i = 0; i < sctp_g_num_epriv_ports; i++) { - if (port == sctp_g_epriv_ports[i]) { + for (i = 0; i < sctps->sctps_g_num_epriv_ports; i++) { + if (port == sctps->sctps_g_epriv_ports[i]) { port++; /* * Make sure whether the port is in the diff --git a/usr/src/uts/common/inet/sctp/sctp_common.c b/usr/src/uts/common/inet/sctp/sctp_common.c index 799ffa2b94..101cd49ecc 100644 --- a/usr/src/uts/common/inet/sctp/sctp_common.c +++ b/usr/src/uts/common/inet/sctp/sctp_common.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -93,6 +93,8 @@ sctp_get_ire(sctp_t *sctp, sctp_faddr_t *fp) uint_t ipif_seqid; int hdrlen; ts_label_t *tsl; + sctp_stack_t *sctps = sctp->sctp_sctps; + ip_stack_t *ipst = sctps->sctps_netstack->netstack_ip; /* Remove the previous cache IRE */ if ((ire = fp->ire) != NULL) { @@ -113,11 +115,12 @@ sctp_get_ire(sctp_t *sctp, sctp_faddr_t *fp) if (fp->isv4) { IN6_V4MAPPED_TO_IPADDR(&fp->faddr, addr4); - ire = ire_cache_lookup(addr4, sctp->sctp_zoneid, tsl); + ire = ire_cache_lookup(addr4, sctp->sctp_zoneid, tsl, ipst); if (ire != NULL) IN6_IPADDR_TO_V4MAPPED(ire->ire_src_addr, &laddr); } else { - ire = ire_cache_lookup_v6(&fp->faddr, sctp->sctp_zoneid, tsl); + ire = ire_cache_lookup_v6(&fp->faddr, sctp->sctp_zoneid, tsl, + ipst); if (ire != NULL) laddr = ire->ire_src_addr_v6; } @@ -231,7 +234,8 @@ sctp_get_ire(sctp_t *sctp, sctp_faddr_t *fp) /* Make sure that sfa_pmss is a multiple of SCTP_ALIGN. */ fp->sfa_pmss = (ire->ire_max_frag - hdrlen) & ~(SCTP_ALIGN - 1); if (fp->cwnd < (fp->sfa_pmss * 2)) { - fp->cwnd = fp->sfa_pmss * sctp_slow_start_initial; + fp->cwnd = fp->sfa_pmss * + sctps->sctps_slow_start_initial; } } @@ -245,6 +249,7 @@ sctp_update_ire(sctp_t *sctp) { ire_t *ire; sctp_faddr_t *fp; + sctp_stack_t *sctps = sctp->sctp_sctps; for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->next) { if ((ire = fp->ire) == NULL) @@ -277,8 +282,8 @@ sctp_update_ire(sctp_t *sctp) } } - if (sctp_rtt_updates != 0 && - fp->rtt_updates >= sctp_rtt_updates) { + if (sctps->sctps_rtt_updates != 0 && + fp->rtt_updates >= sctps->sctps_rtt_updates) { /* * If there is no old cached values, initialize them * conservatively. Set them to be (1.5 * new value). 
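
The sctp_update_next_port() hunks above carry the core of the anonymous-port policy: clamp into the stack's anonymous range, wrap around at most once, skip anything below the non-privileged floor and any extra-privileged ports, and return 0 if a full pass finds nothing. A small user-level restatement of that loop follows; the struct and function names are illustrative, the per-stack tunables are mirrored as plain fields, and the re-check after bumping past a reserved port is assumed from the truncated comment in the hunk.

	#include <stdint.h>

	typedef struct {
		uint16_t smallest_anon;		/* sctps_smallest_anon_port */
		uint16_t largest_anon;		/* sctps_largest_anon_port */
		uint16_t smallest_nonpriv;	/* sctps_smallest_nonpriv_port */
		int	 num_epriv;		/* sctps_g_num_epriv_ports */
		uint16_t epriv[64];		/* sctps_g_epriv_ports[] */
	} port_policy_t;

	/* Returns a usable port, or 0 once the whole range has been tried. */
	static uint16_t
	next_anon_port(const port_policy_t *pp, uint16_t port)
	{
		int restarted = 0;
		int i;

	retry:
		if (port < pp->smallest_anon)
			port = pp->smallest_anon;
		if (port > pp->largest_anon) {
			if (restarted)
				return (0);
			restarted = 1;
			port = pp->smallest_anon;
		}
		if (port < pp->smallest_nonpriv)
			port = pp->smallest_nonpriv;
		for (i = 0; i < pp->num_epriv; i++) {
			if (port == pp->epriv[i]) {
				port++;
				goto retry;	/* re-check the range after the bump */
			}
		}
		return (port);
	}
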
@@ -319,6 +324,7 @@ sctp_make_mp(sctp_t *sctp, sctp_faddr_t *sendto, int trailer) size_t ipsctplen; int isv4; sctp_faddr_t *fp; + sctp_stack_t *sctps = sctp->sctp_sctps; ASSERT(sctp->sctp_current != NULL || sendto != NULL); if (sendto == NULL) { @@ -342,13 +348,13 @@ sctp_make_mp(sctp_t *sctp, sctp_faddr_t *sendto, int trailer) ipsctplen = sctp->sctp_hdr6_len; } - mp = allocb_cred(ipsctplen + sctp_wroff_xtra + trailer, + mp = allocb_cred(ipsctplen + sctps->sctps_wroff_xtra + trailer, CONN_CRED(sctp->sctp_connp)); if (mp == NULL) { ip1dbg(("sctp_make_mp: error making mp..\n")); return (NULL); } - mp->b_rptr += sctp_wroff_xtra; + mp->b_rptr += sctps->sctps_wroff_xtra; mp->b_wptr = mp->b_rptr + ipsctplen; ASSERT(OK_32PTR(mp->b_wptr)); @@ -411,6 +417,7 @@ void sctp_set_ulp_prop(sctp_t *sctp) { int hdrlen; + sctp_stack_t *sctps = sctp->sctp_sctps; if (sctp->sctp_current->isv4) { hdrlen = sctp->sctp_hdr_len; @@ -421,7 +428,7 @@ sctp_set_ulp_prop(sctp_t *sctp) ASSERT(sctp->sctp_current->sfa_pmss == sctp->sctp_mss); sctp->sctp_ulp_prop(sctp->sctp_ulpd, - sctp_wroff_xtra + hdrlen + sizeof (sctp_data_hdr_t), + sctps->sctps_wroff_xtra + hdrlen + sizeof (sctp_data_hdr_t), sctp->sctp_mss - sizeof (sctp_data_hdr_t)); } @@ -704,6 +711,7 @@ int sctp_faddr_dead(sctp_t *sctp, sctp_faddr_t *fp, int newstate) { sctp_faddr_t *ofp; + sctp_stack_t *sctps = sctp->sctp_sctps; if (fp->state == SCTP_FADDRS_ALIVE) { sctp_intf_event(sctp, fp->faddr, SCTP_ADDR_UNREACHABLE, 0); @@ -755,7 +763,7 @@ sctp_faddr_dead(sctp_t *sctp, sctp_faddr_t *fp, int newstate) /* All faddrs are down; kill the association */ dprint(1, ("sctp_faddr_dead: all faddrs down, killing assoc\n")); - BUMP_MIB(&sctp_mib, sctpAborted); + BUMP_MIB(&sctps->sctps_mib, sctpAborted); sctp_assoc_event(sctp, sctp->sctp_state < SCTPS_ESTABLISHED ? SCTP_CANT_STR_ASSOC : SCTP_COMM_LOST, 0, NULL); sctp_clean_death(sctp, sctp->sctp_client_errno ? @@ -895,6 +903,7 @@ int sctp_header_init_ipv4(sctp_t *sctp, int sleep) { sctp_hdr_t *sctph; + sctp_stack_t *sctps = sctp->sctp_sctps; /* * This is a simple initialization. If there's @@ -929,7 +938,7 @@ sctp_header_init_ipv4(sctp_t *sctp, int sleep) * sctp->sctp_ipha->ipha_fragment_offset_and_flags. */ - sctp->sctp_ipha->ipha_ttl = sctp_ipv4_ttl; + sctp->sctp_ipha->ipha_ttl = sctps->sctps_ipv4_ttl; sctp->sctp_ipha->ipha_protocol = IPPROTO_SCTP; sctph = (sctp_hdr_t *)(sctp->sctp_iphc + sizeof (ipha_t)); @@ -955,6 +964,7 @@ sctp_build_hdrs(sctp_t *sctp) ip6_pkt_t *ipp = &sctp->sctp_sticky_ipp; in6_addr_t src; in6_addr_t dst; + sctp_stack_t *sctps = sctp->sctp_sctps; /* * save the existing sctp header and source/dest IP addresses @@ -1002,7 +1012,7 @@ sctp_build_hdrs(sctp_t *sctp) * set it to the default value for SCTP. */ if (!(ipp->ipp_fields & IPPF_UNICAST_HOPS)) - sctp->sctp_ip6h->ip6_hops = sctp_ipv6_hoplimit; + sctp->sctp_ip6h->ip6_hops = sctps->sctps_ipv6_hoplimit; /* * If we're setting extension headers after a connection * has been established, and if we have a routing header @@ -1019,8 +1029,10 @@ sctp_build_hdrs(sctp_t *sctp) rth = ip_find_rthdr_v6(sctp->sctp_ip6h, (uint8_t *)sctp->sctp_sctph6); - if (rth != NULL) - (void) ip_massage_options_v6(sctp->sctp_ip6h, rth); + if (rth != NULL) { + (void) ip_massage_options_v6(sctp->sctp_ip6h, rth, + sctps->sctps_netstack); + } } return (0); } @@ -1032,6 +1044,7 @@ int sctp_header_init_ipv6(sctp_t *sctp, int sleep) { sctp_hdr_t *sctph; + sctp_stack_t *sctps = sctp->sctp_sctps; /* * This is a simple initialization. 
If there's @@ -1062,7 +1075,7 @@ sctp_header_init_ipv6(sctp_t *sctp, int sleep) sctp->sctp_ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; sctp->sctp_ip6h->ip6_plen = ntohs(sizeof (sctp_hdr_t)); sctp->sctp_ip6h->ip6_nxt = IPPROTO_SCTP; - sctp->sctp_ip6h->ip6_hops = sctp_ipv6_hoplimit; + sctp->sctp_ip6h->ip6_hops = sctps->sctps_ipv6_hoplimit; sctph = (sctp_hdr_t *)(sctp->sctp_iphc6 + IPV6_HDR_LEN); sctp->sctp_sctph6 = sctph; @@ -1078,7 +1091,8 @@ sctp_v4_label(sctp_t *sctp) int added; if (tsol_compute_label(cr, sctp->sctp_ipha->ipha_dst, optbuf, - sctp->sctp_mac_exempt) != 0) + sctp->sctp_mac_exempt, + sctp->sctp_sctps->sctps_netstack->netstack_ip) != 0) return (EACCES); added = tsol_remove_secopt(sctp->sctp_ipha, sctp->sctp_hdr_len); @@ -1108,7 +1122,8 @@ sctp_v6_label(sctp_t *sctp) const cred_t *cr = CONN_CRED(sctp->sctp_connp); if (tsol_compute_label_v6(cr, &sctp->sctp_ip6h->ip6_dst, optbuf, - sctp->sctp_mac_exempt) != 0) + sctp->sctp_mac_exempt, + sctp->sctp_sctps->sctps_netstack->netstack_ip) != 0) return (EACCES); if (tsol_update_sticky(&sctp->sctp_sticky_ipp, &sctp->sctp_v6label_len, optbuf) != 0) @@ -1309,6 +1324,7 @@ sctp_get_addrparams(sctp_t *sctp, sctp_t *psctp, mblk_t *pkt, int supp_af = 0; boolean_t check_saddr = B_TRUE; in6_addr_t curaddr; + sctp_stack_t *sctps = sctp->sctp_sctps; if (sctp_options != NULL) *sctp_options = 0; @@ -1513,7 +1529,7 @@ next: asize = sizeof (in6_addr_t) * sctp->sctp_nfaddrs; alist = kmem_alloc(asize, KM_NOSLEEP); if (alist == NULL) { - SCTP_KSTAT(sctp_cl_assoc_change); + SCTP_KSTAT(sctps, sctp_cl_assoc_change); return (ENOMEM); } /* @@ -1528,7 +1544,7 @@ next: dlist = kmem_alloc(dsize, KM_NOSLEEP); if (dlist == NULL) { kmem_free(alist, asize); - SCTP_KSTAT(sctp_cl_assoc_change); + SCTP_KSTAT(sctps, sctp_cl_assoc_change); return (ENOMEM); } bcopy(&curaddr, dlist, sizeof (curaddr)); @@ -1547,7 +1563,7 @@ next: */ int sctp_secure_restart_check(mblk_t *pkt, sctp_chunk_hdr_t *ich, uint32_t ports, - int sleep) + int sleep, sctp_stack_t *sctps) { sctp_faddr_t *fp, *fpa, *fphead = NULL; sctp_parm_hdr_t *ph; @@ -1660,7 +1676,7 @@ sctp_secure_restart_check(mblk_t *pkt, sctp_chunk_hdr_t *ich, uint32_t ports, * If all sctp's faddrs are disjoint, this is a legitimate new * association. */ - tf = &(sctp_conn_fanout[SCTP_CONN_HASH(ports)]); + tf = &(sctps->sctps_conn_fanout[SCTP_CONN_HASH(sctps, ports)]); mutex_enter(&tf->tf_lock); for (sctp = tf->tf_sctp; sctp; sctp = sctp->sctp_conn_hash_next) { @@ -1769,11 +1785,12 @@ void sctp_congest_reset(sctp_t *sctp) { sctp_faddr_t *fp; + sctp_stack_t *sctps = sctp->sctp_sctps; mblk_t *mp; for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->next) { - fp->ssthresh = sctp_initial_mtu; - fp->cwnd = fp->sfa_pmss * sctp_slow_start_initial; + fp->ssthresh = sctps->sctps_initial_mtu; + fp->cwnd = fp->sfa_pmss * sctps->sctps_slow_start_initial; fp->suna = 0; fp->pba = 0; } @@ -1815,18 +1832,21 @@ static void sctp_init_faddr(sctp_t *sctp, sctp_faddr_t *fp, in6_addr_t *addr, mblk_t *timer_mp) { + sctp_stack_t *sctps = sctp->sctp_sctps; + bcopy(addr, &fp->faddr, sizeof (*addr)); if (IN6_IS_ADDR_V4MAPPED(addr)) { fp->isv4 = 1; /* Make sure that sfa_pmss is a multiple of SCTP_ALIGN. 
*/ - fp->sfa_pmss = (sctp_initial_mtu - sctp->sctp_hdr_len) & + fp->sfa_pmss = (sctps->sctps_initial_mtu - sctp->sctp_hdr_len) & ~(SCTP_ALIGN - 1); } else { fp->isv4 = 0; - fp->sfa_pmss = (sctp_initial_mtu - sctp->sctp_hdr6_len) & - ~(SCTP_ALIGN - 1); + fp->sfa_pmss = + (sctps->sctps_initial_mtu - sctp->sctp_hdr6_len) & + ~(SCTP_ALIGN - 1); } - fp->cwnd = sctp_slow_start_initial * fp->sfa_pmss; + fp->cwnd = sctps->sctps_slow_start_initial * fp->sfa_pmss; fp->rto = MIN(sctp->sctp_rto_initial, sctp->sctp_init_rto_max); fp->srtt = -1; fp->rtt_updates = 0; @@ -1835,7 +1855,7 @@ sctp_init_faddr(sctp_t *sctp, sctp_faddr_t *fp, in6_addr_t *addr, /* Mark it as not confirmed. */ fp->state = SCTP_FADDRS_UNCONFIRMED; fp->hb_interval = sctp->sctp_hb_interval; - fp->ssthresh = sctp_initial_ssthresh; + fp->ssthresh = sctps->sctps_initial_ssthresh; fp->suna = 0; fp->pba = 0; fp->acked = 0; diff --git a/usr/src/uts/common/inet/sctp/sctp_conn.c b/usr/src/uts/common/inet/sctp/sctp_conn.c index 6fa418d22d..4ca802d7ba 100644 --- a/usr/src/uts/common/inet/sctp/sctp_conn.c +++ b/usr/src/uts/common/inet/sctp/sctp_conn.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -67,6 +67,7 @@ sctp_accept_comm(sctp_t *listener, sctp_t *acceptor, mblk_t *cr_pkt, conn_t *aconnp; conn_t *lconnp; cred_t *cr; + sctp_stack_t *sctps = listener->sctp_sctps; sctph = (sctp_hdr_t *)(cr_pkt->b_rptr + ip_hdr_len); ASSERT(OK_32PTR(sctph)); @@ -96,7 +97,7 @@ sctp_accept_comm(sctp_t *listener, sctp_t *acceptor, mblk_t *cr_pkt, return (err); if ((sctp_options & SCTP_PRSCTP_OPTION) && - listener->sctp_prsctp_aware && sctp_prsctp_enabled) { + listener->sctp_prsctp_aware && sctps->sctps_prsctp_enabled) { acceptor->sctp_prsctp_aware = B_TRUE; } else { acceptor->sctp_prsctp_aware = B_FALSE; @@ -130,9 +131,9 @@ sctp_accept_comm(sctp_t *listener, sctp_t *acceptor, mblk_t *cr_pkt, */ RUN_SCTP(acceptor); - sctp_conn_hash_insert(&sctp_conn_fanout[ - SCTP_CONN_HASH(acceptor->sctp_ports)], acceptor, 0); - sctp_bind_hash_insert(&sctp_bind_fanout[ + sctp_conn_hash_insert(&sctps->sctps_conn_fanout[ + SCTP_CONN_HASH(sctps, acceptor->sctp_ports)], acceptor, 0); + sctp_bind_hash_insert(&sctps->sctps_bind_fanout[ SCTP_BIND_HASH(ntohs(acceptor->sctp_lport))], acceptor, 0); /* @@ -166,6 +167,7 @@ sctp_conn_request(sctp_t *sctp, mblk_t *mp, uint_t ifindex, uint_t ip_hdr_len, ip6_t *ip6h; int err; conn_t *connp, *econnp; + sctp_stack_t *sctps; /* * No need to check for duplicate as this is the listener @@ -202,6 +204,7 @@ sctp_conn_request(sctp_t *sctp, mblk_t *mp, uint_t ifindex, uint_t ip_hdr_len, } connp = sctp->sctp_connp; + sctps = sctp->sctp_sctps; econnp = eager->sctp_connp; if (connp->conn_policy != NULL) { @@ -216,7 +219,7 @@ sctp_conn_request(sctp_t *sctp, mblk_t *mp, uint_t ifindex, uint_t ip_hdr_len, ipsec_mp->b_datap->db_type = IPSEC_POLICY_SET; if (!ip_bind_ipsec_policy_set(econnp, ipsec_mp)) { sctp_close_eager(eager); - BUMP_MIB(&sctp_mib, sctpListenDrop); + BUMP_MIB(&sctps->sctps_mib, sctpListenDrop); return (NULL); } } @@ -242,14 +245,14 @@ sctp_conn_request(sctp_t *sctp, mblk_t *mp, uint_t ifindex, uint_t ip_hdr_len, } if (ipsec_conn_cache_policy(econnp, ipvers == IPV4_VERSION) != 0) { sctp_close_eager(eager); - BUMP_MIB(&sctp_mib, sctpListenDrop); + BUMP_MIB(&sctps->sctps_mib, sctpListenDrop); return (NULL); } err = sctp_accept_comm(sctp, eager, mp, ip_hdr_len, iack); if (err) { sctp_close_eager(eager); 
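
sctp_init_faddr() and sctp_get_ire() above size each peer address the same way: the path MSS is the stack's initial (or IRE-reported) MTU minus the IP plus SCTP header length, rounded down to a multiple of SCTP_ALIGN, and the initial congestion window is sctps_slow_start_initial times that. The arithmetic restated as a self-contained helper; the names are illustrative and the SCTP_ALIGN value is an assumption here, the real definition lives elsewhere in the SCTP headers.

	#include <stdint.h>

	#define	SCTP_ALIGN	4	/* assumed for illustration */

	/* Round (mtu - hdrlen) down to a multiple of SCTP_ALIGN, as the diff
	 * does with "& ~(SCTP_ALIGN - 1)". */
	static uint32_t
	initial_pmss(uint32_t mtu, uint32_t hdrlen)
	{
		return ((mtu - hdrlen) & ~(uint32_t)(SCTP_ALIGN - 1));
	}

	/* sctps_slow_start_initial * sfa_pmss, per the hunks above. */
	static uint32_t
	initial_cwnd(uint32_t pmss, uint32_t slow_start_initial)
	{
		return (slow_start_initial * pmss);
	}

	/* e.g. initial_pmss(1500, 33) == 1464; initial_cwnd(1464, 4) == 5856 */
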
- BUMP_MIB(&sctp_mib, sctpListenDrop); + BUMP_MIB(&sctps->sctps_mib, sctpListenDrop); return (NULL); } @@ -273,8 +276,8 @@ sctp_conn_request(sctp_t *sctp, mblk_t *mp, uint_t ifindex, uint_t ip_hdr_len, if (flist != NULL) kmem_free(flist, fsize); sctp_close_eager(eager); - BUMP_MIB(&sctp_mib, sctpListenDrop); - SCTP_KSTAT(sctp_cl_connect); + BUMP_MIB(&sctps->sctps_mib, sctpListenDrop); + SCTP_KSTAT(sctps, sctp_cl_connect); return (NULL); } /* The clustering module frees these list */ @@ -290,18 +293,18 @@ sctp_conn_request(sctp_t *sctp, mblk_t *mp, uint_t ifindex, uint_t ip_hdr_len, if ((eager->sctp_ulpd = sctp->sctp_ulp_newconn(sctp->sctp_ulpd, eager)) == NULL) { sctp_close_eager(eager); - BUMP_MIB(&sctp_mib, sctpListenDrop); + BUMP_MIB(&sctps->sctps_mib, sctpListenDrop); return (NULL); } ASSERT(SCTP_IS_DETACHED(eager)); eager->sctp_detached = B_FALSE; if (eager->sctp_family == AF_INET) { eager->sctp_ulp_prop(eager->sctp_ulpd, - sctp_wroff_xtra + sizeof (sctp_data_hdr_t) + + sctps->sctps_wroff_xtra + sizeof (sctp_data_hdr_t) + sctp->sctp_hdr_len, strmsgsz); } else { eager->sctp_ulp_prop(eager->sctp_ulpd, - sctp_wroff_xtra + sizeof (sctp_data_hdr_t) + + sctps->sctps_wroff_xtra + sizeof (sctp_data_hdr_t) + sctp->sctp_hdr6_len, strmsgsz); } return (eager); @@ -328,6 +331,7 @@ sctp_connect(sctp_t *sctp, const struct sockaddr *dst, uint32_t addrlen) ip6_rthdr_t *rth; int err; sctp_faddr_t *cur_fp; + sctp_stack_t *sctps = sctp->sctp_sctps; /* * Determine packet type based on type of address passed in @@ -462,7 +466,8 @@ sctp_connect(sctp_t *sctp, const struct sockaddr *dst, uint32_t addrlen) * Ensure that the duplicate check and insertion is atomic. */ sctp_conn_hash_remove(sctp); - tbf = &sctp_conn_fanout[SCTP_CONN_HASH(sctp->sctp_ports)]; + tbf = &sctps->sctps_conn_fanout[SCTP_CONN_HASH(sctps, + sctp->sctp_ports)]; mutex_enter(&tbf->tf_lock); lsctp = sctp_lookup(sctp, &dstaddr, tbf, &sctp->sctp_ports, SCTPS_COOKIE_WAIT); @@ -509,8 +514,10 @@ sctp_connect(sctp_t *sctp, const struct sockaddr *dst, uint32_t addrlen) */ rth = ip_find_rthdr_v6(sctp->sctp_ip6h, (uint8_t *)sctp->sctp_sctph6); - if (rth != NULL) - (void) ip_massage_options_v6(sctp->sctp_ip6h, rth); + if (rth != NULL) { + (void) ip_massage_options_v6(sctp->sctp_ip6h, rth, + sctps->sctps_netstack); + } /* * Turn off the don't fragment bit on the (only) faddr, @@ -576,7 +583,8 @@ sctp_connect(sctp_t *sctp, const struct sockaddr *dst, uint32_t addrlen) BUMP_LOCAL(sctp->sctp_opkts); sctp->sctp_ulp_prop(sctp->sctp_ulpd, - sctp_wroff_xtra + hdrlen + sizeof (sctp_data_hdr_t), 0); + sctps->sctps_wroff_xtra + hdrlen + sizeof (sctp_data_hdr_t), + 0); return (0); default: diff --git a/usr/src/uts/common/inet/sctp/sctp_cookie.c b/usr/src/uts/common/inet/sctp/sctp_cookie.c index 5caf975050..dcb94c2ccd 100644 --- a/usr/src/uts/common/inet/sctp/sctp_cookie.c +++ b/usr/src/uts/common/inet/sctp/sctp_cookie.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -444,6 +444,7 @@ sctp_send_initack(sctp_t *sctp, sctp_hdr_t *initsh, sctp_chunk_hdr_t *ch, boolean_t linklocal = B_FALSE; cred_t *cr; ts_label_t *initlabel; + sctp_stack_t *sctps = sctp->sctp_sctps; BUMP_LOCAL(sctp->sctp_ibchunks); isv4 = (IPH_HDR_VERSION(initmp->b_rptr) == IPV4_VERSION); @@ -520,7 +521,7 @@ sctp_send_initack(sctp_t *sctp, sctp_hdr_t *initsh, sctp_chunk_hdr_t *ch, if (sctp->sctp_send_adaption) iacklen += (sizeof (sctp_parm_hdr_t) + sizeof (uint32_t)); if (((sctp_options & SCTP_PRSCTP_OPTION) || initcollision) && - sctp->sctp_prsctp_aware && sctp_prsctp_enabled) { + sctp->sctp_prsctp_aware && sctps->sctps_prsctp_enabled) { iacklen += sctp_options_param_len(sctp, SCTP_PRSCTP_OPTION); } if (initcollision) @@ -562,10 +563,10 @@ sctp_send_initack(sctp_t *sctp, sctp_hdr_t *initsh, sctp_chunk_hdr_t *ch, SCTP_ERR_NO_RESOURCES, NULL, 0, initmp, 0, B_FALSE); return; } - iackmp = allocb_cred(ipsctplen + sctp_wroff_xtra, cr); + iackmp = allocb_cred(ipsctplen + sctps->sctps_wroff_xtra, cr); crfree(cr); } else { - iackmp = allocb_cred(ipsctplen + sctp_wroff_xtra, + iackmp = allocb_cred(ipsctplen + sctps->sctps_wroff_xtra, CONN_CRED(sctp->sctp_connp)); } if (iackmp == NULL) { @@ -575,7 +576,7 @@ sctp_send_initack(sctp_t *sctp, sctp_hdr_t *initsh, sctp_chunk_hdr_t *ch, } /* Copy in the [imcomplete] IP/SCTP composite header */ - p = (char *)(iackmp->b_rptr + sctp_wroff_xtra); + p = (char *)(iackmp->b_rptr + sctps->sctps_wroff_xtra); iackmp->b_rptr = (uchar_t *)p; if (isv4) { bcopy(sctp->sctp_iphc, p, sctp->sctp_hdr_len); @@ -628,7 +629,7 @@ sctp_send_initack(sctp_t *sctp, sctp_hdr_t *initsh, sctp_chunk_hdr_t *ch, if (!linklocal) p += sctp_addr_params(sctp, supp_af, (uchar_t *)p); if (((sctp_options & SCTP_PRSCTP_OPTION) || initcollision) && - sctp->sctp_prsctp_aware && sctp_prsctp_enabled) { + sctp->sctp_prsctp_aware && sctps->sctps_prsctp_enabled) { p += sctp_options_param(sctp, p, SCTP_PRSCTP_OPTION); } /* @@ -711,9 +712,9 @@ sctp_send_initack(sctp_t *sctp, sctp_hdr_t *initsh, sctp_chunk_hdr_t *ch, * older than the new secret lifetime parameter permits, * copying the current secret to sctp_old_secret. 
*/ - if (sctp_new_secret_interval > 0 && + if (sctps->sctps_new_secret_interval > 0 && (sctp->sctp_last_secret_update + - MSEC_TO_TICK(sctp_new_secret_interval)) <= nowt) { + MSEC_TO_TICK(sctps->sctps_new_secret_interval)) <= nowt) { bcopy(sctp->sctp_secret, sctp->sctp_old_secret, SCTP_SECRET_LEN); (void) random_get_pseudo_bytes(sctp->sctp_secret, @@ -734,10 +735,12 @@ sctp_send_initack(sctp_t *sctp, sctp_hdr_t *initsh, sctp_chunk_hdr_t *ch, if (isv4) err = tsol_check_label(cr, &iackmp, &adjust, - connp->conn_mac_exempt); + connp->conn_mac_exempt, + sctps->sctps_netstack->netstack_ip); else err = tsol_check_label_v6(cr, &iackmp, &adjust, - connp->conn_mac_exempt); + connp->conn_mac_exempt, + sctps->sctps_netstack->netstack_ip); if (err != 0) { sctp_send_abort(sctp, sctp_init2vtag(ch), SCTP_ERR_AUTH_ERR, NULL, 0, initmp, 0, B_FALSE); @@ -772,11 +775,12 @@ sctp_send_cookie_ack(sctp_t *sctp) { sctp_chunk_hdr_t *cach; mblk_t *camp; + sctp_stack_t *sctps = sctp->sctp_sctps; camp = sctp_make_mp(sctp, NULL, sizeof (*cach)); if (camp == NULL) { /* XXX should abort, but don't have the inmp anymore */ - SCTP_KSTAT(sctp_send_cookie_ack_failed); + SCTP_KSTAT(sctps, sctp_send_cookie_ack_failed); return; } @@ -833,13 +837,14 @@ sctp_send_cookie_echo(sctp_t *sctp, sctp_chunk_hdr_t *iackch, mblk_t *iackmp) uint_t sctp_options; int error; uint16_t old_num_str; + sctp_stack_t *sctps = sctp->sctp_sctps; iack = (sctp_init_chunk_t *)(iackch + 1); cph = NULL; if (validate_init_params(sctp, iackch, iack, iackmp, &cph, &errmp, &pad, &sctp_options) == 0) { /* result in 'pad' ignored */ - BUMP_MIB(&sctp_mib, sctpAborted); + BUMP_MIB(&sctps->sctps_mib, sctpAborted); sctp_assoc_event(sctp, SCTP_CANT_STR_ASSOC, 0, NULL); sctp_clean_death(sctp, ECONNABORTED); return; @@ -858,7 +863,8 @@ sctp_send_cookie_echo(sctp_t *sctp, sctp_chunk_hdr_t *iackch, mblk_t *iackmp) else hdrlen = sctp->sctp_hdr6_len; - cemp = allocb(sctp_wroff_xtra + hdrlen + ceclen + pad, BPRI_MED); + cemp = allocb(sctps->sctps_wroff_xtra + hdrlen + ceclen + pad, + BPRI_MED); if (cemp == NULL) { SCTP_FADDR_TIMER_RESTART(sctp, sctp->sctp_current, sctp->sctp_current->rto); @@ -866,7 +872,7 @@ sctp_send_cookie_echo(sctp_t *sctp, sctp_chunk_hdr_t *iackch, mblk_t *iackmp) freeb(errmp); return; } - cemp->b_rptr += (sctp_wroff_xtra + hdrlen); + cemp->b_rptr += (sctps->sctps_wroff_xtra + hdrlen); /* Process the INIT ACK */ sctp->sctp_sctph->sh_verf = iack->sic_inittag; @@ -887,7 +893,7 @@ sctp_send_cookie_echo(sctp_t *sctp, sctp_chunk_hdr_t *iackch, mblk_t *iackmp) * Since IP uses this info during the fanout process, we need to hold * the lock for this hash line while performing this operation. 
*/ - /* XXX sctp_conn_fanout + SCTP_CONN_HASH(sctp->sctp_ports); */ + /* XXX sctp_conn_fanout + SCTP_CONN_HASH(sctps, sctp->sctp_ports); */ ASSERT(sctp->sctp_conn_tfp != NULL); tf = sctp->sctp_conn_tfp; /* sctp isn't a listener so only need to hold conn fanout lock */ @@ -1057,7 +1063,7 @@ sendcookie: SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto); if (errmp != NULL) freeb(errmp); - SCTP_KSTAT(sctp_send_cookie_failed); + SCTP_KSTAT(sctps, sctp_send_cookie_failed); return; } /* @@ -1112,6 +1118,7 @@ sctp_process_cookie(sctp_t *sctp, sctp_chunk_hdr_t *ch, mblk_t *cmp, uint32_t *lttag; uint32_t *fttag; uint32_t ports; + sctp_stack_t *sctps = sctp->sctp_sctps; BUMP_LOCAL(sctp->sctp_ibchunks); /* Verify the ICV */ @@ -1183,7 +1190,8 @@ sctp_process_cookie(sctp_t *sctp, sctp_chunk_hdr_t *ch, mblk_t *cmp, /* Check for attack by adding addresses to a restart */ bcopy(insctph, &ports, sizeof (ports)); - if (sctp_secure_restart_check(cmp, initch, ports, KM_NOSLEEP) != 1) { + if (sctp_secure_restart_check(cmp, initch, ports, KM_NOSLEEP, + sctps) != 1) { return (-1); } @@ -1311,7 +1319,7 @@ sctp_process_cookie(sctp_t *sctp, sctp_chunk_hdr_t *ch, mblk_t *cmp, */ sctp_t * sctp_addrlist2sctp(mblk_t *mp, sctp_hdr_t *sctph, sctp_chunk_hdr_t *ich, - uint_t ipif_seqid, zoneid_t zoneid) + uint_t ipif_seqid, zoneid_t zoneid, sctp_stack_t *sctps) { int isv4; ipha_t *iph; @@ -1359,7 +1367,7 @@ sctp_addrlist2sctp(mblk_t *mp, sctp_hdr_t *sctph, sctp_chunk_hdr_t *ich, &src); sctp = sctp_conn_match(&src, &dst, ports, ipif_seqid, - zoneid); + zoneid, sctps); dprint(1, ("sctp_addrlist2sctp: src=%x:%x:%x:%x, sctp=%p\n", @@ -1372,7 +1380,7 @@ sctp_addrlist2sctp(mblk_t *mp, sctp_hdr_t *sctph, sctp_chunk_hdr_t *ich, } else if (ph->sph_type == PARM_ADDR6) { src = *(in6_addr_t *)(ph + 1); sctp = sctp_conn_match(&src, &dst, ports, ipif_seqid, - zoneid); + zoneid, sctps); dprint(1, ("sctp_addrlist2sctp: src=%x:%x:%x:%x, sctp=%p\n", diff --git a/usr/src/uts/common/inet/sctp/sctp_error.c b/usr/src/uts/common/inet/sctp/sctp_error.c index e5ec99104d..3bd7e70639 100644 --- a/usr/src/uts/common/inet/sctp/sctp_error.c +++ b/usr/src/uts/common/inet/sctp/sctp_error.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -101,10 +101,11 @@ sctp_user_abort(sctp_t *sctp, mblk_t *data, boolean_t tbit) int len, hdrlen; char *cause; sctp_faddr_t *fp = sctp->sctp_current; + sctp_stack_t *sctps = sctp->sctp_sctps; mp = sctp_make_mp(sctp, fp, 0); if (mp == NULL) { - SCTP_KSTAT(sctp_send_user_abort_failed); + SCTP_KSTAT(sctps, sctp_send_user_abort_failed); return; } @@ -134,7 +135,7 @@ sctp_user_abort(sctp_t *sctp, mblk_t *data, boolean_t tbit) return; } sctp_set_iplen(sctp, mp); - BUMP_MIB(&sctp_mib, sctpAborted); + BUMP_MIB(&sctps->sctps_mib, sctpAborted); BUMP_LOCAL(sctp->sctp_opkts); BUMP_LOCAL(sctp->sctp_obchunks); @@ -168,6 +169,8 @@ sctp_send_abort(sctp_t *sctp, uint32_t vtag, uint16_t serror, char *details, ts_label_t *tsl; conn_t *connp; cred_t *cr = NULL; + sctp_stack_t *sctps = sctp->sctp_sctps; + ip_stack_t *ipst; isv4 = (IPH_HDR_VERSION(inmp->b_rptr) == IPV4_VERSION); if (isv4) { @@ -183,14 +186,15 @@ sctp_send_abort(sctp_t *sctp, uint32_t vtag, uint16_t serror, char *details, if (is_system_labeled() && !tsol_can_reply_error(inmp)) return; - hmp = allocb_cred(sctp_wroff_xtra + ahlen, CONN_CRED(sctp->sctp_connp)); + hmp = allocb_cred(sctps->sctps_wroff_xtra + ahlen, + CONN_CRED(sctp->sctp_connp)); if (hmp == NULL) { /* XXX no resources */ return; } /* copy in the IP / SCTP header */ - p = hmp->b_rptr + sctp_wroff_xtra; + p = hmp->b_rptr + sctps->sctps_wroff_xtra; hmp->b_rptr = p; hmp->b_wptr = p + ahlen; if (isv4) { @@ -247,7 +251,7 @@ sctp_send_abort(sctp_t *sctp, uint32_t vtag, uint16_t serror, char *details, ahip6h->ip6_plen = htons(alen + sizeof (*sh)); } - BUMP_MIB(&sctp_mib, sctpAborted); + BUMP_MIB(&sctps->sctps_mib, sctpAborted); BUMP_LOCAL(sctp->sctp_obchunks); connp = sctp->sctp_connp; @@ -257,10 +261,12 @@ sctp_send_abort(sctp_t *sctp, uint32_t vtag, uint16_t serror, char *details, if (isv4) err = tsol_check_label(cr, &hmp, &adjust, - connp->conn_mac_exempt); + connp->conn_mac_exempt, + sctps->sctps_netstack->netstack_ip); else err = tsol_check_label_v6(cr, &hmp, &adjust, - connp->conn_mac_exempt); + connp->conn_mac_exempt, + sctps->sctps_netstack->netstack_ip); if (err != 0) { freemsg(hmp); return; @@ -283,12 +289,15 @@ sctp_send_abort(sctp_t *sctp, uint32_t vtag, uint16_t serror, char *details, * Let's just mark the IRE for this destination as temporary * to prevent any DoS attack. */ + ipst = sctps->sctps_netstack->netstack_ip; tsl = cr == NULL ? NULL : crgetlabel(cr); - if (isv4) - ire = ire_cache_lookup(iniph->ipha_src, sctp->sctp_zoneid, tsl); - else + if (isv4) { + ire = ire_cache_lookup(iniph->ipha_src, sctp->sctp_zoneid, tsl, + ipst); + } else { ire = ire_cache_lookup_v6(&inip6h->ip6_src, sctp->sctp_zoneid, - tsl); + tsl, ipst); + } /* * In the normal case the ire would be non-null, however it could be * null, say, if IP needs to resolve the gateway for this address. 
We @@ -363,6 +372,7 @@ void sctp_send_err(sctp_t *sctp, mblk_t *emp, sctp_faddr_t *dest) { mblk_t *sendmp; + sctp_stack_t *sctps = sctp->sctp_sctps; sendmp = sctp_make_sack(sctp, dest, NULL); if (sendmp != NULL) { @@ -370,7 +380,7 @@ sctp_send_err(sctp_t *sctp, mblk_t *emp, sctp_faddr_t *dest) } else { sendmp = sctp_make_mp(sctp, dest, 0); if (sendmp == NULL) { - SCTP_KSTAT(sctp_send_err_failed); + SCTP_KSTAT(sctps, sctp_send_err_failed); freemsg(emp); return; } diff --git a/usr/src/uts/common/inet/sctp/sctp_hash.c b/usr/src/uts/common/inet/sctp/sctp_hash.c index 7c37295e48..f6b3666da5 100644 --- a/usr/src/uts/common/inet/sctp/sctp_hash.c +++ b/usr/src/uts/common/inet/sctp/sctp_hash.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -46,16 +46,10 @@ #include "sctp_impl.h" #include "sctp_addr.h" -/* SCTP bind hash list - all sctp_t with state >= BOUND. */ -sctp_tf_t sctp_bind_fanout[SCTP_BIND_FANOUT_SIZE]; -/* SCTP listen hash list - all sctp_t with state == LISTEN. */ -sctp_tf_t sctp_listen_fanout[SCTP_LISTEN_FANOUT_SIZE]; - /* Default association hash size. The size must be a power of 2. */ #define SCTP_CONN_HASH_SIZE 8192 -sctp_tf_t *sctp_conn_fanout; -uint_t sctp_conn_hash_size = SCTP_CONN_HASH_SIZE; +uint_t sctp_conn_hash_size = SCTP_CONN_HASH_SIZE; /* /etc/system */ /* * Cluster networking hook for traversing current assoc list. @@ -64,57 +58,76 @@ uint_t sctp_conn_hash_size = SCTP_CONN_HASH_SIZE; */ int cl_sctp_walk_list(int (*cl_callback)(cl_sctp_info_t *, void *), void *, boolean_t); +static int cl_sctp_walk_list_stack(int (*cl_callback)(cl_sctp_info_t *, + void *), void *arg, boolean_t cansleep, sctp_stack_t *sctps); void -sctp_hash_init() +sctp_hash_init(sctp_stack_t *sctps) { int i; - if (sctp_conn_hash_size & (sctp_conn_hash_size - 1)) { + /* Start with /etc/system value */ + sctps->sctps_conn_hash_size = sctp_conn_hash_size; + + if (sctps->sctps_conn_hash_size & (sctps->sctps_conn_hash_size - 1)) { /* Not a power of two. 
Round up to nearest power of two */ for (i = 0; i < 31; i++) { - if (sctp_conn_hash_size < (1 << i)) + if (sctps->sctps_conn_hash_size < (1 << i)) break; } - sctp_conn_hash_size = 1 << i; + sctps->sctps_conn_hash_size = 1 << i; } - if (sctp_conn_hash_size < SCTP_CONN_HASH_SIZE) { - sctp_conn_hash_size = SCTP_CONN_HASH_SIZE; + if (sctps->sctps_conn_hash_size < SCTP_CONN_HASH_SIZE) { + sctps->sctps_conn_hash_size = SCTP_CONN_HASH_SIZE; cmn_err(CE_CONT, "using sctp_conn_hash_size = %u\n", - sctp_conn_hash_size); + sctps->sctps_conn_hash_size); } - sctp_conn_fanout = - (sctp_tf_t *)kmem_zalloc(sctp_conn_hash_size * + sctps->sctps_conn_fanout = + (sctp_tf_t *)kmem_zalloc(sctps->sctps_conn_hash_size * sizeof (sctp_tf_t), KM_SLEEP); - for (i = 0; i < sctp_conn_hash_size; i++) { - mutex_init(&sctp_conn_fanout[i].tf_lock, NULL, + for (i = 0; i < sctps->sctps_conn_hash_size; i++) { + mutex_init(&sctps->sctps_conn_fanout[i].tf_lock, NULL, MUTEX_DEFAULT, NULL); } - for (i = 0; i < A_CNT(sctp_listen_fanout); i++) { - mutex_init(&sctp_listen_fanout[i].tf_lock, NULL, + sctps->sctps_listen_fanout = kmem_zalloc(SCTP_LISTEN_FANOUT_SIZE * + sizeof (sctp_tf_t), KM_SLEEP); + for (i = 0; i < SCTP_LISTEN_FANOUT_SIZE; i++) { + mutex_init(&sctps->sctps_listen_fanout[i].tf_lock, NULL, MUTEX_DEFAULT, NULL); } - for (i = 0; i < A_CNT(sctp_bind_fanout); i++) { - mutex_init(&sctp_bind_fanout[i].tf_lock, NULL, + sctps->sctps_bind_fanout = kmem_zalloc(SCTP_BIND_FANOUT_SIZE * + sizeof (sctp_tf_t), KM_SLEEP); + for (i = 0; i < SCTP_BIND_FANOUT_SIZE; i++) { + mutex_init(&sctps->sctps_bind_fanout[i].tf_lock, NULL, MUTEX_DEFAULT, NULL); } } void -sctp_hash_destroy() +sctp_hash_destroy(sctp_stack_t *sctps) { int i; - for (i = 0; i < sctp_conn_hash_size; i++) { - mutex_destroy(&sctp_conn_fanout[i].tf_lock); + for (i = 0; i < sctps->sctps_conn_hash_size; i++) { + mutex_destroy(&sctps->sctps_conn_fanout[i].tf_lock); } - kmem_free(sctp_conn_fanout, sctp_conn_hash_size * sizeof (sctp_tf_t)); - for (i = 0; i < A_CNT(sctp_listen_fanout); i++) { - mutex_destroy(&sctp_listen_fanout[i].tf_lock); + kmem_free(sctps->sctps_conn_fanout, sctps->sctps_conn_hash_size * + sizeof (sctp_tf_t)); + sctps->sctps_conn_fanout = NULL; + + for (i = 0; i < SCTP_LISTEN_FANOUT_SIZE; i++) { + mutex_destroy(&sctps->sctps_listen_fanout[i].tf_lock); } - for (i = 0; i < A_CNT(sctp_bind_fanout); i++) { - mutex_destroy(&sctp_bind_fanout[i].tf_lock); + kmem_free(sctps->sctps_listen_fanout, SCTP_LISTEN_FANOUT_SIZE * + sizeof (sctp_tf_t)); + sctps->sctps_listen_fanout = NULL; + + for (i = 0; i < SCTP_BIND_FANOUT_SIZE; i++) { + mutex_destroy(&sctps->sctps_bind_fanout[i].tf_lock); } + kmem_free(sctps->sctps_bind_fanout, SCTP_BIND_FANOUT_SIZE * + sizeof (sctp_tf_t)); + sctps->sctps_bind_fanout = NULL; } /* @@ -133,19 +146,21 @@ sctp_ire_cache_flush(ipif_t *ipif) sctp_faddr_t *fp; conn_t *connp; ire_t *ire; + sctp_stack_t *sctps = ipif->ipif_ill->ill_ipst-> + ips_netstack->netstack_sctp; - sctp = gsctp; - mutex_enter(&sctp_g_lock); + sctp = sctps->sctps_gsctp; + mutex_enter(&sctps->sctps_g_lock); while (sctp != NULL) { mutex_enter(&sctp->sctp_reflock); if (sctp->sctp_condemned) { mutex_exit(&sctp->sctp_reflock); - sctp = list_next(&sctp_g_list, sctp); + sctp = list_next(&sctps->sctps_g_list, sctp); continue; } sctp->sctp_refcnt++; mutex_exit(&sctp->sctp_reflock); - mutex_exit(&sctp_g_lock); + mutex_exit(&sctps->sctps_g_lock); if (sctp_prev != NULL) SCTP_REFRELE(sctp_prev); @@ -188,10 +203,10 @@ sctp_ire_cache_flush(ipif_t *ipif) } WAKE_SCTP(sctp); sctp_prev = sctp; - 
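
The rounding loop in sctp_hash_init() above forces a non-power-of-two /etc/system value up to the next power of two before sizing the per-stack conn fanout. The same computation as a standalone helper, with an example; the function name is illustrative.

	#include <stdint.h>

	/*
	 * If n is not already a power of two, replace it with the next
	 * power of two above it, as sctp_hash_init() does.
	 */
	static uint32_t
	round_up_pow2(uint32_t n)
	{
		int i;

		if ((n & (n - 1)) == 0)		/* already a power of two */
			return (n);
		for (i = 0; i < 31; i++) {
			if (n < (1U << i))
				break;
		}
		return (1U << i);
	}

	/* round_up_pow2(8192) == 8192, round_up_pow2(10000) == 16384 */
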
mutex_enter(&sctp_g_lock); - sctp = list_next(&sctp_g_list, sctp); + mutex_enter(&sctps->sctps_g_lock); + sctp = list_next(&sctps->sctps_g_list, sctp); } - mutex_exit(&sctp_g_lock); + mutex_exit(&sctps->sctps_g_lock); if (sctp_prev != NULL) SCTP_REFRELE(sctp_prev); } @@ -199,10 +214,31 @@ sctp_ire_cache_flush(ipif_t *ipif) /* * Exported routine for extracting active SCTP associations. * Like TCP, we terminate the walk if the callback returns non-zero. + * + * Need to walk all sctp_stack_t instances since this clustering + * interface is assumed global for all instances */ int -cl_sctp_walk_list(int (*cl_callback)(cl_sctp_info_t *, void *), void *arg, - boolean_t cansleep) +cl_sctp_walk_list(int (*cl_callback)(cl_sctp_info_t *, void *), + void *arg, boolean_t cansleep) +{ + netstack_handle_t nh; + netstack_t *ns; + int ret = 0; + + netstack_next_init(&nh); + while ((ns = netstack_next(&nh)) != NULL) { + ret = cl_sctp_walk_list_stack(cl_callback, arg, cansleep, + ns->netstack_sctp); + netstack_rele(ns); + } + netstack_next_fini(&nh); + return (ret); +} + +static int +cl_sctp_walk_list_stack(int (*cl_callback)(cl_sctp_info_t *, void *), + void *arg, boolean_t cansleep, sctp_stack_t *sctps) { sctp_t *sctp; sctp_t *sctp_prev; @@ -210,9 +246,9 @@ cl_sctp_walk_list(int (*cl_callback)(cl_sctp_info_t *, void *), void *arg, uchar_t *slist; uchar_t *flist; - sctp = gsctp; + sctp = sctps->sctps_gsctp; sctp_prev = NULL; - mutex_enter(&sctp_g_lock); + mutex_enter(&sctps->sctps_g_lock); while (sctp != NULL) { size_t ssize; size_t fsize; @@ -220,12 +256,12 @@ cl_sctp_walk_list(int (*cl_callback)(cl_sctp_info_t *, void *), void *arg, mutex_enter(&sctp->sctp_reflock); if (sctp->sctp_condemned || sctp->sctp_state <= SCTPS_LISTEN) { mutex_exit(&sctp->sctp_reflock); - sctp = list_next(&sctp_g_list, sctp); + sctp = list_next(&sctps->sctps_g_list, sctp); continue; } sctp->sctp_refcnt++; mutex_exit(&sctp->sctp_reflock); - mutex_exit(&sctp_g_lock); + mutex_exit(&sctps->sctps_g_lock); if (sctp_prev != NULL) SCTP_REFRELE(sctp_prev); RUN_SCTP(sctp); @@ -265,10 +301,10 @@ cl_sctp_walk_list(int (*cl_callback)(cl_sctp_info_t *, void *), void *arg, } /* list will be freed by cl_callback */ sctp_prev = sctp; - mutex_enter(&sctp_g_lock); - sctp = list_next(&sctp_g_list, sctp); + mutex_enter(&sctps->sctps_g_lock); + sctp = list_next(&sctps->sctps_g_list, sctp); } - mutex_exit(&sctp_g_lock); + mutex_exit(&sctps->sctps_g_lock); if (sctp_prev != NULL) SCTP_REFRELE(sctp_prev); return (0); @@ -276,13 +312,13 @@ cl_sctp_walk_list(int (*cl_callback)(cl_sctp_info_t *, void *), void *arg, sctp_t * sctp_conn_match(in6_addr_t *faddr, in6_addr_t *laddr, uint32_t ports, - uint_t ipif_seqid, zoneid_t zoneid) + uint_t ipif_seqid, zoneid_t zoneid, sctp_stack_t *sctps) { sctp_tf_t *tf; sctp_t *sctp; sctp_faddr_t *fp; - tf = &(sctp_conn_fanout[SCTP_CONN_HASH(ports)]); + tf = &(sctps->sctps_conn_fanout[SCTP_CONN_HASH(sctps, ports)]); mutex_enter(&tf->tf_lock); for (sctp = tf->tf_sctp; sctp; sctp = sctp->sctp_conn_hash_next) { @@ -325,7 +361,7 @@ done: static sctp_t * listen_match(in6_addr_t *laddr, uint32_t ports, uint_t ipif_seqid, - zoneid_t zoneid) + zoneid_t zoneid, sctp_stack_t *sctps) { sctp_t *sctp; sctp_tf_t *tf; @@ -333,7 +369,7 @@ listen_match(in6_addr_t *laddr, uint32_t ports, uint_t ipif_seqid, lport = ((uint16_t *)&ports)[1]; - tf = &(sctp_listen_fanout[SCTP_LISTEN_HASH(ntohs(lport))]); + tf = &(sctps->sctps_listen_fanout[SCTP_LISTEN_HASH(ntohs(lport))]); mutex_enter(&tf->tf_lock); for (sctp = tf->tf_sctp; sctp; sctp = 
sctp->sctp_listen_hash_next) { @@ -364,15 +400,15 @@ done: /* called by ipsec_sctp_pol */ conn_t * sctp_find_conn(in6_addr_t *src, in6_addr_t *dst, uint32_t ports, - uint_t ipif_seqid, zoneid_t zoneid) + uint_t ipif_seqid, zoneid_t zoneid, sctp_stack_t *sctps) { sctp_t *sctp; if ((sctp = sctp_conn_match(src, dst, ports, ipif_seqid, - zoneid)) == NULL) { + zoneid, sctps)) == NULL) { /* Not in conn fanout; check listen fanout */ if ((sctp = listen_match(dst, ports, ipif_seqid, - zoneid)) == NULL) { + zoneid, sctps)) == NULL) { return (NULL); } } @@ -381,15 +417,20 @@ sctp_find_conn(in6_addr_t *src, in6_addr_t *dst, uint32_t ports, conn_t * sctp_fanout(in6_addr_t *src, in6_addr_t *dst, uint32_t ports, - uint_t ipif_seqid, zoneid_t zoneid, mblk_t *mp) + uint_t ipif_seqid, zoneid_t zoneid, mblk_t *mp, sctp_stack_t *sctps) + { sctp_t *sctp; boolean_t shared_addr; if ((sctp = sctp_conn_match(src, dst, ports, ipif_seqid, - zoneid)) == NULL) { + zoneid, sctps)) == NULL) { shared_addr = (zoneid == ALL_ZONES); if (shared_addr) { + /* + * No need to handle exclusive-stack zones since + * ALL_ZONES only applies to the shared stack. + */ zoneid = tsol_mlp_findzone(IPPROTO_SCTP, htons(ntohl(ports) & 0xFFFF)); /* @@ -405,7 +446,7 @@ sctp_fanout(in6_addr_t *src, in6_addr_t *dst, uint32_t ports, } /* Not in conn fanout; check listen fanout */ if ((sctp = listen_match(dst, ports, ipif_seqid, - zoneid)) == NULL) { + zoneid, sctps)) == NULL) { return (NULL); } /* @@ -446,6 +487,14 @@ ip_fanout_sctp(mblk_t *mp, ill_t *recv_ill, ipha_t *ipha, ip6_t *ip6h; in6_addr_t map_src, map_dst; in6_addr_t *src, *dst; + ip_stack_t *ipst; + ipsec_stack_t *ipss; + sctp_stack_t *sctps; + + ASSERT(recv_ill != NULL); + ipst = recv_ill->ill_ipst; + sctps = ipst->ips_netstack->netstack_sctp; + ipss = ipst->ips_netstack->netstack_ipsec; first_mp = mp; if (mctl_present) { @@ -473,8 +522,8 @@ ip_fanout_sctp(mblk_t *mp, ill_t *recv_ill, ipha_t *ipha, dst = &map_dst; isv4 = B_TRUE; } - if ((connp = sctp_fanout(src, dst, ports, ipif_seqid, zoneid, mp)) == - NULL) { + connp = sctp_find_conn(src, dst, ports, ipif_seqid, zoneid, sctps); + if (connp == NULL) { ip_fanout_sctp_raw(first_mp, recv_ill, ipha, isv4, ports, mctl_present, flags, ip_policy, ipif_seqid, zoneid); @@ -489,7 +538,7 @@ ip_fanout_sctp(mblk_t *mp, ill_t *recv_ill, ipha_t *ipha, * We check some fields in conn_t without holding a lock. * This should be fine. */ - if (CONN_INBOUND_POLICY_PRESENT(connp) || mctl_present) { + if (CONN_INBOUND_POLICY_PRESENT(connp, ipss) || mctl_present) { first_mp = ipsec_check_inbound_policy(first_mp, connp, ipha, NULL, mctl_present); if (first_mp == NULL) { @@ -499,7 +548,7 @@ ip_fanout_sctp(mblk_t *mp, ill_t *recv_ill, ipha_t *ipha, } /* Initiate IPPF processing for fastpath */ - if (IPP_ENABLED(IPP_LOCAL_IN)) { + if (IPP_ENABLED(IPP_LOCAL_IN, ipst)) { ip_process(IPP_LOCAL_IN, &mp, recv_ill->ill_phyint->phyint_ifindex); if (mp == NULL) { @@ -530,7 +579,7 @@ ip_fanout_sctp(mblk_t *mp, ill_t *recv_ill, ipha_t *ipha, } if (isv4) { mp = ip_add_info(mp, recv_ill, in_flags, - IPCL_ZONEID(connp)); + IPCL_ZONEID(connp), ipst); } else { mp = ip_add_info_v6(mp, recv_ill, &ip6h->ip6_dst); } diff --git a/usr/src/uts/common/inet/sctp/sctp_heartbeat.c b/usr/src/uts/common/inet/sctp/sctp_heartbeat.c index 652cef09a7..914f1cac3f 100644 --- a/usr/src/uts/common/inet/sctp/sctp_heartbeat.c +++ b/usr/src/uts/common/inet/sctp/sctp_heartbeat.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -38,6 +38,7 @@ #include <inet/common.h> #include <inet/ip.h> #include <inet/mib2.h> +#include <inet/ipclassifier.h> #include "sctp_impl.h" void @@ -51,6 +52,7 @@ sctp_return_heartbeat(sctp_t *sctp, sctp_chunk_hdr_t *hbcp, mblk_t *mp) in6_addr_t addr; sctp_faddr_t *fp; uint16_t len; + sctp_stack_t *sctps = sctp->sctp_sctps; BUMP_LOCAL(sctp->sctp_ibchunks); @@ -82,7 +84,7 @@ sctp_return_heartbeat(sctp_t *sctp, sctp_chunk_hdr_t *hbcp, mblk_t *mp) /* Create an IP header, returning to the src addr from the heartbt */ smp = sctp_make_mp(sctp, fp, len); if (smp == NULL) { - SCTP_KSTAT(sctp_return_hb_failed); + SCTP_KSTAT(sctps, sctp_return_hb_failed); return; } @@ -118,6 +120,7 @@ sctp_send_heartbeat(sctp_t *sctp, sctp_faddr_t *fp) in6_addr_t *a; mblk_t *hbmp; size_t hblen; + sctp_stack_t *sctps = sctp->sctp_sctps; dprint(3, ("sctp_send_heartbeat: to %x:%x:%x:%x from %x:%x:%x:%x\n", SCTP_PRINTADDR(fp->faddr), SCTP_PRINTADDR(fp->saddr))); @@ -129,7 +132,7 @@ sctp_send_heartbeat(sctp_t *sctp, sctp_faddr_t *fp) sizeof (fp->faddr); hbmp = sctp_make_mp(sctp, fp, hblen); if (hbmp == NULL) { - SCTP_KSTAT(sctp_send_hb_failed); + SCTP_KSTAT(sctps, sctp_send_hb_failed); return; } @@ -184,7 +187,7 @@ sctp_send_heartbeat(sctp_t *sctp, sctp_faddr_t *fp) fp->hb_pending = B_TRUE; BUMP_LOCAL(sctp->sctp_obchunks); - BUMP_MIB(&sctp_mib, sctpTimHeartBeatProbe); + BUMP_MIB(&sctps->sctps_mib, sctpTimHeartBeatProbe); sctp_add_sendq(sctp, hbmp); } @@ -199,10 +202,11 @@ sctp_validate_peer(sctp_t *sctp) int cnt; int64_t now; int64_t earliest_expiry; + sctp_stack_t *sctps = sctp->sctp_sctps; now = lbolt64; earliest_expiry = 0; - cnt = sctp_maxburst; + cnt = sctps->sctps_maxburst; /* * Loop thru the list looking for unconfirmed addresses and diff --git a/usr/src/uts/common/inet/sctp/sctp_impl.h b/usr/src/uts/common/inet/sctp/sctp_impl.h index f773654fad..e405703a29 100644 --- a/usr/src/uts/common/inet/sctp/sctp_impl.h +++ b/usr/src/uts/common/inet/sctp/sctp_impl.h @@ -29,10 +29,6 @@ #pragma ident "%Z%%M% %I% %E% SMI" -#ifdef __cplusplus -extern "C" { -#endif - #include <sys/inttypes.h> #include <sys/taskq.h> #include <sys/list.h> @@ -42,6 +38,11 @@ extern "C" { #include <inet/optcom.h> #include <netinet/sctp.h> #include <inet/sctp_itf.h> +#include "sctp_stack.h" + +#ifdef __cplusplus +extern "C" { +#endif /* Streams device identifying info and version */ #define SCTP_DEV_IDINFO "SCTP Streams device 1.0" @@ -74,11 +75,6 @@ typedef struct sctpt_s { ((isv4) ? 
IN6_IS_ADDR_V4MAPPED_ANY(&(addr)) : \ IN6_IS_ADDR_UNSPECIFIED(&(addr))) -extern int sctp_g_num_epriv_ports; -extern uint16_t sctp_g_epriv_ports[]; -extern kmutex_t sctp_epriv_port_lock; - -extern uint_t sctp_next_port_to_try; /* * SCTP parameters */ @@ -90,71 +86,70 @@ typedef struct sctpparam_s { char *sctp_param_name; } sctpparam_t; -extern sctpparam_t sctp_param_arr[]; -#define sctp_max_init_retr sctp_param_arr[0].sctp_param_val -#define sctp_max_init_retr_high sctp_param_arr[0].sctp_param_max -#define sctp_max_init_retr_low sctp_param_arr[0].sctp_param_min -#define sctp_pa_max_retr sctp_param_arr[1].sctp_param_val -#define sctp_pa_max_retr_high sctp_param_arr[1].sctp_param_max -#define sctp_pa_max_retr_low sctp_param_arr[1].sctp_param_min -#define sctp_pp_max_retr sctp_param_arr[2].sctp_param_val -#define sctp_pp_max_retr_high sctp_param_arr[2].sctp_param_max -#define sctp_pp_max_retr_low sctp_param_arr[2].sctp_param_min -#define sctp_cwnd_max_ sctp_param_arr[3].sctp_param_val -#define sctp_dbg sctp_param_arr[4].sctp_param_val -#define sctp_smallest_nonpriv_port sctp_param_arr[5].sctp_param_val -#define sctp_ipv4_ttl sctp_param_arr[6].sctp_param_val -#define sctp_heartbeat_interval sctp_param_arr[7].sctp_param_val -#define sctp_heartbeat_interval_high sctp_param_arr[7].sctp_param_max -#define sctp_heartbeat_interval_low sctp_param_arr[7].sctp_param_min -#define sctp_initial_mtu sctp_param_arr[8].sctp_param_val -#define sctp_mtu_probe_interval sctp_param_arr[9].sctp_param_val -#define sctp_new_secret_interval sctp_param_arr[10].sctp_param_val -#define sctp_deferred_ack_interval sctp_param_arr[11].sctp_param_val -#define sctp_snd_lowat_fraction sctp_param_arr[12].sctp_param_val -#define sctp_ignore_path_mtu sctp_param_arr[13].sctp_param_val -#define sctp_initial_ssthresh sctp_param_arr[14].sctp_param_val -#define sctp_smallest_anon_port sctp_param_arr[15].sctp_param_val -#define sctp_largest_anon_port sctp_param_arr[16].sctp_param_val -#define sctp_xmit_hiwat sctp_param_arr[17].sctp_param_val -#define sctp_xmit_lowat sctp_param_arr[18].sctp_param_val -#define sctp_recv_hiwat sctp_param_arr[19].sctp_param_val -#define sctp_max_buf sctp_param_arr[20].sctp_param_val -#define sctp_rtt_updates sctp_param_arr[21].sctp_param_val -#define sctp_ipv6_hoplimit sctp_param_arr[22].sctp_param_val -#define sctp_rto_ming sctp_param_arr[23].sctp_param_val -#define sctp_rto_ming_high sctp_param_arr[23].sctp_param_max -#define sctp_rto_ming_low sctp_param_arr[23].sctp_param_min -#define sctp_rto_maxg sctp_param_arr[24].sctp_param_val -#define sctp_rto_maxg_high sctp_param_arr[24].sctp_param_max -#define sctp_rto_maxg_low sctp_param_arr[24].sctp_param_min -#define sctp_rto_initialg sctp_param_arr[25].sctp_param_val -#define sctp_rto_initialg_high sctp_param_arr[25].sctp_param_max -#define sctp_rto_initialg_low sctp_param_arr[25].sctp_param_min -#define sctp_cookie_life sctp_param_arr[26].sctp_param_val -#define sctp_cookie_life_high sctp_param_arr[26].sctp_param_max -#define sctp_cookie_life_low sctp_param_arr[26].sctp_param_min -#define sctp_max_in_streams sctp_param_arr[27].sctp_param_val -#define sctp_max_in_streams_high sctp_param_arr[27].sctp_param_max -#define sctp_max_in_streams_low sctp_param_arr[27].sctp_param_min -#define sctp_initial_out_streams sctp_param_arr[28].sctp_param_val -#define sctp_initial_out_streams_high sctp_param_arr[28].sctp_param_max -#define sctp_initial_out_streams_low sctp_param_arr[28].sctp_param_min -#define sctp_shutack_wait_bound sctp_param_arr[29].sctp_param_val -#define 
sctp_maxburst sctp_param_arr[30].sctp_param_val -#define sctp_addip_enabled sctp_param_arr[31].sctp_param_val -#define sctp_recv_hiwat_minmss sctp_param_arr[32].sctp_param_val -#define sctp_slow_start_initial sctp_param_arr[33].sctp_param_val -#define sctp_slow_start_after_idle sctp_param_arr[34].sctp_param_val -#define sctp_prsctp_enabled sctp_param_arr[35].sctp_param_val -#define sctp_fast_rxt_thresh sctp_param_arr[36].sctp_param_val -#define sctp_deferred_acks_max sctp_param_arr[37].sctp_param_val +#define sctps_max_init_retr sctps_params[0].sctp_param_val +#define sctps_max_init_retr_high sctps_params[0].sctp_param_max +#define sctps_max_init_retr_low sctps_params[0].sctp_param_min +#define sctps_pa_max_retr sctps_params[1].sctp_param_val +#define sctps_pa_max_retr_high sctps_params[1].sctp_param_max +#define sctps_pa_max_retr_low sctps_params[1].sctp_param_min +#define sctps_pp_max_retr sctps_params[2].sctp_param_val +#define sctps_pp_max_retr_high sctps_params[2].sctp_param_max +#define sctps_pp_max_retr_low sctps_params[2].sctp_param_min +#define sctps_cwnd_max_ sctps_params[3].sctp_param_val +#define __sctps_not_used1 sctps_params[4].sctp_param_val +#define sctps_smallest_nonpriv_port sctps_params[5].sctp_param_val +#define sctps_ipv4_ttl sctps_params[6].sctp_param_val +#define sctps_heartbeat_interval sctps_params[7].sctp_param_val +#define sctps_heartbeat_interval_high sctps_params[7].sctp_param_max +#define sctps_heartbeat_interval_low sctps_params[7].sctp_param_min +#define sctps_initial_mtu sctps_params[8].sctp_param_val +#define sctps_mtu_probe_interval sctps_params[9].sctp_param_val +#define sctps_new_secret_interval sctps_params[10].sctp_param_val +#define sctps_deferred_ack_interval sctps_params[11].sctp_param_val +#define sctps_snd_lowat_fraction sctps_params[12].sctp_param_val +#define sctps_ignore_path_mtu sctps_params[13].sctp_param_val +#define sctps_initial_ssthresh sctps_params[14].sctp_param_val +#define sctps_smallest_anon_port sctps_params[15].sctp_param_val +#define sctps_largest_anon_port sctps_params[16].sctp_param_val +#define sctps_xmit_hiwat sctps_params[17].sctp_param_val +#define sctps_xmit_lowat sctps_params[18].sctp_param_val +#define sctps_recv_hiwat sctps_params[19].sctp_param_val +#define sctps_max_buf sctps_params[20].sctp_param_val +#define sctps_rtt_updates sctps_params[21].sctp_param_val +#define sctps_ipv6_hoplimit sctps_params[22].sctp_param_val +#define sctps_rto_ming sctps_params[23].sctp_param_val +#define sctps_rto_ming_high sctps_params[23].sctp_param_max +#define sctps_rto_ming_low sctps_params[23].sctp_param_min +#define sctps_rto_maxg sctps_params[24].sctp_param_val +#define sctps_rto_maxg_high sctps_params[24].sctp_param_max +#define sctps_rto_maxg_low sctps_params[24].sctp_param_min +#define sctps_rto_initialg sctps_params[25].sctp_param_val +#define sctps_rto_initialg_high sctps_params[25].sctp_param_max +#define sctps_rto_initialg_low sctps_params[25].sctp_param_min +#define sctps_cookie_life sctps_params[26].sctp_param_val +#define sctps_cookie_life_high sctps_params[26].sctp_param_max +#define sctps_cookie_life_low sctps_params[26].sctp_param_min +#define sctps_max_in_streams sctps_params[27].sctp_param_val +#define sctps_max_in_streams_high sctps_params[27].sctp_param_max +#define sctps_max_in_streams_low sctps_params[27].sctp_param_min +#define sctps_initial_out_streams sctps_params[28].sctp_param_val +#define sctps_initial_out_streams_high sctps_params[28].sctp_param_max +#define sctps_initial_out_streams_low 
sctps_params[28].sctp_param_min +#define sctps_shutack_wait_bound sctps_params[29].sctp_param_val +#define sctps_maxburst sctps_params[30].sctp_param_val +#define sctps_addip_enabled sctps_params[31].sctp_param_val +#define sctps_recv_hiwat_minmss sctps_params[32].sctp_param_val +#define sctps_slow_start_initial sctps_params[33].sctp_param_val +#define sctps_slow_start_after_idle sctps_params[34].sctp_param_val +#define sctps_prsctp_enabled sctps_params[35].sctp_param_val +#define sctps_fast_rxt_thresh sctps_params[36].sctp_param_val +#define sctps_deferred_acks_max sctps_params[37].sctp_param_val + /* * sctp_wroff_xtra is the extra space in front of SCTP/IP header for link * layer header. It has to be a multiple of 4. */ -extern sctpparam_t sctp_wroff_xtra_param; -#define sctp_wroff_xtra sctp_wroff_xtra_param.sctp_param_val +#define sctps_wroff_xtra sctps_wroff_xtra_param->sctp_param_val /* * Retransmission timer start and stop macro for a given faddr. @@ -205,6 +200,27 @@ extern sctpparam_t sctp_wroff_xtra_param; } \ } +#define SCTP_G_Q_REFHOLD(sctps) { \ + atomic_add_32(&(sctps)->sctps_g_q_ref, 1); \ + ASSERT((sctps)->sctps_g_q_ref != 0); \ + DTRACE_PROBE1(sctp__g__q__refhold, sctp_stack_t, sctps); \ +} + +/* + * Decrement the reference count on sctp_g_q + * In architectures e.g sun4u, where atomic_add_32_nv is just + * a cas, we need to maintain the right memory barrier semantics + * as that of mutex_exit i.e all the loads and stores should complete + * before the cas is executed. membar_exit() does that here. + */ +#define SCTP_G_Q_REFRELE(sctps) { \ + ASSERT((sctps)->sctps_g_q_ref != 0); \ + membar_exit(); \ + DTRACE_PROBE1(sctp__g__q__refrele, sctp_stack_t, sctps); \ + if (atomic_add_32_nv(&(sctps)->sctps_g_q_ref, -1) == 0) \ + sctp_g_q_inactive(sctps); \ +} + #define SCTP_PRINTADDR(a) (a).s6_addr32[0], (a).s6_addr32[1],\ (a).s6_addr32[2], (a).s6_addr32[3] @@ -336,8 +352,9 @@ typedef struct { ((lbolt64 - (mhdr)->smh_tob) > (mhdr)->smh_ttl)) /* SCTP association hash function. */ -#define SCTP_CONN_HASH(ports) \ - ((((ports) ^ ((ports) >> 16)) * 31) & (sctp_conn_hash_size - 1)) +#define SCTP_CONN_HASH(sctps, ports) \ + ((((ports) ^ ((ports) >> 16)) * 31) & \ + ((sctps)->sctps_conn_hash_size - 1)) /* * Bind hash array size and hash function. The size must be a power @@ -361,17 +378,6 @@ typedef struct sctp_tf_s { kmutex_t tf_lock; } sctp_tf_t; -/* SCTP association hash list */ -extern sctp_tf_t *sctp_conn_fanout; -/* Size of sctp_conn_fanout */ -extern uint_t sctp_conn_hash_size; - -/* SCTP bind hash list - all sctp_t with state >= BOUND. */ -extern sctp_tf_t sctp_bind_fanout[]; - -/* SCTP listener hash list - all sctp_t with state == LISTEN. */ -extern sctp_tf_t sctp_listen_fanout[]; - /* Round up the value to the nearest mss. 
*/ #define MSS_ROUNDUP(value, mss) ((((value) - 1) / (mss) + 1) * (mss)) @@ -380,42 +386,6 @@ extern sin6_t sctp_sin6_null; /* Zero address for quick clears */ #define SCTP_IS_DETACHED(sctp) ((sctp)->sctp_detached) -extern mib2_sctp_t sctp_mib; /* SNMP fixed size info */ - -/* SCTP kstat */ -typedef struct sctp_kstat_s { - kstat_named_t sctp_add_faddr; - kstat_named_t sctp_add_timer; - kstat_named_t sctp_conn_create; - kstat_named_t sctp_find_next_tq; - kstat_named_t sctp_fr_add_hdr; - kstat_named_t sctp_fr_not_found; - kstat_named_t sctp_output_failed; - kstat_named_t sctp_rexmit_failed; - kstat_named_t sctp_send_init_failed; - kstat_named_t sctp_send_cookie_failed; - kstat_named_t sctp_send_cookie_ack_failed; - kstat_named_t sctp_send_err_failed; - kstat_named_t sctp_send_sack_failed; - kstat_named_t sctp_send_shutdown_failed; - kstat_named_t sctp_send_shutdown_ack_failed; - kstat_named_t sctp_send_shutdown_comp_failed; - kstat_named_t sctp_send_user_abort_failed; - kstat_named_t sctp_send_asconf_failed; - kstat_named_t sctp_send_asconf_ack_failed; - kstat_named_t sctp_send_ftsn_failed; - kstat_named_t sctp_send_hb_failed; - kstat_named_t sctp_return_hb_failed; - kstat_named_t sctp_ss_rexmit_failed; - kstat_named_t sctp_cl_connect; - kstat_named_t sctp_cl_assoc_change; - kstat_named_t sctp_cl_check_addrs; -} sctp_kstat_t; - -extern sctp_kstat_t sctp_statistics; - -#define SCTP_KSTAT(x) (sctp_statistics.x.value.ui64++) - /* * Object to represent database of options to search passed to * {sock,tpi}optcom_req() interface routine to take care of option @@ -639,6 +609,8 @@ typedef struct sctp_s { #define sctp_credp sctp_connp->conn_cred #define sctp_reuseaddr sctp_connp->conn_reuseaddr + sctp_stack_t *sctp_sctps; + /* Peer address tracking */ sctp_faddr_t *sctp_lastfaddr; /* last faddr in list */ sctp_faddr_t *sctp_primary; /* primary faddr */ @@ -936,17 +908,8 @@ typedef struct sctp_s { uint32_t sctp_rxt_maxtsn; /* Max TSN sent at time out */ } sctp_t; -extern list_t sctp_g_list; /* Head of SCTP instance data chain */ -extern kmutex_t sctp_g_lock; - #endif /* (defined(_KERNEL) || defined(_KMEMUSER)) */ -extern queue_t *sctp_g_q; /* Default queue used during detached closes */ -extern sctp_t *gsctp; - -/* Padding mblk for SCTP chunks. 
*/ -extern mblk_t *sctp_pad_mp; - extern void sctp_ack_timer(sctp_t *); extern size_t sctp_adaption_code_param(sctp_t *, uchar_t *); extern void sctp_adaption_event(sctp_t *); @@ -962,7 +925,7 @@ extern mblk_t *sctp_add_proto_hdr(sctp_t *, sctp_faddr_t *, mblk_t *, int, int *); extern void sctp_addr_req(sctp_t *, mblk_t *); extern sctp_t *sctp_addrlist2sctp(mblk_t *, sctp_hdr_t *, sctp_chunk_hdr_t *, - uint_t, zoneid_t); + uint_t, zoneid_t, sctp_stack_t *); extern void sctp_add_hdr(sctp_t *, uchar_t *, size_t); extern void sctp_check_adv_ack_pt(sctp_t *, mblk_t *, mblk_t *); extern void sctp_assoc_event(sctp_t *, uint16_t, uint16_t, @@ -985,7 +948,7 @@ extern void sctp_congest_reset(sctp_t *); extern void sctp_conn_hash_insert(sctp_tf_t *, sctp_t *, int); extern void sctp_conn_hash_remove(sctp_t *); extern sctp_t *sctp_conn_match(in6_addr_t *, in6_addr_t *, uint32_t, uint_t, - zoneid_t); + zoneid_t, sctp_stack_t *); extern sctp_t *sctp_conn_request(sctp_t *, mblk_t *, uint_t, uint_t, sctp_init_chunk_t *, mblk_t *); extern int sctp_conprim_opt_process(queue_t *, mblk_t *, int *, int *, @@ -996,7 +959,7 @@ extern sctp_t *sctp_create_eager(sctp_t *); extern void sctp_dispatch_rput(queue_t *, sctp_t *, sctp_hdr_t *, mblk_t *, uint_t, uint_t, in6_addr_t); extern char *sctp_display(sctp_t *, char *); -extern void sctp_display_all(void); +extern void sctp_display_all(sctp_stack_t *); extern void sctp_error_event(sctp_t *, sctp_chunk_hdr_t *); @@ -1016,6 +979,7 @@ extern void sctp_ftsn_sets_init(void); extern int sctp_get_addrlist(sctp_t *, const void *, uint32_t *, uchar_t **, int *, size_t *); +extern void sctp_g_q_inactive(sctp_stack_t *); extern int sctp_get_addrparams(sctp_t *, sctp_t *, mblk_t *, sctp_chunk_hdr_t *, uint_t *); extern void sctp_get_ire(sctp_t *, sctp_faddr_t *); @@ -1027,14 +991,14 @@ extern void sctp_get_saddr_list(sctp_t *, uchar_t *, size_t); extern int sctp_handle_error(sctp_t *, sctp_hdr_t *, sctp_chunk_hdr_t *, mblk_t *); -extern void sctp_hash_destroy(void); -extern void sctp_hash_init(void); +extern void sctp_hash_destroy(sctp_stack_t *); +extern void sctp_hash_init(sctp_stack_t *); extern int sctp_header_init_ipv4(sctp_t *, int); extern int sctp_header_init_ipv6(sctp_t *, int); extern void sctp_heartbeat_timer(sctp_t *); extern void sctp_icmp_error(sctp_t *, mblk_t *); -extern void sctp_inc_taskq(void); +extern void sctp_inc_taskq(sctp_stack_t *); extern void sctp_info_req(sctp_t *, mblk_t *); extern mblk_t *sctp_init_mp(sctp_t *); extern boolean_t sctp_initialize_params(sctp_t *, sctp_init_chunk_t *, @@ -1045,8 +1009,10 @@ extern void sctp_input_data(sctp_t *, mblk_t *, mblk_t *); extern void sctp_instream_cleanup(sctp_t *, boolean_t); extern int sctp_is_a_faddr_clean(sctp_t *); -extern void sctp_kstat_init(void); -extern void sctp_kstat_fini(void); +extern void *sctp_kstat_init(netstackid_t); +extern void sctp_kstat_fini(netstackid_t, kstat_t *); +extern void *sctp_kstat2_init(netstackid_t, sctp_kstat_t *); +extern void sctp_kstat2_fini(netstackid_t, kstat_t *); extern ssize_t sctp_link_abort(mblk_t *, uint16_t, char *, size_t, int, boolean_t); @@ -1066,9 +1032,9 @@ extern mblk_t *sctp_make_sack(sctp_t *, sctp_faddr_t *, mblk_t *); extern void sctp_maxpsz_set(sctp_t *); extern void sctp_move_faddr_timers(queue_t *, sctp_t *); -extern void sctp_nd_free(void); +extern void sctp_nd_free(sctp_stack_t *); extern int sctp_nd_getset(queue_t *, MBLKP); -extern boolean_t sctp_nd_init(void); +extern boolean_t sctp_nd_init(sctp_stack_t *); extern sctp_parm_hdr_t 
*sctp_next_parm(sctp_parm_hdr_t *, ssize_t *); extern void sctp_ootb_shutdown_ack(sctp_t *, mblk_t *, uint_t); @@ -1076,7 +1042,7 @@ extern size_t sctp_options_param(const sctp_t *, void *, int); extern size_t sctp_options_param_len(const sctp_t *, int); extern void sctp_output(sctp_t *sctp); -extern boolean_t sctp_param_register(sctpparam_t *, int); +extern boolean_t sctp_param_register(IDP *, sctpparam_t *, int, sctp_stack_t *); extern void sctp_partial_delivery_event(sctp_t *); extern int sctp_process_cookie(sctp_t *, sctp_chunk_hdr_t *, mblk_t *, sctp_init_chunk_t **, sctp_hdr_t *, int *, in6_addr_t *); @@ -1095,7 +1061,7 @@ extern sctp_faddr_t *sctp_rotate_faddr(sctp_t *, sctp_faddr_t *); extern void sctp_sack(sctp_t *, mblk_t *); extern int sctp_secure_restart_check(mblk_t *, sctp_chunk_hdr_t *, - uint32_t, int); + uint32_t, int, sctp_stack_t *); extern void sctp_send_abort(sctp_t *, uint32_t, uint16_t, char *, size_t, mblk_t *, int, boolean_t); extern void sctp_send_cookie_ack(sctp_t *); @@ -1131,7 +1097,7 @@ extern void sctp_timer_stop(mblk_t *); extern void sctp_unlink_faddr(sctp_t *, sctp_faddr_t *); extern void sctp_update_ire(sctp_t *sctp); -extern in_port_t sctp_update_next_port(in_port_t, zone_t *zone); +extern in_port_t sctp_update_next_port(in_port_t, zone_t *zone, sctp_stack_t *); extern void sctp_update_rtt(sctp_t *, sctp_faddr_t *, clock_t); extern void sctp_user_abort(sctp_t *, mblk_t *, boolean_t); @@ -1159,10 +1125,12 @@ extern void (*cl_sctp_check_addrs)(sa_family_t, in_port_t, uchar_t **, /* Send a mp to IP. */ #define IP_PUT(mp, conn, isv4) \ { \ + sctp_stack_t *sctps = conn->conn_netstack->netstack_sctp; \ + \ if ((isv4)) \ - ip_output((conn), (mp), WR(sctp_g_q), IP_WPUT); \ + ip_output((conn), (mp), WR(sctps->sctps_g_q), IP_WPUT); \ else \ - ip_output_v6((conn), (mp), WR(sctp_g_q), IP_WPUT); \ + ip_output_v6((conn), (mp), WR(sctps->sctps_g_q), IP_WPUT);\ } #define RUN_SCTP(sctp) \ diff --git a/usr/src/uts/common/inet/sctp/sctp_init.c b/usr/src/uts/common/inet/sctp/sctp_init.c index 5585d42213..68df56e14f 100644 --- a/usr/src/uts/common/inet/sctp/sctp_init.c +++ b/usr/src/uts/common/inet/sctp/sctp_init.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -41,6 +41,7 @@ #include <inet/mib2.h> #include <inet/nd.h> #include <inet/optcom.h> +#include <inet/ipclassifier.h> #include "sctp_impl.h" #include "sctp_addr.h" @@ -175,6 +176,7 @@ sctp_init_mp(sctp_t *sctp) sctp_chunk_hdr_t *chp; uint16_t schlen; int supp_af; + sctp_stack_t *sctps = sctp->sctp_sctps; if (sctp->sctp_family == AF_INET) { supp_af = PARM_SUPP_V4; @@ -191,7 +193,7 @@ sctp_init_mp(sctp_t *sctp) } initlen += sctp_supaddr_param_len(sctp); initlen += sctp_addr_params_len(sctp, supp_af, B_TRUE); - if (sctp->sctp_prsctp_aware && sctp_prsctp_enabled) + if (sctp->sctp_prsctp_aware && sctps->sctps_prsctp_enabled) initlen += sctp_options_param_len(sctp, SCTP_PRSCTP_OPTION); /* @@ -203,7 +205,7 @@ sctp_init_mp(sctp_t *sctp) mp = sctp_make_mp(sctp, NULL, initlen); if (mp == NULL) { - SCTP_KSTAT(sctp_send_init_failed); + SCTP_KSTAT(sctps, sctp_send_init_failed); return (NULL); } @@ -235,7 +237,7 @@ sctp_init_mp(sctp_t *sctp) p += sctp_addr_params(sctp, supp_af, p); /* Add Forward-TSN-Supported param */ - if (sctp->sctp_prsctp_aware && sctp_prsctp_enabled) + if (sctp->sctp_prsctp_aware && sctps->sctps_prsctp_enabled) p += sctp_options_param(sctp, p, SCTP_PRSCTP_OPTION); BUMP_LOCAL(sctp->sctp_obchunks); diff --git a/usr/src/uts/common/inet/sctp/sctp_input.c b/usr/src/uts/common/inet/sctp/sctp_input.c index 75e1fc3c4d..4169486856 100644 --- a/usr/src/uts/common/inet/sctp/sctp_input.c +++ b/usr/src/uts/common/inet/sctp/sctp_input.c @@ -1188,6 +1188,7 @@ sctp_data_chunk(sctp_t *sctp, sctp_chunk_hdr_t *ch, mblk_t *mp, mblk_t **dups, int trypartial = 0; int tpfinished = 1; int32_t new_rwnd; + sctp_stack_t *sctps = sctp->sctp_sctps; /* The following are used multiple times, so we inline them */ #define SCTP_ACK_IT(sctp, tsn) \ @@ -1236,7 +1237,7 @@ sctp_data_chunk(sctp_t *sctp, sctp_chunk_hdr_t *ch, mblk_t *mp, mblk_t **dups, /* We cannot deliver anything up now but we still need to handle it. 
*/ if (SCTP_IS_DETACHED(sctp)) { - BUMP_MIB(&sctp_mib, sctpInClosed); + BUMP_MIB(&sctps->sctps_mib, sctpInClosed); can_deliver = B_FALSE; } @@ -1622,13 +1623,14 @@ sctp_make_sack(sctp_t *sctp, sctp_faddr_t *sendto, mblk_t *dups) sctp_chunk_hdr_t *sch; sctp_sack_chunk_t *sc; int32_t acks_max; + sctp_stack_t *sctps = sctp->sctp_sctps; if (sctp->sctp_force_sack) { sctp->sctp_force_sack = 0; goto checks_done; } - acks_max = sctp_deferred_acks_max; + acks_max = sctps->sctps_deferred_acks_max; if (sctp->sctp_state == SCTPS_ESTABLISHED) { if (sctp->sctp_sack_toggle < acks_max) { /* no need to SACK right now */ @@ -1653,7 +1655,7 @@ checks_done: (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps); smp = sctp_make_mp(sctp, sendto, slen); if (smp == NULL) { - SCTP_KSTAT(sctp_send_sack_failed); + SCTP_KSTAT(sctps, sctp_send_sack_failed); return (NULL); } sch = (sctp_chunk_hdr_t *)smp->b_wptr; @@ -1675,6 +1677,7 @@ void sctp_sack(sctp_t *sctp, mblk_t *dups) { mblk_t *smp; + sctp_stack_t *sctps = sctp->sctp_sctps; /* If we are shutting down, let send_shutdown() bundle the SACK */ if (sctp->sctp_state == SCTPS_SHUTDOWN_SENT) { @@ -1698,7 +1701,7 @@ sctp_sack(sctp_t *sctp, mblk_t *dups) sctp->sctp_active = lbolt64; - BUMP_MIB(&sctp_mib, sctpOutAck); + BUMP_MIB(&sctps->sctps_mib, sctpOutAck); sctp_add_sendq(sctp, smp); } @@ -1721,6 +1724,7 @@ sctp_check_abandoned_msg(sctp_t *sctp, mblk_t *meta) mblk_t *mp1 = meta->b_cont; uint32_t adv_pap = sctp->sctp_adv_pap; sctp_faddr_t *fp = sctp->sctp_current; + sctp_stack_t *sctps = sctp->sctp_sctps; dh = (sctp_data_hdr_t *)mp1->b_rptr; if (SEQ_GEQ(sctp->sctp_lastack_rxd, ntohl(dh->sdh_tsn))) { @@ -1748,7 +1752,7 @@ sctp_check_abandoned_msg(sctp_t *sctp, mblk_t *meta) if (head == NULL) { sctp->sctp_adv_pap = adv_pap; freemsg(nmp); - SCTP_KSTAT(sctp_send_ftsn_failed); + SCTP_KSTAT(sctps, sctp_send_ftsn_failed); return (ENOMEM); } SCTP_MSG_SET_ABANDONED(meta); @@ -1790,6 +1794,7 @@ sctp_cumack(sctp_t *sctp, uint32_t tsn, mblk_t **first_unacked) sctp_data_hdr_t *sdc; uint32_t cumack_forward = 0; sctp_msg_hdr_t *mhdr; + sctp_stack_t *sctps = sctp->sctp_sctps; ump = sctp->sctp_xmit_head; @@ -1877,7 +1882,7 @@ sctp_cumack(sctp_t *sctp, uint32_t tsn, mblk_t **first_unacked) cum_ack_done: *first_unacked = mp; if (cumack_forward > 0) { - BUMP_MIB(&sctp_mib, sctpInAck); + BUMP_MIB(&sctps->sctps_mib, sctpInAck); if (SEQ_GT(sctp->sctp_lastack_rxd, sctp->sctp_recovery_tsn)) { sctp->sctp_recovery_tsn = sctp->sctp_lastack_rxd; } @@ -1898,7 +1903,7 @@ cum_ack_done: sctp->sctp_xmit_unacked = mp; } else { /* dup ack */ - BUMP_MIB(&sctp_mib, sctpInDupAck); + BUMP_MIB(&sctps->sctps_mib, sctpInDupAck); } sctp->sctp_lastack_rxd = tsn; if (SEQ_LT(sctp->sctp_adv_pap, sctp->sctp_lastack_rxd)) @@ -2041,12 +2046,13 @@ sctp_process_forward_tsn(sctp_t *sctp, sctp_chunk_hdr_t *ch, sctp_faddr_t *fp, mblk_t *pmp; sctp_data_hdr_t *dc; ssize_t remaining; + sctp_stack_t *sctps = sctp->sctp_sctps; *ftsn = ntohl(*ftsn); remaining = ntohs(ch->sch_len) - sizeof (*ch) - sizeof (*ftsn); if (SCTP_IS_DETACHED(sctp)) { - BUMP_MIB(&sctp_mib, sctpInClosed); + BUMP_MIB(&sctps->sctps_mib, sctpInClosed); can_deliver = B_FALSE; } /* @@ -2244,6 +2250,7 @@ sctp_process_uo_gaps(sctp_t *sctp, uint32_t ctsn, sctp_sack_frag_t *ssf, mblk_t *mp = mphead; sctp_faddr_t *fp; uint32_t acked = 0; + sctp_stack_t *sctps = sctp->sctp_sctps; /* * gstart tracks the last (in the order of TSN) gapstart that @@ -2275,7 +2282,7 @@ sctp_process_uo_gaps(sctp_t *sctp, uint32_t ctsn, sctp_sack_frag_t *ssf, /* SACK for TSN we have not sent - 
ABORT */ if (SEQ_GT(gapstart, sctp->sctp_ltsn - 1) || SEQ_GT(gapend, sctp->sctp_ltsn - 1)) { - BUMP_MIB(&sctp_mib, sctpInAckUnsent); + BUMP_MIB(&sctps->sctps_mib, sctpInAckUnsent); *trysend = -1; return (acked); } else if (SEQ_LT(gapend, gapstart)) { @@ -2394,7 +2401,8 @@ sctp_process_uo_gaps(sctp_t *sctp, uint32_t ctsn, sctp_sack_frag_t *ssf, */ if (SEQ_GT(xtsn, fr_xtsn) && !SCTP_CHUNK_ISACKED(mp)) { SCTP_CHUNK_SET_SACKCNT(mp, SCTP_CHUNK_SACKCNT(mp) + 1); - if (SCTP_CHUNK_SACKCNT(mp) == sctp_fast_rxt_thresh) { + if (SCTP_CHUNK_SACKCNT(mp) == + sctps->sctps_fast_rxt_thresh) { SCTP_CHUNK_REXMIT(mp); sctp->sctp_chk_fast_rexmit = B_TRUE; *trysend = 1; @@ -2453,6 +2461,7 @@ sctp_got_sack(sctp_t *sctp, sctp_chunk_hdr_t *sch) boolean_t fast_recovery = B_FALSE; boolean_t cumack_forward = B_FALSE; boolean_t fwd_tsn = B_FALSE; + sctp_stack_t *sctps = sctp->sctp_sctps; BUMP_LOCAL(sctp->sctp_ibchunks); chunklen = ntohs(sch->sch_len); @@ -2470,7 +2479,7 @@ sctp_got_sack(sctp_t *sctp, sctp_chunk_hdr_t *sch) return (0); if (SEQ_GT(cumtsn, sctp->sctp_ltsn - 1)) { - BUMP_MIB(&sctp_mib, sctpInAckUnsent); + BUMP_MIB(&sctps->sctps_mib, sctpInAckUnsent); /* Send an ABORT */ return (-1); } @@ -2496,7 +2505,7 @@ sctp_got_sack(sctp_t *sctp, sctp_chunk_hdr_t *sch) mp = sctp->sctp_xmit_head->b_cont; else mp = NULL; - BUMP_MIB(&sctp_mib, sctpInDupAck); + BUMP_MIB(&sctps->sctps_mib, sctpInDupAck); /* * If we were doing a zero win probe and the win * has now opened to at least MSS, re-transmit the @@ -2519,7 +2528,7 @@ sctp_got_sack(sctp_t *sctp, sctp_chunk_hdr_t *sch) pkt = sctp_rexmit_packet(sctp, &meta, &mp1, fp, &pkt_len); if (pkt == NULL) { - SCTP_KSTAT(sctp_ss_rexmit_failed); + SCTP_KSTAT(sctps, sctp_ss_rexmit_failed); return (0); } ASSERT(pkt_len <= fp->sfa_pmss); @@ -2604,7 +2613,8 @@ sctp_got_sack(sctp_t *sctp, sctp_chunk_hdr_t *sch) sctp->sctp_xmit_head, mp1, &trysend, &fast_recovery, gapstart); if (trysend < 0) { - BUMP_MIB(&sctp_mib, sctpInAckUnsent); + BUMP_MIB(&sctps->sctps_mib, + sctpInAckUnsent); return (-1); } break; @@ -2616,7 +2626,7 @@ sctp_got_sack(sctp_t *sctp, sctp_chunk_hdr_t *sch) /* SACK for TSN we have not sent - ABORT */ if (SEQ_GT(gapstart, sctp->sctp_ltsn - 1) || SEQ_GT(gapend, sctp->sctp_ltsn - 1)) { - BUMP_MIB(&sctp_mib, sctpInAckUnsent); + BUMP_MIB(&sctps->sctps_mib, sctpInAckUnsent); return (-1); } else if (SEQ_LT(gapend, gapstart)) { break; @@ -2638,7 +2648,8 @@ sctp_got_sack(sctp_t *sctp, sctp_chunk_hdr_t *sch) ASSERT(SEQ_LT(xtsn, gapstart)); while (xtsn != gapstart) { SCTP_CHUNK_SET_SACKCNT(mp, SCTP_CHUNK_SACKCNT(mp) + 1); - if (SCTP_CHUNK_SACKCNT(mp) == sctp_fast_rxt_thresh) { + if (SCTP_CHUNK_SACKCNT(mp) == + sctps->sctps_fast_rxt_thresh) { SCTP_CHUNK_REXMIT(mp); sctp->sctp_chk_fast_rexmit = B_TRUE; trysend = 1; @@ -2830,8 +2841,10 @@ ret: /* * Limit the burst of transmitted data segments. 
*/ - if (fp->suna + sctp_maxburst * fp->sfa_pmss < fp->cwnd) { - fp->cwnd = fp->suna + sctp_maxburst * fp->sfa_pmss; + if (fp->suna + sctps->sctps_maxburst * fp->sfa_pmss < + fp->cwnd) { + fp->cwnd = fp->suna + sctps->sctps_maxburst * + fp->sfa_pmss; } fp->acked = 0; goto check_ss_rxmit; @@ -2856,8 +2869,10 @@ ret: } } } - if (fp->suna + sctp_maxburst * fp->sfa_pmss < fp->cwnd) { - fp->cwnd = fp->suna + sctp_maxburst * fp->sfa_pmss; + if (fp->suna + sctps->sctps_maxburst * fp->sfa_pmss < + fp->cwnd) { + fp->cwnd = fp->suna + sctps->sctps_maxburst * + fp->sfa_pmss; } fp->acked = 0; } @@ -3126,9 +3141,14 @@ sctp_check_in_policy(mblk_t *mp, mblk_t *ipsec_mp) boolean_t policy_present; ipha_t *ipha; ip6_t *ip6h; + netstack_t *ns; + ipsec_stack_t *ipss; ii = (ipsec_in_t *)ipsec_mp->b_rptr; ASSERT(ii->ipsec_in_type == IPSEC_IN); + ns = ii->ipsec_in_ns; + ipss = ns->netstack_ipsec; + if (ii->ipsec_in_dont_check) { check = B_FALSE; if (!ii->ipsec_in_secure) { @@ -3137,11 +3157,11 @@ sctp_check_in_policy(mblk_t *mp, mblk_t *ipsec_mp) } } if (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION) { - policy_present = ipsec_inbound_v4_policy_present; + policy_present = ipss->ipsec_inbound_v4_policy_present; ipha = (ipha_t *)mp->b_rptr; ip6h = NULL; } else { - policy_present = ipsec_inbound_v6_policy_present; + policy_present = ipss->ipsec_inbound_v6_policy_present; ipha = NULL; ip6h = (ip6_t *)mp->b_rptr; } @@ -3152,7 +3172,7 @@ sctp_check_in_policy(mblk_t *mp, mblk_t *ipsec_mp) * nobody's home. */ ipsec_mp = ipsec_check_global_policy(ipsec_mp, (conn_t *)NULL, - ipha, ip6h, B_TRUE); + ipha, ip6h, B_TRUE, ns); if (ipsec_mp == NULL) return (NULL); } @@ -3176,16 +3196,37 @@ sctp_ootb_input(mblk_t *mp, ill_t *recv_ill, uint_t ipif_seqid, ssize_t mlen; ip_pktinfo_t *pinfo = NULL; mblk_t *first_mp; + sctp_stack_t *sctps; + ip_stack_t *ipst; + + ASSERT(recv_ill != NULL); + ipst = recv_ill->ill_ipst; + sctps = ipst->ips_netstack->netstack_sctp; + + BUMP_MIB(&sctps->sctps_mib, sctpOutOfBlue); + BUMP_MIB(&sctps->sctps_mib, sctpInSCTPPkts); - BUMP_MIB(&sctp_mib, sctpOutOfBlue); - BUMP_MIB(&sctp_mib, sctpInSCTPPkts); + if (sctps->sctps_gsctp == NULL) { + /* + * For non-zero stackids the default queue isn't created + * until the first open, thus there can be a need to send + * an error before then. But we can't do that, hence we just + * drop the packet. Later during boot, when the default queue + * has been setup, a retransmitted packet from the peer + * will result in a error. + */ + ASSERT(sctps->sctps_netstack->netstack_stackid != + GLOBAL_NETSTACKID); + freemsg(mp); + return; + } first_mp = mp; if (mctl_present) mp = mp->b_cont; /* Initiate IPPf processing, if needed. 
*/ - if (IPP_ENABLED(IPP_LOCAL_IN)) { + if (IPP_ENABLED(IPP_LOCAL_IN, ipst)) { ip_process(IPP_LOCAL_IN, &mp, recv_ill->ill_phyint->phyint_ifindex); if (mp == NULL) { @@ -3226,12 +3267,13 @@ sctp_ootb_input(mblk_t *mp, ill_t *recv_ill, uint_t ipif_seqid, /* no listener; send abort */ if (mctl_present && sctp_check_in_policy(mp, first_mp) == NULL) return; - sctp_send_abort(gsctp, sctp_init2vtag(ch), 0, + sctp_send_abort(sctps->sctps_gsctp, sctp_init2vtag(ch), 0, NULL, 0, mp, 0, B_TRUE); break; case CHUNK_INIT_ACK: /* check for changed src addr */ - sctp = sctp_addrlist2sctp(mp, sctph, ch, ipif_seqid, zoneid); + sctp = sctp_addrlist2sctp(mp, sctph, ch, ipif_seqid, zoneid, + sctps); if (sctp != NULL) { /* success; proceed to normal path */ mutex_enter(&sctp->sctp_lock); @@ -3265,8 +3307,8 @@ sctp_ootb_input(mblk_t *mp, ill_t *recv_ill, uint_t ipif_seqid, case CHUNK_SHUTDOWN_ACK: if (mctl_present && sctp_check_in_policy(mp, first_mp) == NULL) return; - sctp_ootb_shutdown_ack(gsctp, mp, ip_hdr_len); - sctp_process_sendq(gsctp); + sctp_ootb_shutdown_ack(sctps->sctps_gsctp, mp, ip_hdr_len); + sctp_process_sendq(sctps->sctps_gsctp); return; case CHUNK_ERROR: case CHUNK_ABORT: @@ -3278,11 +3320,11 @@ sctp_ootb_input(mblk_t *mp, ill_t *recv_ill, uint_t ipif_seqid, default: if (mctl_present && sctp_check_in_policy(mp, first_mp) == NULL) return; - sctp_send_abort(gsctp, sctph->sh_verf, 0, NULL, 0, mp, 0, - B_TRUE); + sctp_send_abort(sctps->sctps_gsctp, sctph->sh_verf, 0, + NULL, 0, mp, 0, B_TRUE); break; } - sctp_process_sendq(gsctp); + sctp_process_sendq(sctps->sctps_gsctp); freemsg(mp); } @@ -3291,12 +3333,14 @@ sctp_input(conn_t *connp, ipha_t *ipha, mblk_t *mp, mblk_t *first_mp, ill_t *recv_ill, boolean_t isv4, boolean_t mctl_present) { sctp_t *sctp = CONN2SCTP(connp); + ip_stack_t *ipst = recv_ill->ill_ipst; + ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; /* * We check some fields in conn_t without holding a lock. * This should be fine. 
*/ - if (CONN_INBOUND_POLICY_PRESENT(connp) || mctl_present) { + if (CONN_INBOUND_POLICY_PRESENT(connp, ipss) || mctl_present) { first_mp = ipsec_check_inbound_policy(first_mp, connp, ipha, NULL, mctl_present); if (first_mp == NULL) { @@ -3307,7 +3351,7 @@ sctp_input(conn_t *connp, ipha_t *ipha, mblk_t *mp, mblk_t *first_mp, } /* Initiate IPPF processing for fastpath */ - if (IPP_ENABLED(IPP_LOCAL_IN)) { + if (IPP_ENABLED(IPP_LOCAL_IN, ipst)) { ip_process(IPP_LOCAL_IN, &mp, recv_ill->ill_phyint->phyint_ifindex); if (mp == NULL) { @@ -3338,7 +3382,7 @@ sctp_input(conn_t *connp, ipha_t *ipha, mblk_t *mp, mblk_t *first_mp, } if (isv4) { mp = ip_add_info(mp, recv_ill, in_flags, - IPCL_ZONEID(connp)); + IPCL_ZONEID(connp), ipst); } else { mp = ip_add_info_v6(mp, recv_ill, &(((ip6_t *)ipha)->ip6_dst)); @@ -3400,7 +3444,9 @@ sctp_input(conn_t *connp, ipha_t *ipha, mblk_t *mp, mblk_t *first_mp, static void sctp_process_abort(sctp_t *sctp, sctp_chunk_hdr_t *ch, int err) { - BUMP_MIB(&sctp_mib, sctpAborted); + sctp_stack_t *sctps = sctp->sctp_sctps; + + BUMP_MIB(&sctps->sctps_mib, sctpAborted); BUMP_LOCAL(sctp->sctp_ibchunks); sctp_assoc_event(sctp, SCTP_COMM_LOST, @@ -3432,6 +3478,8 @@ sctp_input_data(sctp_t *sctp, mblk_t *mp, mblk_t *ipsec_mp) ip_pktinfo_t *pinfo = NULL; in6_addr_t peer_src; int64_t now; + sctp_stack_t *sctps = sctp->sctp_sctps; + ip_stack_t *ipst = sctps->sctps_netstack->netstack_ip; if (DB_TYPE(mp) != M_DATA) { ASSERT(DB_TYPE(mp) == M_CTL); @@ -3456,7 +3504,7 @@ sctp_input_data(sctp_t *sctp, mblk_t *mp, mblk_t *ipsec_mp) * assume a single contiguous chunk of data. */ if (pullupmsg(mp, -1) == 0) { - BUMP_MIB(&ip_mib, ipIfStatsInDiscards); + BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsInDiscards); if (ipsec_mp != NULL) freeb(ipsec_mp); if (pinfo != NULL) @@ -3474,7 +3522,7 @@ sctp_input_data(sctp_t *sctp, mblk_t *mp, mblk_t *ipsec_mp) mlen = mp->b_wptr - (uchar_t *)(sctph + 1); ch = sctp_first_chunk((uchar_t *)(sctph + 1), mlen); if (ch == NULL) { - BUMP_MIB(&ip_mib, ipIfStatsInDiscards); + BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsInDiscards); if (ipsec_mp != NULL) freeb(ipsec_mp); freemsg(mp); @@ -3482,7 +3530,7 @@ sctp_input_data(sctp_t *sctp, mblk_t *mp, mblk_t *ipsec_mp) } if (!sctp_check_input(sctp, ch, mlen, 1)) { - BUMP_MIB(&ip_mib, ipIfStatsInDiscards); + BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsInDiscards); goto done; } /* @@ -3591,10 +3639,11 @@ sctp_input_data(sctp_t *sctp, mblk_t *mp, mblk_t *ipsec_mp) * shutdown ack from the peer, * abort the association. 
*/ - if (sctp_shutack_wait_bound != 0 && + if (sctps->sctps_shutack_wait_bound != + 0 && TICK_TO_MSEC(now - sctp->sctp_out_time) > - sctp_shutack_wait_bound) { + sctps->sctps_shutack_wait_bound) { sctp_send_abort(sctp, sctp->sctp_fvtag, 0, NULL, 0, mp, 0, B_FALSE); @@ -3642,7 +3691,8 @@ sctp_input_data(sctp_t *sctp, mblk_t *mp, mblk_t *ipsec_mp) BUMP_LOCAL(sctp->sctp_ibchunks); if (sctp->sctp_state == SCTPS_SHUTDOWN_SENT) { sctp_shutdown_complete(sctp); - BUMP_MIB(&sctp_mib, sctpShutdowns); + BUMP_MIB(&sctps->sctps_mib, + sctpShutdowns); sctp_assoc_event(sctp, SCTP_SHUTDOWN_COMP, 0, NULL); sctp_clean_death(sctp, 0); @@ -3677,7 +3727,7 @@ sctp_input_data(sctp_t *sctp, mblk_t *mp, mblk_t *ipsec_mp) sctp_adaption_event(sctp); } } else { - BUMP_MIB(&sctp_mib, + BUMP_MIB(&sctps->sctps_mib, sctpInInvalidCookie); } break; @@ -3730,7 +3780,7 @@ sctp_input_data(sctp_t *sctp, mblk_t *mp, mblk_t *ipsec_mp) if (sctp_process_cookie(sctp, ch, mp, &iack, sctph, &recv_adaption, &peer_src) == -1) { - BUMP_MIB(&sctp_mib, + BUMP_MIB(&sctps->sctps_mib, sctpInInvalidCookie); goto done; } @@ -3774,7 +3824,7 @@ sctp_input_data(sctp_t *sctp, mblk_t *mp, mblk_t *ipsec_mp) * properly reprocessed on the * eager's queue. */ - BUMP_MIB(&sctp_mib, sctpPassiveEstab); + BUMP_MIB(&sctps->sctps_mib, sctpPassiveEstab); if (mlen > ntohs(ch->sch_len)) { eager->sctp_cookie_mp = dupb(mp); mblk_setcred(eager->sctp_cookie_mp, @@ -3851,7 +3901,7 @@ sctp_input_data(sctp_t *sctp, mblk_t *mp, mblk_t *ipsec_mp) case CHUNK_COOKIE: if (sctp_process_cookie(sctp, ch, mp, &iack, sctph, &recv_adaption, NULL) == -1) { - BUMP_MIB(&sctp_mib, + BUMP_MIB(&sctps->sctps_mib, sctpInInvalidCookie); break; } @@ -3863,7 +3913,7 @@ sctp_input_data(sctp_t *sctp, mblk_t *mp, mblk_t *ipsec_mp) } sctp->sctp_state = SCTPS_ESTABLISHED; sctp->sctp_assoc_start_time = (uint32_t)lbolt; - BUMP_MIB(&sctp_mib, sctpActiveEstab); + BUMP_MIB(&sctps->sctps_mib, sctpActiveEstab); if (sctp->sctp_cookie_mp) { freemsg(sctp->sctp_cookie_mp); sctp->sctp_cookie_mp = NULL; @@ -3900,7 +3950,7 @@ sctp_input_data(sctp_t *sctp, mblk_t *mp, mblk_t *ipsec_mp) sctp_stop_faddr_timers(sctp); sctp->sctp_state = SCTPS_ESTABLISHED; sctp->sctp_assoc_start_time = (uint32_t)lbolt; - BUMP_MIB(&sctp_mib, sctpActiveEstab); + BUMP_MIB(&sctps->sctps_mib, sctpActiveEstab); BUMP_LOCAL(sctp->sctp_ibchunks); if (sctp->sctp_cookie_mp) { freemsg(sctp->sctp_cookie_mp); @@ -3922,7 +3972,7 @@ sctp_input_data(sctp_t *sctp, mblk_t *mp, mblk_t *ipsec_mp) case CHUNK_COOKIE: if (sctp_process_cookie(sctp, ch, mp, &iack, sctph, &recv_adaption, NULL) == -1) { - BUMP_MIB(&sctp_mib, + BUMP_MIB(&sctps->sctps_mib, sctpInInvalidCookie); break; } @@ -3936,7 +3986,7 @@ sctp_input_data(sctp_t *sctp, mblk_t *mp, mblk_t *ipsec_mp) sctp_stop_faddr_timers(sctp); sctp->sctp_state = SCTPS_ESTABLISHED; sctp->sctp_assoc_start_time = (uint32_t)lbolt; - BUMP_MIB(&sctp_mib, sctpActiveEstab); + BUMP_MIB(&sctps->sctps_mib, sctpActiveEstab); if (sctp->sctp_cookie_mp) { freemsg(sctp->sctp_cookie_mp); sctp->sctp_cookie_mp = NULL; @@ -3968,7 +4018,7 @@ sctp_input_data(sctp_t *sctp, mblk_t *mp, mblk_t *ipsec_mp) p = (sctp_parm_hdr_t *)(ch + 1); if (p->sph_type == htons(SCTP_ERR_STALE_COOKIE)) { - BUMP_MIB(&sctp_mib, + BUMP_MIB(&sctps->sctps_mib, sctpAborted); sctp_error_event(sctp, ch); sctp_assoc_event(sctp, @@ -3999,7 +4049,7 @@ sctp_input_data(sctp_t *sctp, mblk_t *mp, mblk_t *ipsec_mp) goto done; case CHUNK_SHUTDOWN_COMPLETE: BUMP_LOCAL(sctp->sctp_ibchunks); - BUMP_MIB(&sctp_mib, sctpShutdowns); + BUMP_MIB(&sctps->sctps_mib, 
sctpShutdowns); sctp_assoc_event(sctp, SCTP_SHUTDOWN_COMP, 0, NULL); @@ -4010,7 +4060,7 @@ sctp_input_data(sctp_t *sctp, mblk_t *mp, mblk_t *ipsec_mp) case CHUNK_SHUTDOWN_ACK: sctp_shutdown_complete(sctp); BUMP_LOCAL(sctp->sctp_ibchunks); - BUMP_MIB(&sctp_mib, sctpShutdowns); + BUMP_MIB(&sctps->sctps_mib, sctpShutdowns); sctp_assoc_event(sctp, SCTP_SHUTDOWN_COMP, 0, NULL); sctp_clean_death(sctp, 0); @@ -4089,7 +4139,7 @@ nomorechunks: if (!sctp->sctp_ack_timer_running) { sctp->sctp_ack_timer_running = B_TRUE; sctp_timer(sctp, sctp->sctp_ack_mp, - MSEC_TO_TICK(sctp_deferred_ack_interval)); + MSEC_TO_TICK(sctps->sctps_deferred_ack_interval)); } } @@ -4138,6 +4188,7 @@ void sctp_recvd(sctp_t *sctp, int len) { int32_t old, new; + sctp_stack_t *sctps = sctp->sctp_sctps; ASSERT(sctp != NULL); RUN_SCTP(sctp); @@ -4154,7 +4205,7 @@ sctp_recvd(sctp_t *sctp, int len) if (sctp->sctp_state >= SCTPS_ESTABLISHED && ((old <= new >> 1) || (old < sctp->sctp_mss))) { sctp->sctp_force_sack = 1; - BUMP_MIB(&sctp_mib, sctpOutWinUpdate); + BUMP_MIB(&sctps->sctps_mib, sctpOutWinUpdate); sctp_sack(sctp, NULL); old = 1; } else { diff --git a/usr/src/uts/common/inet/sctp/sctp_ioc.c b/usr/src/uts/common/inet/sctp/sctp_ioc.c index 127bd30581..6fa9abc632 100644 --- a/usr/src/uts/common/inet/sctp/sctp_ioc.c +++ b/usr/src/uts/common/inet/sctp/sctp_ioc.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -64,7 +63,8 @@ sctp_def_q_set(queue_t *q, mblk_t *mp) struct iocblk *iocp = (struct iocblk *)mp->b_rptr; mblk_t *mp1; hrtime_t t; - extern int sctp_g_q_fd; + sctp_stack_t *sctps = connp->conn_netstack-> + netstack_sctp; ASSERT(connp != NULL && connp->conn_ulp == IPPROTO_SCTP && connp->conn_rq == NULL); @@ -75,22 +75,28 @@ sctp_def_q_set(queue_t *q, mblk_t *mp) goto done; } - if (sctp_g_q != NULL) { + mutex_enter(&sctps->sctps_g_q_lock); + if (sctps->sctps_g_q != NULL) { + mutex_exit(&sctps->sctps_g_q_lock); ip0dbg(("sctp_def_q_set: already set\n")); iocp->ioc_error = EALREADY; goto done; } - sctp_g_q = q; - sctp_g_q_fd = *(int *)(mp1->b_rptr); - gsctp = (sctp_t *)sctp_create(NULL, NULL, AF_INET6, + sctps->sctps_g_q = q; + mutex_exit(&sctps->sctps_g_q_lock); + sctps->sctps_gsctp = (sctp_t *)sctp_create(NULL, NULL, AF_INET6, SCTP_CAN_BLOCK, NULL, NULL, connp->conn_cred); - if (gsctp == NULL) { - sctp_g_q = NULL; + mutex_enter(&sctps->sctps_g_q_lock); + if (sctps->sctps_gsctp == NULL) { + sctps->sctps_g_q = NULL; + mutex_exit(&sctps->sctps_g_q_lock); iocp->ioc_error = ENOMEM; goto done; } - ASSERT(list_head(&sctp_g_list) == gsctp); + mutex_exit(&sctps->sctps_g_q_lock); + ASSERT(sctps->sctps_g_q_ref >= 1); + ASSERT(list_head(&sctps->sctps_g_list) == sctps->sctps_gsctp); /* * As a good citizen of using /dev/urandom, add some entropy @@ -130,7 +136,7 @@ sctp_wput_ioctl(queue_t *q, mblk_t *mp) switch (iocp->ioc_cmd) { case SCTP_IOC_DEFAULT_Q: /* Wants to be the default wq. 
*/ - if (cr != NULL && secpolicy_net_config(cr, B_FALSE) != 0) { + if (cr != NULL && secpolicy_ip_config(cr, B_FALSE) != 0) { iocp->ioc_error = EPERM; goto err_ret; } diff --git a/usr/src/uts/common/inet/sctp/sctp_opt_data.c b/usr/src/uts/common/inet/sctp/sctp_opt_data.c index 787134e5a3..3776c76bc2 100644 --- a/usr/src/uts/common/inet/sctp/sctp_opt_data.c +++ b/usr/src/uts/common/inet/sctp/sctp_opt_data.c @@ -267,31 +267,32 @@ sctp_set_rtoinfo(sctp_t *sctp, const void *invalp, uint_t inlen) { const struct sctp_rtoinfo *srto; boolean_t ispriv; + sctp_stack_t *sctps = sctp->sctp_sctps; if (inlen < sizeof (*srto)) { return (EINVAL); } srto = invalp; - ispriv = secpolicy_net_config(CRED(), B_TRUE) == 0; + ispriv = secpolicy_ip_config(sctp->sctp_credp, B_TRUE) == 0; /* * Bounds checking. Priviledged user can set the RTO initial * outside the ndd boundary. */ if (srto->srto_initial != 0 && - (!ispriv && (srto->srto_initial < sctp_rto_initialg_low || - srto->srto_initial > sctp_rto_initialg_high))) { + (!ispriv && (srto->srto_initial < sctps->sctps_rto_initialg_low || + srto->srto_initial > sctps->sctps_rto_initialg_high))) { return (EINVAL); } if (srto->srto_max != 0 && - (!ispriv && (srto->srto_max < sctp_rto_maxg_low || - srto->srto_max > sctp_rto_maxg_high))) { + (!ispriv && (srto->srto_max < sctps->sctps_rto_maxg_low || + srto->srto_max > sctps->sctps_rto_maxg_high))) { return (EINVAL); } if (srto->srto_min != 0 && - (!ispriv && (srto->srto_min < sctp_rto_ming_low || - srto->srto_min > sctp_rto_ming_high))) { + (!ispriv && (srto->srto_min < sctps->sctps_rto_ming_low || + srto->srto_min > sctps->sctps_rto_ming_high))) { return (EINVAL); } @@ -340,6 +341,7 @@ sctp_set_assocparams(sctp_t *sctp, const void *invalp, uint_t inlen) const struct sctp_assocparams *sap = invalp; uint32_t sum = 0; sctp_faddr_t *fp; + sctp_stack_t *sctps = sctp->sctp_sctps; if (inlen < sizeof (*sap)) { return (EINVAL); @@ -358,8 +360,8 @@ sctp_set_assocparams(sctp_t *sctp, const void *invalp, uint_t inlen) return (EINVAL); } } - if (sap->sasoc_asocmaxrxt < sctp_pa_max_retr_low || - sap->sasoc_asocmaxrxt > sctp_pa_max_retr_high) { + if (sap->sasoc_asocmaxrxt < sctps->sctps_pa_max_retr_low || + sap->sasoc_asocmaxrxt > sctps->sctps_pa_max_retr_high) { /* * Out of bounds. */ @@ -367,8 +369,8 @@ sctp_set_assocparams(sctp_t *sctp, const void *invalp, uint_t inlen) } } if (sap->sasoc_cookie_life != 0 && - (sap->sasoc_cookie_life < sctp_cookie_life_low || - sap->sasoc_cookie_life > sctp_cookie_life_high)) { + (sap->sasoc_cookie_life < sctps->sctps_cookie_life_low || + sap->sasoc_cookie_life > sctps->sctps_cookie_life_high)) { return (EINVAL); } @@ -402,6 +404,7 @@ static int sctp_set_initmsg(sctp_t *sctp, const void *invalp, uint_t inlen) { const struct sctp_initmsg *si = invalp; + sctp_stack_t *sctps = sctp->sctp_sctps; if (sctp->sctp_state > SCTPS_LISTEN) { return (EINVAL); @@ -410,27 +413,28 @@ sctp_set_initmsg(sctp_t *sctp, const void *invalp, uint_t inlen) return (EINVAL); } if (si->sinit_num_ostreams != 0 && - (si->sinit_num_ostreams < sctp_initial_out_streams_low || - si->sinit_num_ostreams > sctp_initial_out_streams_high)) { + (si->sinit_num_ostreams < sctps->sctps_initial_out_streams_low || + si->sinit_num_ostreams > + sctps->sctps_initial_out_streams_high)) { /* * Out of bounds. 
*/ return (EINVAL); } if (si->sinit_max_instreams != 0 && - (si->sinit_max_instreams < sctp_max_in_streams_low || - si->sinit_max_instreams > sctp_max_in_streams_high)) { + (si->sinit_max_instreams < sctps->sctps_max_in_streams_low || + si->sinit_max_instreams > sctps->sctps_max_in_streams_high)) { return (EINVAL); } if (si->sinit_max_attempts != 0 && - (si->sinit_max_attempts < sctp_max_init_retr_low || - si->sinit_max_attempts > sctp_max_init_retr_high)) { + (si->sinit_max_attempts < sctps->sctps_max_init_retr_low || + si->sinit_max_attempts > sctps->sctps_max_init_retr_high)) { return (EINVAL); } if (si->sinit_max_init_timeo != 0 && - (secpolicy_net_config(CRED(), B_TRUE) != 0 && - (si->sinit_max_init_timeo < sctp_rto_maxg_low || - si->sinit_max_init_timeo > sctp_rto_maxg_high))) { + (secpolicy_ip_config(sctp->sctp_credp, B_TRUE) != 0 && + (si->sinit_max_init_timeo < sctps->sctps_rto_maxg_low || + si->sinit_max_init_timeo > sctps->sctps_rto_maxg_high))) { return (EINVAL); } if (si->sinit_num_ostreams != 0) @@ -511,6 +515,7 @@ sctp_set_peer_addr_params(sctp_t *sctp, const void *invalp, uint_t inlen) int retval; uint32_t sum = 0; int64_t now; + sctp_stack_t *sctps = sctp->sctp_sctps; if (inlen < sizeof (*spp)) { return (EINVAL); @@ -522,13 +527,13 @@ sctp_set_peer_addr_params(sctp_t *sctp, const void *invalp, uint_t inlen) } if (spp->spp_hbinterval && spp->spp_hbinterval != UINT32_MAX && - (spp->spp_hbinterval < sctp_heartbeat_interval_low || - spp->spp_hbinterval > sctp_heartbeat_interval_high)) { + (spp->spp_hbinterval < sctps->sctps_heartbeat_interval_low || + spp->spp_hbinterval > sctps->sctps_heartbeat_interval_high)) { return (EINVAL); } if (spp->spp_pathmaxrxt && - (spp->spp_pathmaxrxt < sctp_pp_max_retr_low || - spp->spp_pathmaxrxt > sctp_pp_max_retr_high)) { + (spp->spp_pathmaxrxt < sctps->sctps_pp_max_retr_low || + spp->spp_pathmaxrxt > sctps->sctps_pp_max_retr_high)) { return (EINVAL); } if (spp->spp_pathmaxrxt && sctp->sctp_faddrs) { @@ -1128,6 +1133,7 @@ sctp_set_opt(sctp_t *sctp, int level, int name, const void *invalp, boolean_t onoff; int retval = 0, addrcnt; conn_t *connp = sctp->sctp_connp; + sctp_stack_t *sctps = sctp->sctp_sctps; /* In all cases, the size of the option must be bigger than int */ if (inlen >= sizeof (int32_t)) { @@ -1187,7 +1193,7 @@ sctp_set_opt(sctp_t *sctp, int level, int name, const void *invalp, sctp->sctp_dgram_errind = onoff; break; case SO_SNDBUF: - if (*i1 > sctp_max_buf) { + if (*i1 > sctps->sctps_max_buf) { retval = ENOBUFS; break; } @@ -1196,13 +1202,13 @@ sctp_set_opt(sctp_t *sctp, int level, int name, const void *invalp, break; } sctp->sctp_xmit_hiwater = *i1; - if (sctp_snd_lowat_fraction != 0) + if (sctps->sctps_snd_lowat_fraction != 0) sctp->sctp_xmit_lowater = sctp->sctp_xmit_hiwater / - sctp_snd_lowat_fraction; + sctps->sctps_snd_lowat_fraction; break; case SO_RCVBUF: - if (*i1 > sctp_max_buf) { + if (*i1 > sctps->sctps_max_buf) { retval = ENOBUFS; break; } @@ -1216,7 +1222,8 @@ sctp_set_opt(sctp_t *sctp, int level, int name, const void *invalp, * acknowledgement. 
*/ *i1 = MAX(*i1, - sctp_recv_hiwat_minmss * sctp->sctp_mss); + sctps->sctps_recv_hiwat_minmss * + sctp->sctp_mss); sctp->sctp_rwnd = *i1; sctp->sctp_irwnd = sctp->sctp_rwnd; } @@ -1226,7 +1233,7 @@ sctp_set_opt(sctp_t *sctp, int level, int name, const void *invalp, */ break; case SO_ALLZONES: - if (secpolicy_net(sctp->sctp_credp, OP_CONFIG, + if (secpolicy_ip(sctp->sctp_credp, OP_CONFIG, B_TRUE)) { retval = EACCES; break; @@ -1432,11 +1439,13 @@ sctp_set_opt(sctp_t *sctp, int level, int name, const void *invalp, ipaddr_t addr = *i1; ipif_t *ipif = NULL; ill_t *ill; + ip_stack_t *ipst = sctps->sctps_netstack->netstack_ip; - if (secpolicy_net(CRED(), OP_CONFIG, B_TRUE) == 0) { + if (secpolicy_ip(sctp->sctp_credp, OP_CONFIG, + B_TRUE) == 0) { ipif = ipif_lookup_onlink_addr(addr, - connp->conn_zoneid); + connp->conn_zoneid, ipst); if (ipif == NULL) { retval = EHOSTUNREACH; break; @@ -1481,7 +1490,8 @@ sctp_set_opt(sctp_t *sctp, int level, int name, const void *invalp, break; } if (*i1 == -1) { - ipp->ipp_unicast_hops = sctp_ipv6_hoplimit; + ipp->ipp_unicast_hops = + sctps->sctps_ipv6_hoplimit; ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; } else { ipp->ipp_unicast_hops = (uint8_t)*i1; @@ -1606,6 +1616,7 @@ sctp_set_opt(sctp_t *sctp, int level, int name, const void *invalp, break; case IPV6_NEXTHOP: { struct sockaddr_in6 *sin6; + ip_stack_t *ipst = sctps->sctps_netstack->netstack_ip; if (inlen != 0 && inlen != sizeof (sin6_t)) { retval = EINVAL; @@ -1633,7 +1644,7 @@ sctp_set_opt(sctp_t *sctp, int level, int name, const void *invalp, ire = ire_route_lookup_v6( &sin6->sin6_addr, NULL, NULL, 0, NULL, NULL, ALL_ZONES, NULL, - MATCH_IRE_DEFAULT); + MATCH_IRE_DEFAULT, ipst); if (ire == NULL) { retval = EHOSTUNREACH; break; diff --git a/usr/src/uts/common/inet/sctp/sctp_output.c b/usr/src/uts/common/inet/sctp/sctp_output.c index 1913a62629..f4dfc8f17a 100644 --- a/usr/src/uts/common/inet/sctp/sctp_output.c +++ b/usr/src/uts/common/inet/sctp/sctp_output.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -87,9 +87,6 @@ static struct kmem_cache *sctp_kmem_ftsn_set_cache; -/* Padding mblk for SCTP chunks. 
*/ -mblk_t *sctp_pad_mp; - #ifdef DEBUG static boolean_t sctp_verify_chain(mblk_t *, mblk_t *); #endif @@ -327,14 +324,17 @@ sctp_chunkify(sctp_t *sctp, int first_len, int bytes_to_send) sctp_faddr_t *fp1; size_t xtralen; sctp_msg_hdr_t *msg_hdr; + sctp_stack_t *sctps = sctp->sctp_sctps; fp = SCTP_CHUNK_DEST(mdblk); if (fp == NULL) fp = sctp->sctp_current; if (fp->isv4) - xtralen = sctp->sctp_hdr_len + sctp_wroff_xtra + sizeof (*sdc); + xtralen = sctp->sctp_hdr_len + sctps->sctps_wroff_xtra + + sizeof (*sdc); else - xtralen = sctp->sctp_hdr6_len + sctp_wroff_xtra + sizeof (*sdc); + xtralen = sctp->sctp_hdr6_len + sctps->sctps_wroff_xtra + + sizeof (*sdc); count = chunksize = first_len - sizeof (*sdc); nextmsg: chunk_mp = mdblk->b_cont; @@ -520,7 +520,7 @@ try_next: xtralen = sctp->sctp_hdr_len; else xtralen = sctp->sctp_hdr6_len; - xtralen += sctp_wroff_xtra + sizeof (*sdc); + xtralen += sctps->sctps_wroff_xtra + sizeof (*sdc); count = chunksize = fp1->sfa_pmss - sizeof (*sdc); fp = fp1; } @@ -550,6 +550,7 @@ sctp_add_proto_hdr(sctp_t *sctp, sctp_faddr_t *fp, mblk_t *mp, int sacklen, int hdrlen; char *hdr; int isv4 = fp->isv4; + sctp_stack_t *sctps = sctp->sctp_sctps; if (error != NULL) *error = 0; @@ -581,7 +582,7 @@ sctp_add_proto_hdr(sctp_t *sctp, sctp_faddr_t *fp, mblk_t *mp, int sacklen, } /* Copy in IP header. */ if ((mp->b_rptr - mp->b_datap->db_base) < - (sctp_wroff_xtra + hdrlen + sacklen) || DB_REF(mp) > 2 || + (sctps->sctps_wroff_xtra + hdrlen + sacklen) || DB_REF(mp) > 2 || !IS_P2ALIGNED(DB_BASE(mp), sizeof (ire_t *))) { mblk_t *nmp; @@ -590,14 +591,14 @@ sctp_add_proto_hdr(sctp_t *sctp, sctp_faddr_t *fp, mblk_t *mp, int sacklen, * data was moved into chunks, or during retransmission, * or things like snoop is running. */ - nmp = allocb_cred(sctp_wroff_xtra + hdrlen + sacklen, + nmp = allocb_cred(sctps->sctps_wroff_xtra + hdrlen + sacklen, CONN_CRED(sctp->sctp_connp)); if (nmp == NULL) { if (error != NULL) *error = ENOMEM; return (NULL); } - nmp->b_rptr += sctp_wroff_xtra; + nmp->b_rptr += sctps->sctps_wroff_xtra; nmp->b_wptr = nmp->b_rptr + hdrlen + sacklen; nmp->b_cont = mp; mp = nmp; @@ -651,12 +652,12 @@ sctp_add_proto_hdr(sctp_t *sctp, sctp_faddr_t *fp, mblk_t *mp, int sacklen, * the specified pad length. 
*/ static mblk_t * -sctp_get_padding(int pad) +sctp_get_padding(int pad, sctp_stack_t *sctps) { mblk_t *fill; ASSERT(pad < SCTP_ALIGN); - if ((fill = dupb(sctp_pad_mp)) != NULL) { + if ((fill = dupb(sctps->sctps_pad_mp)) != NULL) { fill->b_wptr += pad; return (fill); } @@ -689,6 +690,7 @@ sctp_find_fast_rexmit_mblks(sctp_t *sctp, int *total, sctp_faddr_t **fp) sctp_msg_hdr_t *msg_hdr; sctp_faddr_t *old_fp = NULL; sctp_faddr_t *chunk_fp; + sctp_stack_t *sctps = sctp->sctp_sctps; for (meta = sctp->sctp_xmit_head; meta != NULL; meta = meta->b_next) { msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr; @@ -738,14 +740,14 @@ sctp_find_fast_rexmit_mblks(sctp_t *sctp, int *total, sctp_faddr_t **fp) if ((nmp = dupmsg(mp)) == NULL) return (start_mp); if (extra > 0) { - fill = sctp_get_padding(extra); + fill = sctp_get_padding(extra, sctps); if (fill != NULL) { linkb(nmp, fill); } else { return (start_mp); } } - BUMP_MIB(&sctp_mib, sctpOutFastRetrans); + BUMP_MIB(&sctps->sctps_mib, sctpOutFastRetrans); BUMP_LOCAL(sctp->sctp_rxtchunks); SCTP_CHUNK_CLEAR_REXMIT(mp); if (start_mp == NULL) { @@ -941,16 +943,17 @@ sctp_fast_rexmit(sctp_t *sctp) mblk_t *mp, *head; int pktlen = 0; sctp_faddr_t *fp = NULL; + sctp_stack_t *sctps = sctp->sctp_sctps; ASSERT(sctp->sctp_xmit_head != NULL); mp = sctp_find_fast_rexmit_mblks(sctp, &pktlen, &fp); if (mp == NULL) { - SCTP_KSTAT(sctp_fr_not_found); + SCTP_KSTAT(sctps, sctp_fr_not_found); return; } if ((head = sctp_add_proto_hdr(sctp, fp, mp, 0, NULL)) == NULL) { freemsg(mp); - SCTP_KSTAT(sctp_fr_add_hdr); + SCTP_KSTAT(sctps, sctp_fr_add_hdr); return; } if ((pktlen > fp->sfa_pmss) && fp->isv4) { @@ -986,6 +989,7 @@ sctp_output(sctp_t *sctp) sctp_data_hdr_t *sdc; int error; boolean_t notsent = B_TRUE; + sctp_stack_t *sctps = sctp->sctp_sctps; if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) { sacklen = 0; @@ -1089,7 +1093,7 @@ sctp_output(sctp_t *sctp) fp, chunklen, meta); } freemsg(nmp); - SCTP_KSTAT(sctp_output_failed); + SCTP_KSTAT(sctps, sctp_output_failed); goto unsent_data; } seglen += sacklen; @@ -1104,7 +1108,7 @@ sctp_output(sctp_t *sctp) * a while, do slow start again. 
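A note on the recurring pattern in the hunks above: code that previously read file-scope SCTP globals (sctp_pad_mp, sctp_wroff_xtra, sctp_mib, the sctp_statistics counters) now locates the owning sctp_stack_t first and reads the per-stack copy. A minimal sketch of the three access routes that appear in this changeset; the function itself is invented purely for illustration and is not part of the commit:

/*
 * Sketch only. Every field name below is taken from hunks in this
 * changeset; the function is hypothetical.
 */
static void
sctp_stack_access_sketch(sctp_t *sctp, queue_t *q)
{
	/* From an association, via the back-pointer added to sctp_t. */
	sctp_stack_t	*sctps = sctp->sctp_sctps;

	/* From a STREAMS queue, via its conn_t and netstack. */
	sctp_stack_t	*q_sctps = Q_TO_CONN(q)->conn_netstack->netstack_sctp;

	/* From SCTP over to its IP instance for ire/ipif lookups. */
	ip_stack_t	*ipst = sctps->sctps_netstack->netstack_ip;

	ASSERT(sctps != NULL && q_sctps != NULL && ipst != NULL);
}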
*/ if (now - fp->lastactive > fp->rto) { - fp->cwnd = sctp_slow_start_after_idle * + fp->cwnd = sctps->sctps_slow_start_after_idle * fp->sfa_pmss; } @@ -1135,7 +1139,7 @@ sctp_output(sctp_t *sctp) fp, chunklen, meta); } freemsg(nmp); - SCTP_KSTAT(sctp_output_failed); + SCTP_KSTAT(sctps, sctp_output_failed); goto unsent_data; } } @@ -1152,7 +1156,7 @@ sctp_output(sctp_t *sctp) ASSERT(sctp->sctp_rtt_tsn == ntohl(sdc->sdh_tsn)); } if (extra > 0) { - fill = sctp_get_padding(extra); + fill = sctp_get_padding(extra, sctps); if (fill != NULL) { linkb(head, fill); pad = extra; @@ -1201,7 +1205,7 @@ sctp_output(sctp_t *sctp) if ((nmp = dupmsg(mp)) == NULL) break; if (extra > 0) { - fill = sctp_get_padding(extra); + fill = sctp_get_padding(extra, sctps); if (fill != NULL) { pad += extra; new_len += extra; @@ -1364,12 +1368,13 @@ sctp_make_ftsn_chunk(sctp_t *sctp, sctp_faddr_t *fp, sctp_ftsn_set_t *sets, uint16_t schlen; size_t xtralen; ftsn_entry_t *ftsn_entry; + sctp_stack_t *sctps = sctp->sctp_sctps; seglen += sizeof (sctp_chunk_hdr_t); if (fp->isv4) - xtralen = sctp->sctp_hdr_len + sctp_wroff_xtra; + xtralen = sctp->sctp_hdr_len + sctps->sctps_wroff_xtra; else - xtralen = sctp->sctp_hdr6_len + sctp_wroff_xtra; + xtralen = sctp->sctp_hdr6_len + sctps->sctps_wroff_xtra; ftsn_mp = allocb_cred(xtralen + seglen, CONN_CRED(sctp->sctp_connp)); if (ftsn_mp == NULL) return (NULL); @@ -1425,6 +1430,7 @@ sctp_make_ftsns(sctp_t *sctp, mblk_t *meta, mblk_t *mp, mblk_t **nmp, uint32_t adv_pap = sctp->sctp_adv_pap; uint32_t unsent = 0; boolean_t ubit; + sctp_stack_t *sctps = sctp->sctp_sctps; *seglen = sizeof (uint32_t); @@ -1483,7 +1489,7 @@ ftsn_done: if (head == NULL) { freemsg(*nmp); *nmp = NULL; - SCTP_KSTAT(sctp_send_ftsn_failed); + SCTP_KSTAT(sctps, sctp_send_ftsn_failed); return; } *seglen += sacklen; @@ -1638,6 +1644,7 @@ sctp_rexmit(sctp_t *sctp, sctp_faddr_t *oldfp) uint32_t first_ua_tsn; sctp_msg_hdr_t *mhdr; uint32_t tot_wnd; + sctp_stack_t *sctps = sctp->sctp_sctps; while (meta != NULL) { for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) { @@ -1722,7 +1729,7 @@ window_probe: sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn - 1; ASSERT(sctp->sctp_rxt_maxtsn >= sctp->sctp_rxt_nxttsn); sctp->sctp_zero_win_probe = B_TRUE; - BUMP_MIB(&sctp_mib, sctpOutWinProbe); + BUMP_MIB(&sctps->sctps_mib, sctpOutWinProbe); } return; out: @@ -1747,7 +1754,7 @@ out: sctp_set_iplen(sctp, pkt); sctp_add_sendq(sctp, pkt); } else { - SCTP_KSTAT(sctp_ss_rexmit_failed); + SCTP_KSTAT(sctps, sctp_ss_rexmit_failed); } oldfp->strikes++; sctp->sctp_strikes++; @@ -1755,7 +1762,7 @@ out: if (oldfp != fp && oldfp->suna != 0) SCTP_FADDR_TIMER_RESTART(sctp, oldfp, fp->rto); SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto); - BUMP_MIB(&sctp_mib, sctpOutWinProbe); + BUMP_MIB(&sctps->sctps_mib, sctpOutWinProbe); return; } @@ -1844,7 +1851,7 @@ out: if (nmp == NULL) goto restart_timer; if (extra > 0) { - fill = sctp_get_padding(extra); + fill = sctp_get_padding(extra, sctps); if (fill != NULL) { linkb(nmp, fill); seglen += extra; @@ -1857,7 +1864,7 @@ out: head = sctp_add_proto_hdr(sctp, fp, nmp, sacklen, NULL); if (head == NULL) { freemsg(nmp); - SCTP_KSTAT(sctp_rexmit_failed); + SCTP_KSTAT(sctps, sctp_rexmit_failed); goto restart_timer; } seglen += sacklen; @@ -1921,7 +1928,7 @@ try_bundle: break; if (extra > 0) { - fill = sctp_get_padding(extra); + fill = sctp_get_padding(extra, sctps); if (fill != NULL) { linkb(nmp, fill); } else { @@ -2059,6 +2066,7 @@ sctp_rexmit_packet(sctp_t *sctp, mblk_t **meta, mblk_t **mp, sctp_faddr_t *fp, mblk_t *fill; 
sctp_data_hdr_t *sdc; sctp_msg_hdr_t *mhdr; + sctp_stack_t *sctps = sctp->sctp_sctps; sdc = (sctp_data_hdr_t *)(*mp)->b_rptr; seglen = ntohs(sdc->sdh_len); @@ -2070,7 +2078,7 @@ sctp_rexmit_packet(sctp_t *sctp, mblk_t **meta, mblk_t **mp, sctp_faddr_t *fp, if (nmp == NULL) return (NULL); if (extra > 0) { - fill = sctp_get_padding(extra); + fill = sctp_get_padding(extra, sctps); if (fill != NULL) { linkb(nmp, fill); seglen += extra; @@ -2143,7 +2151,7 @@ try_bundle: break; if (extra > 0) { - fill = sctp_get_padding(extra); + fill = sctp_get_padding(extra, sctps); if (fill != NULL) { linkb(nmp, fill); } else { @@ -2190,6 +2198,7 @@ sctp_ss_rexmit(sctp_t *sctp) uint32_t tot_wnd; sctp_data_hdr_t *sdc; int burst; + sctp_stack_t *sctps = sctp->sctp_sctps; ASSERT(!sctp->sctp_zero_win_probe); @@ -2248,7 +2257,7 @@ found_msg: SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto); pkt = sctp_rexmit_packet(sctp, &meta, &mp, fp, &pkt_len); if (pkt == NULL) { - SCTP_KSTAT(sctp_ss_rexmit_failed); + SCTP_KSTAT(sctps, sctp_ss_rexmit_failed); return; } if ((pkt_len > fp->sfa_pmss) && fp->isv4) { @@ -2274,7 +2283,7 @@ found_msg: return; /* Retransmit another packet if the window allows. */ - for (tot_wnd -= pkt_len, burst = sctp_maxburst - 1; + for (tot_wnd -= pkt_len, burst = sctps->sctps_maxburst - 1; meta != NULL && burst > 0; meta = meta->b_next, burst--) { if (mp == NULL) mp = meta->b_cont; diff --git a/usr/src/uts/common/inet/sctp/sctp_param.c b/usr/src/uts/common/inet/sctp/sctp_param.c index 603159b0e4..5d5ed19676 100644 --- a/usr/src/uts/common/inet/sctp/sctp_param.c +++ b/usr/src/uts/common/inet/sctp/sctp_param.c @@ -40,6 +40,7 @@ #include <inet/mi.h> #include <inet/mib2.h> #include <inet/nd.h> +#include <inet/ipclassifier.h> #include "sctp_impl.h" #include "sctp_addr.h" @@ -67,9 +68,6 @@ * Protected by sctp_epriv_port_lock. */ #define SCTP_NUM_EPRIV_PORTS 64 -int sctp_g_num_epriv_ports = SCTP_NUM_EPRIV_PORTS; -uint16_t sctp_g_epriv_ports[SCTP_NUM_EPRIV_PORTS] = { 2049, 4045 }; -kmutex_t sctp_epriv_port_lock; /* * sctp_wroff_xtra is the extra space in front of SCTP/IP header for link @@ -77,7 +75,7 @@ kmutex_t sctp_epriv_port_lock; * Also there has to be enough space to stash in information passed between * IP and SCTP. */ -sctpparam_t sctp_wroff_xtra_param = { sizeof (conn_t *) + sizeof (ire_t *), +sctpparam_t lcl_sctp_wroff_xtra_param = { sizeof (conn_t *) + sizeof (ire_t *), 256, 32, "sctp_wroff_xtra" }; /* @@ -86,7 +84,7 @@ sctpparam_t sctp_wroff_xtra_param = { sizeof (conn_t *) + sizeof (ire_t *), * per the SCTP spec. */ /* BEGIN CSTYLED */ -sctpparam_t sctp_param_arr[] = { +sctpparam_t lcl_sctp_param_arr[] = { /*min max value name */ { 0, 128, 8, "sctp_max_init_retr"}, { 1, 128, 10, "sctp_pa_max_retr"}, @@ -129,9 +127,6 @@ sctpparam_t sctp_param_arr[] = { }; /* END CSTYLED */ -/* Only modified during _init and _fini thus no locking is needed. 
*/ -static caddr_t sctp_g_nd; /* Head of 'named dispatch' variable list */ - /* Get callback routine passed to nd_load by sctp_param_register */ /* ARGSUSED */ static int @@ -196,10 +191,12 @@ static int sctp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) { int i; + sctp_stack_t *sctps = Q_TO_CONN(q)->conn_netstack->netstack_sctp; - for (i = 0; i < sctp_g_num_epriv_ports; i++) { - if (sctp_g_epriv_ports[i] != 0) - (void) mi_mpprintf(mp, "%d ", sctp_g_epriv_ports[i]); + for (i = 0; i < sctps->sctps_g_num_epriv_ports; i++) { + if (sctps->sctps_g_epriv_ports[i] != 0) + (void) mi_mpprintf(mp, "%d ", + sctps->sctps_g_epriv_ports[i]); } return (0); } @@ -215,6 +212,7 @@ sctp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp, { long new_value; int i; + sctp_stack_t *sctps = Q_TO_CONN(q)->conn_netstack->netstack_sctp; /* * Fail the request if the new value does not lie within the @@ -225,26 +223,26 @@ sctp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp, return (EINVAL); } - mutex_enter(&sctp_epriv_port_lock); + mutex_enter(&sctps->sctps_epriv_port_lock); /* Check if the value is already in the list */ - for (i = 0; i < sctp_g_num_epriv_ports; i++) { - if (new_value == sctp_g_epriv_ports[i]) { - mutex_exit(&sctp_epriv_port_lock); + for (i = 0; i < sctps->sctps_g_num_epriv_ports; i++) { + if (new_value == sctps->sctps_g_epriv_ports[i]) { + mutex_exit(&sctps->sctps_epriv_port_lock); return (EEXIST); } } /* Find an empty slot */ - for (i = 0; i < sctp_g_num_epriv_ports; i++) { - if (sctp_g_epriv_ports[i] == 0) + for (i = 0; i < sctps->sctps_g_num_epriv_ports; i++) { + if (sctps->sctps_g_epriv_ports[i] == 0) break; } - if (i == sctp_g_num_epriv_ports) { - mutex_exit(&sctp_epriv_port_lock); + if (i == sctps->sctps_g_num_epriv_ports) { + mutex_exit(&sctps->sctps_epriv_port_lock); return (EOVERFLOW); } /* Set the new value */ - sctp_g_epriv_ports[i] = (uint16_t)new_value; - mutex_exit(&sctp_epriv_port_lock); + sctps->sctps_g_epriv_ports[i] = (uint16_t)new_value; + mutex_exit(&sctps->sctps_epriv_port_lock); return (0); } @@ -259,6 +257,7 @@ sctp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp, { long new_value; int i; + sctp_stack_t *sctps = Q_TO_CONN(q)->conn_netstack->netstack_sctp; /* * Fail the request if the new value does not lie within the @@ -269,19 +268,19 @@ sctp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp, return (EINVAL); } - mutex_enter(&sctp_epriv_port_lock); + mutex_enter(&sctps->sctps_epriv_port_lock); /* Check that the value is already in the list */ - for (i = 0; i < sctp_g_num_epriv_ports; i++) { - if (sctp_g_epriv_ports[i] == new_value) + for (i = 0; i < sctps->sctps_g_num_epriv_ports; i++) { + if (sctps->sctps_g_epriv_ports[i] == new_value) break; } - if (i == sctp_g_num_epriv_ports) { - mutex_exit(&sctp_epriv_port_lock); + if (i == sctps->sctps_g_num_epriv_ports) { + mutex_exit(&sctps->sctps_epriv_port_lock); return (ESRCH); } /* Clear the value */ - sctp_g_epriv_ports[i] = 0; - mutex_exit(&sctp_epriv_port_lock); + sctps->sctps_g_epriv_ports[i] = 0; + mutex_exit(&sctps->sctps_epriv_port_lock); return (0); } @@ -290,63 +289,78 @@ sctp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp, * named dispatch handler. 
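The extra-privileged port table handled above moves from module-global state (sctp_g_epriv_ports[], sctp_epriv_port_lock) into the sctp_stack_t, so each IP instance maintains its own list through these ndd handlers. As a hedged illustration of how a consumer could test a port against the per-stack table; the helper name is hypothetical and only the sctps_* fields come from this diff:

/* Illustrative helper, not part of the commit. */
static boolean_t
sctp_is_extra_priv_port(sctp_stack_t *sctps, in_port_t port)
{
	boolean_t	found = B_FALSE;
	int		i;

	mutex_enter(&sctps->sctps_epriv_port_lock);
	for (i = 0; i < sctps->sctps_g_num_epriv_ports; i++) {
		if (sctps->sctps_g_epriv_ports[i] == port) {
			found = B_TRUE;
			break;
		}
	}
	mutex_exit(&sctps->sctps_epriv_port_lock);
	return (found);
}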
*/ boolean_t -sctp_param_register(sctpparam_t *sctppa, int cnt) +sctp_param_register(IDP *ndp, sctpparam_t *sctppa, int cnt, sctp_stack_t *sctps) { - if (sctp_g_nd != NULL) { + if (*ndp != NULL) { return (B_TRUE); } for (; cnt-- > 0; sctppa++) { if (sctppa->sctp_param_name && sctppa->sctp_param_name[0]) { - if (!nd_load(&sctp_g_nd, sctppa->sctp_param_name, + if (!nd_load(ndp, sctppa->sctp_param_name, sctp_param_get, sctp_param_set, (caddr_t)sctppa)) { - nd_free(&sctp_g_nd); + nd_free(ndp); return (B_FALSE); } } } - if (!nd_load(&sctp_g_nd, sctp_wroff_xtra_param.sctp_param_name, + sctps->sctps_wroff_xtra_param = kmem_zalloc(sizeof (sctpparam_t), + KM_SLEEP); + bcopy(&lcl_sctp_wroff_xtra_param, sctps->sctps_wroff_xtra_param, + sizeof (sctpparam_t)); + if (!nd_load(ndp, sctps->sctps_wroff_xtra_param->sctp_param_name, sctp_param_get, sctp_wroff_xtra_set, - (caddr_t)&sctp_wroff_xtra_param)) { - nd_free(&sctp_g_nd); + (caddr_t)sctps->sctps_wroff_xtra_param)) { + nd_free(ndp); return (B_FALSE); } - if (!nd_load(&sctp_g_nd, "sctp_extra_priv_ports", + if (!nd_load(ndp, "sctp_extra_priv_ports", sctp_extra_priv_ports_get, NULL, NULL)) { - nd_free(&sctp_g_nd); + nd_free(ndp); return (B_FALSE); } - if (!nd_load(&sctp_g_nd, "sctp_extra_priv_ports_add", + if (!nd_load(ndp, "sctp_extra_priv_ports_add", NULL, sctp_extra_priv_ports_add, NULL)) { - nd_free(&sctp_g_nd); + nd_free(ndp); return (B_FALSE); } - if (!nd_load(&sctp_g_nd, "sctp_extra_priv_ports_del", + if (!nd_load(ndp, "sctp_extra_priv_ports_del", NULL, sctp_extra_priv_ports_del, NULL)) { - nd_free(&sctp_g_nd); + nd_free(ndp); return (B_FALSE); } return (B_TRUE); } boolean_t -sctp_nd_init() +sctp_nd_init(sctp_stack_t *sctps) { - return (sctp_param_register(sctp_param_arr, A_CNT(sctp_param_arr))); -} + sctpparam_t *pa; -/* Accessors to keep the static sctp_g_nd local */ + pa = kmem_alloc(sizeof (lcl_sctp_param_arr), KM_SLEEP); + bcopy(lcl_sctp_param_arr, pa, sizeof (lcl_sctp_param_arr)); + sctps->sctps_params = pa; + return (sctp_param_register(&sctps->sctps_g_nd, pa, + A_CNT(lcl_sctp_param_arr), sctps)); +} int sctp_nd_getset(queue_t *q, MBLKP mp) { - return (nd_getset(q, sctp_g_nd, mp)); + sctp_stack_t *sctps = Q_TO_CONN(q)->conn_netstack->netstack_sctp; + + return (nd_getset(q, sctps->sctps_g_nd, mp)); } void -sctp_nd_free() +sctp_nd_free(sctp_stack_t *sctps) { - nd_free(&sctp_g_nd); + nd_free(&sctps->sctps_g_nd); + kmem_free(sctps->sctps_params, sizeof (lcl_sctp_param_arr)); + sctps->sctps_params = NULL; + kmem_free(sctps->sctps_wroff_xtra_param, sizeof (sctpparam_t)); + sctps->sctps_wroff_xtra_param = NULL; + } diff --git a/usr/src/uts/common/inet/sctp/sctp_shutdown.c b/usr/src/uts/common/inet/sctp/sctp_shutdown.c index e8904e71ea..166a0b4d16 100644 --- a/usr/src/uts/common/inet/sctp/sctp_shutdown.c +++ b/usr/src/uts/common/inet/sctp/sctp_shutdown.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
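With sctp_param_register() now taking the stack's own nd handle, each stack receives private copies of lcl_sctp_param_arr and the wroff parameter, and sctp_nd_free() releases them again. The call order is presumably driven by the netstack init/fini hooks added elsewhere in this commit; a rough outline under that assumption, with an invented wrapper name:

/* Outline only; the real init/fini callers are outside this hunk. */
static void
sctp_nd_lifecycle_sketch(sctp_stack_t *sctps, queue_t *q, mblk_t *mp)
{
	/* Stack creation: copy the parameter templates and register them. */
	if (!sctp_nd_init(sctps))
		cmn_err(CE_WARN, "sctp: per-stack nd init failed");

	/* ndd ioctl time: dispatch against this stack's variables. */
	(void) sctp_nd_getset(q, mp);

	/* Stack teardown: unregister and free the per-stack copies. */
	sctp_nd_free(sctps);
}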
*/ @@ -44,6 +44,7 @@ #include <inet/nd.h> #include <inet/optcom.h> #include <inet/sctp_ip.h> +#include <inet/ipclassifier.h> #include "sctp_impl.h" void @@ -54,6 +55,7 @@ sctp_send_shutdown(sctp_t *sctp, int rexmit) sctp_chunk_hdr_t *sch; uint32_t *ctsn; sctp_faddr_t *fp; + sctp_stack_t *sctps = sctp->sctp_sctps; if (sctp->sctp_state != SCTPS_ESTABLISHED && sctp->sctp_state != SCTPS_SHUTDOWN_PENDING && @@ -110,7 +112,7 @@ sctp_send_shutdown(sctp_t *sctp, int rexmit) sendmp = sctp_make_mp(sctp, fp, sizeof (*sch) + sizeof (*ctsn)); if (sendmp == NULL) { - SCTP_KSTAT(sctp_send_shutdown_failed); + SCTP_KSTAT(sctps, sctp_send_shutdown_failed); goto done; } sch = (sctp_chunk_hdr_t *)sendmp->b_wptr; @@ -150,6 +152,7 @@ sctp_shutdown_received(sctp_t *sctp, sctp_chunk_hdr_t *sch, boolean_t crwsd, sctp_chunk_hdr_t *sach; uint32_t *tsn; int trysend = 0; + sctp_stack_t *sctps = sctp->sctp_sctps; if (sctp->sctp_state != SCTPS_SHUTDOWN_ACK_SENT) sctp->sctp_state = SCTPS_SHUTDOWN_RECEIVED; @@ -183,7 +186,7 @@ sctp_shutdown_received(sctp_t *sctp, sctp_chunk_hdr_t *sch, boolean_t crwsd, samp = sctp_make_mp(sctp, fp, sizeof (*sach)); if (samp == NULL) { - SCTP_KSTAT(sctp_send_shutdown_ack_failed); + SCTP_KSTAT(sctps, sctp_send_shutdown_ack_failed); goto dotimer; } @@ -227,11 +230,12 @@ sctp_shutdown_complete(sctp_t *sctp) { mblk_t *scmp; sctp_chunk_hdr_t *scch; + sctp_stack_t *sctps = sctp->sctp_sctps; scmp = sctp_make_mp(sctp, NULL, sizeof (*scch)); if (scmp == NULL) { /* XXX use timer approach */ - SCTP_KSTAT(sctp_send_shutdown_comp_failed); + SCTP_KSTAT(sctps, sctp_send_shutdown_comp_failed); return; } @@ -265,6 +269,7 @@ sctp_ootb_shutdown_ack(sctp_t *gsctp, mblk_t *inmp, uint_t ip_hdr_len) int i; uint16_t port; mblk_t *mp1; + sctp_stack_t *sctps = gsctp->sctp_sctps; isv4 = (IPH_HDR_VERSION(inmp->b_rptr) == IPV4_VERSION); @@ -284,12 +289,12 @@ sctp_ootb_shutdown_ack(sctp_t *gsctp, mblk_t *inmp, uint_t ip_hdr_len) */ if (!IS_P2ALIGNED(DB_BASE(inmp), sizeof (ire_t *)) || DB_REF(inmp) != 1) { - mp1 = allocb(MBLKL(inmp) + sctp_wroff_xtra, BPRI_MED); + mp1 = allocb(MBLKL(inmp) + sctps->sctps_wroff_xtra, BPRI_MED); if (mp1 == NULL) { freeb(inmp); return; } - mp1->b_rptr += sctp_wroff_xtra; + mp1->b_rptr += sctps->sctps_wroff_xtra; mp1->b_wptr = mp1->b_rptr + MBLKL(inmp); bcopy(inmp->b_rptr, mp1->b_rptr, MBLKL(inmp)); freeb(inmp); @@ -315,7 +320,7 @@ sctp_ootb_shutdown_ack(sctp_t *gsctp, mblk_t *inmp, uint_t ip_hdr_len) inip4h->ipha_src = inip4h->ipha_dst; inip4h->ipha_dst = v4addr; inip4h->ipha_ident = 0; - inip4h->ipha_ttl = (uchar_t)sctp_ipv4_ttl; + inip4h->ipha_ttl = (uchar_t)sctps->sctps_ipv4_ttl; } else { in6_addr_t v6addr; @@ -336,7 +341,7 @@ sctp_ootb_shutdown_ack(sctp_t *gsctp, mblk_t *inmp, uint_t ip_hdr_len) v6addr = inip6h->ip6_src; inip6h->ip6_src = inip6h->ip6_dst; inip6h->ip6_dst = v6addr; - inip6h->ip6_hops = (uchar_t)sctp_ipv6_hoplimit; + inip6h->ip6_hops = (uchar_t)sctps->sctps_ipv6_hoplimit; } insctph = (sctp_hdr_t *)(inmp->b_rptr + ip_hdr_len); diff --git a/usr/src/uts/common/inet/sctp/sctp_snmp.c b/usr/src/uts/common/inet/sctp/sctp_snmp.c index d42cce5d81..cbb922ea47 100644 --- a/usr/src/uts/common/inet/sctp/sctp_snmp.c +++ b/usr/src/uts/common/inet/sctp/sctp_snmp.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -46,54 +46,18 @@ #include "sctp_impl.h" #include "sctp_addr.h" -mib2_sctp_t sctp_mib; -static kstat_t *sctp_mibkp; /* kstat exporting sctp_mib data */ -static kstat_t *sctp_kstat; /* kstat exporting general sctp stats */ - static int sctp_snmp_state(sctp_t *sctp); -/* - * The following kstats are for debugging purposes. They keep - * track of problems which should not happen normally. But in - * those cases which they do happen, these kstats would be handy - * for engineers to diagnose the problems. They are not intended - * to be consumed by customers. - */ -sctp_kstat_t sctp_statistics = { - { "sctp_add_faddr", KSTAT_DATA_UINT64 }, - { "sctp_add_timer", KSTAT_DATA_UINT64 }, - { "sctp_conn_create", KSTAT_DATA_UINT64 }, - { "sctp_find_next_tq", KSTAT_DATA_UINT64 }, - { "sctp_fr_add_hdr", KSTAT_DATA_UINT64 }, - { "sctp_fr_not_found", KSTAT_DATA_UINT64 }, - { "sctp_output_failed", KSTAT_DATA_UINT64 }, - { "sctp_rexmit_failed", KSTAT_DATA_UINT64 }, - { "sctp_send_init_failed", KSTAT_DATA_UINT64 }, - { "sctp_send_cookie_failed", KSTAT_DATA_UINT64 }, - { "sctp_send_cookie_ack_failed", KSTAT_DATA_UINT64 }, - { "sctp_send_err_failed", KSTAT_DATA_UINT64 }, - { "sctp_send_sack_failed", KSTAT_DATA_UINT64 }, - { "sctp_send_shutdown_failed", KSTAT_DATA_UINT64 }, - { "sctp_send_shutdown_ack_failed", KSTAT_DATA_UINT64 }, - { "sctp_send_shutdown_comp_failed", KSTAT_DATA_UINT64 }, - { "sctp_send_user_abort_failed", KSTAT_DATA_UINT64 }, - { "sctp_send_asconf_failed", KSTAT_DATA_UINT64 }, - { "sctp_send_asconf_ack_failed", KSTAT_DATA_UINT64 }, - { "sctp_send_ftsn_failed", KSTAT_DATA_UINT64 }, - { "sctp_send_hb_failed", KSTAT_DATA_UINT64 }, - { "sctp_return_hb_failed", KSTAT_DATA_UINT64 }, - { "sctp_ss_rexmit_failed", KSTAT_DATA_UINT64 }, - { "sctp_cl_connect", KSTAT_DATA_UINT64 }, - { "sctp_cl_assoc_change", KSTAT_DATA_UINT64 }, - { "sctp_cl_check_addrs", KSTAT_DATA_UINT64 }, -}; static int sctp_kstat_update(kstat_t *kp, int rw) { sctp_named_kstat_t *sctpkp; sctp_t *sctp, *sctp_prev; - zoneid_t zoneid; + zoneid_t myzoneid; + netstackid_t stackid = (netstackid_t)(uintptr_t)kp->ks_private; + netstack_t *ns; + sctp_stack_t *sctps; if (kp == NULL|| kp->ks_data == NULL) return (EIO); @@ -101,107 +65,116 @@ sctp_kstat_update(kstat_t *kp, int rw) if (rw == KSTAT_WRITE) return (EACCES); - zoneid = getzoneid(); + ns = netstack_find_by_stackid(stackid); + if (ns == NULL) + return (-1); + sctps = ns->netstack_sctp; + if (sctps == NULL) { + netstack_rele(ns); + return (-1); + } + myzoneid = netstackid_to_zoneid(stackid); /* * Get the number of current associations and gather their * individual set of statistics. 
*/ - SET_MIB(sctp_mib.sctpCurrEstab, 0); - sctp = gsctp; + SET_MIB(sctps->sctps_mib.sctpCurrEstab, 0); + sctp = sctps->sctps_gsctp; sctp_prev = NULL; - mutex_enter(&sctp_g_lock); + mutex_enter(&sctps->sctps_g_lock); while (sctp != NULL) { mutex_enter(&sctp->sctp_reflock); if (sctp->sctp_condemned) { mutex_exit(&sctp->sctp_reflock); - sctp = list_next(&sctp_g_list, sctp); + sctp = list_next(&sctps->sctps_g_list, sctp); continue; } sctp->sctp_refcnt++; mutex_exit(&sctp->sctp_reflock); - mutex_exit(&sctp_g_lock); + mutex_exit(&sctps->sctps_g_lock); if (sctp_prev != NULL) SCTP_REFRELE(sctp_prev); - if (sctp->sctp_connp->conn_zoneid != zoneid) + if (sctp->sctp_connp->conn_zoneid != myzoneid) goto next_sctp; if (sctp->sctp_state == SCTPS_ESTABLISHED || sctp->sctp_state == SCTPS_SHUTDOWN_PENDING || sctp->sctp_state == SCTPS_SHUTDOWN_RECEIVED) { - BUMP_MIB(&sctp_mib, sctpCurrEstab); + BUMP_MIB(&sctps->sctps_mib, sctpCurrEstab); } if (sctp->sctp_opkts) { - UPDATE_MIB(&sctp_mib, sctpOutSCTPPkts, + UPDATE_MIB(&sctps->sctps_mib, sctpOutSCTPPkts, sctp->sctp_opkts); sctp->sctp_opkts = 0; } if (sctp->sctp_obchunks) { - UPDATE_MIB(&sctp_mib, sctpOutCtrlChunks, + UPDATE_MIB(&sctps->sctps_mib, sctpOutCtrlChunks, sctp->sctp_obchunks); sctp->sctp_obchunks = 0; } if (sctp->sctp_odchunks) { - UPDATE_MIB(&sctp_mib, sctpOutOrderChunks, + UPDATE_MIB(&sctps->sctps_mib, sctpOutOrderChunks, sctp->sctp_odchunks); sctp->sctp_odchunks = 0; } if (sctp->sctp_oudchunks) { - UPDATE_MIB(&sctp_mib, sctpOutUnorderChunks, + UPDATE_MIB(&sctps->sctps_mib, sctpOutUnorderChunks, sctp->sctp_oudchunks); sctp->sctp_oudchunks = 0; } if (sctp->sctp_rxtchunks) { - UPDATE_MIB(&sctp_mib, sctpRetransChunks, + UPDATE_MIB(&sctps->sctps_mib, sctpRetransChunks, sctp->sctp_rxtchunks); sctp->sctp_rxtchunks = 0; } if (sctp->sctp_ipkts) { - UPDATE_MIB(&sctp_mib, sctpInSCTPPkts, sctp->sctp_ipkts); + UPDATE_MIB(&sctps->sctps_mib, sctpInSCTPPkts, + sctp->sctp_ipkts); sctp->sctp_ipkts = 0; } if (sctp->sctp_ibchunks) { - UPDATE_MIB(&sctp_mib, sctpInCtrlChunks, + UPDATE_MIB(&sctps->sctps_mib, sctpInCtrlChunks, sctp->sctp_ibchunks); sctp->sctp_ibchunks = 0; } if (sctp->sctp_idchunks) { - UPDATE_MIB(&sctp_mib, sctpInOrderChunks, + UPDATE_MIB(&sctps->sctps_mib, sctpInOrderChunks, sctp->sctp_idchunks); sctp->sctp_idchunks = 0; } if (sctp->sctp_iudchunks) { - UPDATE_MIB(&sctp_mib, sctpInUnorderChunks, + UPDATE_MIB(&sctps->sctps_mib, sctpInUnorderChunks, sctp->sctp_iudchunks); sctp->sctp_iudchunks = 0; } if (sctp->sctp_fragdmsgs) { - UPDATE_MIB(&sctp_mib, sctpFragUsrMsgs, + UPDATE_MIB(&sctps->sctps_mib, sctpFragUsrMsgs, sctp->sctp_fragdmsgs); sctp->sctp_fragdmsgs = 0; } if (sctp->sctp_reassmsgs) { - UPDATE_MIB(&sctp_mib, sctpReasmUsrMsgs, + UPDATE_MIB(&sctps->sctps_mib, sctpReasmUsrMsgs, sctp->sctp_reassmsgs); sctp->sctp_reassmsgs = 0; } next_sctp: sctp_prev = sctp; - mutex_enter(&sctp_g_lock); - sctp = list_next(&sctp_g_list, sctp); + mutex_enter(&sctps->sctps_g_lock); + sctp = list_next(&sctps->sctps_g_list, sctp); } - mutex_exit(&sctp_g_lock); + mutex_exit(&sctps->sctps_g_lock); if (sctp_prev != NULL) SCTP_REFRELE(sctp_prev); @@ -209,53 +182,68 @@ next_sctp: sctpkp = (sctp_named_kstat_t *)kp->ks_data; /* These are from global ndd params. 
*/ - sctpkp->sctpRtoMin.value.ui32 = sctp_rto_ming; - sctpkp->sctpRtoMax.value.ui32 = sctp_rto_maxg; - sctpkp->sctpRtoInitial.value.ui32 = sctp_rto_initialg; - sctpkp->sctpValCookieLife.value.ui32 = sctp_cookie_life; - sctpkp->sctpMaxInitRetr.value.ui32 = sctp_max_init_retr; - - sctpkp->sctpCurrEstab.value.i32 = sctp_mib.sctpCurrEstab; - sctpkp->sctpActiveEstab.value.i32 = sctp_mib.sctpActiveEstab; - sctpkp->sctpPassiveEstab.value.i32 = sctp_mib.sctpPassiveEstab; - sctpkp->sctpAborted.value.i32 = sctp_mib.sctpAborted; - sctpkp->sctpShutdowns.value.i32 = sctp_mib.sctpShutdowns; - sctpkp->sctpOutOfBlue.value.i32 = sctp_mib.sctpOutOfBlue; - sctpkp->sctpChecksumError.value.i32 = sctp_mib.sctpChecksumError; - sctpkp->sctpOutCtrlChunks.value.i64 = sctp_mib.sctpOutCtrlChunks; - sctpkp->sctpOutOrderChunks.value.i64 = sctp_mib.sctpOutOrderChunks; - sctpkp->sctpOutUnorderChunks.value.i64 = sctp_mib.sctpOutUnorderChunks; - sctpkp->sctpRetransChunks.value.i64 = sctp_mib.sctpRetransChunks; - sctpkp->sctpOutAck.value.i32 = sctp_mib.sctpOutAck; - sctpkp->sctpOutAckDelayed.value.i32 = sctp_mib.sctpOutAckDelayed; - sctpkp->sctpOutWinUpdate.value.i32 = sctp_mib.sctpOutWinUpdate; - sctpkp->sctpOutFastRetrans.value.i32 = sctp_mib.sctpOutFastRetrans; - sctpkp->sctpOutWinProbe.value.i32 = sctp_mib.sctpOutWinProbe; - sctpkp->sctpInCtrlChunks.value.i64 = sctp_mib.sctpInCtrlChunks; - sctpkp->sctpInOrderChunks.value.i64 = sctp_mib.sctpInOrderChunks; - sctpkp->sctpInUnorderChunks.value.i64 = sctp_mib.sctpInUnorderChunks; - sctpkp->sctpInAck.value.i32 = sctp_mib.sctpInAck; - sctpkp->sctpInDupAck.value.i32 = sctp_mib.sctpInDupAck; - sctpkp->sctpInAckUnsent.value.i32 = sctp_mib.sctpInAckUnsent; - sctpkp->sctpFragUsrMsgs.value.i64 = sctp_mib.sctpFragUsrMsgs; - sctpkp->sctpReasmUsrMsgs.value.i64 = sctp_mib.sctpReasmUsrMsgs; - sctpkp->sctpOutSCTPPkts.value.i64 = sctp_mib.sctpOutSCTPPkts; - sctpkp->sctpInSCTPPkts.value.i64 = sctp_mib.sctpInSCTPPkts; - sctpkp->sctpInInvalidCookie.value.i32 = sctp_mib.sctpInInvalidCookie; - sctpkp->sctpTimRetrans.value.i32 = sctp_mib.sctpTimRetrans; - sctpkp->sctpTimRetransDrop.value.i32 = sctp_mib.sctpTimRetransDrop; + sctpkp->sctpRtoMin.value.ui32 = sctps->sctps_rto_ming; + sctpkp->sctpRtoMax.value.ui32 = sctps->sctps_rto_maxg; + sctpkp->sctpRtoInitial.value.ui32 = sctps->sctps_rto_initialg; + sctpkp->sctpValCookieLife.value.ui32 = sctps->sctps_cookie_life; + sctpkp->sctpMaxInitRetr.value.ui32 = sctps->sctps_max_init_retr; + + sctpkp->sctpCurrEstab.value.i32 = sctps->sctps_mib.sctpCurrEstab; + sctpkp->sctpActiveEstab.value.i32 = sctps->sctps_mib.sctpActiveEstab; + sctpkp->sctpPassiveEstab.value.i32 = sctps->sctps_mib.sctpPassiveEstab; + sctpkp->sctpAborted.value.i32 = sctps->sctps_mib.sctpAborted; + sctpkp->sctpShutdowns.value.i32 = sctps->sctps_mib.sctpShutdowns; + sctpkp->sctpOutOfBlue.value.i32 = sctps->sctps_mib.sctpOutOfBlue; + sctpkp->sctpChecksumError.value.i32 = + sctps->sctps_mib.sctpChecksumError; + sctpkp->sctpOutCtrlChunks.value.i64 = + sctps->sctps_mib.sctpOutCtrlChunks; + sctpkp->sctpOutOrderChunks.value.i64 = + sctps->sctps_mib.sctpOutOrderChunks; + sctpkp->sctpOutUnorderChunks.value.i64 = + sctps->sctps_mib.sctpOutUnorderChunks; + sctpkp->sctpRetransChunks.value.i64 = + sctps->sctps_mib.sctpRetransChunks; + sctpkp->sctpOutAck.value.i32 = sctps->sctps_mib.sctpOutAck; + sctpkp->sctpOutAckDelayed.value.i32 = + sctps->sctps_mib.sctpOutAckDelayed; + sctpkp->sctpOutWinUpdate.value.i32 = sctps->sctps_mib.sctpOutWinUpdate; + sctpkp->sctpOutFastRetrans.value.i32 = + 
sctps->sctps_mib.sctpOutFastRetrans; + sctpkp->sctpOutWinProbe.value.i32 = sctps->sctps_mib.sctpOutWinProbe; + sctpkp->sctpInCtrlChunks.value.i64 = sctps->sctps_mib.sctpInCtrlChunks; + sctpkp->sctpInOrderChunks.value.i64 = + sctps->sctps_mib.sctpInOrderChunks; + sctpkp->sctpInUnorderChunks.value.i64 = + sctps->sctps_mib.sctpInUnorderChunks; + sctpkp->sctpInAck.value.i32 = sctps->sctps_mib.sctpInAck; + sctpkp->sctpInDupAck.value.i32 = sctps->sctps_mib.sctpInDupAck; + sctpkp->sctpInAckUnsent.value.i32 = sctps->sctps_mib.sctpInAckUnsent; + sctpkp->sctpFragUsrMsgs.value.i64 = sctps->sctps_mib.sctpFragUsrMsgs; + sctpkp->sctpReasmUsrMsgs.value.i64 = sctps->sctps_mib.sctpReasmUsrMsgs; + sctpkp->sctpOutSCTPPkts.value.i64 = sctps->sctps_mib.sctpOutSCTPPkts; + sctpkp->sctpInSCTPPkts.value.i64 = sctps->sctps_mib.sctpInSCTPPkts; + sctpkp->sctpInInvalidCookie.value.i32 = + sctps->sctps_mib.sctpInInvalidCookie; + sctpkp->sctpTimRetrans.value.i32 = sctps->sctps_mib.sctpTimRetrans; + sctpkp->sctpTimRetransDrop.value.i32 = + sctps->sctps_mib.sctpTimRetransDrop; sctpkp->sctpTimHeartBeatProbe.value.i32 = - sctp_mib.sctpTimHeartBeatProbe; - sctpkp->sctpTimHeartBeatDrop.value.i32 = sctp_mib.sctpTimHeartBeatDrop; - sctpkp->sctpListenDrop.value.i32 = sctp_mib.sctpListenDrop; - sctpkp->sctpInClosed.value.i32 = sctp_mib.sctpInClosed; + sctps->sctps_mib.sctpTimHeartBeatProbe; + sctpkp->sctpTimHeartBeatDrop.value.i32 = + sctps->sctps_mib.sctpTimHeartBeatDrop; + sctpkp->sctpListenDrop.value.i32 = sctps->sctps_mib.sctpListenDrop; + sctpkp->sctpInClosed.value.i32 = sctps->sctps_mib.sctpInClosed; + netstack_rele(ns); return (0); } -void -sctp_kstat_init(void) +void * +sctp_kstat_init(netstackid_t stackid) { + kstat_t *ksp; + sctp_named_kstat_t template = { { "sctpRtoAlgorithm", KSTAT_DATA_INT32, 0 }, { "sctpRtoMin", KSTAT_DATA_UINT32, 0 }, @@ -299,40 +287,95 @@ sctp_kstat_init(void) { "sctpInClosed", KSTAT_DATA_INT32, 0 } }; - sctp_mibkp = kstat_create(SCTP_MOD_NAME, 0, "sctp", "mib2", - KSTAT_TYPE_NAMED, NUM_OF_FIELDS(sctp_named_kstat_t), 0); + ksp = kstat_create_netstack(SCTP_MOD_NAME, 0, "sctp", "mib2", + KSTAT_TYPE_NAMED, NUM_OF_FIELDS(sctp_named_kstat_t), 0, stackid); - if (sctp_mibkp == NULL) - return; + if (ksp == NULL || ksp->ks_data == NULL) + return (NULL); /* These won't change. */ template.sctpRtoAlgorithm.value.i32 = MIB2_SCTP_RTOALGO_VANJ; template.sctpMaxAssocs.value.i32 = -1; - bcopy(&template, sctp_mibkp->ks_data, sizeof (template)); + bcopy(&template, ksp->ks_data, sizeof (template)); + ksp->ks_update = sctp_kstat_update; + ksp->ks_private = (void *)(uintptr_t)stackid; + + kstat_install(ksp); + return (ksp); +} + +/* + * The following kstats are for debugging purposes. They keep + * track of problems which should not happen normally. But in + * those cases which they do happen, these kstats would be handy + * for engineers to diagnose the problems. They are not intended + * to be consumed by customers. 
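These debug counters become per-stack as well: the kstat_named_t block now lives in sctp_stack_t as sctps_statistics (see sctp_stack.h below), and every SCTP_KSTAT() call site in this diff names the stack explicitly. For reference, the macro as defined later in this changeset, together with a typical call site using a counter name from this changeset:

/* Defined in sctp_stack.h in this commit: */
#define	SCTP_KSTAT(sctps, x)	((sctps)->sctps_statistics.x.value.ui64++)

/* Typical use after locating the stack: */
	sctp_stack_t	*sctps = sctp->sctp_sctps;

	SCTP_KSTAT(sctps, sctp_send_shutdown_failed);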
+ */ +void * +sctp_kstat2_init(netstackid_t stackid, sctp_kstat_t *sctps_statisticsp) +{ + kstat_t *ksp; + + sctp_kstat_t template = { + { "sctp_add_faddr", KSTAT_DATA_UINT64 }, + { "sctp_add_timer", KSTAT_DATA_UINT64 }, + { "sctp_conn_create", KSTAT_DATA_UINT64 }, + { "sctp_find_next_tq", KSTAT_DATA_UINT64 }, + { "sctp_fr_add_hdr", KSTAT_DATA_UINT64 }, + { "sctp_fr_not_found", KSTAT_DATA_UINT64 }, + { "sctp_output_failed", KSTAT_DATA_UINT64 }, + { "sctp_rexmit_failed", KSTAT_DATA_UINT64 }, + { "sctp_send_init_failed", KSTAT_DATA_UINT64 }, + { "sctp_send_cookie_failed", KSTAT_DATA_UINT64 }, + { "sctp_send_cookie_ack_failed", KSTAT_DATA_UINT64 }, + { "sctp_send_err_failed", KSTAT_DATA_UINT64 }, + { "sctp_send_sack_failed", KSTAT_DATA_UINT64 }, + { "sctp_send_shutdown_failed", KSTAT_DATA_UINT64 }, + { "sctp_send_shutdown_ack_failed", KSTAT_DATA_UINT64 }, + { "sctp_send_shutdown_comp_failed", KSTAT_DATA_UINT64 }, + { "sctp_send_user_abort_failed", KSTAT_DATA_UINT64 }, + { "sctp_send_asconf_failed", KSTAT_DATA_UINT64 }, + { "sctp_send_asconf_ack_failed", KSTAT_DATA_UINT64 }, + { "sctp_send_ftsn_failed", KSTAT_DATA_UINT64 }, + { "sctp_send_hb_failed", KSTAT_DATA_UINT64 }, + { "sctp_return_hb_failed", KSTAT_DATA_UINT64 }, + { "sctp_ss_rexmit_failed", KSTAT_DATA_UINT64 }, + { "sctp_cl_connect", KSTAT_DATA_UINT64 }, + { "sctp_cl_assoc_change", KSTAT_DATA_UINT64 }, + { "sctp_cl_check_addrs", KSTAT_DATA_UINT64 }, + }; + + ksp = kstat_create_netstack(SCTP_MOD_NAME, 0, "sctpstat", "net", + KSTAT_TYPE_NAMED, NUM_OF_FIELDS(template), KSTAT_FLAG_VIRTUAL, + stackid); - sctp_mibkp->ks_update = sctp_kstat_update; + if (ksp == NULL) + return (NULL); - kstat_install(sctp_mibkp); + bcopy(&template, sctps_statisticsp, sizeof (template)); + ksp->ks_data = (void *)sctps_statisticsp; + ksp->ks_private = (void *)(uintptr_t)stackid; - if ((sctp_kstat = kstat_create(SCTP_MOD_NAME, 0, "sctpstat", - "net", KSTAT_TYPE_NAMED, NUM_OF_FIELDS(sctp_statistics), - KSTAT_FLAG_VIRTUAL)) != NULL) { - sctp_kstat->ks_data = &sctp_statistics; - kstat_install(sctp_kstat); - } + kstat_install(ksp); + return (ksp); } void -sctp_kstat_fini(void) +sctp_kstat_fini(netstackid_t stackid, kstat_t *ksp) { - if (sctp_mibkp != NULL) { - kstat_delete(sctp_mibkp); - sctp_mibkp = NULL; + if (ksp != NULL) { + ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); + kstat_delete_netstack(ksp, stackid); } - if (sctp_kstat != NULL) { - kstat_delete(sctp_kstat); - sctp_kstat = NULL; +} + +void +sctp_kstat2_fini(netstackid_t stackid, kstat_t *ksp) +{ + if (ksp != NULL) { + ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); + kstat_delete_netstack(ksp, stackid); } } @@ -343,7 +386,7 @@ sctp_kstat_fini(void) * remote address table in mp_rem_data. 
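Both SCTP kstats now follow the same netstack-aware lifecycle: kstat_create_netstack() stores the netstackid in ks_private, the update callback resolves that id back to the owning stack, and kstat_delete_netstack() tears the kstat down with the stack. A condensed sketch of the update side, assuming the same headers as sctp_snmp.c and eliding most of the field copying:

/* Condensed from sctp_kstat_update() above; illustrative only. */
static int
sctp_kstat_update_sketch(kstat_t *kp, int rw)
{
	netstackid_t	stackid = (netstackid_t)(uintptr_t)kp->ks_private;
	netstack_t	*ns;
	sctp_stack_t	*sctps;
	sctp_named_kstat_t *sctpkp;

	if (rw == KSTAT_WRITE)
		return (EACCES);
	if ((ns = netstack_find_by_stackid(stackid)) == NULL)
		return (-1);
	if ((sctps = ns->netstack_sctp) == NULL) {
		netstack_rele(ns);
		return (-1);
	}
	sctpkp = (sctp_named_kstat_t *)kp->ks_data;
	sctpkp->sctpCurrEstab.value.i32 = sctps->sctps_mib.sctpCurrEstab;
	/* ... remaining MIB fields copied as in sctp_kstat_update() ... */
	netstack_rele(ns);
	return (0);
}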
*/ mblk_t * -sctp_snmp_get_mib2(queue_t *q, mblk_t *mpctl) +sctp_snmp_get_mib2(queue_t *q, mblk_t *mpctl, sctp_stack_t *sctps) { mblk_t *mpdata, *mp_ret; mblk_t *mp_conn_ctl = NULL; @@ -406,28 +449,28 @@ sctp_snmp_get_mib2(queue_t *q, mblk_t *mpctl) sce.sctpAssocRemHostName.o_bytes[0] = 0; /* build table of connections -- need count in fixed part */ - SET_MIB(sctp_mib.sctpRtoAlgorithm, MIB2_SCTP_RTOALGO_VANJ); - SET_MIB(sctp_mib.sctpRtoMin, sctp_rto_ming); - SET_MIB(sctp_mib.sctpRtoMax, sctp_rto_maxg); - SET_MIB(sctp_mib.sctpRtoInitial, sctp_rto_initialg); - SET_MIB(sctp_mib.sctpMaxAssocs, -1); - SET_MIB(sctp_mib.sctpValCookieLife, sctp_cookie_life); - SET_MIB(sctp_mib.sctpMaxInitRetr, sctp_max_init_retr); - SET_MIB(sctp_mib.sctpCurrEstab, 0); + SET_MIB(sctps->sctps_mib.sctpRtoAlgorithm, MIB2_SCTP_RTOALGO_VANJ); + SET_MIB(sctps->sctps_mib.sctpRtoMin, sctps->sctps_rto_ming); + SET_MIB(sctps->sctps_mib.sctpRtoMax, sctps->sctps_rto_maxg); + SET_MIB(sctps->sctps_mib.sctpRtoInitial, sctps->sctps_rto_initialg); + SET_MIB(sctps->sctps_mib.sctpMaxAssocs, -1); + SET_MIB(sctps->sctps_mib.sctpValCookieLife, sctps->sctps_cookie_life); + SET_MIB(sctps->sctps_mib.sctpMaxInitRetr, sctps->sctps_max_init_retr); + SET_MIB(sctps->sctps_mib.sctpCurrEstab, 0); idx = 0; - sctp = gsctp; - mutex_enter(&sctp_g_lock); + sctp = sctps->sctps_gsctp; + mutex_enter(&sctps->sctps_g_lock); while (sctp != NULL) { mutex_enter(&sctp->sctp_reflock); if (sctp->sctp_condemned) { mutex_exit(&sctp->sctp_reflock); - sctp = list_next(&sctp_g_list, sctp); + sctp = list_next(&sctps->sctps_g_list, sctp); continue; } sctp->sctp_refcnt++; mutex_exit(&sctp->sctp_reflock); - mutex_exit(&sctp_g_lock); + mutex_exit(&sctps->sctps_g_lock); if (sctp_prev != NULL) SCTP_REFRELE(sctp_prev); if (sctp->sctp_connp->conn_zoneid != zoneid) @@ -435,31 +478,40 @@ sctp_snmp_get_mib2(queue_t *q, mblk_t *mpctl) if (sctp->sctp_state == SCTPS_ESTABLISHED || sctp->sctp_state == SCTPS_SHUTDOWN_PENDING || sctp->sctp_state == SCTPS_SHUTDOWN_RECEIVED) { - BUMP_MIB(&sctp_mib, sctpCurrEstab); + BUMP_MIB(&sctps->sctps_mib, sctpCurrEstab); } - UPDATE_MIB(&sctp_mib, sctpOutSCTPPkts, sctp->sctp_opkts); + UPDATE_MIB(&sctps->sctps_mib, + sctpOutSCTPPkts, sctp->sctp_opkts); sctp->sctp_opkts = 0; - UPDATE_MIB(&sctp_mib, sctpOutCtrlChunks, sctp->sctp_obchunks); + UPDATE_MIB(&sctps->sctps_mib, + sctpOutCtrlChunks, sctp->sctp_obchunks); sctp->sctp_obchunks = 0; - UPDATE_MIB(&sctp_mib, sctpOutOrderChunks, sctp->sctp_odchunks); + UPDATE_MIB(&sctps->sctps_mib, + sctpOutOrderChunks, sctp->sctp_odchunks); sctp->sctp_odchunks = 0; - UPDATE_MIB(&sctp_mib, sctpOutUnorderChunks, + UPDATE_MIB(&sctps->sctps_mib, sctpOutUnorderChunks, sctp->sctp_oudchunks); sctp->sctp_oudchunks = 0; - UPDATE_MIB(&sctp_mib, sctpRetransChunks, sctp->sctp_rxtchunks); + UPDATE_MIB(&sctps->sctps_mib, + sctpRetransChunks, sctp->sctp_rxtchunks); sctp->sctp_rxtchunks = 0; - UPDATE_MIB(&sctp_mib, sctpInSCTPPkts, sctp->sctp_ipkts); + UPDATE_MIB(&sctps->sctps_mib, + sctpInSCTPPkts, sctp->sctp_ipkts); sctp->sctp_ipkts = 0; - UPDATE_MIB(&sctp_mib, sctpInCtrlChunks, sctp->sctp_ibchunks); + UPDATE_MIB(&sctps->sctps_mib, + sctpInCtrlChunks, sctp->sctp_ibchunks); sctp->sctp_ibchunks = 0; - UPDATE_MIB(&sctp_mib, sctpInOrderChunks, sctp->sctp_idchunks); + UPDATE_MIB(&sctps->sctps_mib, + sctpInOrderChunks, sctp->sctp_idchunks); sctp->sctp_idchunks = 0; - UPDATE_MIB(&sctp_mib, sctpInUnorderChunks, + UPDATE_MIB(&sctps->sctps_mib, sctpInUnorderChunks, sctp->sctp_iudchunks); sctp->sctp_iudchunks = 0; - UPDATE_MIB(&sctp_mib, 
sctpFragUsrMsgs, sctp->sctp_fragdmsgs); + UPDATE_MIB(&sctps->sctps_mib, + sctpFragUsrMsgs, sctp->sctp_fragdmsgs); sctp->sctp_fragdmsgs = 0; - UPDATE_MIB(&sctp_mib, sctpReasmUsrMsgs, sctp->sctp_reassmsgs); + UPDATE_MIB(&sctps->sctps_mib, + sctpReasmUsrMsgs, sctp->sctp_reassmsgs); sctp->sctp_reassmsgs = 0; sce.sctpAssocId = ntohl(sctp->sctp_lvtag); @@ -488,7 +540,7 @@ sctp_snmp_get_mib2(queue_t *q, mblk_t *mpctl) bzero(&sce.sctpAssocLocPrimAddr, sizeof (sce.sctpAssocLocPrimAddr)); sce.sctpAssocHeartBeatInterval = - sctp_heartbeat_interval; + sctps->sctps_heartbeat_interval; } /* @@ -598,21 +650,22 @@ done: &mp_attr_tail, (char *)&mlp, sizeof (mlp)); next_sctp: sctp_prev = sctp; - mutex_enter(&sctp_g_lock); - sctp = list_next(&sctp_g_list, sctp); + mutex_enter(&sctps->sctps_g_lock); + sctp = list_next(&sctps->sctps_g_list, sctp); } - mutex_exit(&sctp_g_lock); + mutex_exit(&sctps->sctps_g_lock); if (sctp_prev != NULL) SCTP_REFRELE(sctp_prev); /* fixed length structure for IPv4 and IPv6 counters */ - SET_MIB(sctp_mib.sctpEntrySize, sizeof (sce)); - SET_MIB(sctp_mib.sctpLocalEntrySize, sizeof (scle)); - SET_MIB(sctp_mib.sctpRemoteEntrySize, sizeof (scre)); + SET_MIB(sctps->sctps_mib.sctpEntrySize, sizeof (sce)); + SET_MIB(sctps->sctps_mib.sctpLocalEntrySize, sizeof (scle)); + SET_MIB(sctps->sctps_mib.sctpRemoteEntrySize, sizeof (scre)); optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; optp->level = MIB2_SCTP; optp->name = 0; - (void) snmp_append_data(mpdata, (char *)&sctp_mib, sizeof (sctp_mib)); + (void) snmp_append_data(mpdata, (char *)&sctps->sctps_mib, + sizeof (sctps->sctps_mib)); optp->len = msgdsize(mpdata); qreply(q, mpctl); diff --git a/usr/src/uts/common/inet/sctp/sctp_stack.h b/usr/src/uts/common/inet/sctp/sctp_stack.h new file mode 100644 index 0000000000..542476ab50 --- /dev/null +++ b/usr/src/uts/common/inet/sctp/sctp_stack.h @@ -0,0 +1,143 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _INET_SCTP_SCTP_STACK_H +#define _INET_SCTP_SCTP_STACK_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/netstack.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* SCTP kstat */ +typedef struct sctp_kstat_s { + kstat_named_t sctp_add_faddr; + kstat_named_t sctp_add_timer; + kstat_named_t sctp_conn_create; + kstat_named_t sctp_find_next_tq; + kstat_named_t sctp_fr_add_hdr; + kstat_named_t sctp_fr_not_found; + kstat_named_t sctp_output_failed; + kstat_named_t sctp_rexmit_failed; + kstat_named_t sctp_send_init_failed; + kstat_named_t sctp_send_cookie_failed; + kstat_named_t sctp_send_cookie_ack_failed; + kstat_named_t sctp_send_err_failed; + kstat_named_t sctp_send_sack_failed; + kstat_named_t sctp_send_shutdown_failed; + kstat_named_t sctp_send_shutdown_ack_failed; + kstat_named_t sctp_send_shutdown_comp_failed; + kstat_named_t sctp_send_user_abort_failed; + kstat_named_t sctp_send_asconf_failed; + kstat_named_t sctp_send_asconf_ack_failed; + kstat_named_t sctp_send_ftsn_failed; + kstat_named_t sctp_send_hb_failed; + kstat_named_t sctp_return_hb_failed; + kstat_named_t sctp_ss_rexmit_failed; + kstat_named_t sctp_cl_connect; + kstat_named_t sctp_cl_assoc_change; + kstat_named_t sctp_cl_check_addrs; +} sctp_kstat_t; + +#define SCTP_KSTAT(sctps, x) ((sctps)->sctps_statistics.x.value.ui64++) + +/* + * SCTP stack instances + */ +struct sctp_stack { + netstack_t *sctps_netstack; /* Common netstack */ + + mib2_sctp_t sctps_mib; + + /* Protected by sctps_g_q_lock */ + queue_t *sctps_g_q; + uint_t sctps_g_q_ref; /* Number of sctp_t's that use it */ + kmutex_t sctps_g_q_lock; + kcondvar_t sctps_g_q_cv; + kthread_t *sctps_g_q_creator; + struct __ldi_handle *sctps_g_q_lh; + cred_t *sctps_g_q_cr; /* For _inactive close call */ + /* The default sctp_t for responding out of the blue packets. */ + struct sctp_s *sctps_gsctp; + + /* Protected by sctps_g_lock */ + struct list sctps_g_list; /* SCTP instance data chain */ + kmutex_t sctps_g_lock; + +#define SCTP_NUM_EPRIV_PORTS 64 + int sctps_g_num_epriv_ports; + uint16_t sctps_g_epriv_ports[SCTP_NUM_EPRIV_PORTS]; + kmutex_t sctps_epriv_port_lock; + uint_t sctps_next_port_to_try; + + mblk_t *sctps_pad_mp; /* pad unaligned data chunks */ + + /* SCTP bind hash list - all sctp_t with state >= BOUND. */ + struct sctp_tf_s *sctps_bind_fanout; + /* SCTP listen hash list - all sctp_t with state == LISTEN. */ + struct sctp_tf_s *sctps_listen_fanout; + struct sctp_tf_s *sctps_conn_fanout; + uint_t sctps_conn_hash_size; + + /* Only modified during _init and _fini thus no locking is needed. */ + caddr_t sctps_g_nd; + struct sctpparam_s *sctps_params; + struct sctpparam_s *sctps_wroff_xtra_param; + +/* This lock protects the SCTP recvq_tq_list array and recvq_tq_list_cur_sz. */ + kmutex_t sctps_rq_tq_lock; + int sctps_recvq_tq_list_max_sz; + taskq_t **sctps_recvq_tq_list; + + /* Current number of recvq taskq. At least 1 for the default taskq. 
*/ + uint32_t sctps_recvq_tq_list_cur_sz; + uint32_t sctps_recvq_tq_list_cur; + + /* Global list of SCTP ILLs */ + struct sctp_ill_hash_s *sctps_g_ills; + uint32_t sctps_ills_count; + krwlock_t sctps_g_ills_lock; + + /* Global list of SCTP IPIFs */ + struct sctp_ipif_hash_s *sctps_g_ipifs; + uint32_t sctps_g_ipifs_count; + krwlock_t sctps_g_ipifs_lock; + + /* kstat exporting sctp_mib data */ + kstat_t *sctps_mibkp; + kstat_t *sctps_kstat; + sctp_kstat_t sctps_statistics; +}; +typedef struct sctp_stack sctp_stack_t; + +#ifdef __cplusplus +} +#endif + +#endif /* _INET_SCTP_SCTP_STACK_H */ diff --git a/usr/src/uts/common/inet/sctp/sctp_timer.c b/usr/src/uts/common/inet/sctp/sctp_timer.c index 93e937edf8..bb68380a7a 100644 --- a/usr/src/uts/common/inet/sctp/sctp_timer.c +++ b/usr/src/uts/common/inet/sctp/sctp_timer.c @@ -152,6 +152,7 @@ sctp_timer_alloc(sctp_t *sctp, pfv_t func) mblk_t *mp; sctp_tb_t *sctp_tb; sctpt_t *sctpt; + sctp_stack_t *sctps = sctp->sctp_sctps; if ((mp = allocb(sizeof (sctp_t) + sizeof (sctp_tb_t), BPRI_HI))) { mp->b_datap->db_type = M_PCSIG; @@ -167,7 +168,7 @@ sctp_timer_alloc(sctp_t *sctp, pfv_t func) sctpt->sctpt_pfv = func; return (mp); } - SCTP_KSTAT(sctp_add_timer); + SCTP_KSTAT(sctps, sctp_add_timer); return (NULL); } @@ -364,9 +365,11 @@ sctp_timer_call(sctp_t *sctp, mblk_t *mp) void sctp_ack_timer(sctp_t *sctp) { + sctp_stack_t *sctps = sctp->sctp_sctps; + sctp->sctp_ack_timer_running = 0; - sctp->sctp_sack_toggle = sctp_deferred_acks_max; - BUMP_MIB(&sctp_mib, sctpOutAckDelayed); + sctp->sctp_sack_toggle = sctps->sctps_deferred_acks_max; + BUMP_MIB(&sctps->sctps_mib, sctpOutAckDelayed); sctp_sack(sctp, NULL); } @@ -380,6 +383,7 @@ sctp_heartbeat_timer(sctp_t *sctp) int64_t now; int64_t earliest_expiry; int cnt; + sctp_stack_t *sctps = sctp->sctp_sctps; if (sctp->sctp_strikes >= sctp->sctp_pa_max_rxt) { /* @@ -393,8 +397,8 @@ sctp_heartbeat_timer(sctp_t *sctp) */ if (!sctp_is_a_faddr_clean(sctp)) { /* time to give up */ - BUMP_MIB(&sctp_mib, sctpAborted); - BUMP_MIB(&sctp_mib, sctpTimHeartBeatDrop); + BUMP_MIB(&sctps->sctps_mib, sctpAborted); + BUMP_MIB(&sctps->sctps_mib, sctpTimHeartBeatDrop); sctp_assoc_event(sctp, SCTP_COMM_LOST, 0, NULL); sctp_clean_death(sctp, sctp->sctp_client_errno ? sctp->sctp_client_errno : ETIMEDOUT); @@ -410,7 +414,7 @@ sctp_heartbeat_timer(sctp_t *sctp) now = lbolt64; earliest_expiry = 0; - cnt = sctp_maxburst; + cnt = sctps->sctps_maxburst; /* * Walk through all faddrs. Since the timer should run infrequently @@ -528,6 +532,7 @@ sctp_rexmit_timer(sctp_t *sctp, sctp_faddr_t *fp) { mblk_t *mp; uint32_t rto_max = sctp->sctp_rto_max; + sctp_stack_t *sctps = sctp->sctp_sctps; ASSERT(fp != NULL); @@ -540,8 +545,8 @@ sctp_rexmit_timer(sctp_t *sctp, sctp_faddr_t *fp) if (sctp->sctp_state < SCTPS_ESTABLISHED) { if (fp->strikes >= sctp->sctp_max_init_rxt) { /* time to give up */ - BUMP_MIB(&sctp_mib, sctpAborted); - BUMP_MIB(&sctp_mib, sctpTimRetransDrop); + BUMP_MIB(&sctps->sctps_mib, sctpAborted); + BUMP_MIB(&sctps->sctps_mib, sctpTimRetransDrop); sctp_assoc_event(sctp, SCTP_CANT_STR_ASSOC, 0, NULL); sctp_clean_death(sctp, sctp->sctp_client_errno ? 
sctp->sctp_client_errno : ETIMEDOUT); @@ -550,8 +555,8 @@ sctp_rexmit_timer(sctp_t *sctp, sctp_faddr_t *fp) } else if (sctp->sctp_state >= SCTPS_ESTABLISHED) { if (sctp->sctp_strikes >= sctp->sctp_pa_max_rxt) { /* time to give up */ - BUMP_MIB(&sctp_mib, sctpAborted); - BUMP_MIB(&sctp_mib, sctpTimRetransDrop); + BUMP_MIB(&sctps->sctps_mib, sctpAborted); + BUMP_MIB(&sctps->sctps_mib, sctpTimRetransDrop); sctp_assoc_event(sctp, SCTP_COMM_LOST, 0, NULL); sctp_clean_death(sctp, sctp->sctp_client_errno ? sctp->sctp_client_errno : ETIMEDOUT); @@ -593,7 +598,7 @@ sctp_rexmit_timer(sctp_t *sctp, sctp_faddr_t *fp) return; } - BUMP_MIB(&sctp_mib, sctpTimRetrans); + BUMP_MIB(&sctps->sctps_mib, sctpTimRetrans); sctp_rexmit(sctp, fp); /* @@ -612,7 +617,7 @@ rxmit_init: */ mp = sctp_init_mp(sctp); if (mp != NULL) { - BUMP_MIB(&sctp_mib, sctpTimRetrans); + BUMP_MIB(&sctps->sctps_mib, sctpTimRetrans); sctp_add_sendq(sctp, mp); } rto_max = sctp->sctp_init_rto_max; @@ -633,14 +638,14 @@ rxmit_init: if (IPH_HDR_VERSION(iph) == IPV4_VERSION) iph->ipha_ident = 0; sctp_add_sendq(sctp, mp); - BUMP_MIB(&sctp_mib, sctpTimRetrans); + BUMP_MIB(&sctps->sctps_mib, sctpTimRetrans); rto_max = sctp->sctp_init_rto_max; break; } case SCTPS_SHUTDOWN_SENT: BUMP_LOCAL(sctp->sctp_T2expire); sctp_send_shutdown(sctp, 1); - BUMP_MIB(&sctp_mib, sctpTimRetrans); + BUMP_MIB(&sctps->sctps_mib, sctpTimRetrans); break; case SCTPS_SHUTDOWN_ACK_SENT: /* We shouldn't have any more outstanding data */ @@ -650,7 +655,7 @@ rxmit_init: BUMP_LOCAL(sctp->sctp_T2expire); (void) sctp_shutdown_received(sctp, NULL, B_FALSE, B_TRUE, NULL); - BUMP_MIB(&sctp_mib, sctpTimRetrans); + BUMP_MIB(&sctps->sctps_mib, sctpTimRetrans); break; default: ASSERT(0); diff --git a/usr/src/uts/common/inet/sctp_ip.h b/usr/src/uts/common/inet/sctp_ip.h index e5017d2a71..89b5730f90 100644 --- a/usr/src/uts/common/inet/sctp_ip.h +++ b/usr/src/uts/common/inet/sctp_ip.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -32,26 +32,28 @@ extern "C" { #endif +#include <inet/sctp/sctp_stack.h> + #define SCTP_COMMON_HDR_LENGTH 12 /* SCTP common header length */ /* SCTP routines for IP to call. 
*/ extern void ip_fanout_sctp(mblk_t *, ill_t *, ipha_t *, uint32_t, uint_t, boolean_t, boolean_t, uint_t, zoneid_t); -extern void sctp_ddi_init(void); -extern void sctp_ddi_destroy(void); +extern void sctp_ddi_g_init(void); +extern void sctp_ddi_g_destroy(void); extern conn_t *sctp_find_conn(in6_addr_t *, in6_addr_t *, uint32_t, uint_t, - zoneid_t); + zoneid_t, sctp_stack_t *); extern conn_t *sctp_fanout(in6_addr_t *, in6_addr_t *, uint32_t, uint_t, - zoneid_t, mblk_t *); + zoneid_t, mblk_t *, sctp_stack_t *); extern void sctp_input(conn_t *, ipha_t *, mblk_t *, mblk_t *, ill_t *, boolean_t, boolean_t); extern void sctp_wput(queue_t *, mblk_t *); extern void sctp_ootb_input(mblk_t *, ill_t *, uint_t, zoneid_t, boolean_t); -extern void sctp_hash_init(void); -extern void sctp_hash_destroy(void); +extern void sctp_hash_init(sctp_stack_t *); +extern void sctp_hash_destroy(sctp_stack_t *); extern uint32_t sctp_cksum(mblk_t *, int); -extern mblk_t *sctp_snmp_get_mib2(queue_t *, mblk_t *); +extern mblk_t *sctp_snmp_get_mib2(queue_t *, mblk_t *, sctp_stack_t *); extern void sctp_free(conn_t *); #define SCTP_STASH_IPINFO(mp, ire) \ @@ -90,9 +92,6 @@ extern void ip_fanout_sctp_raw(mblk_t *, ill_t *, ipha_t *, boolean_t, uint32_t, boolean_t, uint_t, boolean_t, uint_t, zoneid_t); extern void sctp_ire_cache_flush(ipif_t *); -/* SNMP fixed size info */ -extern mib2_sctp_t sctp_mib; - /* * Private (and possibly temporary) ioctls. It is a large number * to avoid conflict with other ioctls, which are normally smaller diff --git a/usr/src/uts/common/inet/snmpcom.c b/usr/src/uts/common/inet/snmpcom.c index fa417fae88..39d8ed5b32 100644 --- a/usr/src/uts/common/inet/snmpcom.c +++ b/usr/src/uts/common/inet/snmpcom.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* Copyright (c) 1990 Mentat Inc. */ @@ -214,7 +213,7 @@ snmpcom_req(queue_t *q, mblk_t *mp, pfi_t setfn, pfi_t getfn, cred_t *credp) switch (tor->MGMT_flags) { case T_NEGOTIATE: - if (secpolicy_net_config(credp, B_FALSE) != 0) { + if (secpolicy_ip_config(credp, B_FALSE) != 0) { optcom_err_ack(q, mp, TACCES, 0); return (B_TRUE); } diff --git a/usr/src/uts/common/inet/spdsock.h b/usr/src/uts/common/inet/spdsock.h index 229c4e7305..aabf73029f 100644 --- a/usr/src/uts/common/inet/spdsock.h +++ b/usr/src/uts/common/inet/spdsock.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -28,11 +28,39 @@ #pragma ident "%Z%%M% %I% %E% SMI" +#include <sys/netstack.h> + #ifdef __cplusplus extern "C" { #endif /* + * SPDSOCK stack instances + */ +struct spd_stack { + netstack_t *spds_netstack; /* Common netstack */ + + caddr_t spds_g_nd; + struct spdsockparam_s *spds_params; + kmutex_t spds_param_lock; + /* Protects the NDD variables. 
*/ + + /* + * To save algorithm update messages that are processed only after + * IPsec is loaded. + */ + struct spd_ext *spds_extv_algs[SPD_EXT_MAX + 1]; + mblk_t *spds_mp_algs; + boolean_t spds_algs_pending; + struct ipsec_alginfo + *spds_algs[IPSEC_NALGTYPES][IPSEC_MAX_ALGS]; + int spds_algs_exec_mode[IPSEC_NALGTYPES]; + kmutex_t spds_alg_lock; +}; +typedef struct spd_stack spd_stack_t; + + +/* * spdsock (PF_POLICY) session state; one per open PF_POLICY socket. * * These are kept on a linked list by the spdsock module. @@ -64,6 +92,7 @@ typedef struct spdsock_s ipsec_policy_t *spdsock_dump_cur_rule; uint32_t spdsock_dump_cur_chain; uint32_t spdsock_dump_count; + spd_stack_t *spdsock_spds; /* These are used for all-polhead dumps. */ int spdsock_dump_tun_gen; boolean_t spdsock_dump_active; diff --git a/usr/src/uts/common/inet/tcp.h b/usr/src/uts/common/inet/tcp.h index a586064d3b..6937e3e9e0 100644 --- a/usr/src/uts/common/inet/tcp.h +++ b/usr/src/uts/common/inet/tcp.h @@ -36,9 +36,16 @@ extern "C" { #include <sys/inttypes.h> #include <netinet/ip6.h> #include <netinet/tcp.h> -#include <inet/tcp_sack.h> #include <sys/socket.h> #include <sys/multidata.h> +#include <sys/md5.h> +#include <inet/common.h> +#include <inet/ip.h> +#include <inet/ip6.h> +#include <inet/mi.h> +#include <inet/mib2.h> +#include <inet/tcp_stack.h> +#include <inet/tcp_sack.h> #include <inet/kssl/ksslapi.h> /* @@ -142,12 +149,14 @@ struct conn_s; typedef struct tcp_s { /* Pointer to previous bind hash next. */ - struct tcp_s *tcp_time_wait_next; + struct tcp_s *tcp_time_wait_next; /* Pointer to next T/W block */ - struct tcp_s *tcp_time_wait_prev; + struct tcp_s *tcp_time_wait_prev; /* Pointer to previous T/W next */ - clock_t tcp_time_wait_expire; - struct conn_s *tcp_connp; + clock_t tcp_time_wait_expire; + + struct conn_s *tcp_connp; + tcp_stack_t *tcp_tcps; /* Shortcut via conn_netstack */ int32_t tcp_state; int32_t tcp_rcv_ws; /* My window scale power */ @@ -603,16 +612,48 @@ typedef struct tcp_s { #define TCP_DEBUG_GETPCSTACK(buffer, depth) #endif +/* + * Track a reference count on the tcps in order to know when + * the tcps_g_q can be removed. As long as there is any + * tcp_t, other that the tcps_g_q itself, in the tcp_stack_t we + * need to keep tcps_g_q around so that a closing connection can + * switch to using tcps_g_q as part of it closing. + */ +#define TCPS_REFHOLD(tcps) { \ + atomic_add_32(&(tcps)->tcps_refcnt, 1); \ + ASSERT((tcps)->tcps_refcnt != 0); \ + DTRACE_PROBE1(tcps__refhold, tcp_stack_t, tcps); \ +} + +/* + * Decrement the reference count on the tcp_stack_t. + * In architectures e.g sun4u, where atomic_add_32_nv is just + * a cas, we need to maintain the right memory barrier semantics + * as that of mutex_exit i.e all the loads and stores should complete + * before the cas is executed. membar_exit() does that here. 
+ */ +#define TCPS_REFRELE(tcps) { \ + ASSERT((tcps)->tcps_refcnt != 0); \ + membar_exit(); \ + DTRACE_PROBE1(tcps__refrele, tcp_stack_t, tcps); \ + if (atomic_add_32_nv(&(tcps)->tcps_refcnt, -1) == 0 && \ + (tcps)->tcps_g_q != NULL) { \ + /* Only tcps_g_q left */ \ + tcp_g_q_inactive(tcps); \ + } \ +} + extern void tcp_free(tcp_t *tcp); -extern void tcp_ddi_init(void); -extern void tcp_ddi_destroy(void); +extern void tcp_ddi_g_init(void); +extern void tcp_ddi_g_destroy(void); +extern void tcp_g_q_inactive(tcp_stack_t *); extern void tcp_xmit_listeners_reset(mblk_t *mp, uint_t ip_hdr_len, - zoneid_t zoneid); + zoneid_t zoneid, tcp_stack_t *); extern void tcp_conn_request(void *arg, mblk_t *mp, void *arg2); extern void tcp_conn_request_unbound(void *arg, mblk_t *mp, void *arg2); extern void tcp_input(void *arg, mblk_t *mp, void *arg2); extern void tcp_rput_data(void *arg, mblk_t *mp, void *arg2); -extern void *tcp_get_conn(void *arg); +extern void *tcp_get_conn(void *arg, tcp_stack_t *); extern void tcp_time_wait_collector(void *arg); extern int tcp_snmp_get(queue_t *, mblk_t *); extern int tcp_snmp_set(queue_t *, int, int, uchar_t *, int len); @@ -627,15 +668,11 @@ extern mblk_t *tcp_xmit_mp(tcp_t *tcp, mblk_t *mp, int32_t max_to_send, * * The listener and acceptor hash queues are lists of tcp_t. */ - /* listener hash and acceptor hash queue head */ typedef struct tf_s { tcp_t *tf_tcp; kmutex_t tf_lock; } tf_t; - -extern mib2_tcp_t tcp_mib; - #endif /* (defined(_KERNEL) || defined(_KMEMUSER)) */ /* Contract private interface between TCP and Clustering. */ diff --git a/usr/src/uts/common/inet/tcp/tcp.c b/usr/src/uts/common/inet/tcp/tcp.c index f55afe25f6..3c7ec52f22 100644 --- a/usr/src/uts/common/inet/tcp/tcp.c +++ b/usr/src/uts/common/inet/tcp/tcp.c @@ -57,6 +57,7 @@ const char tcp_version[] = "%Z%%M% %I% %E% SMI"; #include <sys/policy.h> #include <sys/priv.h> #include <sys/zone.h> +#include <sys/sunldi.h> #include <sys/errno.h> #include <sys/signal.h> @@ -154,7 +155,7 @@ const char tcp_version[] = "%Z%%M% %I% %E% SMI"; * * Opening a new connection: * - * The outgoing connection open is pretty simple. ip_tcpopen() does the + * The outgoing connection open is pretty simple. tcp_open() does the * work in creating the conn/tcp structure and initializing it. The * squeue assignment is done based on the CPU the application * is running on. So for outbound connections, processing is always done @@ -241,7 +242,7 @@ extern major_t TCP6_MAJ; * 2: squeue_enter * 3: squeue_fill */ -int tcp_squeue_close = 2; +int tcp_squeue_close = 2; /* Setable in /etc/system */ int tcp_squeue_wput = 2; squeue_func_t tcp_squeue_close_proc; @@ -280,7 +281,8 @@ int tcp_tx_pull_len = 16; * How to add new counters. * * 1) Add a field in the tcp_stat structure describing your counter. - * 2) Add a line in tcp_statistics with the name of the counter. + * 2) Add a line in the template in tcp_kstat2_init() with the name + * of the counter. * * IMPORTANT!! - make sure that both are in sync !! * 3) Use either TCP_STAT or TCP_DBGSTAT with the name. 
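Following the updated recipe in the comment above (a new field in the tcp_stat structure plus a matching line in the tcp_kstat2_init() template), a debug counter is now bumped against a particular stack instead of a single global array. The counter below is purely hypothetical, invented only to show the per-stack TCP_DBGSTAT() form used throughout the rest of this patch:

/* Hypothetical counter, not part of the commit. */

/* 1) Field added to the tcp_stat structure: */
	kstat_named_t	tcp_example_counter;

/* 2) Matching line in the tcp_kstat2_init() template: */
	{ "tcp_example_counter",	KSTAT_DATA_UINT64 },

/* 3) Bumping it from tcp.c, now naming the stack: */
	tcp_stack_t	*tcps = tcp->tcp_tcps;

	TCP_DBGSTAT(tcps, tcp_example_counter);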
@@ -320,119 +322,33 @@ static uint_t tcp_clean_death_stat[TCP_MAX_CLEAN_DEATH_TAG]; #endif #if TCP_DEBUG_COUNTER -#define TCP_DBGSTAT(x) atomic_add_64(&(tcp_statistics.x.value.ui64), 1) +#define TCP_DBGSTAT(tcps, x) \ + atomic_add_64(&((tcps)->tcps_statistics.x.value.ui64), 1) +#define TCP_G_DBGSTAT(x) \ + atomic_add_64(&(tcp_g_statistics.x.value.ui64), 1) #elif defined(lint) -#define TCP_DBGSTAT(x) ASSERT(_lint_dummy_ == 0); +#define TCP_DBGSTAT(tcps, x) ASSERT(_lint_dummy_ == 0); +#define TCP_G_DBGSTAT(x) ASSERT(_lint_dummy_ == 0); #else -#define TCP_DBGSTAT(x) +#define TCP_DBGSTAT(tcps, x) +#define TCP_G_DBGSTAT(x) #endif -tcp_stat_t tcp_statistics = { - { "tcp_time_wait", KSTAT_DATA_UINT64 }, - { "tcp_time_wait_syn", KSTAT_DATA_UINT64 }, - { "tcp_time_wait_success", KSTAT_DATA_UINT64 }, - { "tcp_time_wait_fail", KSTAT_DATA_UINT64 }, - { "tcp_reinput_syn", KSTAT_DATA_UINT64 }, - { "tcp_ip_output", KSTAT_DATA_UINT64 }, - { "tcp_detach_non_time_wait", KSTAT_DATA_UINT64 }, - { "tcp_detach_time_wait", KSTAT_DATA_UINT64 }, - { "tcp_time_wait_reap", KSTAT_DATA_UINT64 }, - { "tcp_clean_death_nondetached", KSTAT_DATA_UINT64 }, - { "tcp_reinit_calls", KSTAT_DATA_UINT64 }, - { "tcp_eager_err1", KSTAT_DATA_UINT64 }, - { "tcp_eager_err2", KSTAT_DATA_UINT64 }, - { "tcp_eager_blowoff_calls", KSTAT_DATA_UINT64 }, - { "tcp_eager_blowoff_q", KSTAT_DATA_UINT64 }, - { "tcp_eager_blowoff_q0", KSTAT_DATA_UINT64 }, - { "tcp_not_hard_bound", KSTAT_DATA_UINT64 }, - { "tcp_no_listener", KSTAT_DATA_UINT64 }, - { "tcp_found_eager", KSTAT_DATA_UINT64 }, - { "tcp_wrong_queue", KSTAT_DATA_UINT64 }, - { "tcp_found_eager_binding1", KSTAT_DATA_UINT64 }, - { "tcp_found_eager_bound1", KSTAT_DATA_UINT64 }, - { "tcp_eager_has_listener1", KSTAT_DATA_UINT64 }, - { "tcp_open_alloc", KSTAT_DATA_UINT64 }, - { "tcp_open_detached_alloc", KSTAT_DATA_UINT64 }, - { "tcp_rput_time_wait", KSTAT_DATA_UINT64 }, - { "tcp_listendrop", KSTAT_DATA_UINT64 }, - { "tcp_listendropq0", KSTAT_DATA_UINT64 }, - { "tcp_wrong_rq", KSTAT_DATA_UINT64 }, - { "tcp_rsrv_calls", KSTAT_DATA_UINT64 }, - { "tcp_eagerfree2", KSTAT_DATA_UINT64 }, - { "tcp_eagerfree3", KSTAT_DATA_UINT64 }, - { "tcp_eagerfree4", KSTAT_DATA_UINT64 }, - { "tcp_eagerfree5", KSTAT_DATA_UINT64 }, - { "tcp_timewait_syn_fail", KSTAT_DATA_UINT64 }, - { "tcp_listen_badflags", KSTAT_DATA_UINT64 }, - { "tcp_timeout_calls", KSTAT_DATA_UINT64 }, - { "tcp_timeout_cached_alloc", KSTAT_DATA_UINT64 }, - { "tcp_timeout_cancel_reqs", KSTAT_DATA_UINT64 }, - { "tcp_timeout_canceled", KSTAT_DATA_UINT64 }, - { "tcp_timermp_alloced", KSTAT_DATA_UINT64 }, - { "tcp_timermp_freed", KSTAT_DATA_UINT64 }, - { "tcp_timermp_allocfail", KSTAT_DATA_UINT64 }, - { "tcp_timermp_allocdblfail", KSTAT_DATA_UINT64 }, - { "tcp_push_timer_cnt", KSTAT_DATA_UINT64 }, - { "tcp_ack_timer_cnt", KSTAT_DATA_UINT64 }, - { "tcp_ire_null1", KSTAT_DATA_UINT64 }, - { "tcp_ire_null", KSTAT_DATA_UINT64 }, - { "tcp_ip_send", KSTAT_DATA_UINT64 }, - { "tcp_ip_ire_send", KSTAT_DATA_UINT64 }, - { "tcp_wsrv_called", KSTAT_DATA_UINT64 }, - { "tcp_flwctl_on", KSTAT_DATA_UINT64 }, - { "tcp_timer_fire_early", KSTAT_DATA_UINT64 }, - { "tcp_timer_fire_miss", KSTAT_DATA_UINT64 }, - { "tcp_freelist_cleanup", KSTAT_DATA_UINT64 }, - { "tcp_rput_v6_error", KSTAT_DATA_UINT64 }, - { "tcp_out_sw_cksum", KSTAT_DATA_UINT64 }, - { "tcp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, - { "tcp_zcopy_on", KSTAT_DATA_UINT64 }, - { "tcp_zcopy_off", KSTAT_DATA_UINT64 }, - { "tcp_zcopy_backoff", KSTAT_DATA_UINT64 }, - { "tcp_zcopy_disable", KSTAT_DATA_UINT64 }, - { 
"tcp_mdt_pkt_out", KSTAT_DATA_UINT64 }, - { "tcp_mdt_pkt_out_v4", KSTAT_DATA_UINT64 }, - { "tcp_mdt_pkt_out_v6", KSTAT_DATA_UINT64 }, - { "tcp_mdt_discarded", KSTAT_DATA_UINT64 }, - { "tcp_mdt_conn_halted1", KSTAT_DATA_UINT64 }, - { "tcp_mdt_conn_halted2", KSTAT_DATA_UINT64 }, - { "tcp_mdt_conn_halted3", KSTAT_DATA_UINT64 }, - { "tcp_mdt_conn_resumed1", KSTAT_DATA_UINT64 }, - { "tcp_mdt_conn_resumed2", KSTAT_DATA_UINT64 }, - { "tcp_mdt_legacy_small", KSTAT_DATA_UINT64 }, - { "tcp_mdt_legacy_all", KSTAT_DATA_UINT64 }, - { "tcp_mdt_legacy_ret", KSTAT_DATA_UINT64 }, - { "tcp_mdt_allocfail", KSTAT_DATA_UINT64 }, - { "tcp_mdt_addpdescfail", KSTAT_DATA_UINT64 }, - { "tcp_mdt_allocd", KSTAT_DATA_UINT64 }, - { "tcp_mdt_linked", KSTAT_DATA_UINT64 }, - { "tcp_fusion_flowctl", KSTAT_DATA_UINT64 }, - { "tcp_fusion_backenabled", KSTAT_DATA_UINT64 }, - { "tcp_fusion_urg", KSTAT_DATA_UINT64 }, - { "tcp_fusion_putnext", KSTAT_DATA_UINT64 }, - { "tcp_fusion_unfusable", KSTAT_DATA_UINT64 }, - { "tcp_fusion_aborted", KSTAT_DATA_UINT64 }, - { "tcp_fusion_unqualified", KSTAT_DATA_UINT64 }, - { "tcp_fusion_rrw_busy", KSTAT_DATA_UINT64 }, - { "tcp_fusion_rrw_msgcnt", KSTAT_DATA_UINT64 }, - { "tcp_fusion_rrw_plugged", KSTAT_DATA_UINT64 }, - { "tcp_in_ack_unsent_drop", KSTAT_DATA_UINT64 }, - { "tcp_sock_fallback", KSTAT_DATA_UINT64 }, - { "tcp_lso_enabled", KSTAT_DATA_UINT64 }, - { "tcp_lso_disabled", KSTAT_DATA_UINT64 }, - { "tcp_lso_times", KSTAT_DATA_UINT64 }, - { "tcp_lso_pkt_out", KSTAT_DATA_UINT64 }, -}; +#define TCP_G_STAT(x) (tcp_g_statistics.x.value.ui64++) -static kstat_t *tcp_kstat; +tcp_g_stat_t tcp_g_statistics; +kstat_t *tcp_g_kstat; /* * Call either ip_output or ip_output_v6. This replaces putnext() calls on the * tcp write side. */ #define CALL_IP_WPUT(connp, q, mp) { \ + tcp_stack_t *tcps; \ + \ + tcps = connp->conn_netstack->netstack_tcp; \ ASSERT(((q)->q_flag & QREADR) == 0); \ - TCP_DBGSTAT(tcp_ip_output); \ + TCP_DBGSTAT(tcps, tcp_ip_output); \ connp->conn_send(connp, (mp), (q), IP_WPUT); \ } @@ -464,15 +380,9 @@ static kstat_t *tcp_kstat; #define ISS_INCR 250000 #define ISS_NSEC_SHT 12 -static uint32_t tcp_iss_incr_extra; /* Incremented for each connection */ -static kmutex_t tcp_iss_key_lock; -static MD5_CTX tcp_iss_key; static sin_t sin_null; /* Zero address for quick clears */ static sin6_t sin6_null; /* Zero address for quick clears */ -/* Packet dropper for TCP IPsec policy drops. */ -static ipdropper_t tcp_dropper; - /* * This implementation follows the 4.3BSD interpretation of the urgent * pointer and not RFC 1122. Switching to RFC 1122 behavior would cause @@ -615,11 +525,15 @@ kmem_cache_t *tcp_iphc_cache; * The list manipulations (including tcp_time_wait_next/prev) * are protected by the tcp_time_wait_lock. The content of the * detached TIME_WAIT connections is protected by the normal perimeters. + * + * This list is per squeue and squeues are shared across the tcp_stack_t's. + * Things on tcp_time_wait_head remain associated with the tcp_stack_t + * and conn_netstack. + * The tcp_t's that are added to tcp_free_list are disassociated and + * have NULL tcp_tcps and conn_netstack pointers. 
*/ - typedef struct tcp_squeue_priv_s { kmutex_t tcp_time_wait_lock; - /* Protects the next 3 globals */ timeout_id_t tcp_time_wait_tid; tcp_t *tcp_time_wait_head; tcp_t *tcp_time_wait_tail; @@ -832,13 +746,16 @@ static int tcp_tpistate(tcp_t *tcp); static void tcp_bind_hash_insert(tf_t *tf, tcp_t *tcp, int caller_holds_lock); static void tcp_bind_hash_remove(tcp_t *tcp); -static tcp_t *tcp_acceptor_hash_lookup(t_uscalar_t id); +static tcp_t *tcp_acceptor_hash_lookup(t_uscalar_t id, tcp_stack_t *); void tcp_acceptor_hash_insert(t_uscalar_t id, tcp_t *tcp); static void tcp_acceptor_hash_remove(tcp_t *tcp); static void tcp_capability_req(tcp_t *tcp, mblk_t *mp); static void tcp_info_req(tcp_t *tcp, mblk_t *mp); static void tcp_addr_req(tcp_t *tcp, mblk_t *mp); static void tcp_addr_req_ipv6(tcp_t *tcp, mblk_t *mp); +void tcp_g_q_setup(tcp_stack_t *); +void tcp_g_q_create(tcp_stack_t *); +void tcp_g_q_destroy(tcp_stack_t *); static int tcp_header_init_ipv4(tcp_t *tcp); static int tcp_header_init_ipv6(tcp_t *tcp); int tcp_init(tcp_t *tcp, queue_t *q); @@ -866,12 +783,13 @@ static void tcp_opt_reverse(tcp_t *tcp, ipha_t *ipha); static int tcp_opt_set_header(tcp_t *tcp, boolean_t checkonly, uchar_t *ptr, uint_t len); static int tcp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); -static boolean_t tcp_param_register(tcpparam_t *tcppa, int cnt); +static boolean_t tcp_param_register(IDP *ndp, tcpparam_t *tcppa, int cnt, + tcp_stack_t *); static int tcp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr); static int tcp_param_set_aligned(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr); -static void tcp_iss_key_init(uint8_t *phrase, int len); +static void tcp_iss_key_init(uint8_t *phrase, int len, tcp_stack_t *); static int tcp_1948_phrase_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr); static void tcp_process_shrunk_swnd(tcp_t *tcp, uint32_t shrunk_cnt); @@ -884,7 +802,7 @@ static void tcp_report_item(mblk_t *mp, tcp_t *tcp, int hashval, static uint_t tcp_rcv_drain(queue_t *q, tcp_t *tcp); static void tcp_sack_rxmit(tcp_t *tcp, uint_t *flags); -static boolean_t tcp_send_rst_chk(void); +static boolean_t tcp_send_rst_chk(tcp_stack_t *); static void tcp_ss_rexmit(tcp_t *tcp); static mblk_t *tcp_rput_add_ancillary(tcp_t *tcp, mblk_t *mp, ip6_pkt_t *ipp); static void tcp_process_options(tcp_t *, tcph_t *); @@ -936,11 +854,11 @@ static void tcp_ack_timer(void *arg); static mblk_t *tcp_ack_mp(tcp_t *tcp); static void tcp_xmit_early_reset(char *str, mblk_t *mp, uint32_t seq, uint32_t ack, int ctl, uint_t ip_hdr_len, - zoneid_t zoneid); + zoneid_t zoneid, tcp_stack_t *); static void tcp_xmit_ctl(char *str, tcp_t *tcp, uint32_t seq, uint32_t ack, int ctl); -static tcp_hsp_t *tcp_hsp_lookup(ipaddr_t addr); -static tcp_hsp_t *tcp_hsp_lookup_ipv6(in6_addr_t *addr); +static tcp_hsp_t *tcp_hsp_lookup(ipaddr_t addr, tcp_stack_t *); +static tcp_hsp_t *tcp_hsp_lookup_ipv6(in6_addr_t *addr, tcp_stack_t *); static int setmaxps(queue_t *q, int maxpsz); static void tcp_set_rto(tcp_t *, time_t); static boolean_t tcp_check_policy(tcp_t *, mblk_t *, ipha_t *, ip6_t *, @@ -956,14 +874,14 @@ static void tcp_time_wait_processing(tcp_t *tcp, mblk_t *mp, boolean_t tcp_paws_check(tcp_t *tcp, tcph_t *tcph, tcp_opt_t *tcpoptp); boolean_t tcp_reserved_port_add(int, in_port_t *, in_port_t *); boolean_t tcp_reserved_port_del(in_port_t, in_port_t); -boolean_t tcp_reserved_port_check(in_port_t); -static tcp_t *tcp_alloc_temp_tcp(in_port_t); +boolean_t 
tcp_reserved_port_check(in_port_t, tcp_stack_t *); +static tcp_t *tcp_alloc_temp_tcp(in_port_t, tcp_stack_t *); static int tcp_reserved_port_list(queue_t *, mblk_t *, caddr_t, cred_t *); static mblk_t *tcp_mdt_info_mp(mblk_t *); static void tcp_mdt_update(tcp_t *, ill_mdt_capab_t *, boolean_t); static int tcp_mdt_add_attrs(multidata_t *, const mblk_t *, const boolean_t, const uint32_t, const uint32_t, - const uint32_t, const uint32_t); + const uint32_t, const uint32_t, tcp_stack_t *); static void tcp_multisend_data(tcp_t *, ire_t *, const ill_t *, mblk_t *, const uint_t, const uint_t, boolean_t *); static mblk_t *tcp_lso_info_mp(mblk_t *); @@ -974,10 +892,15 @@ extern void tcp_timermp_free(tcp_t *); static void tcp_timer_free(tcp_t *tcp, mblk_t *mp); static void tcp_stop_lingering(tcp_t *tcp); static void tcp_close_linger_timeout(void *arg); -void tcp_ddi_init(void); -void tcp_ddi_destroy(void); -static void tcp_kstat_init(void); -static void tcp_kstat_fini(void); +static void *tcp_stack_init(netstackid_t stackid, netstack_t *ns); +static void tcp_stack_shutdown(netstackid_t stackid, void *arg); +static void tcp_stack_fini(netstackid_t stackid, void *arg); +static void *tcp_g_kstat_init(tcp_g_stat_t *); +static void tcp_g_kstat_fini(kstat_t *); +static void *tcp_kstat_init(netstackid_t, tcp_stack_t *); +static void tcp_kstat_fini(netstackid_t, kstat_t *); +static void *tcp_kstat2_init(netstackid_t, tcp_stat_t *); +static void tcp_kstat2_fini(netstackid_t, kstat_t *); static int tcp_kstat_update(kstat_t *kp, int rw); void tcp_reinput(conn_t *connp, mblk_t *mp, squeue_t *sqp); static int tcp_conn_create_v6(conn_t *lconnp, conn_t *connp, mblk_t *mp, @@ -1028,10 +951,10 @@ void tcp_clean_death_wrapper(void *arg, mblk_t *mp, void *arg2); static mblk_t *tcp_ioctl_abort_build_msg(tcp_ioc_abort_conn_t *, tcp_t *); static void tcp_ioctl_abort_dump(tcp_ioc_abort_conn_t *); static void tcp_ioctl_abort_handler(tcp_t *, mblk_t *); -static int tcp_ioctl_abort(tcp_ioc_abort_conn_t *); +static int tcp_ioctl_abort(tcp_ioc_abort_conn_t *, tcp_stack_t *tcps); static void tcp_ioctl_abort_conn(queue_t *, mblk_t *); static int tcp_ioctl_abort_bucket(tcp_ioc_abort_conn_t *, int, int *, - boolean_t); + boolean_t, tcp_stack_t *); static struct module_info tcp_rinfo = { TCP_MOD_ID, TCP_MOD_NAME, 0, INFPSZ, TCP_RECV_HIWATER, TCP_RECV_LOWATER @@ -1096,49 +1019,11 @@ struct streamtab tcpinfo = { &tcp_rinit, &tcp_winit }; -extern squeue_func_t tcp_squeue_wput_proc; -extern squeue_func_t tcp_squeue_timer_proc; - -/* Protected by tcp_g_q_lock */ -static queue_t *tcp_g_q; /* Default queue used during detached closes */ -kmutex_t tcp_g_q_lock; - -/* Protected by tcp_hsp_lock */ -/* - * XXX The host param mechanism should go away and instead we should use - * the metrics associated with the routes to determine the default sndspace - * and rcvspace. - */ -static tcp_hsp_t **tcp_hsp_hash; /* Hash table for HSPs */ -krwlock_t tcp_hsp_lock; - -/* - * Extra privileged ports. In host byte order. - * Protected by tcp_epriv_port_lock. - */ -#define TCP_NUM_EPRIV_PORTS 64 -static int tcp_g_num_epriv_ports = TCP_NUM_EPRIV_PORTS; -static uint16_t tcp_g_epriv_ports[TCP_NUM_EPRIV_PORTS] = { 2049, 4045 }; -kmutex_t tcp_epriv_port_lock; - /* - * The smallest anonymous port in the privileged port range which TCP - * looks for free port. Use in the option TCP_ANONPRIVBIND. + * Have to ensure that tcp_g_q_close is not done by an + * interrupt thread. 
*/ -static in_port_t tcp_min_anonpriv_port = 512; - -/* Only modified during _init and _fini thus no locking is needed. */ -static caddr_t tcp_g_nd; /* Head of 'named dispatch' variable list */ - -/* Hint not protected by any lock */ -static uint_t tcp_next_port_to_try; - - -/* TCP bind hash list - all tcp_t with state >= BOUND. */ -tf_t tcp_bind_fanout[TCP_BIND_FANOUT_SIZE]; - -/* TCP queue hash list - all tcp_t in case they will be an acceptor. */ -static tf_t tcp_acceptor_fanout[TCP_FANOUT_SIZE]; +static taskq_t *tcp_taskq; /* * TCP has a private interface for other kernel modules to reserve a @@ -1171,23 +1056,9 @@ typedef struct tcp_rport_s { tcp_t **temp_tcp_array; } tcp_rport_t; -/* The reserved port array. */ -static tcp_rport_t tcp_reserved_port[TCP_RESERVED_PORTS_ARRAY_MAX_SIZE]; - -/* Locks to protect the tcp_reserved_ports array. */ -static krwlock_t tcp_reserved_port_lock; - -/* The number of ranges in the array. */ -uint32_t tcp_reserved_port_array_size = 0; - -/* - * MIB-2 stuff for SNMP - * Note: tcpInErrs {tcp 15} is accumulated in ip.c - */ -mib2_tcp_t tcp_mib; /* SNMP fixed size info */ -kstat_t *tcp_mibkp; /* kstat exporting tcp_mib data */ - +/* Setable only in /etc/system. Move to ndd? */ boolean_t tcp_icmp_source_quench = B_FALSE; + /* * Following assumes TPI alignment requirements stay along 32 bit * boundaries @@ -1245,8 +1116,8 @@ static struct T_info_ack tcp_g_t_info_ack_v6 = { * tcp_wroff_xtra is the extra space in front of TCP/IP header for link * layer header. It has to be a multiple of 4. */ -static tcpparam_t tcp_wroff_xtra_param = { 0, 256, 32, "tcp_wroff_xtra" }; -#define tcp_wroff_xtra tcp_wroff_xtra_param.tcp_param_val +static tcpparam_t lcl_tcp_wroff_xtra_param = { 0, 256, 32, "tcp_wroff_xtra" }; +#define tcps_wroff_xtra tcps_wroff_xtra_param->tcp_param_val /* * All of these are alterable, within the min/max values given, at run time. @@ -1254,7 +1125,7 @@ static tcpparam_t tcp_wroff_xtra_param = { 0, 256, 32, "tcp_wroff_xtra" }; * per the TCP spec. */ /* BEGIN CSTYLED */ -tcpparam_t tcp_param_arr[] = { +static tcpparam_t lcl_tcp_param_arr[] = { /*min max value name */ { 1*SECONDS, 10*MINUTES, 1*MINUTES, "tcp_time_wait_interval"}, { 1, PARAM_MAX, 128, "tcp_conn_req_max_q" }, @@ -1331,18 +1202,20 @@ tcpparam_t tcp_param_arr[] = { * each header fragment in the header buffer. Each parameter value has * to be a multiple of 4 (32-bit aligned). */ -static tcpparam_t tcp_mdt_head_param = { 32, 256, 32, "tcp_mdt_hdr_head_min" }; -static tcpparam_t tcp_mdt_tail_param = { 0, 256, 32, "tcp_mdt_hdr_tail_min" }; -#define tcp_mdt_hdr_head_min tcp_mdt_head_param.tcp_param_val -#define tcp_mdt_hdr_tail_min tcp_mdt_tail_param.tcp_param_val +static tcpparam_t lcl_tcp_mdt_head_param = + { 32, 256, 32, "tcp_mdt_hdr_head_min" }; +static tcpparam_t lcl_tcp_mdt_tail_param = + { 0, 256, 32, "tcp_mdt_hdr_tail_min" }; +#define tcps_mdt_hdr_head_min tcps_mdt_head_param->tcp_param_val +#define tcps_mdt_hdr_tail_min tcps_mdt_tail_param->tcp_param_val /* * tcp_mdt_max_pbufs is the upper limit value that tcp uses to figure out * the maximum number of payload buffers associated per Multidata. */ -static tcpparam_t tcp_mdt_max_pbufs_param = +static tcpparam_t lcl_tcp_mdt_max_pbufs_param = { 1, MULTIDATA_MAX_PBUFS, MULTIDATA_MAX_PBUFS, "tcp_mdt_max_pbufs" }; -#define tcp_mdt_max_pbufs tcp_mdt_max_pbufs_param.tcp_param_val +#define tcps_mdt_max_pbufs tcps_mdt_max_pbufs_param->tcp_param_val /* Round up the value to the nearest mss. 
*/ #define MSS_ROUNDUP(value, mss) ((((value) - 1) / (mss) + 1) * (mss)) @@ -1373,14 +1246,6 @@ static tcpparam_t tcp_mdt_max_pbufs_param = #define DISP_PORT_ONLY 1 #define DISP_ADDR_AND_PORT 2 -/* - * This controls the rate some ndd info report functions can be used - * by non-privileged users. It stores the last time such info is - * requested. When those report functions are called again, this - * is checked with the current time and compare with the ndd param - * tcp_ndd_get_info_interval. - */ -static clock_t tcp_last_ndd_get_info_time = 0; #define NDD_TOO_QUICK_MSG \ "ndd get info rate too high for non-privileged users, try again " \ "later.\n" @@ -1389,17 +1254,6 @@ static clock_t tcp_last_ndd_get_info_time = 0; #define IS_VMLOANED_MBLK(mp) \ (((mp)->b_datap->db_struioflag & STRUIO_ZC) != 0) -/* - * These two variables control the rate for TCP to generate RSTs in - * response to segments not belonging to any connections. We limit - * TCP to sent out tcp_rst_sent_rate (ndd param) number of RSTs in - * each 1 second interval. This is to protect TCP against DoS attack. - */ -static clock_t tcp_last_rst_intrvl; -static uint32_t tcp_rst_cnt; - -/* The number of RST not sent because of the rate limit. */ -static uint32_t tcp_rst_unsent; /* Enable or disable b_cont M_MULTIDATA chaining for MDT. */ boolean_t tcp_mdt_chain = B_TRUE; @@ -1414,12 +1268,13 @@ uint_t tcp_mdt_smss_threshold = 1; uint32_t do_tcpzcopy = 1; /* 0: disable, 1: enable, 2: force */ /* - * Forces all connections to obey the value of the tcp_maxpsz_multiplier + * Forces all connections to obey the value of the tcps_maxpsz_multiplier * tunable settable via NDD. Otherwise, the per-connection behavior is * determined dynamically during tcp_adapt_ire(), which is the default. */ boolean_t tcp_static_maxpsz = B_FALSE; +/* Setable in /etc/system */ /* If set to 0, pick ephemeral port sequentially; otherwise randomly. */ uint32_t tcp_random_anon_port = 1; @@ -1559,6 +1414,9 @@ extern uint32_t (*cl_inet_ipident)(uint8_t protocol, sa_family_t addr_family, */ int cl_tcp_walk_list(int (*callback)(cl_tcp_info_t *, void *), void *arg); +static int cl_tcp_walk_list_stack(int (*callback)(cl_tcp_info_t *, void *), + void *arg, tcp_stack_t *tcps); + /* * Figure out the value of window scale opton. Note that the rwnd is * ASSUMED to be rounded up to the nearest MSS before the calculation. @@ -1595,6 +1453,8 @@ tcp_time_wait_remove(tcp_t *tcp, tcp_squeue_priv_t *tcp_time_wait) squeue_getprivate(tcp->tcp_connp->conn_sqp, SQPRIVATE_TCP)); mutex_enter(&tcp_time_wait->tcp_time_wait_lock); locked = B_TRUE; + } else { + ASSERT(MUTEX_HELD(&tcp_time_wait->tcp_time_wait_lock)); } if (tcp->tcp_time_wait_expire == 0) { @@ -1646,6 +1506,7 @@ tcp_time_wait_remove(tcp_t *tcp, tcp_squeue_priv_t *tcp_time_wait) static void tcp_time_wait_append(tcp_t *tcp) { + tcp_stack_t *tcps = tcp->tcp_tcps; tcp_squeue_priv_t *tcp_time_wait = *((tcp_squeue_priv_t **)squeue_getprivate(tcp->tcp_connp->conn_sqp, SQPRIVATE_TCP)); @@ -1675,7 +1536,7 @@ tcp_time_wait_append(tcp_t *tcp) * modular arithmetic. 
*/ tcp->tcp_time_wait_expire += - drv_usectohz(tcp_time_wait_interval * 1000); + drv_usectohz(tcps->tcps_time_wait_interval * 1000); if (tcp->tcp_time_wait_expire == 0) tcp->tcp_time_wait_expire = 1; @@ -1683,7 +1544,8 @@ tcp_time_wait_append(tcp_t *tcp) ASSERT(tcp->tcp_state == TCPS_TIME_WAIT); ASSERT(tcp->tcp_time_wait_next == NULL); ASSERT(tcp->tcp_time_wait_prev == NULL); - TCP_DBGSTAT(tcp_time_wait); + TCP_DBGSTAT(tcps, tcp_time_wait); + mutex_enter(&tcp_time_wait->tcp_time_wait_lock); if (tcp_time_wait->tcp_time_wait_head == NULL) { ASSERT(tcp_time_wait->tcp_time_wait_tail == NULL); @@ -1705,6 +1567,7 @@ tcp_timewait_output(void *arg, mblk_t *mp, void *arg2) { conn_t *connp = (conn_t *)arg; tcp_t *tcp = connp->conn_tcp; + tcp_stack_t *tcps = tcp->tcp_tcps; ASSERT(tcp != NULL); if (tcp->tcp_state == TCPS_CLOSED) { @@ -1718,7 +1581,7 @@ tcp_timewait_output(void *arg, mblk_t *mp, void *arg2) tcp->tcp_ipversion == IPV6_VERSION))); ASSERT(!tcp->tcp_listener); - TCP_STAT(tcp_time_wait_reap); + TCP_STAT(tcps, tcp_time_wait_reap); ASSERT(TCP_IS_DETACHED(tcp)); /* @@ -1728,6 +1591,32 @@ tcp_timewait_output(void *arg, mblk_t *mp, void *arg2) tcp_close_detached(tcp); } +/* + * Remove cached/latched IPsec references. + */ +void +tcp_ipsec_cleanup(tcp_t *tcp) +{ + conn_t *connp = tcp->tcp_connp; + + if (connp->conn_flags & IPCL_TCPCONN) { + if (connp->conn_latch != NULL) { + IPLATCH_REFRELE(connp->conn_latch, + connp->conn_netstack); + connp->conn_latch = NULL; + } + if (connp->conn_policy != NULL) { + IPPH_REFRELE(connp->conn_policy, connp->conn_netstack); + connp->conn_policy = NULL; + } + } +} + +/* + * Cleaup before placing on free list. + * Disassociate from the netstack/tcp_stack_t since the freelist + * is per squeue and not per netstack. + */ void tcp_cleanup(tcp_t *tcp) { @@ -1737,8 +1626,14 @@ tcp_cleanup(tcp_t *tcp) int tcp_hdr_grown; tcp_sack_info_t *tcp_sack_info; conn_t *connp = tcp->tcp_connp; + tcp_stack_t *tcps = tcp->tcp_tcps; + netstack_t *ns = tcps->tcps_netstack; tcp_bind_hash_remove(tcp); + + /* Cleanup that which needs the netstack first */ + tcp_ipsec_cleanup(tcp); + tcp_free(tcp); /* Release any SSL context */ @@ -1754,12 +1649,6 @@ tcp_cleanup(tcp_t *tcp) tcp->tcp_kssl_pending = B_FALSE; conn_delete_ire(connp, NULL); - if (connp->conn_flags & IPCL_TCPCONN) { - if (connp->conn_latch != NULL) - IPLATCH_REFRELE(connp->conn_latch); - if (connp->conn_policy != NULL) - IPPH_REFRELE(connp->conn_policy); - } /* * Since we will bzero the entire structure, we need to @@ -1772,6 +1661,18 @@ tcp_cleanup(tcp_t *tcp) */ ipcl_globalhash_remove(connp); + /* + * Now it is safe to decrement the reference counts. + * This might be the last reference on the netstack and TCPS + * in which case it will cause the tcp_g_q_close and + * the freeing of the IP Instance. + */ + connp->conn_netstack = NULL; + netstack_rele(ns); + ASSERT(tcps != NULL); + tcp->tcp_tcps = NULL; + TCPS_REFRELE(tcps); + /* Save some state */ mp = tcp->tcp_timercache; @@ -1803,13 +1704,13 @@ tcp_cleanup(tcp_t *tcp) connp->conn_state_flags = CONN_INCIPIENT; connp->conn_ulp = IPPROTO_TCP; connp->conn_ref = 1; - - ipcl_globalhash_insert(connp); } /* * Blows away all tcps whose TIME_WAIT has expired. List traversal * is done forwards from the head. + * This walks all stack instances since + * tcp_time_wait remains global across all stacks. 
*/ /* ARGSUSED */ void @@ -1831,12 +1732,15 @@ tcp_time_wait_collector(void *arg) if (tcp_time_wait->tcp_free_list != NULL && tcp_time_wait->tcp_free_list->tcp_in_free_list == B_TRUE) { - TCP_STAT(tcp_freelist_cleanup); + TCP_G_STAT(tcp_freelist_cleanup); while ((tcp = tcp_time_wait->tcp_free_list) != NULL) { tcp_time_wait->tcp_free_list = tcp->tcp_time_wait_next; + tcp->tcp_time_wait_next = NULL; + tcp_time_wait->tcp_free_list_cnt--; + ASSERT(tcp->tcp_tcps == NULL); CONN_DEC_REF(tcp->tcp_connp); } - tcp_time_wait->tcp_free_list_cnt = 0; + ASSERT(tcp_time_wait->tcp_free_list_cnt == 0); } /* @@ -1904,6 +1808,11 @@ tcp_time_wait_collector(void *arg) mutex_exit( &tcp_time_wait->tcp_time_wait_lock); tcp_cleanup(tcp); + ASSERT(connp->conn_latch == NULL); + ASSERT(connp->conn_policy == NULL); + ASSERT(tcp->tcp_tcps == NULL); + ASSERT(connp->conn_netstack == NULL); + mutex_enter( &tcp_time_wait->tcp_time_wait_lock); tcp->tcp_time_wait_next = @@ -1917,6 +1826,7 @@ tcp_time_wait_collector(void *arg) &tcp_time_wait->tcp_time_wait_lock); tcp_bind_hash_remove(tcp); conn_delete_ire(tcp->tcp_connp, NULL); + tcp_ipsec_cleanup(tcp); CONN_DEC_REF(tcp->tcp_connp); } } else { @@ -1984,7 +1894,6 @@ tcp_time_wait_collector(void *arg) timeout(tcp_time_wait_collector, sqp, TCP_TIME_WAIT_DELAY); mutex_exit(&tcp_time_wait->tcp_time_wait_lock); } - /* * Reply to a clients T_CONN_RES TPI message. This function * is used only for TLI/XTI listener. Sockfs sends T_CONN_RES @@ -2003,6 +1912,7 @@ tcp_accept(tcp_t *listener, mblk_t *mp) mblk_t *opt_mp = NULL; /* T_OPTMGMT_REQ messages */ mblk_t *ok_mp; mblk_t *mp1; + tcp_stack_t *tcps = listener->tcp_tcps; if ((mp->b_wptr - mp->b_rptr) < sizeof (*tcr)) { tcp_err_ack(listener, mp, TPROTO, 0); @@ -2071,7 +1981,7 @@ tcp_accept(tcp_t *listener, mblk_t *mp) acceptor = listener; CONN_INC_REF(acceptor->tcp_connp); } else { - acceptor = tcp_acceptor_hash_lookup(acceptor_id); + acceptor = tcp_acceptor_hash_lookup(acceptor_id, tcps); if (acceptor == NULL) { if (listener->tcp_debug) { (void) strlog(TCP_MOD_ID, 0, 1, @@ -2415,8 +2325,9 @@ tcp_accept(tcp_t *listener, mblk_t *mp) */ finish: ASSERT(acceptor->tcp_detached); - acceptor->tcp_rq = tcp_g_q; - acceptor->tcp_wq = WR(tcp_g_q); + ASSERT(tcps->tcps_g_q != NULL); + acceptor->tcp_rq = tcps->tcps_g_q; + acceptor->tcp_wq = WR(tcps->tcps_g_q); (void) tcp_clean_death(acceptor, 0, 2); CONN_DEC_REF(acceptor->tcp_connp); @@ -2515,6 +2426,9 @@ tcp_accept_swap(tcp_t *listener, tcp_t *acceptor, tcp_t *eager) if (eager->tcp_cred != NULL) crfree(eager->tcp_cred); eager->tcp_cred = econnp->conn_cred = aconnp->conn_cred; + ASSERT(econnp->conn_netstack == aconnp->conn_netstack); + ASSERT(eager->tcp_tcps == acceptor->tcp_tcps); + aconnp->conn_cred = NULL; econnp->conn_zoneid = aconnp->conn_zoneid; @@ -2591,13 +2505,15 @@ tcp_adapt_ire(tcp_t *tcp, mblk_t *ire_mp) ts_label_t *tsl = crgetlabel(CONN_CRED(connp)); ill_t *ill = NULL; boolean_t incoming = (ire_mp == NULL); + tcp_stack_t *tcps = tcp->tcp_tcps; + ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip; ASSERT(connp->conn_ire_cache == NULL); if (tcp->tcp_ipversion == IPV4_VERSION) { if (CLASSD(tcp->tcp_connp->conn_rem)) { - BUMP_MIB(&ip_mib, ipIfStatsInDiscards); + BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsInDiscards); return (0); } /* @@ -2620,12 +2536,13 @@ tcp_adapt_ire(tcp_t *tcp, mblk_t *ire_mp) if (tcp->tcp_connp->conn_nexthop_set) { ire = ire_ctable_lookup(tcp->tcp_connp->conn_rem, tcp->tcp_connp->conn_nexthop_v4, 0, NULL, zoneid, - tsl, MATCH_IRE_MARK_PRIVATE_ADDR | MATCH_IRE_GW); + tsl, 
MATCH_IRE_MARK_PRIVATE_ADDR | MATCH_IRE_GW, + ipst); if (ire == NULL) { ire = ire_ftable_lookup( tcp->tcp_connp->conn_nexthop_v4, 0, 0, IRE_INTERFACE, NULL, NULL, zoneid, 0, - tsl, match_flags); + tsl, match_flags, ipst); if (ire == NULL) return (0); } else { @@ -2633,7 +2550,7 @@ tcp_adapt_ire(tcp_t *tcp, mblk_t *ire_mp) } } else { ire = ire_cache_lookup(tcp->tcp_connp->conn_rem, - zoneid, tsl); + zoneid, tsl, ipst); if (ire != NULL) { ire_cacheable = B_TRUE; ire_uinfo = (ire_mp != NULL) ? @@ -2646,7 +2563,7 @@ tcp_adapt_ire(tcp_t *tcp, mblk_t *ire_mp) tcp->tcp_connp->conn_rem, 0, 0, 0, NULL, &sire, zoneid, 0, tsl, (MATCH_IRE_RECURSIVE | - MATCH_IRE_DEFAULT)); + MATCH_IRE_DEFAULT), ipst); if (ire == NULL) return (0); ire_uinfo = (sire != NULL) ? @@ -2695,7 +2612,7 @@ tcp_adapt_ire(tcp_t *tcp, mblk_t *ire_mp) * should change. IP tells us the latest setting of * ip_path_mtu_discovery through ire_frag_flag. */ - if (ip_path_mtu_discovery) { + if (ipst->ips_ip_path_mtu_discovery) { tcp->tcp_ipha->ipha_fragment_offset_and_flags = htons(IPH_DF); } @@ -2741,7 +2658,7 @@ tcp_adapt_ire(tcp_t *tcp, mblk_t *ire_mp) dst_ipif = dst_ill->ill_ipif; } ire = ire_ctable_lookup_v6(&tcp->tcp_connp->conn_remv6, - 0, 0, dst_ipif, zoneid, tsl, match_flags); + 0, 0, dst_ipif, zoneid, tsl, match_flags, ipst); if (ire != NULL) { ire_cacheable = B_TRUE; @@ -2753,7 +2670,7 @@ tcp_adapt_ire(tcp_t *tcp, mblk_t *ire_mp) ire = ire_ftable_lookup_v6( &tcp->tcp_connp->conn_remv6, 0, 0, 0, dst_ipif, &sire, zoneid, - 0, tsl, match_flags); + 0, tsl, match_flags, ipst); if (ire == NULL) { if (dst_ill != NULL) ill_refrele(dst_ill); @@ -2834,12 +2751,13 @@ tcp_adapt_ire(tcp_t *tcp, mblk_t *ire_mp) tcp->tcp_rtt_sa = ire_uinfo->iulp_rtt; tcp->tcp_rtt_sd = ire_uinfo->iulp_rtt_sd; rto = (tcp->tcp_rtt_sa >> 3) + tcp->tcp_rtt_sd + - tcp_rexmit_interval_extra + (tcp->tcp_rtt_sa >> 5); + tcps->tcps_rexmit_interval_extra + + (tcp->tcp_rtt_sa >> 5); - if (rto > tcp_rexmit_interval_max) { - tcp->tcp_rto = tcp_rexmit_interval_max; - } else if (rto < tcp_rexmit_interval_min) { - tcp->tcp_rto = tcp_rexmit_interval_min; + if (rto > tcps->tcps_rexmit_interval_max) { + tcp->tcp_rto = tcps->tcps_rexmit_interval_max; + } else if (rto < tcps->tcps_rexmit_interval_min) { + tcp->tcp_rto = tcps->tcps_rexmit_interval_min; } else { tcp->tcp_rto = rto; } @@ -2850,10 +2768,10 @@ tcp_adapt_ire(tcp_t *tcp, mblk_t *ire_mp) tcp->tcp_cwnd_ssthresh = TCP_MAX_LARGEWIN; if (ire_uinfo->iulp_spipe > 0) { tcp->tcp_xmit_hiwater = MIN(ire_uinfo->iulp_spipe, - tcp_max_buf); - if (tcp_snd_lowat_fraction != 0) + tcps->tcps_max_buf); + if (tcps->tcps_snd_lowat_fraction != 0) tcp->tcp_xmit_lowater = tcp->tcp_xmit_hiwater / - tcp_snd_lowat_fraction; + tcps->tcps_snd_lowat_fraction; (void) tcp_maxpsz_set(tcp, B_TRUE); } /* @@ -2864,7 +2782,8 @@ tcp_adapt_ire(tcp_t *tcp, mblk_t *ire_mp) * info back to the caller. */ if (ire_uinfo->iulp_rpipe > 0) { - tcp->tcp_rwnd = MIN(ire_uinfo->iulp_rpipe, tcp_max_buf); + tcp->tcp_rwnd = MIN(ire_uinfo->iulp_rpipe, + tcps->tcps_max_buf); } if (ire_uinfo->iulp_rtomax > 0) { @@ -2940,9 +2859,9 @@ tcp_adapt_ire(tcp_t *tcp, mblk_t *ire_mp) /* Sanity check for MSS value. 
*/ if (tcp->tcp_ipversion == IPV4_VERSION) - mss_max = tcp_mss_max_ipv4; + mss_max = tcps->tcps_mss_max_ipv4; else - mss_max = tcp_mss_max_ipv6; + mss_max = tcps->tcps_mss_max_ipv6; if (tcp->tcp_ipversion == IPV6_VERSION && (ire->ire_frag_flag & IPH_FRAG_HDR)) { @@ -2960,8 +2879,8 @@ tcp_adapt_ire(tcp_t *tcp, mblk_t *ire_mp) mss -= tcp->tcp_ipsec_overhead; - if (mss < tcp_mss_min) - mss = tcp_mss_min; + if (mss < tcps->tcps_mss_min) + mss = tcps->tcps_mss_min; if (mss > mss_max) mss = mss_max; @@ -2980,18 +2899,18 @@ tcp_adapt_ire(tcp_t *tcp, mblk_t *ire_mp) tcp->tcp_loopback = B_TRUE; if (tcp->tcp_ipversion == IPV4_VERSION) { - hsp = tcp_hsp_lookup(tcp->tcp_remote); + hsp = tcp_hsp_lookup(tcp->tcp_remote, tcps); } else { - hsp = tcp_hsp_lookup_ipv6(&tcp->tcp_remote_v6); + hsp = tcp_hsp_lookup_ipv6(&tcp->tcp_remote_v6, tcps); } if (hsp != NULL) { /* Only modify if we're going to make them bigger */ if (hsp->tcp_hsp_sendspace > tcp->tcp_xmit_hiwater) { tcp->tcp_xmit_hiwater = hsp->tcp_hsp_sendspace; - if (tcp_snd_lowat_fraction != 0) + if (tcps->tcps_snd_lowat_fraction != 0) tcp->tcp_xmit_lowater = tcp->tcp_xmit_hiwater / - tcp_snd_lowat_fraction; + tcps->tcps_snd_lowat_fraction; } if (hsp->tcp_hsp_recvspace > tcp->tcp_rwnd) { @@ -3082,6 +3001,7 @@ tcp_bind(tcp_t *tcp, mblk_t *mp) zone_t *zone; cred_t *cr; in_port_t mlp_port; + tcp_stack_t *tcps = tcp->tcp_tcps; ASSERT((uintptr_t)(mp->b_wptr - mp->b_rptr) <= (uintptr_t)INT_MAX); if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { @@ -3266,7 +3186,8 @@ tcp_bind(tcp_t *tcp, mblk_t *mp) if (requested_port == 0) { requested_port = tcp->tcp_anon_priv_bind ? tcp_get_next_priv_port(tcp) : - tcp_update_next_port(tcp_next_port_to_try, tcp, B_TRUE); + tcp_update_next_port(tcps->tcps_next_port_to_try, + tcp, B_TRUE); if (requested_port == 0) { tcp_err_ack(tcp, mp, TNOADDR, 0); return; @@ -3283,7 +3204,8 @@ tcp_bind(tcp_t *tcp, mblk_t *mp) if (connp->conn_anon_mlp && is_system_labeled()) { zone = crgetzone(cr); addrtype = tsol_mlp_addr_type(zone->zone_id, - IPV6_VERSION, &v6addr); + IPV6_VERSION, &v6addr, + tcps->tcps_netstack->netstack_ip); if (addrtype == mlptSingle) { tcp_err_ack(tcp, mp, TNOADDR, 0); return; @@ -3306,12 +3228,12 @@ tcp_bind(tcp_t *tcp, mblk_t *mp) * - the atomic assignment of the elements of the array */ cr = DB_CREDDEF(mp, tcp->tcp_cred); - if (requested_port < tcp_smallest_nonpriv_port) { + if (requested_port < tcps->tcps_smallest_nonpriv_port) { priv = B_TRUE; } else { - for (i = 0; i < tcp_g_num_epriv_ports; i++) { + for (i = 0; i < tcps->tcps_g_num_epriv_ports; i++) { if (requested_port == - tcp_g_epriv_ports[i]) { + tcps->tcps_g_epriv_ports[i]) { priv = B_TRUE; break; } @@ -3335,7 +3257,8 @@ tcp_bind(tcp_t *tcp, mblk_t *mp) if (is_system_labeled()) { zone = crgetzone(cr); addrtype = tsol_mlp_addr_type(zone->zone_id, - IPV6_VERSION, &v6addr); + IPV6_VERSION, &v6addr, + tcps->tcps_netstack->netstack_ip); if (addrtype == mlptSingle) { tcp_err_ack(tcp, mp, TNOADDR, 0); return; @@ -3363,6 +3286,10 @@ tcp_bind(tcp_t *tcp, mblk_t *mp) * zone actually owns the MLP. Reject if not. */ if (mlptype == mlptShared && addrtype == mlptShared) { + /* + * No need to handle exclusive-stack zones since + * ALL_ZONES only applies to the shared stack. 
+ */ zoneid_t mlpzone; mlpzone = tsol_mlp_findzone(IPPROTO_TCP, @@ -3475,10 +3402,10 @@ do_bind: tcp->tcp_conn_req_max = tbr->CONIND_number; if (tcp->tcp_conn_req_max) { - if (tcp->tcp_conn_req_max < tcp_conn_req_min) - tcp->tcp_conn_req_max = tcp_conn_req_min; - if (tcp->tcp_conn_req_max > tcp_conn_req_max_q) - tcp->tcp_conn_req_max = tcp_conn_req_max_q; + if (tcp->tcp_conn_req_max < tcps->tcps_conn_req_min) + tcp->tcp_conn_req_max = tcps->tcps_conn_req_min; + if (tcp->tcp_conn_req_max > tcps->tcps_conn_req_max_q) + tcp->tcp_conn_req_max = tcps->tcps_conn_req_max_q; /* * If this is a listener, do not reset the eager list * and other stuffs. Note that we don't check if the @@ -3492,7 +3419,7 @@ do_bind: tcp->tcp_eager_next_drop_q0 = tcp; tcp->tcp_eager_prev_drop_q0 = tcp; tcp->tcp_second_ctimer_threshold = - tcp_ip_abort_linterval; + tcps->tcps_ip_abort_linterval; } } @@ -3552,6 +3479,7 @@ tcp_bindi(tcp_t *tcp, in_port_t port, const in6_addr_t *laddr, int loopmax; conn_t *connp = tcp->tcp_connp; zoneid_t zoneid = connp->conn_zoneid; + tcp_stack_t *tcps = tcp->tcp_tcps; /* * Lookup for free addresses is done in a loop and "loopmax" @@ -3576,10 +3504,11 @@ tcp_bindi(tcp_t *tcp, in_port_t port, const in6_addr_t *laddr, * loopmax = * (IPPORT_RESERVED-1) - tcp_min_anonpriv_port + 1 */ - loopmax = IPPORT_RESERVED - tcp_min_anonpriv_port; + loopmax = IPPORT_RESERVED - + tcps->tcps_min_anonpriv_port; } else { - loopmax = (tcp_largest_anon_port - - tcp_smallest_anon_port + 1); + loopmax = (tcps->tcps_largest_anon_port - + tcps->tcps_smallest_anon_port + 1); } } do { @@ -3602,7 +3531,7 @@ tcp_bindi(tcp_t *tcp, in_port_t port, const in6_addr_t *laddr, * doing a CONN_INC_REF. */ tcp_bind_hash_remove(tcp); - tbf = &tcp_bind_fanout[TCP_BIND_HASH(lport)]; + tbf = &tcps->tcps_bind_fanout[TCP_BIND_HASH(lport)]; mutex_enter(&tbf->tf_lock); for (ltcp = tbf->tf_tcp; ltcp != NULL; ltcp = ltcp->tcp_bind_hash) { @@ -3776,7 +3705,7 @@ tcp_bindi(tcp_t *tcp, in_port_t port, const in6_addr_t *laddr, tcp->tcp_lport = htons(port); *(uint16_t *)tcp->tcp_tcph->th_lport = tcp->tcp_lport; - ASSERT(&tcp_bind_fanout[TCP_BIND_HASH( + ASSERT(&tcps->tcps_bind_fanout[TCP_BIND_HASH( tcp->tcp_lport)] == tbf); tcp_bind_hash_insert(tbf, tcp, 1); @@ -3795,7 +3724,7 @@ tcp_bindi(tcp_t *tcp, in_port_t port, const in6_addr_t *laddr, * be in the valid range. */ if (!tcp->tcp_anon_priv_bind) - tcp_next_port_to_try = port + 1; + tcps->tcps_next_port_to_try = port + 1; return (port); } @@ -3808,7 +3737,8 @@ tcp_bindi(tcp_t *tcp, in_port_t port, const in6_addr_t *laddr, * get one to start with. 
*/ port = - tcp_update_next_port(tcp_next_port_to_try, + tcp_update_next_port( + tcps->tcps_next_port_to_try, tcp, B_TRUE); user_specified = B_FALSE; } else { @@ -3859,6 +3789,7 @@ tcp_clean_death(tcp_t *tcp, int err, uint8_t tag) { mblk_t *mp; queue_t *q; + tcp_stack_t *tcps = tcp->tcp_tcps; TCP_CLD_STAT(tag); @@ -3907,7 +3838,7 @@ tcp_clean_death(tcp_t *tcp, int err, uint8_t tag) return (0); } - TCP_STAT(tcp_clean_death_nondetached); + TCP_STAT(tcps, tcp_clean_death_nondetached); /* * If T_ORDREL_IND has not been sent yet (done when service routine @@ -3960,10 +3891,10 @@ tcp_clean_death(tcp_t *tcp, int err, uint8_t tag) } if (tcp->tcp_state <= TCPS_SYN_RCVD) { /* SYN_SENT or SYN_RCVD */ - BUMP_MIB(&tcp_mib, tcpAttemptFails); + BUMP_MIB(&tcps->tcps_mib, tcpAttemptFails); } else if (tcp->tcp_state <= TCPS_CLOSE_WAIT) { /* ESTABLISHED or CLOSE_WAIT */ - BUMP_MIB(&tcp_mib, tcpEstabResets); + BUMP_MIB(&tcps->tcps_mib, tcpEstabResets); } } @@ -3979,6 +3910,7 @@ static void tcp_stop_lingering(tcp_t *tcp) { clock_t delta = 0; + tcp_stack_t *tcps = tcp->tcp_tcps; tcp->tcp_linger_tid = 0; if (tcp->tcp_state > TCPS_LISTEN) { @@ -4002,12 +3934,13 @@ tcp_stop_lingering(tcp_t *tcp) tcp->tcp_detached = B_TRUE; - tcp->tcp_rq = tcp_g_q; - tcp->tcp_wq = WR(tcp_g_q); + ASSERT(tcps->tcps_g_q != NULL); + tcp->tcp_rq = tcps->tcps_g_q; + tcp->tcp_wq = WR(tcps->tcps_g_q); if (tcp->tcp_state == TCPS_TIME_WAIT) { tcp_time_wait_append(tcp); - TCP_DBGSTAT(tcp_detach_time_wait); + TCP_DBGSTAT(tcps, tcp_detach_time_wait); goto finish; } @@ -4028,8 +3961,9 @@ finish: /* Signal closing thread that it can complete close */ mutex_enter(&tcp->tcp_closelock); tcp->tcp_detached = B_TRUE; - tcp->tcp_rq = tcp_g_q; - tcp->tcp_wq = WR(tcp_g_q); + ASSERT(tcps->tcps_g_q != NULL); + tcp->tcp_rq = tcps->tcps_g_q; + tcp->tcp_wq = WR(tcps->tcps_g_q); tcp->tcp_closed = 1; cv_signal(&tcp->tcp_closecv); mutex_exit(&tcp->tcp_closelock); @@ -4225,6 +4159,7 @@ tcp_close_output(void *arg, mblk_t *mp, void *arg2) conn_t *connp = (conn_t *)arg; tcp_t *tcp = connp->conn_tcp; clock_t delta = 0; + tcp_stack_t *tcps = tcp->tcp_tcps; ASSERT((connp->conn_fanout != NULL && connp->conn_ref >= 4) || (connp->conn_fanout == NULL && connp->conn_ref >= 3)); @@ -4369,7 +4304,7 @@ tcp_close_output(void *arg, mblk_t *mp, void *arg2) tcp->tcp_detached = B_TRUE; if (tcp->tcp_state == TCPS_TIME_WAIT) { tcp_time_wait_append(tcp); - TCP_DBGSTAT(tcp_detach_time_wait); + TCP_DBGSTAT(tcps, tcp_detach_time_wait); ASSERT(connp->conn_ref >= 3); goto finish; } @@ -4391,10 +4326,10 @@ tcp_close_output(void *arg, mblk_t *mp, void *arg2) if (msg) { if (tcp->tcp_state == TCPS_ESTABLISHED || tcp->tcp_state == TCPS_CLOSE_WAIT) - BUMP_MIB(&tcp_mib, tcpEstabResets); + BUMP_MIB(&tcps->tcps_mib, tcpEstabResets); if (tcp->tcp_state == TCPS_SYN_SENT || tcp->tcp_state == TCPS_SYN_RCVD) - BUMP_MIB(&tcp_mib, tcpAttemptFails); + BUMP_MIB(&tcps->tcps_mib, tcpAttemptFails); tcp_xmit_ctl(msg, tcp, tcp->tcp_snxt, 0, TH_RST); } @@ -4407,13 +4342,13 @@ finish: * Although packets are always processed on the correct * tcp's perimeter and access is serialized via squeue's, * IP still needs a queue when sending packets in time_wait - * state so use WR(tcp_g_q) till ip_output() can be + * state so use WR(tcps_g_q) till ip_output() can be * changed to deal with just connp. For read side, we * could have set tcp_rq to NULL but there are some cases * in tcp_rput_data() from early days of this code which * do a putnext without checking if tcp is closed. 
Those * need to be identified before both tcp_rq and tcp_wq - * can be set to NULL and tcp_q_q can disappear forever. + * can be set to NULL and tcps_g_q can disappear forever. */ mutex_enter(&tcp->tcp_closelock); /* @@ -4423,8 +4358,13 @@ finish: */ if (!tcp->tcp_wait_for_eagers) { tcp->tcp_detached = B_TRUE; - tcp->tcp_rq = tcp_g_q; - tcp->tcp_wq = WR(tcp_g_q); + /* + * When default queue is closing we set tcps_g_q to NULL + * after the close is done. + */ + ASSERT(tcps->tcps_g_q != NULL); + tcp->tcp_rq = tcps->tcps_g_q; + tcp->tcp_wq = WR(tcps->tcps_g_q); } /* Signal tcp_close() to finish closing. */ @@ -4509,13 +4449,14 @@ tcp_closei_local(tcp_t *tcp) { ire_t *ire; conn_t *connp = tcp->tcp_connp; + tcp_stack_t *tcps = tcp->tcp_tcps; if (!TCP_IS_SOCKET(tcp)) tcp_acceptor_hash_remove(tcp); - UPDATE_MIB(&tcp_mib, tcpHCInSegs, tcp->tcp_ibsegs); + UPDATE_MIB(&tcps->tcps_mib, tcpHCInSegs, tcp->tcp_ibsegs); tcp->tcp_ibsegs = 0; - UPDATE_MIB(&tcp_mib, tcpHCOutSegs, tcp->tcp_obsegs); + UPDATE_MIB(&tcps->tcps_mib, tcpHCOutSegs, tcp->tcp_obsegs); tcp->tcp_obsegs = 0; /* @@ -4544,8 +4485,9 @@ tcp_closei_local(tcp_t *tcp) * listener queue, after we have released our * reference on the listener */ - tcp->tcp_rq = tcp_g_q; - tcp->tcp_wq = WR(tcp_g_q); + ASSERT(tcps->tcps_g_q != NULL); + tcp->tcp_rq = tcps->tcps_g_q; + tcp->tcp_wq = WR(tcps->tcps_g_q); CONN_DEC_REF(listener->tcp_connp); } else { mutex_exit(&listener->tcp_eager_lock); @@ -4609,6 +4551,8 @@ tcp_closei_local(tcp_t *tcp) tcp->tcp_kssl_ctx = NULL; } tcp->tcp_kssl_pending = B_FALSE; + + tcp_ipsec_cleanup(tcp); } /* @@ -4812,6 +4756,7 @@ tcp_drop_q0(tcp_t *tcp) { tcp_t *eager; mblk_t *mp; + tcp_stack_t *tcps = tcp->tcp_tcps; ASSERT(MUTEX_HELD(&tcp->tcp_eager_lock)); ASSERT(tcp->tcp_eager_next_q0 != tcp->tcp_eager_prev_q0); @@ -4837,12 +4782,12 @@ tcp_drop_q0(tcp_t *tcp) if (tcp->tcp_debug) { (void) strlog(TCP_MOD_ID, 0, 3, SL_TRACE, "tcp_drop_q0: listen half-open queue (max=%d) overflow" - " (%d pending) on %s, drop one", tcp_conn_req_max_q0, + " (%d pending) on %s, drop one", tcps->tcps_conn_req_max_q0, tcp->tcp_conn_req_cnt_q0, tcp_display(tcp, NULL, DISP_PORT_ONLY)); } - BUMP_MIB(&tcp_mib, tcpHalfOpenDrop); + BUMP_MIB(&tcps->tcps_mib, tcpHalfOpenDrop); /* Put a reference on the conn as we are enqueueing it in the sqeue */ CONN_INC_REF(eager->tcp_connp); @@ -4869,6 +4814,7 @@ tcp_conn_create_v6(conn_t *lconnp, conn_t *connp, mblk_t *mp, int err; int ifindex = 0; cred_t *cr; + tcp_stack_t *tcps = tcp->tcp_tcps; if (ipvers == IPV4_VERSION) { ipha = (ipha_t *)mp->b_rptr; @@ -4885,7 +4831,7 @@ tcp_conn_create_v6(conn_t *lconnp, conn_t *connp, mblk_t *mp, sin6.sin6_port = *(uint16_t *)tcph->th_lport; sin6.sin6_family = AF_INET6; sin6.__sin6_src_id = ip_srcid_find_addr(&v6dst, - lconnp->conn_zoneid); + lconnp->conn_zoneid, tcps->tcps_netstack); if (tcp->tcp_recvdstaddr) { sin6_t sin6d; @@ -4925,7 +4871,7 @@ tcp_conn_create_v6(conn_t *lconnp, conn_t *connp, mblk_t *mp, sin6.sin6_family = AF_INET6; sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; sin6.__sin6_src_id = ip_srcid_find_addr(&ip6h->ip6_dst, - lconnp->conn_zoneid); + lconnp->conn_zoneid, tcps->tcps_netstack); if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) { /* Pass up the scope_id of remote addr */ @@ -4961,7 +4907,7 @@ tcp_conn_create_v6(conn_t *lconnp, conn_t *connp, mblk_t *mp, connp->conn_flags |= (IPCL_TCP6|IPCL_EAGER); connp->conn_fully_bound = B_FALSE; - if (tcp_trace) + if (tcps->tcps_trace) tcp->tcp_tracebuf = kmem_zalloc(sizeof (tcptrch_t), KM_NOSLEEP); /* Inherit 
information from the "parent" */ @@ -4969,7 +4915,7 @@ tcp_conn_create_v6(conn_t *lconnp, conn_t *connp, mblk_t *mp, tcp->tcp_family = ltcp->tcp_family; tcp->tcp_wq = ltcp->tcp_wq; tcp->tcp_rq = ltcp->tcp_rq; - tcp->tcp_mss = tcp_mss_def_ipv6; + tcp->tcp_mss = tcps->tcps_mss_def_ipv6; tcp->tcp_detached = B_TRUE; if ((err = tcp_init_values(tcp)) != 0) { freemsg(tpi_mp); @@ -5094,7 +5040,7 @@ tcp_conn_create_v6(conn_t *lconnp, conn_t *connp, mblk_t *mp, tcp->tcp_ipha->ipha_src = ipha->ipha_dst; /* Source routing option copyover (reverse it) */ - if (tcp_rev_src_routes) + if (tcps->tcps_rev_src_routes) tcp_opt_reverse(tcp, ipha); } else { ASSERT(ip6h != NULL); @@ -5135,6 +5081,7 @@ tcp_conn_create_v4(conn_t *lconnp, conn_t *connp, ipha_t *ipha, mblk_t *tpi_mp = NULL; int err; cred_t *cr; + tcp_stack_t *tcps = tcp->tcp_tcps; sin = sin_null; sin.sin_addr.s_addr = ipha->ipha_src; @@ -5172,7 +5119,7 @@ tcp_conn_create_v4(conn_t *lconnp, conn_t *connp, ipha_t *ipha, connp->conn_fport = *(uint16_t *)tcph->th_lport; connp->conn_lport = *(uint16_t *)tcph->th_fport; - if (tcp_trace) { + if (tcps->tcps_trace) { tcp->tcp_tracebuf = kmem_zalloc(sizeof (tcptrch_t), KM_NOSLEEP); } @@ -5181,7 +5128,7 @@ tcp_conn_create_v4(conn_t *lconnp, conn_t *connp, ipha_t *ipha, tcp->tcp_family = ltcp->tcp_family; tcp->tcp_wq = ltcp->tcp_wq; tcp->tcp_rq = ltcp->tcp_rq; - tcp->tcp_mss = tcp_mss_def_ipv4; + tcp->tcp_mss = tcps->tcps_mss_def_ipv4; tcp->tcp_detached = B_TRUE; if ((err = tcp_init_values(tcp)) != 0) { freemsg(tpi_mp); @@ -5221,7 +5168,7 @@ tcp_conn_create_v4(conn_t *lconnp, conn_t *connp, ipha_t *ipha, bcopy(tcph->th_fport, tcp->tcp_tcph->th_lport, sizeof (in_port_t)); /* Source routing option copyover (reverse it) */ - if (tcp_rev_src_routes) + if (tcps->tcps_rev_src_routes) tcp_opt_reverse(tcp, ipha); ASSERT(tcp->tcp_conn.tcp_eager_conn_ind == NULL); @@ -5262,7 +5209,7 @@ tcp_get_ipsec_conn(tcp_t *tcp, squeue_t *sqp, mblk_t **mpp) boolean_t mctl_present = B_FALSE; uint_t ipvers; - econnp = tcp_get_conn(sqp); + econnp = tcp_get_conn(sqp, tcp->tcp_tcps); if (econnp == NULL) { freemsg(first_mp); return (NULL); @@ -5398,12 +5345,13 @@ tcp_get_ipsec_conn(tcp_t *tcp, squeue_t *sqp, mblk_t **mpp) * there for too long. 
*/ void * -tcp_get_conn(void *arg) +tcp_get_conn(void *arg, tcp_stack_t *tcps) { tcp_t *tcp = NULL; conn_t *connp = NULL; squeue_t *sqp = (squeue_t *)arg; tcp_squeue_priv_t *tcp_time_wait; + netstack_t *ns; tcp_time_wait = *((tcp_squeue_priv_t **)squeue_getprivate(sqp, SQPRIVATE_TCP)); @@ -5418,11 +5366,24 @@ tcp_get_conn(void *arg) tcp->tcp_time_wait_next = NULL; connp = tcp->tcp_connp; connp->conn_flags |= IPCL_REUSED; + + ASSERT(tcp->tcp_tcps == NULL); + ASSERT(connp->conn_netstack == NULL); + ns = tcps->tcps_netstack; + netstack_hold(ns); + connp->conn_netstack = ns; + tcp->tcp_tcps = tcps; + TCPS_REFHOLD(tcps); + ipcl_globalhash_insert(connp); return ((void *)connp); } mutex_exit(&tcp_time_wait->tcp_time_wait_lock); - if ((connp = ipcl_conn_create(IPCL_TCPCONN, KM_NOSLEEP)) == NULL) + if ((connp = ipcl_conn_create(IPCL_TCPCONN, KM_NOSLEEP, + tcps->tcps_netstack)) == NULL) return (NULL); + tcp = connp->conn_tcp; + tcp->tcp_tcps = tcps; + TCPS_REFHOLD(tcps); return ((void *)connp); } @@ -5441,7 +5402,8 @@ tcp_update_label(tcp_t *tcp, const cred_t *cr) int added; if (tsol_compute_label(cr, tcp->tcp_remote, optbuf, - connp->conn_mac_exempt) != 0) + connp->conn_mac_exempt, + tcp->tcp_tcps->tcps_netstack->netstack_ip) != 0) return (B_FALSE); added = tsol_remove_secopt(tcp->tcp_ipha, tcp->tcp_hdr_len); @@ -5465,7 +5427,8 @@ tcp_update_label(tcp_t *tcp, const cred_t *cr) uchar_t optbuf[TSOL_MAX_IPV6_OPTION]; if (tsol_compute_label_v6(cr, &tcp->tcp_remote_v6, optbuf, - connp->conn_mac_exempt) != 0) + connp->conn_mac_exempt, + tcp->tcp_tcps->tcps_netstack->netstack_ip) != 0) return (B_FALSE); if (tsol_update_sticky(&tcp->tcp_sticky_ipp, &tcp->tcp_label_len, optbuf) != 0) @@ -5504,7 +5467,7 @@ tcp_update_label(tcp_t *tcp, const cred_t *cr) * Sockfs ACCEPT Path: * ------------------- * - * open acceptor stream (ip_tcpopen allocates tcp_wput_accept() + * open acceptor stream (tcp_open allocates tcp_wput_accept() * as STREAM entry point) * * soaccept() sends T_CONN_RES on the acceptor STREAM to tcp_wput_accept() @@ -5616,6 +5579,8 @@ tcp_conn_request(void *arg, mblk_t *mp, void *arg2) tcp_t *tcp = connp->conn_tcp; ire_t *ire; cred_t *credp; + tcp_stack_t *tcps = tcp->tcp_tcps; + ip_stack_t *ipst; if (tcp->tcp_state != TCPS_LISTEN) goto error2; @@ -5625,8 +5590,8 @@ tcp_conn_request(void *arg, mblk_t *mp, void *arg2) mutex_enter(&tcp->tcp_eager_lock); if (tcp->tcp_conn_req_cnt_q >= tcp->tcp_conn_req_max) { mutex_exit(&tcp->tcp_eager_lock); - TCP_STAT(tcp_listendrop); - BUMP_MIB(&tcp_mib, tcpListenDrop); + TCP_STAT(tcps, tcp_listendrop); + BUMP_MIB(&tcps->tcps_mib, tcpListenDrop); if (tcp->tcp_debug) { (void) strlog(TCP_MOD_ID, 0, 1, SL_TRACE|SL_ERROR, "tcp_conn_request: listen backlog (max=%d) " @@ -5638,7 +5603,7 @@ tcp_conn_request(void *arg, mblk_t *mp, void *arg2) } if (tcp->tcp_conn_req_cnt_q0 >= - tcp->tcp_conn_req_max + tcp_conn_req_max_q0) { + tcp->tcp_conn_req_max + tcps->tcps_conn_req_max_q0) { /* * Q0 is full. Drop a pending half-open req from the queue * to make room for the new SYN req. Also mark the time we @@ -5647,16 +5612,16 @@ tcp_conn_request(void *arg, mblk_t *mp, void *arg2) * A more aggressive defense against SYN attack will * be to set the "tcp_syn_defense" flag now. 
*/ - TCP_STAT(tcp_listendropq0); + TCP_STAT(tcps, tcp_listendropq0); tcp->tcp_last_rcv_lbolt = lbolt64; if (!tcp_drop_q0(tcp)) { mutex_exit(&tcp->tcp_eager_lock); - BUMP_MIB(&tcp_mib, tcpListenDropQ0); + BUMP_MIB(&tcps->tcps_mib, tcpListenDropQ0); if (tcp->tcp_debug) { (void) strlog(TCP_MOD_ID, 0, 3, SL_TRACE, "tcp_conn_request: listen half-open queue " "(max=%d) full (%d pending) on %s", - tcp_conn_req_max_q0, + tcps->tcps_conn_req_max_q0, tcp->tcp_conn_req_cnt_q0, tcp_display(tcp, NULL, DISP_PORT_ONLY)); @@ -5677,9 +5642,10 @@ tcp_conn_request(void *arg, mblk_t *mp, void *arg2) new_sqp = (squeue_t *)DB_CKSUMSTART(mp); DB_CKSUMSTART(mp) = 0; mp->b_datap->db_struioflag &= ~STRUIO_EAGER; - econnp = (conn_t *)tcp_get_conn(arg2); + econnp = (conn_t *)tcp_get_conn(arg2, tcps); if (econnp == NULL) goto error2; + ASSERT(econnp->conn_netstack == connp->conn_netstack); econnp->conn_sqp = new_sqp; } else if ((mp->b_datap->db_struioflag & STRUIO_POLICY) != 0) { /* @@ -5692,6 +5658,7 @@ tcp_conn_request(void *arg, mblk_t *mp, void *arg2) */ return; } + ASSERT(econnp->conn_netstack == connp->conn_netstack); } else { goto error2; } @@ -5804,7 +5771,7 @@ tcp_conn_request(void *arg, mblk_t *mp, void *arg2) eager->tcp_hard_binding = B_TRUE; - tcp_bind_hash_insert(&tcp_bind_fanout[ + tcp_bind_hash_insert(&tcps->tcps_bind_fanout[ TCP_BIND_HASH(eager->tcp_lport)], eager, 0); CL_INET_CONNECT(eager); @@ -5838,7 +5805,7 @@ tcp_conn_request(void *arg, mblk_t *mp, void *arg2) tcp_process_options(eager, tcph); /* Is the other end ECN capable? */ - if (tcp_ecn_permitted >= 1 && + if (tcps->tcps_ecn_permitted >= 1 && (tcph->th_flags[0] & (TH_ECE|TH_CWR)) == (TH_ECE|TH_CWR)) { eager->tcp_ecn_ok = B_TRUE; } @@ -5949,7 +5916,7 @@ tcp_conn_request(void *arg, mblk_t *mp, void *arg2) eager->tcp_rack = seg_seq; eager->tcp_rnxt = seg_seq + 1; U32_TO_ABE32(eager->tcp_rnxt, eager->tcp_tcph->th_ack); - BUMP_MIB(&tcp_mib, tcpPassiveOpens); + BUMP_MIB(&tcps->tcps_mib, tcpPassiveOpens); eager->tcp_state = TCPS_SYN_RCVD; mp1 = tcp_xmit_mp(eager, eager->tcp_xmit_head, eager->tcp_mss, NULL, NULL, eager->tcp_iss, B_FALSE, NULL, B_FALSE); @@ -6043,7 +6010,9 @@ error1: * If a connection already exists, send the mp to that connections so * that it can be appropriately dealt with. */ - if ((econnp = ipcl_classify(mp, connp->conn_zoneid)) != NULL) { + ipst = tcps->tcps_netstack->netstack_ip; + + if ((econnp = ipcl_classify(mp, connp->conn_zoneid, ipst)) != NULL) { if (!IPCL_IS_CONNECTED(econnp)) { /* * Something bad happened. ipcl_conn_insert() @@ -6469,6 +6438,7 @@ tcp_connect_ipv4(tcp_t *tcp, mblk_t *mp, ipaddr_t *dstaddrp, in_port_t dstport, ipaddr_t dstaddr = *dstaddrp; int32_t oldstate; uint16_t lport; + tcp_stack_t *tcps = tcp->tcp_tcps; ASSERT(tcp->tcp_ipversion == IPV4_VERSION); @@ -6495,7 +6465,7 @@ tcp_connect_ipv4(tcp_t *tcp, mblk_t *mp, ipaddr_t *dstaddrp, in_port_t dstport, /* Handle __sin6_src_id if socket not bound to an IP address */ if (srcid != 0 && tcp->tcp_ipha->ipha_src == INADDR_ANY) { ip_srcid_find_id(srcid, &tcp->tcp_ip_src_v6, - tcp->tcp_connp->conn_zoneid); + tcp->tcp_connp->conn_zoneid, tcps->tcps_netstack); IN6_V4MAPPED_TO_IPADDR(&tcp->tcp_ip_src_v6, tcp->tcp_ipha->ipha_src); } @@ -6524,7 +6494,7 @@ tcp_connect_ipv4(tcp_t *tcp, mblk_t *mp, ipaddr_t *dstaddrp, in_port_t dstport, * included in the checksum but that ip will include the * first hop in the source route in the tcp checksum. 
*/ - tcp->tcp_sum = ip_massage_options(tcp->tcp_ipha); + tcp->tcp_sum = ip_massage_options(tcp->tcp_ipha, tcps->tcps_netstack); tcp->tcp_sum = (tcp->tcp_sum & 0xFFFF) + (tcp->tcp_sum >> 16); tcp->tcp_sum -= ((tcp->tcp_ipha->ipha_dst >> 16) + (tcp->tcp_ipha->ipha_dst & 0xffff)); @@ -6550,7 +6520,8 @@ tcp_connect_ipv4(tcp_t *tcp, mblk_t *mp, ipaddr_t *dstaddrp, in_port_t dstport, * tcp_bindi will pick an unused port, insert the connection * in the bind hash and transition to BOUND state. */ - lport = tcp_update_next_port(tcp_next_port_to_try, tcp, B_TRUE); + lport = tcp_update_next_port(tcps->tcps_next_port_to_try, + tcp, B_TRUE); lport = tcp_bindi(tcp, lport, &tcp->tcp_ip_src_v6, 0, B_TRUE, B_FALSE, B_FALSE); if (lport == 0) { @@ -6590,7 +6561,7 @@ tcp_connect_ipv4(tcp_t *tcp, mblk_t *mp, ipaddr_t *dstaddrp, in_port_t dstport, mp1 = ip_bind_v6(tcp->tcp_wq, mp1, tcp->tcp_connp, &tcp->tcp_sticky_ipp); } - BUMP_MIB(&tcp_mib, tcpActiveOpens); + BUMP_MIB(&tcps->tcps_mib, tcpActiveOpens); tcp->tcp_active_open = 1; /* * If the bind cannot complete immediately @@ -6630,6 +6601,7 @@ tcp_connect_ipv6(tcp_t *tcp, mblk_t *mp, in6_addr_t *dstaddrp, ip6_rthdr_t *rth; int32_t oldstate; uint16_t lport; + tcp_stack_t *tcps = tcp->tcp_tcps; ASSERT(tcp->tcp_family == AF_INET6); @@ -6656,7 +6628,7 @@ tcp_connect_ipv6(tcp_t *tcp, mblk_t *mp, in6_addr_t *dstaddrp, /* Handle __sin6_src_id if socket not bound to an IP address */ if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&tcp->tcp_ip6h->ip6_src)) { ip_srcid_find_id(srcid, &tcp->tcp_ip6h->ip6_src, - tcp->tcp_connp->conn_zoneid); + tcp->tcp_connp->conn_zoneid, tcps->tcps_netstack); tcp->tcp_ip_src_v6 = tcp->tcp_ip6h->ip6_src; } @@ -6723,8 +6695,8 @@ tcp_connect_ipv6(tcp_t *tcp, mblk_t *mp, in6_addr_t *dstaddrp, */ rth = ip_find_rthdr_v6(tcp->tcp_ip6h, (uint8_t *)tcp->tcp_tcph); if (rth != NULL) { - - tcp->tcp_sum = ip_massage_options_v6(tcp->tcp_ip6h, rth); + tcp->tcp_sum = ip_massage_options_v6(tcp->tcp_ip6h, rth, + tcps->tcps_netstack); tcp->tcp_sum = ntohs((tcp->tcp_sum & 0xFFFF) + (tcp->tcp_sum >> 16)); } else { @@ -6748,7 +6720,8 @@ tcp_connect_ipv6(tcp_t *tcp, mblk_t *mp, in6_addr_t *dstaddrp, * tcp_bindi will pick an unused port, insert the connection * in the bind hash and transition to BOUND state. 
*/ - lport = tcp_update_next_port(tcp_next_port_to_try, tcp, B_TRUE); + lport = tcp_update_next_port(tcps->tcps_next_port_to_try, + tcp, B_TRUE); lport = tcp_bindi(tcp, lport, &tcp->tcp_ip_src_v6, 0, B_TRUE, B_FALSE, B_FALSE); if (lport == 0) { @@ -6777,7 +6750,7 @@ tcp_connect_ipv6(tcp_t *tcp, mblk_t *mp, in6_addr_t *dstaddrp, mblk_setcred(mp1, tcp->tcp_cred); mp1 = ip_bind_v6(tcp->tcp_wq, mp1, tcp->tcp_connp, &tcp->tcp_sticky_ipp); - BUMP_MIB(&tcp_mib, tcpActiveOpens); + BUMP_MIB(&tcps->tcps_mib, tcpActiveOpens); tcp->tcp_active_open = 1; /* ip_bind_v6() may return ACK or ERROR */ if (mp1 != NULL) @@ -6810,23 +6783,28 @@ tcp_def_q_set(tcp_t *tcp, mblk_t *mp) { struct iocblk *iocp = (struct iocblk *)mp->b_rptr; queue_t *q = tcp->tcp_wq; + tcp_stack_t *tcps = tcp->tcp_tcps; +#ifdef NS_DEBUG + (void) printf("TCP_IOC_DEFAULT_Q for stack %d\n", + tcps->tcps_netstack->netstack_stackid); +#endif mp->b_datap->db_type = M_IOCACK; iocp->ioc_count = 0; - mutex_enter(&tcp_g_q_lock); - if (tcp_g_q != NULL) { - mutex_exit(&tcp_g_q_lock); + mutex_enter(&tcps->tcps_g_q_lock); + if (tcps->tcps_g_q != NULL) { + mutex_exit(&tcps->tcps_g_q_lock); iocp->ioc_error = EALREADY; } else { mblk_t *mp1; mp1 = tcp_ip_bind_mp(tcp, O_T_BIND_REQ, 0); if (mp1 == NULL) { - mutex_exit(&tcp_g_q_lock); + mutex_exit(&tcps->tcps_g_q_lock); iocp->ioc_error = ENOMEM; } else { - tcp_g_q = tcp->tcp_rq; - mutex_exit(&tcp_g_q_lock); + tcps->tcps_g_q = tcp->tcp_rq; + mutex_exit(&tcps->tcps_g_q_lock); iocp->ioc_error = 0; iocp->ioc_rval = 0; /* @@ -6852,6 +6830,7 @@ tcp_disconnect(tcp_t *tcp, mblk_t *mp) tcp_t *ltcp = NULL; t_scalar_t seqnum; conn_t *connp; + tcp_stack_t *tcps = tcp->tcp_tcps; ASSERT((uintptr_t)(mp->b_wptr - mp->b_rptr) <= (uintptr_t)INT_MAX); if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_discon_req)) { @@ -6894,6 +6873,7 @@ tcp_disconnect(tcp_t *tcp, mblk_t *mp) */ int old_state = tcp->tcp_state; + ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip; /* * The connection can't be on the tcp_time_wait_head list @@ -6910,14 +6890,14 @@ tcp_disconnect(tcp_t *tcp, mblk_t *mp) if (tcp->tcp_ipversion == IPV4_VERSION) { connp = ipcl_lookup_listener_v4(tcp->tcp_lport, tcp->tcp_ipha->ipha_src, - tcp->tcp_connp->conn_zoneid); + tcp->tcp_connp->conn_zoneid, ipst); if (connp != NULL) ltcp = connp->conn_tcp; } else { /* Allow tcp_bound_if listeners? */ connp = ipcl_lookup_listener_v6(tcp->tcp_lport, &tcp->tcp_ip6h->ip6_src, 0, - tcp->tcp_connp->conn_zoneid); + tcp->tcp_connp->conn_zoneid, ipst); if (connp != NULL) ltcp = connp->conn_tcp; } @@ -6930,10 +6910,10 @@ tcp_disconnect(tcp_t *tcp, mblk_t *mp) if (ltcp != NULL) CONN_DEC_REF(ltcp->tcp_connp); if (old_state == TCPS_SYN_SENT || old_state == TCPS_SYN_RCVD) { - BUMP_MIB(&tcp_mib, tcpAttemptFails); + BUMP_MIB(&tcps->tcps_mib, tcpAttemptFails); } else if (old_state == TCPS_ESTABLISHED || old_state == TCPS_CLOSE_WAIT) { - BUMP_MIB(&tcp_mib, tcpEstabResets); + BUMP_MIB(&tcps->tcps_mib, tcpEstabResets); } if (tcp->tcp_fused) @@ -7090,6 +7070,7 @@ tcp_eager_kill(void *arg, mblk_t *mp, void *arg2) conn_t *econnp = (conn_t *)arg; tcp_t *eager = econnp->conn_tcp; tcp_t *listener = eager->tcp_listener; + tcp_stack_t *tcps = eager->tcp_tcps; /* * We could be called because listener is closing. Since @@ -7097,8 +7078,9 @@ tcp_eager_kill(void *arg, mblk_t *mp, void *arg2) * Better use the default queue just to send the TH_RST * out. 
*/ - eager->tcp_rq = tcp_g_q; - eager->tcp_wq = WR(tcp_g_q); + ASSERT(tcps->tcps_g_q != NULL); + eager->tcp_rq = tcps->tcps_g_q; + eager->tcp_wq = WR(tcps->tcps_g_q); if (eager->tcp_state > TCPS_LISTEN) { tcp_xmit_ctl("tcp_eager_kill, can't wait", @@ -7136,8 +7118,9 @@ tcp_eager_blowoff(tcp_t *listener, t_scalar_t seqnum) { tcp_t *eager; mblk_t *mp; + tcp_stack_t *tcps = listener->tcp_tcps; - TCP_STAT(tcp_eager_blowoff_calls); + TCP_STAT(tcps, tcp_eager_blowoff_calls); eager = listener; mutex_enter(&listener->tcp_eager_lock); do { @@ -7171,12 +7154,13 @@ tcp_eager_cleanup(tcp_t *listener, boolean_t q0_only) { tcp_t *eager; mblk_t *mp; + tcp_stack_t *tcps = listener->tcp_tcps; ASSERT(MUTEX_HELD(&listener->tcp_eager_lock)); if (!q0_only) { /* First cleanup q */ - TCP_STAT(tcp_eager_blowoff_q); + TCP_STAT(tcps, tcp_eager_blowoff_q); eager = listener->tcp_eager_next_q; while (eager != NULL) { if (eager->tcp_closemp_used == 0) { @@ -7192,7 +7176,7 @@ tcp_eager_cleanup(tcp_t *listener, boolean_t q0_only) } } /* Then cleanup q0 */ - TCP_STAT(tcp_eager_blowoff_q0); + TCP_STAT(tcps, tcp_eager_blowoff_q0); eager = listener->tcp_eager_next_q0; while (eager != listener) { if (eager->tcp_closemp_used == 0) { @@ -7323,10 +7307,12 @@ static int tcp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) { int i; + tcp_stack_t *tcps = Q_TO_TCP(q)->tcp_tcps; - for (i = 0; i < tcp_g_num_epriv_ports; i++) { - if (tcp_g_epriv_ports[i] != 0) - (void) mi_mpprintf(mp, "%d ", tcp_g_epriv_ports[i]); + for (i = 0; i < tcps->tcps_g_num_epriv_ports; i++) { + if (tcps->tcps_g_epriv_ports[i] != 0) + (void) mi_mpprintf(mp, "%d ", + tcps->tcps_g_epriv_ports[i]); } return (0); } @@ -7342,6 +7328,7 @@ tcp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp, { long new_value; int i; + tcp_stack_t *tcps = Q_TO_TCP(q)->tcp_tcps; /* * Fail the request if the new value does not lie within the @@ -7352,26 +7339,26 @@ tcp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp, return (EINVAL); } - mutex_enter(&tcp_epriv_port_lock); + mutex_enter(&tcps->tcps_epriv_port_lock); /* Check if the value is already in the list */ - for (i = 0; i < tcp_g_num_epriv_ports; i++) { - if (new_value == tcp_g_epriv_ports[i]) { - mutex_exit(&tcp_epriv_port_lock); + for (i = 0; i < tcps->tcps_g_num_epriv_ports; i++) { + if (new_value == tcps->tcps_g_epriv_ports[i]) { + mutex_exit(&tcps->tcps_epriv_port_lock); return (EEXIST); } } /* Find an empty slot */ - for (i = 0; i < tcp_g_num_epriv_ports; i++) { - if (tcp_g_epriv_ports[i] == 0) + for (i = 0; i < tcps->tcps_g_num_epriv_ports; i++) { + if (tcps->tcps_g_epriv_ports[i] == 0) break; } - if (i == tcp_g_num_epriv_ports) { - mutex_exit(&tcp_epriv_port_lock); + if (i == tcps->tcps_g_num_epriv_ports) { + mutex_exit(&tcps->tcps_epriv_port_lock); return (EOVERFLOW); } /* Set the new value */ - tcp_g_epriv_ports[i] = (uint16_t)new_value; - mutex_exit(&tcp_epriv_port_lock); + tcps->tcps_g_epriv_ports[i] = (uint16_t)new_value; + mutex_exit(&tcps->tcps_epriv_port_lock); return (0); } @@ -7386,6 +7373,7 @@ tcp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp, { long new_value; int i; + tcp_stack_t *tcps = Q_TO_TCP(q)->tcp_tcps; /* * Fail the request if the new value does not lie within the @@ -7396,19 +7384,19 @@ tcp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp, return (EINVAL); } - mutex_enter(&tcp_epriv_port_lock); + mutex_enter(&tcps->tcps_epriv_port_lock); /* Check that the value is already in the list */ - for (i = 
0; i < tcp_g_num_epriv_ports; i++) { - if (tcp_g_epriv_ports[i] == new_value) + for (i = 0; i < tcps->tcps_g_num_epriv_ports; i++) { + if (tcps->tcps_g_epriv_ports[i] == new_value) break; } - if (i == tcp_g_num_epriv_ports) { - mutex_exit(&tcp_epriv_port_lock); + if (i == tcps->tcps_g_num_epriv_ports) { + mutex_exit(&tcps->tcps_epriv_port_lock); return (ESRCH); } /* Clear the value */ - tcp_g_epriv_ports[i] = 0; - mutex_exit(&tcp_epriv_port_lock); + tcps->tcps_g_epriv_ports[i] = 0; + mutex_exit(&tcps->tcps_epriv_port_lock); return (0); } @@ -7473,6 +7461,8 @@ tcp_tpistate(tcp_t *tcp) static void tcp_copy_info(struct T_info_ack *tia, tcp_t *tcp) { + tcp_stack_t *tcps = tcp->tcp_tcps; + if (tcp->tcp_family == AF_INET6) *tia = tcp_g_t_info_ack_v6; else @@ -7482,9 +7472,9 @@ tcp_copy_info(struct T_info_ack *tia, tcp_t *tcp) if (tcp->tcp_mss == 0) { /* Not yet set - tcp_open does not set mss */ if (tcp->tcp_ipversion == IPV4_VERSION) - tia->TIDU_size = tcp_mss_def_ipv4; + tia->TIDU_size = tcps->tcps_mss_def_ipv4; else - tia->TIDU_size = tcp_mss_def_ipv6; + tia->TIDU_size = tcps->tcps_mss_def_ipv6; } else { tia->TIDU_size = tcp->tcp_mss; } @@ -7692,8 +7682,9 @@ tcp_reinit(tcp_t *tcp) { mblk_t *mp; int err; + tcp_stack_t *tcps = tcp->tcp_tcps; - TCP_STAT(tcp_reinit_calls); + TCP_STAT(tcps, tcp_reinit_calls); /* tcp_reinit should never be called for detached tcp_t's */ ASSERT(tcp->tcp_listener == NULL); @@ -7710,9 +7701,9 @@ tcp_reinit(tcp_t *tcp) * Reset everything in the state vector, after updating global * MIB data from instance counters. */ - UPDATE_MIB(&tcp_mib, tcpHCInSegs, tcp->tcp_ibsegs); + UPDATE_MIB(&tcps->tcps_mib, tcpHCInSegs, tcp->tcp_ibsegs); tcp->tcp_ibsegs = 0; - UPDATE_MIB(&tcp_mib, tcpHCOutSegs, tcp->tcp_obsegs); + UPDATE_MIB(&tcps->tcps_mib, tcpHCOutSegs, tcp->tcp_obsegs); tcp->tcp_obsegs = 0; tcp_close_mpp(&tcp->tcp_xmit_head); @@ -7787,6 +7778,7 @@ tcp_reinit(tcp_t *tcp) tcp_reinit_values(tcp); ipcl_hash_remove(tcp->tcp_connp); conn_delete_ire(tcp->tcp_connp, NULL); + tcp_ipsec_cleanup(tcp); if (tcp->tcp_conn_req_max != 0) { /* @@ -7844,10 +7836,10 @@ tcp_reinit(tcp_t *tcp) tcp->tcp_ip_src_v6 = tcp->tcp_bound_source_v6; ASSERT(tcp->tcp_ptpbhn != NULL); - tcp->tcp_rq->q_hiwat = tcp_recv_hiwat; - tcp->tcp_rwnd = tcp_recv_hiwat; + tcp->tcp_rq->q_hiwat = tcps->tcps_recv_hiwat; + tcp->tcp_rwnd = tcps->tcps_recv_hiwat; tcp->tcp_mss = tcp->tcp_ipversion != IPV4_VERSION ? - tcp_mss_def_ipv6 : tcp_mss_def_ipv4; + tcps->tcps_mss_def_ipv6 : tcps->tcps_mss_def_ipv4; } /* @@ -7861,6 +7853,8 @@ static void tcp_reinit_values(tcp) tcp_t *tcp; { + tcp_stack_t *tcps = tcp->tcp_tcps; + #ifndef lint #define DONTCARE(x) #define PRESERVE(x) @@ -8092,10 +8086,10 @@ tcp_reinit_values(tcp) PRESERVE(tcp->tcp_family); if (tcp->tcp_family == AF_INET6) { tcp->tcp_ipversion = IPV6_VERSION; - tcp->tcp_mss = tcp_mss_def_ipv6; + tcp->tcp_mss = tcps->tcps_mss_def_ipv6; } else { tcp->tcp_ipversion = IPV4_VERSION; - tcp->tcp_mss = tcp_mss_def_ipv4; + tcp->tcp_mss = tcps->tcps_mss_def_ipv4; } tcp->tcp_bound_if = 0; @@ -8187,6 +8181,7 @@ static int tcp_init_values(tcp_t *tcp) { int err; + tcp_stack_t *tcps = tcp->tcp_tcps; ASSERT((tcp->tcp_family == AF_INET && tcp->tcp_ipversion == IPV4_VERSION) || @@ -8201,32 +8196,32 @@ tcp_init_values(tcp_t *tcp) * during first few transmissions of a connection as seen in slow * links. 
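The extra-privileged-port list and its lock also become per-stack fields (tcps_g_epriv_ports, tcps_g_num_epriv_ports, tcps_epriv_port_lock). A short sketch of a lookup under that lock, mirroring the add and delete handlers above; the helper itself is hypothetical:

	/*
	 * Sketch only: search the per-stack extra-privileged-port list
	 * under its own lock, as tcp_extra_priv_ports_add() and _del()
	 * above now do.
	 */
	static boolean_t
	example_is_extra_priv_port(tcp_stack_t *tcps, uint16_t port)
	{
		int		i;
		boolean_t	found = B_FALSE;

		mutex_enter(&tcps->tcps_epriv_port_lock);
		for (i = 0; i < tcps->tcps_g_num_epriv_ports; i++) {
			if (tcps->tcps_g_epriv_ports[i] == port) {
				found = B_TRUE;
				break;
			}
		}
		mutex_exit(&tcps->tcps_epriv_port_lock);
		return (found);
	}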
*/ - tcp->tcp_rtt_sa = tcp_rexmit_interval_initial << 2; - tcp->tcp_rtt_sd = tcp_rexmit_interval_initial >> 1; + tcp->tcp_rtt_sa = tcps->tcps_rexmit_interval_initial << 2; + tcp->tcp_rtt_sd = tcps->tcps_rexmit_interval_initial >> 1; tcp->tcp_rto = (tcp->tcp_rtt_sa >> 3) + tcp->tcp_rtt_sd + - tcp_rexmit_interval_extra + (tcp->tcp_rtt_sa >> 5) + - tcp_conn_grace_period; - if (tcp->tcp_rto < tcp_rexmit_interval_min) - tcp->tcp_rto = tcp_rexmit_interval_min; + tcps->tcps_rexmit_interval_extra + (tcp->tcp_rtt_sa >> 5) + + tcps->tcps_conn_grace_period; + if (tcp->tcp_rto < tcps->tcps_rexmit_interval_min) + tcp->tcp_rto = tcps->tcps_rexmit_interval_min; tcp->tcp_timer_backoff = 0; tcp->tcp_ms_we_have_waited = 0; tcp->tcp_last_recv_time = lbolt; - tcp->tcp_cwnd_max = tcp_cwnd_max_; + tcp->tcp_cwnd_max = tcps->tcps_cwnd_max_; tcp->tcp_cwnd_ssthresh = TCP_MAX_LARGEWIN; tcp->tcp_snd_burst = TCP_CWND_INFINITE; - tcp->tcp_maxpsz = tcp_maxpsz_multiplier; + tcp->tcp_maxpsz = tcps->tcps_maxpsz_multiplier; - tcp->tcp_first_timer_threshold = tcp_ip_notify_interval; - tcp->tcp_first_ctimer_threshold = tcp_ip_notify_cinterval; - tcp->tcp_second_timer_threshold = tcp_ip_abort_interval; + tcp->tcp_first_timer_threshold = tcps->tcps_ip_notify_interval; + tcp->tcp_first_ctimer_threshold = tcps->tcps_ip_notify_cinterval; + tcp->tcp_second_timer_threshold = tcps->tcps_ip_abort_interval; /* * Fix it to tcp_ip_abort_linterval later if it turns out to be a * passive open. */ - tcp->tcp_second_ctimer_threshold = tcp_ip_abort_cinterval; + tcp->tcp_second_ctimer_threshold = tcps->tcps_ip_abort_cinterval; - tcp->tcp_naglim = tcp_naglim_def; + tcp->tcp_naglim = tcps->tcps_naglim_def; /* NOTE: ISS is now set in tcp_adapt_ire(). */ @@ -8259,8 +8254,8 @@ tcp_init_values(tcp_t *tcp) * down tcp_rwnd. tcp_adapt_ire() will set the right value later. */ tcp->tcp_rcv_ws = TCP_MAX_WINSHIFT; - tcp->tcp_xmit_lowater = tcp_xmit_lowat; - tcp->tcp_xmit_hiwater = tcp_xmit_hiwat; + tcp->tcp_xmit_lowater = tcps->tcps_xmit_lowat; + tcp->tcp_xmit_hiwater = tcps->tcps_xmit_hiwat; tcp->tcp_cork = B_FALSE; /* @@ -8269,10 +8264,10 @@ tcp_init_values(tcp_t *tcp) * initialization here means that this value is not inherited thru * tcp_reinit(). */ - tcp->tcp_debug = tcp_dbg; + tcp->tcp_debug = tcps->tcps_dbg; - tcp->tcp_ka_interval = tcp_keepalive_interval; - tcp->tcp_ka_abort_thres = tcp_keepalive_abort_interval; + tcp->tcp_ka_interval = tcps->tcps_keepalive_interval; + tcp->tcp_ka_abort_thres = tcps->tcps_keepalive_abort_interval; return (0); } @@ -8286,6 +8281,7 @@ tcp_header_init_ipv4(tcp_t *tcp) tcph_t *tcph; uint32_t sum; conn_t *connp; + tcp_stack_t *tcps = tcp->tcp_tcps; /* * This is a simple initialization. If there's @@ -8318,10 +8314,10 @@ tcp_header_init_ipv4(tcp_t *tcp) = (IP_VERSION << 4) | IP_SIMPLE_HDR_LENGTH_IN_WORDS; tcp->tcp_ipha->ipha_ident = 0; - tcp->tcp_ttl = (uchar_t)tcp_ipv4_ttl; + tcp->tcp_ttl = (uchar_t)tcps->tcps_ipv4_ttl; tcp->tcp_tos = 0; tcp->tcp_ipha->ipha_fragment_offset_and_flags = 0; - tcp->tcp_ipha->ipha_ttl = (uchar_t)tcp_ipv4_ttl; + tcp->tcp_ipha->ipha_ttl = (uchar_t)tcps->tcps_ipv4_ttl; tcp->tcp_ipha->ipha_protocol = IPPROTO_TCP; tcph = (tcph_t *)(tcp->tcp_iphc + sizeof (ipha_t)); @@ -8348,6 +8344,7 @@ tcp_header_init_ipv6(tcp_t *tcp) tcph_t *tcph; uint32_t sum; conn_t *connp; + tcp_stack_t *tcps = tcp->tcp_tcps; /* * This is a simple initialization. 
If there's @@ -8390,7 +8387,7 @@ tcp_header_init_ipv6(tcp_t *tcp) tcp->tcp_ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; tcp->tcp_ip6h->ip6_plen = ntohs(sizeof (tcph_t)); tcp->tcp_ip6h->ip6_nxt = IPPROTO_TCP; - tcp->tcp_ip6h->ip6_hops = (uint8_t)tcp_ipv6_hoplimit; + tcp->tcp_ip6h->ip6_hops = (uint8_t)tcps->tcps_ipv6_hoplimit; tcph = (tcph_t *)(tcp->tcp_iphc + IPV6_HDR_LEN); tcp->tcp_tcph = tcph; @@ -8429,6 +8426,7 @@ tcp_icmp_error(tcp_t *tcp, mblk_t *mp) uint32_t ratio; size_t mp_size = MBLKL(mp); uint32_t seg_seq; + tcp_stack_t *tcps = tcp->tcp_tcps; /* Assume IP provides aligned packets - otherwise toss */ if (!OK_32PTR(mp->b_rptr)) { @@ -8571,7 +8569,7 @@ noticmpv4: * tcp_wput_data(). Need to adjust all those * params to make sure tcp_wput_data() work properly. */ - if (tcp_ignore_path_mtu) + if (tcps->tcps_ignore_path_mtu) break; /* @@ -8598,7 +8596,7 @@ noticmpv4: * or less than tcp_mss_min. * The value 68 comes from rfc 1191. */ - if (new_mss < MAX(68, tcp_mss_min)) + if (new_mss < MAX(68, tcps->tcps_mss_min)) tcp->tcp_ipha->ipha_fragment_offset_and_flags = 0; @@ -8717,6 +8715,7 @@ tcp_icmp_error_ipv6(tcp_t *tcp, mblk_t *mp, boolean_t ipsec_mctl) mblk_t *first_mp = mp; size_t mp_size; uint32_t seg_seq; + tcp_stack_t *tcps = tcp->tcp_tcps; /* * The caller has determined if this is an IPSEC_IN packet and @@ -8842,7 +8841,7 @@ noticmpv6: * tcp_wput_data(). Need to adjust all those * params to make sure tcp_wput_data() work properly. */ - if (tcp_ignore_path_mtu) + if (tcps->tcps_ignore_path_mtu) break; /* @@ -9193,13 +9192,14 @@ tcp_keepalive_killer(void *arg) int32_t firetime; int32_t idletime; int32_t ka_intrvl; + tcp_stack_t *tcps = tcp->tcp_tcps; tcp->tcp_ka_tid = 0; if (tcp->tcp_fused) return; - BUMP_MIB(&tcp_mib, tcpTimKeepalive); + BUMP_MIB(&tcps->tcps_mib, tcpTimKeepalive); ka_intrvl = tcp->tcp_ka_interval; /* @@ -9224,7 +9224,7 @@ tcp_keepalive_killer(void *arg) */ if (tcp->tcp_ka_abort_thres != 0 && idletime > (ka_intrvl + tcp->tcp_ka_abort_thres)) { - BUMP_MIB(&tcp_mib, tcpTimKeepaliveDrop); + BUMP_MIB(&tcps->tcps_mib, tcpTimKeepaliveDrop); (void) tcp_clean_death(tcp, tcp->tcp_client_errno ? tcp->tcp_client_errno : ETIMEDOUT, 11); return; @@ -9248,18 +9248,20 @@ tcp_keepalive_killer(void *arg) TCP_RECORD_TRACE(tcp, mp, TCP_TRACE_SEND_PKT); tcp_send_data(tcp, tcp->tcp_wq, mp); - BUMP_MIB(&tcp_mib, tcpTimKeepaliveProbe); + BUMP_MIB(&tcps->tcps_mib, + tcpTimKeepaliveProbe); if (tcp->tcp_ka_last_intrvl != 0) { + int max; /* * We should probe again at least * in ka_intrvl, but not more than * tcp_rexmit_interval_max. */ + max = tcps->tcps_rexmit_interval_max; firetime = MIN(ka_intrvl - 1, tcp->tcp_ka_last_intrvl << 1); - if (firetime > tcp_rexmit_interval_max) - firetime = - tcp_rexmit_interval_max; + if (firetime > max) + firetime = max; } else { firetime = tcp->tcp_rto; } @@ -9501,14 +9503,15 @@ static void tcp_mss_set(tcp_t *tcp, uint32_t mss) { uint32_t mss_max; + tcp_stack_t *tcps = tcp->tcp_tcps; if (tcp->tcp_ipversion == IPV4_VERSION) - mss_max = tcp_mss_max_ipv4; + mss_max = tcps->tcps_mss_max_ipv4; else - mss_max = tcp_mss_max_ipv6; + mss_max = tcps->tcps_mss_max_ipv6; - if (mss < tcp_mss_min) - mss = tcp_mss_min; + if (mss < tcps->tcps_mss_min) + mss = tcps->tcps_mss_min; if (mss > mss_max) mss = mss_max; /* @@ -9532,7 +9535,7 @@ tcp_mss_set(tcp_t *tcp, uint32_t mss) * The new tcp_cwnd should not get bigger. 
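The tcp_init_values() hunk above seeds the retransmit state from per-stack tunables rather than globals. A sketch of the same initial-RTO arithmetic using the tcps_* fields introduced by the patch; the wrapper is hypothetical and the clock_t types are an assumption:

	/*
	 * Sketch only: compute the initial RTO from the per-stack
	 * tunables, as tcp_init_values() does above.
	 */
	static clock_t
	example_initial_rto(tcp_stack_t *tcps)
	{
		clock_t	sa = tcps->tcps_rexmit_interval_initial << 2;
		clock_t	sd = tcps->tcps_rexmit_interval_initial >> 1;
		clock_t	rto;

		rto = (sa >> 3) + sd + tcps->tcps_rexmit_interval_extra +
		    (sa >> 5) + tcps->tcps_conn_grace_period;
		if (rto < tcps->tcps_rexmit_interval_min)
			rto = tcps->tcps_rexmit_interval_min;
		return (rto);
	}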
*/ if (tcp->tcp_init_cwnd == 0) { - tcp->tcp_cwnd = MIN(tcp_slow_start_initial * mss, + tcp->tcp_cwnd = MIN(tcps->tcps_slow_start_initial * mss, MIN(4 * mss, MAX(2 * mss, 4380 / mss * mss))); } else { if (tcp->tcp_mss < mss) { @@ -9554,25 +9557,60 @@ tcp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) conn_t *connp; int err; dev_t conn_dev; - zoneid_t zoneid = getzoneid(); - - /* - * Special case for install: miniroot needs to be able to access files - * via NFS as though it were always in the global zone. - */ - if (credp == kcred && nfs_global_client_only != 0) - zoneid = GLOBAL_ZONEID; + zoneid_t zoneid; + tcp_stack_t *tcps = NULL; if (q->q_ptr != NULL) return (0); + if (!(flag & SO_ACCEPTOR)) { + /* + * Special case for install: miniroot needs to be able to + * access files via NFS as though it were always in the + * global zone. + */ + if (credp == kcred && nfs_global_client_only != 0) { + zoneid = GLOBAL_ZONEID; + tcps = netstack_find_by_stackid(GLOBAL_NETSTACKID)-> + netstack_tcp; + ASSERT(tcps != NULL); + } else { + netstack_t *ns; + + ns = netstack_find_by_cred(credp); + ASSERT(ns != NULL); + tcps = ns->netstack_tcp; + ASSERT(tcps != NULL); + + /* + * For exclusive stacks we set the zoneid to zero + * to make TCP operate as if in the global zone. + */ + if (tcps->tcps_netstack->netstack_stackid != + GLOBAL_NETSTACKID) + zoneid = GLOBAL_ZONEID; + else + zoneid = crgetzoneid(credp); + } + /* + * For stackid zero this is done from strplumb.c, but + * non-zero stackids are handled here. + */ + if (tcps->tcps_g_q == NULL && + tcps->tcps_netstack->netstack_stackid != + GLOBAL_NETSTACKID) { + tcp_g_q_setup(tcps); + } + } if (sflag == MODOPEN) { /* * This is a special case. The purpose of a modopen * is to allow just the T_SVR4_OPTMGMT_REQ to pass * through for MIB browsers. Everything else is failed. */ - connp = (conn_t *)tcp_get_conn(IP_SQUEUE_GET(lbolt)); + connp = (conn_t *)tcp_get_conn(IP_SQUEUE_GET(lbolt), tcps); + /* tcp_get_conn incremented refcnt */ + netstack_rele(tcps->tcps_netstack); if (connp == NULL) return (ENOMEM); @@ -9580,6 +9618,8 @@ tcp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) connp->conn_flags |= IPCL_TCPMOD; connp->conn_cred = credp; connp->conn_zoneid = zoneid; + ASSERT(connp->conn_netstack == tcps->tcps_netstack); + ASSERT(connp->conn_netstack->netstack_tcp == tcps); q->q_ptr = WR(q)->q_ptr = connp; crhold(credp); q->q_qinfo = &tcp_mod_rinit; @@ -9587,13 +9627,17 @@ tcp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) qprocson(q); return (0); } - - if ((conn_dev = inet_minor_alloc(ip_minor_arena)) == 0) + if ((conn_dev = inet_minor_alloc(ip_minor_arena)) == 0) { + if (tcps != NULL) + netstack_rele(tcps->tcps_netstack); return (EBUSY); + } *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); if (flag & SO_ACCEPTOR) { + /* No netstack_find_by_cred, hence no netstack_rele needed */ + ASSERT(tcps == NULL); q->q_qinfo = &tcp_acceptor_rinit; q->q_ptr = (void *)conn_dev; WR(q)->q_qinfo = &tcp_acceptor_winit; @@ -9602,7 +9646,12 @@ tcp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) return (0); } - connp = (conn_t *)tcp_get_conn(IP_SQUEUE_GET(lbolt)); + connp = (conn_t *)tcp_get_conn(IP_SQUEUE_GET(lbolt), tcps); + /* + * Both tcp_get_conn and netstack_find_by_cred incremented refcnt, + * so we drop it by one. 
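tcp_open() above now resolves its tcp_stack_t from the opening credential instead of assuming a single global stack, and the lookup takes a netstack reference that must later be dropped. A sketch of that lookup and release contract, with a hypothetical helper name:

	/*
	 * Sketch only: find the per-stack TCP state for an opening cred.
	 * netstack_find_by_cred() holds a reference on the netstack; the
	 * caller must drop it with netstack_rele() once the conn_t has
	 * taken its own hold, as tcp_open() does above.
	 */
	static tcp_stack_t *
	example_find_tcps(cred_t *credp)
	{
		netstack_t	*ns;
		tcp_stack_t	*tcps;

		ns = netstack_find_by_cred(credp);
		ASSERT(ns != NULL);
		tcps = ns->netstack_tcp;
		ASSERT(tcps != NULL);
		return (tcps);	/* caller: netstack_rele(tcps->tcps_netstack) */
	}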
+ */ + netstack_rele(tcps->tcps_netstack); if (connp == NULL) { inet_minor_free(ip_minor_arena, conn_dev); q->q_ptr = NULL; @@ -9620,7 +9669,7 @@ tcp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) connp->conn_src_preferences = IPV6_PREFER_SRC_DEFAULT; tcp->tcp_ipversion = IPV6_VERSION; tcp->tcp_family = AF_INET6; - tcp->tcp_mss = tcp_mss_def_ipv6; + tcp->tcp_mss = tcps->tcps_mss_def_ipv6; } else { connp->conn_flags |= IPCL_TCP4; connp->conn_send = ip_output; @@ -9628,7 +9677,7 @@ tcp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) connp->conn_pkt_isv6 = B_FALSE; tcp->tcp_ipversion = IPV4_VERSION; tcp->tcp_family = AF_INET; - tcp->tcp_mss = tcp_mss_def_ipv4; + tcp->tcp_mss = tcps->tcps_mss_def_ipv4; } /* @@ -9643,6 +9692,8 @@ tcp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) connp->conn_zoneid = zoneid; connp->conn_mlp_type = mlptSingle; connp->conn_ulp_labeled = !is_system_labeled(); + ASSERT(connp->conn_netstack == tcps->tcps_netstack); + ASSERT(tcp->tcp_tcps == tcps); /* * If the caller has the process-wide flag set, then default to MAC @@ -9675,7 +9726,7 @@ tcp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) tcp_acceptor_hash_insert(tcp->tcp_acceptor_id, tcp); } - if (tcp_trace) + if (tcps->tcps_trace) tcp->tcp_tracebuf = kmem_zalloc(sizeof (tcptrch_t), KM_SLEEP); err = tcp_init(tcp, q); @@ -9687,8 +9738,8 @@ tcp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) return (err); } - RD(q)->q_hiwat = tcp_recv_hiwat; - tcp->tcp_rwnd = tcp_recv_hiwat; + RD(q)->q_hiwat = tcps->tcps_recv_hiwat; + tcp->tcp_rwnd = tcps->tcps_recv_hiwat; /* Non-zero default values */ connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; @@ -9745,21 +9796,22 @@ int tcp_opt_default(queue_t *q, int level, int name, uchar_t *ptr) { int32_t *i1 = (int32_t *)ptr; + tcp_stack_t *tcps = Q_TO_TCP(q)->tcp_tcps; switch (level) { case IPPROTO_TCP: switch (name) { case TCP_NOTIFY_THRESHOLD: - *i1 = tcp_ip_notify_interval; + *i1 = tcps->tcps_ip_notify_interval; break; case TCP_ABORT_THRESHOLD: - *i1 = tcp_ip_abort_interval; + *i1 = tcps->tcps_ip_abort_interval; break; case TCP_CONN_NOTIFY_THRESHOLD: - *i1 = tcp_ip_notify_cinterval; + *i1 = tcps->tcps_ip_notify_cinterval; break; case TCP_CONN_ABORT_THRESHOLD: - *i1 = tcp_ip_abort_cinterval; + *i1 = tcps->tcps_ip_abort_cinterval; break; default: return (-1); @@ -9768,7 +9820,7 @@ tcp_opt_default(queue_t *q, int level, int name, uchar_t *ptr) case IPPROTO_IP: switch (name) { case IP_TTL: - *i1 = tcp_ipv4_ttl; + *i1 = tcps->tcps_ipv4_ttl; break; default: return (-1); @@ -9777,7 +9829,7 @@ tcp_opt_default(queue_t *q, int level, int name, uchar_t *ptr) case IPPROTO_IPV6: switch (name) { case IPV6_UNICAST_HOPS: - *i1 = tcp_ipv6_hoplimit; + *i1 = tcps->tcps_ipv6_hoplimit; break; default: return (-1); @@ -10093,7 +10145,8 @@ tcp_opt_get(queue_t *q, int level, int name, uchar_t *ptr) return (-1); return (ip_fill_mtuinfo(&connp->conn_remv6, - connp->conn_fport, mtuinfo)); + connp->conn_fport, mtuinfo, + connp->conn_netstack)); } default: return (-1); @@ -10121,6 +10174,7 @@ tcp_opt_set(queue_t *q, uint_t optset_context, int level, int name, boolean_t onoff = (*i1 == 0) ? 
0 : 1; boolean_t checkonly; int reterr; + tcp_stack_t *tcps = Q_TO_TCP(q)->tcp_tcps; switch (optset_context) { case SETFN_OPTCOM_CHECKONLY: @@ -10280,7 +10334,7 @@ tcp_opt_set(queue_t *q, uint_t optset_context, int level, int name, tcp->tcp_dgram_errind = onoff; break; case SO_SNDBUF: { - if (*i1 > tcp_max_buf) { + if (*i1 > tcps->tcps_max_buf) { *outlenp = 0; return (ENOBUFS); } @@ -10288,10 +10342,10 @@ tcp_opt_set(queue_t *q, uint_t optset_context, int level, int name, break; tcp->tcp_xmit_hiwater = *i1; - if (tcp_snd_lowat_fraction != 0) + if (tcps->tcps_snd_lowat_fraction != 0) tcp->tcp_xmit_lowater = tcp->tcp_xmit_hiwater / - tcp_snd_lowat_fraction; + tcps->tcps_snd_lowat_fraction; (void) tcp_maxpsz_set(tcp, B_TRUE); /* * If we are flow-controlled, recheck the condition. @@ -10308,7 +10362,7 @@ tcp_opt_set(queue_t *q, uint_t optset_context, int level, int name, break; } case SO_RCVBUF: - if (*i1 > tcp_max_buf) { + if (*i1 > tcps->tcps_max_buf) { *outlenp = 0; return (ENOBUFS); } @@ -10419,7 +10473,7 @@ tcp_opt_set(queue_t *q, uint_t optset_context, int level, int name, tcp->tcp_init_cwnd = init_cwnd; break; } - if ((reterr = secpolicy_net_config(cr, B_TRUE)) != 0) { + if ((reterr = secpolicy_ip_config(cr, B_TRUE)) != 0) { *outlenp = 0; return (reterr); } @@ -10434,8 +10488,8 @@ tcp_opt_set(queue_t *q, uint_t optset_context, int level, int name, if (checkonly) break; - if (*i1 < tcp_keepalive_interval_low || - *i1 > tcp_keepalive_interval_high) { + if (*i1 < tcps->tcps_keepalive_interval_low || + *i1 > tcps->tcps_keepalive_interval_high) { *outlenp = 0; return (EINVAL); } @@ -10458,8 +10512,10 @@ tcp_opt_set(queue_t *q, uint_t optset_context, int level, int name, break; case TCP_KEEPALIVE_ABORT_THRESHOLD: if (!checkonly) { - if (*i1 < tcp_keepalive_abort_interval_low || - *i1 > tcp_keepalive_abort_interval_high) { + if (*i1 < + tcps->tcps_keepalive_abort_interval_low || + *i1 > + tcps->tcps_keepalive_abort_interval_high) { *outlenp = 0; return (EINVAL); } @@ -10571,7 +10627,7 @@ tcp_opt_set(queue_t *q, uint_t optset_context, int level, int name, if (*i1 == -1) { tcp->tcp_ip6h->ip6_hops = ipp->ipp_unicast_hops = - (uint8_t)tcp_ipv6_hoplimit; + (uint8_t)tcps->tcps_ipv6_hoplimit; ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; /* Pass modified value to IP. */ *i1 = tcp->tcp_ip6h->ip6_hops; @@ -10973,6 +11029,7 @@ tcp_build_hdrs(queue_t *q, tcp_t *tcp) char buf[TCP_MAX_HDR_LENGTH]; ip6_pkt_t *ipp = &tcp->tcp_sticky_ipp; in6_addr_t src, dst; + tcp_stack_t *tcps = tcp->tcp_tcps; /* * save the existing tcp header and source/dest IP addresses @@ -11030,7 +11087,7 @@ tcp_build_hdrs(queue_t *q, tcp_t *tcp) * the default value for TCP. 
*/ if (!(ipp->ipp_fields & IPPF_UNICAST_HOPS)) - tcp->tcp_ip6h->ip6_hops = tcp_ipv6_hoplimit; + tcp->tcp_ip6h->ip6_hops = tcps->tcps_ipv6_hoplimit; /* * If we're setting extension headers after a connection @@ -11050,14 +11107,14 @@ tcp_build_hdrs(queue_t *q, tcp_t *tcp) (uint8_t *)tcp->tcp_tcph); if (rth != NULL) { tcp->tcp_sum = ip_massage_options_v6(tcp->tcp_ip6h, - rth); + rth, tcps->tcps_netstack); tcp->tcp_sum = ntohs((tcp->tcp_sum & 0xFFFF) + (tcp->tcp_sum >> 16)); } } /* Try to get everything in a single mblk */ - (void) mi_set_sth_wroff(RD(q), hdrs_len + tcp_wroff_xtra); + (void) mi_set_sth_wroff(RD(q), hdrs_len + tcps->tcps_wroff_xtra); return (0); } @@ -11183,6 +11240,7 @@ tcp_opt_set_header(tcp_t *tcp, boolean_t checkonly, uchar_t *ptr, uint_t len) uint_t tcph_len; uint8_t *ip_optp; tcph_t *new_tcph; + tcp_stack_t *tcps = tcp->tcp_tcps; if ((len > TCP_MAX_IP_OPTIONS_LENGTH) || (len & 0x3)) return (EINVAL); @@ -11224,7 +11282,7 @@ tcp_opt_set_header(tcp_t *tcp, boolean_t checkonly, uchar_t *ptr, uint_t len) if (!TCP_IS_DETACHED(tcp)) { /* Always allocate room for all options. */ (void) mi_set_sth_wroff(tcp->tcp_rq, - TCP_MAX_COMBINED_HEADER_LENGTH + tcp_wroff_xtra); + TCP_MAX_COMBINED_HEADER_LENGTH + tcps->tcps_wroff_xtra); } return (0); } @@ -11245,100 +11303,116 @@ tcp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) * named dispatch handler. */ static boolean_t -tcp_param_register(tcpparam_t *tcppa, int cnt) +tcp_param_register(IDP *ndp, tcpparam_t *tcppa, int cnt, tcp_stack_t *tcps) { for (; cnt-- > 0; tcppa++) { if (tcppa->tcp_param_name && tcppa->tcp_param_name[0]) { - if (!nd_load(&tcp_g_nd, tcppa->tcp_param_name, + if (!nd_load(ndp, tcppa->tcp_param_name, tcp_param_get, tcp_param_set, (caddr_t)tcppa)) { - nd_free(&tcp_g_nd); + nd_free(ndp); return (B_FALSE); } } } - if (!nd_load(&tcp_g_nd, tcp_wroff_xtra_param.tcp_param_name, + tcps->tcps_wroff_xtra_param = kmem_zalloc(sizeof (tcpparam_t), + KM_SLEEP); + bcopy(&lcl_tcp_wroff_xtra_param, tcps->tcps_wroff_xtra_param, + sizeof (tcpparam_t)); + if (!nd_load(ndp, tcps->tcps_wroff_xtra_param->tcp_param_name, tcp_param_get, tcp_param_set_aligned, - (caddr_t)&tcp_wroff_xtra_param)) { - nd_free(&tcp_g_nd); + (caddr_t)tcps->tcps_wroff_xtra_param)) { + nd_free(ndp); return (B_FALSE); } - if (!nd_load(&tcp_g_nd, tcp_mdt_head_param.tcp_param_name, + tcps->tcps_mdt_head_param = kmem_zalloc(sizeof (tcpparam_t), + KM_SLEEP); + bcopy(&lcl_tcp_mdt_head_param, tcps->tcps_mdt_head_param, + sizeof (tcpparam_t)); + if (!nd_load(ndp, tcps->tcps_mdt_head_param->tcp_param_name, tcp_param_get, tcp_param_set_aligned, - (caddr_t)&tcp_mdt_head_param)) { - nd_free(&tcp_g_nd); + (caddr_t)tcps->tcps_mdt_head_param)) { + nd_free(ndp); return (B_FALSE); } - if (!nd_load(&tcp_g_nd, tcp_mdt_tail_param.tcp_param_name, + tcps->tcps_mdt_tail_param = kmem_zalloc(sizeof (tcpparam_t), + KM_SLEEP); + bcopy(&lcl_tcp_mdt_tail_param, tcps->tcps_mdt_tail_param, + sizeof (tcpparam_t)); + if (!nd_load(ndp, tcps->tcps_mdt_tail_param->tcp_param_name, tcp_param_get, tcp_param_set_aligned, - (caddr_t)&tcp_mdt_tail_param)) { - nd_free(&tcp_g_nd); + (caddr_t)tcps->tcps_mdt_tail_param)) { + nd_free(ndp); return (B_FALSE); } - if (!nd_load(&tcp_g_nd, tcp_mdt_max_pbufs_param.tcp_param_name, - tcp_param_get, tcp_param_set, - (caddr_t)&tcp_mdt_max_pbufs_param)) { - nd_free(&tcp_g_nd); + tcps->tcps_mdt_max_pbufs_param = kmem_zalloc(sizeof (tcpparam_t), + KM_SLEEP); + bcopy(&lcl_tcp_mdt_max_pbufs_param, tcps->tcps_mdt_max_pbufs_param, + sizeof (tcpparam_t)); + if 
(!nd_load(ndp, tcps->tcps_mdt_max_pbufs_param->tcp_param_name, + tcp_param_get, tcp_param_set_aligned, + (caddr_t)tcps->tcps_mdt_max_pbufs_param)) { + nd_free(ndp); return (B_FALSE); } - if (!nd_load(&tcp_g_nd, "tcp_extra_priv_ports", + if (!nd_load(ndp, "tcp_extra_priv_ports", tcp_extra_priv_ports_get, NULL, NULL)) { - nd_free(&tcp_g_nd); + nd_free(ndp); return (B_FALSE); } - if (!nd_load(&tcp_g_nd, "tcp_extra_priv_ports_add", + if (!nd_load(ndp, "tcp_extra_priv_ports_add", NULL, tcp_extra_priv_ports_add, NULL)) { - nd_free(&tcp_g_nd); + nd_free(ndp); return (B_FALSE); } - if (!nd_load(&tcp_g_nd, "tcp_extra_priv_ports_del", + if (!nd_load(ndp, "tcp_extra_priv_ports_del", NULL, tcp_extra_priv_ports_del, NULL)) { - nd_free(&tcp_g_nd); + nd_free(ndp); return (B_FALSE); } - if (!nd_load(&tcp_g_nd, "tcp_status", tcp_status_report, NULL, + if (!nd_load(ndp, "tcp_status", tcp_status_report, NULL, NULL)) { - nd_free(&tcp_g_nd); + nd_free(ndp); return (B_FALSE); } - if (!nd_load(&tcp_g_nd, "tcp_bind_hash", tcp_bind_hash_report, + if (!nd_load(ndp, "tcp_bind_hash", tcp_bind_hash_report, NULL, NULL)) { - nd_free(&tcp_g_nd); + nd_free(ndp); return (B_FALSE); } - if (!nd_load(&tcp_g_nd, "tcp_listen_hash", tcp_listen_hash_report, - NULL, NULL)) { - nd_free(&tcp_g_nd); + if (!nd_load(ndp, "tcp_listen_hash", + tcp_listen_hash_report, NULL, NULL)) { + nd_free(ndp); return (B_FALSE); } - if (!nd_load(&tcp_g_nd, "tcp_conn_hash", tcp_conn_hash_report, + if (!nd_load(ndp, "tcp_conn_hash", tcp_conn_hash_report, NULL, NULL)) { - nd_free(&tcp_g_nd); + nd_free(ndp); return (B_FALSE); } - if (!nd_load(&tcp_g_nd, "tcp_acceptor_hash", tcp_acceptor_hash_report, - NULL, NULL)) { - nd_free(&tcp_g_nd); + if (!nd_load(ndp, "tcp_acceptor_hash", + tcp_acceptor_hash_report, NULL, NULL)) { + nd_free(ndp); return (B_FALSE); } - if (!nd_load(&tcp_g_nd, "tcp_host_param", tcp_host_param_report, + if (!nd_load(ndp, "tcp_host_param", tcp_host_param_report, tcp_host_param_set, NULL)) { - nd_free(&tcp_g_nd); + nd_free(ndp); return (B_FALSE); } - if (!nd_load(&tcp_g_nd, "tcp_host_param_ipv6", tcp_host_param_report, - tcp_host_param_set_ipv6, NULL)) { - nd_free(&tcp_g_nd); + if (!nd_load(ndp, "tcp_host_param_ipv6", + tcp_host_param_report, tcp_host_param_set_ipv6, NULL)) { + nd_free(ndp); return (B_FALSE); } - if (!nd_load(&tcp_g_nd, "tcp_1948_phrase", NULL, tcp_1948_phrase_set, - NULL)) { - nd_free(&tcp_g_nd); + if (!nd_load(ndp, "tcp_1948_phrase", NULL, + tcp_1948_phrase_set, NULL)) { + nd_free(ndp); return (B_FALSE); } - if (!nd_load(&tcp_g_nd, "tcp_reserved_port_list", + if (!nd_load(ndp, "tcp_reserved_port_list", tcp_reserved_port_list, NULL, NULL)) { - nd_free(&tcp_g_nd); + nd_free(ndp); return (B_FALSE); } /* @@ -11346,10 +11420,10 @@ tcp_param_register(tcpparam_t *tcppa, int cnt) * through printing of their name (no get or set routines) * XXX Remove in future releases ? */ - if (!nd_load(&tcp_g_nd, + if (!nd_load(ndp, "tcp_close_wait_interval(obsoleted - " "use tcp_time_wait_interval)", NULL, NULL, NULL)) { - nd_free(&tcp_g_nd); + nd_free(ndp); return (B_FALSE); } return (B_TRUE); @@ -11412,6 +11486,7 @@ tcp_reass(tcp_t *tcp, mblk_t *mp, uint32_t start) mblk_t *mp2; mblk_t *next_mp; uint32_t u1; + tcp_stack_t *tcps = tcp->tcp_tcps; /* Walk through all the new pieces. 
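tcp_param_register() above now takes the per-stack name-dispatch handle and per-stack copies of the writable tcpparam_t templates, so each IP Instance gets its own ndd variables. A trimmed sketch of the registration step for one parameter, following the same kmem_zalloc/bcopy/nd_load shape; the wrapper name is hypothetical:

	/*
	 * Sketch only: register one per-stack ndd parameter by cloning the
	 * local template into the tcp_stack_t and loading it into that
	 * stack's dispatch table, as tcp_param_register() does above.
	 */
	static boolean_t
	example_register_wroff_param(IDP *ndp, tcp_stack_t *tcps)
	{
		tcps->tcps_wroff_xtra_param = kmem_zalloc(sizeof (tcpparam_t),
		    KM_SLEEP);
		bcopy(&lcl_tcp_wroff_xtra_param, tcps->tcps_wroff_xtra_param,
		    sizeof (tcpparam_t));
		if (!nd_load(ndp, tcps->tcps_wroff_xtra_param->tcp_param_name,
		    tcp_param_get, tcp_param_set_aligned,
		    (caddr_t)tcps->tcps_wroff_xtra_param)) {
			nd_free(ndp);
			return (B_FALSE);
		}
		return (B_TRUE);
	}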
*/ do { @@ -11431,8 +11506,8 @@ tcp_reass(tcp_t *tcp, mblk_t *mp, uint32_t start) if (!mp1) { tcp->tcp_reass_tail = mp; tcp->tcp_reass_head = mp; - BUMP_MIB(&tcp_mib, tcpInDataUnorderSegs); - UPDATE_MIB(&tcp_mib, + BUMP_MIB(&tcps->tcps_mib, tcpInDataUnorderSegs); + UPDATE_MIB(&tcps->tcps_mib, tcpInDataUnorderBytes, end - start); continue; } @@ -11441,8 +11516,8 @@ tcp_reass(tcp_t *tcp, mblk_t *mp, uint32_t start) /* Link it on end. */ mp1->b_cont = mp; tcp->tcp_reass_tail = mp; - BUMP_MIB(&tcp_mib, tcpInDataUnorderSegs); - UPDATE_MIB(&tcp_mib, + BUMP_MIB(&tcps->tcps_mib, tcpInDataUnorderSegs); + UPDATE_MIB(&tcps->tcps_mib, tcpInDataUnorderBytes, end - start); continue; } @@ -11508,6 +11583,7 @@ tcp_reass_elim_overlap(tcp_t *tcp, mblk_t *mp) uint32_t end; mblk_t *mp1; uint32_t u1; + tcp_stack_t *tcps = tcp->tcp_tcps; end = TCP_REASS_END(mp); while ((mp1 = mp->b_cont) != NULL) { @@ -11517,16 +11593,17 @@ tcp_reass_elim_overlap(tcp_t *tcp, mblk_t *mp) if (!SEQ_GEQ(end, TCP_REASS_END(mp1))) { mp->b_wptr -= end - u1; TCP_REASS_SET_END(mp, u1); - BUMP_MIB(&tcp_mib, tcpInDataPartDupSegs); - UPDATE_MIB(&tcp_mib, tcpInDataPartDupBytes, end - u1); + BUMP_MIB(&tcps->tcps_mib, tcpInDataPartDupSegs); + UPDATE_MIB(&tcps->tcps_mib, + tcpInDataPartDupBytes, end - u1); break; } mp->b_cont = mp1->b_cont; TCP_REASS_SET_SEQ(mp1, 0); TCP_REASS_SET_END(mp1, 0); freeb(mp1); - BUMP_MIB(&tcp_mib, tcpInDataDupSegs); - UPDATE_MIB(&tcp_mib, tcpInDataDupBytes, end - u1); + BUMP_MIB(&tcps->tcps_mib, tcpInDataDupSegs); + UPDATE_MIB(&tcps->tcps_mib, tcpInDataDupBytes, end - u1); } if (!mp1) tcp->tcp_reass_tail = mp; @@ -11544,6 +11621,8 @@ tcp_rcv_drain(queue_t *q, tcp_t *tcp) #ifdef DEBUG uint_t cnt = 0; #endif + tcp_stack_t *tcps = tcp->tcp_tcps; + /* Can't drain on an eager connection */ if (tcp->tcp_listener != NULL) return (ret); @@ -11598,7 +11677,7 @@ tcp_rcv_drain(queue_t *q, tcp_t *tcp) * deferred acks segments, send an update immediately. */ if (thwin < tcp->tcp_rack_cur_max * tcp->tcp_mss) { - BUMP_MIB(&tcp_mib, tcpOutWinUpdate); + BUMP_MIB(&tcps->tcps_mib, tcpOutWinUpdate); ret = TH_ACK_NEEDED; } tcp->tcp_rwnd = q->q_hiwat; @@ -11684,8 +11763,9 @@ tcp_input(void *arg, mblk_t *mp, void *arg2) if (tcp->tcp_state == TCPS_CLOSED || tcp->tcp_state == TCPS_BOUND) { conn_t *new_connp; + ip_stack_t *ipst = tcp->tcp_tcps->tcps_netstack->netstack_ip; - new_connp = ipcl_classify(mp, connp->conn_zoneid); + new_connp = ipcl_classify(mp, connp->conn_zoneid, ipst); if (new_connp != NULL) { tcp_reinput(new_connp, mp, arg2); return; @@ -11809,8 +11889,9 @@ tcp_set_rto(tcp_t *tcp, clock_t rtt) clock_t sa = tcp->tcp_rtt_sa; clock_t sv = tcp->tcp_rtt_sd; clock_t rto; + tcp_stack_t *tcps = tcp->tcp_tcps; - BUMP_MIB(&tcp_mib, tcpRttUpdate); + BUMP_MIB(&tcps->tcps_mib, tcpRttUpdate); tcp->tcp_rtt_update++; /* tcp_rtt_sa is not 0 means this is a new sample. */ @@ -11877,12 +11958,12 @@ tcp_set_rto(tcp_t *tcp, clock_t rtt) * deviation of RTO to accomodate burstiness of 1/4 of * window size. 
*/ - rto = (sa >> 3) + sv + tcp_rexmit_interval_extra + (sa >> 5); + rto = (sa >> 3) + sv + tcps->tcps_rexmit_interval_extra + (sa >> 5); - if (rto > tcp_rexmit_interval_max) { - tcp->tcp_rto = tcp_rexmit_interval_max; - } else if (rto < tcp_rexmit_interval_min) { - tcp->tcp_rto = tcp_rexmit_interval_min; + if (rto > tcps->tcps_rexmit_interval_max) { + tcp->tcp_rto = tcps->tcps_rexmit_interval_max; + } else if (rto < tcps->tcps_rexmit_interval_min) { + tcp->tcp_rto = tcps->tcps_rexmit_interval_min; } else { tcp->tcp_rto = rto; } @@ -11952,6 +12033,7 @@ tcp_sack_rxmit(tcp_t *tcp, uint_t *flags) int32_t mss; uint32_t seg_len; mblk_t *xmit_mp; + tcp_stack_t *tcps = tcp->tcp_tcps; ASSERT(tcp->tcp_sack_info != NULL); ASSERT(tcp->tcp_notsack_list != NULL); @@ -11988,7 +12070,7 @@ tcp_sack_rxmit(tcp_t *tcp, uint_t *flags) for (; notsack_blk != NULL; notsack_blk = notsack_blk->next) { if (SEQ_GT(notsack_blk->end, begin) && (notsack_blk->sack_cnt >= - tcp_dupack_fast_retransmit)) { + tcps->tcps_dupack_fast_retransmit)) { end = notsack_blk->end; if (SEQ_LT(begin, notsack_blk->begin)) { begin = notsack_blk->begin; @@ -12046,9 +12128,9 @@ tcp_sack_rxmit(tcp_t *tcp, uint_t *flags) */ snxt_mp->b_prev = (mblk_t *)lbolt; - BUMP_MIB(&tcp_mib, tcpRetransSegs); - UPDATE_MIB(&tcp_mib, tcpRetransBytes, seg_len); - BUMP_MIB(&tcp_mib, tcpOutSackRetransSegs); + BUMP_MIB(&tcps->tcps_mib, tcpRetransSegs); + UPDATE_MIB(&tcps->tcps_mib, tcpRetransBytes, seg_len); + BUMP_MIB(&tcps->tcps_mib, tcpOutSackRetransSegs); /* * Update tcp_rexmit_max to extend this SACK recovery phase. * This happens when new data sent during fast recovery is @@ -12076,6 +12158,9 @@ tcp_check_policy(tcp_t *tcp, mblk_t *first_mp, ipha_t *ipha, ip6_t *ip6h, ipsec_in_t *ii; const char *reason; kstat_named_t *counter; + tcp_stack_t *tcps = tcp->tcp_tcps; + ipsec_stack_t *ipss; + ip_stack_t *ipst; ASSERT(mctl_present || !secure); @@ -12093,9 +12178,13 @@ tcp_check_policy(tcp_t *tcp, mblk_t *first_mp, ipha_t *ipha, ip6_t *ip6h, act->ipa_act.ipa_type == IPSEC_ACT_CLEAR) return (B_TRUE); ipsec_log_policy_failure(IPSEC_POLICY_MISMATCH, - "tcp_check_policy", ipha, ip6h, secure); + "tcp_check_policy", ipha, ip6h, secure, + tcps->tcps_netstack); + ipss = tcps->tcps_netstack->netstack_ipsec; + ip_drop_packet(first_mp, B_TRUE, NULL, NULL, - &ipdrops_tcp_clear, &tcp_dropper); + DROPPER(ipss, ipds_tcp_clear), + &tcps->tcps_dropper); return (B_FALSE); } @@ -12104,9 +12193,13 @@ tcp_check_policy(tcp_t *tcp, mblk_t *first_mp, ipha_t *ipha, ip6_t *ip6h, */ if (act == NULL) { ipsec_log_policy_failure(IPSEC_POLICY_NOT_NEEDED, - "tcp_check_policy", ipha, ip6h, secure); + "tcp_check_policy", ipha, ip6h, secure, + tcps->tcps_netstack); + ipss = tcps->tcps_netstack->netstack_ipsec; + ip_drop_packet(first_mp, B_TRUE, NULL, NULL, - &ipdrops_tcp_secure, &tcp_dropper); + DROPPER(ipss, ipds_tcp_secure), + &tcps->tcps_dropper); return (B_FALSE); } @@ -12122,17 +12215,20 @@ tcp_check_policy(tcp_t *tcp, mblk_t *first_mp, ipha_t *ipha, ip6_t *ip6h, ii = (ipsec_in_t *)first_mp->b_rptr; + ipst = tcps->tcps_netstack->netstack_ip; + if (ipsec_check_ipsecin_latch(ii, data_mp, ipl, ipha, ip6h, &reason, &counter, tcp->tcp_connp)) { - BUMP_MIB(&ip_mib, ipsecInSucceeded); + BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded); return (B_TRUE); } (void) strlog(TCP_MOD_ID, 0, 0, SL_ERROR|SL_WARN|SL_CONSOLE, "tcp inbound policy mismatch: %s, packet dropped\n", reason); - BUMP_MIB(&ip_mib, ipsecInFailed); + BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed); - ip_drop_packet(first_mp, B_TRUE, NULL, NULL, 
counter, &tcp_dropper); + ip_drop_packet(first_mp, B_TRUE, NULL, NULL, counter, + &tcps->tcps_dropper); return (B_FALSE); } @@ -12153,6 +12249,7 @@ tcp_ss_rexmit(tcp_t *tcp) int32_t off; int32_t burst = tcp->tcp_snd_burst; mblk_t *snxt_mp; + tcp_stack_t *tcps = tcp->tcp_tcps; /* * Note that tcp_rexmit can be set even though TCP has retransmitted @@ -12195,8 +12292,8 @@ tcp_ss_rexmit(tcp_t *tcp) * retransmission. */ old_snxt_mp->b_prev = (mblk_t *)lbolt; - BUMP_MIB(&tcp_mib, tcpRetransSegs); - UPDATE_MIB(&tcp_mib, tcpRetransBytes, cnt); + BUMP_MIB(&tcps->tcps_mib, tcpRetransSegs); + UPDATE_MIB(&tcps->tcps_mib, tcpRetransBytes, cnt); tcp->tcp_rexmit_nxt = snxt; burst--; @@ -12236,6 +12333,7 @@ tcp_process_options(tcp_t *tcp, tcph_t *tcph) tcp_opt_t tcpopt; uint32_t mss_max; char *tmp_tcph; + tcp_stack_t *tcps = tcp->tcp_tcps; tcpopt.tcp = NULL; options = tcp_parse_options(tcph, &tcpopt); @@ -12248,16 +12346,16 @@ tcp_process_options(tcp_t *tcp, tcph_t *tcph) */ if (!(options & TCP_OPT_MSS_PRESENT)) { if (tcp->tcp_ipversion == IPV4_VERSION) - tcpopt.tcp_opt_mss = tcp_mss_def_ipv4; + tcpopt.tcp_opt_mss = tcps->tcps_mss_def_ipv4; else - tcpopt.tcp_opt_mss = tcp_mss_def_ipv6; + tcpopt.tcp_opt_mss = tcps->tcps_mss_def_ipv6; } else { if (tcp->tcp_ipversion == IPV4_VERSION) - mss_max = tcp_mss_max_ipv4; + mss_max = tcps->tcps_mss_max_ipv4; else - mss_max = tcp_mss_max_ipv6; - if (tcpopt.tcp_opt_mss < tcp_mss_min) - tcpopt.tcp_opt_mss = tcp_mss_min; + mss_max = tcps->tcps_mss_max_ipv6; + if (tcpopt.tcp_opt_mss < tcps->tcps_mss_min) + tcpopt.tcp_opt_mss = tcps->tcps_mss_min; else if (tcpopt.tcp_opt_mss > mss_max) tcpopt.tcp_opt_mss = mss_max; } @@ -12317,7 +12415,7 @@ tcp_process_options(tcp_t *tcp, tcph_t *tcph) */ if ((options & TCP_OPT_SACK_OK_PRESENT) && (tcp->tcp_snd_sack_ok || - (tcp_sack_permitted != 0 && TCP_IS_DETACHED(tcp)))) { + (tcps->tcps_sack_permitted != 0 && TCP_IS_DETACHED(tcp)))) { /* This should be true only in the passive case. 
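tcp_check_policy() above charges IPsec policy failures to the per-stack drop statistics, using the DROPPER slots in the netstack's ipsec_stack_t and the stack-private tcps_dropper rather than the old global counters. A minimal sketch of that accounting path; the helper is hypothetical:

	/*
	 * Sketch only: drop a clear-text packet against the per-stack
	 * IPsec drop counters, as tcp_check_policy() above now does.
	 */
	static void
	example_drop_clear_text(mblk_t *first_mp, tcp_stack_t *tcps)
	{
		ipsec_stack_t *ipss = tcps->tcps_netstack->netstack_ipsec;

		ip_drop_packet(first_mp, B_TRUE, NULL, NULL,
		    DROPPER(ipss, ipds_tcp_clear), &tcps->tcps_dropper);
	}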
*/ if (tcp->tcp_sack_info == NULL) { ASSERT(TCP_IS_DETACHED(tcp)); @@ -12398,6 +12496,7 @@ tcp_send_conn_ind(void *arg, mblk_t *mp, void *arg2) struct T_conn_ind *conn_ind; ipaddr_t *addr_cache; boolean_t need_send_conn_ind = B_FALSE; + tcp_stack_t *tcps = listener->tcp_tcps; /* retrieve the eager */ conn_ind = (struct T_conn_ind *)mp->b_rptr; @@ -12509,7 +12608,7 @@ tcp_send_conn_ind(void *arg, mblk_t *mp, void *arg2) listener->tcp_syn_rcvd_timeout--; if (listener->tcp_syn_defense && listener->tcp_syn_rcvd_timeout <= - (tcp_conn_req_max_q0 >> 5) && + (tcps->tcps_conn_req_max_q0 >> 5) && 10*MINUTES < TICK_TO_MSEC(lbolt64 - listener->tcp_last_rcv_lbolt)) { /* @@ -12552,6 +12651,7 @@ tcp_find_pktinfo(tcp_t *tcp, mblk_t *mp, uint_t *ipversp, uint_t *ip_hdr_lenp, ip6_pkt_t ipp; uint_t ipvers; uint_t ip_hdr_len; + tcp_stack_t *tcps = tcp->tcp_tcps; rptr = mp->b_rptr; ASSERT(OK_32PTR(rptr)); @@ -12616,12 +12716,13 @@ tcp_find_pktinfo(tcp_t *tcp, mblk_t *mp, uint_t *ipversp, uint_t *ip_hdr_lenp, if (ip6h->ip6_nxt != IPPROTO_TCP) { uint8_t nexthdrp; + ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip; /* Look for ifindex information */ if (ip6h->ip6_nxt == IPPROTO_RAW) { ip6i_t *ip6i = (ip6i_t *)ip6h; if ((uchar_t *)&ip6i[1] > mp->b_wptr) { - BUMP_MIB(&ip_mib, tcpInErrs); + BUMP_MIB(&ipst->ips_ip_mib, tcpInErrs); freemsg(first_mp); return (NULL); } @@ -12643,7 +12744,7 @@ tcp_find_pktinfo(tcp_t *tcp, mblk_t *mp, uint_t *ipversp, uint_t *ip_hdr_lenp, } if (MBLKL(mp) < IPV6_HDR_LEN + sizeof (tcph_t)) { - BUMP_MIB(&ip_mib, tcpInErrs); + BUMP_MIB(&ipst->ips_ip_mib, tcpInErrs); freemsg(first_mp); return (NULL); } @@ -12658,7 +12759,7 @@ tcp_find_pktinfo(tcp_t *tcp, mblk_t *mp, uint_t *ipversp, uint_t *ip_hdr_lenp, ip_hdr_len = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdrp); /* Verify if this is a TCP packet */ if (nexthdrp != IPPROTO_TCP) { - BUMP_MIB(&ip_mib, tcpInErrs); + BUMP_MIB(&ipst->ips_ip_mib, tcpInErrs); freemsg(first_mp); return (NULL); } @@ -12730,12 +12831,13 @@ tcp_rput_data(void *arg, mblk_t *mp, void *arg2) conn_t *connp = (conn_t *)arg; squeue_t *sqp = (squeue_t *)arg2; tcp_t *tcp = connp->conn_tcp; + tcp_stack_t *tcps = tcp->tcp_tcps; /* * RST from fused tcp loopback peer should trigger an unfuse. */ if (tcp->tcp_fused) { - TCP_STAT(tcp_fusion_aborted); + TCP_STAT(tcps, tcp_fusion_aborted); tcp_unfuse(tcp); } @@ -12755,7 +12857,7 @@ tcp_rput_data(void *arg, mblk_t *mp, void *arg2) mp = tcp_find_pktinfo(tcp, mp, &ipvers, &ip_hdr_len, NULL, &ipp); if (mp == NULL) { - TCP_STAT(tcp_rput_v6_error); + TCP_STAT(tcps, tcp_rput_v6_error); return; } iphdr = mp->b_rptr; @@ -12896,11 +12998,13 @@ tcp_rput_data(void *arg, mblk_t *mp, void *arg2) if (tcp->tcp_snd_sack_ok) { (void) mi_set_sth_wroff(tcp->tcp_rq, tcp->tcp_hdr_len + TCPOPT_MAX_SACK_LEN + - (tcp->tcp_loopback ? 0 : tcp_wroff_xtra)); + (tcp->tcp_loopback ? 0 : + tcps->tcps_wroff_xtra)); } else { (void) mi_set_sth_wroff(tcp->tcp_rq, tcp->tcp_hdr_len + - (tcp->tcp_loopback ? 0 : tcp_wroff_xtra)); + (tcp->tcp_loopback ? 0 : + tcps->tcps_wroff_xtra)); } } if (flags & TH_ACK) { @@ -12997,7 +13101,7 @@ tcp_rput_data(void *arg, mblk_t *mp, void *arg2) TCP_TRACE_SEND_PKT); tcp_send_data(tcp, tcp->tcp_wq, ack_mp); BUMP_LOCAL(tcp->tcp_obsegs); - BUMP_MIB(&tcp_mib, tcpOutAck); + BUMP_MIB(&tcps->tcps_mib, tcpOutAck); /* Send up T_CONN_CON */ putnext(tcp->tcp_rq, mp1); @@ -13012,7 +13116,7 @@ tcp_rput_data(void *arg, mblk_t *mp, void *arg2) * as usual. Mark this tcp as not capable * of fusion. 
*/ - TCP_STAT(tcp_fusion_unfusable); + TCP_STAT(tcps, tcp_fusion_unfusable); tcp->tcp_unfusable = B_TRUE; putnext(tcp->tcp_rq, mp1); } @@ -13091,8 +13195,9 @@ tcp_rput_data(void *arg, mblk_t *mp, void *arg2) case TCPS_CLOSED: case TCPS_BOUND: { conn_t *new_connp; + ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip; - new_connp = ipcl_classify(mp, connp->conn_zoneid); + new_connp = ipcl_classify(mp, connp->conn_zoneid, ipst); if (new_connp != NULL) { tcp_reinput(new_connp, mp, connp->conn_sqp); return; @@ -13127,7 +13232,7 @@ tcp_rput_data(void *arg, mblk_t *mp, void *arg2) */ if (TCP_IS_DETACHED_NONEAGER(tcp) && (seg_len > 0 && SEQ_GT(seg_seq + seg_len, tcp->tcp_rnxt))) { - BUMP_MIB(&tcp_mib, tcpInClosed); + BUMP_MIB(&tcps->tcps_mib, tcpInClosed); TCP_RECORD_TRACE(tcp, mp, TCP_TRACE_RECV_PKT); @@ -13195,8 +13300,8 @@ try_again:; /* Recompute the gaps after noting the SYN. */ goto try_again; } - BUMP_MIB(&tcp_mib, tcpInDataDupSegs); - UPDATE_MIB(&tcp_mib, tcpInDataDupBytes, + BUMP_MIB(&tcps->tcps_mib, tcpInDataDupSegs); + UPDATE_MIB(&tcps->tcps_mib, tcpInDataDupBytes, (seg_len > -gap ? -gap : seg_len)); /* Remove the old stuff from seg_len. */ seg_len += gap; @@ -13313,10 +13418,11 @@ try_again:; mblk_t *mp2; if (tcp->tcp_rwnd == 0) { - BUMP_MIB(&tcp_mib, tcpInWinProbe); + BUMP_MIB(&tcps->tcps_mib, tcpInWinProbe); } else { - BUMP_MIB(&tcp_mib, tcpInDataPastWinSegs); - UPDATE_MIB(&tcp_mib, tcpInDataPastWinBytes, -rgap); + BUMP_MIB(&tcps->tcps_mib, tcpInDataPastWinSegs); + UPDATE_MIB(&tcps->tcps_mib, + tcpInDataPastWinBytes, -rgap); } /* @@ -13533,8 +13639,8 @@ ok:; } } } else if (seg_len > 0) { - BUMP_MIB(&tcp_mib, tcpInDataInorderSegs); - UPDATE_MIB(&tcp_mib, tcpInDataInorderBytes, seg_len); + BUMP_MIB(&tcps->tcps_mib, tcpInDataInorderSegs); + UPDATE_MIB(&tcps->tcps_mib, tcpInDataInorderBytes, seg_len); /* * If an out of order FIN was received before, and the seq * num and len of the new segment match that of the FIN, @@ -13910,7 +14016,7 @@ process_ack: * simultaneous active opens. */ if (tcp->tcp_loopback) { - TCP_STAT(tcp_fusion_unfusable); + TCP_STAT(tcps, tcp_fusion_unfusable); tcp->tcp_unfusable = B_TRUE; } } @@ -14006,7 +14112,7 @@ process_ack: if (!ofo_seg && seg_len == 0 && new_swnd == tcp->tcp_swnd) { int dupack_cnt; - BUMP_MIB(&tcp_mib, tcpInDupAck); + BUMP_MIB(&tcps->tcps_mib, tcpInDupAck); /* * Fast retransmit. When we have seen exactly three * identical ACKs while we have unacked data @@ -14019,7 +14125,7 @@ process_ack: ! tcp->tcp_rexmit) { /* Do Limited Transmit */ if ((dupack_cnt = ++tcp->tcp_dupack_cnt) < - tcp_dupack_fast_retransmit) { + tcps->tcps_dupack_fast_retransmit) { /* * RFC 3042 * @@ -14050,7 +14156,7 @@ process_ack: flags |= TH_LIMIT_XMIT; } } else if (dupack_cnt == - tcp_dupack_fast_retransmit) { + tcps->tcps_dupack_fast_retransmit) { /* * If we have reduced tcp_ssthresh @@ -14178,7 +14284,7 @@ process_ack: if (new_swnd != 0) { /* tcp_suna != tcp_snxt */ /* Packet contains a window update */ - BUMP_MIB(&tcp_mib, tcpInWinUpdate); + BUMP_MIB(&tcps->tcps_mib, tcpInWinUpdate); tcp->tcp_zero_win_probe = 0; tcp->tcp_timer_backoff = 0; tcp->tcp_ms_we_have_waited = 0; @@ -14216,7 +14322,7 @@ process_ack: * Should we send ACKs in response to ACK only segments? 
*/ if (SEQ_GT(seg_ack, tcp->tcp_snxt)) { - BUMP_MIB(&tcp_mib, tcpInAckUnsent); + BUMP_MIB(&tcps->tcps_mib, tcpInAckUnsent); /* drop the received segment */ freemsg(mp); @@ -14231,14 +14337,14 @@ process_ack: */ if (tcp_drop_ack_unsent_cnt > 0 && ++tcp->tcp_in_ack_unsent > tcp_drop_ack_unsent_cnt) { - TCP_STAT(tcp_in_ack_unsent_drop); + TCP_STAT(tcps, tcp_in_ack_unsent_drop); return; } mp = tcp_ack_mp(tcp); if (mp != NULL) { TCP_RECORD_TRACE(tcp, mp, TCP_TRACE_SEND_PKT); BUMP_LOCAL(tcp->tcp_obsegs); - BUMP_MIB(&tcp_mib, tcpOutAck); + BUMP_MIB(&tcps->tcps_mib, tcpOutAck); tcp_send_data(tcp, tcp->tcp_wq, mp); } return; @@ -14259,7 +14365,7 @@ process_ack: * window was inflated to account for the other side's * cached packets, retract it. If it is, do Hoe's algorithm. */ - if (tcp->tcp_dupack_cnt >= tcp_dupack_fast_retransmit) { + if (tcp->tcp_dupack_cnt >= tcps->tcps_dupack_fast_retransmit) { ASSERT(tcp->tcp_rexmit == B_FALSE); if (SEQ_GEQ(seg_ack, tcp->tcp_rexmit_max)) { tcp->tcp_dupack_cnt = 0; @@ -14303,7 +14409,7 @@ process_ack: * segments. */ tcp->tcp_cwnd = tcp->tcp_cwnd_ssthresh + - tcp_dupack_fast_retransmit * mss; + tcps->tcps_dupack_fast_retransmit * mss; tcp->tcp_cwnd_cnt = tcp->tcp_cwnd; flags |= TH_REXMIT_NEEDED; } @@ -14342,8 +14448,8 @@ process_ack: } } - BUMP_MIB(&tcp_mib, tcpInAckSegs); - UPDATE_MIB(&tcp_mib, tcpInAckBytes, bytes_acked); + BUMP_MIB(&tcps->tcps_mib, tcpInAckSegs); + UPDATE_MIB(&tcps->tcps_mib, tcpInAckBytes, bytes_acked); tcp->tcp_suna = seg_ack; if (tcp->tcp_zero_win_probe != 0) { tcp->tcp_zero_win_probe = 0; @@ -14425,7 +14531,7 @@ process_ack: tcp_set_rto(tcp, (int32_t)lbolt - (int32_t)(intptr_t)mp1->b_prev); else - BUMP_MIB(&tcp_mib, tcpRttNoUpdate); + BUMP_MIB(&tcps->tcps_mib, tcpRttNoUpdate); /* Remeber the last sequence to be ACKed */ tcp->tcp_csuna = seg_ack; @@ -14434,7 +14540,7 @@ process_ack: tcp->tcp_set_timer = 0; } } else { - BUMP_MIB(&tcp_mib, tcpRttNoUpdate); + BUMP_MIB(&tcps->tcps_mib, tcpRttNoUpdate); } /* Eat acknowledged bytes off the xmit queue. */ @@ -14605,7 +14711,7 @@ est: * flushing the FIN_WAIT_2 connection. */ TCP_TIMER_RESTART(tcp, - tcp_fin_wait_2_flush_interval); + tcps->tcps_fin_wait_2_flush_interval); } break; case TCPS_FIN_WAIT_2: @@ -14628,10 +14734,10 @@ est: tcp->tcp_exclbind = 0; if (!TCP_IS_DETACHED(tcp)) { TCP_TIMER_RESTART(tcp, - tcp_time_wait_interval); + tcps->tcps_time_wait_interval); } else { tcp_time_wait_append(tcp); - TCP_DBGSTAT(tcp_rput_time_wait); + TCP_DBGSTAT(tcps, tcp_rput_time_wait); } } /*FALLTHRU*/ @@ -14683,10 +14789,10 @@ est: tcp->tcp_exclbind = 0; if (!TCP_IS_DETACHED(tcp)) { TCP_TIMER_RESTART(tcp, - tcp_time_wait_interval); + tcps->tcps_time_wait_interval); } else { tcp_time_wait_append(tcp); - TCP_DBGSTAT(tcp_rput_time_wait); + TCP_DBGSTAT(tcps, tcp_rput_time_wait); } if (seg_len) { /* @@ -14879,9 +14985,9 @@ est: * do anything for a detached tcp. 
*/ if (!TCP_IS_DETACHED(tcp)) - tcp->tcp_push_tid = TCP_TIMER(tcp, - tcp_push_timer, - MSEC_TO_TICK(tcp_push_timer_interval)); + tcp->tcp_push_tid = TCP_TIMER(tcp, + tcp_push_timer, + MSEC_TO_TICK(tcps->tcps_push_timer_interval)); } } xmit_check: @@ -14898,7 +15004,7 @@ xmit_check: if (flags & TH_REXMIT_NEEDED) { uint32_t snd_size = tcp->tcp_snxt - tcp->tcp_suna; - BUMP_MIB(&tcp_mib, tcpOutFastRetrans); + BUMP_MIB(&tcps->tcps_mib, tcpOutFastRetrans); if (snd_size > mss) snd_size = mss; if (snd_size > tcp->tcp_swnd) @@ -14910,8 +15016,9 @@ xmit_check: if (mp1 != NULL) { tcp->tcp_xmit_head->b_prev = (mblk_t *)lbolt; tcp->tcp_csuna = tcp->tcp_snxt; - BUMP_MIB(&tcp_mib, tcpRetransSegs); - UPDATE_MIB(&tcp_mib, tcpRetransBytes, snd_size); + BUMP_MIB(&tcps->tcps_mib, tcpRetransSegs); + UPDATE_MIB(&tcps->tcps_mib, + tcpRetransBytes, snd_size); TCP_RECORD_TRACE(tcp, mp1, TCP_TRACE_SEND_PKT); tcp_send_data(tcp, tcp->tcp_wq, mp1); @@ -14985,7 +15092,7 @@ ack_check: TCP_RECORD_TRACE(tcp, mp1, TCP_TRACE_SEND_PKT); tcp_send_data(tcp, tcp->tcp_wq, mp1); BUMP_LOCAL(tcp->tcp_obsegs); - BUMP_MIB(&tcp_mib, tcpOutAck); + BUMP_MIB(&tcps->tcps_mib, tcpOutAck); } if (tcp->tcp_ack_tid != 0) { (void) TCP_TIMER_CANCEL(tcp, tcp->tcp_ack_tid); @@ -15000,8 +15107,8 @@ ack_check: if (tcp->tcp_ack_tid == 0) { tcp->tcp_ack_tid = TCP_TIMER(tcp, tcp_ack_timer, MSEC_TO_TICK(tcp->tcp_localnet ? - (clock_t)tcp_local_dack_interval : - (clock_t)tcp_deferred_ack_interval)); + (clock_t)tcps->tcps_local_dack_interval : + (clock_t)tcps->tcps_deferred_ack_interval)); } } if (flags & TH_ORDREL_NEEDED) { @@ -15470,6 +15577,7 @@ tcp_rput_other(tcp_t *tcp, mblk_t *mp) mblk_t *lsoi; int retval; mblk_t *ire_mp; + tcp_stack_t *tcps = tcp->tcp_tcps; switch (mp->b_datap->db_type) { case M_PROTO: @@ -15580,12 +15688,12 @@ tcp_rput_other(tcp_t *tcp, mblk_t *mp) * round up. */ tcp->tcp_rwnd = MAX(MSS_ROUNDUP(tcp->tcp_rwnd, mss), - tcp_recv_hiwat_minmss * mss); + tcps->tcps_recv_hiwat_minmss * mss); q->q_hiwat = tcp->tcp_rwnd; tcp_set_ws_value(tcp); U32_TO_ABE16((tcp->tcp_rwnd >> tcp->tcp_rcv_ws), tcp->tcp_tcph->th_win); - if (tcp->tcp_rcv_ws > 0 || tcp_wscale_always) + if (tcp->tcp_rcv_ws > 0 || tcps->tcps_wscale_always) tcp->tcp_snd_ws_ok = B_TRUE; /* @@ -15594,8 +15702,8 @@ tcp_rput_other(tcp_t *tcp, mblk_t *mp) * include the timestamp * option in the SYN segment. */ - if (tcp_tstamp_always || - (tcp->tcp_rcv_ws && tcp_tstamp_if_wscale)) { + if (tcps->tcps_tstamp_always || + (tcp->tcp_rcv_ws && tcps->tcps_tstamp_if_wscale)) { tcp->tcp_snd_ts_ok = B_TRUE; } @@ -15604,7 +15712,7 @@ tcp_rput_other(tcp_t *tcp, mblk_t *mp) * tcp_adapt_ire() if the sack metric * is set. So check it here also. */ - if (tcp_sack_permitted == 2 || + if (tcps->tcps_sack_permitted == 2 || tcp->tcp_snd_sack_ok) { if (tcp->tcp_sack_info == NULL) { tcp->tcp_sack_info = @@ -15622,7 +15730,7 @@ tcp_rput_other(tcp_t *tcp, mblk_t *mp) * enabled IP packets. Setting it to 1 avoids * compatibility problems. 
*/ - if (tcp_ecn_permitted == 2) + if (tcps->tcps_ecn_permitted == 2) tcp->tcp_ecn_ok = B_TRUE; TCP_TIMER_RESTART(tcp, tcp->tcp_rto); @@ -15778,10 +15886,11 @@ tcp_rsrv_input(void *arg, mblk_t *mp, void *arg2) tcp_t *tcp = connp->conn_tcp; queue_t *q = tcp->tcp_rq; uint_t thwin; + tcp_stack_t *tcps = tcp->tcp_tcps; freeb(mp); - TCP_STAT(tcp_rsrv_calls); + TCP_STAT(tcps, tcp_rsrv_calls); if (TCP_IS_DETACHED(tcp) || q == NULL) { return; @@ -15809,7 +15918,7 @@ tcp_rsrv_input(void *arg, mblk_t *mp, void *arg2) tcp_clrqfull(peer_tcp); TCP_FUSE_SYNCSTR_UNPLUG_DRAIN(tcp); - TCP_STAT(tcp_fusion_backenabled); + TCP_STAT(tcps, tcp_fusion_backenabled); return; } @@ -15829,7 +15938,7 @@ tcp_rsrv_input(void *arg, mblk_t *mp, void *arg2) tcp_xmit_ctl(NULL, tcp, (tcp->tcp_swnd == 0) ? tcp->tcp_suna : tcp->tcp_snxt, tcp->tcp_rnxt, TH_ACK); - BUMP_MIB(&tcp_mib, tcpOutWinUpdate); + BUMP_MIB(&tcps->tcps_mib, tcpOutWinUpdate); } } /* Handle a failure to allocate a T_ORDREL_IND here */ @@ -15879,12 +15988,13 @@ tcp_rsrv(queue_t *q) conn_t *connp = Q_TO_CONN(q); tcp_t *tcp = connp->conn_tcp; mblk_t *mp; + tcp_stack_t *tcps = tcp->tcp_tcps; /* No code does a putq on the read side */ ASSERT(q->q_first == NULL); /* Nothing to do for the default queue */ - if (q == tcp_g_q) { + if (q == tcps->tcps_g_q) { return; } @@ -15937,6 +16047,7 @@ tcp_rwnd_set(tcp_t *tcp, uint32_t rwnd) uint32_t old_max_rwnd; uint32_t max_transmittable_rwnd; boolean_t tcp_detached = TCP_IS_DETACHED(tcp); + tcp_stack_t *tcps = tcp->tcp_tcps; if (tcp->tcp_fused) { size_t sth_hiwat; @@ -15973,7 +16084,7 @@ tcp_rwnd_set(tcp_t *tcp, uint32_t rwnd) * funny TCP interactions of Nagle algorithm, SWS avoidance * and delayed acknowledgement. */ - rwnd = MAX(rwnd, tcp_recv_hiwat_minmss * mss); + rwnd = MAX(rwnd, tcps->tcps_recv_hiwat_minmss * mss); /* * If window size info has already been exchanged, TCP should not @@ -16005,7 +16116,7 @@ tcp_rwnd_set(tcp_t *tcp, uint32_t rwnd) } if (tcp->tcp_localnet) { tcp->tcp_rack_abs_max = - MIN(tcp_local_dacks_max, rwnd / mss / 2); + MIN(tcps->tcps_local_dacks_max, rwnd / mss / 2); } else { /* * For a remote host on a different subnet (through a router), @@ -16013,7 +16124,7 @@ tcp_rwnd_set(tcp_t *tcp, uint32_t rwnd) * tcp_deferred_acks_max is default to 2. */ tcp->tcp_rack_abs_max = - MIN(tcp_deferred_acks_max, rwnd / mss / 2); + MIN(tcps->tcps_deferred_acks_max, rwnd / mss / 2); } if (tcp->tcp_rack_cur_max > tcp->tcp_rack_abs_max) tcp->tcp_rack_cur_max = tcp->tcp_rack_abs_max; @@ -16042,7 +16153,8 @@ tcp_rwnd_set(tcp_t *tcp, uint32_t rwnd) * prefer to choose these values algorithmically, with a likely * relationship to rwnd. 
*/ - (void) mi_set_sth_hiwat(tcp->tcp_rq, MAX(rwnd, tcp_sth_rcv_hiwat)); + (void) mi_set_sth_hiwat(tcp->tcp_rq, + MAX(rwnd, tcps->tcps_sth_rcv_hiwat)); return (rwnd); } @@ -16072,6 +16184,8 @@ tcp_snmp_get(queue_t *q, mblk_t *mpctl) zoneid_t zoneid; int v4_conn_idx; int v6_conn_idx; + tcp_stack_t *tcps = Q_TO_TCP(q)->tcp_tcps; + ip_stack_t *ipst; if (mpctl == NULL || (mpdata = mpctl->b_cont) == NULL || @@ -16087,22 +16201,23 @@ tcp_snmp_get(queue_t *q, mblk_t *mpctl) } /* build table of connections -- need count in fixed part */ - SET_MIB(tcp_mib.tcpRtoAlgorithm, 4); /* vanj */ - SET_MIB(tcp_mib.tcpRtoMin, tcp_rexmit_interval_min); - SET_MIB(tcp_mib.tcpRtoMax, tcp_rexmit_interval_max); - SET_MIB(tcp_mib.tcpMaxConn, -1); - SET_MIB(tcp_mib.tcpCurrEstab, 0); + SET_MIB(tcps->tcps_mib.tcpRtoAlgorithm, 4); /* vanj */ + SET_MIB(tcps->tcps_mib.tcpRtoMin, tcps->tcps_rexmit_interval_min); + SET_MIB(tcps->tcps_mib.tcpRtoMax, tcps->tcps_rexmit_interval_max); + SET_MIB(tcps->tcps_mib.tcpMaxConn, -1); + SET_MIB(tcps->tcps_mib.tcpCurrEstab, 0); ispriv = - secpolicy_net_config((Q_TO_CONN(q))->conn_cred, B_TRUE) == 0; + secpolicy_ip_config((Q_TO_CONN(q))->conn_cred, B_TRUE) == 0; zoneid = Q_TO_CONN(q)->conn_zoneid; v4_conn_idx = v6_conn_idx = 0; mp_conn_tail = mp_attr_tail = mp6_conn_tail = mp6_attr_tail = NULL; for (i = 0; i < CONN_G_HASH_SIZE; i++) { + ipst = tcps->tcps_netstack->netstack_ip; - connfp = &ipcl_globalhash_fanout[i]; + connfp = &ipst->ips_ipcl_globalhash_fanout[i]; connp = NULL; @@ -16115,16 +16230,18 @@ tcp_snmp_get(queue_t *q, mblk_t *mpctl) continue; /* not in this zone */ tcp = connp->conn_tcp; - UPDATE_MIB(&tcp_mib, tcpHCInSegs, tcp->tcp_ibsegs); + UPDATE_MIB(&tcps->tcps_mib, + tcpHCInSegs, tcp->tcp_ibsegs); tcp->tcp_ibsegs = 0; - UPDATE_MIB(&tcp_mib, tcpHCOutSegs, tcp->tcp_obsegs); + UPDATE_MIB(&tcps->tcps_mib, + tcpHCOutSegs, tcp->tcp_obsegs); tcp->tcp_obsegs = 0; tce6.tcp6ConnState = tce.tcpConnState = tcp_snmp_state(tcp); if (tce.tcpConnState == MIB2_TCP_established || tce.tcpConnState == MIB2_TCP_closeWait) - BUMP_MIB(&tcp_mib, tcpCurrEstab); + BUMP_MIB(&tcps->tcps_mib, tcpCurrEstab); needattr = B_FALSE; bzero(&mlp, sizeof (mlp)); @@ -16268,15 +16385,17 @@ tcp_snmp_get(queue_t *q, mblk_t *mpctl) } /* fixed length structure for IPv4 and IPv6 counters */ - SET_MIB(tcp_mib.tcpConnTableSize, sizeof (mib2_tcpConnEntry_t)); - SET_MIB(tcp_mib.tcp6ConnTableSize, sizeof (mib2_tcp6ConnEntry_t)); + SET_MIB(tcps->tcps_mib.tcpConnTableSize, sizeof (mib2_tcpConnEntry_t)); + SET_MIB(tcps->tcps_mib.tcp6ConnTableSize, + sizeof (mib2_tcp6ConnEntry_t)); /* synchronize 32- and 64-bit counters */ - SYNC32_MIB(&tcp_mib, tcpInSegs, tcpHCInSegs); - SYNC32_MIB(&tcp_mib, tcpOutSegs, tcpHCOutSegs); + SYNC32_MIB(&tcps->tcps_mib, tcpInSegs, tcpHCInSegs); + SYNC32_MIB(&tcps->tcps_mib, tcpOutSegs, tcpHCOutSegs); optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; optp->level = MIB2_TCP; optp->name = 0; - (void) snmp_append_data(mpdata, (char *)&tcp_mib, sizeof (tcp_mib)); + (void) snmp_append_data(mpdata, (char *)&tcps->tcps_mib, + sizeof (tcps->tcps_mib)); optp->len = msgdsize(mpdata); qreply(q, mpctl); @@ -16395,7 +16514,7 @@ tcp_report_item(mblk_t *mp, tcp_t *tcp, int hashval, tcp_t *thisstream, cred_t *cr) { char hash[10], addrbuf[INET6_ADDRSTRLEN]; - boolean_t ispriv = secpolicy_net_config(cr, B_TRUE) == 0; + boolean_t ispriv = secpolicy_ip_config(cr, B_TRUE) == 0; char cflag; in6_addr_t v6dst; char buf[80]; @@ -16512,6 +16631,11 @@ tcp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, 
cred_t *cr) conn_t *connp; connf_t *connfp; zoneid_t zoneid; + tcp_stack_t *tcps; + ip_stack_t *ipst; + + zoneid = Q_TO_CONN(q)->conn_zoneid; + tcps = Q_TO_TCP(q)->tcp_tcps; /* * Because of the ndd constraint, at most we can have 64K buffer @@ -16521,9 +16645,9 @@ tcp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) * we limit the rate of doing this using tcp_ndd_get_info_interval. * This should be OK as normal users should not do this too often. */ - if (cr == NULL || secpolicy_net_config(cr, B_TRUE) != 0) { - if (ddi_get_lbolt() - tcp_last_ndd_get_info_time < - drv_usectohz(tcp_ndd_get_info_interval * 1000)) { + if (cr == NULL || secpolicy_ip_config(cr, B_TRUE) != 0) { + if (ddi_get_lbolt() - tcps->tcps_last_ndd_get_info_time < + drv_usectohz(tcps->tcps_ndd_get_info_interval * 1000)) { (void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG); return (0); } @@ -16536,10 +16660,10 @@ tcp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) (void) mi_mpprintf(mp, "%s", tcp_report_header); - zoneid = Q_TO_CONN(q)->conn_zoneid; for (i = 0; i < CONN_G_HASH_SIZE; i++) { - connfp = &ipcl_globalhash_fanout[i]; + ipst = tcps->tcps_netstack->netstack_ip; + connfp = &ipst->ips_ipcl_globalhash_fanout[i]; connp = NULL; @@ -16555,7 +16679,7 @@ tcp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) } - tcp_last_ndd_get_info_time = ddi_get_lbolt(); + tcps->tcps_last_ndd_get_info_time = ddi_get_lbolt(); return (0); } @@ -16568,11 +16692,14 @@ tcp_bind_hash_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) tcp_t *tcp; int i; zoneid_t zoneid; + tcp_stack_t *tcps = Q_TO_TCP(q)->tcp_tcps; + + zoneid = Q_TO_CONN(q)->conn_zoneid; /* Refer to comments in tcp_status_report(). */ - if (cr == NULL || secpolicy_net_config(cr, B_TRUE) != 0) { - if (ddi_get_lbolt() - tcp_last_ndd_get_info_time < - drv_usectohz(tcp_ndd_get_info_interval * 1000)) { + if (cr == NULL || secpolicy_ip_config(cr, B_TRUE) != 0) { + if (ddi_get_lbolt() - tcps->tcps_last_ndd_get_info_time < + drv_usectohz(tcps->tcps_ndd_get_info_interval * 1000)) { (void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG); return (0); } @@ -16585,10 +16712,8 @@ tcp_bind_hash_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) (void) mi_mpprintf(mp, " %s", tcp_report_header); - zoneid = Q_TO_CONN(q)->conn_zoneid; - - for (i = 0; i < A_CNT(tcp_bind_fanout); i++) { - tbf = &tcp_bind_fanout[i]; + for (i = 0; i < TCP_BIND_FANOUT_SIZE; i++) { + tbf = &tcps->tcps_bind_fanout[i]; mutex_enter(&tbf->tf_lock); for (tcp = tbf->tf_tcp; tcp != NULL; tcp = tcp->tcp_bind_hash) { @@ -16602,7 +16727,7 @@ tcp_bind_hash_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) } mutex_exit(&tbf->tf_lock); } - tcp_last_ndd_get_info_time = ddi_get_lbolt(); + tcps->tcps_last_ndd_get_info_time = ddi_get_lbolt(); return (0); } @@ -16616,11 +16741,16 @@ tcp_listen_hash_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) tcp_t *tcp; int i; zoneid_t zoneid; + tcp_stack_t *tcps; + ip_stack_t *ipst; + + zoneid = Q_TO_CONN(q)->conn_zoneid; + tcps = Q_TO_TCP(q)->tcp_tcps; /* Refer to comments in tcp_status_report(). 
*/ - if (cr == NULL || secpolicy_net_config(cr, B_TRUE) != 0) { - if (ddi_get_lbolt() - tcp_last_ndd_get_info_time < - drv_usectohz(tcp_ndd_get_info_interval * 1000)) { + if (cr == NULL || secpolicy_ip_config(cr, B_TRUE) != 0) { + if (ddi_get_lbolt() - tcps->tcps_last_ndd_get_info_time < + drv_usectohz(tcps->tcps_ndd_get_info_interval * 1000)) { (void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG); return (0); } @@ -16635,10 +16765,10 @@ tcp_listen_hash_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) " TCP " MI_COL_HDRPAD_STR "zone IP addr port seqnum backlog (q0/q/max)"); - zoneid = Q_TO_CONN(q)->conn_zoneid; + ipst = tcps->tcps_netstack->netstack_ip; - for (i = 0; i < ipcl_bind_fanout_size; i++) { - connfp = &ipcl_bind_fanout[i]; + for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) { + connfp = &ipst->ips_ipcl_bind_fanout[i]; connp = NULL; while ((connp = ipcl_get_next_conn(connfp, connp, IPCL_TCP)) != NULL) { @@ -16650,7 +16780,7 @@ tcp_listen_hash_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) } } - tcp_last_ndd_get_info_time = ddi_get_lbolt(); + tcps->tcps_last_ndd_get_info_time = ddi_get_lbolt(); return (0); } @@ -16664,11 +16794,17 @@ tcp_conn_hash_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) tcp_t *tcp; int i; zoneid_t zoneid; + tcp_stack_t *tcps; + ip_stack_t *ipst; + + zoneid = Q_TO_CONN(q)->conn_zoneid; + tcps = Q_TO_TCP(q)->tcp_tcps; + ipst = tcps->tcps_netstack->netstack_ip; /* Refer to comments in tcp_status_report(). */ - if (cr == NULL || secpolicy_net_config(cr, B_TRUE) != 0) { - if (ddi_get_lbolt() - tcp_last_ndd_get_info_time < - drv_usectohz(tcp_ndd_get_info_interval * 1000)) { + if (cr == NULL || secpolicy_ip_config(cr, B_TRUE) != 0) { + if (ddi_get_lbolt() - tcps->tcps_last_ndd_get_info_time < + drv_usectohz(tcps->tcps_ndd_get_info_interval * 1000)) { (void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG); return (0); } @@ -16680,13 +16816,11 @@ tcp_conn_hash_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) } (void) mi_mpprintf(mp, "tcp_conn_hash_size = %d", - ipcl_conn_fanout_size); + ipst->ips_ipcl_conn_fanout_size); (void) mi_mpprintf(mp, " %s", tcp_report_header); - zoneid = Q_TO_CONN(q)->conn_zoneid; - - for (i = 0; i < ipcl_conn_fanout_size; i++) { - connfp = &ipcl_conn_fanout[i]; + for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) { + connfp = &ipst->ips_ipcl_conn_fanout[i]; connp = NULL; while ((connp = ipcl_get_next_conn(connfp, connp, IPCL_TCP)) != NULL) { @@ -16699,7 +16833,7 @@ tcp_conn_hash_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) } } - tcp_last_ndd_get_info_time = ddi_get_lbolt(); + tcps->tcps_last_ndd_get_info_time = ddi_get_lbolt(); return (0); } @@ -16712,11 +16846,15 @@ tcp_acceptor_hash_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) tcp_t *tcp; int i; zoneid_t zoneid; + tcp_stack_t *tcps; + + zoneid = Q_TO_CONN(q)->conn_zoneid; + tcps = Q_TO_TCP(q)->tcp_tcps; /* Refer to comments in tcp_status_report(). 
*/ - if (cr == NULL || secpolicy_net_config(cr, B_TRUE) != 0) { - if (ddi_get_lbolt() - tcp_last_ndd_get_info_time < - drv_usectohz(tcp_ndd_get_info_interval * 1000)) { + if (cr == NULL || secpolicy_ip_config(cr, B_TRUE) != 0) { + if (ddi_get_lbolt() - tcps->tcps_last_ndd_get_info_time < + drv_usectohz(tcps->tcps_ndd_get_info_interval * 1000)) { (void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG); return (0); } @@ -16729,10 +16867,8 @@ tcp_acceptor_hash_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) (void) mi_mpprintf(mp, " %s", tcp_report_header); - zoneid = Q_TO_CONN(q)->conn_zoneid; - - for (i = 0; i < A_CNT(tcp_acceptor_fanout); i++) { - tf = &tcp_acceptor_fanout[i]; + for (i = 0; i < TCP_FANOUT_SIZE; i++) { + tf = &tcps->tcps_acceptor_fanout[i]; mutex_enter(&tf->tf_lock); for (tcp = tf->tf_tcp; tcp != NULL; tcp = tcp->tcp_acceptor_hash) { @@ -16744,7 +16880,7 @@ tcp_acceptor_hash_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) } mutex_exit(&tf->tf_lock); } - tcp_last_ndd_get_info_time = ddi_get_lbolt(); + tcps->tcps_last_ndd_get_info_time = ddi_get_lbolt(); return (0); } @@ -16764,6 +16900,7 @@ tcp_timer(void *arg) uint32_t mss; conn_t *connp = (conn_t *)arg; tcp_t *tcp = connp->conn_tcp; + tcp_stack_t *tcps = tcp->tcp_tcps; tcp->tcp_timer_tid = 0; @@ -16796,8 +16933,8 @@ tcp_timer(void *arg) } if (!listener->tcp_syn_defense && (listener->tcp_syn_rcvd_timeout > - (tcp_conn_req_max_q0 >> 2)) && - (tcp_conn_req_max_q0 > 200)) { + (tcps->tcps_conn_req_max_q0 >> 2)) && + (tcps->tcps_conn_req_max_q0 > 200)) { /* We may be under attack. Put on a defense. */ listener->tcp_syn_defense = B_TRUE; cmn_err(CE_WARN, "High TCP connect timeout " @@ -16844,7 +16981,7 @@ tcp_timer(void *arg) if (tcp->tcp_suna != tcp->tcp_snxt) { clock_t time_to_wait; - BUMP_MIB(&tcp_mib, tcpTimRetrans); + BUMP_MIB(&tcps->tcps_mib, tcpTimRetrans); if (!tcp->tcp_xmit_head) break; time_to_wait = lbolt - @@ -16856,7 +16993,7 @@ tcp_timer(void *arg) * restart the timer. */ if (time_to_wait > msec_per_tick) { - TCP_STAT(tcp_timer_fire_early); + TCP_STAT(tcps, tcp_timer_fire_early); TCP_TIMER_RESTART(tcp, time_to_wait); return; } @@ -16937,7 +17074,7 @@ tcp_timer(void *arg) /* Extend window for zero window probe */ tcp->tcp_swnd++; tcp->tcp_zero_win_probe = B_TRUE; - BUMP_MIB(&tcp_mib, tcpOutWinProbe); + BUMP_MIB(&tcps->tcps_mib, tcpOutWinProbe); } else { /* * Handle timeout from sender SWS avoidance. @@ -16965,7 +17102,7 @@ tcp_timer(void *arg) !tcp->tcp_fin_acked) break; /* Nothing to do, return without restarting timer. */ - TCP_STAT(tcp_timer_fire_miss); + TCP_STAT(tcps, tcp_timer_fire_miss); return; case TCPS_FIN_WAIT_2: /* @@ -16977,7 +17114,8 @@ tcp_timer(void *arg) if (TCP_IS_DETACHED(tcp)) { (void) tcp_clean_death(tcp, 0, 23); } else { - TCP_TIMER_RESTART(tcp, tcp_fin_wait_2_flush_interval); + TCP_TIMER_RESTART(tcp, + tcps->tcps_fin_wait_2_flush_interval); } return; case TCPS_TIME_WAIT: @@ -17001,7 +17139,7 @@ tcp_timer(void *arg) if ((tcp->tcp_zero_win_probe == 0) || (TICK_TO_MSEC(lbolt - tcp->tcp_last_recv_time) > second_threshold)) { - BUMP_MIB(&tcp_mib, tcpTimRetransDrop); + BUMP_MIB(&tcps->tcps_mib, tcpTimRetransDrop); /* * If TCP is in SYN_RCVD state, send back a * RST|ACK as BSD does. 
Note that tcp_zero_win_probe @@ -17059,19 +17197,19 @@ tcp_timer(void *arg) } tcp->tcp_timer_backoff++; if ((ms = (tcp->tcp_rtt_sa >> 3) + tcp->tcp_rtt_sd + - tcp_rexmit_interval_extra + (tcp->tcp_rtt_sa >> 5)) < - tcp_rexmit_interval_min) { + tcps->tcps_rexmit_interval_extra + (tcp->tcp_rtt_sa >> 5)) < + tcps->tcps_rexmit_interval_min) { /* * This means the original RTO is tcp_rexmit_interval_min. * So we will use tcp_rexmit_interval_min as the RTO value * and do the backoff. */ - ms = tcp_rexmit_interval_min << tcp->tcp_timer_backoff; + ms = tcps->tcps_rexmit_interval_min << tcp->tcp_timer_backoff; } else { ms <<= tcp->tcp_timer_backoff; } - if (ms > tcp_rexmit_interval_max) { - ms = tcp_rexmit_interval_max; + if (ms > tcps->tcps_rexmit_interval_max) { + ms = tcps->tcps_rexmit_interval_max; /* * ms is at max, decrement tcp_timer_backoff to avoid * overflow. @@ -17135,8 +17273,8 @@ tcp_timer(void *arg) } tcp->tcp_csuna = tcp->tcp_snxt; - BUMP_MIB(&tcp_mib, tcpRetransSegs); - UPDATE_MIB(&tcp_mib, tcpRetransBytes, mss); + BUMP_MIB(&tcps->tcps_mib, tcpRetransSegs); + UPDATE_MIB(&tcps->tcps_mib, tcpRetransBytes, mss); TCP_RECORD_TRACE(tcp, mp, TCP_TRACE_SEND_PKT); tcp_send_data(tcp, tcp->tcp_wq, mp); @@ -17208,6 +17346,7 @@ tcp_update_next_port(in_port_t port, const tcp_t *tcp, boolean_t random) { int i; boolean_t restart = B_FALSE; + tcp_stack_t *tcps = tcp->tcp_tcps; if (random && tcp_random_anon_port != 0) { (void) random_get_pseudo_bytes((uint8_t *)&port, @@ -17221,29 +17360,29 @@ tcp_update_next_port(in_port_t port, const tcp_t *tcp, boolean_t random) * port to get the random port. It should fall into the * valid anon port range. */ - if (port < tcp_smallest_anon_port) { - port = tcp_smallest_anon_port + - port % (tcp_largest_anon_port - - tcp_smallest_anon_port); + if (port < tcps->tcps_smallest_anon_port) { + port = tcps->tcps_smallest_anon_port + + port % (tcps->tcps_largest_anon_port - + tcps->tcps_smallest_anon_port); } } retry: - if (port < tcp_smallest_anon_port) - port = (in_port_t)tcp_smallest_anon_port; + if (port < tcps->tcps_smallest_anon_port) + port = (in_port_t)tcps->tcps_smallest_anon_port; - if (port > tcp_largest_anon_port) { + if (port > tcps->tcps_largest_anon_port) { if (restart) return (0); restart = B_TRUE; - port = (in_port_t)tcp_smallest_anon_port; + port = (in_port_t)tcps->tcps_smallest_anon_port; } - if (port < tcp_smallest_nonpriv_port) - port = (in_port_t)tcp_smallest_nonpriv_port; + if (port < tcps->tcps_smallest_nonpriv_port) + port = (in_port_t)tcps->tcps_smallest_nonpriv_port; - for (i = 0; i < tcp_g_num_epriv_ports; i++) { - if (port == tcp_g_epriv_ports[i]) { + for (i = 0; i < tcps->tcps_g_num_epriv_ports; i++) { + if (port == tcps->tcps_g_epriv_ports[i]) { port++; /* * Make sure whether the port is in the @@ -17275,9 +17414,9 @@ tcp_get_next_priv_port(const tcp_t *tcp) static in_port_t next_priv_port = IPPORT_RESERVED - 1; in_port_t nextport; boolean_t restart = B_FALSE; - + tcp_stack_t *tcps = tcp->tcp_tcps; retry: - if (next_priv_port < tcp_min_anonpriv_port || + if (next_priv_port < tcps->tcps_min_anonpriv_port || next_priv_port >= IPPORT_RESERVED) { next_priv_port = IPPORT_RESERVED - 1; if (restart) @@ -17370,6 +17509,7 @@ tcp_output(void *arg, mblk_t *mp, void *arg2) conn_t *connp = (conn_t *)arg; tcp_t *tcp = connp->conn_tcp; uint32_t msize; + tcp_stack_t *tcps = tcp->tcp_tcps; /* * Try and ASSERT the minimum possible references on the @@ -17457,7 +17597,7 @@ tcp_output(void *arg, mblk_t *mp, void *arg2) */ if ((tcp->tcp_suna == snxt) && 
!tcp->tcp_localnet && (TICK_TO_MSEC(lbolt - tcp->tcp_last_recv_time) >= tcp->tcp_rto)) { - SET_TCP_INIT_CWND(tcp, mss, tcp_slow_start_after_idle); + SET_TCP_INIT_CWND(tcp, mss, tcps->tcps_slow_start_after_idle); } usable = tcp->tcp_swnd; /* tcp window size */ @@ -17530,8 +17670,8 @@ tcp_output(void *arg, mblk_t *mp, void *arg2) U32_TO_ABE32(snxt, tcph->th_seq); - BUMP_MIB(&tcp_mib, tcpOutDataSegs); - UPDATE_MIB(&tcp_mib, tcpOutDataBytes, len); + BUMP_MIB(&tcps->tcps_mib, tcpOutDataSegs); + UPDATE_MIB(&tcps->tcps_mib, tcpOutDataBytes, len); BUMP_LOCAL(tcp->tcp_obsegs); /* Update the latest receive window size in TCP header. */ @@ -17557,7 +17697,7 @@ tcp_output(void *arg, mblk_t *mp, void *arg2) (!OK_32PTR(rptr))) { /* NOTE: we assume allocb returns an OK_32PTR */ mp = allocb(tcp->tcp_ip_hdr_len + TCP_MAX_HDR_LENGTH + - tcp_wroff_xtra, BPRI_MED); + tcps->tcps_wroff_xtra, BPRI_MED); if (!mp) { freemsg(mp1); goto no_memory; @@ -17566,7 +17706,7 @@ tcp_output(void *arg, mblk_t *mp, void *arg2) mp1 = mp; /* Leave room for Link Level header */ /* hdrlen = tcp->tcp_hdr_len; */ - rptr = &mp1->b_rptr[tcp_wroff_xtra]; + rptr = &mp1->b_rptr[tcps->tcps_wroff_xtra]; mp1->b_wptr = &rptr[hdrlen]; } mp1->b_rptr = rptr; @@ -17657,6 +17797,7 @@ tcp_accept_finish(void *arg, mblk_t *mp, void *arg2) mblk_t *stropt_mp = mp; struct stroptions *stropt; uint_t thwin; + tcp_stack_t *tcps = tcp->tcp_tcps; /* * Drop the eager's ref on the listener, that was placed when @@ -17765,7 +17906,7 @@ tcp_accept_finish(void *arg, mblk_t *mp, void *arg2) } stropt->so_flags = SO_HIWAT; - stropt->so_hiwat = MAX(q->q_hiwat, tcp_sth_rcv_hiwat); + stropt->so_hiwat = MAX(q->q_hiwat, tcps->tcps_sth_rcv_hiwat); stropt->so_flags |= SO_MAXBLK; stropt->so_maxblk = tcp_maxpsz_set(tcp, B_FALSE); @@ -17800,10 +17941,10 @@ tcp_accept_finish(void *arg, mblk_t *mp, void *arg2) (void) tcp_maxpsz_set(tcp->tcp_loopback_peer, B_TRUE); } else if (tcp->tcp_snd_sack_ok) { stropt->so_wroff = tcp->tcp_hdr_len + TCPOPT_MAX_SACK_LEN + - (tcp->tcp_loopback ? 0 : tcp_wroff_xtra); + (tcp->tcp_loopback ? 0 : tcps->tcps_wroff_xtra); } else { stropt->so_wroff = tcp->tcp_hdr_len + (tcp->tcp_loopback ? 0 : - tcp_wroff_xtra); + tcps->tcps_wroff_xtra); } /* @@ -17851,7 +17992,7 @@ tcp_accept_finish(void *arg, mblk_t *mp, void *arg2) tcp, (tcp->tcp_swnd == 0) ? tcp->tcp_suna : tcp->tcp_snxt, tcp->tcp_rnxt, TH_ACK); - BUMP_MIB(&tcp_mib, tcpOutWinUpdate); + BUMP_MIB(&tcps->tcps_mib, tcpOutWinUpdate); } } @@ -17880,7 +18021,7 @@ tcp_accept_finish(void *arg, mblk_t *mp, void *arg2) } if (peer_tcp->tcp_flow_stopped) { tcp_clrqfull(peer_tcp); - TCP_STAT(tcp_fusion_backenabled); + TCP_STAT(tcps, tcp_fusion_backenabled); } mutex_exit(&peer_tcp->tcp_non_sq_lock); mutex_exit(&tcp->tcp_non_sq_lock); @@ -17982,7 +18123,7 @@ tcp_send_pending(void *arg, mblk_t *mp, void *arg2) /* * This is the STREAMS entry point for T_CONN_RES coming down on * Acceptor STREAM when sockfs listener does accept processing. - * Read the block comment on top pf tcp_conn_request(). + * Read the block comment on top of tcp_conn_request(). 
*/ void tcp_wput_accept(queue_t *q, mblk_t *mp) @@ -18048,6 +18189,9 @@ tcp_wput_accept(queue_t *q, mblk_t *mp) econnp->conn_zoneid = listener->tcp_connp->conn_zoneid; econnp->conn_allzones = listener->tcp_connp->conn_allzones; + ASSERT(econnp->conn_netstack == + listener->tcp_connp->conn_netstack); + ASSERT(eager->tcp_tcps == listener->tcp_tcps); /* Put the ref for IP */ CONN_INC_REF(econnp); @@ -18231,6 +18375,7 @@ tcp_wput(queue_t *q, mblk_t *mp) uchar_t *rptr; struct iocblk *iocp; uint32_t msize; + tcp_stack_t *tcps = Q_TO_TCP(q)->tcp_tcps; ASSERT(connp->conn_ref >= 2); @@ -18315,7 +18460,7 @@ tcp_wput(queue_t *q, mblk_t *mp) case ND_SET: /* nd_getset does the necessary checks */ case ND_GET: - if (!nd_getset(q, tcp_g_nd, mp)) { + if (!nd_getset(q, tcps->tcps_g_nd, mp)) { CALL_IP_WPUT(connp, q, mp); return; } @@ -18326,7 +18471,7 @@ tcp_wput(queue_t *q, mblk_t *mp) * Wants to be the default wq. Check the credentials * first, the rest is executed via squeue. */ - if (secpolicy_net_config(iocp->ioc_cr, B_FALSE) != 0) { + if (secpolicy_ip_config(iocp->ioc_cr, B_FALSE) != 0) { iocp->ioc_error = EPERM; iocp->ioc_count = 0; mp->b_datap->db_type = M_IOCACK; @@ -18388,6 +18533,7 @@ tcp_zcopy_check(tcp_t *tcp) conn_t *connp = tcp->tcp_connp; ire_t *ire; boolean_t zc_enabled = B_FALSE; + tcp_stack_t *tcps = tcp->tcp_tcps; if (do_tcpzcopy == 2) zc_enabled = B_TRUE; @@ -18424,10 +18570,10 @@ tcp_zcopy_check(tcp_t *tcp) if (!TCP_IS_DETACHED(tcp)) { if (zc_enabled) { (void) mi_set_sth_copyopt(tcp->tcp_rq, ZCVMSAFE); - TCP_STAT(tcp_zcopy_on); + TCP_STAT(tcps, tcp_zcopy_on); } else { (void) mi_set_sth_copyopt(tcp->tcp_rq, ZCVMUNSAFE); - TCP_STAT(tcp_zcopy_off); + TCP_STAT(tcps, tcp_zcopy_off); } } return (zc_enabled); @@ -18436,13 +18582,15 @@ tcp_zcopy_check(tcp_t *tcp) static mblk_t * tcp_zcopy_disable(tcp_t *tcp, mblk_t *bp) { + tcp_stack_t *tcps = tcp->tcp_tcps; + if (do_tcpzcopy == 2) return (bp); else if (tcp->tcp_snd_zcopy_on) { tcp->tcp_snd_zcopy_on = B_FALSE; if (!TCP_IS_DETACHED(tcp)) { (void) mi_set_sth_copyopt(tcp->tcp_rq, ZCVMUNSAFE); - TCP_STAT(tcp_zcopy_disable); + TCP_STAT(tcps, tcp_zcopy_disable); } } return (tcp_zcopy_backoff(tcp, bp, 0)); @@ -18456,8 +18604,10 @@ static mblk_t * tcp_zcopy_backoff(tcp_t *tcp, mblk_t *bp, int fix_xmitlist) { mblk_t *head, *tail, *nbp; + tcp_stack_t *tcps = tcp->tcp_tcps; + if (IS_VMLOANED_MBLK(bp)) { - TCP_STAT(tcp_zcopy_backoff); + TCP_STAT(tcps, tcp_zcopy_backoff); if ((head = copyb(bp)) == NULL) { /* fail to backoff; leave it for the next backoff */ tcp->tcp_xmit_zc_clean = B_FALSE; @@ -18486,7 +18636,7 @@ tcp_zcopy_backoff(tcp_t *tcp, mblk_t *bp, int fix_xmitlist) tail = head; while (nbp) { if (IS_VMLOANED_MBLK(nbp)) { - TCP_STAT(tcp_zcopy_backoff); + TCP_STAT(tcps, tcp_zcopy_backoff); if ((tail->b_cont = copyb(nbp)) == NULL) { tcp->tcp_xmit_zc_clean = B_FALSE; tail->b_cont = nbp; @@ -18541,9 +18691,10 @@ tcp_zcopy_notify(tcp_t *tcp) static boolean_t tcp_send_find_ire(tcp_t *tcp, ipaddr_t *dst, ire_t **irep) { - ire_t *ire; - conn_t *connp = tcp->tcp_connp; - + ire_t *ire; + conn_t *connp = tcp->tcp_connp; + tcp_stack_t *tcps = tcp->tcp_tcps; + ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip; mutex_enter(&connp->conn_lock); ire = connp->conn_ire_cache; @@ -18562,7 +18713,7 @@ tcp_send_find_ire(tcp_t *tcp, ipaddr_t *dst, ire_t **irep) /* force a recheck later on */ tcp->tcp_ire_ill_check_done = B_FALSE; - TCP_DBGSTAT(tcp_ire_null1); + TCP_DBGSTAT(tcps, tcp_ire_null1); connp->conn_ire_cache = NULL; mutex_exit(&connp->conn_lock); @@ -18570,12 
+18721,13 @@ tcp_send_find_ire(tcp_t *tcp, ipaddr_t *dst, ire_t **irep) IRE_REFRELE_NOTR(ire); tsl = crgetlabel(CONN_CRED(connp)); - ire = (dst ? ire_cache_lookup(*dst, connp->conn_zoneid, tsl) : + ire = (dst ? + ire_cache_lookup(*dst, connp->conn_zoneid, tsl, ipst) : ire_cache_lookup_v6(&tcp->tcp_ip6h->ip6_dst, - connp->conn_zoneid, tsl)); + connp->conn_zoneid, tsl, ipst)); if (ire == NULL) { - TCP_STAT(tcp_ire_null); + TCP_STAT(tcps, tcp_ire_null); return (B_FALSE); } @@ -18630,6 +18782,7 @@ tcp_send_find_ire_ill(tcp_t *tcp, mblk_t *mp, ire_t **irep, ill_t **illp) ill_t *ill; conn_t *connp = tcp->tcp_connp; mblk_t *ire_fp_mp; + tcp_stack_t *tcps = tcp->tcp_tcps; if (mp != NULL) ipha = (ipha_t *)mp->b_rptr; @@ -18646,7 +18799,7 @@ tcp_send_find_ire_ill(tcp_t *tcp, mblk_t *mp, ire_t **irep, ill_t **illp) ((ire_fp_mp = ire->ire_nce->nce_fp_mp) == NULL) || ((mp != NULL) && (ire->ire_max_frag < ntohs(ipha->ipha_length) || MBLKL(ire_fp_mp) > MBLKHEAD(mp)))) { - TCP_STAT(tcp_ip_ire_send); + TCP_STAT(tcps, tcp_ip_ire_send); IRE_REFRELE(ire); return (B_FALSE); } @@ -18687,6 +18840,8 @@ tcp_send_data(tcp_t *tcp, queue_t *q, mblk_t *mp) uint32_t hcksum_txflags = 0; mblk_t *ire_fp_mp; uint_t ire_fp_mp_len; + tcp_stack_t *tcps = tcp->tcp_tcps; + ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip; ASSERT(DB_TYPE(mp) == M_DATA); @@ -18708,10 +18863,10 @@ tcp_send_data(tcp_t *tcp, queue_t *q, mblk_t *mp) !connp->conn_ulp_labeled || ipha->ipha_ident == IP_HDR_INCLUDED || ipha->ipha_version_and_hdr_length != IP_SIMPLE_HDR_VERSION || - IPP_ENABLED(IPP_LOCAL_OUT)) { + IPP_ENABLED(IPP_LOCAL_OUT, ipst)) { if (tcp->tcp_snd_zcopy_aware) mp = tcp_zcopy_disable(tcp, mp); - TCP_STAT(tcp_ip_send); + TCP_STAT(tcps, tcp_ip_send); CALL_IP_WPUT(connp, q, mp); return; } @@ -18746,7 +18901,7 @@ tcp_send_data(tcp_t *tcp, queue_t *q, mblk_t *mp) * Restore LSO for this connection, so that next time around * it is eligible to go through tcp_lsosend() path again. */ - TCP_STAT(tcp_lso_enabled); + TCP_STAT(tcps, tcp_lso_enabled); tcp->tcp_lso = B_TRUE; ip1dbg(("tcp_send_data: reenabling LSO for connp %p on " "interface %s\n", (void *)connp, ill->ill_name)); @@ -18755,7 +18910,7 @@ tcp_send_data(tcp_t *tcp, queue_t *q, mblk_t *mp) * Restore MDT for this connection, so that next time around * it is eligible to go through tcp_multisend() path again. */ - TCP_STAT(tcp_mdt_conn_resumed1); + TCP_STAT(tcps, tcp_mdt_conn_resumed1); tcp->tcp_mdt = B_TRUE; ip1dbg(("tcp_send_data: reenabling MDT for connp %p on " "interface %s\n", (void *)connp, ill->ill_name)); @@ -18787,8 +18942,8 @@ tcp_send_data(tcp_t *tcp, queue_t *q, mblk_t *mp) /* Software checksum? */ if (DB_CKSUMFLAGS(mp) == 0) { - TCP_STAT(tcp_out_sw_cksum); - TCP_STAT_UPDATE(tcp_out_sw_cksum_bytes, + TCP_STAT(tcps, tcp_out_sw_cksum); + TCP_STAT_UPDATE(tcps, tcp_out_sw_cksum_bytes, ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH); } @@ -18819,14 +18974,15 @@ tcp_send_data(tcp_t *tcp, queue_t *q, mblk_t *mp) * depending on the availability of transmit resources at * the media layer. 
*/ - IP_DLS_ILL_TX(ill, ipha, mp); + IP_DLS_ILL_TX(ill, ipha, mp, ipst); } else { ill_t *out_ill = (ill_t *)ire->ire_stq->q_ptr; DTRACE_PROBE4(ip4__physical__out__start, ill_t *, NULL, ill_t *, out_ill, ipha_t *, ipha, mblk_t *, mp); - FW_HOOKS(ip4_physical_out_event, ipv4firewall_physical_out, - NULL, out_ill, ipha, mp, mp); + FW_HOOKS(ipst->ips_ip4_physical_out_event, + ipst->ips_ipv4firewall_physical_out, + NULL, out_ill, ipha, mp, mp, ipst); DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, mp); if (mp != NULL) putnext(ire->ire_stq, mp); @@ -18896,6 +19052,8 @@ tcp_wput_data(tcp_t *tcp, mblk_t *mp, boolean_t urgent) int32_t tcp_tcp_hdr_len; int mdt_thres; int rc; + tcp_stack_t *tcps = tcp->tcp_tcps; + ip_stack_t *ipst; tcpstate = tcp->tcp_state; if (mp == NULL) { @@ -19052,7 +19210,7 @@ data_null: if ((tcp->tcp_suna == snxt) && !tcp->tcp_localnet && (TICK_TO_MSEC(lbolt - tcp->tcp_last_recv_time) >= tcp->tcp_rto)) { - SET_TCP_INIT_CWND(tcp, mss, tcp_slow_start_after_idle); + SET_TCP_INIT_CWND(tcp, mss, tcps->tcps_slow_start_after_idle); } if (tcpstate == TCPS_SYN_RCVD) { /* @@ -19192,6 +19350,8 @@ data_null: * connection, stop using LSO/MDT and restore the stream head * parameters accordingly. */ + ipst = tcps->tcps_netstack->netstack_ip; + if ((tcp->tcp_lso || tcp->tcp_mdt) && ((tcp->tcp_ipversion == IPV4_VERSION && tcp->tcp_ip_hdr_len != IP_SIMPLE_HDR_LENGTH) || @@ -19200,7 +19360,7 @@ data_null: tcp->tcp_state != TCPS_ESTABLISHED || TCP_IS_DETACHED(tcp) || !CONN_IS_LSO_MD_FASTPATH(tcp->tcp_connp) || CONN_IPSEC_OUT_ENCAPSULATED(tcp->tcp_connp) || - IPP_ENABLED(IPP_LOCAL_OUT))) { + IPP_ENABLED(IPP_LOCAL_OUT, ipst))) { if (tcp->tcp_lso) { tcp->tcp_connp->conn_lso_ok = B_FALSE; tcp->tcp_lso = B_FALSE; @@ -19212,9 +19372,9 @@ data_null: /* Anything other than detached is considered pathological */ if (!TCP_IS_DETACHED(tcp)) { if (tcp->tcp_lso) - TCP_STAT(tcp_lso_disabled); + TCP_STAT(tcps, tcp_lso_disabled); else - TCP_STAT(tcp_mdt_conn_halted1); + TCP_STAT(tcps, tcp_mdt_conn_halted1); (void) tcp_maxpsz_set(tcp, B_TRUE); } } @@ -19400,7 +19560,7 @@ tcp_fill_header(tcp_t *tcp, uchar_t *rptr, clock_t now, int num_sack_blk) static int tcp_mdt_add_attrs(multidata_t *mmd, const mblk_t *dlmp, const boolean_t hwcksum, const uint32_t start, const uint32_t stuff, const uint32_t end, - const uint32_t flags) + const uint32_t flags, tcp_stack_t *tcps) { /* Add global destination address & SAP attribute */ if (dlmp == NULL || !ip_md_addr_attr(mmd, NULL, dlmp)) { @@ -19408,7 +19568,7 @@ tcp_mdt_add_attrs(multidata_t *mmd, const mblk_t *dlmp, const boolean_t hwcksum, "destination address+SAP\n")); if (dlmp != NULL) - TCP_STAT(tcp_mdt_allocfail); + TCP_STAT(tcps, tcp_mdt_allocfail); return (-1); } @@ -19418,7 +19578,7 @@ tcp_mdt_add_attrs(multidata_t *mmd, const mblk_t *dlmp, const boolean_t hwcksum, ip1dbg(("tcp_mdt_add_attrs: can't add global hardware " "checksum attribute\n")); - TCP_STAT(tcp_mdt_allocfail); + TCP_STAT(tcps, tcp_mdt_allocfail); return (-1); } @@ -19472,6 +19632,8 @@ tcp_multisend(queue_t *q, tcp_t *tcp, const int mss, const int tcp_hdr_len, conn_t *connp; mblk_t *mp, *mp1, *fw_mp_head = NULL; uchar_t *pld_start; + tcp_stack_t *tcps = tcp->tcp_tcps; + ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip; #ifdef _BIG_ENDIAN #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 28) & 0x7) @@ -19574,7 +19736,7 @@ tcp_multisend(queue_t *q, tcp_t *tcp, const int mss, const int tcp_hdr_len, */ if (!ILL_MDT_USABLE(ill) || (ire->ire_flags & RTF_MULTIRT) != 0) { /* don't go through this path anymore for 
this connection */ - TCP_STAT(tcp_mdt_conn_halted2); + TCP_STAT(tcps, tcp_mdt_conn_halted2); tcp->tcp_mdt = B_FALSE; ip1dbg(("tcp_multisend: disabling MDT for connp %p on " "interface %s\n", (void *)connp, ill->ill_name)); @@ -19678,7 +19840,7 @@ tcp_multisend(queue_t *q, tcp_t *tcp, const int mss, const int tcp_hdr_len, * return to us once a large-size transmission is * possible. */ - TCP_STAT(tcp_mdt_legacy_small); + TCP_STAT(tcps, tcp_mdt_legacy_small); if ((err = tcp_send(q, tcp, mss, tcp_hdr_len, tcp_tcp_hdr_len, num_sack_blk, usable, snxt, tail_unsent, xmit_tail, local_time, @@ -19694,7 +19856,7 @@ tcp_multisend(queue_t *q, tcp_t *tcp, const int mss, const int tcp_hdr_len, return (0); } - TCP_STAT(tcp_mdt_legacy_ret); + TCP_STAT(tcps, tcp_mdt_legacy_ret); /* * We may have delivered the Multidata, so make sure * to re-initialize before the next round. @@ -19788,7 +19950,7 @@ tcp_multisend(queue_t *q, tcp_t *tcp, const int mss, const int tcp_hdr_len, /* hardware checksum offsets */ start, stuff, 0, /* hardware checksum flag */ - hwcksum_flags) != 0)) { + hwcksum_flags, tcps) != 0)) { legacy_send: if (md_mp != NULL) { /* Unlink message from the chain */ @@ -19807,11 +19969,11 @@ legacy_send: md_mp_head = NULL; } /* md_hbuf gets freed automatically */ - TCP_STAT(tcp_mdt_discarded); + TCP_STAT(tcps, tcp_mdt_discarded); freeb(md_mp); } else { /* Either allocb or mmd_alloc failed */ - TCP_STAT(tcp_mdt_allocfail); + TCP_STAT(tcps, tcp_mdt_allocfail); if (md_hbuf != NULL) freeb(md_hbuf); } @@ -19831,7 +19993,7 @@ legacy_send_no_md: * we gave up with the Multidata processings * and let the old path have it all. */ - TCP_STAT(tcp_mdt_legacy_all); + TCP_STAT(tcps, tcp_mdt_legacy_all); return (tcp_send(q, tcp, mss, tcp_hdr_len, tcp_tcp_hdr_len, num_sack_blk, usable, snxt, tail_unsent, xmit_tail, local_time, @@ -19839,11 +20001,11 @@ legacy_send_no_md: } /* link to any existing ones, if applicable */ - TCP_STAT(tcp_mdt_allocd); + TCP_STAT(tcps, tcp_mdt_allocd); if (md_mp_head == NULL) { md_mp_head = md_mp; } else if (tcp_mdt_chain) { - TCP_STAT(tcp_mdt_linked); + TCP_STAT(tcps, tcp_mdt_linked); linkb(md_mp_head, md_mp); } } @@ -19896,7 +20058,7 @@ legacy_send_no_md: break; /* done */ if ((md_pbuf = dupb(*xmit_tail)) == NULL) { - TCP_STAT(tcp_mdt_allocfail); + TCP_STAT(tcps, tcp_mdt_allocfail); goto legacy_send; /* out_of_mem */ } @@ -19905,7 +20067,8 @@ legacy_send_no_md: if (!ip_md_zcopy_attr(mmd, NULL, zc_cap->ill_zerocopy_flags)) { freeb(md_pbuf); - TCP_STAT(tcp_mdt_allocfail); + TCP_STAT(tcps, + tcp_mdt_allocfail); /* out_of_mem */ goto legacy_send; } @@ -19968,7 +20131,7 @@ legacy_send_no_md: max_pld > 0) { md_pbuf_nxt = dupb((*xmit_tail)->b_cont); if (md_pbuf_nxt == NULL) { - TCP_STAT(tcp_mdt_allocfail); + TCP_STAT(tcps, tcp_mdt_allocfail); goto legacy_send; /* out_of_mem */ } @@ -19977,7 +20140,8 @@ legacy_send_no_md: if (!ip_md_zcopy_attr(mmd, NULL, zc_cap->ill_zerocopy_flags)) { freeb(md_pbuf_nxt); - TCP_STAT(tcp_mdt_allocfail); + TCP_STAT(tcps, + tcp_mdt_allocfail); /* out_of_mem */ goto legacy_send; } @@ -20094,7 +20258,8 @@ legacy_send_no_md: *snxt == tcp->tcp_fss) { if (!tcp->tcp_fin_acked) { tcp->tcp_tcph->th_flags[0] |= TH_FIN; - BUMP_MIB(&tcp_mib, tcpOutControl); + BUMP_MIB(&tcps->tcps_mib, + tcpOutControl); } if (!tcp->tcp_fin_sent) { tcp->tcp_fin_sent = B_TRUE; @@ -20294,7 +20459,7 @@ legacy_send_no_md: (void *)tcp, (void *)mmd, (void *)pkt_info, err); } - TCP_STAT(tcp_mdt_addpdescfail); + TCP_STAT(tcps, tcp_mdt_addpdescfail); goto legacy_send; /* out_of_mem */ } ASSERT(pkt != 
NULL); @@ -20336,8 +20501,8 @@ legacy_send_no_md: *up = (sum & 0xFFFF) + (sum >> 16); } else { /* software checksumming */ - TCP_STAT(tcp_out_sw_cksum); - TCP_STAT_UPDATE(tcp_out_sw_cksum_bytes, + TCP_STAT(tcps, tcp_out_sw_cksum); + TCP_STAT_UPDATE(tcps, tcp_out_sw_cksum_bytes, tcp->tcp_hdr_len + tcp->tcp_last_sent_len); *up = IP_MD_CSUM(pkt, tcp->tcp_ip_hdr_len, cksum + IP_TCP_CSUM_COMP); @@ -20359,8 +20524,10 @@ legacy_send_no_md: } } - if (af == AF_INET && HOOKS4_INTERESTED_PHYSICAL_OUT|| - af == AF_INET6 && HOOKS6_INTERESTED_PHYSICAL_OUT) { + if (af == AF_INET && + HOOKS4_INTERESTED_PHYSICAL_OUT(ipst) || + af == AF_INET6 && + HOOKS6_INTERESTED_PHYSICAL_OUT(ipst)) { /* build header(IP/TCP) mblk for this segment */ if ((mp = dupb(md_hbuf)) == NULL) goto legacy_send; @@ -20387,9 +20554,10 @@ legacy_send_no_md: ill_t *, ill, ipha_t *, ipha, mblk_t *, mp); - FW_HOOKS(ip4_physical_out_event, - ipv4firewall_physical_out, - NULL, ill, ipha, mp, mp); + FW_HOOKS( + ipst->ips_ip4_physical_out_event, + ipst->ips_ipv4firewall_physical_out, + NULL, ill, ipha, mp, mp, ipst); DTRACE_PROBE1( ip4__physical__out__end, mblk_t *, mp); @@ -20400,9 +20568,10 @@ legacy_send_no_md: ill_t *, ill, ip6_t *, ip6h, mblk_t *, mp); - FW_HOOKS6(ip6_physical_out_event, - ipv6firewall_physical_out, - NULL, ill, ip6h, mp, mp); + FW_HOOKS6( + ipst->ips_ip6_physical_out_event, + ipst->ips_ipv6firewall_physical_out, + NULL, ill, ip6h, mp, mp, ipst); DTRACE_PROBE1( ip6__physical__out__end, mblk_t *, mp); @@ -20518,7 +20687,7 @@ legacy_send_no_md: freemsg(mp); } if (buf_trunked) { - TCP_STAT(tcp_mdt_discarded); + TCP_STAT(tcps, tcp_mdt_discarded); freeb(md_mp); buf_trunked = B_FALSE; } @@ -20550,6 +20719,8 @@ tcp_multisend_data(tcp_t *tcp, ire_t *ire, const ill_t *ill, mblk_t *md_mp_head, { uint64_t delta; nce_t *nce; + tcp_stack_t *tcps = tcp->tcp_tcps; + ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip; ASSERT(ire != NULL && ill != NULL); ASSERT(ire->ire_stq != NULL); @@ -20559,14 +20730,14 @@ tcp_multisend_data(tcp_t *tcp, ire_t *ire, const ill_t *ill, mblk_t *md_mp_head, /* adjust MIBs and IRE timestamp */ TCP_RECORD_TRACE(tcp, md_mp_head, TCP_TRACE_SEND_PKT); tcp->tcp_obsegs += obsegs; - UPDATE_MIB(&tcp_mib, tcpOutDataSegs, obsegs); - UPDATE_MIB(&tcp_mib, tcpOutDataBytes, obbytes); - TCP_STAT_UPDATE(tcp_mdt_pkt_out, obsegs); + UPDATE_MIB(&tcps->tcps_mib, tcpOutDataSegs, obsegs); + UPDATE_MIB(&tcps->tcps_mib, tcpOutDataBytes, obbytes); + TCP_STAT_UPDATE(tcps, tcp_mdt_pkt_out, obsegs); if (tcp->tcp_ipversion == IPV4_VERSION) { - TCP_STAT_UPDATE(tcp_mdt_pkt_out_v4, obsegs); + TCP_STAT_UPDATE(tcps, tcp_mdt_pkt_out_v4, obsegs); } else { - TCP_STAT_UPDATE(tcp_mdt_pkt_out_v6, obsegs); + TCP_STAT_UPDATE(tcps, tcp_mdt_pkt_out_v6, obsegs); } UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests, obsegs); UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits, obsegs); @@ -20630,7 +20801,8 @@ tcp_multisend_data(tcp_t *tcp, ire_t *ire, const ill_t *ill, mblk_t *md_mp_head, */ nce->nce_state = ND_DELAY; mutex_exit(&nce->nce_lock); - NDP_RESTART_TIMER(nce, delay_first_probe_time); + NDP_RESTART_TIMER(nce, + ipst->ips_delay_first_probe_time); if (ip_debug > 3) { /* ip2dbg */ pr_addr_dbg("tcp_multisend_data: state " @@ -20675,6 +20847,8 @@ tcp_lsosend_data(tcp_t *tcp, mblk_t *mp, ire_t *ire, ill_t *ill, const int mss, ipaddr_t dst; uint32_t cksum; uint16_t *up; + tcp_stack_t *tcps = tcp->tcp_tcps; + ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip; ASSERT(DB_TYPE(mp) == M_DATA); ASSERT(tcp->tcp_state == TCPS_ESTABLISHED); @@ -20746,14 
+20920,15 @@ tcp_lsosend_data(tcp_t *tcp, mblk_t *mp, ire_t *ire, ill_t *ill, const int mss, * depending on the availability of transmit resources at * the media layer. */ - IP_DLS_ILL_TX(ill, ipha, mp); + IP_DLS_ILL_TX(ill, ipha, mp, ipst); } else { ill_t *out_ill = (ill_t *)ire->ire_stq->q_ptr; DTRACE_PROBE4(ip4__physical__out__start, ill_t *, NULL, ill_t *, out_ill, ipha_t *, ipha, mblk_t *, mp); - FW_HOOKS(ip4_physical_out_event, ipv4firewall_physical_out, - NULL, out_ill, ipha, mp, mp); + FW_HOOKS(ipst->ips_ip4_physical_out_event, + ipst->ips_ipv4firewall_physical_out, + NULL, out_ill, ipha, mp, mp, ipst); DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, mp); if (mp != NULL) putnext(ire->ire_stq, mp); @@ -20785,6 +20960,7 @@ tcp_send(queue_t *q, tcp_t *tcp, const int mss, const int tcp_hdr_len, int num_lso_seg = 1; uint_t lso_usable; boolean_t do_lso_send = B_FALSE; + tcp_stack_t *tcps = tcp->tcp_tcps; /* * Check LSO capability before any further work. And the similar check @@ -21008,16 +21184,16 @@ tcp_send(queue_t *q, tcp_t *tcp, const int mss, const int tcp_hdr_len, *snxt += len; *tail_unsent = (*xmit_tail)->b_wptr - mp1->b_wptr; BUMP_LOCAL(tcp->tcp_obsegs); - BUMP_MIB(&tcp_mib, tcpOutDataSegs); - UPDATE_MIB(&tcp_mib, tcpOutDataBytes, len); + BUMP_MIB(&tcps->tcps_mib, tcpOutDataSegs); + UPDATE_MIB(&tcps->tcps_mib, tcpOutDataBytes, len); TCP_RECORD_TRACE(tcp, mp, TCP_TRACE_SEND_PKT); tcp_send_data(tcp, q, mp); continue; } *snxt += len; /* Adjust later if we don't send all of len */ - BUMP_MIB(&tcp_mib, tcpOutDataSegs); - UPDATE_MIB(&tcp_mib, tcpOutDataBytes, len); + BUMP_MIB(&tcps->tcps_mib, tcpOutDataSegs); + UPDATE_MIB(&tcps->tcps_mib, tcpOutDataBytes, len); if (*tail_unsent) { /* Are the bytes above us in flight? */ @@ -21097,7 +21273,7 @@ tcp_send(queue_t *q, tcp_t *tcp, const int mss, const int tcp_hdr_len, must_alloc:; mp1 = allocb(tcp->tcp_ip_hdr_len + TCP_MAX_HDR_LENGTH + - tcp_wroff_xtra + ire_fp_mp_len, BPRI_MED); + tcps->tcps_wroff_xtra + ire_fp_mp_len, BPRI_MED); if (mp1 == NULL) { freemsg(mp); if (ire != NULL) @@ -21108,7 +21284,8 @@ tcp_send(queue_t *q, tcp_t *tcp, const int mss, const int tcp_hdr_len, mp = mp1; /* Leave room for Link Level header */ len = tcp_hdr_len; - rptr = &mp->b_rptr[tcp_wroff_xtra + ire_fp_mp_len]; + rptr = + &mp->b_rptr[tcps->tcps_wroff_xtra + ire_fp_mp_len]; mp->b_wptr = &rptr[len]; } @@ -21197,7 +21374,8 @@ tcp_send(queue_t *q, tcp_t *tcp, const int mss, const int tcp_hdr_len, *usable -= spill; *snxt += spill; tcp->tcp_last_sent_len += spill; - UPDATE_MIB(&tcp_mib, tcpOutDataBytes, spill); + UPDATE_MIB(&tcps->tcps_mib, + tcpOutDataBytes, spill); /* * Adjust the checksum */ @@ -21233,8 +21411,8 @@ tcp_send(queue_t *q, tcp_t *tcp, const int mss, const int tcp_hdr_len, num_lso_seg); tcp->tcp_obsegs += num_lso_seg; - TCP_STAT(tcp_lso_times); - TCP_STAT_UPDATE(tcp_lso_pkt_out, num_lso_seg); + TCP_STAT(tcps, tcp_lso_times); + TCP_STAT_UPDATE(tcps, tcp_lso_pkt_out, num_lso_seg); } else { tcp_send_data(tcp, q, mp); BUMP_LOCAL(tcp->tcp_obsegs); @@ -21278,6 +21456,7 @@ static void tcp_mdt_update(tcp_t *tcp, ill_mdt_capab_t *mdt_capab, boolean_t first) { boolean_t prev_state; + tcp_stack_t *tcps = tcp->tcp_tcps; /* * IP is telling us to abort MDT on this connection? 
We know @@ -21292,7 +21471,7 @@ tcp_mdt_update(tcp_t *tcp, ill_mdt_capab_t *mdt_capab, boolean_t first) prev_state = tcp->tcp_mdt; tcp->tcp_mdt = (mdt_capab->ill_mdt_on != 0); if (!tcp->tcp_mdt && !first) { - TCP_STAT(tcp_mdt_conn_halted3); + TCP_STAT(tcps, tcp_mdt_conn_halted3); ip1dbg(("tcp_mdt_update: disabling MDT for connp %p\n", (void *)tcp->tcp_connp)); } @@ -21335,18 +21514,18 @@ tcp_mdt_update(tcp_t *tcp, ill_mdt_capab_t *mdt_capab, boolean_t first) /* a zero means driver wants default value */ tcp->tcp_mdt_max_pld = MIN(mdt_capab->ill_mdt_max_pld, - tcp_mdt_max_pbufs); + tcps->tcps_mdt_max_pbufs); if (tcp->tcp_mdt_max_pld == 0) - tcp->tcp_mdt_max_pld = tcp_mdt_max_pbufs; + tcp->tcp_mdt_max_pld = tcps->tcps_mdt_max_pbufs; /* ensure 32-bit alignment */ - tcp->tcp_mdt_hdr_head = roundup(MAX(tcp_mdt_hdr_head_min, + tcp->tcp_mdt_hdr_head = roundup(MAX(tcps->tcps_mdt_hdr_head_min, mdt_capab->ill_mdt_hdr_head), 4); - tcp->tcp_mdt_hdr_tail = roundup(MAX(tcp_mdt_hdr_tail_min, + tcp->tcp_mdt_hdr_tail = roundup(MAX(tcps->tcps_mdt_hdr_tail_min, mdt_capab->ill_mdt_hdr_tail), 4); if (!first && !prev_state) { - TCP_STAT(tcp_mdt_conn_resumed2); + TCP_STAT(tcps, tcp_mdt_conn_resumed2); ip1dbg(("tcp_mdt_update: reenabling MDT for connp %p\n", (void *)tcp->tcp_connp)); } @@ -21385,6 +21564,8 @@ tcp_lso_info_mp(mblk_t *mp) static void tcp_lso_update(tcp_t *tcp, ill_lso_capab_t *lso_capab) { + tcp_stack_t *tcps = tcp->tcp_tcps; + /* * IP is telling us to abort LSO on this connection? We know * this because the capability is only turned off when IP @@ -21396,7 +21577,7 @@ tcp_lso_update(tcp_t *tcp, ill_lso_capab_t *lso_capab) * will indicate that the feature is to be turned on. */ tcp->tcp_lso = (lso_capab->ill_lso_on != 0); - TCP_STAT(tcp_lso_enabled); + TCP_STAT(tcps, tcp_lso_enabled); /* * We currently only support LSO on simple TCP/IPv4, @@ -21408,7 +21589,7 @@ tcp_lso_update(tcp_t *tcp, ill_lso_capab_t *lso_capab) tcp->tcp_ip_hdr_len != IP_SIMPLE_HDR_LENGTH) || (tcp->tcp_ipversion == IPV6_VERSION)) { tcp->tcp_lso = B_FALSE; - TCP_STAT(tcp_lso_disabled); + TCP_STAT(tcps, tcp_lso_disabled); } else { tcp->tcp_lso_max = MIN(TCP_MAX_LSO_LENGTH, lso_capab->ill_lso_max); @@ -21419,6 +21600,8 @@ static void tcp_ire_ill_check(tcp_t *tcp, ire_t *ire, ill_t *ill, boolean_t check_lso_mdt) { conn_t *connp = tcp->tcp_connp; + tcp_stack_t *tcps = tcp->tcp_tcps; + ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip; ASSERT(ire != NULL); @@ -21429,13 +21612,13 @@ tcp_ire_ill_check(tcp_t *tcp, ire_t *ire, ill_t *ill, boolean_t check_lso_mdt) * are only best-effort checks, and we do more thorough ones prior * to calling tcp_send()/tcp_multisend(). 
*/ - if ((ip_lso_outbound || ip_multidata_outbound) && check_lso_mdt && - !(ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) && + if ((ipst->ips_ip_lso_outbound || ipst->ips_ip_multidata_outbound) && + check_lso_mdt && !(ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) && ill != NULL && !CONN_IPSEC_OUT_ENCAPSULATED(connp) && !(ire->ire_flags & RTF_MULTIRT) && - !IPP_ENABLED(IPP_LOCAL_OUT) && + !IPP_ENABLED(IPP_LOCAL_OUT, ipst) && CONN_IS_LSO_MD_FASTPATH(connp)) { - if (ip_lso_outbound && ILL_LSO_CAPABLE(ill)) { + if (ipst->ips_ip_lso_outbound && ILL_LSO_CAPABLE(ill)) { /* Cache the result */ connp->conn_lso_ok = B_TRUE; @@ -21447,7 +21630,8 @@ tcp_ire_ill_check(tcp_t *tcp, ire_t *ire, ill_t *ill, boolean_t check_lso_mdt) ill->ill_name)); } tcp_lso_update(tcp, ill->ill_lso_capab); - } else if (ip_multidata_outbound && ILL_MDT_CAPABLE(ill)) { + } else if (ipst->ips_ip_multidata_outbound && + ILL_MDT_CAPABLE(ill)) { /* Cache the result */ connp->conn_mdt_ok = B_TRUE; @@ -21720,6 +21904,7 @@ tcp_wput_ioctl(void *arg, mblk_t *mp, void *arg2) tcp_t *tcp = connp->conn_tcp; queue_t *q = tcp->tcp_wq; struct iocblk *iocp; + tcp_stack_t *tcps = tcp->tcp_tcps; ASSERT(DB_TYPE(mp) == M_IOCTL); /* @@ -21738,7 +21923,7 @@ tcp_wput_ioctl(void *arg, mblk_t *mp, void *arg2) switch (iocp->ioc_cmd) { case TCP_IOC_DEFAULT_Q: /* Wants to be the default wq. */ - if (secpolicy_net_config(iocp->ioc_cr, B_FALSE) != 0) { + if (secpolicy_ip_config(iocp->ioc_cr, B_FALSE) != 0) { iocp->ioc_error = EPERM; iocp->ioc_count = 0; mp->b_datap->db_type = M_IOCACK; @@ -21782,7 +21967,7 @@ tcp_wput_ioctl(void *arg, mblk_t *mp, void *arg2) tcp_fuse_disable_pair(tcp, B_FALSE); } tcp->tcp_issocket = B_FALSE; - TCP_STAT(tcp_sock_fallback); + TCP_STAT(tcps, tcp_sock_fallback); DB_TYPE(mp) = M_IOCACK; iocp->ioc_error = 0; @@ -21975,7 +22160,9 @@ non_urgent_data: static void tcp_wsrv(queue_t *q) { - TCP_STAT(tcp_wsrv_called); + tcp_stack_t *tcps = Q_TO_TCP(q)->tcp_tcps; + + TCP_STAT(tcps, tcp_wsrv_called); } /* Non overlapping byte exchanger */ @@ -22006,6 +22193,7 @@ tcp_xmit_ctl(char *str, tcp_t *tcp, uint32_t seq, uint32_t ack, int ctl) int tcp_hdr_len; int tcp_ip_hdr_len; mblk_t *mp; + tcp_stack_t *tcps = tcp->tcp_tcps; /* * Save sum for use in source route later. @@ -22021,12 +22209,12 @@ tcp_xmit_ctl(char *str, tcp_t *tcp, uint32_t seq, uint32_t ack, int ctl) "tcp_xmit_ctl: '%s', seq 0x%x, ack 0x%x, ctl 0x%x", str, seq, ack, ctl); } - mp = allocb(tcp_ip_hdr_len + TCP_MAX_HDR_LENGTH + tcp_wroff_xtra, + mp = allocb(tcp_ip_hdr_len + TCP_MAX_HDR_LENGTH + tcps->tcps_wroff_xtra, BPRI_MED); if (mp == NULL) { return; } - rptr = &mp->b_rptr[tcp_wroff_xtra]; + rptr = &mp->b_rptr[tcps->tcps_wroff_xtra]; mp->b_rptr = rptr; mp->b_wptr = &rptr[tcp_hdr_len]; bcopy(tcp->tcp_iphc, rptr, tcp_hdr_len); @@ -22043,8 +22231,8 @@ tcp_xmit_ctl(char *str, tcp_t *tcp, uint32_t seq, uint32_t ack, int ctl) tcph = (tcph_t *)&rptr[tcp_ip_hdr_len]; tcph->th_flags[0] = (uint8_t)ctl; if (ctl & TH_RST) { - BUMP_MIB(&tcp_mib, tcpOutRsts); - BUMP_MIB(&tcp_mib, tcpOutControl); + BUMP_MIB(&tcps->tcps_mib, tcpOutRsts); + BUMP_MIB(&tcps->tcps_mib, tcpOutControl); /* * Don't send TSopt w/ TH_RST packets per RFC 1323. 
*/ @@ -22076,7 +22264,7 @@ tcp_xmit_ctl(char *str, tcp_t *tcp, uint32_t seq, uint32_t ack, int ctl) tcph->th_win); tcp->tcp_rack = ack; tcp->tcp_rack_cnt = 0; - BUMP_MIB(&tcp_mib, tcpOutAck); + BUMP_MIB(&tcps->tcps_mib, tcpOutAck); } BUMP_LOCAL(tcp->tcp_obsegs); U32_TO_BE32(seq, tcph->th_seq); @@ -22095,7 +22283,7 @@ tcp_xmit_ctl(char *str, tcp_t *tcp, uint32_t seq, uint32_t ack, int ctl) * to a segment. If it returns B_FALSE, TCP should not respond. */ static boolean_t -tcp_send_rst_chk(void) +tcp_send_rst_chk(tcp_stack_t *tcps) { clock_t now; @@ -22109,14 +22297,15 @@ tcp_send_rst_chk(void) * RSTs in normal cases but when under attack, the impact is * limited. */ - if (tcp_rst_sent_rate_enabled != 0) { + if (tcps->tcps_rst_sent_rate_enabled != 0) { now = lbolt; /* lbolt can wrap around. */ - if ((tcp_last_rst_intrvl > now) || - (TICK_TO_MSEC(now - tcp_last_rst_intrvl) > 1*SECONDS)) { - tcp_last_rst_intrvl = now; - tcp_rst_cnt = 1; - } else if (++tcp_rst_cnt > tcp_rst_sent_rate) { + if ((tcps->tcps_last_rst_intrvl > now) || + (TICK_TO_MSEC(now - tcps->tcps_last_rst_intrvl) > + 1*SECONDS)) { + tcps->tcps_last_rst_intrvl = now; + tcps->tcps_rst_cnt = 1; + } else if (++tcps->tcps_rst_cnt > tcps->tcps_rst_sent_rate) { return (B_FALSE); } } @@ -22191,7 +22380,8 @@ tcp_ip_advise_mblk(void *addr, int addr_len, ipic_t **ipic) */ static void tcp_xmit_early_reset(char *str, mblk_t *mp, uint32_t seq, - uint32_t ack, int ctl, uint_t ip_hdr_len, zoneid_t zoneid) + uint32_t ack, int ctl, uint_t ip_hdr_len, zoneid_t zoneid, + tcp_stack_t *tcps) { ipha_t *ipha = NULL; ip6_t *ip6h = NULL; @@ -22205,13 +22395,31 @@ tcp_xmit_early_reset(char *str, mblk_t *mp, uint32_t seq, in6_addr_t v6addr; int addr_len; void *addr; - queue_t *q = tcp_g_q; - tcp_t *tcp = Q_TO_TCP(q); + queue_t *q = tcps->tcps_g_q; + tcp_t *tcp; cred_t *cr; mblk_t *nmp; + ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip; - if (!tcp_send_rst_chk()) { - tcp_rst_unsent++; + if (tcps->tcps_g_q == NULL) { + /* + * For non-zero stackids the default queue isn't created + * until the first open, thus there can be a need to send + * a reset before then. But we can't do that, hence we just + * drop the packet. Later during boot, when the default queue + * has been setup, a retransmitted packet from the peer + * will result in a reset. 
+ */ + ASSERT(tcps->tcps_netstack->netstack_stackid != + GLOBAL_NETSTACKID); + freemsg(mp); + return; + } + + tcp = Q_TO_TCP(q); + + if (!tcp_send_rst_chk(tcps)) { + tcps->tcps_rst_unsent++; freemsg(mp); return; } @@ -22225,7 +22433,7 @@ tcp_xmit_early_reset(char *str, mblk_t *mp, uint32_t seq, mctl_present = B_FALSE; } - if (str && q && tcp_dbg) { + if (str && q && tcps->tcps_dbg) { (void) strlog(TCP_MOD_ID, 0, 1, SL_TRACE, "tcp_xmit_early_reset: '%s', seq 0x%x, ack 0x%x, " "flags 0x%x", @@ -22269,7 +22477,7 @@ tcp_xmit_early_reset(char *str, mblk_t *mp, uint32_t seq, if (ipha->ipha_src == 0 || ipha->ipha_src == INADDR_BROADCAST || CLASSD(ipha->ipha_src)) { freemsg(ipsec_mp); - BUMP_MIB(&ip_mib, ipIfStatsInDiscards); + BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsInDiscards); return; } } else { @@ -22278,7 +22486,7 @@ tcp_xmit_early_reset(char *str, mblk_t *mp, uint32_t seq, if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src)) { freemsg(ipsec_mp); - BUMP_MIB(&ip6_mib, ipIfStatsInDiscards); + BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInDiscards); return; } @@ -22309,7 +22517,7 @@ tcp_xmit_early_reset(char *str, mblk_t *mp, uint32_t seq, ipha->ipha_src = ipha->ipha_dst; ipha->ipha_dst = v4addr; ipha->ipha_ident = 0; - ipha->ipha_ttl = (uchar_t)tcp_ipv4_ttl; + ipha->ipha_ttl = (uchar_t)tcps->tcps_ipv4_ttl; addr_len = IP_ADDR_LEN; addr = &v4addr; } else { @@ -22319,7 +22527,7 @@ tcp_xmit_early_reset(char *str, mblk_t *mp, uint32_t seq, v6addr = ip6h->ip6_src; ip6h->ip6_src = ip6h->ip6_dst; ip6h->ip6_dst = v6addr; - ip6h->ip6_hops = (uchar_t)tcp_ipv6_hoplimit; + ip6h->ip6_hops = (uchar_t)tcps->tcps_ipv6_hoplimit; addr_len = IPV6_ADDR_LEN; addr = &v6addr; } @@ -22330,8 +22538,8 @@ tcp_xmit_early_reset(char *str, mblk_t *mp, uint32_t seq, U16_TO_BE16(sizeof (tcph_t), tcph->th_sum); tcph->th_flags[0] = (uint8_t)ctl; if (ctl & TH_RST) { - BUMP_MIB(&tcp_mib, tcpOutRsts); - BUMP_MIB(&tcp_mib, tcpOutControl); + BUMP_MIB(&tcps->tcps_mib, tcpOutRsts); + BUMP_MIB(&tcps->tcps_mib, tcpOutControl); } /* IP trusts us to set up labels when required. */ @@ -22341,10 +22549,12 @@ tcp_xmit_early_reset(char *str, mblk_t *mp, uint32_t seq, if (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION) err = tsol_check_label(cr, &mp, &adjust, - tcp->tcp_connp->conn_mac_exempt); + tcp->tcp_connp->conn_mac_exempt, + tcps->tcps_netstack->netstack_ip); else err = tsol_check_label_v6(cr, &mp, &adjust, - tcp->tcp_connp->conn_mac_exempt); + tcp->tcp_connp->conn_mac_exempt, + tcps->tcps_netstack->netstack_ip); if (mctl_present) ipsec_mp->b_cont = mp; else @@ -22374,7 +22584,7 @@ tcp_xmit_early_reset(char *str, mblk_t *mp, uint32_t seq, zoneid = GLOBAL_ZONEID; /* Add the zoneid so ip_output routes it properly */ - if ((nmp = ip_prepend_zoneid(ipsec_mp, zoneid)) == NULL) { + if ((nmp = ip_prepend_zoneid(ipsec_mp, zoneid, ipst)) == NULL) { freemsg(ipsec_mp); return; } @@ -22390,7 +22600,7 @@ tcp_xmit_early_reset(char *str, mblk_t *mp, uint32_t seq, * reused by tcp_xmit_listener_reset, so it already contains * the right credentials and we don't need to call mblk_setcred. * Also the conn's cred is not right since it is associated - * with tcp_g_q. + * with tcps_g_q. 
*/ CALL_IP_WPUT(tcp->tcp_connp, tcp->tcp_wq, ipsec_mp); @@ -22424,6 +22634,7 @@ tcp_xmit_end(tcp_t *tcp) { ipic_t *ipic; mblk_t *mp; + tcp_stack_t *tcps = tcp->tcp_tcps; if (tcp->tcp_state < TCPS_SYN_RCVD || tcp->tcp_state > TCPS_CLOSE_WAIT) { @@ -22477,7 +22688,8 @@ tcp_xmit_end(tcp_t *tcp) * If TCP does not get enough samples of RTT or tcp_rtt_updates * is 0, don't update the cache. */ - if (tcp_rtt_updates == 0 || tcp->tcp_rtt_update < tcp_rtt_updates) + if (tcps->tcps_rtt_updates == 0 || + tcp->tcp_rtt_update < tcps->tcps_rtt_updates) return (0); /* @@ -22520,7 +22732,8 @@ tcp_xmit_end(tcp_t *tcp) * RST. */ void -tcp_xmit_listeners_reset(mblk_t *mp, uint_t ip_hdr_len, zoneid_t zoneid) +tcp_xmit_listeners_reset(mblk_t *mp, uint_t ip_hdr_len, zoneid_t zoneid, + tcp_stack_t *tcps) { uchar_t *rptr; uint32_t seg_len; @@ -22534,8 +22747,9 @@ tcp_xmit_listeners_reset(mblk_t *mp, uint_t ip_hdr_len, zoneid_t zoneid) boolean_t mctl_present = B_FALSE; boolean_t check = B_TRUE; boolean_t policy_present; + ipsec_stack_t *ipss = tcps->tcps_netstack->netstack_ipsec; - TCP_STAT(tcp_no_listener); + TCP_STAT(tcps, tcp_no_listener); ipsec_mp = mp; @@ -22558,11 +22772,11 @@ tcp_xmit_listeners_reset(mblk_t *mp, uint_t ip_hdr_len, zoneid_t zoneid) } if (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION) { - policy_present = ipsec_inbound_v4_policy_present; + policy_present = ipss->ipsec_inbound_v4_policy_present; ipha = (ipha_t *)mp->b_rptr; ip6h = NULL; } else { - policy_present = ipsec_inbound_v6_policy_present; + policy_present = ipss->ipsec_inbound_v6_policy_present; ipha = NULL; ip6h = (ip6_t *)mp->b_rptr; } @@ -22573,7 +22787,8 @@ tcp_xmit_listeners_reset(mblk_t *mp, uint_t ip_hdr_len, zoneid_t zoneid) * nobody's home. */ ipsec_mp = ipsec_check_global_policy( - ipsec_mp, (conn_t *)NULL, ipha, ip6h, mctl_present); + ipsec_mp, (conn_t *)NULL, ipha, ip6h, mctl_present, + tcps->tcps_netstack); if (ipsec_mp == NULL) return; } @@ -22599,7 +22814,7 @@ tcp_xmit_listeners_reset(mblk_t *mp, uint_t ip_hdr_len, zoneid_t zoneid) freemsg(ipsec_mp); } else if (flags & TH_ACK) { tcp_xmit_early_reset("no tcp, reset", - ipsec_mp, seg_ack, 0, TH_RST, ip_hdr_len, zoneid); + ipsec_mp, seg_ack, 0, TH_RST, ip_hdr_len, zoneid, tcps); } else { if (flags & TH_SYN) { seg_len++; @@ -22612,13 +22827,13 @@ tcp_xmit_listeners_reset(mblk_t *mp, uint_t ip_hdr_len, zoneid_t zoneid) * floor. 
*/ freemsg(ipsec_mp); - tcp_rst_unsent++; + tcps->tcps_rst_unsent++; return; } tcp_xmit_early_reset("no tcp, reset/ack", ipsec_mp, 0, seg_seq + seg_len, - TH_RST | TH_ACK, ip_hdr_len, zoneid); + TH_RST | TH_ACK, ip_hdr_len, zoneid, tcps); } } @@ -22650,10 +22865,11 @@ tcp_xmit_mp(tcp_t *tcp, mblk_t *mp, int32_t max_to_send, int32_t *offset, tcph_t *tcph; int32_t num_sack_blk = 0; int32_t sack_opt_len = 0; + tcp_stack_t *tcps = tcp->tcp_tcps; /* Allocate for our maximum TCP header + link-level */ - mp1 = allocb(tcp->tcp_ip_hdr_len + TCP_MAX_HDR_LENGTH + tcp_wroff_xtra, - BPRI_MED); + mp1 = allocb(tcp->tcp_ip_hdr_len + TCP_MAX_HDR_LENGTH + + tcps->tcps_wroff_xtra, BPRI_MED); if (!mp1) return (NULL); data_length = 0; @@ -22722,7 +22938,7 @@ tcp_xmit_mp(tcp_t *tcp, mblk_t *mp, int32_t max_to_send, int32_t *offset, U32_TO_ABE16(tcp->tcp_rwnd >> tcp->tcp_rcv_ws, tcp->tcp_tcph->th_win); - rptr = mp1->b_rptr + tcp_wroff_xtra; + rptr = mp1->b_rptr + tcps->tcps_wroff_xtra; mp1->b_rptr = rptr; mp1->b_wptr = rptr + tcp->tcp_hdr_len + sack_opt_len; bcopy(tcp->tcp_iphc, rptr, tcp->tcp_hdr_len); @@ -22863,7 +23079,7 @@ tcp_xmit_mp(tcp_t *tcp, mblk_t *mp, int32_t max_to_send, int32_t *offset, * the peer's calculated SMSS may be smaller * than what it can be. This should be OK. */ - if (tcp_use_smss_as_mss_opt) { + if (tcps->tcps_use_smss_as_mss_opt) { u1 = tcp->tcp_mss; U16_TO_BE16(u1, wptr); } @@ -22916,13 +23132,13 @@ tcp_xmit_mp(tcp_t *tcp, mblk_t *mp, int32_t max_to_send, int32_t *offset, u1 += tcp->tcp_sum; u1 = (u1 >> 16) + (u1 & 0xFFFF); U16_TO_BE16(u1, tcph->th_sum); - BUMP_MIB(&tcp_mib, tcpOutControl); + BUMP_MIB(&tcps->tcps_mib, tcpOutControl); } if ((tcp->tcp_valid_bits & TCP_FSS_VALID) && (seq + data_length) == tcp->tcp_fss) { if (!tcp->tcp_fin_acked) { flags |= TH_FIN; - BUMP_MIB(&tcp_mib, tcpOutControl); + BUMP_MIB(&tcps->tcps_mib, tcpOutControl); } if (!tcp->tcp_fin_sent) { tcp->tcp_fin_sent = B_TRUE; @@ -22950,7 +23166,7 @@ tcp_xmit_mp(tcp_t *tcp, mblk_t *mp, int32_t max_to_send, int32_t *offset, if ((tcp->tcp_valid_bits & TCP_URG_VALID) && u1 != 0 && u1 < (uint32_t)(64 * 1024)) { flags |= TH_URG; - BUMP_MIB(&tcp_mib, tcpOutUrg); + BUMP_MIB(&tcps->tcps_mib, tcpOutUrg); U32_TO_ABE16(u1, tcph->th_urp); } } @@ -23025,8 +23241,9 @@ tcp_push_timer(void *arg) { conn_t *connp = (conn_t *)arg; tcp_t *tcp = connp->conn_tcp; + tcp_stack_t *tcps = tcp->tcp_tcps; - TCP_DBGSTAT(tcp_push_timer_cnt); + TCP_DBGSTAT(tcps, tcp_push_timer_cnt); ASSERT(tcp->tcp_listener == NULL); @@ -23051,8 +23268,9 @@ tcp_ack_timer(void *arg) conn_t *connp = (conn_t *)arg; tcp_t *tcp = connp->conn_tcp; mblk_t *mp; + tcp_stack_t *tcps = tcp->tcp_tcps; - TCP_DBGSTAT(tcp_ack_timer_cnt); + TCP_DBGSTAT(tcps, tcp_ack_timer_cnt); tcp->tcp_ack_tid = 0; @@ -23086,8 +23304,8 @@ tcp_ack_timer(void *arg) if (mp != NULL) { TCP_RECORD_TRACE(tcp, mp, TCP_TRACE_SEND_PKT); BUMP_LOCAL(tcp->tcp_obsegs); - BUMP_MIB(&tcp_mib, tcpOutAck); - BUMP_MIB(&tcp_mib, tcpOutAckDelayed); + BUMP_MIB(&tcps->tcps_mib, tcpOutAck); + BUMP_MIB(&tcps->tcps_mib, tcpOutAckDelayed); tcp_send_data(tcp, tcp->tcp_wq, mp); } } @@ -23098,6 +23316,7 @@ static mblk_t * tcp_ack_mp(tcp_t *tcp) { uint32_t seq_no; + tcp_stack_t *tcps = tcp->tcp_tcps; /* * There are a few cases to be considered while setting the sequence no. 
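The hunks in this file all apply the same IP Instances conversion: TCP tunables, MIB counters and debug statistics that used to be file-scope globals (tcp_ecn_permitted, tcp_mib, tcp_wroff_xtra, the rexmit intervals, the bind/acceptor fanouts, and so on) become fields of a per-netstack tcp_stack_t reached through tcp->tcp_tcps, and IP-level state (hooks, fanouts, policy) is taken from the matching ip_stack_t via tcps->tcps_netstack->netstack_ip. A minimal sketch of that access pattern follows; it assumes the kernel headers of this tree, uses only identifiers visible in the hunks above, and the function name tcp_stack_example is hypothetical, added purely for illustration -- it is not part of the patch.

	/* Sketch only: illustrates the per-stack access pattern used throughout this patch. */
	static void
	tcp_stack_example(tcp_t *tcp)
	{
		tcp_stack_t *tcps = tcp->tcp_tcps;			/* per-netstack TCP state */
		ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip;	/* matching IP state */

		/* Was: TCP_STAT(tcp_rsrv_calls); BUMP_MIB(&tcp_mib, tcpOutAck); */
		TCP_STAT(tcps, tcp_rsrv_calls);			/* per-stack debug counter */
		BUMP_MIB(&tcps->tcps_mib, tcpOutAck);		/* per-stack MIB counter */

		/* Tunables are read from the stack instance instead of a global. */
		if (tcps->tcps_ecn_permitted == 2)
			tcp->tcp_ecn_ok = B_TRUE;

		/* IP policy/hook checks now take the ip_stack_t explicitly. */
		if (IPP_ENABLED(IPP_LOCAL_OUT, ipst))
			return;
	}

The same substitution accounts for the signature changes above -- tcp_send_rst_chk(), tcp_xmit_early_reset() and tcp_xmit_listeners_reset() now take a tcp_stack_t * argument -- presumably because those reset paths can run when no listener tcp_t exists to hang the stack pointer off.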
@@ -23155,7 +23374,7 @@ tcp_ack_mp(tcp_t *tcp) tcp_hdr_len = tcp->tcp_hdr_len; tcp_tcp_hdr_len = tcp->tcp_tcp_hdr_len; } - mp1 = allocb(tcp_hdr_len + tcp_wroff_xtra, BPRI_MED); + mp1 = allocb(tcp_hdr_len + tcps->tcps_wroff_xtra, BPRI_MED); if (!mp1) return (NULL); @@ -23163,7 +23382,7 @@ tcp_ack_mp(tcp_t *tcp) U32_TO_ABE16(tcp->tcp_rwnd >> tcp->tcp_rcv_ws, tcp->tcp_tcph->th_win); /* copy in prototype TCP + IP header */ - rptr = mp1->b_rptr + tcp_wroff_xtra; + rptr = mp1->b_rptr + tcps->tcps_wroff_xtra; mp1->b_rptr = rptr; mp1->b_wptr = rptr + tcp_hdr_len; bcopy(tcp->tcp_iphc, rptr, tcp->tcp_hdr_len); @@ -23250,16 +23469,18 @@ tcp_ack_mp(tcp_t *tcp) */ /* ARGSUSED */ static tcp_t * -tcp_alloc_temp_tcp(in_port_t port) +tcp_alloc_temp_tcp(in_port_t port, tcp_stack_t *tcps) { conn_t *connp; tcp_t *tcp; - connp = ipcl_conn_create(IPCL_TCPCONN, KM_SLEEP); + connp = ipcl_conn_create(IPCL_TCPCONN, KM_SLEEP, tcps->tcps_netstack); if (connp == NULL) return (NULL); tcp = connp->conn_tcp; + tcp->tcp_tcps = tcps; + TCPS_REFHOLD(tcps); /* * Only initialize the necessary info in those structures. Note @@ -23291,6 +23512,8 @@ tcp_alloc_temp_tcp(in_port_t port) * * Return: * B_TRUE if the deletion is successful, B_FALSE otherwise. + * + * Assumes that nca is only for zoneid=0 */ boolean_t tcp_reserved_port_del(in_port_t lo_port, in_port_t hi_port) @@ -23299,19 +23522,25 @@ tcp_reserved_port_del(in_port_t lo_port, in_port_t hi_port) int size; tcp_t **temp_tcp_array; tcp_t *tcp; + tcp_stack_t *tcps; + + tcps = netstack_find_by_stackid(GLOBAL_NETSTACKID)->netstack_tcp; + ASSERT(tcps != NULL); - rw_enter(&tcp_reserved_port_lock, RW_WRITER); + rw_enter(&tcps->tcps_reserved_port_lock, RW_WRITER); /* First make sure that the port ranage is indeed reserved. */ - for (i = 0; i < tcp_reserved_port_array_size; i++) { - if (tcp_reserved_port[i].lo_port == lo_port) { - hi_port = tcp_reserved_port[i].hi_port; - temp_tcp_array = tcp_reserved_port[i].temp_tcp_array; + for (i = 0; i < tcps->tcps_reserved_port_array_size; i++) { + if (tcps->tcps_reserved_port[i].lo_port == lo_port) { + hi_port = tcps->tcps_reserved_port[i].hi_port; + temp_tcp_array = + tcps->tcps_reserved_port[i].temp_tcp_array; break; } } - if (i == tcp_reserved_port_array_size) { - rw_exit(&tcp_reserved_port_lock); + if (i == tcps->tcps_reserved_port_array_size) { + rw_exit(&tcps->tcps_reserved_port_lock); + netstack_rele(tcps->tcps_netstack); return (B_FALSE); } @@ -23319,11 +23548,13 @@ tcp_reserved_port_del(in_port_t lo_port, in_port_t hi_port) * Remove the range from the array. This simple loop is possible * because port ranges are inserted in ascending order. */ - for (j = i; j < tcp_reserved_port_array_size - 1; j++) { - tcp_reserved_port[j].lo_port = tcp_reserved_port[j+1].lo_port; - tcp_reserved_port[j].hi_port = tcp_reserved_port[j+1].hi_port; - tcp_reserved_port[j].temp_tcp_array = - tcp_reserved_port[j+1].temp_tcp_array; + for (j = i; j < tcps->tcps_reserved_port_array_size - 1; j++) { + tcps->tcps_reserved_port[j].lo_port = + tcps->tcps_reserved_port[j+1].lo_port; + tcps->tcps_reserved_port[j].hi_port = + tcps->tcps_reserved_port[j+1].hi_port; + tcps->tcps_reserved_port[j].temp_tcp_array = + tcps->tcps_reserved_port[j+1].temp_tcp_array; } /* Remove all the temporary tcp structures. 
*/ @@ -23336,8 +23567,9 @@ tcp_reserved_port_del(in_port_t lo_port, in_port_t hi_port) size--; } kmem_free(temp_tcp_array, (hi_port - lo_port + 1) * sizeof (tcp_t *)); - tcp_reserved_port_array_size--; - rw_exit(&tcp_reserved_port_lock); + tcps->tcps_reserved_port_array_size--; + rw_exit(&tcps->tcps_reserved_port_lock); + netstack_rele(tcps->tcps_netstack); return (B_TRUE); } @@ -23346,13 +23578,13 @@ tcp_reserved_port_del(in_port_t lo_port, in_port_t hi_port) * first parameter is the list of tcp to be removed. The second parameter * is the number of tcps in the array. */ -#define TCP_TMP_TCP_REMOVE(tcp_array, num) \ +#define TCP_TMP_TCP_REMOVE(tcp_array, num, tcps) \ { \ while ((num) > 0) { \ tcp_t *tcp = (tcp_array)[(num) - 1]; \ tf_t *tbf; \ tcp_t *tcpnext; \ - tbf = &tcp_bind_fanout[TCP_BIND_HASH(tcp->tcp_lport)]; \ + tbf = &tcps->tcps_bind_fanout[TCP_BIND_HASH(tcp->tcp_lport)]; \ mutex_enter(&tbf->tf_lock); \ tcpnext = tcp->tcp_bind_hash; \ if (tcpnext) { \ @@ -23384,6 +23616,8 @@ tcp_reserved_port_del(in_port_t lo_port, in_port_t hi_port) * * Return: * B_TRUE if the port reservation is successful, B_FALSE otherwise. + * + * Assumes that nca is only for zoneid=0 */ boolean_t tcp_reserved_port_add(int size, in_port_t *lo_port, in_port_t *hi_port) @@ -23399,15 +23633,21 @@ tcp_reserved_port_add(int size, in_port_t *lo_port, in_port_t *hi_port) boolean_t used; tcp_rport_t tmp_ports[TCP_RESERVED_PORTS_ARRAY_MAX_SIZE]; zoneid_t zoneid = GLOBAL_ZONEID; + tcp_stack_t *tcps; /* Sanity check. */ if (size <= 0 || size > TCP_RESERVED_PORTS_RANGE_MAX) { return (B_FALSE); } - rw_enter(&tcp_reserved_port_lock, RW_WRITER); - if (tcp_reserved_port_array_size == TCP_RESERVED_PORTS_ARRAY_MAX_SIZE) { - rw_exit(&tcp_reserved_port_lock); + tcps = netstack_find_by_stackid(GLOBAL_NETSTACKID)->netstack_tcp; + ASSERT(tcps != NULL); + + rw_enter(&tcps->tcps_reserved_port_lock, RW_WRITER); + if (tcps->tcps_reserved_port_array_size == + TCP_RESERVED_PORTS_ARRAY_MAX_SIZE) { + rw_exit(&tcps->tcps_reserved_port_lock); + netstack_rele(tcps->tcps_netstack); return (B_FALSE); } @@ -23417,22 +23657,25 @@ tcp_reserved_port_add(int size, in_port_t *lo_port, in_port_t *hi_port) */ *lo_port = TCP_SMALLEST_RESERVED_PORT; *hi_port = TCP_LARGEST_RESERVED_PORT; - for (i = 0; i < tcp_reserved_port_array_size; - *lo_port = tcp_reserved_port[i].hi_port + 1, i++) { - if (tcp_reserved_port[i].lo_port - *lo_port >= size) { - *hi_port = tcp_reserved_port[i].lo_port - 1; + for (i = 0; i < tcps->tcps_reserved_port_array_size; + *lo_port = tcps->tcps_reserved_port[i].hi_port + 1, i++) { + if (tcps->tcps_reserved_port[i].lo_port - *lo_port >= size) { + *hi_port = tcps->tcps_reserved_port[i].lo_port - 1; break; } } /* No available port range. 
*/ - if (i == tcp_reserved_port_array_size && *hi_port - *lo_port < size) { - rw_exit(&tcp_reserved_port_lock); + if (i == tcps->tcps_reserved_port_array_size && + *hi_port - *lo_port < size) { + rw_exit(&tcps->tcps_reserved_port_lock); + netstack_rele(tcps->tcps_netstack); return (B_FALSE); } temp_tcp_array = kmem_zalloc(size * sizeof (tcp_t *), KM_NOSLEEP); if (temp_tcp_array == NULL) { - rw_exit(&tcp_reserved_port_lock); + rw_exit(&tcps->tcps_reserved_port_lock); + netstack_rele(tcps->tcps_netstack); return (B_FALSE); } @@ -23442,7 +23685,7 @@ tcp_reserved_port_add(int size, in_port_t *lo_port, in_port_t *hi_port) cur_size++, port++) { used = B_FALSE; net_port = htons(port); - tbf = &tcp_bind_fanout[TCP_BIND_HASH(net_port)]; + tbf = &tcps->tcps_bind_fanout[TCP_BIND_HASH(net_port)]; mutex_enter(&tbf->tf_lock); for (tcp = tbf->tf_tcp; tcp != NULL; tcp = tcp->tcp_bind_hash) { @@ -23454,7 +23697,8 @@ tcp_reserved_port_add(int size, in_port_t *lo_port, in_port_t *hi_port) * temporary tcps. */ mutex_exit(&tbf->tf_lock); - TCP_TMP_TCP_REMOVE(temp_tcp_array, cur_size); + TCP_TMP_TCP_REMOVE(temp_tcp_array, cur_size, + tcps); *lo_port = port + 1; cur_size = -1; used = B_TRUE; @@ -23462,18 +23706,21 @@ tcp_reserved_port_add(int size, in_port_t *lo_port, in_port_t *hi_port) } } if (!used) { - if ((tmp_tcp = tcp_alloc_temp_tcp(net_port)) == NULL) { + if ((tmp_tcp = tcp_alloc_temp_tcp(net_port, tcps)) == + NULL) { /* * Allocation failure. Just fail the request. * Need to remove all those temporary tcp * structures. */ mutex_exit(&tbf->tf_lock); - TCP_TMP_TCP_REMOVE(temp_tcp_array, cur_size); - rw_exit(&tcp_reserved_port_lock); + TCP_TMP_TCP_REMOVE(temp_tcp_array, cur_size, + tcps); + rw_exit(&tcps->tcps_reserved_port_lock); kmem_free(temp_tcp_array, (hi_port - lo_port + 1) * sizeof (tcp_t *)); + netstack_rele(tcps->tcps_netstack); return (B_FALSE); } temp_tcp_array[cur_size] = tmp_tcp; @@ -23489,9 +23736,10 @@ tcp_reserved_port_add(int size, in_port_t *lo_port, in_port_t *hi_port) * range is available. */ if (cur_size < size) { - TCP_TMP_TCP_REMOVE(temp_tcp_array, cur_size); - rw_exit(&tcp_reserved_port_lock); + TCP_TMP_TCP_REMOVE(temp_tcp_array, cur_size, tcps); + rw_exit(&tcps->tcps_reserved_port_lock); kmem_free(temp_tcp_array, size * sizeof (tcp_t *)); + netstack_rele(tcps->tcps_netstack); return (B_FALSE); } *hi_port = port - 1; @@ -23504,32 +23752,37 @@ tcp_reserved_port_add(int size, in_port_t *lo_port, in_port_t *hi_port) * that we should provide more reserved port ranges, this function * has to be modified to be more efficient. 
*/ - if (tcp_reserved_port_array_size == 0) { - tcp_reserved_port[0].lo_port = *lo_port; - tcp_reserved_port[0].hi_port = *hi_port; - tcp_reserved_port[0].temp_tcp_array = temp_tcp_array; + if (tcps->tcps_reserved_port_array_size == 0) { + tcps->tcps_reserved_port[0].lo_port = *lo_port; + tcps->tcps_reserved_port[0].hi_port = *hi_port; + tcps->tcps_reserved_port[0].temp_tcp_array = temp_tcp_array; } else { - for (i = 0, j = 0; i < tcp_reserved_port_array_size; i++, j++) { - if (*lo_port < tcp_reserved_port[i].lo_port && i == j) { + for (i = 0, j = 0; i < tcps->tcps_reserved_port_array_size; + i++, j++) { + if (*lo_port < tcps->tcps_reserved_port[i].lo_port && + i == j) { tmp_ports[j].lo_port = *lo_port; tmp_ports[j].hi_port = *hi_port; tmp_ports[j].temp_tcp_array = temp_tcp_array; j++; } - tmp_ports[j].lo_port = tcp_reserved_port[i].lo_port; - tmp_ports[j].hi_port = tcp_reserved_port[i].hi_port; + tmp_ports[j].lo_port = + tcps->tcps_reserved_port[i].lo_port; + tmp_ports[j].hi_port = + tcps->tcps_reserved_port[i].hi_port; tmp_ports[j].temp_tcp_array = - tcp_reserved_port[i].temp_tcp_array; + tcps->tcps_reserved_port[i].temp_tcp_array; } if (j == i) { tmp_ports[j].lo_port = *lo_port; tmp_ports[j].hi_port = *hi_port; tmp_ports[j].temp_tcp_array = temp_tcp_array; } - bcopy(tmp_ports, tcp_reserved_port, sizeof (tmp_ports)); + bcopy(tmp_ports, tcps->tcps_reserved_port, sizeof (tmp_ports)); } - tcp_reserved_port_array_size++; - rw_exit(&tcp_reserved_port_lock); + tcps->tcps_reserved_port_array_size++; + rw_exit(&tcps->tcps_reserved_port_lock); + netstack_rele(tcps->tcps_netstack); return (B_TRUE); } @@ -23543,19 +23796,19 @@ tcp_reserved_port_add(int size, in_port_t *lo_port, in_port_t *hi_port) * B_TRUE is the port is inside a reserved port range, B_FALSE otherwise. 
*/ boolean_t -tcp_reserved_port_check(in_port_t port) +tcp_reserved_port_check(in_port_t port, tcp_stack_t *tcps) { int i; - rw_enter(&tcp_reserved_port_lock, RW_READER); - for (i = 0; i < tcp_reserved_port_array_size; i++) { - if (port >= tcp_reserved_port[i].lo_port || - port <= tcp_reserved_port[i].hi_port) { - rw_exit(&tcp_reserved_port_lock); + rw_enter(&tcps->tcps_reserved_port_lock, RW_READER); + for (i = 0; i < tcps->tcps_reserved_port_array_size; i++) { + if (port >= tcps->tcps_reserved_port[i].lo_port || + port <= tcps->tcps_reserved_port[i].hi_port) { + rw_exit(&tcps->tcps_reserved_port_lock); return (B_TRUE); } } - rw_exit(&tcp_reserved_port_lock); + rw_exit(&tcps->tcps_reserved_port_lock); return (B_FALSE); } @@ -23568,17 +23821,19 @@ static int tcp_reserved_port_list(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) { int i; + tcp_stack_t *tcps = Q_TO_TCP(q)->tcp_tcps; - rw_enter(&tcp_reserved_port_lock, RW_READER); - if (tcp_reserved_port_array_size > 0) + rw_enter(&tcps->tcps_reserved_port_lock, RW_READER); + if (tcps->tcps_reserved_port_array_size > 0) (void) mi_mpprintf(mp, "The following ports are reserved:"); else (void) mi_mpprintf(mp, "No port is reserved."); - for (i = 0; i < tcp_reserved_port_array_size; i++) { + for (i = 0; i < tcps->tcps_reserved_port_array_size; i++) { (void) mi_mpprintf(mp, "%d-%d", - tcp_reserved_port[i].lo_port, tcp_reserved_port[i].hi_port); + tcps->tcps_reserved_port[i].lo_port, + tcps->tcps_reserved_port[i].hi_port); } - rw_exit(&tcp_reserved_port_lock); + rw_exit(&tcps->tcps_reserved_port_lock); return (0); } @@ -23639,6 +23894,7 @@ tcp_bind_hash_remove(tcp_t *tcp) { tcp_t *tcpnext; kmutex_t *lockp; + tcp_stack_t *tcps = tcp->tcp_tcps; if (tcp->tcp_ptpbhn == NULL) return; @@ -23648,7 +23904,7 @@ tcp_bind_hash_remove(tcp_t *tcp) * hash_remove's for this instance. */ ASSERT(tcp->tcp_lport != 0); - lockp = &tcp_bind_fanout[TCP_BIND_HASH(tcp->tcp_lport)].tf_lock; + lockp = &tcps->tcps_bind_fanout[TCP_BIND_HASH(tcp->tcp_lport)].tf_lock; ASSERT(lockp != NULL); mutex_enter(lockp); @@ -23670,12 +23926,12 @@ tcp_bind_hash_remove(tcp_t *tcp) * Returns with a CONN_INC_REF tcp structure. Caller must do a CONN_DEC_REF. 
*/ static tcp_t * -tcp_acceptor_hash_lookup(t_uscalar_t id) +tcp_acceptor_hash_lookup(t_uscalar_t id, tcp_stack_t *tcps) { tf_t *tf; tcp_t *tcp; - tf = &tcp_acceptor_fanout[TCP_ACCEPTOR_HASH(id)]; + tf = &tcps->tcps_acceptor_fanout[TCP_ACCEPTOR_HASH(id)]; mutex_enter(&tf->tf_lock); for (tcp = tf->tf_tcp; tcp != NULL; tcp = tcp->tcp_acceptor_hash) { @@ -23699,8 +23955,9 @@ tcp_acceptor_hash_insert(t_uscalar_t id, tcp_t *tcp) tf_t *tf; tcp_t **tcpp; tcp_t *tcpnext; + tcp_stack_t *tcps = tcp->tcp_tcps; - tf = &tcp_acceptor_fanout[TCP_ACCEPTOR_HASH(id)]; + tf = &tcps->tcps_acceptor_fanout[TCP_ACCEPTOR_HASH(id)]; if (tcp->tcp_ptpahn != NULL) tcp_acceptor_hash_remove(tcp); @@ -23756,13 +24013,12 @@ tcp_host_param_setvalue(queue_t *q, mblk_t *mp, char *value, caddr_t cp, int af) int error = 0; int retval; char *end; - tcp_hsp_t *hsp; tcp_hsp_t *hspprev; - ipaddr_t addr = 0; /* Address we're looking for */ in6_addr_t v6addr; /* Address we're looking for */ uint32_t hash; /* Hash of that address */ + tcp_stack_t *tcps = Q_TO_TCP(q)->tcp_tcps; /* * If the following variables are still zero after parsing the input @@ -23777,7 +24033,7 @@ tcp_host_param_setvalue(queue_t *q, mblk_t *mp, char *value, caddr_t cp, int af) long timestamp = 0; /* Originate TCP TSTAMP option, 1 = yes */ boolean_t delete = B_FALSE; /* User asked to delete this HSP */ - rw_enter(&tcp_hsp_lock, RW_WRITER); + rw_enter(&tcps->tcps_hsp_lock, RW_WRITER); /* Parse and validate address */ if (af == AF_INET) { @@ -23884,14 +24140,14 @@ tcp_host_param_setvalue(queue_t *q, mblk_t *mp, char *value, caddr_t cp, int af) * Note that deletes don't return an error if the thing * we're trying to delete isn't there. */ - if (tcp_hsp_hash == NULL) + if (tcps->tcps_hsp_hash == NULL) goto done; - hsp = tcp_hsp_hash[hash]; + hsp = tcps->tcps_hsp_hash[hash]; if (hsp) { if (IN6_ARE_ADDR_EQUAL(&hsp->tcp_hsp_addr_v6, &v6addr)) { - tcp_hsp_hash[hash] = hsp->tcp_hsp_next; + tcps->tcps_hsp_hash[hash] = hsp->tcp_hsp_next; mi_free((char *)hsp); } else { hspprev = hsp; @@ -23913,10 +24169,10 @@ tcp_host_param_setvalue(queue_t *q, mblk_t *mp, char *value, caddr_t cp, int af) * so, allocate the hash table. */ - if (!tcp_hsp_hash) { - tcp_hsp_hash = (tcp_hsp_t **) + if (!tcps->tcps_hsp_hash) { + tcps->tcps_hsp_hash = (tcp_hsp_t **) mi_zalloc(sizeof (tcp_hsp_t *) * TCP_HSP_HASH_SIZE); - if (!tcp_hsp_hash) { + if (!tcps->tcps_hsp_hash) { error = EINVAL; goto done; } @@ -23924,7 +24180,7 @@ tcp_host_param_setvalue(queue_t *q, mblk_t *mp, char *value, caddr_t cp, int af) /* Get head of hash chain */ - hsp = tcp_hsp_hash[hash]; + hsp = tcps->tcps_hsp_hash[hash]; /* Try to find pre-existing hsp on hash chain */ /* Doesn't handle CIDR prefixes. 
*/ @@ -23945,8 +24201,8 @@ tcp_host_param_setvalue(queue_t *q, mblk_t *mp, char *value, caddr_t cp, int af) error = EINVAL; goto done; } - hsp->tcp_hsp_next = tcp_hsp_hash[hash]; - tcp_hsp_hash[hash] = hsp; + hsp->tcp_hsp_next = tcps->tcps_hsp_hash[hash]; + tcps->tcps_hsp_hash[hash] = hsp; } /* Set values that the user asked us to change */ @@ -23966,7 +24222,7 @@ tcp_host_param_setvalue(queue_t *q, mblk_t *mp, char *value, caddr_t cp, int af) } done: - rw_exit(&tcp_hsp_lock); + rw_exit(&tcps->tcps_hsp_lock); return (error); } @@ -23993,14 +24249,15 @@ tcp_host_param_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) tcp_hsp_t *hsp; int i; char addrbuf[INET6_ADDRSTRLEN], subnetbuf[INET6_ADDRSTRLEN]; + tcp_stack_t *tcps = Q_TO_TCP(q)->tcp_tcps; - rw_enter(&tcp_hsp_lock, RW_READER); + rw_enter(&tcps->tcps_hsp_lock, RW_READER); (void) mi_mpprintf(mp, "Hash HSP " MI_COL_HDRPAD_STR "Address Subnet Mask Send Receive TStamp"); - if (tcp_hsp_hash) { + if (tcps->tcps_hsp_hash) { for (i = 0; i < TCP_HSP_HASH_SIZE; i++) { - hsp = tcp_hsp_hash[i]; + hsp = tcps->tcps_hsp_hash[i]; while (hsp) { if (hsp->tcp_hsp_vers == IPV4_VERSION) { (void) inet_ntop(AF_INET, @@ -24032,7 +24289,7 @@ tcp_host_param_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) } } } - rw_exit(&tcp_hsp_lock); + rw_exit(&tcps->tcps_hsp_lock); return (0); } @@ -24051,19 +24308,19 @@ static ipaddr_t netmasks[] = { * associated with the routes to determine the default sndspace and rcvspace. */ static tcp_hsp_t * -tcp_hsp_lookup(ipaddr_t addr) +tcp_hsp_lookup(ipaddr_t addr, tcp_stack_t *tcps) { tcp_hsp_t *hsp = NULL; /* Quick check without acquiring the lock. */ - if (tcp_hsp_hash == NULL) + if (tcps->tcps_hsp_hash == NULL) return (NULL); - rw_enter(&tcp_hsp_lock, RW_READER); + rw_enter(&tcps->tcps_hsp_lock, RW_READER); /* This routine finds the best-matching HSP for address addr. */ - if (tcp_hsp_hash) { + if (tcps->tcps_hsp_hash) { int i; ipaddr_t srchaddr; tcp_hsp_t *hsp_net; @@ -24075,7 +24332,7 @@ tcp_hsp_lookup(ipaddr_t addr) for (i = 1; i <= 3; i++) { /* Look for exact match on srchaddr */ - hsp = tcp_hsp_hash[TCP_HSP_HASH(srchaddr)]; + hsp = tcps->tcps_hsp_hash[TCP_HSP_HASH(srchaddr)]; while (hsp) { if (hsp->tcp_hsp_vers == IPV4_VERSION && hsp->tcp_hsp_addr == srchaddr) @@ -24128,7 +24385,7 @@ tcp_hsp_lookup(ipaddr_t addr) } } - rw_exit(&tcp_hsp_lock); + rw_exit(&tcps->tcps_hsp_lock); return (hsp); } @@ -24137,19 +24394,19 @@ tcp_hsp_lookup(ipaddr_t addr) * match lookup. */ static tcp_hsp_t * -tcp_hsp_lookup_ipv6(in6_addr_t *v6addr) +tcp_hsp_lookup_ipv6(in6_addr_t *v6addr, tcp_stack_t *tcps) { tcp_hsp_t *hsp = NULL; /* Quick check without acquiring the lock. */ - if (tcp_hsp_hash == NULL) + if (tcps->tcps_hsp_hash == NULL) return (NULL); - rw_enter(&tcp_hsp_lock, RW_READER); + rw_enter(&tcps->tcps_hsp_lock, RW_READER); /* This routine finds the best-matching HSP for address addr. 
*/ - if (tcp_hsp_hash) { + if (tcps->tcps_hsp_hash) { int i; in6_addr_t v6srchaddr; tcp_hsp_t *hsp_net; @@ -24161,7 +24418,7 @@ tcp_hsp_lookup_ipv6(in6_addr_t *v6addr) for (i = 1; i <= 3; i++) { /* Look for exact match on srchaddr */ - hsp = tcp_hsp_hash[TCP_HSP_HASH( + hsp = tcps->tcps_hsp_hash[TCP_HSP_HASH( V4_PART_OF_V6(v6srchaddr))]; while (hsp) { if (hsp->tcp_hsp_vers == IPV6_VERSION && @@ -24224,7 +24481,7 @@ tcp_hsp_lookup_ipv6(in6_addr_t *v6addr) } } - rw_exit(&tcp_hsp_lock); + rw_exit(&tcps->tcps_hsp_lock); return (hsp); } @@ -24450,7 +24707,7 @@ tcp_conprim_opt_process(tcp_t *tcp, mblk_t *mp, int *do_disconnectp, #define PASSWD_SIZE 16 /* MUST be multiple of 4 */ static void -tcp_iss_key_init(uint8_t *phrase, int len) +tcp_iss_key_init(uint8_t *phrase, int len, tcp_stack_t *tcps) { struct { int32_t current_time; @@ -24496,11 +24753,11 @@ tcp_iss_key_init(uint8_t *phrase, int len) /* * Hash 'em all together. The MD5Final is called per-connection. */ - mutex_enter(&tcp_iss_key_lock); - MD5Init(&tcp_iss_key); - MD5Update(&tcp_iss_key, (uchar_t *)&tcp_iss_cookie, + mutex_enter(&tcps->tcps_iss_key_lock); + MD5Init(&tcps->tcps_iss_key); + MD5Update(&tcps->tcps_iss_key, (uchar_t *)&tcp_iss_cookie, sizeof (tcp_iss_cookie)); - mutex_exit(&tcp_iss_key_lock); + mutex_exit(&tcps->tcps_iss_key_lock); } /* @@ -24511,10 +24768,12 @@ static int tcp_1948_phrase_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) { + tcp_stack_t *tcps = Q_TO_TCP(q)->tcp_tcps; + /* * Basically, value contains a new pass phrase. Pass it along! */ - tcp_iss_key_init((uint8_t *)value, strlen(value)); + tcp_iss_key_init((uint8_t *)value, strlen(value), tcps); return (0); } @@ -24534,45 +24793,232 @@ tcp_iphc_constructor(void *buf, void *cdrarg, int kmflags) return (0); } +/* + * Make sure we wait until the default queue is setup, yet allow + * tcp_g_q_create() to open a TCP stream. + * We need to allow tcp_g_q_create() do do an open + * of tcp, hence we compare curhread. + * All others have to wait until the tcps_g_q has been + * setup. 
+ */ void -tcp_ddi_init(void) +tcp_g_q_setup(tcp_stack_t *tcps) { - int i; + mutex_enter(&tcps->tcps_g_q_lock); + if (tcps->tcps_g_q != NULL) { + mutex_exit(&tcps->tcps_g_q_lock); + return; + } + if (tcps->tcps_g_q_creator == NULL) { + /* This thread will set it up */ + tcps->tcps_g_q_creator = curthread; + mutex_exit(&tcps->tcps_g_q_lock); + tcp_g_q_create(tcps); + mutex_enter(&tcps->tcps_g_q_lock); + ASSERT(tcps->tcps_g_q_creator == curthread); + tcps->tcps_g_q_creator = NULL; + cv_signal(&tcps->tcps_g_q_cv); + ASSERT(tcps->tcps_g_q != NULL); + mutex_exit(&tcps->tcps_g_q_lock); + return; + } + /* Everybody but the creator has to wait */ + if (tcps->tcps_g_q_creator != curthread) { + while (tcps->tcps_g_q == NULL) + cv_wait(&tcps->tcps_g_q_cv, &tcps->tcps_g_q_lock); + } + mutex_exit(&tcps->tcps_g_q_lock); +} - /* Initialize locks */ - rw_init(&tcp_hsp_lock, NULL, RW_DEFAULT, NULL); - mutex_init(&tcp_g_q_lock, NULL, MUTEX_DEFAULT, NULL); - mutex_init(&tcp_random_lock, NULL, MUTEX_DEFAULT, NULL); - mutex_init(&tcp_iss_key_lock, NULL, MUTEX_DEFAULT, NULL); - mutex_init(&tcp_epriv_port_lock, NULL, MUTEX_DEFAULT, NULL); - rw_init(&tcp_reserved_port_lock, NULL, RW_DEFAULT, NULL); +major_t IP_MAJ; +#define IP "ip" - for (i = 0; i < A_CNT(tcp_bind_fanout); i++) { - mutex_init(&tcp_bind_fanout[i].tf_lock, NULL, - MUTEX_DEFAULT, NULL); +#define TCP6DEV "/devices/pseudo/tcp6@0:tcp6" + +/* + * Create a default tcp queue here instead of in strplumb + */ +void +tcp_g_q_create(tcp_stack_t *tcps) +{ + int error; + ldi_handle_t lh = NULL; + ldi_ident_t li = NULL; + int rval; + cred_t *cr; + +#ifdef NS_DEBUG + (void) printf("tcp_g_q_create()\n"); +#endif + + ASSERT(tcps->tcps_g_q_creator == curthread); + + error = ldi_ident_from_major(IP_MAJ, &li); + if (error) { +#ifdef DEBUG + printf("tcp_g_q_create: lyr ident get failed error %d\n", + error); +#endif + return; } - for (i = 0; i < A_CNT(tcp_acceptor_fanout); i++) { - mutex_init(&tcp_acceptor_fanout[i].tf_lock, NULL, - MUTEX_DEFAULT, NULL); + cr = zone_get_kcred(netstackid_to_zoneid( + tcps->tcps_netstack->netstack_stackid)); + ASSERT(cr != NULL); + /* + * We set the tcp default queue to IPv6 because IPv4 falls + * back to IPv6 when it can't find a client, but + * IPv6 does not fall back to IPv4. + */ + error = ldi_open_by_name(TCP6DEV, FREAD|FWRITE, cr, &lh, li); + if (error) { +#ifdef DEBUG + printf("tcp_g_q_create: open of TCP6DEV failed error %d\n", + error); +#endif + goto out; } - /* TCP's IPsec code calls the packet dropper. */ - ip_drop_register(&tcp_dropper, "TCP IPsec policy enforcement"); + /* + * This ioctl causes the tcp framework to cache a pointer to + * this stream, so we don't want to close the stream after + * this operation. + * Use the kernel credentials that are for the zone we're in. + */ + error = ldi_ioctl(lh, TCP_IOC_DEFAULT_Q, + (intptr_t)0, FKIOCTL, cr, &rval); + if (error) { +#ifdef DEBUG + printf("tcp_g_q_create: ioctl TCP_IOC_DEFAULT_Q failed " + "error %d\n", error); +#endif + goto out; + } + tcps->tcps_g_q_lh = lh; /* For tcp_g_q_close */ + lh = NULL; +out: + /* Close layered handles */ + if (li) + ldi_ident_release(li); + /* Keep cred around until _inactive needs it */ + tcps->tcps_g_q_cr = cr; +} - if (!tcp_g_nd) { - if (!tcp_param_register(tcp_param_arr, A_CNT(tcp_param_arr))) { - nd_free(&tcp_g_nd); - } +/* + * We keep tcp_g_q set until all other tcp_t's in the zone + * has gone away, and then when tcp_g_q_inactive() is called + * we clear it. 
+ */ +void +tcp_g_q_destroy(tcp_stack_t *tcps) +{ +#ifdef NS_DEBUG + (void) printf("tcp_g_q_destroy()for stack %d\n", + tcps->tcps_netstack->netstack_stackid); +#endif + + if (tcps->tcps_g_q == NULL) { + return; /* Nothing to cleanup */ + } + /* + * Drop reference corresponding to the default queue. + * This reference was added from tcp_open when the default queue + * was created, hence we compensate for this extra drop in + * tcp_g_q_close. If the refcnt drops to zero here it means + * the default queue was the last one to be open, in which + * case, then tcp_g_q_inactive will be + * called as a result of the refrele. + */ + TCPS_REFRELE(tcps); +} + +/* + * Called when last tcp_t drops reference count using TCPS_REFRELE. + * Run by tcp_q_q_inactive using a taskq. + */ +static void +tcp_g_q_close(void *arg) +{ + tcp_stack_t *tcps = arg; + int error; + ldi_handle_t lh = NULL; + ldi_ident_t li = NULL; + cred_t *cr; + +#ifdef NS_DEBUG + (void) printf("tcp_g_q_inactive() for stack %d refcnt %d\n", + tcps->tcps_netstack->netstack_stackid, + tcps->tcps_netstack->netstack_refcnt); +#endif + lh = tcps->tcps_g_q_lh; + if (lh == NULL) + return; /* Nothing to cleanup */ + + ASSERT(tcps->tcps_refcnt == 1); + ASSERT(tcps->tcps_g_q != NULL); + + error = ldi_ident_from_major(IP_MAJ, &li); + if (error) { +#ifdef DEBUG + printf("tcp_g_q_inactive: lyr ident get failed error %d\n", + error); +#endif + return; } + cr = tcps->tcps_g_q_cr; + tcps->tcps_g_q_cr = NULL; + ASSERT(cr != NULL); + /* - * Note: To really walk the device tree you need the devinfo - * pointer to your device which is only available after probe/attach. - * The following is safe only because it uses ddi_root_node() + * Make sure we can break the recursion when tcp_close decrements + * the reference count causing g_q_inactive to be called again. */ - tcp_max_optsize = optcom_max_optsize(tcp_opt_obj.odb_opt_des_arr, - tcp_opt_obj.odb_opt_arr_cnt); + tcps->tcps_g_q_lh = NULL; + + /* close the default queue */ + (void) ldi_close(lh, FREAD|FWRITE, cr); + /* + * At this point in time tcps and the rest of netstack_t might + * have been deleted. + */ + tcps = NULL; + + /* Close layered handles */ + ldi_ident_release(li); + crfree(cr); +} + +/* + * Called when last tcp_t drops reference count using TCPS_REFRELE. + * + * Have to ensure that the ldi routines are not used by an + * interrupt thread by using a taskq. 
+ */ +void +tcp_g_q_inactive(tcp_stack_t *tcps) +{ + if (tcps->tcps_g_q_lh == NULL) + return; /* Nothing to cleanup */ + + ASSERT(tcps->tcps_refcnt == 0); + TCPS_REFHOLD(tcps); /* Compensate for what g_q_destroy did */ + + if (servicing_interrupt()) { + (void) taskq_dispatch(tcp_taskq, tcp_g_q_close, + (void *) tcps, TQ_SLEEP); + } else { + tcp_g_q_close(tcps); + } +} + +/* + * Called by IP when IP is loaded into the kernel + */ +void +tcp_ddi_g_init(void) +{ + IP_MAJ = ddi_name_to_major(IP); tcp_timercache = kmem_cache_create("tcp_timercache", sizeof (tcp_timer_t) + sizeof (mblk_t), 0, @@ -24586,13 +25032,92 @@ tcp_ddi_init(void) TCP_MAX_COMBINED_HEADER_LENGTH, 0, tcp_iphc_constructor, NULL, NULL, NULL, NULL, 0); + mutex_init(&tcp_random_lock, NULL, MUTEX_DEFAULT, NULL); + + /* Initialize the random number generator */ + tcp_random_init(); + tcp_squeue_wput_proc = tcp_squeue_switch(tcp_squeue_wput); tcp_squeue_close_proc = tcp_squeue_switch(tcp_squeue_close); + /* A single callback independently of how many netstacks we have */ ip_squeue_init(tcp_squeue_add); - /* Initialize the random number generator */ - tcp_random_init(); + tcp_g_kstat = tcp_g_kstat_init(&tcp_g_statistics); + + tcp_taskq = taskq_create("tcp_taskq", 1, minclsyspri, 1, 1, + TASKQ_PREPOPULATE); + + /* + * We want to be informed each time a stack is created or + * destroyed in the kernel, so we can maintain the + * set of tcp_stack_t's. + */ + netstack_register(NS_TCP, tcp_stack_init, tcp_stack_shutdown, + tcp_stack_fini); +} + + +/* + * Initialize the TCP stack instance. + */ +static void * +tcp_stack_init(netstackid_t stackid, netstack_t *ns) +{ + tcp_stack_t *tcps; + tcpparam_t *pa; + int i; + + tcps = (tcp_stack_t *)kmem_zalloc(sizeof (*tcps), KM_SLEEP); + tcps->tcps_netstack = ns; + + /* Initialize locks */ + rw_init(&tcps->tcps_hsp_lock, NULL, RW_DEFAULT, NULL); + mutex_init(&tcps->tcps_g_q_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&tcps->tcps_g_q_cv, NULL, CV_DEFAULT, NULL); + mutex_init(&tcps->tcps_iss_key_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&tcps->tcps_epriv_port_lock, NULL, MUTEX_DEFAULT, NULL); + rw_init(&tcps->tcps_reserved_port_lock, NULL, RW_DEFAULT, NULL); + + tcps->tcps_g_num_epriv_ports = TCP_NUM_EPRIV_PORTS; + tcps->tcps_g_epriv_ports[0] = 2049; + tcps->tcps_g_epriv_ports[1] = 4045; + tcps->tcps_min_anonpriv_port = 512; + + tcps->tcps_bind_fanout = kmem_zalloc(sizeof (tf_t) * + TCP_BIND_FANOUT_SIZE, KM_SLEEP); + tcps->tcps_acceptor_fanout = kmem_zalloc(sizeof (tf_t) * + TCP_FANOUT_SIZE, KM_SLEEP); + tcps->tcps_reserved_port = kmem_zalloc(sizeof (tcp_rport_t) * + TCP_RESERVED_PORTS_ARRAY_MAX_SIZE, KM_SLEEP); + + for (i = 0; i < TCP_BIND_FANOUT_SIZE; i++) { + mutex_init(&tcps->tcps_bind_fanout[i].tf_lock, NULL, + MUTEX_DEFAULT, NULL); + } + + for (i = 0; i < TCP_FANOUT_SIZE; i++) { + mutex_init(&tcps->tcps_acceptor_fanout[i].tf_lock, NULL, + MUTEX_DEFAULT, NULL); + } + + /* TCP's IPsec code calls the packet dropper. */ + ip_drop_register(&tcps->tcps_dropper, "TCP IPsec policy enforcement"); + + pa = (tcpparam_t *)kmem_alloc(sizeof (lcl_tcp_param_arr), KM_SLEEP); + tcps->tcps_params = pa; + bcopy(lcl_tcp_param_arr, tcps->tcps_params, sizeof (lcl_tcp_param_arr)); + + (void) tcp_param_register(&tcps->tcps_g_nd, tcps->tcps_params, + A_CNT(lcl_tcp_param_arr), tcps); + + /* + * Note: To really walk the device tree you need the devinfo + * pointer to your device which is only available after probe/attach. 
+ * The following is safe only because it uses ddi_root_node() + */ + tcp_max_optsize = optcom_max_optsize(tcp_opt_obj.odb_opt_des_arr, + tcp_opt_obj.odb_opt_arr_cnt); /* * Initialize RFC 1948 secret values. This will probably be reset once @@ -24605,48 +25130,104 @@ tcp_ddi_init(void) */ tcp_iss_key_init((uint8_t *)&tcp_g_t_info_ack, - sizeof (tcp_g_t_info_ack)); + sizeof (tcp_g_t_info_ack), tcps); - if ((tcp_kstat = kstat_create(TCP_MOD_NAME, 0, "tcpstat", - "net", KSTAT_TYPE_NAMED, - sizeof (tcp_statistics) / sizeof (kstat_named_t), - KSTAT_FLAG_VIRTUAL)) != NULL) { - tcp_kstat->ks_data = &tcp_statistics; - kstat_install(tcp_kstat); - } + tcps->tcps_kstat = tcp_kstat2_init(stackid, &tcps->tcps_statistics); + tcps->tcps_mibkp = tcp_kstat_init(stackid, tcps); - tcp_kstat_init(); + return (tcps); } +/* + * Called when the IP module is about to be unloaded. + */ void -tcp_ddi_destroy(void) +tcp_ddi_g_destroy(void) +{ + tcp_g_kstat_fini(tcp_g_kstat); + tcp_g_kstat = NULL; + bzero(&tcp_g_statistics, sizeof (tcp_g_statistics)); + + mutex_destroy(&tcp_random_lock); + + kmem_cache_destroy(tcp_timercache); + kmem_cache_destroy(tcp_sack_info_cache); + kmem_cache_destroy(tcp_iphc_cache); + + netstack_unregister(NS_TCP); + taskq_destroy(tcp_taskq); +} + +/* + * Shut down the TCP stack instance. + */ +/* ARGSUSED */ +static void +tcp_stack_shutdown(netstackid_t stackid, void *arg) +{ + tcp_stack_t *tcps = (tcp_stack_t *)arg; + + tcp_g_q_destroy(tcps); +} + +/* + * Free the TCP stack instance. + */ +static void +tcp_stack_fini(netstackid_t stackid, void *arg) { + tcp_stack_t *tcps = (tcp_stack_t *)arg; int i; - nd_free(&tcp_g_nd); + nd_free(&tcps->tcps_g_nd); + kmem_free(tcps->tcps_params, sizeof (lcl_tcp_param_arr)); + tcps->tcps_params = NULL; + kmem_free(tcps->tcps_wroff_xtra_param, sizeof (tcpparam_t)); + tcps->tcps_wroff_xtra_param = NULL; + kmem_free(tcps->tcps_mdt_head_param, sizeof (tcpparam_t)); + tcps->tcps_mdt_head_param = NULL; + kmem_free(tcps->tcps_mdt_tail_param, sizeof (tcpparam_t)); + tcps->tcps_mdt_tail_param = NULL; + kmem_free(tcps->tcps_mdt_max_pbufs_param, sizeof (tcpparam_t)); + tcps->tcps_mdt_max_pbufs_param = NULL; - for (i = 0; i < A_CNT(tcp_bind_fanout); i++) { - mutex_destroy(&tcp_bind_fanout[i].tf_lock); + for (i = 0; i < TCP_BIND_FANOUT_SIZE; i++) { + ASSERT(tcps->tcps_bind_fanout[i].tf_tcp == NULL); + mutex_destroy(&tcps->tcps_bind_fanout[i].tf_lock); } - for (i = 0; i < A_CNT(tcp_acceptor_fanout); i++) { - mutex_destroy(&tcp_acceptor_fanout[i].tf_lock); + for (i = 0; i < TCP_FANOUT_SIZE; i++) { + ASSERT(tcps->tcps_acceptor_fanout[i].tf_tcp == NULL); + mutex_destroy(&tcps->tcps_acceptor_fanout[i].tf_lock); } - mutex_destroy(&tcp_iss_key_lock); - rw_destroy(&tcp_hsp_lock); - mutex_destroy(&tcp_g_q_lock); - mutex_destroy(&tcp_random_lock); - mutex_destroy(&tcp_epriv_port_lock); - rw_destroy(&tcp_reserved_port_lock); + kmem_free(tcps->tcps_bind_fanout, sizeof (tf_t) * TCP_BIND_FANOUT_SIZE); + tcps->tcps_bind_fanout = NULL; - ip_drop_unregister(&tcp_dropper); + kmem_free(tcps->tcps_acceptor_fanout, sizeof (tf_t) * TCP_FANOUT_SIZE); + tcps->tcps_acceptor_fanout = NULL; - kmem_cache_destroy(tcp_timercache); - kmem_cache_destroy(tcp_sack_info_cache); - kmem_cache_destroy(tcp_iphc_cache); + kmem_free(tcps->tcps_reserved_port, sizeof (tcp_rport_t) * + TCP_RESERVED_PORTS_ARRAY_MAX_SIZE); + tcps->tcps_reserved_port = NULL; + + mutex_destroy(&tcps->tcps_iss_key_lock); + rw_destroy(&tcps->tcps_hsp_lock); + mutex_destroy(&tcps->tcps_g_q_lock); + cv_destroy(&tcps->tcps_g_q_cv); + 
mutex_destroy(&tcps->tcps_epriv_port_lock); + rw_destroy(&tcps->tcps_reserved_port_lock); + + ip_drop_unregister(&tcps->tcps_dropper); + + tcp_kstat2_fini(stackid, tcps->tcps_kstat); + tcps->tcps_kstat = NULL; + bzero(&tcps->tcps_statistics, sizeof (tcps->tcps_statistics)); + + tcp_kstat_fini(stackid, tcps->tcps_mibkp); + tcps->tcps_mibkp = NULL; - tcp_kstat_fini(); + kmem_free(tcps, sizeof (*tcps)); } /* @@ -24660,14 +25241,15 @@ tcp_iss_init(tcp_t *tcp) MD5_CTX context; struct { uint32_t ports; in6_addr_t src; in6_addr_t dst; } arg; uint32_t answer[4]; + tcp_stack_t *tcps = tcp->tcp_tcps; - tcp_iss_incr_extra += (ISS_INCR >> 1); - tcp->tcp_iss = tcp_iss_incr_extra; - switch (tcp_strong_iss) { + tcps->tcps_iss_incr_extra += (ISS_INCR >> 1); + tcp->tcp_iss = tcps->tcps_iss_incr_extra; + switch (tcps->tcps_strong_iss) { case 2: - mutex_enter(&tcp_iss_key_lock); - context = tcp_iss_key; - mutex_exit(&tcp_iss_key_lock); + mutex_enter(&tcps->tcps_iss_key_lock); + context = tcps->tcps_iss_key; + mutex_exit(&tcps->tcps_iss_key_lock); arg.ports = tcp->tcp_ports; if (tcp->tcp_ipversion == IPV4_VERSION) { IN6_IPADDR_TO_V4MAPPED(tcp->tcp_ipha->ipha_src, @@ -24713,19 +25295,38 @@ tcp_iss_init(tcp_t *tcp) * non-zero from the callback routine terminates the search. */ int -cl_tcp_walk_list(int (*callback)(cl_tcp_info_t *, void *), void *arg) +cl_tcp_walk_list(int (*cl_callback)(cl_tcp_info_t *, void *), + void *arg) +{ + netstack_handle_t nh; + netstack_t *ns; + int ret = 0; + + netstack_next_init(&nh); + while ((ns = netstack_next(&nh)) != NULL) { + ret = cl_tcp_walk_list_stack(cl_callback, arg, + ns->netstack_tcp); + netstack_rele(ns); + } + netstack_next_fini(&nh); + return (ret); +} + +static int +cl_tcp_walk_list_stack(int (*callback)(cl_tcp_info_t *, void *), void *arg, + tcp_stack_t *tcps) { tcp_t *tcp; cl_tcp_info_t cl_tcpi; connf_t *connfp; conn_t *connp; int i; + ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip; ASSERT(callback != NULL); for (i = 0; i < CONN_G_HASH_SIZE; i++) { - - connfp = &ipcl_globalhash_fanout[i]; + connfp = &ipst->ips_ipcl_globalhash_fanout[i]; connp = NULL; while ((connp = @@ -24959,13 +25560,16 @@ tcp_ioctl_abort_handler(tcp_t *tcp, mblk_t *mp) */ static int tcp_ioctl_abort_bucket(tcp_ioc_abort_conn_t *acp, int index, int *count, - boolean_t exact) + boolean_t exact, tcp_stack_t *tcps) { int nmatch, err = 0; tcp_t *tcp; MBLKP mp, last, listhead = NULL; conn_t *tconnp; - connf_t *connfp = &ipcl_conn_fanout[index]; + connf_t *connfp; + ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip; + + connfp = &ipst->ips_ipcl_conn_fanout[index]; startover: nmatch = 0; @@ -25021,7 +25625,7 @@ startover: * Abort all connections that matches the attributes specified in acp. 
*/ static int -tcp_ioctl_abort(tcp_ioc_abort_conn_t *acp) +tcp_ioctl_abort(tcp_ioc_abort_conn_t *acp, tcp_stack_t *tcps) { sa_family_t af; uint32_t ports; @@ -25030,6 +25634,7 @@ tcp_ioctl_abort(tcp_ioc_abort_conn_t *acp) boolean_t exact = B_FALSE; /* set when there is no wildcard */ int index = -1; ushort_t logflags; + ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip; af = acp->ac_local.ss_family; @@ -25057,14 +25662,16 @@ tcp_ioctl_abort(tcp_ioc_abort_conn_t *acp) */ if (index != -1) { err = tcp_ioctl_abort_bucket(acp, index, - &count, exact); + &count, exact, tcps); } else { /* * loop through all entries for wildcard case */ - for (index = 0; index < ipcl_conn_fanout_size; index++) { + for (index = 0; + index < ipst->ips_ipcl_conn_fanout_size; + index++) { err = tcp_ioctl_abort_bucket(acp, index, - &count, exact); + &count, exact, tcps); if (err != 0) break; } @@ -25095,8 +25702,11 @@ tcp_ioctl_abort_conn(queue_t *q, mblk_t *mp) MBLKP mp1; sa_family_t laf, raf; tcp_ioc_abort_conn_t *acp; - zone_t *zptr; - zoneid_t zoneid = Q_TO_CONN(q)->conn_zoneid; + zone_t *zptr; + conn_t *connp = Q_TO_CONN(q); + zoneid_t zoneid = connp->conn_zoneid; + tcp_t *tcp = connp->conn_tcp; + tcp_stack_t *tcps = tcp->tcp_tcps; iocp = (IOCP)mp->b_rptr; @@ -25107,7 +25717,7 @@ tcp_ioctl_abort_conn(queue_t *q, mblk_t *mp) } /* check permissions */ - if (secpolicy_net_config(iocp->ioc_cr, B_FALSE) != 0) { + if (secpolicy_ip_config(iocp->ioc_cr, B_FALSE) != 0) { err = EPERM; goto out; } @@ -25132,6 +25742,13 @@ tcp_ioctl_abort_conn(queue_t *q, mblk_t *mp) } } + /* + * For exclusive stacks we set the zoneid to zero + * to make TCP operate as if in the global zone. + */ + if (tcps->tcps_netstack->netstack_stackid != GLOBAL_NETSTACKID) + acp->ac_zoneid = GLOBAL_ZONEID; + if (acp->ac_start < TCPS_SYN_SENT || acp->ac_end > TCPS_TIME_WAIT || acp->ac_start > acp->ac_end || laf != raf || (laf != AF_INET && laf != AF_INET6)) { @@ -25140,7 +25757,7 @@ tcp_ioctl_abort_conn(queue_t *q, mblk_t *mp) } tcp_ioctl_abort_dump(acp); - err = tcp_ioctl_abort(acp); + err = tcp_ioctl_abort(acp, tcps); out: if (mp1 != NULL) { @@ -25171,6 +25788,7 @@ tcp_time_wait_processing(tcp_t *tcp, mblk_t *mp, uint32_t seg_seq, uint_t flags; uint32_t new_swnd = 0; conn_t *connp; + tcp_stack_t *tcps = tcp->tcp_tcps; BUMP_LOCAL(tcp->tcp_ibsegs); TCP_RECORD_TRACE(tcp, mp, TCP_TRACE_RECV_PKT); @@ -25188,8 +25806,8 @@ tcp_time_wait_processing(tcp_t *tcp, mblk_t *mp, uint32_t seg_seq, gap = seg_seq - tcp->tcp_rnxt; rgap = tcp->tcp_rwnd - (gap + seg_len); if (gap < 0) { - BUMP_MIB(&tcp_mib, tcpInDataDupSegs); - UPDATE_MIB(&tcp_mib, tcpInDataDupBytes, + BUMP_MIB(&tcps->tcps_mib, tcpInDataDupSegs); + UPDATE_MIB(&tcps->tcps_mib, tcpInDataDupBytes, (seg_len > -gap ? -gap : seg_len)); seg_len += gap; if (seg_len < 0 || (seg_len == 0 && !(flags & TH_FIN))) { @@ -25208,12 +25826,13 @@ tcp_time_wait_processing(tcp_t *tcp, mblk_t *mp, uint32_t seg_seq, if (tcp_time_wait_remove(tcp, NULL) == B_TRUE) { tcp_time_wait_append(tcp); - TCP_DBGSTAT(tcp_rput_time_wait); + TCP_DBGSTAT(tcps, + tcp_rput_time_wait); } } else { ASSERT(tcp != NULL); TCP_TIMER_RESTART(tcp, - tcp_time_wait_interval); + tcps->tcps_time_wait_interval); } tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt, tcp->tcp_rnxt, TH_ACK); @@ -25243,10 +25862,11 @@ tcp_time_wait_processing(tcp_t *tcp, mblk_t *mp, uint32_t seg_seq, * The above calculation is ugly and is a * waste of CPU cycles... 
*/ - uint32_t new_iss = tcp_iss_incr_extra; + uint32_t new_iss = tcps->tcps_iss_incr_extra; int32_t adj; + ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip; - switch (tcp_strong_iss) { + switch (tcps->tcps_strong_iss) { case 2: { /* Add time and MD5 components. */ uint32_t answer[4]; @@ -25257,9 +25877,9 @@ tcp_time_wait_processing(tcp_t *tcp, mblk_t *mp, uint32_t seg_seq, } arg; MD5_CTX context; - mutex_enter(&tcp_iss_key_lock); - context = tcp_iss_key; - mutex_exit(&tcp_iss_key_lock); + mutex_enter(&tcps->tcps_iss_key_lock); + context = tcps->tcps_iss_key; + mutex_exit(&tcps->tcps_iss_key_lock); arg.ports = tcp->tcp_ports; /* We use MAPPED addresses in tcp_iss_init */ arg.src = tcp->tcp_ip_src_v6; @@ -25293,7 +25913,7 @@ tcp_time_wait_processing(tcp_t *tcp, mblk_t *mp, uint32_t seg_seq, * ahead of the current tcp_snxt, so add the * difference to tcp_iss_incr_extra. */ - tcp_iss_incr_extra += adj; + tcps->tcps_iss_incr_extra += adj; } /* * If tcp_clean_death() can not perform the task now, @@ -25314,9 +25934,9 @@ tcp_time_wait_processing(tcp_t *tcp, mblk_t *mp, uint32_t seg_seq, * check this time by attaching a dummy * ipsec_in with ipsec_in_dont_check set. */ - if ((connp = ipcl_classify(mp, tcp->tcp_connp->conn_zoneid)) != - NULL) { - TCP_STAT(tcp_time_wait_syn_success); + connp = ipcl_classify(mp, tcp->tcp_connp->conn_zoneid, ipst); + if (connp != NULL) { + TCP_STAT(tcps, tcp_time_wait_syn_success); tcp_reinput(connp, mp, tcp->tcp_connp->conn_sqp); return; } @@ -25328,8 +25948,8 @@ tcp_time_wait_processing(tcp_t *tcp, mblk_t *mp, uint32_t seg_seq, * value is the amount out of window. */ if (rgap < 0) { - BUMP_MIB(&tcp_mib, tcpInDataPastWinSegs); - UPDATE_MIB(&tcp_mib, tcpInDataPastWinBytes, -rgap); + BUMP_MIB(&tcps->tcps_mib, tcpInDataPastWinSegs); + UPDATE_MIB(&tcps->tcps_mib, tcpInDataPastWinBytes, -rgap); /* Fix seg_len and make sure there is something left. 
*/ seg_len += rgap; if (seg_len <= 0) { @@ -25358,9 +25978,9 @@ tcp_time_wait_processing(tcp_t *tcp, mblk_t *mp, uint32_t seg_seq, flags |= TH_ACK_NEEDED; seg_len = 0; } else if (seg_len > 0) { - BUMP_MIB(&tcp_mib, tcpInClosed); - BUMP_MIB(&tcp_mib, tcpInDataInorderSegs); - UPDATE_MIB(&tcp_mib, tcpInDataInorderBytes, seg_len); + BUMP_MIB(&tcps->tcps_mib, tcpInClosed); + BUMP_MIB(&tcps->tcps_mib, tcpInDataInorderSegs); + UPDATE_MIB(&tcps->tcps_mib, tcpInDataInorderBytes, seg_len); } if (flags & TH_RST) { (void) tcp_clean_death(tcp, 0, 28); @@ -25381,7 +26001,7 @@ process_ack: if (bytes_acked <= 0) { if (bytes_acked == 0 && seg_len == 0 && new_swnd == tcp->tcp_swnd) - BUMP_MIB(&tcp_mib, tcpInDupAck); + BUMP_MIB(&tcps->tcps_mib, tcpInDupAck); } else { /* Acks something not sent */ flags |= TH_ACK_NEEDED; @@ -25398,7 +26018,7 @@ done: if ((mp->b_datap->db_struioflag & STRUIO_EAGER) != 0) { DB_CKSUMSTART(mp) = 0; mp->b_datap->db_struioflag &= ~STRUIO_EAGER; - TCP_STAT(tcp_time_wait_syn_fail); + TCP_STAT(tcps, tcp_time_wait_syn_fail); } freemsg(mp); } @@ -25450,15 +26070,16 @@ tcp_timeout(conn_t *connp, void (*f)(void *), clock_t tim) mblk_t *mp; tcp_timer_t *tcpt; tcp_t *tcp = connp->conn_tcp; + tcp_stack_t *tcps = tcp->tcp_tcps; ASSERT(connp->conn_sqp != NULL); - TCP_DBGSTAT(tcp_timeout_calls); + TCP_DBGSTAT(tcps, tcp_timeout_calls); if (tcp->tcp_timercache == NULL) { mp = tcp_timermp_alloc(KM_NOSLEEP | KM_PANIC); } else { - TCP_DBGSTAT(tcp_timeout_cached_alloc); + TCP_DBGSTAT(tcps, tcp_timeout_cached_alloc); mp = tcp->tcp_timercache; tcp->tcp_timercache = mp->b_next; mp->b_next = NULL; @@ -25523,8 +26144,9 @@ tcp_timeout_cancel(conn_t *connp, timeout_id_t id) mblk_t *mp = (mblk_t *)id; tcp_timer_t *tcpt; clock_t delta; + tcp_stack_t *tcps = connp->conn_tcp->tcp_tcps; - TCP_DBGSTAT(tcp_timeout_cancel_reqs); + TCP_DBGSTAT(tcps, tcp_timeout_cancel_reqs); if (mp == NULL) return (-1); @@ -25535,7 +26157,7 @@ tcp_timeout_cancel(conn_t *connp, timeout_id_t id) delta = untimeout(tcpt->tcpt_tid); if (delta >= 0) { - TCP_DBGSTAT(tcp_timeout_canceled); + TCP_DBGSTAT(tcps, tcp_timeout_canceled); tcp_timer_free(connp->conn_tcp, mp); CONN_DEC_REF(connp); } @@ -25566,19 +26188,24 @@ tcp_timermp_alloc(int kmflags) mp->b_wptr = NULL; mp->b_datap = NULL; mp->b_queue = NULL; + mp->b_cont = NULL; } else if (kmflags & KM_PANIC) { /* * Failed to allocate memory for the timer. Try allocating from * dblock caches. */ - TCP_STAT(tcp_timermp_allocfail); + /* ipclassifier calls this from a constructor - hence no tcps */ + TCP_G_STAT(tcp_timermp_allocfail); mp = allocb_tryhard(sizeof (tcp_timer_t)); if (mp == NULL) { size_t size = 0; /* * Memory is really low. Try tryhard allocation. 
+ * + * ipclassifier calls this from a constructor - + * hence no tcps */ - TCP_STAT(tcp_timermp_allocdblfail); + TCP_G_STAT(tcp_timermp_allocdblfail); mp = kmem_alloc_tryhard(sizeof (mblk_t) + sizeof (tcp_timer_t), &size, kmflags); mp->b_rptr = (uchar_t *)(&mp[1]); @@ -25586,10 +26213,12 @@ tcp_timermp_alloc(int kmflags) mp->b_wptr = (uchar_t *)-1; mp->b_datap = (dblk_t *)size; mp->b_queue = NULL; + mp->b_cont = NULL; } ASSERT(mp->b_wptr != NULL); } - TCP_DBGSTAT(tcp_timermp_alloced); + /* ipclassifier calls this from a constructor - hence no tcps */ + TCP_G_DBGSTAT(tcp_timermp_alloced); return (mp); } @@ -25619,6 +26248,7 @@ static void tcp_timer_free(tcp_t *tcp, mblk_t *mp) { mblk_t *mp1 = tcp->tcp_timercache; + tcp_stack_t *tcps = tcp->tcp_tcps; if (mp->b_wptr != NULL) { /* @@ -25636,7 +26266,7 @@ tcp_timer_free(tcp_t *tcp, mblk_t *mp) tcp->tcp_timercache = mp; } else { kmem_cache_free(tcp_timercache, mp); - TCP_DBGSTAT(tcp_timermp_freed); + TCP_DBGSTAT(tcps, tcp_timermp_freed); } } @@ -25655,6 +26285,7 @@ void tcp_setqfull(tcp_t *tcp) { queue_t *q = tcp->tcp_wq; + tcp_stack_t *tcps = tcp->tcp_tcps; if (!(q->q_flag & QFULL)) { mutex_enter(QLOCK(q)); @@ -25663,7 +26294,7 @@ tcp_setqfull(tcp_t *tcp) q->q_flag |= QFULL; tcp->tcp_flow_stopped = B_TRUE; mutex_exit(QLOCK(q)); - TCP_STAT(tcp_flwctl_on); + TCP_STAT(tcps, tcp_flwctl_on); } else { mutex_exit(QLOCK(q)); } @@ -25689,12 +26320,171 @@ tcp_clrqfull(tcp_t *tcp) } } + /* - * TCP Kstats implementation + * kstats related to squeues i.e. not per IP instance */ +static void * +tcp_g_kstat_init(tcp_g_stat_t *tcp_g_statp) +{ + kstat_t *ksp; + + tcp_g_stat_t template = { + { "tcp_timermp_alloced", KSTAT_DATA_UINT64 }, + { "tcp_timermp_allocfail", KSTAT_DATA_UINT64 }, + { "tcp_timermp_allocdblfail", KSTAT_DATA_UINT64 }, + { "tcp_freelist_cleanup", KSTAT_DATA_UINT64 }, + }; + + ksp = kstat_create(TCP_MOD_NAME, 0, "tcpstat_g", "net", + KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), + KSTAT_FLAG_VIRTUAL); + + if (ksp == NULL) + return (NULL); + + bcopy(&template, tcp_g_statp, sizeof (template)); + ksp->ks_data = (void *)tcp_g_statp; + + kstat_install(ksp); + return (ksp); +} + +static void +tcp_g_kstat_fini(kstat_t *ksp) +{ + if (ksp != NULL) { + kstat_delete(ksp); + } +} + + +static void * +tcp_kstat2_init(netstackid_t stackid, tcp_stat_t *tcps_statisticsp) +{ + kstat_t *ksp; + + tcp_stat_t template = { + { "tcp_time_wait", KSTAT_DATA_UINT64 }, + { "tcp_time_wait_syn", KSTAT_DATA_UINT64 }, + { "tcp_time_wait_success", KSTAT_DATA_UINT64 }, + { "tcp_time_wait_fail", KSTAT_DATA_UINT64 }, + { "tcp_reinput_syn", KSTAT_DATA_UINT64 }, + { "tcp_ip_output", KSTAT_DATA_UINT64 }, + { "tcp_detach_non_time_wait", KSTAT_DATA_UINT64 }, + { "tcp_detach_time_wait", KSTAT_DATA_UINT64 }, + { "tcp_time_wait_reap", KSTAT_DATA_UINT64 }, + { "tcp_clean_death_nondetached", KSTAT_DATA_UINT64 }, + { "tcp_reinit_calls", KSTAT_DATA_UINT64 }, + { "tcp_eager_err1", KSTAT_DATA_UINT64 }, + { "tcp_eager_err2", KSTAT_DATA_UINT64 }, + { "tcp_eager_blowoff_calls", KSTAT_DATA_UINT64 }, + { "tcp_eager_blowoff_q", KSTAT_DATA_UINT64 }, + { "tcp_eager_blowoff_q0", KSTAT_DATA_UINT64 }, + { "tcp_not_hard_bound", KSTAT_DATA_UINT64 }, + { "tcp_no_listener", KSTAT_DATA_UINT64 }, + { "tcp_found_eager", KSTAT_DATA_UINT64 }, + { "tcp_wrong_queue", KSTAT_DATA_UINT64 }, + { "tcp_found_eager_binding1", KSTAT_DATA_UINT64 }, + { "tcp_found_eager_bound1", KSTAT_DATA_UINT64 }, + { "tcp_eager_has_listener1", KSTAT_DATA_UINT64 }, + { "tcp_open_alloc", KSTAT_DATA_UINT64 }, + { 
"tcp_open_detached_alloc", KSTAT_DATA_UINT64 }, + { "tcp_rput_time_wait", KSTAT_DATA_UINT64 }, + { "tcp_listendrop", KSTAT_DATA_UINT64 }, + { "tcp_listendropq0", KSTAT_DATA_UINT64 }, + { "tcp_wrong_rq", KSTAT_DATA_UINT64 }, + { "tcp_rsrv_calls", KSTAT_DATA_UINT64 }, + { "tcp_eagerfree2", KSTAT_DATA_UINT64 }, + { "tcp_eagerfree3", KSTAT_DATA_UINT64 }, + { "tcp_eagerfree4", KSTAT_DATA_UINT64 }, + { "tcp_eagerfree5", KSTAT_DATA_UINT64 }, + { "tcp_timewait_syn_fail", KSTAT_DATA_UINT64 }, + { "tcp_listen_badflags", KSTAT_DATA_UINT64 }, + { "tcp_timeout_calls", KSTAT_DATA_UINT64 }, + { "tcp_timeout_cached_alloc", KSTAT_DATA_UINT64 }, + { "tcp_timeout_cancel_reqs", KSTAT_DATA_UINT64 }, + { "tcp_timeout_canceled", KSTAT_DATA_UINT64 }, + { "tcp_timermp_freed", KSTAT_DATA_UINT64 }, + { "tcp_push_timer_cnt", KSTAT_DATA_UINT64 }, + { "tcp_ack_timer_cnt", KSTAT_DATA_UINT64 }, + { "tcp_ire_null1", KSTAT_DATA_UINT64 }, + { "tcp_ire_null", KSTAT_DATA_UINT64 }, + { "tcp_ip_send", KSTAT_DATA_UINT64 }, + { "tcp_ip_ire_send", KSTAT_DATA_UINT64 }, + { "tcp_wsrv_called", KSTAT_DATA_UINT64 }, + { "tcp_flwctl_on", KSTAT_DATA_UINT64 }, + { "tcp_timer_fire_early", KSTAT_DATA_UINT64 }, + { "tcp_timer_fire_miss", KSTAT_DATA_UINT64 }, + { "tcp_rput_v6_error", KSTAT_DATA_UINT64 }, + { "tcp_out_sw_cksum", KSTAT_DATA_UINT64 }, + { "tcp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, + { "tcp_zcopy_on", KSTAT_DATA_UINT64 }, + { "tcp_zcopy_off", KSTAT_DATA_UINT64 }, + { "tcp_zcopy_backoff", KSTAT_DATA_UINT64 }, + { "tcp_zcopy_disable", KSTAT_DATA_UINT64 }, + { "tcp_mdt_pkt_out", KSTAT_DATA_UINT64 }, + { "tcp_mdt_pkt_out_v4", KSTAT_DATA_UINT64 }, + { "tcp_mdt_pkt_out_v6", KSTAT_DATA_UINT64 }, + { "tcp_mdt_discarded", KSTAT_DATA_UINT64 }, + { "tcp_mdt_conn_halted1", KSTAT_DATA_UINT64 }, + { "tcp_mdt_conn_halted2", KSTAT_DATA_UINT64 }, + { "tcp_mdt_conn_halted3", KSTAT_DATA_UINT64 }, + { "tcp_mdt_conn_resumed1", KSTAT_DATA_UINT64 }, + { "tcp_mdt_conn_resumed2", KSTAT_DATA_UINT64 }, + { "tcp_mdt_legacy_small", KSTAT_DATA_UINT64 }, + { "tcp_mdt_legacy_all", KSTAT_DATA_UINT64 }, + { "tcp_mdt_legacy_ret", KSTAT_DATA_UINT64 }, + { "tcp_mdt_allocfail", KSTAT_DATA_UINT64 }, + { "tcp_mdt_addpdescfail", KSTAT_DATA_UINT64 }, + { "tcp_mdt_allocd", KSTAT_DATA_UINT64 }, + { "tcp_mdt_linked", KSTAT_DATA_UINT64 }, + { "tcp_fusion_flowctl", KSTAT_DATA_UINT64 }, + { "tcp_fusion_backenabled", KSTAT_DATA_UINT64 }, + { "tcp_fusion_urg", KSTAT_DATA_UINT64 }, + { "tcp_fusion_putnext", KSTAT_DATA_UINT64 }, + { "tcp_fusion_unfusable", KSTAT_DATA_UINT64 }, + { "tcp_fusion_aborted", KSTAT_DATA_UINT64 }, + { "tcp_fusion_unqualified", KSTAT_DATA_UINT64 }, + { "tcp_fusion_rrw_busy", KSTAT_DATA_UINT64 }, + { "tcp_fusion_rrw_msgcnt", KSTAT_DATA_UINT64 }, + { "tcp_fusion_rrw_plugged", KSTAT_DATA_UINT64 }, + { "tcp_in_ack_unsent_drop", KSTAT_DATA_UINT64 }, + { "tcp_sock_fallback", KSTAT_DATA_UINT64 }, + }; + + ksp = kstat_create_netstack(TCP_MOD_NAME, 0, "tcpstat", "net", + KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), + KSTAT_FLAG_VIRTUAL, stackid); + + if (ksp == NULL) + return (NULL); + + bcopy(&template, tcps_statisticsp, sizeof (template)); + ksp->ks_data = (void *)tcps_statisticsp; + ksp->ks_private = (void *)(uintptr_t)stackid; + + kstat_install(ksp); + return (ksp); +} + static void -tcp_kstat_init(void) +tcp_kstat2_fini(netstackid_t stackid, kstat_t *ksp) { + if (ksp != NULL) { + ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); + kstat_delete_netstack(ksp, stackid); + } +} + +/* + * TCP Kstats implementation + */ +static void * 
+tcp_kstat_init(netstackid_t stackid, tcp_stack_t *tcps) +{ + kstat_t *ksp; + tcp_named_kstat_t template = { { "rtoAlgorithm", KSTAT_DATA_INT32, 0 }, { "rtoMin", KSTAT_DATA_INT32, 0 }, @@ -25751,55 +26541,69 @@ tcp_kstat_init(void) { "connTableSize6", KSTAT_DATA_INT32, 0 } }; - tcp_mibkp = kstat_create(TCP_MOD_NAME, 0, TCP_MOD_NAME, - "mib2", KSTAT_TYPE_NAMED, NUM_OF_FIELDS(tcp_named_kstat_t), 0); + ksp = kstat_create_netstack(TCP_MOD_NAME, 0, TCP_MOD_NAME, "mib2", + KSTAT_TYPE_NAMED, NUM_OF_FIELDS(tcp_named_kstat_t), 0, stackid); - if (tcp_mibkp == NULL) - return; + if (ksp == NULL) + return (NULL); template.rtoAlgorithm.value.ui32 = 4; - template.rtoMin.value.ui32 = tcp_rexmit_interval_min; - template.rtoMax.value.ui32 = tcp_rexmit_interval_max; + template.rtoMin.value.ui32 = tcps->tcps_rexmit_interval_min; + template.rtoMax.value.ui32 = tcps->tcps_rexmit_interval_max; template.maxConn.value.i32 = -1; - bcopy(&template, tcp_mibkp->ks_data, sizeof (template)); + bcopy(&template, ksp->ks_data, sizeof (template)); + ksp->ks_update = tcp_kstat_update; + ksp->ks_private = (void *)(uintptr_t)stackid; - tcp_mibkp->ks_update = tcp_kstat_update; - - kstat_install(tcp_mibkp); + kstat_install(ksp); + return (ksp); } static void -tcp_kstat_fini(void) +tcp_kstat_fini(netstackid_t stackid, kstat_t *ksp) { - - if (tcp_mibkp != NULL) { - kstat_delete(tcp_mibkp); - tcp_mibkp = NULL; + if (ksp != NULL) { + ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); + kstat_delete_netstack(ksp, stackid); } } static int tcp_kstat_update(kstat_t *kp, int rw) { - tcp_named_kstat_t *tcpkp; - tcp_t *tcp; - connf_t *connfp; - conn_t *connp; - int i; + tcp_named_kstat_t *tcpkp; + tcp_t *tcp; + connf_t *connfp; + conn_t *connp; + int i; + netstackid_t stackid = (netstackid_t)(uintptr_t)kp->ks_private; + netstack_t *ns; + tcp_stack_t *tcps; + ip_stack_t *ipst; - if (!kp || !kp->ks_data) + if ((kp == NULL) || (kp->ks_data == NULL)) return (EIO); if (rw == KSTAT_WRITE) return (EACCES); + ns = netstack_find_by_stackid(stackid); + if (ns == NULL) + return (-1); + tcps = ns->netstack_tcp; + if (tcps == NULL) { + netstack_rele(ns); + return (-1); + } tcpkp = (tcp_named_kstat_t *)kp->ks_data; tcpkp->currEstab.value.ui32 = 0; + ipst = ns->netstack_ip; + for (i = 0; i < CONN_G_HASH_SIZE; i++) { - connfp = &ipcl_globalhash_fanout[i]; + connfp = &ipst->ips_ipcl_globalhash_fanout[i]; connp = NULL; while ((connp = ipcl_get_next_conn(connfp, connp, IPCL_TCP)) != NULL) { @@ -25813,55 +26617,67 @@ tcp_kstat_update(kstat_t *kp, int rw) } } - tcpkp->activeOpens.value.ui32 = tcp_mib.tcpActiveOpens; - tcpkp->passiveOpens.value.ui32 = tcp_mib.tcpPassiveOpens; - tcpkp->attemptFails.value.ui32 = tcp_mib.tcpAttemptFails; - tcpkp->estabResets.value.ui32 = tcp_mib.tcpEstabResets; - tcpkp->inSegs.value.ui64 = tcp_mib.tcpHCInSegs; - tcpkp->outSegs.value.ui64 = tcp_mib.tcpHCOutSegs; - tcpkp->retransSegs.value.ui32 = tcp_mib.tcpRetransSegs; - tcpkp->connTableSize.value.i32 = tcp_mib.tcpConnTableSize; - tcpkp->outRsts.value.ui32 = tcp_mib.tcpOutRsts; - tcpkp->outDataSegs.value.ui32 = tcp_mib.tcpOutDataSegs; - tcpkp->outDataBytes.value.ui32 = tcp_mib.tcpOutDataBytes; - tcpkp->retransBytes.value.ui32 = tcp_mib.tcpRetransBytes; - tcpkp->outAck.value.ui32 = tcp_mib.tcpOutAck; - tcpkp->outAckDelayed.value.ui32 = tcp_mib.tcpOutAckDelayed; - tcpkp->outUrg.value.ui32 = tcp_mib.tcpOutUrg; - tcpkp->outWinUpdate.value.ui32 = tcp_mib.tcpOutWinUpdate; - tcpkp->outWinProbe.value.ui32 = tcp_mib.tcpOutWinProbe; - tcpkp->outControl.value.ui32 = 
tcp_mib.tcpOutControl; - tcpkp->outFastRetrans.value.ui32 = tcp_mib.tcpOutFastRetrans; - tcpkp->inAckSegs.value.ui32 = tcp_mib.tcpInAckSegs; - tcpkp->inAckBytes.value.ui32 = tcp_mib.tcpInAckBytes; - tcpkp->inDupAck.value.ui32 = tcp_mib.tcpInDupAck; - tcpkp->inAckUnsent.value.ui32 = tcp_mib.tcpInAckUnsent; - tcpkp->inDataInorderSegs.value.ui32 = tcp_mib.tcpInDataInorderSegs; - tcpkp->inDataInorderBytes.value.ui32 = tcp_mib.tcpInDataInorderBytes; - tcpkp->inDataUnorderSegs.value.ui32 = tcp_mib.tcpInDataUnorderSegs; - tcpkp->inDataUnorderBytes.value.ui32 = tcp_mib.tcpInDataUnorderBytes; - tcpkp->inDataDupSegs.value.ui32 = tcp_mib.tcpInDataDupSegs; - tcpkp->inDataDupBytes.value.ui32 = tcp_mib.tcpInDataDupBytes; - tcpkp->inDataPartDupSegs.value.ui32 = tcp_mib.tcpInDataPartDupSegs; - tcpkp->inDataPartDupBytes.value.ui32 = tcp_mib.tcpInDataPartDupBytes; - tcpkp->inDataPastWinSegs.value.ui32 = tcp_mib.tcpInDataPastWinSegs; - tcpkp->inDataPastWinBytes.value.ui32 = tcp_mib.tcpInDataPastWinBytes; - tcpkp->inWinProbe.value.ui32 = tcp_mib.tcpInWinProbe; - tcpkp->inWinUpdate.value.ui32 = tcp_mib.tcpInWinUpdate; - tcpkp->inClosed.value.ui32 = tcp_mib.tcpInClosed; - tcpkp->rttNoUpdate.value.ui32 = tcp_mib.tcpRttNoUpdate; - tcpkp->rttUpdate.value.ui32 = tcp_mib.tcpRttUpdate; - tcpkp->timRetrans.value.ui32 = tcp_mib.tcpTimRetrans; - tcpkp->timRetransDrop.value.ui32 = tcp_mib.tcpTimRetransDrop; - tcpkp->timKeepalive.value.ui32 = tcp_mib.tcpTimKeepalive; - tcpkp->timKeepaliveProbe.value.ui32 = tcp_mib.tcpTimKeepaliveProbe; - tcpkp->timKeepaliveDrop.value.ui32 = tcp_mib.tcpTimKeepaliveDrop; - tcpkp->listenDrop.value.ui32 = tcp_mib.tcpListenDrop; - tcpkp->listenDropQ0.value.ui32 = tcp_mib.tcpListenDropQ0; - tcpkp->halfOpenDrop.value.ui32 = tcp_mib.tcpHalfOpenDrop; - tcpkp->outSackRetransSegs.value.ui32 = tcp_mib.tcpOutSackRetransSegs; - tcpkp->connTableSize6.value.i32 = tcp_mib.tcp6ConnTableSize; - + tcpkp->activeOpens.value.ui32 = tcps->tcps_mib.tcpActiveOpens; + tcpkp->passiveOpens.value.ui32 = tcps->tcps_mib.tcpPassiveOpens; + tcpkp->attemptFails.value.ui32 = tcps->tcps_mib.tcpAttemptFails; + tcpkp->estabResets.value.ui32 = tcps->tcps_mib.tcpEstabResets; + tcpkp->inSegs.value.ui64 = tcps->tcps_mib.tcpHCInSegs; + tcpkp->outSegs.value.ui64 = tcps->tcps_mib.tcpHCOutSegs; + tcpkp->retransSegs.value.ui32 = tcps->tcps_mib.tcpRetransSegs; + tcpkp->connTableSize.value.i32 = tcps->tcps_mib.tcpConnTableSize; + tcpkp->outRsts.value.ui32 = tcps->tcps_mib.tcpOutRsts; + tcpkp->outDataSegs.value.ui32 = tcps->tcps_mib.tcpOutDataSegs; + tcpkp->outDataBytes.value.ui32 = tcps->tcps_mib.tcpOutDataBytes; + tcpkp->retransBytes.value.ui32 = tcps->tcps_mib.tcpRetransBytes; + tcpkp->outAck.value.ui32 = tcps->tcps_mib.tcpOutAck; + tcpkp->outAckDelayed.value.ui32 = tcps->tcps_mib.tcpOutAckDelayed; + tcpkp->outUrg.value.ui32 = tcps->tcps_mib.tcpOutUrg; + tcpkp->outWinUpdate.value.ui32 = tcps->tcps_mib.tcpOutWinUpdate; + tcpkp->outWinProbe.value.ui32 = tcps->tcps_mib.tcpOutWinProbe; + tcpkp->outControl.value.ui32 = tcps->tcps_mib.tcpOutControl; + tcpkp->outFastRetrans.value.ui32 = tcps->tcps_mib.tcpOutFastRetrans; + tcpkp->inAckSegs.value.ui32 = tcps->tcps_mib.tcpInAckSegs; + tcpkp->inAckBytes.value.ui32 = tcps->tcps_mib.tcpInAckBytes; + tcpkp->inDupAck.value.ui32 = tcps->tcps_mib.tcpInDupAck; + tcpkp->inAckUnsent.value.ui32 = tcps->tcps_mib.tcpInAckUnsent; + tcpkp->inDataInorderSegs.value.ui32 = + tcps->tcps_mib.tcpInDataInorderSegs; + tcpkp->inDataInorderBytes.value.ui32 = + tcps->tcps_mib.tcpInDataInorderBytes; + 
tcpkp->inDataUnorderSegs.value.ui32 = + tcps->tcps_mib.tcpInDataUnorderSegs; + tcpkp->inDataUnorderBytes.value.ui32 = + tcps->tcps_mib.tcpInDataUnorderBytes; + tcpkp->inDataDupSegs.value.ui32 = tcps->tcps_mib.tcpInDataDupSegs; + tcpkp->inDataDupBytes.value.ui32 = tcps->tcps_mib.tcpInDataDupBytes; + tcpkp->inDataPartDupSegs.value.ui32 = + tcps->tcps_mib.tcpInDataPartDupSegs; + tcpkp->inDataPartDupBytes.value.ui32 = + tcps->tcps_mib.tcpInDataPartDupBytes; + tcpkp->inDataPastWinSegs.value.ui32 = + tcps->tcps_mib.tcpInDataPastWinSegs; + tcpkp->inDataPastWinBytes.value.ui32 = + tcps->tcps_mib.tcpInDataPastWinBytes; + tcpkp->inWinProbe.value.ui32 = tcps->tcps_mib.tcpInWinProbe; + tcpkp->inWinUpdate.value.ui32 = tcps->tcps_mib.tcpInWinUpdate; + tcpkp->inClosed.value.ui32 = tcps->tcps_mib.tcpInClosed; + tcpkp->rttNoUpdate.value.ui32 = tcps->tcps_mib.tcpRttNoUpdate; + tcpkp->rttUpdate.value.ui32 = tcps->tcps_mib.tcpRttUpdate; + tcpkp->timRetrans.value.ui32 = tcps->tcps_mib.tcpTimRetrans; + tcpkp->timRetransDrop.value.ui32 = tcps->tcps_mib.tcpTimRetransDrop; + tcpkp->timKeepalive.value.ui32 = tcps->tcps_mib.tcpTimKeepalive; + tcpkp->timKeepaliveProbe.value.ui32 = + tcps->tcps_mib.tcpTimKeepaliveProbe; + tcpkp->timKeepaliveDrop.value.ui32 = + tcps->tcps_mib.tcpTimKeepaliveDrop; + tcpkp->listenDrop.value.ui32 = tcps->tcps_mib.tcpListenDrop; + tcpkp->listenDropQ0.value.ui32 = tcps->tcps_mib.tcpListenDropQ0; + tcpkp->halfOpenDrop.value.ui32 = tcps->tcps_mib.tcpHalfOpenDrop; + tcpkp->outSackRetransSegs.value.ui32 = + tcps->tcps_mib.tcpOutSackRetransSegs; + tcpkp->connTableSize6.value.i32 = tcps->tcps_mib.tcp6ConnTableSize; + + netstack_rele(ns); return (0); } @@ -25872,10 +26688,11 @@ tcp_reinput(conn_t *connp, mblk_t *mp, squeue_t *sqp) ipha_t *ipha; uint8_t *nexthdrp; tcph_t *tcph; + tcp_stack_t *tcps = connp->conn_tcp->tcp_tcps; /* Already has an eager */ if ((mp->b_datap->db_struioflag & STRUIO_EAGER) != 0) { - TCP_STAT(tcp_reinput_syn); + TCP_STAT(tcps, tcp_reinput_syn); squeue_enter(connp->conn_sqp, mp, connp->conn_recv, connp, SQTAG_TCP_REINPUT_EAGER); return; @@ -25924,6 +26741,10 @@ tcp_squeue_switch(int val) return (rval); } +/* + * This is called once for each squeue - globally for all stack + * instances. + */ static void tcp_squeue_add(squeue_t *sqp) { diff --git a/usr/src/uts/common/inet/tcp/tcp_fusion.c b/usr/src/uts/common/inet/tcp/tcp_fusion.c index 01626dbd0c..a13a2fc79b 100644 --- a/usr/src/uts/common/inet/tcp/tcp_fusion.c +++ b/usr/src/uts/common/inet/tcp/tcp_fusion.c @@ -101,25 +101,6 @@ */ /* - * Macros that determine whether or not IP processing is needed for TCP. - */ -#define TCP_IPOPT_POLICY_V4(tcp) \ - ((tcp)->tcp_ipversion == IPV4_VERSION && \ - ((tcp)->tcp_ip_hdr_len != IP_SIMPLE_HDR_LENGTH || \ - CONN_OUTBOUND_POLICY_PRESENT((tcp)->tcp_connp) || \ - CONN_INBOUND_POLICY_PRESENT((tcp)->tcp_connp))) - -#define TCP_IPOPT_POLICY_V6(tcp) \ - ((tcp)->tcp_ipversion == IPV6_VERSION && \ - ((tcp)->tcp_ip_hdr_len != IPV6_HDR_LEN || \ - CONN_OUTBOUND_POLICY_PRESENT_V6((tcp)->tcp_connp) || \ - CONN_INBOUND_POLICY_PRESENT_V6((tcp)->tcp_connp))) - -#define TCP_LOOPBACK_IP(tcp) \ - (TCP_IPOPT_POLICY_V4(tcp) || TCP_IPOPT_POLICY_V6(tcp) || \ - !CONN_IS_LSO_MD_FASTPATH((tcp)->tcp_connp)) - -/* * Setting this to false means we disable fusion altogether and * loopback connections would go through the protocol paths. 
*/ @@ -146,6 +127,35 @@ static void tcp_fuse_syncstr_disable(tcp_t *); static void strrput_sig(queue_t *, boolean_t); /* + * Return true if this connection needs some IP functionality + */ +static boolean_t +tcp_loopback_needs_ip(tcp_t *tcp, netstack_t *ns) +{ + ipsec_stack_t *ipss = ns->netstack_ipsec; + + if (tcp->tcp_ipversion == IPV4_VERSION) { + if (tcp->tcp_ip_hdr_len != IP_SIMPLE_HDR_LENGTH) + return (B_TRUE); + if (CONN_OUTBOUND_POLICY_PRESENT(tcp->tcp_connp, ipss)) + return (B_TRUE); + if (CONN_INBOUND_POLICY_PRESENT(tcp->tcp_connp, ipss)) + return (B_TRUE); + } else { + if (tcp->tcp_ip_hdr_len != IPV6_HDR_LEN) + return (B_TRUE); + if (CONN_OUTBOUND_POLICY_PRESENT_V6(tcp->tcp_connp, ipss)) + return (B_TRUE); + if (CONN_INBOUND_POLICY_PRESENT_V6(tcp->tcp_connp, ipss)) + return (B_TRUE); + } + if (!CONN_IS_LSO_MD_FASTPATH(tcp->tcp_connp)) + return (B_TRUE); + return (B_FALSE); +} + + +/* * This routine gets called by the eager tcp upon changing state from * SYN_RCVD to ESTABLISHED. It fuses a direct path between itself * and the active connect tcp such that the regular tcp processings @@ -161,6 +171,9 @@ tcp_fuse(tcp_t *tcp, uchar_t *iphdr, tcph_t *tcph) { conn_t *peer_connp, *connp = tcp->tcp_connp; tcp_t *peer_tcp; + tcp_stack_t *tcps = tcp->tcp_tcps; + netstack_t *ns; + ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip; ASSERT(!tcp->tcp_fused); ASSERT(tcp->tcp_loopback); @@ -186,10 +199,10 @@ tcp_fuse(tcp_t *tcp, uchar_t *iphdr, tcph_t *tcph) */ if (tcp->tcp_ipversion == IPV4_VERSION) { peer_connp = ipcl_conn_tcp_lookup_reversed_ipv4(connp, - (ipha_t *)iphdr, tcph); + (ipha_t *)iphdr, tcph, ipst); } else { peer_connp = ipcl_conn_tcp_lookup_reversed_ipv6(connp, - (ip6_t *)iphdr, tcph); + (ip6_t *)iphdr, tcph, ipst); } /* @@ -204,7 +217,7 @@ tcp_fuse(tcp_t *tcp, uchar_t *iphdr, tcph_t *tcph) if (peer_connp == NULL || peer_connp->conn_sqp != connp->conn_sqp || !IPCL_IS_TCP(peer_connp)) { if (peer_connp != NULL) { - TCP_STAT(tcp_fusion_unqualified); + TCP_STAT(tcps, tcp_fusion_unqualified); CONN_DEC_REF(peer_connp); } return; @@ -221,10 +234,14 @@ tcp_fuse(tcp_t *tcp, uchar_t *iphdr, tcph_t *tcph) * In particular we bail out for non-simple TCP/IP or if IPsec/ * IPQoS policy/kernel SSL exists. 
*/ + ns = tcps->tcps_netstack; + ipst = ns->netstack_ip; + if (!tcp->tcp_unfusable && !peer_tcp->tcp_unfusable && - !TCP_LOOPBACK_IP(tcp) && !TCP_LOOPBACK_IP(peer_tcp) && + !tcp_loopback_needs_ip(tcp, ns) && + !tcp_loopback_needs_ip(peer_tcp, ns) && tcp->tcp_kssl_ent == NULL && - !IPP_ENABLED(IPP_LOCAL_OUT|IPP_LOCAL_IN)) { + !IPP_ENABLED(IPP_LOCAL_OUT|IPP_LOCAL_IN, ipst)) { mblk_t *mp; struct stroptions *stropt; queue_t *peer_rq = peer_tcp->tcp_rq; @@ -314,7 +331,7 @@ tcp_fuse(tcp_t *tcp, uchar_t *iphdr, tcph_t *tcph) /* Send the options up */ putnext(peer_rq, mp); } else { - TCP_STAT(tcp_fusion_unqualified); + TCP_STAT(tcps, tcp_fusion_unqualified); } CONN_DEC_REF(peer_connp); return; @@ -377,6 +394,7 @@ tcp_fuse_output_urg(tcp_t *tcp, mblk_t *mp) struct T_exdata_ind *tei; tcp_t *peer_tcp = tcp->tcp_loopback_peer; mblk_t *head, *prev_head = NULL; + tcp_stack_t *tcps = tcp->tcp_tcps; ASSERT(tcp->tcp_fused); ASSERT(peer_tcp != NULL && peer_tcp->tcp_loopback_peer == tcp); @@ -423,8 +441,8 @@ tcp_fuse_output_urg(tcp_t *tcp, mblk_t *mp) tei->MORE_flag = 0; mp->b_wptr = (uchar_t *)&tei[1]; - TCP_STAT(tcp_fusion_urg); - BUMP_MIB(&tcp_mib, tcpOutUrg); + TCP_STAT(tcps, tcp_fusion_urg); + BUMP_MIB(&tcps->tcps_mib, tcpOutUrg); head = peer_tcp->tcp_rcv_list; while (head != NULL) { @@ -474,6 +492,9 @@ tcp_fuse_output(tcp_t *tcp, mblk_t *mp, uint32_t send_size) uint_t ip_hdr_len; uint32_t seq; uint32_t recv_size = send_size; + tcp_stack_t *tcps = tcp->tcp_tcps; + netstack_t *ns = tcps->tcps_netstack; + ip_stack_t *ipst = ns->netstack_ip; ASSERT(tcp->tcp_fused); ASSERT(peer_tcp != NULL && peer_tcp->tcp_loopback_peer == tcp); @@ -484,9 +505,10 @@ tcp_fuse_output(tcp_t *tcp, mblk_t *mp, uint32_t send_size) max_unread = peer_tcp->tcp_fuse_rcv_unread_hiwater; /* If this connection requires IP, unfuse and use regular path */ - if (TCP_LOOPBACK_IP(tcp) || TCP_LOOPBACK_IP(peer_tcp) || - IPP_ENABLED(IPP_LOCAL_OUT|IPP_LOCAL_IN)) { - TCP_STAT(tcp_fusion_aborted); + if (tcp_loopback_needs_ip(tcp, ns) || + tcp_loopback_needs_ip(peer_tcp, ns) || + IPP_ENABLED(IPP_LOCAL_OUT|IPP_LOCAL_IN, ipst)) { + TCP_STAT(tcps, tcp_fusion_aborted); goto unfuse; } @@ -515,11 +537,11 @@ tcp_fuse_output(tcp_t *tcp, mblk_t *mp, uint32_t send_size) } if (tcp->tcp_ipversion == IPV4_VERSION && - (HOOKS4_INTERESTED_LOOPBACK_IN || - HOOKS4_INTERESTED_LOOPBACK_OUT) || + (HOOKS4_INTERESTED_LOOPBACK_IN(ipst) || + HOOKS4_INTERESTED_LOOPBACK_OUT(ipst)) || tcp->tcp_ipversion == IPV6_VERSION && - (HOOKS6_INTERESTED_LOOPBACK_IN || - HOOKS6_INTERESTED_LOOPBACK_OUT)) { + (HOOKS6_INTERESTED_LOOPBACK_IN(ipst) || + HOOKS6_INTERESTED_LOOPBACK_OUT(ipst))) { /* * Build ip and tcp header to satisfy FW_HOOKS. * We only build it when any hook is present. 
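The tcp_fuse_output() hunks above and below follow the conversion pattern applied throughout this patch: formerly file-scope globals (tcp_statistics, tcp_mib, loopback_packets, the loopback firewall hook events) become fields of per-instance structures (tcp_stack_t, ip_stack_t) that each routine reaches through its conn_t/tcp_t, so macros such as TCP_STAT, BUMP_MIB and FW_HOOKS now take the stack as an explicit first argument. A minimal user-level C sketch of that conversion, using simplified stand-in types rather than the real illumos definitions (the actual tcp_stat_t counters are kstat_named_t and the stack carries many more fields):

	#include <stdint.h>
	#include <stdio.h>

	/* Simplified stand-ins; not the illumos structures. */
	typedef struct tcp_stat {
		uint64_t tcp_fusion_urg;
		uint64_t tcp_fusion_flowctl;
	} tcp_stat_t;

	typedef struct tcp_stack {
		tcp_stat_t tcps_statistics;	/* one copy per IP instance */
	} tcp_stack_t;

	/*
	 * Before: one global counter set, e.g.
	 *	#define TCP_STAT(x)	(tcp_statistics.x++)
	 * After: the caller passes the stack it resolved from its connection.
	 */
	#define	TCP_STAT(tcps, x)	((tcps)->tcps_statistics.x++)

	int
	main(void)
	{
		tcp_stack_t global_stack = { { 0, 0 } };
		tcp_stack_t zone_stack = { { 0, 0 } };

		/* Counters bumped on one instance are invisible to the other. */
		TCP_STAT(&global_stack, tcp_fusion_urg);
		TCP_STAT(&zone_stack, tcp_fusion_flowctl);

		(void) printf("global urg=%llu, zone flowctl=%llu\n",
		    (unsigned long long)global_stack.tcps_statistics.tcp_fusion_urg,
		    (unsigned long long)zone_stack.tcps_statistics.tcp_fusion_flowctl);
		return (0);
	}

The same shape recurs in the FW_HOOKS and HOOKS4/6_INTERESTED_LOOPBACK changes in the surrounding hunks, where the hook events and firewall callback lists are fetched from the ip_stack_t of the connection's netstack instead of from globals.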
@@ -538,9 +560,9 @@ tcp_fuse_output(tcp_t *tcp, mblk_t *mp, uint32_t send_size) DTRACE_PROBE4(ip4__loopback__out__start, ill_t *, NULL, ill_t *, olp, ipha_t *, ipha, mblk_t *, mp1); - FW_HOOKS(ip4_loopback_out_event, - ipv4firewall_loopback_out, - NULL, olp, ipha, mp1, mp1); + FW_HOOKS(ipst->ips_ip4_loopback_out_event, + ipst->ips_ipv4firewall_loopback_out, + NULL, olp, ipha, mp1, mp1, ipst); DTRACE_PROBE1(ip4__loopback__out__end, mblk_t *, mp1); } else { ip6h = (ip6_t *)mp1->b_rptr; @@ -548,9 +570,9 @@ tcp_fuse_output(tcp_t *tcp, mblk_t *mp, uint32_t send_size) DTRACE_PROBE4(ip6__loopback__out__start, ill_t *, NULL, ill_t *, olp, ip6_t *, ip6h, mblk_t *, mp1); - FW_HOOKS6(ip6_loopback_out_event, - ipv6firewall_loopback_out, - NULL, olp, ip6h, mp1, mp1); + FW_HOOKS6(ipst->ips_ip6_loopback_out_event, + ipst->ips_ipv6firewall_loopback_out, + NULL, olp, ip6h, mp1, mp1, ipst); DTRACE_PROBE1(ip6__loopback__out__end, mblk_t *, mp1); } if (mp1 == NULL) @@ -565,9 +587,9 @@ tcp_fuse_output(tcp_t *tcp, mblk_t *mp, uint32_t send_size) DTRACE_PROBE4(ip4__loopback__in__start, ill_t *, ilp, ill_t *, NULL, ipha_t *, ipha, mblk_t *, mp1); - FW_HOOKS(ip4_loopback_in_event, - ipv4firewall_loopback_in, - ilp, NULL, ipha, mp1, mp1); + FW_HOOKS(ipst->ips_ip4_loopback_in_event, + ipst->ips_ipv4firewall_loopback_in, + ilp, NULL, ipha, mp1, mp1, ipst); DTRACE_PROBE1(ip4__loopback__in__end, mblk_t *, mp1); if (mp1 == NULL) goto unfuse; @@ -577,9 +599,9 @@ tcp_fuse_output(tcp_t *tcp, mblk_t *mp, uint32_t send_size) DTRACE_PROBE4(ip6__loopback__in__start, ill_t *, ilp, ill_t *, NULL, ip6_t *, ip6h, mblk_t *, mp1); - FW_HOOKS6(ip6_loopback_in_event, - ipv6firewall_loopback_in, - ilp, NULL, ip6h, mp1, mp1); + FW_HOOKS6(ipst->ips_ip6_loopback_in_event, + ipst->ips_ipv6firewall_loopback_in, + ilp, NULL, ip6h, mp1, mp1, ipst); DTRACE_PROBE1(ip6__loopback__in__end, mblk_t *, mp1); if (mp1 == NULL) goto unfuse; @@ -669,7 +691,7 @@ tcp_fuse_output(tcp_t *tcp, mblk_t *mp, uint32_t send_size) !TCP_IS_DETACHED(peer_tcp) && !canputnext(peer_tcp->tcp_rq)))) { tcp_setqfull(tcp); flow_stopped = B_TRUE; - TCP_STAT(tcp_fusion_flowctl); + TCP_STAT(tcps, tcp_fusion_flowctl); DTRACE_PROBE4(tcp__fuse__output__flowctl, tcp_t *, tcp, uint_t, send_size, uint_t, peer_tcp->tcp_rcv_cnt, uint_t, peer_tcp->tcp_fuse_rcv_unread_cnt); @@ -679,7 +701,7 @@ tcp_fuse_output(tcp_t *tcp, mblk_t *mp, uint32_t send_size) flow_stopped = B_FALSE; } mutex_exit(&tcp->tcp_non_sq_lock); - loopback_packets++; + ipst->ips_loopback_packets++; tcp->tcp_last_sent_len = send_size; /* Need to adjust the following SNMP MIB-related variables */ @@ -688,12 +710,12 @@ tcp_fuse_output(tcp_t *tcp, mblk_t *mp, uint32_t send_size) peer_tcp->tcp_rnxt += recv_size; peer_tcp->tcp_rack = peer_tcp->tcp_rnxt; - BUMP_MIB(&tcp_mib, tcpOutDataSegs); - UPDATE_MIB(&tcp_mib, tcpOutDataBytes, send_size); + BUMP_MIB(&tcps->tcps_mib, tcpOutDataSegs); + UPDATE_MIB(&tcps->tcps_mib, tcpOutDataBytes, send_size); - BUMP_MIB(&tcp_mib, tcpInSegs); - BUMP_MIB(&tcp_mib, tcpInDataInorderSegs); - UPDATE_MIB(&tcp_mib, tcpInDataInorderBytes, send_size); + BUMP_MIB(&tcps->tcps_mib, tcpInSegs); + BUMP_MIB(&tcps->tcps_mib, tcpInDataInorderSegs); + UPDATE_MIB(&tcps->tcps_mib, tcpInDataInorderBytes, send_size); BUMP_LOCAL(tcp->tcp_obsegs); BUMP_LOCAL(peer_tcp->tcp_ibsegs); @@ -749,6 +771,7 @@ tcp_fuse_rcv_drain(queue_t *q, tcp_t *tcp, mblk_t **sigurg_mpp) #ifdef DEBUG uint_t cnt = 0; #endif + tcp_stack_t *tcps = tcp->tcp_tcps; ASSERT(tcp->tcp_loopback); ASSERT(tcp->tcp_fused || tcp->tcp_fused_sigurg); @@ 
-777,7 +800,7 @@ tcp_fuse_rcv_drain(queue_t *q, tcp_t *tcp, mblk_t **sigurg_mpp) (mp = allocb_tryhard(1)) == NULL) { /* Alloc failed; try again next time */ tcp->tcp_push_tid = TCP_TIMER(tcp, tcp_push_timer, - MSEC_TO_TICK(tcp_push_timer_interval)); + MSEC_TO_TICK(tcps->tcps_push_timer_interval)); return (B_TRUE); } else if (sigurg_mpp != NULL) { /* @@ -823,7 +846,7 @@ tcp_fuse_rcv_drain(queue_t *q, tcp_t *tcp, mblk_t **sigurg_mpp) cnt += msgdsize(mp); #endif putnext(q, mp); - TCP_STAT(tcp_fusion_putnext); + TCP_STAT(tcps, tcp_fusion_putnext); } if (tcp->tcp_direct_sockfs) @@ -860,6 +883,7 @@ tcp_fuse_rrw(queue_t *q, struiod_t *dp) tcp_t *tcp = Q_TO_CONN(q)->conn_tcp; mblk_t *mp; tcp_t *peer_tcp; + tcp_stack_t *tcps = tcp->tcp_tcps; mutex_enter(&tcp->tcp_non_sq_lock); @@ -876,8 +900,8 @@ plugged: } while (tcp->tcp_fuse_syncstr_plugged); mutex_exit(&tcp->tcp_non_sq_lock); - TCP_STAT(tcp_fusion_rrw_plugged); - TCP_STAT(tcp_fusion_rrw_busy); + TCP_STAT(tcps, tcp_fusion_rrw_plugged); + TCP_STAT(tcps, tcp_fusion_rrw_busy); return (EBUSY); } @@ -890,7 +914,7 @@ plugged: */ if (!tcp->tcp_direct_sockfs || tcp->tcp_fuse_syncstr_stopped) { mutex_exit(&tcp->tcp_non_sq_lock); - TCP_STAT(tcp_fusion_rrw_busy); + TCP_STAT(tcps, tcp_fusion_rrw_busy); return (EBUSY); } @@ -921,7 +945,7 @@ plugged: uint32_t, tcp->tcp_rcv_cnt, ssize_t, dp->d_uio.uio_resid); tcp->tcp_rcv_list = NULL; - TCP_STAT(tcp_fusion_rrw_msgcnt); + TCP_STAT(tcps, tcp_fusion_rrw_msgcnt); /* * At this point nothing should be left in tcp_rcv_list. @@ -940,7 +964,7 @@ plugged: if (peer_tcp->tcp_flow_stopped) { tcp_clrqfull(peer_tcp); - TCP_STAT(tcp_fusion_backenabled); + TCP_STAT(tcps, tcp_fusion_backenabled); } } mutex_exit(&peer_tcp->tcp_non_sq_lock); @@ -1149,6 +1173,7 @@ void tcp_fuse_disable_pair(tcp_t *tcp, boolean_t unfusing) { tcp_t *peer_tcp = tcp->tcp_loopback_peer; + tcp_stack_t *tcps = tcp->tcp_tcps; ASSERT(tcp->tcp_fused); ASSERT(peer_tcp != NULL); @@ -1194,11 +1219,11 @@ tcp_fuse_disable_pair(tcp_t *tcp, boolean_t unfusing) /* Lift up any flow-control conditions */ if (tcp->tcp_flow_stopped) { tcp_clrqfull(tcp); - TCP_STAT(tcp_fusion_backenabled); + TCP_STAT(tcps, tcp_fusion_backenabled); } if (peer_tcp->tcp_flow_stopped) { tcp_clrqfull(peer_tcp); - TCP_STAT(tcp_fusion_backenabled); + TCP_STAT(tcps, tcp_fusion_backenabled); } /* Disable synchronous streams */ @@ -1212,15 +1237,17 @@ tcp_fuse_disable_pair(tcp_t *tcp, boolean_t unfusing) size_t tcp_fuse_set_rcv_hiwat(tcp_t *tcp, size_t rwnd) { + tcp_stack_t *tcps = tcp->tcp_tcps; + ASSERT(tcp->tcp_fused); /* Ensure that value is within the maximum upper bound */ - if (rwnd > tcp_max_buf) - rwnd = tcp_max_buf; + if (rwnd > tcps->tcps_max_buf) + rwnd = tcps->tcps_max_buf; /* Obey the absolute minimum tcp receive high water mark */ - if (rwnd < tcp_sth_rcv_hiwat) - rwnd = tcp_sth_rcv_hiwat; + if (rwnd < tcps->tcps_sth_rcv_hiwat) + rwnd = tcps->tcps_sth_rcv_hiwat; /* * Round up to system page size in case SO_RCVBUF is modified diff --git a/usr/src/uts/common/inet/tcp/tcp_kssl.c b/usr/src/uts/common/inet/tcp/tcp_kssl.c index dac3e0df3f..5a4d11860a 100644 --- a/usr/src/uts/common/inet/tcp/tcp_kssl.c +++ b/usr/src/uts/common/inet/tcp/tcp_kssl.c @@ -50,6 +50,7 @@ #include <inet/mi.h> #include <inet/mib2.h> #include <inet/tcp.h> +#include <inet/ipsec_impl.h> #include <inet/ipdrop.h> #include <inet/tcp_trace.h> #include <inet/tcp_impl.h> diff --git a/usr/src/uts/common/inet/tcp/tcp_opt_data.c b/usr/src/uts/common/inet/tcp/tcp_opt_data.c index f3e6d72ff4..4f0d767774 100644 --- 
a/usr/src/uts/common/inet/tcp/tcp_opt_data.c +++ b/usr/src/uts/common/inet/tcp/tcp_opt_data.c @@ -150,7 +150,7 @@ opdes_t tcp_opt_arr[] = { { IPV6_BOUND_IF, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_PASSNEXT, sizeof (int), 0 /* no ifindex */ }, -{ IP_NEXTHOP, IPPROTO_IP, OA_RW, OA_RW, OP_CONFIG, OP_PASSNEXT, +{ IP_NEXTHOP, IPPROTO_IP, OA_R, OA_RW, OP_CONFIG, OP_PASSNEXT, sizeof (in_addr_t), -1 /* not initialized */ }, { IPV6_BOUND_PIF, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_PASSNEXT, diff --git a/usr/src/uts/common/inet/tcp_impl.h b/usr/src/uts/common/inet/tcp_impl.h index c724f53980..2f31dc531a 100644 --- a/usr/src/uts/common/inet/tcp_impl.h +++ b/usr/src/uts/common/inet/tcp_impl.h @@ -144,180 +144,75 @@ typedef struct tcpparam_s { char *tcp_param_name; } tcpparam_t; -extern tcpparam_t tcp_param_arr[]; -#define tcp_time_wait_interval tcp_param_arr[0].tcp_param_val -#define tcp_conn_req_max_q tcp_param_arr[1].tcp_param_val -#define tcp_conn_req_max_q0 tcp_param_arr[2].tcp_param_val -#define tcp_conn_req_min tcp_param_arr[3].tcp_param_val -#define tcp_conn_grace_period tcp_param_arr[4].tcp_param_val -#define tcp_cwnd_max_ tcp_param_arr[5].tcp_param_val -#define tcp_dbg tcp_param_arr[6].tcp_param_val -#define tcp_smallest_nonpriv_port tcp_param_arr[7].tcp_param_val -#define tcp_ip_abort_cinterval tcp_param_arr[8].tcp_param_val -#define tcp_ip_abort_linterval tcp_param_arr[9].tcp_param_val -#define tcp_ip_abort_interval tcp_param_arr[10].tcp_param_val -#define tcp_ip_notify_cinterval tcp_param_arr[11].tcp_param_val -#define tcp_ip_notify_interval tcp_param_arr[12].tcp_param_val -#define tcp_ipv4_ttl tcp_param_arr[13].tcp_param_val -#define tcp_keepalive_interval_high tcp_param_arr[14].tcp_param_max -#define tcp_keepalive_interval tcp_param_arr[14].tcp_param_val -#define tcp_keepalive_interval_low tcp_param_arr[14].tcp_param_min -#define tcp_maxpsz_multiplier tcp_param_arr[15].tcp_param_val -#define tcp_mss_def_ipv4 tcp_param_arr[16].tcp_param_val -#define tcp_mss_max_ipv4 tcp_param_arr[17].tcp_param_val -#define tcp_mss_min tcp_param_arr[18].tcp_param_val -#define tcp_naglim_def tcp_param_arr[19].tcp_param_val -#define tcp_rexmit_interval_initial tcp_param_arr[20].tcp_param_val -#define tcp_rexmit_interval_max tcp_param_arr[21].tcp_param_val -#define tcp_rexmit_interval_min tcp_param_arr[22].tcp_param_val -#define tcp_deferred_ack_interval tcp_param_arr[23].tcp_param_val -#define tcp_snd_lowat_fraction tcp_param_arr[24].tcp_param_val -#define tcp_sth_rcv_hiwat tcp_param_arr[25].tcp_param_val -#define tcp_sth_rcv_lowat tcp_param_arr[26].tcp_param_val -#define tcp_dupack_fast_retransmit tcp_param_arr[27].tcp_param_val -#define tcp_ignore_path_mtu tcp_param_arr[28].tcp_param_val -#define tcp_smallest_anon_port tcp_param_arr[29].tcp_param_val -#define tcp_largest_anon_port tcp_param_arr[30].tcp_param_val -#define tcp_xmit_hiwat tcp_param_arr[31].tcp_param_val -#define tcp_xmit_lowat tcp_param_arr[32].tcp_param_val -#define tcp_recv_hiwat tcp_param_arr[33].tcp_param_val -#define tcp_recv_hiwat_minmss tcp_param_arr[34].tcp_param_val -#define tcp_fin_wait_2_flush_interval tcp_param_arr[35].tcp_param_val -#define tcp_co_min tcp_param_arr[36].tcp_param_val -#define tcp_max_buf tcp_param_arr[37].tcp_param_val -#define tcp_strong_iss tcp_param_arr[38].tcp_param_val -#define tcp_rtt_updates tcp_param_arr[39].tcp_param_val -#define tcp_wscale_always tcp_param_arr[40].tcp_param_val -#define tcp_tstamp_always tcp_param_arr[41].tcp_param_val -#define tcp_tstamp_if_wscale tcp_param_arr[42].tcp_param_val -#define 
tcp_rexmit_interval_extra tcp_param_arr[43].tcp_param_val -#define tcp_deferred_acks_max tcp_param_arr[44].tcp_param_val -#define tcp_slow_start_after_idle tcp_param_arr[45].tcp_param_val -#define tcp_slow_start_initial tcp_param_arr[46].tcp_param_val -#define tcp_co_timer_interval tcp_param_arr[47].tcp_param_val -#define tcp_sack_permitted tcp_param_arr[48].tcp_param_val -#define tcp_trace tcp_param_arr[49].tcp_param_val -#define tcp_compression_enabled tcp_param_arr[50].tcp_param_val -#define tcp_ipv6_hoplimit tcp_param_arr[51].tcp_param_val -#define tcp_mss_def_ipv6 tcp_param_arr[52].tcp_param_val -#define tcp_mss_max_ipv6 tcp_param_arr[53].tcp_param_val -#define tcp_rev_src_routes tcp_param_arr[54].tcp_param_val -#define tcp_local_dack_interval tcp_param_arr[55].tcp_param_val -#define tcp_ndd_get_info_interval tcp_param_arr[56].tcp_param_val -#define tcp_local_dacks_max tcp_param_arr[57].tcp_param_val -#define tcp_ecn_permitted tcp_param_arr[58].tcp_param_val -#define tcp_rst_sent_rate_enabled tcp_param_arr[59].tcp_param_val -#define tcp_rst_sent_rate tcp_param_arr[60].tcp_param_val -#define tcp_push_timer_interval tcp_param_arr[61].tcp_param_val -#define tcp_use_smss_as_mss_opt tcp_param_arr[62].tcp_param_val -#define tcp_keepalive_abort_interval_high tcp_param_arr[63].tcp_param_max -#define tcp_keepalive_abort_interval tcp_param_arr[63].tcp_param_val -#define tcp_keepalive_abort_interval_low tcp_param_arr[63].tcp_param_min - -/* Kstats */ -typedef struct tcp_stat { - kstat_named_t tcp_time_wait; - kstat_named_t tcp_time_wait_syn; - kstat_named_t tcp_time_wait_syn_success; - kstat_named_t tcp_time_wait_syn_fail; - kstat_named_t tcp_reinput_syn; - kstat_named_t tcp_ip_output; - kstat_named_t tcp_detach_non_time_wait; - kstat_named_t tcp_detach_time_wait; - kstat_named_t tcp_time_wait_reap; - kstat_named_t tcp_clean_death_nondetached; - kstat_named_t tcp_reinit_calls; - kstat_named_t tcp_eager_err1; - kstat_named_t tcp_eager_err2; - kstat_named_t tcp_eager_blowoff_calls; - kstat_named_t tcp_eager_blowoff_q; - kstat_named_t tcp_eager_blowoff_q0; - kstat_named_t tcp_not_hard_bound; - kstat_named_t tcp_no_listener; - kstat_named_t tcp_found_eager; - kstat_named_t tcp_wrong_queue; - kstat_named_t tcp_found_eager_binding1; - kstat_named_t tcp_found_eager_bound1; - kstat_named_t tcp_eager_has_listener1; - kstat_named_t tcp_open_alloc; - kstat_named_t tcp_open_detached_alloc; - kstat_named_t tcp_rput_time_wait; - kstat_named_t tcp_listendrop; - kstat_named_t tcp_listendropq0; - kstat_named_t tcp_wrong_rq; - kstat_named_t tcp_rsrv_calls; - kstat_named_t tcp_eagerfree2; - kstat_named_t tcp_eagerfree3; - kstat_named_t tcp_eagerfree4; - kstat_named_t tcp_eagerfree5; - kstat_named_t tcp_timewait_syn_fail; - kstat_named_t tcp_listen_badflags; - kstat_named_t tcp_timeout_calls; - kstat_named_t tcp_timeout_cached_alloc; - kstat_named_t tcp_timeout_cancel_reqs; - kstat_named_t tcp_timeout_canceled; - kstat_named_t tcp_timermp_alloced; - kstat_named_t tcp_timermp_freed; - kstat_named_t tcp_timermp_allocfail; - kstat_named_t tcp_timermp_allocdblfail; - kstat_named_t tcp_push_timer_cnt; - kstat_named_t tcp_ack_timer_cnt; - kstat_named_t tcp_ire_null1; - kstat_named_t tcp_ire_null; - kstat_named_t tcp_ip_send; - kstat_named_t tcp_ip_ire_send; - kstat_named_t tcp_wsrv_called; - kstat_named_t tcp_flwctl_on; - kstat_named_t tcp_timer_fire_early; - kstat_named_t tcp_timer_fire_miss; - kstat_named_t tcp_freelist_cleanup; - kstat_named_t tcp_rput_v6_error; - kstat_named_t tcp_out_sw_cksum; - kstat_named_t 
tcp_out_sw_cksum_bytes; - kstat_named_t tcp_zcopy_on; - kstat_named_t tcp_zcopy_off; - kstat_named_t tcp_zcopy_backoff; - kstat_named_t tcp_zcopy_disable; - kstat_named_t tcp_mdt_pkt_out; - kstat_named_t tcp_mdt_pkt_out_v4; - kstat_named_t tcp_mdt_pkt_out_v6; - kstat_named_t tcp_mdt_discarded; - kstat_named_t tcp_mdt_conn_halted1; - kstat_named_t tcp_mdt_conn_halted2; - kstat_named_t tcp_mdt_conn_halted3; - kstat_named_t tcp_mdt_conn_resumed1; - kstat_named_t tcp_mdt_conn_resumed2; - kstat_named_t tcp_mdt_legacy_small; - kstat_named_t tcp_mdt_legacy_all; - kstat_named_t tcp_mdt_legacy_ret; - kstat_named_t tcp_mdt_allocfail; - kstat_named_t tcp_mdt_addpdescfail; - kstat_named_t tcp_mdt_allocd; - kstat_named_t tcp_mdt_linked; - kstat_named_t tcp_fusion_flowctl; - kstat_named_t tcp_fusion_backenabled; - kstat_named_t tcp_fusion_urg; - kstat_named_t tcp_fusion_putnext; - kstat_named_t tcp_fusion_unfusable; - kstat_named_t tcp_fusion_aborted; - kstat_named_t tcp_fusion_unqualified; - kstat_named_t tcp_fusion_rrw_busy; - kstat_named_t tcp_fusion_rrw_msgcnt; - kstat_named_t tcp_fusion_rrw_plugged; - kstat_named_t tcp_in_ack_unsent_drop; - kstat_named_t tcp_sock_fallback; - kstat_named_t tcp_lso_enabled; - kstat_named_t tcp_lso_disabled; - kstat_named_t tcp_lso_times; - kstat_named_t tcp_lso_pkt_out; -} tcp_stat_t; - -extern tcp_stat_t tcp_statistics; - -#define TCP_STAT(x) (tcp_statistics.x.value.ui64++) -#define TCP_STAT_UPDATE(x, n) (tcp_statistics.x.value.ui64 += (n)) -#define TCP_STAT_SET(x, n) (tcp_statistics.x.value.ui64 = (n)) +#define tcps_time_wait_interval tcps_params[0].tcp_param_val +#define tcps_conn_req_max_q tcps_params[1].tcp_param_val +#define tcps_conn_req_max_q0 tcps_params[2].tcp_param_val +#define tcps_conn_req_min tcps_params[3].tcp_param_val +#define tcps_conn_grace_period tcps_params[4].tcp_param_val +#define tcps_cwnd_max_ tcps_params[5].tcp_param_val +#define tcps_dbg tcps_params[6].tcp_param_val +#define tcps_smallest_nonpriv_port tcps_params[7].tcp_param_val +#define tcps_ip_abort_cinterval tcps_params[8].tcp_param_val +#define tcps_ip_abort_linterval tcps_params[9].tcp_param_val +#define tcps_ip_abort_interval tcps_params[10].tcp_param_val +#define tcps_ip_notify_cinterval tcps_params[11].tcp_param_val +#define tcps_ip_notify_interval tcps_params[12].tcp_param_val +#define tcps_ipv4_ttl tcps_params[13].tcp_param_val +#define tcps_keepalive_interval_high tcps_params[14].tcp_param_max +#define tcps_keepalive_interval tcps_params[14].tcp_param_val +#define tcps_keepalive_interval_low tcps_params[14].tcp_param_min +#define tcps_maxpsz_multiplier tcps_params[15].tcp_param_val +#define tcps_mss_def_ipv4 tcps_params[16].tcp_param_val +#define tcps_mss_max_ipv4 tcps_params[17].tcp_param_val +#define tcps_mss_min tcps_params[18].tcp_param_val +#define tcps_naglim_def tcps_params[19].tcp_param_val +#define tcps_rexmit_interval_initial tcps_params[20].tcp_param_val +#define tcps_rexmit_interval_max tcps_params[21].tcp_param_val +#define tcps_rexmit_interval_min tcps_params[22].tcp_param_val +#define tcps_deferred_ack_interval tcps_params[23].tcp_param_val +#define tcps_snd_lowat_fraction tcps_params[24].tcp_param_val +#define tcps_sth_rcv_hiwat tcps_params[25].tcp_param_val +#define __tcps_not_used1 tcps_params[26].tcp_param_val +#define tcps_dupack_fast_retransmit tcps_params[27].tcp_param_val +#define tcps_ignore_path_mtu tcps_params[28].tcp_param_val +#define tcps_smallest_anon_port tcps_params[29].tcp_param_val +#define tcps_largest_anon_port tcps_params[30].tcp_param_val 
+#define tcps_xmit_hiwat tcps_params[31].tcp_param_val +#define tcps_xmit_lowat tcps_params[32].tcp_param_val +#define tcps_recv_hiwat tcps_params[33].tcp_param_val +#define tcps_recv_hiwat_minmss tcps_params[34].tcp_param_val +#define tcps_fin_wait_2_flush_interval tcps_params[35].tcp_param_val +#define __tcps_not_used2 tcps_params[36].tcp_param_val +#define tcps_max_buf tcps_params[37].tcp_param_val +#define tcps_strong_iss tcps_params[38].tcp_param_val +#define tcps_rtt_updates tcps_params[39].tcp_param_val +#define tcps_wscale_always tcps_params[40].tcp_param_val +#define tcps_tstamp_always tcps_params[41].tcp_param_val +#define tcps_tstamp_if_wscale tcps_params[42].tcp_param_val +#define tcps_rexmit_interval_extra tcps_params[43].tcp_param_val +#define tcps_deferred_acks_max tcps_params[44].tcp_param_val +#define tcps_slow_start_after_idle tcps_params[45].tcp_param_val +#define tcps_slow_start_initial tcps_params[46].tcp_param_val +#define tcps_co_timer_interval tcps_params[47].tcp_param_val +#define tcps_sack_permitted tcps_params[48].tcp_param_val +#define tcps_trace tcps_params[49].tcp_param_val +#define __tcps_not_used4 tcps_params[50].tcp_param_val +#define tcps_ipv6_hoplimit tcps_params[51].tcp_param_val +#define tcps_mss_def_ipv6 tcps_params[52].tcp_param_val +#define tcps_mss_max_ipv6 tcps_params[53].tcp_param_val +#define tcps_rev_src_routes tcps_params[54].tcp_param_val +#define tcps_local_dack_interval tcps_params[55].tcp_param_val +#define tcps_ndd_get_info_interval tcps_params[56].tcp_param_val +#define tcps_local_dacks_max tcps_params[57].tcp_param_val +#define tcps_ecn_permitted tcps_params[58].tcp_param_val +#define tcps_rst_sent_rate_enabled tcps_params[59].tcp_param_val +#define tcps_rst_sent_rate tcps_params[60].tcp_param_val +#define tcps_push_timer_interval tcps_params[61].tcp_param_val +#define tcps_use_smss_as_mss_opt tcps_params[62].tcp_param_val +#define tcps_keepalive_abort_interval_high tcps_params[63].tcp_param_max +#define tcps_keepalive_abort_interval tcps_params[63].tcp_param_val +#define tcps_keepalive_abort_interval_low tcps_params[63].tcp_param_min extern struct qinit tcp_loopback_rinit, tcp_rinit; extern boolean_t do_tcp_fusion; diff --git a/usr/src/uts/common/inet/tcp_stack.h b/usr/src/uts/common/inet/tcp_stack.h new file mode 100644 index 0000000000..c2b89bebb4 --- /dev/null +++ b/usr/src/uts/common/inet/tcp_stack.h @@ -0,0 +1,254 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _INET_TCP_STACK_H +#define _INET_TCP_STACK_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/netstack.h> +#include <inet/ip.h> +#include <inet/ipdrop.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* Kstats */ +typedef struct tcp_stat { + kstat_named_t tcp_time_wait; + kstat_named_t tcp_time_wait_syn; + kstat_named_t tcp_time_wait_syn_success; + kstat_named_t tcp_time_wait_syn_fail; + kstat_named_t tcp_reinput_syn; + kstat_named_t tcp_ip_output; + kstat_named_t tcp_detach_non_time_wait; + kstat_named_t tcp_detach_time_wait; + kstat_named_t tcp_time_wait_reap; + kstat_named_t tcp_clean_death_nondetached; + kstat_named_t tcp_reinit_calls; + kstat_named_t tcp_eager_err1; + kstat_named_t tcp_eager_err2; + kstat_named_t tcp_eager_blowoff_calls; + kstat_named_t tcp_eager_blowoff_q; + kstat_named_t tcp_eager_blowoff_q0; + kstat_named_t tcp_not_hard_bound; + kstat_named_t tcp_no_listener; + kstat_named_t tcp_found_eager; + kstat_named_t tcp_wrong_queue; + kstat_named_t tcp_found_eager_binding1; + kstat_named_t tcp_found_eager_bound1; + kstat_named_t tcp_eager_has_listener1; + kstat_named_t tcp_open_alloc; + kstat_named_t tcp_open_detached_alloc; + kstat_named_t tcp_rput_time_wait; + kstat_named_t tcp_listendrop; + kstat_named_t tcp_listendropq0; + kstat_named_t tcp_wrong_rq; + kstat_named_t tcp_rsrv_calls; + kstat_named_t tcp_eagerfree2; + kstat_named_t tcp_eagerfree3; + kstat_named_t tcp_eagerfree4; + kstat_named_t tcp_eagerfree5; + kstat_named_t tcp_timewait_syn_fail; + kstat_named_t tcp_listen_badflags; + kstat_named_t tcp_timeout_calls; + kstat_named_t tcp_timeout_cached_alloc; + kstat_named_t tcp_timeout_cancel_reqs; + kstat_named_t tcp_timeout_canceled; + kstat_named_t tcp_timermp_freed; + kstat_named_t tcp_push_timer_cnt; + kstat_named_t tcp_ack_timer_cnt; + kstat_named_t tcp_ire_null1; + kstat_named_t tcp_ire_null; + kstat_named_t tcp_ip_send; + kstat_named_t tcp_ip_ire_send; + kstat_named_t tcp_wsrv_called; + kstat_named_t tcp_flwctl_on; + kstat_named_t tcp_timer_fire_early; + kstat_named_t tcp_timer_fire_miss; + kstat_named_t tcp_rput_v6_error; + kstat_named_t tcp_out_sw_cksum; + kstat_named_t tcp_out_sw_cksum_bytes; + kstat_named_t tcp_zcopy_on; + kstat_named_t tcp_zcopy_off; + kstat_named_t tcp_zcopy_backoff; + kstat_named_t tcp_zcopy_disable; + kstat_named_t tcp_mdt_pkt_out; + kstat_named_t tcp_mdt_pkt_out_v4; + kstat_named_t tcp_mdt_pkt_out_v6; + kstat_named_t tcp_mdt_discarded; + kstat_named_t tcp_mdt_conn_halted1; + kstat_named_t tcp_mdt_conn_halted2; + kstat_named_t tcp_mdt_conn_halted3; + kstat_named_t tcp_mdt_conn_resumed1; + kstat_named_t tcp_mdt_conn_resumed2; + kstat_named_t tcp_mdt_legacy_small; + kstat_named_t tcp_mdt_legacy_all; + kstat_named_t tcp_mdt_legacy_ret; + kstat_named_t tcp_mdt_allocfail; + kstat_named_t tcp_mdt_addpdescfail; + kstat_named_t tcp_mdt_allocd; + kstat_named_t tcp_mdt_linked; + kstat_named_t tcp_fusion_flowctl; + kstat_named_t tcp_fusion_backenabled; + kstat_named_t tcp_fusion_urg; + kstat_named_t tcp_fusion_putnext; + kstat_named_t tcp_fusion_unfusable; + kstat_named_t tcp_fusion_aborted; + kstat_named_t tcp_fusion_unqualified; + kstat_named_t tcp_fusion_rrw_busy; + kstat_named_t tcp_fusion_rrw_msgcnt; + kstat_named_t tcp_fusion_rrw_plugged; + kstat_named_t tcp_in_ack_unsent_drop; + kstat_named_t tcp_sock_fallback; + kstat_named_t tcp_lso_enabled; + kstat_named_t tcp_lso_disabled; + kstat_named_t tcp_lso_times; + kstat_named_t tcp_lso_pkt_out; +} tcp_stat_t; + +#define TCP_STAT(tcps, x) 
((tcps)->tcps_statistics.x.value.ui64++) +#define TCP_STAT_UPDATE(tcps, x, n) \ + ((tcps)->tcps_statistics.x.value.ui64 += (n)) +#define TCP_STAT_SET(tcps, x, n) \ + ((tcps)->tcps_statistics.x.value.ui64 = (n)) + +typedef struct tcp_g_stat { + kstat_named_t tcp_timermp_alloced; + kstat_named_t tcp_timermp_allocfail; + kstat_named_t tcp_timermp_allocdblfail; + kstat_named_t tcp_freelist_cleanup; +} tcp_g_stat_t; + +#ifdef _KERNEL + +/* + * TCP stack instances + */ +struct tcp_stack { + netstack_t *tcps_netstack; /* Common netstack */ + + mib2_tcp_t tcps_mib; + + /* Protected by tcps_g_q_lock */ + queue_t *tcps_g_q; /* Default queue */ + uint_t tcps_refcnt; /* Total number of tcp_t's */ + kmutex_t tcps_g_q_lock; + kcondvar_t tcps_g_q_cv; + kthread_t *tcps_g_q_creator; + struct __ldi_handle *tcps_g_q_lh; + cred_t *tcps_g_q_cr; /* For _inactive close call */ + + /* Protected by tcp_hsp_lock */ + struct tcp_hsp **tcps_hsp_hash; /* Hash table for HSPs */ + krwlock_t tcps_hsp_lock; + + /* + * Extra privileged ports. In host byte order. + * Protected by tcp_epriv_port_lock. + */ +#define TCP_NUM_EPRIV_PORTS 64 + int tcps_g_num_epriv_ports; + uint16_t tcps_g_epriv_ports[TCP_NUM_EPRIV_PORTS]; + kmutex_t tcps_epriv_port_lock; + + /* + * The smallest anonymous port in the priviledged port range which TCP + * looks for free port. Use in the option TCP_ANONPRIVBIND. + */ + in_port_t tcps_min_anonpriv_port; + + /* Only modified during _init and _fini thus no locking is needed. */ + caddr_t tcps_g_nd; + struct tcpparam_s *tcps_params; /* ndd parameters */ + struct tcpparam_s *tcps_wroff_xtra_param; + struct tcpparam_s *tcps_mdt_head_param; + struct tcpparam_s *tcps_mdt_tail_param; + struct tcpparam_s *tcps_mdt_max_pbufs_param; + + /* Hint not protected by any lock */ + uint_t tcps_next_port_to_try; + + /* TCP bind hash list - all tcp_t with state >= BOUND. */ + struct tf_s *tcps_bind_fanout; + + /* TCP queue hash list - all tcp_t in case they will be an acceptor. */ + struct tf_s *tcps_acceptor_fanout; + + /* The reserved port array. */ + struct tcp_rport_s *tcps_reserved_port; + + /* Locks to protect the tcp_reserved_ports array. */ + krwlock_t tcps_reserved_port_lock; + + /* The number of ranges in the array. */ + uint32_t tcps_reserved_port_array_size; + + /* + * MIB-2 stuff for SNMP + * Note: tcpInErrs {tcp 15} is accumulated in ip.c + */ + kstat_t *tcps_mibkp; /* kstat exporting tcp_mib data */ + kstat_t *tcps_kstat; + tcp_stat_t tcps_statistics; + + uint32_t tcps_iss_incr_extra; + /* Incremented for each connection */ + kmutex_t tcps_iss_key_lock; + MD5_CTX tcps_iss_key; + + /* Packet dropper for TCP IPsec policy drops. */ + ipdropper_t tcps_dropper; + + /* + * This controls the rate some ndd info report functions can be used + * by non-priviledged users. It stores the last time such info is + * requested. When those report functions are called again, this + * is checked with the current time and compare with the ndd param + * tcp_ndd_get_info_interval. + */ + clock_t tcps_last_ndd_get_info_time; + + /* + * These two variables control the rate for TCP to generate RSTs in + * response to segments not belonging to any connections. We limit + * TCP to sent out tcp_rst_sent_rate (ndd param) number of RSTs in + * each 1 second interval. This is to protect TCP against DoS attack. + */ + clock_t tcps_last_rst_intrvl; + uint32_t tcps_rst_cnt; + /* The number of RST not sent because of the rate limit. 
*/ + uint32_t tcps_rst_unsent; +}; +typedef struct tcp_stack tcp_stack_t; + +#endif /* _KERNEL */ +#ifdef __cplusplus +} +#endif + +#endif /* _INET_TCP_STACK_H */ diff --git a/usr/src/uts/common/inet/tun.h b/usr/src/uts/common/inet/tun.h index a6a3115482..6175e82358 100644 --- a/usr/src/uts/common/inet/tun.h +++ b/usr/src/uts/common/inet/tun.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -44,6 +44,8 @@ extern "C" { #ifdef _KERNEL +#include <sys/netstack.h> + #define TUN_MODID 5134 #define ATUN_MODID 5135 #define TUN6TO4_MODID 5136 @@ -199,6 +201,7 @@ typedef struct tun_s { uint64_t tun_HCOutOctets; /* # Total Octets sent */ uint64_t tun_HCOutUcastPkts; /* # Packets requested */ uint64_t tun_HCOutMulticastPkts; /* Multicast Packets requested */ + netstack_t *tun_netstack; } tun_t; @@ -249,6 +252,34 @@ struct old_iftun_req { /* set tunnel */ /* parameters */ +/* + * Linked list of tunnels. + */ + +#define TUN_PPA_SZ 64 +#define TUN_LIST_HASH(ppa) ((ppa) % TUN_PPA_SZ) + +#define TUN_T_SZ 251 +#define TUN_BYADDR_LIST_HASH(a) (((a).s6_addr32[3]) % (TUN_T_SZ)) + +/* + * tunnel stack instances + */ +struct tun_stack { + netstack_t *tuns_netstack; /* Common netstack */ + + /* + * protects global data structures such as tun_ppa_list + * also protects tun_t at ts_next and *ts_atp + * should be acquired before ts_lock + */ + kmutex_t tuns_global_lock; + tun_stats_t *tuns_ppa_list[TUN_PPA_SZ]; + tun_t *tuns_byaddr_list[TUN_T_SZ]; + + ipaddr_t tuns_relay_rtr_addr_v4; +}; +typedef struct tun_stack tun_stack_t; int tun_open(queue_t *, dev_t *, int, int, cred_t *); diff --git a/usr/src/uts/common/inet/udp/udp.c b/usr/src/uts/common/inet/udp/udp.c index 6cc697d5b3..0832812a19 100644 --- a/usr/src/uts/common/inet/udp/udp.c +++ b/usr/src/uts/common/inet/udp/udp.c @@ -191,75 +191,11 @@ const char udp_version[] = "%Z%%M% %I% %E% SMI"; static queue_t *UDP_WR(queue_t *); static queue_t *UDP_RD(queue_t *); -udp_stat_t udp_statistics = { - { "udp_ip_send", KSTAT_DATA_UINT64 }, - { "udp_ip_ire_send", KSTAT_DATA_UINT64 }, - { "udp_ire_null", KSTAT_DATA_UINT64 }, - { "udp_drain", KSTAT_DATA_UINT64 }, - { "udp_sock_fallback", KSTAT_DATA_UINT64 }, - { "udp_rrw_busy", KSTAT_DATA_UINT64 }, - { "udp_rrw_msgcnt", KSTAT_DATA_UINT64 }, - { "udp_out_sw_cksum", KSTAT_DATA_UINT64 }, - { "udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, - { "udp_out_opt", KSTAT_DATA_UINT64 }, - { "udp_out_err_notconn", KSTAT_DATA_UINT64 }, - { "udp_out_err_output", KSTAT_DATA_UINT64 }, - { "udp_out_err_tudr", KSTAT_DATA_UINT64 }, - { "udp_in_pktinfo", KSTAT_DATA_UINT64 }, - { "udp_in_recvdstaddr", KSTAT_DATA_UINT64 }, - { "udp_in_recvopts", KSTAT_DATA_UINT64 }, - { "udp_in_recvif", KSTAT_DATA_UINT64 }, - { "udp_in_recvslla", KSTAT_DATA_UINT64 }, - { "udp_in_recvucred", KSTAT_DATA_UINT64 }, - { "udp_in_recvttl", KSTAT_DATA_UINT64 }, - { "udp_in_recvhopopts", KSTAT_DATA_UINT64 }, - { "udp_in_recvhoplimit", KSTAT_DATA_UINT64 }, - { "udp_in_recvdstopts", KSTAT_DATA_UINT64 }, - { "udp_in_recvrtdstopts", KSTAT_DATA_UINT64 }, - { "udp_in_recvrthdr", KSTAT_DATA_UINT64 }, - { "udp_in_recvpktinfo", KSTAT_DATA_UINT64 }, - { "udp_in_recvtclass", KSTAT_DATA_UINT64 }, - { "udp_in_timestamp", KSTAT_DATA_UINT64 }, -#ifdef DEBUG - { "udp_data_conn", KSTAT_DATA_UINT64 }, - { "udp_data_notconn", KSTAT_DATA_UINT64 }, -#endif -}; - -static kstat_t *udp_ksp; struct kmem_cache *udp_cache; -/* - * Bind hash 
list size and hash function. It has to be a power of 2 for - * hashing. - */ -#define UDP_BIND_FANOUT_SIZE 512 -#define UDP_BIND_HASH(lport) \ - ((ntohs((uint16_t)lport)) & (udp_bind_fanout_size - 1)) - -/* UDP bind fanout hash structure. */ -typedef struct udp_fanout_s { - udp_t *uf_udp; - kmutex_t uf_lock; -#if defined(_LP64) || defined(_I32LPx) - char uf_pad[48]; -#else - char uf_pad[56]; -#endif -} udp_fanout_t; - +/* For /etc/system control */ uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE; -/* udp_fanout_t *udp_bind_fanout. */ -static udp_fanout_t *udp_bind_fanout; -/* - * This controls the rate some ndd info report functions can be used - * by non-privileged users. It stores the last time such info is - * requested. When those report functions are called again, this - * is checked with the current time and compare with the ndd param - * udp_ndd_get_info_interval. - */ -static clock_t udp_last_ndd_get_info_time; #define NDD_TOO_QUICK_MSG \ "ndd get info rate too high for non-privileged users, try again " \ "later.\n" @@ -307,7 +243,7 @@ static int udp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp, udpattrs_t *udpattrs); static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name); static int udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); -static boolean_t udp_param_register(udpparam_t *udppa, int cnt); +static boolean_t udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt); static int udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr); static void udp_report_item(mblk_t *mp, udp_t *udp); @@ -335,8 +271,13 @@ static void udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen); static size_t udp_set_rcv_hiwat(udp_t *udp, size_t size); -static void udp_kstat_init(void); -static void udp_kstat_fini(void); +static void *udp_stack_init(netstackid_t stackid, netstack_t *ns); +static void udp_stack_fini(netstackid_t stackid, void *arg); + +static void *udp_kstat_init(netstackid_t stackid); +static void udp_kstat_fini(netstackid_t stackid, kstat_t *ksp); +static void *udp_kstat2_init(netstackid_t, udp_stat_t *); +static void udp_kstat2_fini(netstackid_t, kstat_t *); static int udp_kstat_update(kstat_t *kp, int rw); static void udp_input_wrapper(void *arg, mblk_t *mp, void *arg2); static void udp_rput_other_wrapper(void *arg, mblk_t *mp, void *arg2); @@ -375,11 +316,6 @@ static struct qinit udp_winit = { &udp_info, NULL, NULL, NULL, STRUIOT_NONE }; -static struct qinit winit = { - (pfi_t)putnext, NULL, NULL, NULL, NULL, - &udp_info, NULL, NULL, NULL, STRUIOT_NONE -}; - /* Support for just SNMP if UDP is not pushed directly over device IP */ struct qinit udp_snmp_rinit = { (pfi_t)putnext, NULL, udp_open, ip_snmpmod_close, NULL, @@ -392,29 +328,12 @@ struct qinit udp_snmp_winit = { }; struct streamtab udpinfo = { - &udp_rinit, &winit + &udp_rinit, &udp_winit }; static sin_t sin_null; /* Zero address for quick clears */ static sin6_t sin6_null; /* Zero address for quick clears */ -/* Hint not protected by any lock */ -static in_port_t udp_g_next_port_to_try; - -/* - * Extra privileged ports. In host byte order. - */ -#define UDP_NUM_EPRIV_PORTS 64 -static int udp_g_num_epriv_ports = UDP_NUM_EPRIV_PORTS; -static in_port_t udp_g_epriv_ports[UDP_NUM_EPRIV_PORTS] = { 2049, 4045 }; - -/* Only modified during _init and _fini thus no locking is needed. */ -static IDP udp_g_nd; /* Points to table of UDP ND variables. 
*/ - -/* MIB-2 stuff for SNMP */ -static mib2_udp_t udp_mib; /* SNMP fixed size info */ -static kstat_t *udp_mibkp; /* kstat exporting udp_mib data */ - #define UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH) /* Default structure copied into T_INFO_ACK messages */ @@ -452,7 +371,7 @@ static struct T_info_ack udp_g_t_info_ack_ipv6 = { #define UDP_MAX_PORT 65535 /* - * Table of ND variables supported by udp. These are loaded into udp_g_nd + * Table of ND variables supported by udp. These are loaded into us_nd * in udp_open. * All of these are alterable, within the min/max values given, at run time. */ @@ -474,12 +393,7 @@ udpparam_t udp_param_arr[] = { }; /* END CSTYLED */ -/* - * The smallest anonymous port in the privileged port range which UDP - * looks for free port. Use in the option UDP_ANONPRIVBIND. - */ -static in_port_t udp_min_anonpriv_port = 512; - +/* Setable in /etc/system */ /* If set to 0, pick ephemeral port sequentially; otherwise randomly. */ uint32_t udp_random_anon_port = 1; @@ -934,9 +848,10 @@ udp_get_next_priv_port(udp_t *udp) static in_port_t next_priv_port = IPPORT_RESERVED - 1; in_port_t nextport; boolean_t restart = B_FALSE; + udp_stack_t *us = udp->udp_us; retry: - if (next_priv_port < udp_min_anonpriv_port || + if (next_priv_port < us->us_min_anonpriv_port || next_priv_port >= IPPORT_RESERVED) { next_priv_port = IPPORT_RESERVED - 1; if (restart) @@ -964,14 +879,16 @@ udp_bind_hash_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) zoneid_t zoneid; conn_t *connp; udp_t *udp; + udp_stack_t *us; connp = Q_TO_CONN(q); udp = connp->conn_udp; + us = udp->udp_us; /* Refer to comments in udp_status_report(). */ - if (cr == NULL || secpolicy_net_config(cr, B_TRUE) != 0) { - if (ddi_get_lbolt() - udp_last_ndd_get_info_time < - drv_usectohz(udp_ndd_get_info_interval * 1000)) { + if (cr == NULL || secpolicy_ip_config(cr, B_TRUE) != 0) { + if (ddi_get_lbolt() - us->us_last_ndd_get_info_time < + drv_usectohz(us->us_ndd_get_info_interval * 1000)) { (void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG); return (0); } @@ -990,8 +907,8 @@ udp_bind_hash_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) zoneid = connp->conn_zoneid; - for (i = 0; i < udp_bind_fanout_size; i++) { - udpf = &udp_bind_fanout[i]; + for (i = 0; i < us->us_bind_fanout_size; i++) { + udpf = &us->us_bind_fanout[i]; mutex_enter(&udpf->uf_lock); /* Print the hash index. */ @@ -1022,7 +939,7 @@ udp_bind_hash_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) } mutex_exit(&udpf->uf_lock); } - udp_last_ndd_get_info_time = ddi_get_lbolt(); + us->us_last_ndd_get_info_time = ddi_get_lbolt(); return (0); } @@ -1034,6 +951,7 @@ udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock) { udp_t *udpnext; kmutex_t *lockp; + udp_stack_t *us = udp->udp_us; if (udp->udp_ptpbhn == NULL) return; @@ -1044,7 +962,8 @@ udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock) */ ASSERT(udp->udp_port != 0); if (!caller_holds_lock) { - lockp = &udp_bind_fanout[UDP_BIND_HASH(udp->udp_port)].uf_lock; + lockp = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, + us->us_bind_fanout_size)].uf_lock; ASSERT(lockp != NULL); mutex_enter(lockp); } @@ -1115,7 +1034,7 @@ udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp) * without setting SO_REUSEADDR. This is needed so that they * can be viewed as two independent transport protocols. * However, anonymouns ports are allocated from the same range to avoid - * duplicating the udp_g_next_port_to_try. + * duplicating the us->us_next_port_to_try. 
*/ static void udp_bind(queue_t *q, mblk_t *mp) @@ -1137,9 +1056,11 @@ udp_bind(queue_t *q, mblk_t *mp) udp_t *udp; boolean_t is_inaddr_any; mlp_type_t addrtype, mlptype; + udp_stack_t *us; connp = Q_TO_CONN(q); udp = connp->conn_udp; + us = udp->udp_us; if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, "udp_bind: bad req, len %u", @@ -1240,7 +1161,7 @@ udp_bind(queue_t *q, mblk_t *mp) port = udp_get_next_priv_port(udp); } else { port = udp_update_next_port(udp, - udp_g_next_port_to_try, B_TRUE); + us->us_next_port_to_try, B_TRUE); } } else { /* @@ -1250,11 +1171,11 @@ udp_bind(queue_t *q, mblk_t *mp) int i; boolean_t priv = B_FALSE; - if (port < udp_smallest_nonpriv_port) { + if (port < us->us_smallest_nonpriv_port) { priv = B_TRUE; } else { - for (i = 0; i < udp_g_num_epriv_ports; i++) { - if (port == udp_g_epriv_ports[i]) { + for (i = 0; i < us->us_num_epriv_ports; i++) { + if (port == us->us_epriv_ports[i]) { priv = B_TRUE; break; } @@ -1324,10 +1245,14 @@ udp_bind(queue_t *q, mblk_t *mp) count = 0; if (udp->udp_anon_priv_bind) { - /* loopmax = (IPPORT_RESERVED-1) - udp_min_anonpriv_port + 1 */ - loopmax = IPPORT_RESERVED - udp_min_anonpriv_port; + /* + * loopmax = (IPPORT_RESERVED-1) - + * us->us_min_anonpriv_port + 1 + */ + loopmax = IPPORT_RESERVED - us->us_min_anonpriv_port; } else { - loopmax = udp_largest_anon_port - udp_smallest_anon_port + 1; + loopmax = us->us_largest_anon_port - + us->us_smallest_anon_port + 1; } is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src); @@ -1342,7 +1267,8 @@ udp_bind(queue_t *q, mblk_t *mp) * requested port with the same IP address. */ lport = htons(port); - udpf = &udp_bind_fanout[UDP_BIND_HASH(lport)]; + udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport, + us->us_bind_fanout_size)]; mutex_enter(&udpf->uf_lock); for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) { @@ -1467,10 +1393,10 @@ udp_bind(queue_t *q, mblk_t *mp) * If the application wants us to find * a port, get one to start with. Set * requested_port to 0, so that we will - * update udp_g_next_port_to_try below. + * update us->us_next_port_to_try below. */ port = udp_update_next_port(udp, - udp_g_next_port_to_try, B_TRUE); + us->us_next_port_to_try, B_TRUE); requested_port = 0; } else { port = udp_update_next_port(udp, port + 1, @@ -1503,7 +1429,7 @@ udp_bind(queue_t *q, mblk_t *mp) * an anonymous port, or we handed out the next anonymous port. */ if ((requested_port == 0) && (!udp->udp_anon_priv_bind)) { - udp_g_next_port_to_try = port + 1; + us->us_next_port_to_try = port + 1; } /* Initialize the O_T_BIND_REQ/T_BIND_REQ for ip. */ @@ -1548,9 +1474,11 @@ udp_bind(queue_t *q, mblk_t *mp) cred_t *cr = connp->conn_cred; zone_t *zone; + zone = crgetzone(cr); connp->conn_mlp_type = udp->udp_recvucred ? mlptBoth : mlptSingle; - addrtype = tsol_mlp_addr_type(zoneid, IPV6_VERSION, &v6src); + addrtype = tsol_mlp_addr_type(zone->zone_id, IPV6_VERSION, + &v6src, udp->udp_us->us_netstack->netstack_ip); if (addrtype == mlptSingle) { udp_err_ack(q, mp, TNOADDR, 0); connp->conn_anon_port = B_FALSE; @@ -1558,7 +1486,6 @@ udp_bind(queue_t *q, mblk_t *mp) return; } mlpport = connp->conn_anon_port ? PMAPPORT : port; - zone = crgetzone(cr); mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport, addrtype); if (mlptype != mlptSingle && @@ -1582,6 +1509,10 @@ udp_bind(queue_t *q, mblk_t *mp) * zone actually owns the MLP. Reject if not. 
*/ if (mlptype == mlptShared && addrtype == mlptShared) { + /* + * No need to handle exclusive-stack zones since + * ALL_ZONES only applies to the shared stack. + */ zoneid_t mlpzone; mlpzone = tsol_mlp_findzone(IPPROTO_UDP, @@ -1702,10 +1633,12 @@ udp_connect(queue_t *q, mblk_t *mp) mblk_t *mp1, *mp2; udp_fanout_t *udpf; udp_t *udp, *udp1; + udp_stack_t *us; udp = Q_TO_UDP(q); tcr = (struct T_conn_req *)mp->b_rptr; + us = udp->udp_us; /* A bit of sanity checking */ if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) { @@ -1724,7 +1657,8 @@ udp_connect(queue_t *q, mblk_t *mp) } ASSERT(udp->udp_port != 0 && udp->udp_ptpbhn != NULL); - udpf = &udp_bind_fanout[UDP_BIND_HASH(udp->udp_port)]; + udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, + us->us_bind_fanout_size)]; if (udp->udp_state == TS_DATA_XFER) { /* Already connected - clear out state */ @@ -2044,6 +1978,8 @@ udp_close_free(conn_t *connp) ip6_pkt_free(&udp->udp_sticky_ipp); udp->udp_connp = NULL; + netstack_rele(udp->udp_us->us_netstack); + connp->conn_udp = NULL; kmem_cache_free(udp_cache, udp); } @@ -2069,14 +2005,17 @@ udp_disconnect(queue_t *q, mblk_t *mp) udp_t *udp = Q_TO_UDP(q); mblk_t *mp1; udp_fanout_t *udpf; + udp_stack_t *us; + us = udp->udp_us; if (udp->udp_state != TS_DATA_XFER) { (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, "udp_disconnect: bad state, %u", udp->udp_state); udp_err_ack(q, mp, TOUTSTATE, 0); return; } - udpf = &udp_bind_fanout[UDP_BIND_HASH(udp->udp_port)]; + udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, + us->us_bind_fanout_size)]; mutex_enter(&udpf->uf_lock); udp->udp_v6src = udp->udp_bound_v6src; udp->udp_state = TS_IDLE; @@ -2160,10 +2099,12 @@ static int udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) { int i; + udp_t *udp = Q_TO_UDP(q); + udp_stack_t *us = udp->udp_us; - for (i = 0; i < udp_g_num_epriv_ports; i++) { - if (udp_g_epriv_ports[i] != 0) - (void) mi_mpprintf(mp, "%d ", udp_g_epriv_ports[i]); + for (i = 0; i < us->us_num_epriv_ports; i++) { + if (us->us_epriv_ports[i] != 0) + (void) mi_mpprintf(mp, "%d ", us->us_epriv_ports[i]); } return (0); } @@ -2175,6 +2116,8 @@ udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp, { long new_value; int i; + udp_t *udp = Q_TO_UDP(q); + udp_stack_t *us = udp->udp_us; /* * Fail the request if the new value does not lie within the @@ -2186,22 +2129,22 @@ udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp, } /* Check if the value is already in the list */ - for (i = 0; i < udp_g_num_epriv_ports; i++) { - if (new_value == udp_g_epriv_ports[i]) { + for (i = 0; i < us->us_num_epriv_ports; i++) { + if (new_value == us->us_epriv_ports[i]) { return (EEXIST); } } /* Find an empty slot */ - for (i = 0; i < udp_g_num_epriv_ports; i++) { - if (udp_g_epriv_ports[i] == 0) + for (i = 0; i < us->us_num_epriv_ports; i++) { + if (us->us_epriv_ports[i] == 0) break; } - if (i == udp_g_num_epriv_ports) { + if (i == us->us_num_epriv_ports) { return (EOVERFLOW); } /* Set the new value */ - udp_g_epriv_ports[i] = (in_port_t)new_value; + us->us_epriv_ports[i] = (in_port_t)new_value; return (0); } @@ -2212,6 +2155,8 @@ udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp, { long new_value; int i; + udp_t *udp = Q_TO_UDP(q); + udp_stack_t *us = udp->udp_us; /* * Fail the request if the new value does not lie within the @@ -2223,16 +2168,16 @@ udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp, } /* Check that the value is already in the list */ - for 
(i = 0; i < udp_g_num_epriv_ports; i++) { - if (udp_g_epriv_ports[i] == new_value) + for (i = 0; i < us->us_num_epriv_ports; i++) { + if (us->us_epriv_ports[i] == new_value) break; } - if (i == udp_g_num_epriv_ports) { + if (i == us->us_num_epriv_ports) { return (ESRCH); } /* Clear the value */ - udp_g_epriv_ports[i] = 0; + us->us_epriv_ports[i] = 0; return (0); } @@ -2479,7 +2424,7 @@ noticmpv6: udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + opt_length; if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { - BUMP_MIB(&udp_mib, udpInErrors); + BUMP_MIB(&udp->udp_mib, udpInErrors); break; } @@ -2890,8 +2835,10 @@ udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) int err; udp_t *udp; conn_t *connp; - zoneid_t zoneid = getzoneid(); queue_t *ip_wq; + zoneid_t zoneid; + netstack_t *ns; + udp_stack_t *us; TRACE_1(TR_FAC_UDP, TR_UDP_OPEN, "udp_open: q %p", q); @@ -2903,9 +2850,23 @@ udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) if (sflag != MODOPEN) return (EINVAL); - q->q_hiwat = udp_recv_hiwat; - WR(q)->q_hiwat = udp_xmit_hiwat; - WR(q)->q_lowat = udp_xmit_lowat; + ns = netstack_find_by_cred(credp); + ASSERT(ns != NULL); + us = ns->netstack_udp; + ASSERT(us != NULL); + + /* + * For exclusive stacks we set the zoneid to zero + * to make UDP operate as if in the global zone. + */ + if (us->us_netstack->netstack_stackid != GLOBAL_NETSTACKID) + zoneid = GLOBAL_ZONEID; + else + zoneid = crgetzoneid(credp); + + q->q_hiwat = us->us_recv_hiwat; + WR(q)->q_hiwat = us->us_xmit_hiwat; + WR(q)->q_lowat = us->us_xmit_lowat; /* Insert ourselves in the stream since we're about to walk q_next */ qprocson(q); @@ -2922,13 +2883,15 @@ udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) ip_wq = WR(q)->q_next; if (NOT_OVER_IP(ip_wq)) { /* Support just SNMP for MIB browsers */ - connp = ipcl_conn_create(IPCL_IPCCONN, KM_SLEEP); + connp = ipcl_conn_create(IPCL_IPCCONN, KM_SLEEP, + us->us_netstack); connp->conn_rq = q; connp->conn_wq = WR(q); connp->conn_flags |= IPCL_UDPMOD; connp->conn_cred = credp; connp->conn_zoneid = zoneid; connp->conn_udp = udp; + udp->udp_us = us; udp->udp_connp = connp; q->q_ptr = WR(q)->q_ptr = connp; crhold(credp); @@ -2956,14 +2919,14 @@ udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) udp->udp_family = AF_INET6; udp->udp_ipversion = IPV6_VERSION; udp->udp_max_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; - udp->udp_ttl = udp_ipv6_hoplimit; + udp->udp_ttl = us->us_ipv6_hoplimit; connp->conn_af_isv6 = B_TRUE; connp->conn_flags |= IPCL_ISV6; } else { udp->udp_family = AF_INET; udp->udp_ipversion = IPV4_VERSION; udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE; - udp->udp_ttl = udp_ipv4_ttl; + udp->udp_ttl = us->us_ipv4_ttl; connp->conn_af_isv6 = B_FALSE; connp->conn_flags &= ~IPCL_ISV6; } @@ -2990,20 +2953,21 @@ udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) connp->conn_ulp_labeled = is_system_labeled(); mutex_exit(&connp->conn_lock); + udp->udp_us = us; /* * The transmit hiwat/lowat is only looked at on IP's queue. * Store in q_hiwat in order to return on SO_SNDBUF/SO_RCVBUF * getsockopts. */ - q->q_hiwat = udp_recv_hiwat; - WR(q)->q_hiwat = udp_xmit_hiwat; - WR(q)->q_lowat = udp_xmit_lowat; + q->q_hiwat = us->us_recv_hiwat; + WR(q)->q_hiwat = us->us_xmit_hiwat; + WR(q)->q_lowat = us->us_xmit_lowat; if (udp->udp_family == AF_INET6) { /* Build initial header template for transmit */ if ((err = udp_build_hdrs(q, udp)) != 0) { -error: + /* XXX missing free of connp? crfree? 
netstack_rele? */ qprocsoff(UDP_RD(q)); udp->udp_connp = NULL; connp->conn_udp = NULL; @@ -3014,11 +2978,9 @@ error: /* Set the Stream head write offset and high watermark. */ (void) mi_set_sth_wroff(UDP_RD(q), - udp->udp_max_hdr_len + udp_wroff_extra); + udp->udp_max_hdr_len + us->us_wroff_extra); (void) mi_set_sth_hiwat(UDP_RD(q), udp_set_rcv_hiwat(udp, q->q_hiwat)); - WR(UDP_RD(q))->q_qinfo = &udp_winit; - return (0); } @@ -3040,6 +3002,8 @@ udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) int udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) { + udp_t *udp = Q_TO_UDP(q); + udp_stack_t *us = udp->udp_us; int *i1 = (int *)ptr; switch (level) { @@ -3062,7 +3026,7 @@ udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) *i1 = IP_DEFAULT_MULTICAST_LOOP; return (sizeof (int)); case IPV6_UNICAST_HOPS: - *i1 = udp_ipv6_hoplimit; + *i1 = us->us_ipv6_hoplimit; return (sizeof (int)); } break; @@ -3083,11 +3047,13 @@ udp_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) udp_t *udp; ip6_pkt_t *ipp; int len; + udp_stack_t *us; q = UDP_WR(q); connp = Q_TO_CONN(q); udp = connp->conn_udp; ipp = &udp->udp_sticky_ipp; + us = udp->udp_us; switch (level) { case SOL_SOCKET: @@ -3366,7 +3332,8 @@ udp_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) return (ipp->ipp_dstoptslen); case IPV6_PATHMTU: return (ip_fill_mtuinfo(&udp->udp_v6dst, - udp->udp_dstport, (struct ip6_mtuinfo *)ptr)); + udp->udp_dstport, (struct ip6_mtuinfo *)ptr, + us->us_netstack)); default: return (-1); } @@ -3410,10 +3377,12 @@ udp_opt_set(queue_t *q, uint_t optset_context, int level, conn_t *connp; udp_t *udp; uint_t newlen; + udp_stack_t *us; q = UDP_WR(q); connp = Q_TO_CONN(q); udp = connp->conn_udp; + us = udp->udp_us; switch (optset_context) { case SETFN_OPTCOM_CHECKONLY: @@ -3495,7 +3464,7 @@ udp_opt_set(queue_t *q, uint_t optset_context, int level, break; case SO_SNDBUF: - if (*i1 > udp_max_buf) { + if (*i1 > us->us_max_buf) { *outlenp = 0; return (ENOBUFS); } @@ -3505,7 +3474,7 @@ udp_opt_set(queue_t *q, uint_t optset_context, int level, } break; case SO_RCVBUF: - if (*i1 > udp_max_buf) { + if (*i1 > us->us_max_buf) { *outlenp = 0; return (ENOBUFS); } @@ -3620,7 +3589,7 @@ udp_opt_set(queue_t *q, uint_t optset_context, int level, udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + udp->udp_ip_snd_options_len; (void) mi_set_sth_wroff(RD(q), udp->udp_max_hdr_len + - udp_wroff_extra); + us->us_wroff_extra); break; case IP_TTL: @@ -3796,7 +3765,7 @@ udp_opt_set(queue_t *q, uint_t optset_context, int level, if (!checkonly) { if (*i1 == -1) { udp->udp_ttl = ipp->ipp_unicast_hops = - udp_ipv6_hoplimit; + us->us_ipv6_hoplimit; ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; /* Pass modified value to IP. 
*/ *i1 = udp->udp_ttl; @@ -3963,7 +3932,8 @@ udp_opt_set(queue_t *q, uint_t optset_context, int level, if (*i1 > 255 || *i1 < -1) return (EINVAL); if (*i1 == -1) - ipp->ipp_hoplimit = udp_ipv6_hoplimit; + ipp->ipp_hoplimit = + us->us_ipv6_hoplimit; else ipp->ipp_hoplimit = *i1; ipp->ipp_fields |= IPPF_HOPLIMIT; @@ -4264,6 +4234,7 @@ udp_opt_set(queue_t *q, uint_t optset_context, int level, static int udp_build_hdrs(queue_t *q, udp_t *udp) { + udp_stack_t *us = udp->udp_us; uchar_t *hdrs; uint_t hdrs_len; ip6_t *ip6h; @@ -4307,7 +4278,7 @@ udp_build_hdrs(queue_t *q, udp_t *udp) if (hdrs_len > udp->udp_max_hdr_len) { udp->udp_max_hdr_len = hdrs_len; (void) mi_set_sth_wroff(RD(q), udp->udp_max_hdr_len + - udp_wroff_extra); + us->us_wroff_extra); } return (0); } @@ -4332,41 +4303,41 @@ udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) * named dispatch (ND) handler. */ static boolean_t -udp_param_register(udpparam_t *udppa, int cnt) +udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt) { for (; cnt-- > 0; udppa++) { if (udppa->udp_param_name && udppa->udp_param_name[0]) { - if (!nd_load(&udp_g_nd, udppa->udp_param_name, + if (!nd_load(ndp, udppa->udp_param_name, udp_param_get, udp_param_set, (caddr_t)udppa)) { - nd_free(&udp_g_nd); + nd_free(ndp); return (B_FALSE); } } } - if (!nd_load(&udp_g_nd, "udp_extra_priv_ports", + if (!nd_load(ndp, "udp_extra_priv_ports", udp_extra_priv_ports_get, NULL, NULL)) { - nd_free(&udp_g_nd); + nd_free(ndp); return (B_FALSE); } - if (!nd_load(&udp_g_nd, "udp_extra_priv_ports_add", + if (!nd_load(ndp, "udp_extra_priv_ports_add", NULL, udp_extra_priv_ports_add, NULL)) { - nd_free(&udp_g_nd); + nd_free(ndp); return (B_FALSE); } - if (!nd_load(&udp_g_nd, "udp_extra_priv_ports_del", + if (!nd_load(ndp, "udp_extra_priv_ports_del", NULL, udp_extra_priv_ports_del, NULL)) { - nd_free(&udp_g_nd); + nd_free(ndp); return (B_FALSE); } - if (!nd_load(&udp_g_nd, "udp_status", udp_status_report, NULL, + if (!nd_load(ndp, "udp_status", udp_status_report, NULL, NULL)) { - nd_free(&udp_g_nd); + nd_free(ndp); return (B_FALSE); } - if (!nd_load(&udp_g_nd, "udp_bind_hash", udp_bind_hash_report, NULL, + if (!nd_load(ndp, "udp_bind_hash", udp_bind_hash_report, NULL, NULL)) { - nd_free(&udp_g_nd); + nd_free(ndp); return (B_FALSE); } return (B_TRUE); @@ -4566,11 +4537,13 @@ udp_input(conn_t *connp, mblk_t *mp) queue_t *q = connp->conn_rq; pid_t cpid; cred_t *rcr = connp->conn_cred; + udp_stack_t *us; TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_START, "udp_rput_start: q %p mp %p", q, mp); udp = connp->conn_udp; + us = udp->udp_us; rptr = mp->b_rptr; ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); ASSERT(OK_32PTR(rptr)); @@ -4593,7 +4566,7 @@ udp_input(conn_t *connp, mblk_t *mp) options_mp = mp; mp = mp->b_cont; rptr = mp->b_rptr; - UDP_STAT(udp_in_pktinfo); + UDP_STAT(us, udp_in_pktinfo); } else { /* * ICMP messages. 
@@ -4757,14 +4730,14 @@ udp_input(conn_t *connp, mblk_t *mp) if (udp->udp_recvdstaddr) { udi_size += sizeof (struct T_opthdr) + sizeof (struct in_addr); - UDP_STAT(udp_in_recvdstaddr); + UDP_STAT(us, udp_in_recvdstaddr); } if (udp->udp_ip_recvpktinfo && (pinfo != NULL) && (pinfo->ip_pkt_flags & IPF_RECVADDR)) { udi_size += sizeof (struct T_opthdr) + sizeof (struct in_pktinfo); - UDP_STAT(udp_ip_recvpktinfo); + UDP_STAT(us, udp_ip_recvpktinfo); } /* @@ -4774,20 +4747,20 @@ udp_input(conn_t *connp, mblk_t *mp) if (udp->udp_recvif && (pinfo != NULL) && (pinfo->ip_pkt_flags & IPF_RECVIF)) { udi_size += sizeof (struct T_opthdr) + sizeof (uint_t); - UDP_STAT(udp_in_recvif); + UDP_STAT(us, udp_in_recvif); } if (udp->udp_recvslla && (pinfo != NULL) && (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { udi_size += sizeof (struct T_opthdr) + sizeof (struct sockaddr_dl); - UDP_STAT(udp_in_recvslla); + UDP_STAT(us, udp_in_recvslla); } if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) { udi_size += sizeof (struct T_opthdr) + ucredsize; cpid = DB_CPID(mp); - UDP_STAT(udp_in_recvucred); + UDP_STAT(us, udp_in_recvucred); } /* @@ -4799,7 +4772,7 @@ udp_input(conn_t *connp, mblk_t *mp) if (udp->udp_timestamp) { udi_size += sizeof (struct T_opthdr) + sizeof (timestruc_t) + _POINTER_ALIGNMENT; - UDP_STAT(udp_in_timestamp); + UDP_STAT(us, udp_in_timestamp); } /* @@ -4807,7 +4780,7 @@ udp_input(conn_t *connp, mblk_t *mp) */ if (udp->udp_recvttl) { udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t); - UDP_STAT(udp_in_recvttl); + UDP_STAT(us, udp_in_recvttl); } ASSERT(IPH_HDR_LENGTH((ipha_t *)rptr) == IP_SIMPLE_HDR_LENGTH); @@ -4819,7 +4792,7 @@ udp_input(conn_t *connp, mblk_t *mp) freeb(options_mp); TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, "udp_rput_end: q %p (%S)", q, "allocbfail"); - BUMP_MIB(&udp_mib, udpInErrors); + BUMP_MIB(&udp->udp_mib, udpInErrors); return; } mp1->b_cont = mp; @@ -5012,7 +4985,7 @@ udp_input(conn_t *connp, mblk_t *mp) (ipp.ipp_fields & IPPF_HOPOPTS)) { size_t hlen; - UDP_STAT(udp_in_recvhopopts); + UDP_STAT(us, udp_in_recvhopopts); hlen = copy_hop_opts(&ipp, NULL); if (hlen == 0) ipp.ipp_fields &= ~IPPF_HOPOPTS; @@ -5023,7 +4996,7 @@ udp_input(conn_t *connp, mblk_t *mp) (ipp.ipp_fields & IPPF_DSTOPTS)) { udi_size += sizeof (struct T_opthdr) + ipp.ipp_dstoptslen; - UDP_STAT(udp_in_recvdstopts); + UDP_STAT(us, udp_in_recvdstopts); } if (((udp->udp_ipv6_recvdstopts && udp->udp_ipv6_recvrthdr && @@ -5032,36 +5005,36 @@ udp_input(conn_t *connp, mblk_t *mp) (ipp.ipp_fields & IPPF_RTDSTOPTS)) { udi_size += sizeof (struct T_opthdr) + ipp.ipp_rtdstoptslen; - UDP_STAT(udp_in_recvrtdstopts); + UDP_STAT(us, udp_in_recvrtdstopts); } if (udp->udp_ipv6_recvrthdr && (ipp.ipp_fields & IPPF_RTHDR)) { udi_size += sizeof (struct T_opthdr) + ipp.ipp_rthdrlen; - UDP_STAT(udp_in_recvrthdr); + UDP_STAT(us, udp_in_recvrthdr); } if (udp->udp_ip_recvpktinfo && (ipp.ipp_fields & IPPF_IFINDEX)) { udi_size += sizeof (struct T_opthdr) + sizeof (struct in6_pktinfo); - UDP_STAT(udp_in_recvpktinfo); + UDP_STAT(us, udp_in_recvpktinfo); } } if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) { udi_size += sizeof (struct T_opthdr) + ucredsize; cpid = DB_CPID(mp); - UDP_STAT(udp_in_recvucred); + UDP_STAT(us, udp_in_recvucred); } if (udp->udp_ipv6_recvhoplimit) { udi_size += sizeof (struct T_opthdr) + sizeof (int); - UDP_STAT(udp_in_recvhoplimit); + UDP_STAT(us, udp_in_recvhoplimit); } if (udp->udp_ipv6_recvtclass) { udi_size += sizeof (struct T_opthdr) + sizeof (int); - UDP_STAT(udp_in_recvtclass); + UDP_STAT(us, 
udp_in_recvtclass); } mp1 = allocb(udi_size, BPRI_MED); @@ -5071,7 +5044,7 @@ udp_input(conn_t *connp, mblk_t *mp) freeb(options_mp); TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, "udp_rput_end: q %p (%S)", q, "allocbfail"); - BUMP_MIB(&udp_mib, udpInErrors); + BUMP_MIB(&udp->udp_mib, udpInErrors); return; } mp1->b_cont = mp; @@ -5097,7 +5070,7 @@ udp_input(conn_t *connp, mblk_t *mp) sin6->sin6_flowinfo = 0; sin6->sin6_scope_id = 0; sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst, - connp->conn_zoneid); + connp->conn_zoneid, us->us_netstack); } else { sin6->sin6_addr = ip6h->ip6_src; /* No sin6_flowinfo per API */ @@ -5109,7 +5082,8 @@ udp_input(conn_t *connp, mblk_t *mp) else sin6->sin6_scope_id = 0; sin6->__sin6_src_id = ip_srcid_find_addr( - &ip6h->ip6_dst, connp->conn_zoneid); + &ip6h->ip6_dst, connp->conn_zoneid, + us->us_netstack); } sin6->sin6_port = udpha->uha_src_port; sin6->sin6_family = udp->udp_family; @@ -5257,7 +5231,7 @@ udp_input(conn_t *connp, mblk_t *mp) /* No IP_RECVDSTADDR for IPv6. */ } - BUMP_MIB(&udp_mib, udpHCInDatagrams); + BUMP_MIB(&udp->udp_mib, udpHCInDatagrams); TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, "udp_rput_end: q %p (%S)", q, "end"); if (options_mp != NULL) @@ -5286,7 +5260,7 @@ tossit: freemsg(mp); if (options_mp != NULL) freeb(options_mp); - BUMP_MIB(&udp_mib, udpInErrors); + BUMP_MIB(&udp->udp_mib, udpInErrors); } void @@ -5326,6 +5300,7 @@ udp_rput_other(queue_t *q, mblk_t *mp) udp_t *udp = Q_TO_UDP(q); pid_t cpid; cred_t *rcr = udp->udp_connp->conn_cred; + udp_stack_t *us = udp->udp_us; TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_START, "udp_rput_other: q %p mp %p", q, mp); @@ -5381,8 +5356,9 @@ udp_rput_other(queue_t *q, mblk_t *mp) */ udp_fanout_t *udpf; - udpf = &udp_bind_fanout[ - UDP_BIND_HASH(udp->udp_port)]; + udpf = &us->us_bind_fanout[ + UDP_BIND_HASH(udp->udp_port, + us->us_bind_fanout_size)]; mutex_enter(&udpf->uf_lock); if (udp->udp_state == TS_DATA_XFER) { /* Connect failed */ @@ -5400,16 +5376,16 @@ udp_rput_other(queue_t *q, mblk_t *mp) tea->ERROR_prim = T_DISCON_REQ; udp->udp_discon_pending = 0; } - V6_SET_ZERO(udp->udp_v6src); - V6_SET_ZERO(udp->udp_bound_v6src); - udp->udp_state = TS_UNBND; - udp_bind_hash_remove(udp, B_TRUE); - udp->udp_port = 0; - mutex_exit(&udpf->uf_lock); - if (udp->udp_family == AF_INET6) - (void) udp_build_hdrs(q, udp); - break; - } + V6_SET_ZERO(udp->udp_v6src); + V6_SET_ZERO(udp->udp_bound_v6src); + udp->udp_state = TS_UNBND; + udp_bind_hash_remove(udp, B_TRUE); + udp->udp_port = 0; + mutex_exit(&udpf->uf_lock); + if (udp->udp_family == AF_INET6) + (void) udp_build_hdrs(q, udp); + break; + } default: break; } @@ -5444,10 +5420,9 @@ udp_rput_other(queue_t *q, mblk_t *mp) freemsg(mp); if (options_mp != NULL) freeb(options_mp); - BUMP_MIB(&udp_mib, udpInErrors); + BUMP_MIB(&udp->udp_mib, udpInErrors); TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, "udp_rput_other_end: q %p (%S)", q, "hdrshort"); - BUMP_MIB(&udp_mib, udpInErrors); return; } rptr = mp->b_rptr; @@ -5490,19 +5465,19 @@ udp_rput_other(queue_t *q, mblk_t *mp) udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); if (udp->udp_recvdstaddr) { udi_size += sizeof (struct T_opthdr) + sizeof (struct in_addr); - UDP_STAT(udp_in_recvdstaddr); + UDP_STAT(us, udp_in_recvdstaddr); } if (udp->udp_ip_recvpktinfo && recv_on && (pinfo->ip_pkt_flags & IPF_RECVADDR)) { udi_size += sizeof (struct T_opthdr) + sizeof (struct in_pktinfo); - UDP_STAT(udp_ip_recvpktinfo); + UDP_STAT(us, udp_ip_recvpktinfo); } if (udp->udp_recvopts && opt_len > 0) { udi_size += sizeof (struct T_opthdr) + opt_len; - 
UDP_STAT(udp_in_recvopts); + UDP_STAT(us, udp_in_recvopts); } /* @@ -5512,27 +5487,27 @@ udp_rput_other(queue_t *q, mblk_t *mp) if (udp->udp_recvif && recv_on && (pinfo->ip_pkt_flags & IPF_RECVIF)) { udi_size += sizeof (struct T_opthdr) + sizeof (uint_t); - UDP_STAT(udp_in_recvif); + UDP_STAT(us, udp_in_recvif); } if (udp->udp_recvslla && recv_on && (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { udi_size += sizeof (struct T_opthdr) + sizeof (struct sockaddr_dl); - UDP_STAT(udp_in_recvslla); + UDP_STAT(us, udp_in_recvslla); } if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) { udi_size += sizeof (struct T_opthdr) + ucredsize; cpid = DB_CPID(mp); - UDP_STAT(udp_in_recvucred); + UDP_STAT(us, udp_in_recvucred); } /* * If IP_RECVTTL is set allocate the appropriate sized buffer */ if (udp->udp_recvttl) { udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t); - UDP_STAT(udp_in_recvttl); + UDP_STAT(us, udp_in_recvttl); } /* Allocate a message block for the T_UNITDATA_IND structure. */ @@ -5543,7 +5518,7 @@ udp_rput_other(queue_t *q, mblk_t *mp) freeb(options_mp); TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, "udp_rput_other_end: q %p (%S)", q, "allocbfail"); - BUMP_MIB(&udp_mib, udpInErrors); + BUMP_MIB(&udp->udp_mib, udpInErrors); return; } mp1->b_cont = mp; @@ -5700,7 +5675,7 @@ udp_rput_other(queue_t *q, mblk_t *mp) ASSERT(udi_size == 0); /* "Consumed" all of allocated space */ } - BUMP_MIB(&udp_mib, udpHCInDatagrams); + BUMP_MIB(&udp->udp_mib, udpHCInDatagrams); TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, "udp_rput_other_end: q %p (%S)", q, "end"); if (options_mp != NULL) @@ -5870,6 +5845,7 @@ udp_snmp_get(queue_t *q, mblk_t *mpctl) int v4_conn_idx; int v6_conn_idx; boolean_t needattr; + ip_stack_t *ipst = connp->conn_netstack->netstack_ip; mp_conn_ctl = mp_attr_ctl = mp6_conn_ctl = NULL; if (mpctl == NULL || @@ -5887,16 +5863,17 @@ udp_snmp_get(queue_t *q, mblk_t *mpctl) zoneid = connp->conn_zoneid; /* fixed length structure for IPv4 and IPv6 counters */ - SET_MIB(udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t)); - SET_MIB(udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t)); + SET_MIB(udp->udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t)); + SET_MIB(udp->udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t)); /* synchronize 64- and 32-bit counters */ - SYNC32_MIB(&udp_mib, udpInDatagrams, udpHCInDatagrams); - SYNC32_MIB(&udp_mib, udpOutDatagrams, udpHCOutDatagrams); + SYNC32_MIB(&udp->udp_mib, udpInDatagrams, udpHCInDatagrams); + SYNC32_MIB(&udp->udp_mib, udpOutDatagrams, udpHCOutDatagrams); optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; optp->level = MIB2_UDP; optp->name = 0; - (void) snmp_append_data(mpdata, (char *)&udp_mib, sizeof (udp_mib)); + (void) snmp_append_data(mpdata, (char *)&udp->udp_mib, + sizeof (udp->udp_mib)); optp->len = msgdsize(mpdata); qreply(q, mpctl); @@ -5904,7 +5881,7 @@ udp_snmp_get(queue_t *q, mblk_t *mpctl) v4_conn_idx = v6_conn_idx = 0; for (i = 0; i < CONN_G_HASH_SIZE; i++) { - connfp = &ipcl_globalhash_fanout[i]; + connfp = &ipst->ips_ipcl_globalhash_fanout[i]; connp = NULL; while ((connp = ipcl_get_next_conn(connfp, connp, @@ -6140,18 +6117,20 @@ udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) conn_t *connp = Q_TO_CONN(q); udp_t *udp = connp->conn_udp; int i; + udp_stack_t *us = udp->udp_us; + ip_stack_t *ipst = connp->conn_netstack->netstack_ip; /* * Because of the ndd constraint, at most we can have 64K buffer * to put in all UDP info. So to be more efficient, just * allocate a 64K buffer here, assuming we need that large buffer. 
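In udp_snmp_get() above, the wide udpHCInDatagrams/udpHCOutDatagrams counters stay authoritative and SYNC32_MIB derives the legacy 32-bit fields from them just before the per-stack MIB is appended to the reply. A standalone sketch of that one-way synchronization (field names are illustrative, not the mib2 definitions):

#include <stdint.h>

struct udp_mib_view {
	uint64_t	hc_in_datagrams;	/* authoritative 64-bit counter */
	uint32_t	in_datagrams;		/* legacy 32-bit field, derived */
};

/* Truncating assignment, the same effect as the SYNC32_MIB() step above. */
static void
sync32(struct udp_mib_view *m)
{
	m->in_datagrams = (uint32_t)m->hc_in_datagrams;
}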
* This may be a problem as any user can read udp_status. Therefore - * we limit the rate of doing this using udp_ndd_get_info_interval. + * we limit the rate of doing this using us_ndd_get_info_interval. * This should be OK as normal users should not do this too often. */ - if (cr == NULL || secpolicy_net_config(cr, B_TRUE) != 0) { - if (ddi_get_lbolt() - udp_last_ndd_get_info_time < - drv_usectohz(udp_ndd_get_info_interval * 1000)) { + if (cr == NULL || secpolicy_ip_config(cr, B_TRUE) != 0) { + if (ddi_get_lbolt() - us->us_last_ndd_get_info_time < + drv_usectohz(us->us_ndd_get_info_interval * 1000)) { (void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG); return (0); } @@ -6170,7 +6149,7 @@ udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) zoneid = connp->conn_zoneid; for (i = 0; i < CONN_G_HASH_SIZE; i++) { - connfp = &ipcl_globalhash_fanout[i]; + connfp = &ipst->ips_ipcl_globalhash_fanout[i]; connp = NULL; while ((connp = ipcl_get_next_conn(connfp, connp, @@ -6183,7 +6162,7 @@ udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) udp_report_item(mp->b_cont, udp); } } - udp_last_ndd_get_info_time = ddi_get_lbolt(); + us->us_last_ndd_get_info_time = ddi_get_lbolt(); return (0); } @@ -6294,7 +6273,7 @@ udp_unbind(queue_t *q, mblk_t *mp) * Don't let port fall into the privileged range. * Since the extra privileged ports can be arbitrary we also * ensure that we exclude those from consideration. - * udp_g_epriv_ports is not sorted thus we loop over it until + * us->us_epriv_ports is not sorted thus we loop over it until * there are no changes. */ static in_port_t @@ -6303,6 +6282,7 @@ udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random) int i; in_port_t nextport; boolean_t restart = B_FALSE; + udp_stack_t *us = udp->udp_us; if (random && udp_random_anon_port != 0) { (void) random_get_pseudo_bytes((uint8_t *)&port, @@ -6316,29 +6296,29 @@ udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random) * port to get the random port. It should fall into the * valid anon port range. 
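udp_status_report() above throttles unprivileged readers by comparing the current lbolt against the per-stack us_last_ndd_get_info_time and refusing to run again inside us_ndd_get_info_interval. The same idea at user level with a monotonic clock; the interval value is illustrative:

#include <stdbool.h>
#include <time.h>

static struct timespec last_report;
static const long report_interval_ms = 1000;	/* illustrative interval */

/* Allow a report only if the previous one is at least an interval old. */
static bool
report_allowed(void)
{
	struct timespec now;
	long elapsed_ms;

	(void) clock_gettime(CLOCK_MONOTONIC, &now);
	elapsed_ms = (now.tv_sec - last_report.tv_sec) * 1000 +
	    (now.tv_nsec - last_report.tv_nsec) / 1000000;
	if (elapsed_ms < report_interval_ms)
		return (false);
	last_report = now;
	return (true);
}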
*/ - if (port < udp_smallest_anon_port) { - port = udp_smallest_anon_port + - port % (udp_largest_anon_port - - udp_smallest_anon_port); + if (port < us->us_smallest_anon_port) { + port = us->us_smallest_anon_port + + port % (us->us_largest_anon_port - + us->us_smallest_anon_port); } } retry: - if (port < udp_smallest_anon_port) - port = udp_smallest_anon_port; + if (port < us->us_smallest_anon_port) + port = us->us_smallest_anon_port; - if (port > udp_largest_anon_port) { - port = udp_smallest_anon_port; + if (port > us->us_largest_anon_port) { + port = us->us_smallest_anon_port; if (restart) return (0); restart = B_TRUE; } - if (port < udp_smallest_nonpriv_port) - port = udp_smallest_nonpriv_port; + if (port < us->us_smallest_nonpriv_port) + port = us->us_smallest_nonpriv_port; - for (i = 0; i < udp_g_num_epriv_ports; i++) { - if (port == udp_g_epriv_ports[i]) { + for (i = 0; i < us->us_num_epriv_ports; i++) { + if (port == us->us_epriv_ports[i]) { port++; /* * Make sure that the port is in the @@ -6366,7 +6346,8 @@ udp_update_label(queue_t *wq, mblk_t *mp, ipaddr_t dst) udp_t *udp = Q_TO_UDP(wq); err = tsol_compute_label(DB_CREDDEF(mp, udp->udp_connp->conn_cred), dst, - opt_storage, udp->udp_mac_exempt); + opt_storage, udp->udp_mac_exempt, + udp->udp_us->us_netstack->netstack_ip); if (err == 0) { err = tsol_update_options(&udp->udp_ip_snd_options, &udp->udp_ip_snd_options_len, &udp->udp_label_len, @@ -6401,6 +6382,9 @@ udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, ip4_pkt_t pktinfo; ip4_pkt_t *pktinfop = &pktinfo; ip_opt_info_t optinfo; + ip_stack_t *ipst = connp->conn_netstack->netstack_ip; + udp_stack_t *us = udp->udp_us; + ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; *error = 0; @@ -6475,7 +6459,7 @@ udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length]; if (DB_REF(mp1) != 1 || (uchar_t *)ipha < DB_BASE(mp1) || !OK_32PTR(ipha)) { - mp2 = allocb(ip_hdr_length + udp_wroff_extra, BPRI_LO); + mp2 = allocb(ip_hdr_length + us->us_wroff_extra, BPRI_LO); if (mp2 == NULL) { TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, "udp_wput_end: q %p (%S)", q, "allocbfail2"); @@ -6522,7 +6506,8 @@ udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, if (srcid != 0 && ipha->ipha_src == INADDR_ANY) { in6_addr_t v6src; - ip_srcid_find_id(srcid, &v6src, connp->conn_zoneid); + ip_srcid_find_id(srcid, &v6src, connp->conn_zoneid, + us->us_netstack); IN6_V4MAPPED_TO_IPADDR(&v6src, ipha->ipha_src); } } @@ -6586,7 +6571,7 @@ udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, * Ignore the destination in T_unitdata_req. * Create a checksum adjustment for a source route, if any. */ - cksum = ip_massage_options(ipha); + cksum = ip_massage_options(ipha, us->us_netstack); cksum = (cksum & 0xFFFF) + (cksum >> 16); cksum -= ((ipha->ipha_dst >> 16) & 0xFFFF) + (ipha->ipha_dst & 0xFFFF); @@ -6603,10 +6588,10 @@ udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, /* There might be a carry. */ cksum = (cksum & 0xFFFF) + (cksum >> 16); #ifdef _LITTLE_ENDIAN - if (udp_do_checksum) + if (us->us_do_checksum) ip_len = (cksum << 16) | ip_len; #else - if (udp_do_checksum) + if (us->us_do_checksum) ip_len = (ip_len << 16) | cksum; else ip_len <<= 16; @@ -6617,7 +6602,7 @@ udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, * We make it easy for IP to include our pseudo header * by putting our length in uha_checksum. 
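A user-level sketch of the udp_update_next_port() loop above: map the random candidate into the anonymous range, wrap around at most once, and skip any port found on the (unsorted) reserved list. The bounds and reserved ports here are illustrative, not the stack defaults:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#define	SMALLEST_ANON_PORT	32768		/* illustrative bounds */
#define	LARGEST_ANON_PORT	65535

static const uint16_t reserved[] = { 2049, 4045 };	/* unsorted, like us_epriv_ports */

static bool
is_reserved(uint16_t port)
{
	for (size_t i = 0; i < sizeof (reserved) / sizeof (reserved[0]); i++) {
		if (reserved[i] == port)
			return (true);
	}
	return (false);
}

/* Return a usable anonymous port, or 0 if the whole range was exhausted. */
static uint16_t
next_anon_port(uint32_t candidate)
{
	bool restarted = false;
	uint32_t port = SMALLEST_ANON_PORT +
	    candidate % (LARGEST_ANON_PORT - SMALLEST_ANON_PORT);

	for (;;) {
		if (port > LARGEST_ANON_PORT) {
			if (restarted)
				return (0);
			restarted = true;
			port = SMALLEST_ANON_PORT;
		}
		if (!is_reserved((uint16_t)port))
			return ((uint16_t)port);
		port++;		/* skip the reserved port and try the next one */
	}
}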
*/ - if (udp_do_checksum) + if (us->us_do_checksum) ip_len |= (ip_len << 16); #ifndef _LITTLE_ENDIAN else @@ -6640,19 +6625,20 @@ udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, mp = NULL; /* We're done. Pass the packet to ip. */ - BUMP_MIB(&udp_mib, udpHCOutDatagrams); + BUMP_MIB(&udp->udp_mib, udpHCOutDatagrams); TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, "udp_wput_end: q %p (%S)", q, "end"); if ((connp->conn_flags & IPCL_CHECK_POLICY) != 0 || - CONN_OUTBOUND_POLICY_PRESENT(connp) || + CONN_OUTBOUND_POLICY_PRESENT(connp, ipss) || connp->conn_dontroute || connp->conn_xmit_if_ill != NULL || connp->conn_nofailover_ill != NULL || connp->conn_outgoing_ill != NULL || optinfo.ip_opt_flags != 0 || optinfo.ip_opt_ill_index != 0 || ipha->ipha_version_and_hdr_length != IP_SIMPLE_HDR_VERSION || - IPP_ENABLED(IPP_LOCAL_OUT) || ip_g_mrouter != NULL) { - UDP_STAT(udp_ip_send); + IPP_ENABLED(IPP_LOCAL_OUT, ipst) || + ipst->ips_ip_g_mrouter != NULL) { + UDP_STAT(us, udp_ip_send); ip_output_options(connp, mp1, connp->conn_wq, IP_WPUT, &optinfo); } else { @@ -6662,7 +6648,7 @@ udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, done: if (*error != 0) { ASSERT(mp != NULL); - BUMP_MIB(&udp_mib, udpOutErrors); + BUMP_MIB(&udp->udp_mib, udpOutErrors); } return (mp); } @@ -6681,6 +6667,8 @@ udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha) uint32_t cksum, hcksum_txflags; queue_t *dev_q; boolean_t retry_caching; + udp_stack_t *us = udp->udp_us; + ip_stack_t *ipst = connp->conn_netstack->netstack_ip; dst = ipha->ipha_dst; src = ipha->ipha_src; @@ -6697,7 +6685,7 @@ udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha) PHYI_LOOPBACK)) { if (ipif != NULL) ipif_refrele(ipif); - UDP_STAT(udp_ip_send); + UDP_STAT(us, udp_ip_send); ip_output(connp, mp, q, IP_WPUT); return; } @@ -6740,17 +6728,17 @@ udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha) ASSERT(ipif != NULL); ire = ire_ctable_lookup(dst, 0, 0, ipif, connp->conn_zoneid, MBLK_GETLABEL(mp), - MATCH_IRE_ILL_GROUP); + MATCH_IRE_ILL_GROUP, ipst); } else { ASSERT(ipif == NULL); ire = ire_cache_lookup(dst, connp->conn_zoneid, - MBLK_GETLABEL(mp)); + MBLK_GETLABEL(mp), ipst); } if (ire == NULL) { if (ipif != NULL) ipif_refrele(ipif); - UDP_STAT(udp_ire_null); + UDP_STAT(us, udp_ire_null); ip_output(connp, mp, q, IP_WPUT); return; } @@ -6793,7 +6781,7 @@ udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha) ((ire_fp_mp_len = MBLKL(ire_fp_mp)) > MBLKHEAD(mp))) { if (ipif != NULL) ipif_refrele(ipif); - UDP_STAT(udp_ip_ire_send); + UDP_STAT(us, udp_ip_ire_send); IRE_REFRELE(ire); ip_output(connp, mp, q, IP_WPUT); return; @@ -6812,7 +6800,7 @@ udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha) */ if ((q->q_first != NULL || connp->conn_draining) || ((dev_q->q_next || dev_q->q_first) && !canput(dev_q))) { - if (ip_output_queue) { + if (ipst->ips_ip_output_queue) { (void) putq(q, mp); } else { BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); @@ -6855,8 +6843,8 @@ udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha) /* Software checksum? */ if (DB_CKSUMFLAGS(mp) == 0) { - UDP_STAT(udp_out_sw_cksum); - UDP_STAT_UPDATE(udp_out_sw_cksum_bytes, + UDP_STAT(us, udp_out_sw_cksum); + UDP_STAT_UPDATE(us, udp_out_sw_cksum_bytes, ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH); } } @@ -6909,13 +6897,14 @@ udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha) * depending on the availability of transmit resources at * the media layer. 
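The checksum adjustment in udp_output_v4() above repeatedly folds the carry out of the 32-bit partial sum ("There might be a carry") before the result is stored. A self-contained version of that standard ones-complement fold:

#include <stdint.h>

/* Fold a 32-bit partial sum down to a 16-bit ones-complement checksum. */
static uint16_t
cksum_fold(uint32_t sum)
{
	sum = (sum & 0xffff) + (sum >> 16);	/* first fold may produce a carry */
	sum = (sum & 0xffff) + (sum >> 16);	/* second fold absorbs it */
	return ((uint16_t)sum);
}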
*/ - IP_DLS_ILL_TX(ill, ipha, mp); + IP_DLS_ILL_TX(ill, ipha, mp, ipst); } else { DTRACE_PROBE4(ip4__physical__out__start, ill_t *, NULL, ill_t *, ill, ipha_t *, ipha, mblk_t *, mp); - FW_HOOKS(ip4_physical_out_event, ipv4firewall_physical_out, - NULL, ill, ipha, mp, mp); + FW_HOOKS(ipst->ips_ip4_physical_out_event, + ipst->ips_ipv4firewall_physical_out, + NULL, ill, ipha, mp, mp, ipst); DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, mp); if (mp != NULL) putnext(ire->ire_stq, mp); @@ -6934,7 +6923,8 @@ udp_update_label_v6(queue_t *wq, mblk_t *mp, in6_addr_t *dst) uchar_t opt_storage[TSOL_MAX_IPV6_OPTION]; err = tsol_compute_label_v6(DB_CREDDEF(mp, udp->udp_connp->conn_cred), - dst, opt_storage, udp->udp_mac_exempt); + dst, opt_storage, udp->udp_mac_exempt, + udp->udp_us->us_netstack->netstack_ip); if (err == 0) { err = tsol_update_sticky(&udp->udp_sticky_ipp, &udp->udp_label_len_v6, opt_storage); @@ -6967,6 +6957,7 @@ udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen) udp_t *udp = connp->conn_udp; int error = 0; struct sockaddr_storage ss; + udp_stack_t *us = udp->udp_us; TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_START, "udp_wput_start: connp %p mp %p", connp, mp); @@ -6983,8 +6974,8 @@ udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen) if (!udp->udp_direct_sockfs || addr == NULL || addrlen == 0) { /* Not connected; address is required */ - BUMP_MIB(&udp_mib, udpOutErrors); - UDP_STAT(udp_out_err_notconn); + BUMP_MIB(&udp->udp_mib, udpOutErrors); + UDP_STAT(us, udp_out_err_notconn); freemsg(mp); TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, "udp_wput_end: connp %p (%S)", connp, @@ -6992,12 +6983,12 @@ udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen) return; } ASSERT(udp->udp_issocket); - UDP_DBGSTAT(udp_data_notconn); + UDP_DBGSTAT(us, udp_data_notconn); /* Not connected; do some more checks below */ break; } /* M_DATA for connected socket */ - UDP_DBGSTAT(udp_data_conn); + UDP_DBGSTAT(us, udp_data_conn); IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6dst, v4dst); /* Initialize addr and addrlen as if they're passed in */ @@ -7079,7 +7070,7 @@ udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen) &mp->b_rptr[tudr->DEST_offset]; addrlen = tudr->DEST_length; if (tudr->OPT_length != 0) - UDP_STAT(udp_out_opt); + UDP_STAT(us, udp_out_opt); break; } /* FALLTHRU */ @@ -7154,7 +7145,7 @@ udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen) mp = udp_output_v4(connp, mp, v4dst, port, srcid, &error); if (error != 0) { ud_error: - UDP_STAT(udp_out_err_output); + UDP_STAT(us, udp_out_err_output); ASSERT(mp != NULL); /* mp is freed by the following routine */ udp_ud_err(q, mp, (uchar_t *)addr, (t_scalar_t)addrlen, @@ -7214,10 +7205,12 @@ udp_wput_data(queue_t *q, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen) { conn_t *connp; udp_t *udp; + udp_stack_t *us; q = UDP_WR(q); connp = Q_TO_CONN(q); udp = connp->conn_udp; + us = udp->udp_us; /* udpsockfs should only send down M_DATA for this entry point */ ASSERT(DB_TYPE(mp) == M_DATA); @@ -7238,8 +7231,8 @@ udp_wput_data(queue_t *q, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen) if (tudr_mp == NULL) { mutex_exit(&connp->conn_lock); - BUMP_MIB(&udp_mib, udpOutErrors); - UDP_STAT(udp_out_err_tudr); + BUMP_MIB(&udp->udp_mib, udpOutErrors); + UDP_STAT(us, udp_out_err_tudr); freemsg(mp); return; } @@ -7292,6 +7285,7 @@ udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error) ip6_hbh_t *hopoptsptr = NULL; uint_t hopoptslen = 0; 
boolean_t is_ancillary = B_FALSE; + udp_stack_t *us = udp->udp_us; *error = 0; @@ -7546,9 +7540,9 @@ no_options: if (udp_ip_hdr_len > udp->udp_max_hdr_len) { udp->udp_max_hdr_len = udp_ip_hdr_len; (void) mi_set_sth_wroff(UDP_RD(q), - udp->udp_max_hdr_len + udp_wroff_extra); + udp->udp_max_hdr_len + us->us_wroff_extra); } - mp2 = allocb(udp_ip_hdr_len + udp_wroff_extra, BPRI_LO); + mp2 = allocb(udp_ip_hdr_len + us->us_wroff_extra, BPRI_LO); if (mp2 == NULL) { *error = ENOMEM; goto done; @@ -7647,7 +7641,8 @@ no_options: if (sin6->__sin6_src_id != 0 && IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { ip_srcid_find_id(sin6->__sin6_src_id, - &ip6h->ip6_src, connp->conn_zoneid); + &ip6h->ip6_src, connp->conn_zoneid, + us->us_netstack); } } @@ -7780,7 +7775,8 @@ no_options: * between the first hop (in ip6_dst) and * the destination (in the last routing hdr entry). */ - csum = ip_massage_options_v6(ip6h, rth); + csum = ip_massage_options_v6(ip6h, rth, + us->us_netstack); /* * Verify that the first hop isn't a mapped address. * Routers along the path need to do this verification @@ -7846,7 +7842,7 @@ no_options: mp = NULL; /* We're done. Pass the packet to IP */ - BUMP_MIB(&udp_mib, udpHCOutDatagrams); + BUMP_MIB(&udp->udp_mib, udpHCOutDatagrams); ip_output_v6(connp, mp1, q, IP_WPUT); done: @@ -7856,7 +7852,7 @@ done: } if (*error != 0) { ASSERT(mp != NULL); - BUMP_MIB(&udp_mib, udpOutErrors); + BUMP_MIB(&udp->udp_mib, udpOutErrors); } return (mp); } @@ -7870,10 +7866,12 @@ udp_wput_other(queue_t *q, mblk_t *mp) cred_t *cr; conn_t *connp = Q_TO_CONN(q); udp_t *udp = connp->conn_udp; + udp_stack_t *us; TRACE_1(TR_FAC_UDP, TR_UDP_WPUT_OTHER_START, "udp_wput_other_start: q %p", q); + us = udp->udp_us; db = mp->b_datap; cr = DB_CREDDEF(mp, connp->conn_cred); @@ -8028,7 +8026,7 @@ udp_wput_other(queue_t *q, mblk_t *mp) case ND_SET: /* nd_getset performs the necessary checking */ case ND_GET: - if (nd_getset(q, udp_g_nd, mp)) { + if (nd_getset(q, us->us_nd, mp)) { putnext(UDP_RD(q), mp); TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, "udp_wput_other_end: q %p (%S)", @@ -8059,7 +8057,7 @@ udp_wput_other(queue_t *q, mblk_t *mp) udp_rcv_drain(UDP_RD(q), udp, B_FALSE); ASSERT(!udp->udp_direct_sockfs); - UDP_STAT(udp_sock_fallback); + UDP_STAT(us, udp_sock_fallback); } DB_TYPE(mp) = M_IOCACK; iocp->ioc_error = 0; @@ -8288,57 +8286,183 @@ udp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp, void udp_ddi_init(void) { - int i; - UDP6_MAJ = ddi_name_to_major(UDP6); - udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr, udp_opt_obj.odb_opt_arr_cnt); - if (udp_bind_fanout_size & (udp_bind_fanout_size - 1)) { + udp_cache = kmem_cache_create("udp_cache", sizeof (udp_t), + CACHE_ALIGN_SIZE, NULL, NULL, NULL, NULL, NULL, 0); + + /* + * We want to be informed each time a stack is created or + * destroyed in the kernel, so we can maintain the + * set of udp_stack_t's. + */ + netstack_register(NS_UDP, udp_stack_init, NULL, udp_stack_fini); +} + +void +udp_ddi_destroy(void) +{ + netstack_unregister(NS_UDP); + + kmem_cache_destroy(udp_cache); +} + +/* + * Initialize the UDP stack instance. + */ +static void * +udp_stack_init(netstackid_t stackid, netstack_t *ns) +{ + udp_stack_t *us; + udpparam_t *pa; + int i; + + us = (udp_stack_t *)kmem_zalloc(sizeof (*us), KM_SLEEP); + us->us_netstack = ns; + + us->us_num_epriv_ports = UDP_NUM_EPRIV_PORTS; + us->us_epriv_ports[0] = 2049; + us->us_epriv_ports[1] = 4045; + + /* + * The smallest anonymous port in the priviledged port range which UDP + * looks for free port. 
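After this change udp_ddi_init() and udp_ddi_destroy() only manage the kmem cache and register udp_stack_init()/udp_stack_fini() with the netstack framework; everything that used to be a global now lives in the udp_stack_t those callbacks create and tear down. A stripped-down, user-level sketch of that create/destroy callback shape (module_ops_t and the fixed-size state table are hypothetical stand-ins, not the netstack API):

#include <stddef.h>

#define	MAX_INSTANCES	8		/* toy limit; instance ids are assumed < 8 */

typedef void *(*stack_init_fn)(int);
typedef void (*stack_fini_fn)(int, void *);

typedef struct module_ops {
	stack_init_fn	init;			/* build per-instance state */
	stack_fini_fn	fini;			/* tear it down again */
	void		*state[MAX_INSTANCES];	/* per-instance private data */
} module_ops_t;

/* Called by the (hypothetical) framework when instance 'id' is created. */
static void
instance_create(module_ops_t *ops, int id)
{
	ops->state[id] = ops->init(id);
}

/* Called by the framework when instance 'id' is destroyed. */
static void
instance_destroy(module_ops_t *ops, int id)
{
	ops->fini(id, ops->state[id]);
	ops->state[id] = NULL;
}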
Use in the option UDP_ANONPRIVBIND. + */ + us->us_min_anonpriv_port = 512; + + us->us_bind_fanout_size = udp_bind_fanout_size; + + /* Roundup variable that might have been modified in /etc/system */ + if (us->us_bind_fanout_size & (us->us_bind_fanout_size - 1)) { /* Not a power of two. Round up to nearest power of two */ for (i = 0; i < 31; i++) { - if (udp_bind_fanout_size < (1 << i)) + if (us->us_bind_fanout_size < (1 << i)) break; } - udp_bind_fanout_size = 1 << i; + us->us_bind_fanout_size = 1 << i; } - udp_bind_fanout = kmem_zalloc(udp_bind_fanout_size * + us->us_bind_fanout = kmem_zalloc(us->us_bind_fanout_size * sizeof (udp_fanout_t), KM_SLEEP); - for (i = 0; i < udp_bind_fanout_size; i++) { - mutex_init(&udp_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT, + for (i = 0; i < us->us_bind_fanout_size; i++) { + mutex_init(&us->us_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT, NULL); } - (void) udp_param_register(udp_param_arr, A_CNT(udp_param_arr)); - udp_kstat_init(); + pa = (udpparam_t *)kmem_alloc(sizeof (udp_param_arr), KM_SLEEP); - udp_cache = kmem_cache_create("udp_cache", sizeof (udp_t), - CACHE_ALIGN_SIZE, NULL, NULL, NULL, NULL, NULL, 0); + us->us_param_arr = pa; + bcopy(udp_param_arr, us->us_param_arr, sizeof (udp_param_arr)); + + (void) udp_param_register(&us->us_nd, + us->us_param_arr, A_CNT(udp_param_arr)); + + us->us_kstat = udp_kstat2_init(stackid, &us->us_statistics); + us->us_mibkp = udp_kstat_init(stackid); + return (us); } -void -udp_ddi_destroy(void) +/* + * Free the UDP stack instance. + */ +static void +udp_stack_fini(netstackid_t stackid, void *arg) { + udp_stack_t *us = (udp_stack_t *)arg; int i; - nd_free(&udp_g_nd); - - for (i = 0; i < udp_bind_fanout_size; i++) { - mutex_destroy(&udp_bind_fanout[i].uf_lock); + for (i = 0; i < us->us_bind_fanout_size; i++) { + mutex_destroy(&us->us_bind_fanout[i].uf_lock); } - kmem_free(udp_bind_fanout, udp_bind_fanout_size * + kmem_free(us->us_bind_fanout, us->us_bind_fanout_size * sizeof (udp_fanout_t)); - udp_kstat_fini(); + us->us_bind_fanout = NULL; - kmem_cache_destroy(udp_cache); + nd_free(&us->us_nd); + kmem_free(us->us_param_arr, sizeof (udp_param_arr)); + us->us_param_arr = NULL; + + udp_kstat_fini(stackid, us->us_mibkp); + us->us_mibkp = NULL; + + udp_kstat2_fini(stackid, us->us_kstat); + us->us_kstat = NULL; + bzero(&us->us_statistics, sizeof (us->us_statistics)); + kmem_free(us, sizeof (*us)); +} + +static void * +udp_kstat2_init(netstackid_t stackid, udp_stat_t *us_statisticsp) +{ + kstat_t *ksp; + + udp_stat_t template = { + { "udp_ip_send", KSTAT_DATA_UINT64 }, + { "udp_ip_ire_send", KSTAT_DATA_UINT64 }, + { "udp_ire_null", KSTAT_DATA_UINT64 }, + { "udp_drain", KSTAT_DATA_UINT64 }, + { "udp_sock_fallback", KSTAT_DATA_UINT64 }, + { "udp_rrw_busy", KSTAT_DATA_UINT64 }, + { "udp_rrw_msgcnt", KSTAT_DATA_UINT64 }, + { "udp_out_sw_cksum", KSTAT_DATA_UINT64 }, + { "udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, + { "udp_out_opt", KSTAT_DATA_UINT64 }, + { "udp_out_err_notconn", KSTAT_DATA_UINT64 }, + { "udp_out_err_output", KSTAT_DATA_UINT64 }, + { "udp_out_err_tudr", KSTAT_DATA_UINT64 }, + { "udp_in_pktinfo", KSTAT_DATA_UINT64 }, + { "udp_in_recvdstaddr", KSTAT_DATA_UINT64 }, + { "udp_in_recvopts", KSTAT_DATA_UINT64 }, + { "udp_in_recvif", KSTAT_DATA_UINT64 }, + { "udp_in_recvslla", KSTAT_DATA_UINT64 }, + { "udp_in_recvucred", KSTAT_DATA_UINT64 }, + { "udp_in_recvttl", KSTAT_DATA_UINT64 }, + { "udp_in_recvhopopts", KSTAT_DATA_UINT64 }, + { "udp_in_recvhoplimit", KSTAT_DATA_UINT64 }, + { "udp_in_recvdstopts", 
KSTAT_DATA_UINT64 }, + { "udp_in_recvrtdstopts", KSTAT_DATA_UINT64 }, + { "udp_in_recvrthdr", KSTAT_DATA_UINT64 }, + { "udp_in_recvpktinfo", KSTAT_DATA_UINT64 }, + { "udp_in_recvtclass", KSTAT_DATA_UINT64 }, + { "udp_in_timestamp", KSTAT_DATA_UINT64 }, +#ifdef DEBUG + { "udp_data_conn", KSTAT_DATA_UINT64 }, + { "udp_data_notconn", KSTAT_DATA_UINT64 }, +#endif + }; + + ksp = kstat_create_netstack(UDP_MOD_NAME, 0, "udpstat", "net", + KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), + KSTAT_FLAG_VIRTUAL, stackid); + + if (ksp == NULL) + return (NULL); + + bcopy(&template, us_statisticsp, sizeof (template)); + ksp->ks_data = (void *)us_statisticsp; + ksp->ks_private = (void *)(uintptr_t)stackid; + + kstat_install(ksp); + return (ksp); } static void -udp_kstat_init(void) +udp_kstat2_fini(netstackid_t stackid, kstat_t *ksp) { + if (ksp != NULL) { + ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); + kstat_delete_netstack(ksp, stackid); + } +} + +static void * +udp_kstat_init(netstackid_t stackid) +{ + kstat_t *ksp; + udp_named_kstat_t template = { { "inDatagrams", KSTAT_DATA_UINT64, 0 }, { "inErrors", KSTAT_DATA_UINT32, 0 }, @@ -8348,40 +8472,30 @@ udp_kstat_init(void) { "outErrors", KSTAT_DATA_UINT32, 0 }, }; - udp_mibkp = kstat_create(UDP_MOD_NAME, 0, UDP_MOD_NAME, - "mib2", KSTAT_TYPE_NAMED, NUM_OF_FIELDS(udp_named_kstat_t), 0); + ksp = kstat_create_netstack(UDP_MOD_NAME, 0, UDP_MOD_NAME, "mib2", + KSTAT_TYPE_NAMED, + NUM_OF_FIELDS(udp_named_kstat_t), 0, stackid); - if (udp_mibkp == NULL) - return; + if (ksp == NULL || ksp->ks_data == NULL) + return (NULL); template.entrySize.value.ui32 = sizeof (mib2_udpEntry_t); template.entry6Size.value.ui32 = sizeof (mib2_udp6Entry_t); - bcopy(&template, udp_mibkp->ks_data, sizeof (template)); + bcopy(&template, ksp->ks_data, sizeof (template)); + ksp->ks_update = udp_kstat_update; + ksp->ks_private = (void *)(uintptr_t)stackid; - udp_mibkp->ks_update = udp_kstat_update; - - kstat_install(udp_mibkp); - - if ((udp_ksp = kstat_create(UDP_MOD_NAME, 0, "udpstat", - "net", KSTAT_TYPE_NAMED, - sizeof (udp_statistics) / sizeof (kstat_named_t), - KSTAT_FLAG_VIRTUAL)) != NULL) { - udp_ksp->ks_data = &udp_statistics; - kstat_install(udp_ksp); - } + kstat_install(ksp); + return (ksp); } static void -udp_kstat_fini(void) +udp_kstat_fini(netstackid_t stackid, kstat_t *ksp) { - if (udp_ksp != NULL) { - kstat_delete(udp_ksp); - udp_ksp = NULL; - } - if (udp_mibkp != NULL) { - kstat_delete(udp_mibkp); - udp_mibkp = NULL; + if (ksp != NULL) { + ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); + kstat_delete_netstack(ksp, stackid); } } @@ -8389,6 +8503,9 @@ static int udp_kstat_update(kstat_t *kp, int rw) { udp_named_kstat_t *udpkp; + netstackid_t stackid = (netstackid_t)(uintptr_t)kp->ks_private; + netstack_t *ns; + udp_stack_t *us; if ((kp == NULL) || (kp->ks_data == NULL)) return (EIO); @@ -8396,13 +8513,21 @@ udp_kstat_update(kstat_t *kp, int rw) if (rw == KSTAT_WRITE) return (EACCES); + ns = netstack_find_by_stackid(stackid); + if (ns == NULL) + return (-1); + us = ns->netstack_udp; + if (us == NULL) { + netstack_rele(ns); + return (-1); + } udpkp = (udp_named_kstat_t *)kp->ks_data; - udpkp->inDatagrams.value.ui64 = udp_mib.udpHCInDatagrams; - udpkp->inErrors.value.ui32 = udp_mib.udpInErrors; - udpkp->outDatagrams.value.ui64 = udp_mib.udpHCOutDatagrams; - udpkp->outErrors.value.ui32 = udp_mib.udpOutErrors; - + udpkp->inDatagrams.value.ui32 = us->us_udp_mib.udpHCInDatagrams; + udpkp->inErrors.value.ui32 = us->us_udp_mib.udpInErrors; 
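udp_kstat_update() above recovers its per-stack state from the stack id stored in ks_private, takes a hold on the netstack, copies the counters, and drops the hold. A minimal user-level sketch of that lookup-hold-copy-release shape; the toy registry and its hold/rele helpers are illustrative, not the netstack interfaces:

#include <stddef.h>
#include <stdint.h>

typedef struct stack_state {
	int		refcnt;
	uint64_t	in_datagrams;
} stack_state_t;

static stack_state_t *registry[8];	/* toy registry indexed by stack id */

static stack_state_t *
registry_hold(int stackid)
{
	stack_state_t *ss;

	if (stackid < 0 || stackid >= 8 || (ss = registry[stackid]) == NULL)
		return (NULL);
	ss->refcnt++;			/* a real registry would lock here */
	return (ss);
}

static void
registry_rele(stack_state_t *ss)
{
	ss->refcnt--;
}

/* The private cookie carries only the stack id; resolve it on every call. */
static int
stats_update(void *ks_private, uint64_t *out)
{
	int stackid = (int)(intptr_t)ks_private;
	stack_state_t *ss = registry_hold(stackid);

	if (ss == NULL)
		return (-1);		/* the instance is already gone */
	*out = ss->in_datagrams;
	registry_rele(ss);
	return (0);
}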
+ udpkp->outDatagrams.value.ui32 = us->us_udp_mib.udpHCOutDatagrams; + udpkp->outErrors.value.ui32 = us->us_udp_mib.udpOutErrors; + netstack_rele(ns); return (0); } @@ -8504,6 +8629,7 @@ udp_rrw(queue_t *q, struiod_t *dp) { mblk_t *mp; udp_t *udp = Q_TO_UDP(_RD(UDP_WR(q))); + udp_stack_t *us = udp->udp_us; /* We should never get here when we're in SNMP mode */ ASSERT(!(udp->udp_connp->conn_flags & IPCL_UDPMOD)); @@ -8517,7 +8643,7 @@ udp_rrw(queue_t *q, struiod_t *dp) mutex_enter(&udp->udp_drain_lock); if (!udp->udp_direct_sockfs) { mutex_exit(&udp->udp_drain_lock); - UDP_STAT(udp_rrw_busy); + UDP_STAT(us, udp_rrw_busy); return (EBUSY); } if ((mp = udp->udp_rcv_list_head) != NULL) { @@ -8530,7 +8656,7 @@ udp_rrw(queue_t *q, struiod_t *dp) udp->udp_rcv_cnt -= size; udp->udp_rcv_msgcnt--; - UDP_STAT(udp_rrw_msgcnt); + UDP_STAT(us, udp_rrw_msgcnt); /* No longer flow-controlling? */ if (udp->udp_rcv_cnt < udp->udp_rcv_hiwat && @@ -8616,6 +8742,7 @@ static void udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing) { mblk_t *mp; + udp_stack_t *us = udp->udp_us; ASSERT(q == RD(q)); @@ -8631,7 +8758,7 @@ udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing) mutex_exit(&udp->udp_drain_lock); if (udp->udp_rcv_list_head != NULL) - UDP_STAT(udp_drain); + UDP_STAT(us, udp_drain); /* * Send up everything via putnext(); note here that we @@ -8660,10 +8787,12 @@ udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing) static size_t udp_set_rcv_hiwat(udp_t *udp, size_t size) { + udp_stack_t *us = udp->udp_us; + /* We add a bit of extra buffering */ size += size >> 1; - if (size > udp_max_buf) - size = udp_max_buf; + if (size > us->us_max_buf) + size = us->us_max_buf; udp->udp_rcv_hiwat = size; return (size); @@ -8673,7 +8802,9 @@ udp_set_rcv_hiwat(udp_t *udp, size_t size) * Little helper for IPsec's NAT-T processing. */ boolean_t -udp_compute_checksum(void) +udp_compute_checksum(netstack_t *ns) { - return (udp_do_checksum); + udp_stack_t *us = ns->netstack_udp; + + return (us->us_do_checksum); } diff --git a/usr/src/uts/common/inet/udp/udp_opt_data.c b/usr/src/uts/common/inet/udp/udp_opt_data.c index 7986a989f5..12f13e11b8 100644 --- a/usr/src/uts/common/inet/udp/udp_opt_data.c +++ b/usr/src/uts/common/inet/udp/udp_opt_data.c @@ -143,7 +143,7 @@ opdes_t udp_opt_arr[] = { { IP_PKTINFO, IPPROTO_IP, OA_RW, OA_RW, OP_NP, (OP_PASSNEXT|OP_NODEFAULT|OP_VARLEN), sizeof (struct in_pktinfo), -1 /* not initialized */ }, -{ IP_NEXTHOP, IPPROTO_IP, OA_RW, OA_RW, OP_CONFIG, OP_PASSNEXT, +{ IP_NEXTHOP, IPPROTO_IP, OA_R, OA_RW, OP_CONFIG, OP_PASSNEXT, sizeof (in_addr_t), -1 /* not initialized */ }, { MCAST_JOIN_GROUP, IPPROTO_IP, OA_X, OA_X, OP_NP, diff --git a/usr/src/uts/common/inet/udp_impl.h b/usr/src/uts/common/inet/udp_impl.h index b5c2282f39..c9b6c8128b 100644 --- a/usr/src/uts/common/inet/udp_impl.h +++ b/usr/src/uts/common/inet/udp_impl.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -42,6 +42,7 @@ extern "C" { #ifdef _KERNEL #include <sys/int_types.h> +#include <sys/netstack.h> #include <netinet/in.h> #include <netinet/ip6.h> @@ -60,6 +61,116 @@ typedef enum { UDP_SQUEUE = 3 /* Single threaded using squeues */ } udp_mode_t; +/* + * Bind hash list size and hash function. It has to be a power of 2 for + * hashing. 
+ */ +#define UDP_BIND_FANOUT_SIZE 512 +#define UDP_BIND_HASH(lport, size) \ + ((ntohs((uint16_t)lport)) & (size - 1)) + +/* UDP bind fanout hash structure. */ +typedef struct udp_fanout_s { + struct udp_s *uf_udp; + kmutex_t uf_lock; +#if defined(_LP64) || defined(_I32LPx) + char uf_pad[48]; +#else + char uf_pad[56]; +#endif +} udp_fanout_t; + + +/* Kstats */ +typedef struct udp_stat { /* Class "net" kstats */ + kstat_named_t udp_ip_send; + kstat_named_t udp_ip_ire_send; + kstat_named_t udp_ire_null; + kstat_named_t udp_drain; + kstat_named_t udp_sock_fallback; + kstat_named_t udp_rrw_busy; + kstat_named_t udp_rrw_msgcnt; + kstat_named_t udp_out_sw_cksum; + kstat_named_t udp_out_sw_cksum_bytes; + kstat_named_t udp_out_opt; + kstat_named_t udp_out_err_notconn; + kstat_named_t udp_out_err_output; + kstat_named_t udp_out_err_tudr; + kstat_named_t udp_in_pktinfo; + kstat_named_t udp_in_recvdstaddr; + kstat_named_t udp_in_recvopts; + kstat_named_t udp_in_recvif; + kstat_named_t udp_in_recvslla; + kstat_named_t udp_in_recvucred; + kstat_named_t udp_in_recvttl; + kstat_named_t udp_in_recvhopopts; + kstat_named_t udp_in_recvhoplimit; + kstat_named_t udp_in_recvdstopts; + kstat_named_t udp_in_recvrtdstopts; + kstat_named_t udp_in_recvrthdr; + kstat_named_t udp_in_recvpktinfo; + kstat_named_t udp_in_recvtclass; + kstat_named_t udp_in_timestamp; + kstat_named_t udp_ip_recvpktinfo; +#ifdef DEBUG + kstat_named_t udp_data_conn; + kstat_named_t udp_data_notconn; +#endif + +} udp_stat_t; + +/* Named Dispatch Parameter Management Structure */ +typedef struct udpparam_s { + uint32_t udp_param_min; + uint32_t udp_param_max; + uint32_t udp_param_value; + char *udp_param_name; +} udpparam_t; + +#define UDP_NUM_EPRIV_PORTS 64 + +/* + * UDP stack instances + */ +struct udp_stack { + netstack_t *us_netstack; /* Common netstack */ + + uint_t us_bind_fanout_size; + udp_fanout_t *us_bind_fanout; + + int us_num_epriv_ports; + in_port_t us_epriv_ports[UDP_NUM_EPRIV_PORTS]; + + /* Hint not protected by any lock */ + in_port_t us_next_port_to_try; + + IDP us_nd; /* Points to table of UDP ND variables. */ + udpparam_t *us_param_arr; /* ndd variable table */ + + kstat_t *us_mibkp; /* kstats exporting mib data */ + kstat_t *us_kstat; + udp_stat_t us_statistics; + + mib2_udp_t us_udp_mib; /* SNMP fixed size info */ + +/* + * This controls the rate some ndd info report functions can be used + * by non-priviledged users. It stores the last time such info is + * requested. When those report functions are called again, this + * is checked with the current time and compare with the ndd param + * udp_ndd_get_info_interval. + */ + clock_t us_last_ndd_get_info_time; + +/* + * The smallest anonymous port in the priviledged port range which UDP + * looks for free port. Use in the option UDP_ANONPRIVBIND. 
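UDP_BIND_HASH() above can select a bucket with a simple mask only because the fanout size is guaranteed to be a power of two; udp_stack_init() rounds up any value patched in /etc/system before allocating the table. A compact illustration of both pieces:

#include <stdint.h>
#include <arpa/inet.h>		/* ntohs */

/* Round up so that (size - 1) is a contiguous bit mask. */
static uint32_t
roundup_pow2(uint32_t size)
{
	uint32_t p = 1;

	while (p < size)
		p <<= 1;
	return (p);
}

/* Pick a bind-hash bucket the way UDP_BIND_HASH() does: port & (size - 1). */
static uint32_t
bind_hash(uint16_t lport_netorder, uint32_t fanout_size)
{
	return (ntohs(lport_netorder) & (fanout_size - 1));
}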
+ */ + in_port_t us_min_anonpriv_port; + +}; +typedef struct udp_stack udp_stack_t; + /* Internal udp control structure, one per open stream */ typedef struct udp_s { uint32_t udp_state; /* TPI state */ @@ -155,7 +266,9 @@ typedef struct udp_s { uint64_t udp_open_time; /* time when this was opened */ pid_t udp_open_pid; /* process id when this was opened */ + udp_stack_t *udp_us; /* Stack instance for zone */ } udp_t; +#define udp_mib udp_us->us_udp_mib /* UDP Protocol header */ /* UDP Protocol header aligned */ @@ -166,74 +279,28 @@ typedef struct udpahdr_s { uint16_t uha_checksum; /* UDP checksum */ } udpha_t; -/* Named Dispatch Parameter Management Structure */ -typedef struct udpparam_s { - uint32_t udp_param_min; - uint32_t udp_param_max; - uint32_t udp_param_value; - char *udp_param_name; -} udpparam_t; - -extern udpparam_t udp_param_arr[]; - -#define udp_wroff_extra udp_param_arr[0].udp_param_value -#define udp_ipv4_ttl udp_param_arr[1].udp_param_value -#define udp_ipv6_hoplimit udp_param_arr[2].udp_param_value -#define udp_smallest_nonpriv_port udp_param_arr[3].udp_param_value -#define udp_do_checksum udp_param_arr[4].udp_param_value -#define udp_smallest_anon_port udp_param_arr[5].udp_param_value -#define udp_largest_anon_port udp_param_arr[6].udp_param_value -#define udp_xmit_hiwat udp_param_arr[7].udp_param_value -#define udp_xmit_lowat udp_param_arr[8].udp_param_value -#define udp_recv_hiwat udp_param_arr[9].udp_param_value -#define udp_max_buf udp_param_arr[10].udp_param_value -#define udp_ndd_get_info_interval udp_param_arr[11].udp_param_value +#define us_wroff_extra us_param_arr[0].udp_param_value +#define us_ipv4_ttl us_param_arr[1].udp_param_value +#define us_ipv6_hoplimit us_param_arr[2].udp_param_value +#define us_smallest_nonpriv_port us_param_arr[3].udp_param_value +#define us_do_checksum us_param_arr[4].udp_param_value +#define us_smallest_anon_port us_param_arr[5].udp_param_value +#define us_largest_anon_port us_param_arr[6].udp_param_value +#define us_xmit_hiwat us_param_arr[7].udp_param_value +#define us_xmit_lowat us_param_arr[8].udp_param_value +#define us_recv_hiwat us_param_arr[9].udp_param_value +#define us_max_buf us_param_arr[10].udp_param_value +#define us_ndd_get_info_interval us_param_arr[11].udp_param_value -/* Kstats */ -typedef struct { /* Class "net" kstats */ - kstat_named_t udp_ip_send; - kstat_named_t udp_ip_ire_send; - kstat_named_t udp_ire_null; - kstat_named_t udp_drain; - kstat_named_t udp_sock_fallback; - kstat_named_t udp_rrw_busy; - kstat_named_t udp_rrw_msgcnt; - kstat_named_t udp_out_sw_cksum; - kstat_named_t udp_out_sw_cksum_bytes; - kstat_named_t udp_out_opt; - kstat_named_t udp_out_err_notconn; - kstat_named_t udp_out_err_output; - kstat_named_t udp_out_err_tudr; - kstat_named_t udp_in_pktinfo; - kstat_named_t udp_in_recvdstaddr; - kstat_named_t udp_in_recvopts; - kstat_named_t udp_in_recvif; - kstat_named_t udp_in_recvslla; - kstat_named_t udp_in_recvucred; - kstat_named_t udp_in_recvttl; - kstat_named_t udp_in_recvhopopts; - kstat_named_t udp_in_recvhoplimit; - kstat_named_t udp_in_recvdstopts; - kstat_named_t udp_in_recvrtdstopts; - kstat_named_t udp_in_recvrthdr; - kstat_named_t udp_in_recvpktinfo; - kstat_named_t udp_in_recvtclass; - kstat_named_t udp_in_timestamp; - kstat_named_t udp_ip_recvpktinfo; -#ifdef DEBUG - kstat_named_t udp_data_conn; - kstat_named_t udp_data_notconn; -#endif -} udp_stat_t; -extern udp_stat_t udp_statistics; +#define UDP_STAT(us, x) ((us)->us_statistics.x.value.ui64++) +#define UDP_STAT_UPDATE(us, x, n) \ 
+ ((us)->us_statistics.x.value.ui64 += (n)) -#define UDP_STAT(x) (udp_statistics.x.value.ui64++) -#define UDP_STAT_UPDATE(x, n) (udp_statistics.x.value.ui64 += (n)) #ifdef DEBUG -#define UDP_DBGSTAT(x) UDP_STAT(x) +#define UDP_DBGSTAT(us, x) UDP_STAT(us, x) #else -#define UDP_DBGSTAT(x) +#define UDP_DBGSTAT(us, x) #endif /* DEBUG */ extern major_t UDP6_MAJ; @@ -250,7 +317,7 @@ extern void udp_ddi_init(void); extern void udp_ddi_destroy(void); extern void udp_resume_bind(conn_t *, mblk_t *); extern void udp_conn_recv(conn_t *, mblk_t *); -extern boolean_t udp_compute_checksum(void); +extern boolean_t udp_compute_checksum(netstack_t *); extern void udp_wput_data(queue_t *, mblk_t *, struct sockaddr *, socklen_t); diff --git a/usr/src/uts/common/io/aggr/aggr_send.c b/usr/src/uts/common/io/aggr/aggr_send.c index 6c5787a297..974d6a1d36 100644 --- a/usr/src/uts/common/io/aggr/aggr_send.c +++ b/usr/src/uts/common/io/aggr/aggr_send.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -44,6 +44,8 @@ #include <inet/ip6.h> #include <inet/tcp.h> #include <netinet/udp.h> +#include <inet/ipsec_impl.h> +#include <inet/sadb.h> #include <inet/ipsecesp.h> #include <inet/ipsecah.h> diff --git a/usr/src/uts/common/io/dld/dld_drv.c b/usr/src/uts/common/io/dld/dld_drv.c index 778a73528d..e409e165f7 100644 --- a/usr/src/uts/common/io/dld/dld_drv.c +++ b/usr/src/uts/common/io/dld/dld_drv.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -37,6 +37,7 @@ #include <sys/dld.h> #include <sys/dld_impl.h> #include <sys/dls_impl.h> +#include <sys/vlan.h> #include <inet/common.h> /* @@ -486,6 +487,95 @@ failed: miocnak(q, mp, 0, err); } +/* + * DLDIOCHOLDVLAN + */ +static void +drv_hold_vlan(dld_ctl_str_t *ctls, mblk_t *mp) +{ + queue_t *q = ctls->cs_wq; + dld_hold_vlan_t *dhv; + mblk_t *nmp; + int err; + dls_vlan_t *dvp; + + nmp = mp->b_cont; + if (nmp == NULL || MBLKL(nmp) < sizeof (dld_hold_vlan_t)) { + err = EINVAL; + miocnak(q, mp, 0, err); + return; + } + dhv = (dld_hold_vlan_t *)nmp->b_rptr; + + if ((err = dls_vlan_hold(dhv->dhv_name, &dvp, B_TRUE)) != 0) { + miocnak(q, mp, 0, err); + return; + } + + if ((err = dls_vlan_setzoneid(dhv->dhv_name, dhv->dhv_zid, + dhv->dhv_docheck)) != 0) + miocnak(q, mp, 0, err); + else + miocack(q, mp, 0, 0); +} + +/* + * DLDIOCRELEVLAN + */ +static void +drv_rele_vlan(dld_ctl_str_t *ctls, mblk_t *mp) +{ + queue_t *q = ctls->cs_wq; + dld_hold_vlan_t *dhv; + mblk_t *nmp; + int err; + + nmp = mp->b_cont; + if (nmp == NULL || MBLKL(nmp) < sizeof (dld_hold_vlan_t)) { + err = EINVAL; + miocnak(q, mp, 0, err); + return; + } + dhv = (dld_hold_vlan_t *)nmp->b_rptr; + + if ((err = dls_vlan_setzoneid(dhv->dhv_name, dhv->dhv_zid, + dhv->dhv_docheck)) != 0) { + miocnak(q, mp, 0, err); + return; + } + + if ((err = dls_vlan_rele_by_name(dhv->dhv_name)) != 0) { + miocnak(q, mp, 0, err); + return; + } + + miocack(q, mp, 0, 0); +} + +/* + * DLDIOCZIDGET + */ +static void +drv_ioc_zid_get(dld_ctl_str_t *ctls, mblk_t *mp) +{ + queue_t *q = ctls->cs_wq; + dld_hold_vlan_t *dhv; + mblk_t *nmp; + int err; + + nmp = mp->b_cont; + if (nmp == NULL || MBLKL(nmp) < sizeof (dld_hold_vlan_t)) { + err = EINVAL; + miocnak(q, mp, 0, err); + return; + } + dhv = (dld_hold_vlan_t *)nmp->b_rptr; + + if ((err 
= dls_vlan_getzoneid(dhv->dhv_name, &dhv->dhv_zid)) != 0) + miocnak(q, mp, 0, err); + else + miocack(q, mp, sizeof (dld_hold_vlan_t), 0); +} /* * Process an IOCTL message received by the control node. @@ -512,6 +602,15 @@ drv_ioc(dld_ctl_str_t *ctls, mblk_t *mp) case DLDIOCSECOBJUNSET: drv_ioc_secobj_unset(ctls, mp); return; + case DLDIOCHOLDVLAN: + drv_hold_vlan(ctls, mp); + return; + case DLDIOCRELEVLAN: + drv_rele_vlan(ctls, mp); + return; + case DLDIOCZIDGET: + drv_ioc_zid_get(ctls, mp); + return; default: miocnak(ctls->cs_wq, mp, 0, ENOTSUP); return; diff --git a/usr/src/uts/common/io/dld/dld_str.c b/usr/src/uts/common/io/dld/dld_str.c index 9ebffec151..377e8c3be0 100644 --- a/usr/src/uts/common/io/dld/dld_str.c +++ b/usr/src/uts/common/io/dld/dld_str.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -57,21 +57,15 @@ static void ioc_raw(dld_str_t *, mblk_t *); static void ioc_fast(dld_str_t *, mblk_t *); static void ioc(dld_str_t *, mblk_t *); static void dld_ioc(dld_str_t *, mblk_t *); -static minor_t dld_minor_hold(boolean_t); -static void dld_minor_rele(minor_t); static void str_mdata_raw_put(dld_str_t *, mblk_t *); static mblk_t *i_dld_ether_header_update_tag(mblk_t *, uint_t, uint16_t); static mblk_t *i_dld_ether_header_strip_tag(mblk_t *); static uint32_t str_count; static kmem_cache_t *str_cachep; -static vmem_t *minor_arenap; static uint32_t minor_count; static mod_hash_t *str_hashp; -#define MINOR_TO_PTR(minor) ((void *)(uintptr_t)(minor)) -#define PTR_TO_MINOR(ptr) ((minor_t)(uintptr_t)(ptr)) - #define STR_HASHSZ 64 #define STR_HASH_KEY(key) ((mod_hash_key_t)(uintptr_t)(key)) @@ -213,9 +207,12 @@ dld_finddevinfo(dev_t dev) return (NULL); mod_hash_walk(str_hashp, i_dld_str_walker, &state); - return (state.ds_dip); -} + if (state.ds_dip != NULL || state.ds_minor <= DLD_MAX_MINOR) + return (state.ds_dip); + /* See if it's a minor node of a VLAN */ + return (dls_finddevinfo(dev)); +} /* * devo_getinfo: getinfo(9e) @@ -273,8 +270,6 @@ dld_open(queue_t *rq, dev_t *devp, int flag, int sflag, cred_t *credp) major = getmajor(*devp); minor = getminor(*devp); - if (minor > DLD_MAX_MINOR) - return (ENODEV); /* * Create a new dld_str_t for the stream. This will grab a new minor @@ -291,8 +286,12 @@ dld_open(queue_t *rq, dev_t *devp, int flag, int sflag, cred_t *credp) /* * Style 1 open */ + t_uscalar_t ppa; + + if ((dls_ppa_from_minor(minor, &ppa)) != 0) + goto failed; - if ((err = dld_str_attach(dsp, (t_uscalar_t)minor - 1)) != 0) + if ((err = dld_str_attach(dsp, ppa)) != 0) goto failed; ASSERT(dsp->ds_dlstate == DL_UNBOUND); } else { @@ -558,20 +557,10 @@ dld_str_init(void) ASSERT(str_cachep != NULL); /* - * Allocate a vmem arena to manage minor numbers. The range of the - * arena will be from DLD_MAX_MINOR + 1 to MAXMIN (maximum legal - * minor number). - */ - minor_arenap = vmem_create("dld_minor_arena", - MINOR_TO_PTR(DLD_MAX_MINOR + 1), MAXMIN, 1, NULL, NULL, NULL, 0, - VM_SLEEP | VMC_IDENTIFIER); - ASSERT(minor_arenap != NULL); - - /* * Create a hash table for maintaining dld_str_t's. * The ds_minor field (the clone minor number) of a dld_str_t * is used as a key for this hash table because this number is - * globally unique (allocated from "dld_minor_arena"). + * globally unique (allocated from "dls_minor_arena"). 
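In dld_open() above, minors are no longer capped at DLD_MAX_MINOR; dls_ppa_from_minor() translates small minors directly and sends larger ones to the VLAN layer, and dld_finddevinfo() falls back the same way. A sketch of that split; the constant and the toy VLAN table are illustrative:

#include <stddef.h>
#include <stdint.h>

#define	MAX_STATIC_MINOR	1024	/* stands in for DLD_MAX_MINOR */

typedef struct vlan_entry {
	uint32_t	minor;
	uint32_t	ppa;
} vlan_entry_t;

static vlan_entry_t vlan_table[] = {	/* toy table; the real code uses a hash */
	{ 1025, 1001 },
};

/* Small minors map directly to a PPA; larger ones belong to created VLANs. */
static int
ppa_from_minor(uint32_t minor, uint32_t *ppa)
{
	if (minor <= MAX_STATIC_MINOR) {
		*ppa = minor - 1;	/* style-1 opens: ppa = minor - 1 */
		return (0);
	}
	for (size_t i = 0; i < sizeof (vlan_table) / sizeof (vlan_table[0]); i++) {
		if (vlan_table[i].minor == minor) {
			*ppa = vlan_table[i].ppa;
			return (0);
		}
	}
	return (-1);			/* ENOENT in the kernel code */
}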
*/ str_hashp = mod_hash_create_idhash("dld_str_hash", STR_HASHSZ, mod_hash_null_valdtor); @@ -599,7 +588,6 @@ dld_str_fini(void) * Destroy object cache. */ kmem_cache_destroy(str_cachep); - vmem_destroy(minor_arenap); mod_hash_destroy_idhash(str_hashp); return (0); } @@ -723,8 +711,11 @@ str_constructor(void *buf, void *cdrarg, int kmflags) /* * Allocate a new minor number. */ - if ((dsp->ds_minor = dld_minor_hold(kmflags == KM_SLEEP)) == 0) + atomic_add_32(&minor_count, 1); + if ((dsp->ds_minor = dls_minor_hold(kmflags == KM_SLEEP)) == 0) { + atomic_add_32(&minor_count, -1); return (-1); + } /* * Initialize the DLPI state machine. @@ -773,7 +764,8 @@ str_destructor(void *buf, void *cdrarg) /* * Release the minor number. */ - dld_minor_rele(dsp->ds_minor); + dls_minor_rele(dsp->ds_minor); + atomic_add_32(&minor_count, -1); ASSERT(!RW_LOCK_HELD(&dsp->ds_lock)); rw_destroy(&dsp->ds_lock); @@ -2089,38 +2081,3 @@ ioc(dld_str_t *dsp, mblk_t *mp) rw_exit(&dsp->ds_lock); mac_ioctl(mh, q, mp); } - -/* - * Allocate a new minor number. - */ -static minor_t -dld_minor_hold(boolean_t sleep) -{ - minor_t minor; - - /* - * Grab a value from the arena. - */ - atomic_add_32(&minor_count, 1); - if ((minor = PTR_TO_MINOR(vmem_alloc(minor_arenap, 1, - (sleep) ? VM_SLEEP : VM_NOSLEEP))) == 0) { - atomic_add_32(&minor_count, -1); - return (0); - } - - return (minor); -} - -/* - * Release a previously allocated minor number. - */ -static void -dld_minor_rele(minor_t minor) -{ - /* - * Return the value to the arena. - */ - vmem_free(minor_arenap, MINOR_TO_PTR(minor), 1); - - atomic_add_32(&minor_count, -1); -} diff --git a/usr/src/uts/common/io/dls/dls.c b/usr/src/uts/common/io/dls/dls.c index a8e1089776..5049286781 100644 --- a/usr/src/uts/common/io/dls/dls.c +++ b/usr/src/uts/common/io/dls/dls.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -53,7 +53,6 @@ struct dls_kstats dls_kstat = { "soft_ring_pkt_drop", KSTAT_DATA_UINT32 }, }; - /* * Private functions. */ @@ -226,6 +225,17 @@ dls_open(const char *name, dls_channel_t *dcp) atomic_add_32(&i_dls_impl_count, 1); /* + * Set the di_zid to the zone id of current zone + */ + dip->di_zid = getzoneid(); + + /* + * Add this dls_impl_t to the list of the "opened stream" + * list of the corresponding dls_vlan_t + */ + dls_vlan_add_impl(dvp, dip); + + /* * Hand back a reference to the dls_impl_t. */ *dcp = (dls_channel_t)dip; @@ -277,6 +287,12 @@ dls_close(dls_channel_t dc) } dip->di_dmap = NULL; + /* + * Remove this dls_impl_t from the list of the "open streams" + * list of the corresponding dls_vlan_t + */ + dls_vlan_remove_impl(dvp, dip); + rw_exit(&(dip->di_lock)); /* @@ -870,3 +886,15 @@ dls_active_clear(dls_channel_t dc) out: rw_exit(&dip->di_lock); } + +dev_info_t * +dls_finddevinfo(dev_t dev) +{ + return (dls_vlan_finddevinfo(dev)); +} + +int +dls_ppa_from_minor(minor_t minor, t_uscalar_t *ppa) +{ + return (dls_vlan_ppa_from_minor(minor, ppa)); +} diff --git a/usr/src/uts/common/io/dls/dls_vlan.c b/usr/src/uts/common/io/dls/dls_vlan.c index bea767627a..9d98659e50 100644 --- a/usr/src/uts/common/io/dls/dls_vlan.c +++ b/usr/src/uts/common/io/dls/dls_vlan.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -32,6 +32,7 @@ #include <sys/types.h> #include <sys/sysmacros.h> #include <sys/atomic.h> +#include <sys/mkdev.h> #include <sys/modhash.h> #include <sys/kstat.h> #include <sys/vlan.h> @@ -39,12 +40,18 @@ #include <sys/ctype.h> #include <sys/dls.h> #include <sys/dls_impl.h> +#include <sys/dld.h> static kmem_cache_t *i_dls_vlan_cachep; static mod_hash_t *i_dls_vlan_hash; +static mod_hash_t *i_dls_vlan_dev_hash; static krwlock_t i_dls_vlan_lock; static uint_t i_dls_vlan_count; +static vmem_t *minor_arenap; +#define MINOR_TO_PTR(minor) ((void *)(uintptr_t)(minor)) +#define PTR_TO_MINOR(ptr) ((minor_t)(uintptr_t)(ptr)) + #define VLAN_HASHSZ 67 /* prime */ /* @@ -90,8 +97,24 @@ dls_vlan_init(void) i_dls_vlan_hash = mod_hash_create_extended("dls_vlan_hash", VLAN_HASHSZ, mod_hash_null_keydtor, mod_hash_null_valdtor, mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP); + /* + * Create a second hash table, keyed by minor, of dls_vlan_t. + * The number of the hash slots is the same. + */ + i_dls_vlan_dev_hash = mod_hash_create_idhash("dls_vlan_dev_hash", + VLAN_HASHSZ, mod_hash_null_valdtor); rw_init(&i_dls_vlan_lock, NULL, RW_DEFAULT, NULL); i_dls_vlan_count = 0; + + /* + * Allocate a vmem arena to manage minor numbers. The range of the + * arena will be from DLD_MAX_MINOR + 1 to MAXMIN (maximum legal + * minor number). + */ + minor_arenap = vmem_create("dls_minor_arena", + MINOR_TO_PTR(DLD_MAX_MINOR + 1), MAXMIN, 1, NULL, NULL, NULL, 0, + VM_SLEEP | VMC_IDENTIFIER); + ASSERT(minor_arenap != NULL); } int @@ -104,12 +127,15 @@ dls_vlan_fini(void) * Destroy the hash table */ mod_hash_destroy_hash(i_dls_vlan_hash); + mod_hash_destroy_hash(i_dls_vlan_dev_hash); rw_destroy(&i_dls_vlan_lock); /* * Destroy the kmem_cache. */ kmem_cache_destroy(i_dls_vlan_cachep); + + vmem_destroy(minor_arenap); return (0); } @@ -235,6 +261,8 @@ dls_vlan_hold(const char *name, dls_vlan_t **dvpp, boolean_t create_vlan) dls_vlan_t *dvp; dls_link_t *dlp; boolean_t vlan_created = B_FALSE; + uint16_t vid; + uint_t ddi_inst; again: rw_enter(&i_dls_vlan_lock, RW_WRITER); @@ -243,8 +271,7 @@ again: (mod_hash_val_t *)&dvp); if (err != 0) { char mac[MAXNAMELEN]; - uint_t index, ddi_inst, mac_ppa, len; - uint16_t vid; + uint_t index, mac_ppa, len; ASSERT(err == MH_ERR_NOTFOUND); @@ -298,6 +325,34 @@ again: if ((err = dls_mac_hold(dlp)) != 0) goto done; + /* Create a minor node for this VLAN */ + if (vid != 0 && vlan_created) { + /* A tagged VLAN */ + dvp->dv_minor = dls_minor_hold(B_TRUE); + dvp->dv_ppa = DLS_VIDINST2PPA(vid, ddi_inst); + + err = mod_hash_insert(i_dls_vlan_dev_hash, + (mod_hash_key_t)(uintptr_t)dvp->dv_minor, + (mod_hash_val_t)dvp); + ASSERT(err == 0); + + err = mac_vlan_create(dlp->dl_mh, name, dvp->dv_minor); + + if (err != 0) { + mod_hash_val_t val; + + err = mod_hash_remove(i_dls_vlan_dev_hash, + (mod_hash_key_t)(uintptr_t)dvp->dv_minor, + (mod_hash_val_t *)&val); + ASSERT(err == 0); + ASSERT(dvp == (dls_vlan_t *)val); + + dvp->dv_minor = 0; + dls_mac_rele(dlp); + goto done; + } + } + /* * Do not allow the creation of tagged VLAN interfaces on * non-Ethernet links. 
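dls_vlan_hold() above allocates a minor, publishes the vlan in the minor-keyed hash, and only then calls mac_vlan_create(); on failure it removes the hash entry and returns the minor. A generic sketch of that allocate/insert/undo sequence, with toy helpers standing in for the vmem arena, mod_hash and mac calls:

#include <stdbool.h>
#include <stdint.h>

static uint32_t	next_minor = 1025;	/* toy allocator */
static uint32_t	table[16];		/* toy minor-keyed table */

static uint32_t	minor_hold(void) { return (next_minor++); }
static void	minor_rele(uint32_t m) { (void) m; }
static void	table_insert(uint32_t m) { table[m % 16] = m; }
static void	table_remove(uint32_t m) { table[m % 16] = 0; }
static bool	node_create(uint32_t m) { return (m != 0); }	/* may fail */

/* Allocate, publish, create; unwind in reverse order if creation fails. */
static int
vlan_attach(uint32_t *minorp)
{
	uint32_t minor = minor_hold();

	table_insert(minor);
	if (!node_create(minor)) {
		table_remove(minor);	/* roll back the hash insert */
		minor_rele(minor);	/* and give the minor number back */
		return (-1);
	}
	*minorp = minor;
	return (0);
}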
Note that we cannot do this check in @@ -348,6 +403,21 @@ dls_vlan_rele(dls_vlan_t *dvp) rw_enter(&i_dls_vlan_lock, RW_WRITER); dlp = dvp->dv_dlp; + /* a minor node has been created for this vlan */ + if (dvp->dv_ref == 1 && dvp->dv_minor > 0) { + int err; + mod_hash_val_t val; + + mac_vlan_remove(dlp->dl_mh, dvp->dv_name); + err = mod_hash_remove(i_dls_vlan_dev_hash, + (mod_hash_key_t)(uintptr_t)dvp->dv_minor, + (mod_hash_val_t *)&val); + ASSERT(err == 0); + ASSERT(dvp == (dls_vlan_t *)val); + dls_minor_rele(dvp->dv_minor); + dvp->dv_minor = 0; + } + mac_stop(dlp->dl_mh); dls_mac_rele(dlp); if (--dvp->dv_ref == 0) { @@ -401,3 +471,207 @@ dls_vlan_walk(int (*fn)(dls_vlan_t *, void *), void *arg) rw_exit(&i_dls_vlan_lock); return (state.rc); } + +int +dls_vlan_ppa_from_minor(minor_t minor, t_uscalar_t *ppa) +{ + dls_vlan_t *dvp; + + if (minor <= DLD_MAX_MINOR) { + *ppa = (t_uscalar_t)minor - 1; + return (0); + } + + rw_enter(&i_dls_vlan_lock, RW_WRITER); + + if (mod_hash_find(i_dls_vlan_dev_hash, (mod_hash_key_t)(uintptr_t)minor, + (mod_hash_val_t *)&dvp) != 0) { + rw_exit(&i_dls_vlan_lock); + return (ENOENT); + } + *ppa = dvp->dv_ppa; + + rw_exit(&i_dls_vlan_lock); + return (0); +} + +int +dls_vlan_rele_by_name(const char *name) +{ + dls_vlan_t *dvp; + dls_link_t *dlp; + boolean_t destroy_vlan = B_FALSE; + + rw_enter(&i_dls_vlan_lock, RW_WRITER); + + if (mod_hash_find(i_dls_vlan_hash, (mod_hash_key_t)name, + (mod_hash_val_t *)&dvp) != 0) { + rw_exit(&i_dls_vlan_lock); + return (ENOENT); + } + + dlp = dvp->dv_dlp; + + /* a minor node has been created for this vlan */ + if (dvp->dv_ref == 1 && dvp->dv_minor > 0) { + int err; + mod_hash_val_t val; + + mac_vlan_remove(dlp->dl_mh, dvp->dv_name); + err = mod_hash_remove(i_dls_vlan_dev_hash, + (mod_hash_key_t)(uintptr_t)dvp->dv_minor, + (mod_hash_val_t *)&val); + ASSERT(err == 0); + ASSERT(dvp == (dls_vlan_t *)val); + dls_minor_rele(dvp->dv_minor); + dvp->dv_minor = 0; + } + + mac_stop(dlp->dl_mh); + dls_mac_rele(dlp); + if (--dvp->dv_ref == 0) { + dls_mac_stat_destroy(dvp); + /* Tagged vlans get destroyed when dv_ref drops to 0. */ + if (dvp->dv_id != 0) + destroy_vlan = B_TRUE; + } + rw_exit(&i_dls_vlan_lock); + if (destroy_vlan) + (void) dls_vlan_destroy(name); + + return (0); +} + +typedef struct dls_vlan_dip_state { + minor_t minor; + dev_info_t *dip; +} dls_vlan_dip_k_state_t; + +static int +dls_vlan_devinfo(dls_vlan_t *dvp, void *arg) +{ + dls_vlan_dip_k_state_t *statep = arg; + + if (dvp->dv_minor == statep->minor) { + dls_link_t *dlp = dvp->dv_dlp; + + if (dls_mac_hold(dlp) != 0) + return (0); + statep->dip = mac_devinfo_get(dlp->dl_mh); + dls_mac_rele(dlp); + + return (1); + } + + return (0); +} + +dev_info_t * +dls_vlan_finddevinfo(dev_t dev) +{ + dls_vlan_dip_k_state_t vlan_state; + + vlan_state.minor = getminor(dev); + vlan_state.dip = NULL; + + (void) dls_vlan_walk(dls_vlan_devinfo, &vlan_state); + return (vlan_state.dip); +} + +/* + * Allocate a new minor number. + */ +minor_t +dls_minor_hold(boolean_t sleep) +{ + /* + * Grab a value from the arena. + */ + return (PTR_TO_MINOR(vmem_alloc(minor_arenap, 1, + (sleep) ? VM_SLEEP : VM_NOSLEEP))); +} + +/* + * Release a previously allocated minor number. + */ +void +dls_minor_rele(minor_t minor) +{ + /* + * Return the value to the arena. 
+ */ + vmem_free(minor_arenap, MINOR_TO_PTR(minor), 1); +} + +int +dls_vlan_setzoneid(char *name, zoneid_t zid, boolean_t docheck) +{ + int err; + dls_vlan_t *dvp; + + if ((err = dls_vlan_hold(name, &dvp, B_TRUE)) != 0) + return (err); + + rw_enter(&i_dls_vlan_lock, RW_WRITER); + if (!docheck) { + dvp->dv_zid = zid; + } else { + dls_impl_t *dip; + + for (dip = dvp->dv_impl_list; dip != NULL; + dip = dip->di_next_impl) + if (dip->di_zid != zid) + break; + if (dip == NULL) + dvp->dv_zid = zid; + else + err = EBUSY; + } + rw_exit(&i_dls_vlan_lock); + + dls_vlan_rele(dvp); + return (err); +} + +int +dls_vlan_getzoneid(char *name, zoneid_t *zidp) +{ + int err; + dls_vlan_t *dvp; + + if ((err = dls_vlan_hold(name, &dvp, B_FALSE)) != 0) + return (err); + + *zidp = dvp->dv_zid; + + dls_vlan_rele(dvp); + + return (0); +} + +void +dls_vlan_add_impl(dls_vlan_t *dvp, dls_impl_t *dip) +{ + rw_enter(&i_dls_vlan_lock, RW_WRITER); + dip->di_next_impl = dvp->dv_impl_list; + dvp->dv_impl_list = dip; + rw_exit(&i_dls_vlan_lock); +} + + +void +dls_vlan_remove_impl(dls_vlan_t *dvp, dls_impl_t *dip) +{ + dls_impl_t **pp; + dls_impl_t *p; + + rw_enter(&i_dls_vlan_lock, RW_WRITER); + for (pp = &dvp->dv_impl_list; (p = *pp) != NULL; + pp = &(p->di_next_impl)) + if (p == dip) + break; + ASSERT(p != NULL); + *pp = p->di_next_impl; + p->di_next_impl = NULL; + rw_exit(&i_dls_vlan_lock); +} diff --git a/usr/src/uts/common/io/hook.c b/usr/src/uts/common/io/hook.c index 7e791647e4..323503498c 100644 --- a/usr/src/uts/common/io/hook.c +++ b/usr/src/uts/common/io/hook.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #pragma ident "%Z%%M% %I% %E% SMI" @@ -54,19 +54,20 @@ static struct modlinkage modlinkage = { * Hook internal functions */ static hook_int_t *hook_copy(hook_t *src); -static hook_event_int_t *hook_event_checkdup(hook_event_t *he); +static hook_event_int_t *hook_event_checkdup(hook_event_t *he, + hook_stack_t *hks); static hook_event_int_t *hook_event_copy(hook_event_t *src); static hook_event_int_t *hook_event_find(hook_family_int_t *hfi, char *event); static void hook_event_free(hook_event_int_t *hei); static hook_family_int_t *hook_family_copy(hook_family_t *src); -static hook_family_int_t *hook_family_find(char *family); +static hook_family_int_t *hook_family_find(char *family, hook_stack_t *hks); static void hook_family_free(hook_family_int_t *hfi); static hook_int_t *hook_find(hook_event_int_t *hei, hook_t *h); static void hook_free(hook_int_t *hi); static void hook_init(void); - -static cvwaitlock_t familylock; /* global lock */ -static hook_family_int_head_t familylist; /* family list head */ +static void hook_fini(void); +static void *hook_stack_init(netstackid_t stackid, netstack_t *ns); +static void hook_stack_fini(netstackid_t stackid, void *arg); /* * Module entry points. 
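Stepping back to the dls_vlan.c changes above: the minor-number arena moves out of dld_str.c and into dls_vlan.c. Minors up to DLD_MAX_MINOR keep the existing direct mapping (PPA == minor - 1), while each tagged VLAN node takes a minor above DLD_MAX_MINOR from a vmem identifier arena (dls_minor_hold()/dls_minor_rele()) and is entered into i_dls_vlan_dev_hash so dls_vlan_ppa_from_minor() can map the minor back to a PPA. Below is a hedged user-space sketch of that two-tier resolution; the DLD_MAX_MINOR value, the free list standing in for the vmem arena, and the array standing in for the mod_hash are all assumptions of the sketch, and the real code does all of this under i_dls_vlan_lock.

#include <errno.h>
#include <stddef.h>

#define	DLD_MAX_MINOR	1024		/* assumed boundary, not the kernel value */
#define	VLAN_SLOTS	256		/* capacity of the toy tables below */

typedef unsigned int minor_t;

typedef struct vlan {
	minor_t		dv_minor;	/* minor drawn from above DLD_MAX_MINOR */
	unsigned int	dv_ppa;		/* ppa the minor resolves back to */
} vlan_t;

static vlan_t		*vlan_by_minor[VLAN_SLOTS];	/* stands in for i_dls_vlan_dev_hash */
static unsigned char	 minor_used[VLAN_SLOTS];	/* stands in for the vmem id arena */

/* Allocate the next free minor above DLD_MAX_MINOR; 0 means failure. */
minor_t
minor_hold(void)
{
	unsigned int i;

	for (i = 0; i < VLAN_SLOTS; i++) {
		if (!minor_used[i]) {
			minor_used[i] = 1;
			return (DLD_MAX_MINOR + 1 + i);
		}
	}
	return (0);
}

void
minor_rele(minor_t minor)
{
	minor_used[minor - (DLD_MAX_MINOR + 1)] = 0;
}

/* Give a tagged VLAN a minor and remember it for later lookups. */
minor_t
vlan_create(vlan_t *dvp)
{
	minor_t m = minor_hold();

	if (m != 0) {
		dvp->dv_minor = m;
		vlan_by_minor[m - (DLD_MAX_MINOR + 1)] = dvp;
	}
	return (m);
}

/*
 * Two-tier resolution, as in dls_vlan_ppa_from_minor(): small minors map
 * directly to a PPA, larger ones go through the per-minor table.
 */
int
ppa_from_minor(minor_t minor, unsigned int *ppa)
{
	vlan_t *dvp;

	if (minor <= DLD_MAX_MINOR) {
		*ppa = minor - 1;
		return (0);
	}
	if (minor > DLD_MAX_MINOR + VLAN_SLOTS ||
	    (dvp = vlan_by_minor[minor - (DLD_MAX_MINOR + 1)]) == NULL)
		return (ENOENT);
	*ppa = dvp->dv_ppa;
	return (0);
}

Keeping the small minors out of the arena preserves the long-standing "minor == instance + 1" convention for plain links while still letting VLAN nodes be created and destroyed dynamically.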
@@ -74,15 +75,27 @@ static hook_family_int_head_t familylist; /* family list head */ int _init(void) { + int error; + hook_init(); - return (mod_install(&modlinkage)); + error = mod_install(&modlinkage); + if (error != 0) + hook_fini(); + + return (error); } int _fini(void) { - return (mod_remove(&modlinkage)); + int error; + + error = mod_remove(&modlinkage); + if (error == 0) + hook_fini(); + + return (error); } @@ -103,10 +116,63 @@ _info(struct modinfo *modinfop) static void hook_init(void) { - CVW_INIT(&familylock); - SLIST_INIT(&familylist); + /* + * We want to be informed each time a stack is created or + * destroyed in the kernel. + */ + netstack_register(NS_HOOK, hook_stack_init, NULL, + hook_stack_fini); +} + +/* + * Function: hook_fini + * Returns: None + * Parameters: None + * + * Deinitialize hooks + */ +static void +hook_fini(void) +{ + netstack_unregister(NS_HOOK); } +/* + * Initialize the hook stack instance. + */ +/*ARGSUSED*/ +static void * +hook_stack_init(netstackid_t stackid, netstack_t *ns) +{ + hook_stack_t *hks; + +#ifdef NS_DEBUG + printf("hook_stack_init(stack %d)\n", stackid); +#endif + + hks = (hook_stack_t *)kmem_zalloc(sizeof (*hks), KM_SLEEP); + hks->hk_netstack = ns; + + CVW_INIT(&hks->hks_familylock); + SLIST_INIT(&hks->hks_familylist); + + return (hks); +} + +/* + * Free the hook stack instance. + */ +/*ARGSUSED*/ +static void +hook_stack_fini(netstackid_t stackid, void *arg) +{ + hook_stack_t *hks = (hook_stack_t *)arg; +#ifdef NS_DEBUG + printf("hook_stack_fini(%p, stack %d)\n", arg, stackid); +#endif + CVW_DESTROY(&hks->hks_familylock); + kmem_free(hks, sizeof (*hks)); +} /* * Function: hook_run @@ -121,10 +187,11 @@ hook_init(void) * called more than once, simultaneously. */ int -hook_run(hook_event_token_t token, hook_data_t info) +hook_run(hook_event_token_t token, hook_data_t info, netstack_t *ns) { hook_int_t *hi; hook_event_int_t *hei; + hook_stack_t *hks = ns->netstack_hook; int rval = 0; ASSERT(token != NULL); @@ -135,7 +202,7 @@ hook_run(hook_event_token_t token, hook_data_t info) hook_data_t, info); /* Hold global read lock to ensure event will not be deleted */ - CVW_ENTER_READ(&familylock); + CVW_ENTER_READ(&hks->hks_familylock); /* Hold event read lock to ensure hook will not be changed */ CVW_ENTER_READ(&hei->hei_lock); @@ -146,7 +213,7 @@ hook_run(hook_event_token_t token, hook_data_t info) hook_event_token_t, token, hook_data_t, info, hook_int_t *, hi); - rval = (*hi->hi_hook.h_func)(token, info); + rval = (*hi->hi_hook.h_func)(token, info, ns); DTRACE_PROBE4(hook__func__end, hook_event_token_t, token, hook_data_t, info, @@ -157,7 +224,7 @@ hook_run(hook_event_token_t token, hook_data_t info) } CVW_EXIT_READ(&hei->hei_lock); - CVW_EXIT_READ(&familylock); + CVW_EXIT_READ(&hks->hks_familylock); DTRACE_PROBE3(hook__run__end, hook_event_token_t, token, @@ -176,7 +243,7 @@ hook_run(hook_event_token_t token, hook_data_t info) * Add new family to family list */ hook_family_int_t * -hook_family_add(hook_family_t *hf) +hook_family_add(hook_family_t *hf, hook_stack_t *hks) { hook_family_int_t *hfi, *new; @@ -187,20 +254,22 @@ hook_family_add(hook_family_t *hf) if (new == NULL) return (NULL); - CVW_ENTER_WRITE(&familylock); + CVW_ENTER_WRITE(&hks->hks_familylock); /* search family list */ - hfi = hook_family_find(hf->hf_name); + hfi = hook_family_find(hf->hf_name, hks); if (hfi != NULL) { - CVW_EXIT_WRITE(&familylock); + CVW_EXIT_WRITE(&hks->hks_familylock); hook_family_free(new); return (NULL); } + new->hfi_ptr = (void *)hks; + /* Add to family 
list head */ - SLIST_INSERT_HEAD(&familylist, new, hfi_entry); + SLIST_INSERT_HEAD(&hks->hks_familylist, new, hfi_entry); - CVW_EXIT_WRITE(&familylock); + CVW_EXIT_WRITE(&hks->hks_familylock); return (new); } @@ -215,21 +284,23 @@ hook_family_add(hook_family_t *hf) int hook_family_remove(hook_family_int_t *hfi) { + hook_stack_t *hks; ASSERT(hfi != NULL); + hks = (hook_stack_t *)hfi->hfi_ptr; - CVW_ENTER_WRITE(&familylock); + CVW_ENTER_WRITE(&hks->hks_familylock); /* Check if there are events */ if (!SLIST_EMPTY(&hfi->hfi_head)) { - CVW_EXIT_WRITE(&familylock); + CVW_EXIT_WRITE(&hks->hks_familylock); return (EBUSY); } /* Remove from family list */ - SLIST_REMOVE(&familylist, hfi, hook_family_int, hfi_entry); + SLIST_REMOVE(&hks->hks_familylist, hfi, hook_family_int, hfi_entry); - CVW_EXIT_WRITE(&familylock); + CVW_EXIT_WRITE(&hks->hks_familylock); hook_family_free(hfi); return (0); @@ -269,7 +340,6 @@ hook_family_copy(hook_family_t *src) /* - * Function: hook_family_find * Returns: internal family pointer - NULL = Not match * Parameters: family(I) - family name string * @@ -277,13 +347,13 @@ hook_family_copy(hook_family_t *src) * A lock on familylock must be held when called. */ static hook_family_int_t * -hook_family_find(char *family) +hook_family_find(char *family, hook_stack_t *hks) { hook_family_int_t *hfi = NULL; ASSERT(family != NULL); - SLIST_FOREACH(hfi, &familylist, hfi_entry) { + SLIST_FOREACH(hfi, &hks->hks_familylist, hfi_entry) { if (strcmp(hfi->hfi_family.hf_name, family) == 0) break; } @@ -328,22 +398,24 @@ hook_family_free(hook_family_int_t *hfi) hook_event_int_t * hook_event_add(hook_family_int_t *hfi, hook_event_t *he) { + hook_stack_t *hks; hook_event_int_t *hei, *new; ASSERT(hfi != NULL); ASSERT(he != NULL); ASSERT(he->he_name != NULL); + hks = (hook_stack_t *)hfi->hfi_ptr; new = hook_event_copy(he); if (new == NULL) return (NULL); - CVW_ENTER_WRITE(&familylock); + CVW_ENTER_WRITE(&hks->hks_familylock); /* Check whether this event pointer is already registered */ - hei = hook_event_checkdup(he); + hei = hook_event_checkdup(he, hks); if (hei != NULL) { - CVW_EXIT_WRITE(&familylock); + CVW_EXIT_WRITE(&hks->hks_familylock); hook_event_free(new); return (NULL); } @@ -351,7 +423,7 @@ hook_event_add(hook_family_int_t *hfi, hook_event_t *he) /* Add to event list head */ SLIST_INSERT_HEAD(&hfi->hfi_head, new, hei_entry); - CVW_EXIT_WRITE(&familylock); + CVW_EXIT_WRITE(&hks->hks_familylock); return (new); } @@ -367,29 +439,31 @@ hook_event_add(hook_family_int_t *hfi, hook_event_t *he) int hook_event_remove(hook_family_int_t *hfi, hook_event_t *he) { + hook_stack_t *hks; hook_event_int_t *hei; ASSERT(hfi != NULL); ASSERT(he != NULL); + hks = (hook_stack_t *)hfi->hfi_ptr; - CVW_ENTER_WRITE(&familylock); + CVW_ENTER_WRITE(&hks->hks_familylock); hei = hook_event_find(hfi, he->he_name); if (hei == NULL) { - CVW_EXIT_WRITE(&familylock); + CVW_EXIT_WRITE(&hks->hks_familylock); return (ENXIO); } /* Check if there are registered hooks for this event */ if (!TAILQ_EMPTY(&hei->hei_head)) { - CVW_EXIT_WRITE(&familylock); + CVW_EXIT_WRITE(&hks->hks_familylock); return (EBUSY); } /* Remove from event list */ SLIST_REMOVE(&hfi->hfi_head, hei, hook_event_int, hei_entry); - CVW_EXIT_WRITE(&familylock); + CVW_EXIT_WRITE(&hks->hks_familylock); hook_event_free(hei); return (0); @@ -405,14 +479,14 @@ hook_event_remove(hook_family_int_t *hfi, hook_event_t *he) * A lock on familylock must be held when called. 
*/ static hook_event_int_t * -hook_event_checkdup(hook_event_t *he) +hook_event_checkdup(hook_event_t *he, hook_stack_t *hks) { hook_family_int_t *hfi; hook_event_int_t *hei; ASSERT(he != NULL); - SLIST_FOREACH(hfi, &familylist, hfi_entry) { + SLIST_FOREACH(hfi, &hks->hks_familylist, hfi_entry) { SLIST_FOREACH(hei, &hfi->hfi_head, hei_entry) { if (hei->hei_event == he) return (hei); @@ -456,7 +530,7 @@ hook_event_copy(hook_event_t *src) * event(I) - event name string * * Search event list with event name - * A lock on familylock must be held when called. + * A lock on hks->hks_familylock must be held when called. */ static hook_event_int_t * hook_event_find(hook_family_int_t *hfi, char *event) @@ -503,12 +577,14 @@ hook_event_free(hook_event_int_t *hei) int hook_register(hook_family_int_t *hfi, char *event, hook_t *h) { + hook_stack_t *hks; hook_event_int_t *hei; hook_int_t *hi, *new; ASSERT(hfi != NULL); ASSERT(event != NULL); ASSERT(h != NULL); + hks = (hook_stack_t *)hfi->hfi_ptr; /* Alloc hook_int_t and copy hook */ new = hook_copy(h); @@ -520,11 +596,11 @@ hook_register(hook_family_int_t *hfi, char *event, hook_t *h) * to hold global family write lock. Just get read lock here to * ensure event will not be removed when doing hooks operation */ - CVW_ENTER_READ(&familylock); + CVW_ENTER_READ(&hks->hks_familylock); hei = hook_event_find(hfi, event); if (hei == NULL) { - CVW_EXIT_READ(&familylock); + CVW_EXIT_READ(&hks->hks_familylock); hook_free(new); return (ENXIO); } @@ -535,7 +611,7 @@ hook_register(hook_family_int_t *hfi, char *event, hook_t *h) if (((hei->hei_event->he_flags & HOOK_RDONLY) == 0) && (!TAILQ_EMPTY(&hei->hei_head))) { CVW_EXIT_WRITE(&hei->hei_lock); - CVW_EXIT_READ(&familylock); + CVW_EXIT_READ(&hks->hks_familylock); hook_free(new); return (EEXIST); } @@ -543,7 +619,7 @@ hook_register(hook_family_int_t *hfi, char *event, hook_t *h) hi = hook_find(hei, h); if (hi != NULL) { CVW_EXIT_WRITE(&hei->hei_lock); - CVW_EXIT_READ(&familylock); + CVW_EXIT_READ(&hks->hks_familylock); hook_free(new); return (EEXIST); } @@ -553,7 +629,7 @@ hook_register(hook_family_int_t *hfi, char *event, hook_t *h) hei->hei_event->he_interested = B_TRUE; CVW_EXIT_WRITE(&hei->hei_lock); - CVW_EXIT_READ(&familylock); + CVW_EXIT_READ(&hks->hks_familylock); return (0); } @@ -570,17 +646,19 @@ hook_register(hook_family_int_t *hfi, char *event, hook_t *h) int hook_unregister(hook_family_int_t *hfi, char *event, hook_t *h) { + hook_stack_t *hks; hook_event_int_t *hei; hook_int_t *hi; ASSERT(hfi != NULL); ASSERT(h != NULL); + hks = (hook_stack_t *)hfi->hfi_ptr; - CVW_ENTER_READ(&familylock); + CVW_ENTER_READ(&hks->hks_familylock); hei = hook_event_find(hfi, event); if (hei == NULL) { - CVW_EXIT_READ(&familylock); + CVW_EXIT_READ(&hks->hks_familylock); return (ENXIO); } @@ -590,7 +668,7 @@ hook_unregister(hook_family_int_t *hfi, char *event, hook_t *h) hi = hook_find(hei, h); if (hi == NULL) { CVW_EXIT_WRITE(&hei->hei_lock); - CVW_EXIT_READ(&familylock); + CVW_EXIT_READ(&hks->hks_familylock); return (ENXIO); } @@ -601,7 +679,7 @@ hook_unregister(hook_family_int_t *hfi, char *event, hook_t *h) } CVW_EXIT_WRITE(&hei->hei_lock); - CVW_EXIT_READ(&familylock); + CVW_EXIT_READ(&hks->hks_familylock); hook_free(hi); return (0); diff --git a/usr/src/uts/common/io/ib/clients/rds/rdssubr.c b/usr/src/uts/common/io/ib/clients/rds/rdssubr.c index 0bff0a6187..8e57cb783d 100644 --- a/usr/src/uts/common/io/ib/clients/rds/rdssubr.c +++ b/usr/src/uts/common/io/ib/clients/rds/rdssubr.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ 
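hook.c drops its file-scope familylock and familylist: the module now registers with the netstack framework (netstack_register(NS_HOOK, hook_stack_init, NULL, hook_stack_fini)), hook_stack_init() allocates a hook_stack_t holding the lock and the family list for that IP instance, and every routine reaches the right instance either through the caller-supplied hook_stack_t or through the back-pointer stored in hfi_ptr. Below is a minimal sketch of the same global-state-becomes-per-stack-state conversion; the names are hypothetical and a pthread rwlock stands in for the cvwaitlock the kernel code uses.

#include <pthread.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>

/* Per-instance state that used to be file-scope globals. */
typedef struct hook_stack {
	pthread_rwlock_t	 hks_familylock;
	struct hook_family_int	*hks_familylist;
} hook_stack_t;

typedef struct hook_family_int {
	struct hook_family_int	*hfi_next;	/* next family on this stack */
	hook_stack_t		*hfi_ptr;	/* back-pointer to owning stack */
	char			 hfi_name[32];
} hook_family_int_t;

/* Called once per stack instance (the NS_HOOK create callback above). */
hook_stack_t *
hook_stack_init(void)
{
	hook_stack_t *hks;

	if ((hks = calloc(1, sizeof (*hks))) == NULL)
		return (NULL);
	(void) pthread_rwlock_init(&hks->hks_familylock, NULL);
	return (hks);
}

/* Every operation takes the stack explicitly instead of using a global. */
hook_family_int_t *
hook_family_add(hook_stack_t *hks, const char *name)
{
	hook_family_int_t *hfi;

	if ((hfi = calloc(1, sizeof (*hfi))) == NULL)
		return (NULL);
	(void) strncpy(hfi->hfi_name, name, sizeof (hfi->hfi_name) - 1);
	hfi->hfi_ptr = hks;	/* remembered so later calls need no stack arg */

	(void) pthread_rwlock_wrlock(&hks->hks_familylock);
	hfi->hfi_next = hks->hks_familylist;
	hks->hks_familylist = hfi;
	(void) pthread_rwlock_unlock(&hks->hks_familylock);
	return (hfi);
}

hook_family_int_t *
hook_family_find(hook_stack_t *hks, const char *name)
{
	hook_family_int_t *hfi;

	(void) pthread_rwlock_rdlock(&hks->hks_familylock);
	for (hfi = hks->hks_familylist; hfi != NULL; hfi = hfi->hfi_next)
		if (strcmp(hfi->hfi_name, name) == 0)
			break;
	(void) pthread_rwlock_unlock(&hks->hks_familylock);
	return (hfi);
}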
/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -299,9 +299,14 @@ boolean_t rds_islocal(ipaddr_t addr) { ire_t *ire; + ip_stack_t *ipst; + + ipst = netstack_find_by_zoneid(GLOBAL_ZONEID)->netstack_ip; + ASSERT(ipst != NULL); ire = ire_ctable_lookup(addr, NULL, IRE_LOCAL | IRE_LOOPBACK | - IRE_BROADCAST, NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE); + IRE_BROADCAST, NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE, ipst); + netstack_rele(ipst->ips_netstack); if (ire == NULL) return (B_FALSE); ire_refrele(ire); diff --git a/usr/src/uts/common/io/mac/mac.c b/usr/src/uts/common/io/mac/mac.c index 05d50006e2..f54607fca7 100644 --- a/usr/src/uts/common/io/mac/mac.c +++ b/usr/src/uts/common/io/mac/mac.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -42,6 +42,7 @@ #include <sys/dls.h> #include <sys/dld.h> #include <sys/modctl.h> +#include <sys/fs/dv_node.h> #include <sys/atomic.h> #define IMPL_HASHSZ 67 /* prime */ @@ -1886,3 +1887,27 @@ done: mutex_exit(&i_mactype_lock); return (err); } + +int +mac_vlan_create(mac_handle_t mh, const char *name, minor_t minor) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + + /* Create a style-1 DLPI device */ + if (ddi_create_minor_node(mip->mi_dip, (char *)name, S_IFCHR, minor, + DDI_NT_NET, 0) != DDI_SUCCESS) { + return (-1); + } + return (0); +} + +void +mac_vlan_remove(mac_handle_t mh, const char *name) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + dev_info_t *dipp; + + ddi_remove_minor_node(mip->mi_dip, (char *)name); + dipp = ddi_get_parent(mip->mi_dip); + (void) devfs_clean(dipp, NULL, 0); +} diff --git a/usr/src/uts/common/io/neti.c b/usr/src/uts/common/io/neti.c index 3f7ae3c611..49a17ee91f 100644 --- a/usr/src/uts/common/io/neti.c +++ b/usr/src/uts/common/io/neti.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -38,11 +38,11 @@ #include <sys/neti.h> -static krwlock_t netlock; -static LIST_HEAD(netd_listhead, net_data) netd_head; /* list of net_data_t */ - static void net_init(); -static net_data_t net_find(const char *protocol); +static void net_fini(); +static net_data_t net_find(const char *protocol, neti_stack_t *ns); +static void *neti_stack_init(netstackid_t stackid, netstack_t *ns); +static void neti_stack_fini(netstackid_t stackid, void *arg); /* * Module linkage information for the kernel. @@ -64,16 +64,27 @@ static struct modlinkage modlinkage = { int _init(void) { + int error; + net_init(); - return (mod_install(&modlinkage)); + error = mod_install(&modlinkage); + if (error != 0) + net_fini(); + + return (error); } int _fini(void) { + int error; - return (mod_remove(&modlinkage)); + error = mod_remove(&modlinkage); + if (error == 0) + net_fini(); + + return (error); } @@ -88,20 +99,68 @@ _info(struct modinfo *modinfop) static void net_init() { + /* + * We want to be informed each time a stack is created or + * destroyed in the kernel. + */ + netstack_register(NS_NETI, neti_stack_init, NULL, + neti_stack_fini); +} + +static void +net_fini() +{ + netstack_unregister(NS_NETI); +} + + +/* + * Initialize the neti stack instance. 
+ */ +/*ARGSUSED*/ +static void * +neti_stack_init(netstackid_t stackid, netstack_t *ns) +{ + neti_stack_t *nts; + +#ifdef NS_DEBUG + printf("neti_stack_init(stack %d)\n", stackid); +#endif + + nts = (neti_stack_t *)kmem_zalloc(sizeof (*nts), KM_SLEEP); + nts->nts_netstack = ns; + + rw_init(&nts->nts_netlock, NULL, RW_DRIVER, NULL); + LIST_INIT(&nts->nts_netd_head); + + return (nts); +} + - rw_init(&netlock, NULL, RW_DRIVER, NULL); - LIST_INIT(&netd_head); +/* + * Free the neti stack instance. + */ +/*ARGSUSED*/ +static void +neti_stack_fini(netstackid_t stackid, void *arg) +{ + neti_stack_t *nts = (neti_stack_t *)arg; +#ifdef NS_DEBUG + printf("neti_stack_fini(%p, stack %d)\n", arg, stackid); +#endif + rw_destroy(&nts->nts_netlock); + kmem_free(nts, sizeof (*nts)); } static net_data_t -net_find(const char *protocol) +net_find(const char *protocol, neti_stack_t *nts) { struct net_data *n; ASSERT(protocol != NULL); - LIST_FOREACH(n, &netd_head, netd_list) { + LIST_FOREACH(n, &nts->nts_netd_head, netd_list) { ASSERT(n->netd_info.neti_protocol != NULL); if (strcmp(n->netd_info.neti_protocol, protocol) == 0) { break; @@ -111,33 +170,51 @@ net_find(const char *protocol) return (n); } +net_data_t +net_register(const net_info_t *info, netstackid_t nsid) +{ + netstack_t *ns; + net_data_t nd; + + ns = netstack_find_by_stackid(nsid); + nd = net_register_impl(info, ns); + netstack_rele(ns); + + return (nd); +} net_data_t -net_register(const net_info_t *info) +net_register_impl(const net_info_t *info, netstack_t *ns) { struct net_data *n, *new; + struct neti_stack *nts; ASSERT(info != NULL); + ASSERT(ns != NULL); + + nts = ns->netstack_neti; new = kmem_alloc(sizeof (*new), KM_SLEEP); new->netd_refcnt = 0; new->netd_hooks = NULL; new->netd_info = *info; + new->netd_netstack = ns; - rw_enter(&netlock, RW_WRITER); - n = net_find(info->neti_protocol); + rw_enter(&nts->nts_netlock, RW_WRITER); + n = net_find(info->neti_protocol, nts); if (n != NULL) { - rw_exit(&netlock); + rw_exit(&nts->nts_netlock); kmem_free(new, sizeof (*new)); return (NULL); } - if (LIST_EMPTY(&netd_head)) - LIST_INSERT_HEAD(&netd_head, new, netd_list); + if (LIST_EMPTY(&nts->nts_netd_head)) + LIST_INSERT_HEAD(&nts->nts_netd_head, new, netd_list); else - LIST_INSERT_AFTER(LIST_FIRST(&netd_head), new, netd_list); + LIST_INSERT_AFTER(LIST_FIRST(&nts->nts_netd_head), + new, netd_list); - rw_exit(&netlock); + rw_exit(&nts->nts_netlock); return (new); } @@ -145,36 +222,56 @@ net_register(const net_info_t *info) int net_unregister(net_data_t info) { + struct netstack *ns; + struct neti_stack *nts; + ns = info->netd_netstack; + nts = ns->netstack_neti; ASSERT(info != NULL); - rw_enter(&netlock, RW_WRITER); + rw_enter(&nts->nts_netlock, RW_WRITER); if (info->netd_refcnt != 0) { - rw_exit(&netlock); + rw_exit(&nts->nts_netlock); return (EBUSY); } LIST_REMOVE(info, netd_list); - rw_exit(&netlock); + rw_exit(&nts->nts_netlock); kmem_free(info, sizeof (struct net_data)); return (0); } +net_data_t +net_lookup(const char *protocol, netstackid_t nsid) +{ + netstack_t *ns; + net_data_t nd; + + ns = netstack_find_by_stackid(nsid); + nd = net_lookup_impl(protocol, ns); + netstack_rele(ns); + + return (nd); +} net_data_t -net_lookup(const char *protocol) +net_lookup_impl(const char *protocol, netstack_t *ns) { struct net_data *n; + struct neti_stack *nts; ASSERT(protocol != NULL); + ASSERT(ns != NULL); + + nts = ns->netstack_neti; - rw_enter(&netlock, RW_READER); - n = net_find(protocol); + rw_enter(&nts->nts_netlock, RW_READER); + n = 
net_find(protocol, nts); if (n != NULL) atomic_add_32((uint_t *)&n->netd_refcnt, 1); - rw_exit(&netlock); + rw_exit(&nts->nts_netlock); return (n); } @@ -187,33 +284,57 @@ net_lookup(const char *protocol) int net_release(net_data_t info) { + struct netstack *ns; + struct neti_stack *nts; + + ns = info->netd_netstack; + nts = ns->netstack_neti; + ASSERT(info != NULL); - rw_enter(&netlock, RW_READER); + rw_enter(&nts->nts_netlock, RW_READER); ASSERT(info->netd_refcnt > 0); atomic_add_32((uint_t *)&info->netd_refcnt, -1); /* net_release has been called too many times */ if (info->netd_refcnt < 0) { - rw_exit(&netlock); + rw_exit(&nts->nts_netlock); return (1); } - rw_exit(&netlock); + rw_exit(&nts->nts_netlock); + return (0); } +net_data_t +net_walk(net_data_t info, netstackid_t nsid) +{ + netstack_t *ns; + net_data_t nd; + + ns = netstack_find_by_stackid(nsid); + nd = net_walk_impl(info, ns); + netstack_rele(ns); + + return (nd); +} net_data_t -net_walk(net_data_t info) +net_walk_impl(net_data_t info, netstack_t *ns) { struct net_data *n = NULL; boolean_t found = B_FALSE; + struct neti_stack *nts; + + ASSERT(ns != NULL); + + nts = ns->netstack_neti; if (info == NULL) found = B_TRUE; - rw_enter(&netlock, RW_READER); - LIST_FOREACH(n, &netd_head, netd_list) { + rw_enter(&nts->nts_netlock, RW_READER); + LIST_FOREACH(n, &nts->nts_netd_head, netd_list) { if (found) break; if (n == info) @@ -227,7 +348,8 @@ net_walk(net_data_t info) if (n != NULL) atomic_add_32((uint_t *)&n->netd_refcnt, 1); - rw_exit(&netlock); + rw_exit(&nts->nts_netlock); + return (n); } @@ -242,7 +364,8 @@ net_getifname(net_data_t info, phy_if_t phy_ifdata, ASSERT(info != NULL); - return (info->netd_info.neti_getifname(phy_ifdata, buffer, buflen)); + return (info->netd_info.neti_getifname(phy_ifdata, buffer, buflen, + info->netd_netstack)); } @@ -252,7 +375,8 @@ net_getmtu(net_data_t info, phy_if_t phy_ifdata, lif_if_t ifdata) ASSERT(info != NULL); - return (info->netd_info.neti_getmtu(phy_ifdata, ifdata)); + return (info->netd_info.neti_getmtu(phy_ifdata, ifdata, + info->netd_netstack)); } @@ -262,7 +386,7 @@ net_getpmtuenabled(net_data_t info) ASSERT(info != NULL); - return (info->netd_info.neti_getpmtuenabled()); + return (info->netd_info.neti_getpmtuenabled(info->netd_netstack)); } @@ -274,7 +398,7 @@ net_getlifaddr(net_data_t info, phy_if_t phy_ifdata, lif_if_t ifdata, ASSERT(info != NULL); return (info->netd_info.neti_getlifaddr(phy_ifdata, ifdata, - nelem, type, storage)); + nelem, type, storage, info->netd_netstack)); } @@ -284,7 +408,8 @@ net_phygetnext(net_data_t info, phy_if_t phy_ifdata) ASSERT(info != NULL); - return (info->netd_info.neti_phygetnext(phy_ifdata)); + return (info->netd_info.neti_phygetnext(phy_ifdata, + info->netd_netstack)); } @@ -294,7 +419,7 @@ net_phylookup(net_data_t info, const char *name) ASSERT(info != NULL); - return (info->netd_info.neti_phylookup(name)); + return (info->netd_info.neti_phylookup(name, info->netd_netstack)); } @@ -304,7 +429,8 @@ net_lifgetnext(net_data_t info, phy_if_t ifidx, lif_if_t ifdata) ASSERT(info != NULL); - return (info->netd_info.neti_lifgetnext(ifidx, ifdata)); + return (info->netd_info.neti_lifgetnext(ifidx, ifdata, + info->netd_netstack)); } @@ -314,7 +440,8 @@ net_inject(net_data_t info, inject_t style, net_inject_t *packet) ASSERT(info != NULL); - return (info->netd_info.neti_inject(style, packet)); + return (info->netd_info.neti_inject(style, packet, + info->netd_netstack)); } @@ -324,7 +451,7 @@ net_routeto(net_data_t info, struct sockaddr *address) 
ASSERT(info != NULL); - return (info->netd_info.neti_routeto(address)); + return (info->netd_info.neti_routeto(address, info->netd_netstack)); } @@ -373,7 +500,7 @@ net_register_family(net_data_t info, hook_family_t *hf) if (info->netd_hooks != NULL) return (EEXIST); - hfi = hook_family_add(hf); + hfi = hook_family_add(hf, info->netd_netstack->netstack_hook); if (hfi == NULL) return (EEXIST); diff --git a/usr/src/uts/common/io/sad.c b/usr/src/uts/common/io/sad.c index d8371327dc..de3e66130e 100644 --- a/usr/src/uts/common/io/sad.c +++ b/usr/src/uts/common/io/sad.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -54,6 +54,7 @@ #include <sys/modctl.h> #include <sys/priv_names.h> #include <sys/sysmacros.h> +#include <sys/zone.h> static int sadopen(queue_t *, dev_t *, int, int, cred_t *); static int sadclose(queue_t *, int, cred_t *); @@ -186,35 +187,45 @@ sadopen( cred_t *credp) /* user credentials */ { int i; + netstack_t *ns; + str_stack_t *ss; if (sflag) /* no longer called from clone driver */ return (EINVAL); + ns = netstack_find_by_cred(credp); + ASSERT(ns != NULL); + ss = ns->netstack_str; + ASSERT(ss != NULL); + /* * Both USRMIN and ADMMIN are clone interfaces. */ - for (i = 0; i < sadcnt; i++) - if (saddev[i].sa_qp == NULL) + for (i = 0; i < ss->ss_sadcnt; i++) + if (ss->ss_saddev[i].sa_qp == NULL) break; - if (i >= sadcnt) /* no such device */ + if (i >= ss->ss_sadcnt) { /* no such device */ + netstack_rele(ss->ss_netstack); return (ENXIO); - + } switch (getminor(*devp)) { case USRMIN: /* mere mortal */ - saddev[i].sa_flags = 0; + ss->ss_saddev[i].sa_flags = 0; break; case ADMMIN: /* privileged user */ - saddev[i].sa_flags = SADPRIV; + ss->ss_saddev[i].sa_flags = SADPRIV; break; default: + netstack_rele(ss->ss_netstack); return (EINVAL); } - saddev[i].sa_qp = qp; - qp->q_ptr = (caddr_t)&saddev[i]; - WR(qp)->q_ptr = (caddr_t)&saddev[i]; + ss->ss_saddev[i].sa_qp = qp; + ss->ss_saddev[i].sa_ss = ss; + qp->q_ptr = (caddr_t)&ss->ss_saddev[i]; + WR(qp)->q_ptr = (caddr_t)&ss->ss_saddev[i]; /* * NOTE: should the ADMMIN or USRMIN minors change @@ -244,6 +255,8 @@ sadclose( sadp = (struct saddev *)qp->q_ptr; sadp->sa_qp = NULL; sadp->sa_addr = NULL; + netstack_rele(sadp->sa_ss->ss_netstack); + sadp->sa_ss = NULL; qp->q_ptr = NULL; WR(qp)->q_ptr = NULL; return (0); @@ -382,6 +395,10 @@ apush_iocdata( struct saddev *sadp; uint_t size; dev_t dev; + str_stack_t *ss; + + sadp = (struct saddev *)qp->q_ptr; + ss = sadp->sa_ss; csp = (struct copyresp *)mp->b_rptr; if (csp->cp_rval) { /* if there was an error */ @@ -436,25 +453,26 @@ apush_iocdata( /* sanity check the request */ if (((ret = sad_ap_verify(ap)) != 0) || ((ret = valid_major(ap->ap_major)) != 0)) { - sad_ap_rele(ap); + sad_ap_rele(ap, ss); miocnak(qp, mp, 0, ret); return; } /* check for overlapping configs */ - mutex_enter(&sad_lock); - if ((ap_tmp = sad_ap_find(&ap->ap_common)) != NULL) { + mutex_enter(&ss->ss_sad_lock); + ap_tmp = sad_ap_find(&ap->ap_common, ss); + if (ap_tmp != NULL) { /* already configured */ - mutex_exit(&sad_lock); - sad_ap_rele(ap_tmp); - sad_ap_rele(ap); + mutex_exit(&ss->ss_sad_lock); + sad_ap_rele(ap_tmp, ss); + sad_ap_rele(ap, ss); miocnak(qp, mp, 0, EEXIST); return; } /* add the new config to our hash */ - sad_ap_insert(ap); - mutex_exit(&sad_lock); + sad_ap_insert(ap, ss); + mutex_exit(&ss->ss_sad_lock); miocack(qp, mp, 0, 0); return; @@ -466,7 +484,7 
@@ apush_iocdata( } /* search for a matching config */ - if ((ap = sad_ap_find_by_dev(dev)) == NULL) { + if ((ap = sad_ap_find_by_dev(dev, ss)) == NULL) { /* no config found */ miocnak(qp, mp, 0, ENODEV); return; @@ -479,7 +497,7 @@ apush_iocdata( */ if ((ap->ap_type == SAP_RANGE) && (ap->ap_minor != sap->sap_minor)) { - sad_ap_rele(ap); + sad_ap_rele(ap, ss); miocnak(qp, mp, 0, ERANGE); return; } @@ -490,7 +508,7 @@ apush_iocdata( */ if ((ap->ap_type == SAP_ALL) && (sap->sap_minor != 0)) { - sad_ap_rele(ap); + sad_ap_rele(ap, ss); miocnak(qp, mp, 0, EINVAL); return; } @@ -499,27 +517,27 @@ apush_iocdata( * make sure someone else hasn't already * removed this config from the hash. */ - mutex_enter(&sad_lock); - ap_tmp = sad_ap_find(&ap->ap_common); + mutex_enter(&ss->ss_sad_lock); + ap_tmp = sad_ap_find(&ap->ap_common, ss); if (ap_tmp != ap) { - mutex_exit(&sad_lock); - sad_ap_rele(ap_tmp); - sad_ap_rele(ap); + mutex_exit(&ss->ss_sad_lock); + sad_ap_rele(ap_tmp, ss); + sad_ap_rele(ap, ss); miocnak(qp, mp, 0, ENODEV); return; - } else + } /* remove the config from the hash and return */ - sad_ap_remove(ap); - mutex_exit(&sad_lock); + sad_ap_remove(ap, ss); + mutex_exit(&ss->ss_sad_lock); /* * Release thrice, once for sad_ap_find_by_dev(), * once for sad_ap_find(), and once to free. */ - sad_ap_rele(ap); - sad_ap_rele(ap); - sad_ap_rele(ap); + sad_ap_rele(ap, ss); + sad_ap_rele(ap, ss); + sad_ap_rele(ap, ss); miocack(qp, mp, 0, 0); return; } /* switch (sap_cmd) */ @@ -536,7 +554,7 @@ apush_iocdata( } /* search for a matching config */ - if ((ap = sad_ap_find_by_dev(dev)) == NULL) { + if ((ap = sad_ap_find_by_dev(dev, ss)) == NULL) { /* no config found */ miocnak(qp, mp, 0, ENODEV); return; @@ -550,9 +568,10 @@ apush_iocdata( (void) strcpy(sap->sap_list[i], ap->ap_list[i]); for (; i < MAXAPUSH; i++) bzero(sap->sap_list[i], FMNAMESZ + 1); + mutex_exit(&ss->ss_sad_lock); /* release our hold on the config */ - sad_ap_rele(ap); + sad_ap_rele(ap, ss); /* copyout the results */ if (SAD_VER(csp->cp_cmd) == 1) @@ -560,7 +579,6 @@ apush_iocdata( else size = STRAPUSH_V0_LEN; - sadp = (struct saddev *)qp->q_ptr; mcopyout(mp, (void *)GETRESULT, size, sadp->sa_addr, NULL); qreply(qp, mp); diff --git a/usr/src/uts/common/io/sad_conf.c b/usr/src/uts/common/io/sad_conf.c index 1611eaa18a..4560922f2d 100644 --- a/usr/src/uts/common/io/sad_conf.c +++ b/usr/src/uts/common/io/sad_conf.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -37,9 +37,6 @@ #include <sys/kmem.h> #include <sys/sysmacros.h> -struct saddev *saddev; /* sad device array */ -int sadcnt = 16; /* number of sad devices */ - /* * Currently we store all the sad data in a hash table keyed by major * number. This is far from ideal. It means that if a single device @@ -69,9 +66,6 @@ int sadcnt = 16; /* number of sad devices */ * for a given major number then there can't be any SAP_RANGE or SAP_ONE * nodes for that same major number. 
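sad.c and sad_conf.c undergo the same conversion: the global saddev array, sadcnt, sad_lock and sad_hash become fields of the per-stack str_stack_t, sadopen() resolves the opener's stack with netstack_find_by_cred() and keeps that hold for the life of the open (dropping it in sadclose() or on any failure path), and every sad_ap_* helper now takes the str_stack_t explicitly. The hold/release discipline is the same one rds_islocal() uses earlier in this change with netstack_find_by_zoneid(). A hedged sketch of the open/close shape, with hypothetical names and a simple reference count standing in for netstack_find_by_cred()/netstack_rele():

#include <errno.h>
#include <stddef.h>

/* Per-stack STREAMS administrative state (stands in for str_stack_t). */
typedef struct str_stack {
	int		 ss_refcnt;	/* stands in for the netstack hold count */
	int		 ss_sadcnt;	/* number of sad devices in this stack */
	struct saddev	*ss_saddev;	/* per-stack device array */
} str_stack_t;

typedef struct saddev {
	void		*sa_qp;		/* non-NULL while the slot is open */
	str_stack_t	*sa_ss;		/* stack held for the life of the open */
} saddev_t;

/* Stand-ins for netstack_find_by_cred() and netstack_rele(). */
str_stack_t *
stack_hold(str_stack_t *ss)
{
	ss->ss_refcnt++;
	return (ss);
}

void
stack_rele(str_stack_t *ss)
{
	ss->ss_refcnt--;
}

int
sad_open(str_stack_t *cur, void *qp, saddev_t **sadpp)
{
	str_stack_t *ss = stack_hold(cur);	/* resolve the opener's stack */
	int i;

	for (i = 0; i < ss->ss_sadcnt; i++)
		if (ss->ss_saddev[i].sa_qp == NULL)
			break;
	if (i >= ss->ss_sadcnt) {
		stack_rele(ss);			/* drop the hold on failure */
		return (ENXIO);
	}
	ss->ss_saddev[i].sa_qp = qp;
	ss->ss_saddev[i].sa_ss = ss;		/* hold is kept until close */
	*sadpp = &ss->ss_saddev[i];
	return (0);
}

void
sad_close(saddev_t *sadp)
{
	sadp->sa_qp = NULL;
	stack_rele(sadp->sa_ss);		/* release the open-time hold */
	sadp->sa_ss = NULL;
}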
*/ -kmutex_t sad_lock; /* protects sad_hash */ -static mod_hash_t *sad_hash; -static size_t sad_hash_nchains = 127; /* * Private Internal Interfaces @@ -207,59 +201,60 @@ sad_ap_alloc(void) } void -sad_ap_rele(struct autopush *ap) +sad_ap_rele(struct autopush *ap, str_stack_t *ss) { - mutex_enter(&sad_lock); + mutex_enter(&ss->ss_sad_lock); ASSERT(ap->ap_cnt > 0); if (--(ap->ap_cnt) == 0) { - mutex_exit(&sad_lock); + mutex_exit(&ss->ss_sad_lock); kmem_free(ap, sizeof (struct autopush)); } else { - mutex_exit(&sad_lock); + mutex_exit(&ss->ss_sad_lock); } } void -sad_ap_insert(struct autopush *ap) +sad_ap_insert(struct autopush *ap, str_stack_t *ss) { - ASSERT(MUTEX_HELD(&sad_lock)); + ASSERT(MUTEX_HELD(&ss->ss_sad_lock)); ASSERT(sad_apc_verify(&ap->ap_common) == 0); - ASSERT(sad_ap_find(&ap->ap_common) == NULL); - (void) mod_hash_insert(sad_hash, &ap->ap_common, ap); + ASSERT(sad_ap_find(&ap->ap_common, ss) == NULL); + (void) mod_hash_insert(ss->ss_sad_hash, &ap->ap_common, ap); } void -sad_ap_remove(struct autopush *ap) +sad_ap_remove(struct autopush *ap, str_stack_t *ss) { struct autopush *ap_removed = NULL; - ASSERT(MUTEX_HELD(&sad_lock)); - (void) mod_hash_remove(sad_hash, &ap->ap_common, + ASSERT(MUTEX_HELD(&ss->ss_sad_lock)); + (void) mod_hash_remove(ss->ss_sad_hash, &ap->ap_common, (mod_hash_val_t *)&ap_removed); ASSERT(ap == ap_removed); } struct autopush * -sad_ap_find(struct apcommon *apc) +sad_ap_find(struct apcommon *apc, str_stack_t *ss) { struct autopush *ap_result = NULL; - ASSERT(MUTEX_HELD(&sad_lock)); + ASSERT(MUTEX_HELD(&ss->ss_sad_lock)); ASSERT(sad_apc_verify(apc) == 0); - (void) mod_hash_find(sad_hash, apc, (mod_hash_val_t *)&ap_result); + (void) mod_hash_find(ss->ss_sad_hash, apc, + (mod_hash_val_t *)&ap_result); if (ap_result != NULL) ap_result->ap_cnt++; return (ap_result); } struct autopush * -sad_ap_find_by_dev(dev_t dev) +sad_ap_find_by_dev(dev_t dev, str_stack_t *ss) { struct apcommon apc; struct autopush *ap_result; - ASSERT(MUTEX_NOT_HELD(&sad_lock)); + ASSERT(MUTEX_NOT_HELD(&ss->ss_sad_lock)); /* prepare an apcommon structure to search with */ apc.apc_cmd = SAP_ONE; @@ -274,17 +269,35 @@ sad_ap_find_by_dev(dev_t dev) apc.apc_npush = 1; apc.apc_lastminor = 0; - mutex_enter(&sad_lock); - ap_result = sad_ap_find(&apc); - mutex_exit(&sad_lock); + mutex_enter(&ss->ss_sad_lock); + ap_result = sad_ap_find(&apc, ss); + mutex_exit(&ss->ss_sad_lock); return (ap_result); } void -sad_initspace(void) +sad_initspace(str_stack_t *ss) { - saddev = kmem_zalloc(sadcnt * sizeof (struct saddev), KM_SLEEP); - sad_hash = mod_hash_create_extended("sad_hash", - sad_hash_nchains, mod_hash_null_keydtor, mod_hash_null_valdtor, + mutex_init(&ss->ss_sad_lock, NULL, MUTEX_DEFAULT, NULL); + ss->ss_sad_hash_nchains = 127; + ss->ss_sadcnt = 16; + + ss->ss_saddev = kmem_zalloc(ss->ss_sadcnt * sizeof (struct saddev), + KM_SLEEP); + ss->ss_sad_hash = mod_hash_create_extended("sad_hash", + ss->ss_sad_hash_nchains, mod_hash_null_keydtor, + mod_hash_null_valdtor, sad_hash_alg, NULL, sad_hash_keycmp, KM_SLEEP); } + +void +sad_freespace(str_stack_t *ss) +{ + kmem_free(ss->ss_saddev, ss->ss_sadcnt * sizeof (struct saddev)); + ss->ss_saddev = NULL; + + mod_hash_destroy_hash(ss->ss_sad_hash); + ss->ss_sad_hash = NULL; + + mutex_destroy(&ss->ss_sad_lock); +} diff --git a/usr/src/uts/common/io/strplumb.c b/usr/src/uts/common/io/strplumb.c index c7463832fc..7da86a44bb 100644 --- a/usr/src/uts/common/io/strplumb.c +++ b/usr/src/uts/common/io/strplumb.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * 
Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -628,6 +628,10 @@ strplumb(void) if ((err = ldi_ident_from_mod(&modlinkage, &li)) != 0) return (err); + /* + * Setup the TCP and SCTP default queues for the global stack. + * tcp/sctp_stack_init will do this for additional stack instances. + */ if ((err = strplumb_sctpq(li)) != 0) goto done; diff --git a/usr/src/uts/common/ipp/dlcosmk/dlcosmk.c b/usr/src/uts/common/ipp/dlcosmk/dlcosmk.c index 4d26a9091b..27eaaba86f 100644 --- a/usr/src/uts/common/ipp/dlcosmk/dlcosmk.c +++ b/usr/src/uts/common/ipp/dlcosmk/dlcosmk.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2002-2003 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -89,8 +88,8 @@ dlcosmk_process(mblk_t **mpp, dlcosmk_data_t *dlcosmk_data, uint32_t ill_index, } if ((ill_index == 0) || - ((ill = ill_lookup_on_ifindex(ill_index, B_FALSE, NULL, NULL, - NULL, NULL)) == NULL)) { + ((ill = ill_lookup_on_ifindex_global_instance(ill_index, B_FALSE, + NULL, NULL, NULL, NULL)) == NULL)) { dlcosmk2dbg(("dlcosmk_process:invalid ill index %u\n", ill_index)); atomic_add_64(&dlcosmk_data->ipackets, 1); diff --git a/usr/src/uts/common/ipp/ipgpc/classifierddi.c b/usr/src/uts/common/ipp/ipgpc/classifierddi.c index 2554fd8f4e..d9955d84a6 100644 --- a/usr/src/uts/common/ipp/ipgpc/classifierddi.c +++ b/usr/src/uts/common/ipp/ipgpc/classifierddi.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2002-2003 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -436,8 +435,8 @@ ipgpc_invoke_action(ipp_action_id_t aid, ipp_packet_t *packet) /* The ill_index could be 0 when called from forwarding (read) path */ if (ill_idx > 0) { - ill = ill_lookup_on_ifindex(ill_idx, B_FALSE, NULL, NULL, - NULL, NULL); + ill = ill_lookup_on_ifindex_global_instance(ill_idx, B_FALSE, + NULL, NULL, NULL, NULL); } /* parse the packet from the message block */ diff --git a/usr/src/uts/common/netinet/igmp_var.h b/usr/src/uts/common/netinet/igmp_var.h index 2cdcaff904..7caf9e2600 100644 --- a/usr/src/uts/common/netinet/igmp_var.h +++ b/usr/src/uts/common/netinet/igmp_var.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -56,8 +55,6 @@ struct igmpstat { }; #ifdef _KERNEL -struct igmpstat igmpstat; - /* * slowtimo interval used for both IGMP and MLD */ diff --git a/usr/src/uts/common/os/kmem.c b/usr/src/uts/common/os/kmem.c index c6c0166974..c451fba2d9 100644 --- a/usr/src/uts/common/os/kmem.c +++ b/usr/src/uts/common/os/kmem.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -63,6 +63,7 @@ #include <sys/reboot.h> #include <sys/id32.h> #include <sys/zone.h> +#include <sys/netstack.h> extern void streams_msg_init(void); extern int segkp_fromheap; @@ -2601,6 +2602,7 @@ kmem_init(void) * can register their callbacks. */ zone_zsd_init(); + log_init(); taskq_init(); @@ -2647,6 +2649,12 @@ kmem_init(void) * Initialize 32-bit ID cache. */ id32_init(); + + /* + * Initialize the networking stack so modules loaded can + * register their callbacks. + */ + netstack_init(); } void diff --git a/usr/src/uts/common/os/netstack.c b/usr/src/uts/common/os/netstack.c new file mode 100644 index 0000000000..60ee49f8ed --- /dev/null +++ b/usr/src/uts/common/os/netstack.c @@ -0,0 +1,1217 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
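A few callers are left pointing at the global stack instance rather than a per-zone one: rds_islocal() (earlier in this change) takes a hold on the global zone's netstack, passes its ip_stack_t to ire_ctable_lookup() and releases the hold afterwards, and the IPP actions dlcosmk and ipgpc switch to ill_lookup_on_ifindex_global_instance(), presumably because no per-stack context is available where they run. kmem_init() now calls netstack_init() early so the framework exists before any module tries to register. The sketch below shows only the hold-use-release shape of such a lookup; the names and the reference count are stand-ins, not the kernel interfaces.

typedef struct ip_stack {
	int	ips_dummy;		/* placeholder per-protocol state */
} ip_stack_t;

typedef struct netstack {
	int		netstack_refcnt;
	ip_stack_t	netstack_ip;
} netstack_t;

static netstack_t	global_stack;	/* stands in for the shared instance */

/* Stand-ins for netstack_find_by_zoneid(GLOBAL_ZONEID) and netstack_rele(). */
netstack_t *
netstack_find_global(void)
{
	global_stack.netstack_refcnt++;
	return (&global_stack);
}

void
netstack_rele(netstack_t *ns)
{
	ns->netstack_refcnt--;
}

/*
 * Shape of the converted callers: hold the global instance, hand its
 * per-protocol state to the lookup, drop the hold when done.
 */
int
lookup_against_global_instance(int (*lookup)(ip_stack_t *))
{
	netstack_t *ns = netstack_find_global();
	int ret;

	ret = lookup(&ns->netstack_ip);
	netstack_rele(ns);
	return (ret);
}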
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/param.h> +#include <sys/sysmacros.h> +#include <sys/vm.h> +#include <sys/proc.h> +#include <sys/tuneable.h> +#include <sys/systm.h> +#include <sys/cmn_err.h> +#include <sys/debug.h> +#include <sys/sdt.h> +#include <sys/mutex.h> +#include <sys/bitmap.h> +#include <sys/atomic.h> +#include <sys/kobj.h> +#include <sys/disp.h> +#include <vm/seg_kmem.h> +#include <sys/zone.h> +#include <sys/netstack.h> + +/* + * What we use so that the zones framework can tell us about new zones, + * which we use to create new stacks. + */ +static zone_key_t netstack_zone_key; + +static int netstack_initialized = 0; + +/* + * Track the registered netstacks. + * The global lock protects + * - ns_reg + * - the list starting at netstack_head and following the netstack_next + * pointers. + */ +static kmutex_t netstack_g_lock; + +/* + * Registry of netstacks with their create/shutdown/destory functions. + */ +static struct netstack_registry ns_reg[NS_MAX]; + +/* + * Global list of existing stacks. We use this when a new zone with + * an exclusive IP instance is created. + * + * Note that in some cases a netstack_t needs to stay around after the zone + * has gone away. This is because there might be outstanding references + * (from TCP TIME_WAIT connections, IPsec state, etc). The netstack_t data + * structure and all the foo_stack_t's hanging off of it will be cleaned up + * when the last reference to it is dropped. + * However, the same zone might be rebooted. That is handled using the + * assumption that the zones framework picks a new zoneid each time a zone + * is (re)booted. We assert for that condition in netstack_zone_create(). + * Thus the old netstack_t can take its time for things to time out. + */ +static netstack_t *netstack_head; + +/* + * To support kstat_create_netstack() using kstat_zone_add we need + * to track both + * - all zoneids that use the global/shared stack + * - all kstats that have been added for the shared stack + */ +struct shared_zone_list { + struct shared_zone_list *sz_next; + zoneid_t sz_zoneid; +}; + +struct shared_kstat_list { + struct shared_kstat_list *sk_next; + kstat_t *sk_kstat; +}; + +static kmutex_t netstack_shared_lock; /* protects the following two */ +static struct shared_zone_list *netstack_shared_zones; +static struct shared_kstat_list *netstack_shared_kstats; + +static void *netstack_zone_create(zoneid_t zoneid); +static void netstack_zone_shutdown(zoneid_t zoneid, void *arg); +static void netstack_zone_destroy(zoneid_t zoneid, void *arg); + +static void netstack_do_create(void); +static void netstack_do_shutdown(void); +static void netstack_do_destroy(void); + +static void netstack_shared_zone_add(zoneid_t zoneid); +static void netstack_shared_zone_remove(zoneid_t zoneid); +static void netstack_shared_kstat_add(kstat_t *ks); +static void netstack_shared_kstat_remove(kstat_t *ks); + + +void +netstack_init(void) +{ + mutex_init(&netstack_g_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&netstack_shared_lock, NULL, MUTEX_DEFAULT, NULL); + + netstack_initialized = 1; + + /* + * We want to be informed each time a zone is created or + * destroyed in the kernel, so we can maintain the + * stack instance information. + */ + zone_key_create(&netstack_zone_key, netstack_zone_create, + netstack_zone_shutdown, netstack_zone_destroy); +} + +/* + * Register a new module with the framework. + * This registers interest in changes to the set of netstacks. 
+ * The createfn and destroyfn are required, but the shutdownfn can be + * NULL. + * Note that due to the current zsd implementation, when the create + * function is called the zone isn't fully present, thus functions + * like zone_find_by_* will fail, hence the create function can not + * use many zones kernel functions including zcmn_err(). + */ +void +netstack_register(int moduleid, + void *(*module_create)(netstackid_t, netstack_t *), + void (*module_shutdown)(netstackid_t, void *), + void (*module_destroy)(netstackid_t, void *)) +{ + netstack_t *ns; + + ASSERT(netstack_initialized); + ASSERT(moduleid >= 0 && moduleid < NS_MAX); + ASSERT(module_create != NULL); + + mutex_enter(&netstack_g_lock); + ASSERT(ns_reg[moduleid].nr_create == NULL); + ASSERT(ns_reg[moduleid].nr_flags == 0); + ns_reg[moduleid].nr_create = module_create; + ns_reg[moduleid].nr_shutdown = module_shutdown; + ns_reg[moduleid].nr_destroy = module_destroy; + ns_reg[moduleid].nr_flags = NRF_REGISTERED; + + /* + * Determine the set of stacks that exist before we drop the lock. + * Set CREATE_NEEDED for each of those. + * netstacks which have been deleted will have NSS_CREATE_COMPLETED + * set, but check NSF_CLOSING to be sure. + */ + for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) { + mutex_enter(&ns->netstack_lock); + if (!(ns->netstack_flags & NSF_CLOSING) && + (ns->netstack_m_state[moduleid] & NSS_CREATE_ALL) == 0) { + ns->netstack_m_state[moduleid] |= NSS_CREATE_NEEDED; + DTRACE_PROBE2(netstack__create__needed, + netstack_t *, ns, int, moduleid); + } + mutex_exit(&ns->netstack_lock); + } + mutex_exit(&netstack_g_lock); + + /* + * Call the create function for each stack that has CREATE_NEEDED. + * Set CREATE_INPROGRESS, drop lock, and after done, + * set CREATE_COMPLETE + */ + netstack_do_create(); +} + +void +netstack_unregister(int moduleid) +{ + netstack_t *ns; + + ASSERT(moduleid >= 0 && moduleid < NS_MAX); + + ASSERT(ns_reg[moduleid].nr_create != NULL); + ASSERT(ns_reg[moduleid].nr_flags & NRF_REGISTERED); + + mutex_enter(&netstack_g_lock); + /* + * Determine the set of stacks that exist before we drop the lock. + * Set SHUTDOWN_NEEDED and DESTROY_NEEDED for each of those. + */ + for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) { + mutex_enter(&ns->netstack_lock); + if (ns_reg[moduleid].nr_shutdown != NULL && + (ns->netstack_m_state[moduleid] & NSS_CREATE_COMPLETED) && + (ns->netstack_m_state[moduleid] & NSS_SHUTDOWN_ALL) == 0) { + ns->netstack_m_state[moduleid] |= NSS_SHUTDOWN_NEEDED; + DTRACE_PROBE2(netstack__shutdown__needed, + netstack_t *, ns, int, moduleid); + } + if ((ns_reg[moduleid].nr_flags & NRF_REGISTERED) && + ns_reg[moduleid].nr_destroy != NULL && + (ns->netstack_m_state[moduleid] & NSS_CREATE_COMPLETED) && + (ns->netstack_m_state[moduleid] & NSS_DESTROY_ALL) == 0) { + ns->netstack_m_state[moduleid] |= NSS_DESTROY_NEEDED; + DTRACE_PROBE2(netstack__destroy__needed, + netstack_t *, ns, int, moduleid); + } + mutex_exit(&ns->netstack_lock); + } + mutex_exit(&netstack_g_lock); + + netstack_do_shutdown(); + netstack_do_destroy(); + + /* + * Clear the netstack_m_state so that we can handle this module + * being loaded again. 
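netstack_register() and netstack_unregister() drive a per-stack, per-module state machine: registering a module marks every existing stack NSS_CREATE_NEEDED and then runs the create pass, while unregistering marks NSS_SHUTDOWN_NEEDED and NSS_DESTROY_NEEDED, runs those passes, and finally clears netstack_m_state so the module can be loaded again. The sketch below compresses the transitions for a single (stack, module) pair into one function; the flag values are made up for the sketch, and the real code also records the *_INPROGRESS states and serializes everything under netstack_lock and netstack_g_lock.

#include <stddef.h>

/* Per-module lifecycle flags; values here are invented for the sketch. */
#define	NSS_CREATE_NEEDED	0x0001
#define	NSS_CREATE_COMPLETED	0x0002
#define	NSS_SHUTDOWN_NEEDED	0x0010
#define	NSS_SHUTDOWN_COMPLETED	0x0020
#define	NSS_DESTROY_NEEDED	0x0100
#define	NSS_DESTROY_COMPLETED	0x0200

typedef struct module_state {
	unsigned int	 m_state;	/* one word per (stack, module) pair */
	void		*m_data;	/* what the module's create callback returned */
} module_state_t;

/* Run whichever step has been marked needed, mirroring netstack_apply_*(). */
void
apply_one(module_state_t *ms, void *(*createfn)(void),
    void (*shutdownfn)(void *), void (*destroyfn)(void *))
{
	if (ms->m_state & NSS_CREATE_NEEDED) {
		ms->m_state &= ~NSS_CREATE_NEEDED;
		ms->m_data = createfn();
		ms->m_state |= NSS_CREATE_COMPLETED;
	}
	if ((ms->m_state & NSS_SHUTDOWN_NEEDED) && shutdownfn != NULL) {
		ms->m_state &= ~NSS_SHUTDOWN_NEEDED;
		shutdownfn(ms->m_data);
		ms->m_state |= NSS_SHUTDOWN_COMPLETED;
	}
	if ((ms->m_state & NSS_DESTROY_NEEDED) && destroyfn != NULL) {
		ms->m_state &= ~NSS_DESTROY_NEEDED;
		destroyfn(ms->m_data);
		ms->m_data = NULL;
		ms->m_state |= NSS_DESTROY_COMPLETED;
	}
}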
+ */ + mutex_enter(&netstack_g_lock); + for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) { + mutex_enter(&ns->netstack_lock); + if (ns->netstack_m_state[moduleid] & NSS_DESTROY_COMPLETED) { + ns->netstack_m_state[moduleid] = 0; + DTRACE_PROBE2(netstack__destroy__done, + netstack_t *, ns, int, moduleid); + } + mutex_exit(&ns->netstack_lock); + } + + ns_reg[moduleid].nr_create = NULL; + ns_reg[moduleid].nr_shutdown = NULL; + ns_reg[moduleid].nr_destroy = NULL; + ns_reg[moduleid].nr_flags = 0; + mutex_exit(&netstack_g_lock); +} + +/* + * Lookup and/or allocate a netstack for this zone. + */ +static void * +netstack_zone_create(zoneid_t zoneid) +{ + netstackid_t stackid; + netstack_t *ns; + netstack_t **nsp; + zone_t *zone; + int i; + + ASSERT(netstack_initialized); + + zone = zone_find_by_id_nolock(zoneid); + ASSERT(zone != NULL); + + if (zone->zone_flags & ZF_NET_EXCL) { + stackid = zoneid; + } else { + /* Look for the stack instance for the global */ + stackid = GLOBAL_NETSTACKID; + } + + /* Allocate even if it isn't needed; simplifies locking */ + ns = (netstack_t *)kmem_zalloc(sizeof (netstack_t), KM_SLEEP); + + /* Look if there is a matching stack instance */ + mutex_enter(&netstack_g_lock); + for (nsp = &netstack_head; *nsp != NULL; + nsp = &((*nsp)->netstack_next)) { + if ((*nsp)->netstack_stackid == stackid) { + /* + * Should never find a pre-existing exclusive stack + */ + ASSERT(stackid == GLOBAL_NETSTACKID); + kmem_free(ns, sizeof (netstack_t)); + ns = *nsp; + mutex_enter(&ns->netstack_lock); + ns->netstack_numzones++; + mutex_exit(&ns->netstack_lock); + mutex_exit(&netstack_g_lock); + DTRACE_PROBE1(netstack__inc__numzones, + netstack_t *, ns); + /* Record that we have a new shared stack zone */ + netstack_shared_zone_add(zoneid); + zone->zone_netstack = ns; + return (ns); + } + } + /* Not found */ + mutex_init(&ns->netstack_lock, NULL, MUTEX_DEFAULT, NULL); + ns->netstack_stackid = zoneid; + ns->netstack_numzones = 1; + ns->netstack_refcnt = 1; /* Decremented by netstack_zone_destroy */ + ns->netstack_flags = NSF_UNINIT; + *nsp = ns; + zone->zone_netstack = ns; + + /* + * Determine the set of module create functions that need to be + * called before we drop the lock. + */ + for (i = 0; i < NS_MAX; i++) { + mutex_enter(&ns->netstack_lock); + if ((ns_reg[i].nr_flags & NRF_REGISTERED) && + (ns->netstack_m_state[i] & NSS_CREATE_ALL) == 0) { + ns->netstack_m_state[i] |= NSS_CREATE_NEEDED; + DTRACE_PROBE2(netstack__create__needed, + netstack_t *, ns, int, i); + } + mutex_exit(&ns->netstack_lock); + } + mutex_exit(&netstack_g_lock); + + netstack_do_create(); + + mutex_enter(&ns->netstack_lock); + ns->netstack_flags &= ~NSF_UNINIT; + mutex_exit(&ns->netstack_lock); + + return (ns); +} + +/* ARGSUSED */ +static void +netstack_zone_shutdown(zoneid_t zoneid, void *arg) +{ + netstack_t *ns = (netstack_t *)arg; + int i; + + ASSERT(arg != NULL); + + mutex_enter(&ns->netstack_lock); + ASSERT(ns->netstack_numzones > 0); + if (ns->netstack_numzones != 1) { + /* Stack instance being used by other zone */ + mutex_exit(&ns->netstack_lock); + ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID); + return; + } + mutex_exit(&ns->netstack_lock); + + mutex_enter(&netstack_g_lock); + /* + * Determine the set of stacks that exist before we drop the lock. + * Set SHUTDOWN_NEEDED for each of those. 
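netstack_zone_create() above decides whether a booting zone gets its own stack instance: a zone with ZF_NET_EXCL uses its zoneid as the stackid, everything else shares the GLOBAL_NETSTACKID instance and simply bumps netstack_numzones on it. A hedged sketch of that find-or-create decision follows (hypothetical names and constant values, no locking, and no equivalent of the shared-zone kstat tracking):

#include <stdlib.h>

#define	GLOBAL_NETSTACKID	0	/* assumed value for the sketch */
#define	ZF_NET_EXCL		0x1	/* assumed value for the sketch */

typedef struct netstack {
	struct netstack	*netstack_next;
	int		 netstack_stackid;
	int		 netstack_numzones;	/* zones sharing this instance */
} netstack_t;

static netstack_t	*netstack_head;

netstack_t *
zone_create_stack(int zoneid, unsigned int zone_flags)
{
	int stackid = (zone_flags & ZF_NET_EXCL) ? zoneid : GLOBAL_NETSTACKID;
	netstack_t **nsp, *ns;

	/* Shared-stack zones reuse the existing instance if there is one. */
	for (nsp = &netstack_head; *nsp != NULL; nsp = &(*nsp)->netstack_next) {
		if ((*nsp)->netstack_stackid == stackid) {
			(*nsp)->netstack_numzones++;
			return (*nsp);
		}
	}

	/* An exclusive zone (or the first shared zone) gets a new instance. */
	if ((ns = calloc(1, sizeof (*ns))) == NULL)
		return (NULL);
	ns->netstack_stackid = stackid;
	ns->netstack_numzones = 1;
	*nsp = ns;
	return (ns);
}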
+ */ + for (i = 0; i < NS_MAX; i++) { + mutex_enter(&ns->netstack_lock); + if ((ns_reg[i].nr_flags & NRF_REGISTERED) && + ns_reg[i].nr_shutdown != NULL && + (ns->netstack_m_state[i] & NSS_CREATE_COMPLETED) && + (ns->netstack_m_state[i] & NSS_SHUTDOWN_ALL) == 0) { + ns->netstack_m_state[i] |= NSS_SHUTDOWN_NEEDED; + DTRACE_PROBE2(netstack__shutdown__needed, + netstack_t *, ns, int, i); + } + mutex_exit(&ns->netstack_lock); + } + mutex_exit(&netstack_g_lock); + + /* Call the shutdown function for all registered modules */ + netstack_do_shutdown(); +} + +/* + * Common routine to release a zone. + * If this was the last zone using the stack instance then prepare to + * have the refcnt dropping to zero free the zone. + */ +/* ARGSUSED */ +static void +netstack_zone_destroy(zoneid_t zoneid, void *arg) +{ + netstack_t *ns = (netstack_t *)arg; + + ASSERT(arg != NULL); + + mutex_enter(&ns->netstack_lock); + ASSERT(ns->netstack_numzones > 0); + ns->netstack_numzones--; + if (ns->netstack_numzones != 0) { + /* Stack instance being used by other zone */ + mutex_exit(&ns->netstack_lock); + ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID); + /* Record that we a shared stack zone has gone away */ + netstack_shared_zone_remove(zoneid); + return; + } + /* + * Set CLOSING so that netstack_find_by will not find it + * and decrement the reference count. + */ + ns->netstack_flags |= NSF_CLOSING; + mutex_exit(&ns->netstack_lock); + DTRACE_PROBE1(netstack__dec__numzones, netstack_t *, ns); + /* No other thread can call zone_destroy for this stack */ + + /* + * Decrease refcnt to account for the one in netstack_zone_init() + */ + netstack_rele(ns); +} + +/* + * Called when the reference count drops to zero. + * Call the destroy functions for each registered module. + */ +static void +netstack_stack_inactive(netstack_t *ns) +{ + int i; + + mutex_enter(&netstack_g_lock); + /* + * If the shutdown callback wasn't called earlier (e.g., if this is + * a netstack shared between multiple zones), then we call it now. + */ + for (i = 0; i < NS_MAX; i++) { + mutex_enter(&ns->netstack_lock); + if ((ns_reg[i].nr_flags & NRF_REGISTERED) && + ns_reg[i].nr_shutdown != NULL && + (ns->netstack_m_state[i] & NSS_CREATE_COMPLETED) && + (ns->netstack_m_state[i] & NSS_SHUTDOWN_ALL) == 0) { + ns->netstack_m_state[i] |= NSS_SHUTDOWN_NEEDED; + DTRACE_PROBE2(netstack__shutdown__needed, + netstack_t *, ns, int, i); + } + mutex_exit(&ns->netstack_lock); + } + /* + * Determine the set of stacks that exist before we drop the lock. + * Set DESTROY_NEEDED for each of those. + */ + for (i = 0; i < NS_MAX; i++) { + mutex_enter(&ns->netstack_lock); + if ((ns_reg[i].nr_flags & NRF_REGISTERED) && + ns_reg[i].nr_destroy != NULL && + (ns->netstack_m_state[i] & NSS_CREATE_COMPLETED) && + (ns->netstack_m_state[i] & NSS_DESTROY_ALL) == 0) { + ns->netstack_m_state[i] |= NSS_DESTROY_NEEDED; + DTRACE_PROBE2(netstack__destroy__needed, + netstack_t *, ns, int, i); + } + mutex_exit(&ns->netstack_lock); + } + mutex_exit(&netstack_g_lock); + + netstack_do_shutdown(); + netstack_do_destroy(); +} + +/* + * Call the create function for the ns and moduleid if CREATE_NEEDED + * is set. + * When it calls it, it drops the netstack_lock held by the caller, + * and returns true to tell the caller it needs to re-evalute the + * state.. 
+ */ +static boolean_t +netstack_apply_create(kmutex_t *lockp, netstack_t *ns, int moduleid) +{ + void *result; + netstackid_t stackid; + + ASSERT(MUTEX_HELD(lockp)); + mutex_enter(&ns->netstack_lock); + if (ns->netstack_m_state[moduleid] & NSS_CREATE_NEEDED) { + ns->netstack_m_state[moduleid] &= ~NSS_CREATE_NEEDED; + ns->netstack_m_state[moduleid] |= NSS_CREATE_INPROGRESS; + DTRACE_PROBE2(netstack__create__inprogress, + netstack_t *, ns, int, moduleid); + mutex_exit(&ns->netstack_lock); + mutex_exit(lockp); + + ASSERT(ns_reg[moduleid].nr_create != NULL); + stackid = ns->netstack_stackid; + DTRACE_PROBE2(netstack__create__start, + netstackid_t, stackid, + netstack_t *, ns); + result = (ns_reg[moduleid].nr_create)(stackid, ns); + DTRACE_PROBE2(netstack__create__end, + void *, result, netstack_t *, ns); + + ASSERT(result != NULL); + mutex_enter(&ns->netstack_lock); + ns->netstack_modules[moduleid] = result; + ns->netstack_m_state[moduleid] &= ~NSS_CREATE_INPROGRESS; + ns->netstack_m_state[moduleid] |= NSS_CREATE_COMPLETED; + DTRACE_PROBE2(netstack__create__completed, + netstack_t *, ns, int, moduleid); + mutex_exit(&ns->netstack_lock); + return (B_TRUE); + } else { + mutex_exit(&ns->netstack_lock); + return (B_FALSE); + } +} + +/* + * Call the shutdown function for the ns and moduleid if SHUTDOWN_NEEDED + * is set. + * When it calls it, it drops the netstack_lock held by the caller, + * and returns true to tell the caller it needs to re-evalute the + * state.. + */ +static boolean_t +netstack_apply_shutdown(kmutex_t *lockp, netstack_t *ns, int moduleid) +{ + netstackid_t stackid; + void * netstack_module; + + ASSERT(MUTEX_HELD(lockp)); + mutex_enter(&ns->netstack_lock); + if (ns->netstack_m_state[moduleid] & NSS_SHUTDOWN_NEEDED) { + ns->netstack_m_state[moduleid] &= ~NSS_SHUTDOWN_NEEDED; + ns->netstack_m_state[moduleid] |= NSS_SHUTDOWN_INPROGRESS; + DTRACE_PROBE2(netstack__shutdown__inprogress, + netstack_t *, ns, int, moduleid); + mutex_exit(&ns->netstack_lock); + mutex_exit(lockp); + + ASSERT(ns_reg[moduleid].nr_shutdown != NULL); + stackid = ns->netstack_stackid; + netstack_module = ns->netstack_modules[moduleid]; + DTRACE_PROBE2(netstack__shutdown__start, + netstackid_t, stackid, + void *, netstack_module); + (ns_reg[moduleid].nr_shutdown)(stackid, netstack_module); + DTRACE_PROBE1(netstack__shutdown__end, + netstack_t *, ns); + + mutex_enter(&ns->netstack_lock); + ns->netstack_m_state[moduleid] &= ~NSS_SHUTDOWN_INPROGRESS; + ns->netstack_m_state[moduleid] |= NSS_SHUTDOWN_COMPLETED; + DTRACE_PROBE2(netstack__shutdown__completed, + netstack_t *, ns, int, moduleid); + mutex_exit(&ns->netstack_lock); + return (B_TRUE); + } else { + mutex_exit(&ns->netstack_lock); + return (B_FALSE); + } +} + +/* + * Call the destroy function for the ns and moduleid if DESTROY_NEEDED + * is set. + * When it calls it, it drops the netstack_lock held by the caller, + * and returns true to tell the caller it needs to re-evalute the + * state.. + */ +static boolean_t +netstack_apply_destroy(kmutex_t *lockp, netstack_t *ns, int moduleid) +{ + netstackid_t stackid; + void * netstack_module; + + ASSERT(MUTEX_HELD(lockp)); + mutex_enter(&ns->netstack_lock); + if (ns->netstack_m_state[moduleid] & NSS_DESTROY_NEEDED) { + ns->netstack_m_state[moduleid] &= ~NSS_DESTROY_NEEDED; + ns->netstack_m_state[moduleid] |= NSS_DESTROY_INPROGRESS; + DTRACE_PROBE2(netstack__destroy__inprogress, + netstack_t *, ns, int, moduleid); + mutex_exit(&ns->netstack_lock); + mutex_exit(lockp); + + /* XXX race against unregister? 
*/ + ASSERT(ns_reg[moduleid].nr_destroy != NULL); + stackid = ns->netstack_stackid; + netstack_module = ns->netstack_modules[moduleid]; + DTRACE_PROBE2(netstack__destroy__start, + netstackid_t, stackid, + void *, netstack_module); + (ns_reg[moduleid].nr_destroy)(stackid, netstack_module); + DTRACE_PROBE1(netstack__destroy__end, + netstack_t *, ns); + + mutex_enter(&ns->netstack_lock); + ns->netstack_modules[moduleid] = NULL; + ns->netstack_m_state[moduleid] &= ~NSS_DESTROY_INPROGRESS; + ns->netstack_m_state[moduleid] |= NSS_DESTROY_COMPLETED; + DTRACE_PROBE2(netstack__destroy__completed, + netstack_t *, ns, int, moduleid); + mutex_exit(&ns->netstack_lock); + return (B_TRUE); + } else { + mutex_exit(&ns->netstack_lock); + return (B_FALSE); + } +} + +static void +apply_loop(netstack_t **headp, kmutex_t *lockp, + boolean_t (*applyfn)(kmutex_t *, netstack_t *, int moduleid)) +{ + netstack_t *ns; + int i; + boolean_t lock_dropped, result; + + lock_dropped = B_FALSE; + ns = *headp; + while (ns != NULL) { + for (i = 0; i < NS_MAX; i++) { + result = (applyfn)(lockp, ns, i); + if (result) { +#ifdef NS_DEBUG + (void) printf("netstack_do_apply: " + "LD for %p/%d, %d\n", + (void *)ns, ns->netstack_stackid, i); +#endif + lock_dropped = B_TRUE; + mutex_enter(lockp); + } + } + /* + * If at least one applyfn call caused lockp to be dropped, + * then we don't follow netstack_next after reacquiring the + * lock, even if it is possible to do so without any hazards. + * This is because we want the design to allow for the list of + * netstacks threaded by netstack_next to change in any + * arbitrary way during the time the 'lockp' was dropped. + * + * It is safe to restart the loop at *headp since + * the applyfn changes netstack_m_state as it processes + * things, so a subsequent pass through will have no + * effect in applyfn, hence the loop will terminate + * in at worst O(N^2). + */ + if (lock_dropped) { +#ifdef NS_DEBUG + (void) printf("netstack_do_apply: " + "Lock Dropped for %p/%d, %d\n", + (void *)ns, ns->netstack_stackid, i); +#endif + lock_dropped = B_FALSE; + ns = *headp; + } else { + ns = ns->netstack_next; + } + } +} + +/* Like above, but in the reverse order of moduleids */ +static void +apply_loop_reverse(netstack_t **headp, kmutex_t *lockp, + boolean_t (*applyfn)(kmutex_t *, netstack_t *, int moduleid)) +{ + netstack_t *ns; + int i; + boolean_t lock_dropped, result; + + lock_dropped = B_FALSE; + ns = *headp; + while (ns != NULL) { + for (i = NS_MAX-1; i >= 0; i--) { + result = (applyfn)(lockp, ns, i); + if (result) { +#ifdef NS_DEBUG + (void) printf("netstack_do_apply: " + "LD for %p/%d, %d\n", + (void *)ns, ns->netstack_stackid, i); +#endif + lock_dropped = B_TRUE; + mutex_enter(lockp); + } + } + /* + * If at least one applyfn call caused lockp to be dropped, + * then we don't follow netstack_next after reacquiring the + * lock, even if it is possible to do so without any hazards. + * This is because we want the design to allow for the list of + * netstacks threaded by netstack_next to change in any + * arbitrary way during the time the 'lockp' was dropped. + * + * It is safe to restart the loop at *headp since + * the applyfn changes netstack_m_state as it processes + * things, so a subsequent pass through will have no + * effect in applyfn, hence the loop will terminate + * in at worst O(N^2). 
+ */ + if (lock_dropped) { +#ifdef NS_DEBUG + (void) printf("netstack_do_apply: " + "Lock Dropped for %p/%d, %d\n", + (void *)ns, ns->netstack_stackid, i); +#endif + lock_dropped = B_FALSE; + ns = *headp; + } else { + ns = ns->netstack_next; + } + } +} + +/* + * Apply a function to all module/netstack combinations. + * The applyfn returns true if it had dropped the locks. + */ +static void +netstack_do_apply(int reverse, + boolean_t (*applyfn)(kmutex_t *, netstack_t *, int moduleid)) +{ + mutex_enter(&netstack_g_lock); + if (reverse) + apply_loop_reverse(&netstack_head, &netstack_g_lock, applyfn); + else + apply_loop(&netstack_head, &netstack_g_lock, applyfn); + mutex_exit(&netstack_g_lock); +} + +/* + * Run the create function for all modules x stack combinations + * that have NSS_CREATE_NEEDED set. + * + * Call the create function for each stack that has CREATE_NEEDED. + * Set CREATE_INPROGRESS, drop lock, and after done, + * set CREATE_COMPLETE + */ +static void +netstack_do_create(void) +{ + netstack_do_apply(B_FALSE, netstack_apply_create); +} + +/* + * Run the shutdown function for all modules x stack combinations + * that have NSS_SHUTDOWN_NEEDED set. + * + * Call the shutdown function for each stack that has SHUTDOWN_NEEDED. + * Set SHUTDOWN_INPROGRESS, drop lock, and after done, + * set SHUTDOWN_COMPLETE + */ +static void +netstack_do_shutdown(void) +{ + netstack_do_apply(B_FALSE, netstack_apply_shutdown); +} + +/* + * Run the destroy function for all modules x stack combinations + * that have NSS_DESTROY_NEEDED set. + * + * Call the destroy function for each stack that has DESTROY_NEEDED. + * Set DESTROY_INPROGRESS, drop lock, and after done, + * set DESTROY_COMPLETE + * + * Since a netstack_t is never reused (when a zone is rebooted it gets + * a new zoneid == netstackid i.e. a new netstack_t is allocated) we leave + * netstack_m_state the way it is i.e. with NSS_DESTROY_COMPLETED set. + */ +static void +netstack_do_destroy(void) +{ + /* + * Have to walk the moduleids in reverse order since some + * modules make implicit assumptions about the order + */ + netstack_do_apply(B_TRUE, netstack_apply_destroy); +} + +/* + * Get the stack instance used in caller's zone. + * Increases the reference count, caller must do a netstack_rele. + * It can't be called after zone_destroy() has started. + */ +static netstack_t * +netstack_get_current(void) +{ + netstack_t *ns; + + ns = curproc->p_zone->zone_netstack; + ASSERT(ns != NULL); + if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING)) + return (NULL); + + netstack_hold(ns); + + return (ns); +} + +/* + * Find a stack instance given the cred. + * This is used by the modules to potentially allow for a future when + * something other than the zoneid is used to determine the stack. + */ +netstack_t * +netstack_find_by_cred(const cred_t *cr) +{ + zoneid_t zoneid = crgetzoneid(cr); + + /* Handle the case when cr_zone is NULL */ + if (zoneid == (zoneid_t)-1) + zoneid = GLOBAL_ZONEID; + + /* For performance ... */ + if (curproc->p_zone->zone_id == zoneid) + return (netstack_get_current()); + else + return (netstack_find_by_zoneid(zoneid)); +} + +/* + * Find a stack instance given the zoneid. + * Increases the reference count if found; caller must do a + * netstack_rele(). + * + * If there is no exact match then assume the shared stack instance + * matches. + * + * Skip the unitialized ones. 
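/*
 * Illustrative sketch, not part of this changeset: how a hypothetical
 * per-stack subsystem could plug into the create/shutdown/destroy
 * machinery above.  The module id NS_FOO and the foo_stack_t type are
 * invented (a real client would need its own entry in the NS_* list);
 * the callback signatures follow the netstack_register() call that
 * NS_STR makes later in this patch, and passing a NULL shutdown
 * callback is assumed to be legal since the apply code checks
 * nr_shutdown for NULL.
 */
typedef struct foo_stack {
        netstack_t      *fs_netstack;   /* backpointer set at create time */
        kmutex_t        fs_lock;        /* protects per-stack foo state */
} foo_stack_t;

/* Runs once per stack instance after NSS_CREATE_NEEDED is set */
/* ARGSUSED */
static void *
foo_stack_create(netstackid_t stackid, netstack_t *ns)
{
        foo_stack_t *fs;

        fs = kmem_zalloc(sizeof (*fs), KM_SLEEP);
        fs->fs_netstack = ns;
        mutex_init(&fs->fs_lock, NULL, MUTEX_DEFAULT, NULL);
        return (fs);    /* saved in ns->netstack_modules[NS_FOO] */
}

/* Runs when the stack instance is torn down (NSS_DESTROY_NEEDED) */
/* ARGSUSED */
static void
foo_stack_destroy(netstackid_t stackid, void *arg)
{
        foo_stack_t *fs = arg;

        mutex_destroy(&fs->fs_lock);
        kmem_free(fs, sizeof (*fs));
}

static void
foo_init(void)
{
        netstack_register(NS_FOO, foo_stack_create, NULL, foo_stack_destroy);
}

static void
foo_fini(void)
{
        netstack_unregister(NS_FOO);
}

/*
 * Look up the per-stack state for a stack id.  The stack is returned
 * held; the caller does netstack_rele(*nsp) when it is done with fs.
 */
static foo_stack_t *
foo_stack_lookup(netstackid_t stackid, netstack_t **nsp)
{
        netstack_t *ns;

        if ((ns = netstack_find_by_stackid(stackid)) == NULL)
                return (NULL);
        *nsp = ns;
        return (ns->netstack_modules[NS_FOO]);
}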
+ */ +netstack_t * +netstack_find_by_zoneid(zoneid_t zoneid) +{ + netstack_t *ns; + zone_t *zone; + + zone = zone_find_by_id(zoneid); + + if (zone == NULL) + return (NULL); + + ns = zone->zone_netstack; + ASSERT(ns != NULL); + if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING)) + ns = NULL; + else + netstack_hold(ns); + + zone_rele(zone); + return (ns); +} + +/* + * Find a stack instance given the zoneid. + * Increases the reference count if found; caller must do a + * netstack_rele(). + * + * If there is no exact match then assume the shared stack instance + * matches. + * + * Skip the unitialized ones. + * + * NOTE: The caller must hold zonehash_lock. + */ +netstack_t * +netstack_find_by_zoneid_nolock(zoneid_t zoneid) +{ + netstack_t *ns; + zone_t *zone; + + zone = zone_find_by_id_nolock(zoneid); + + if (zone == NULL) + return (NULL); + + ns = zone->zone_netstack; + ASSERT(ns != NULL); + + if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING)) + ns = NULL; + else + netstack_hold(ns); + + zone_rele(zone); + return (ns); +} + +/* + * Find a stack instance given the stackid with exact match? + * Increases the reference count if found; caller must do a + * netstack_rele(). + * + * Skip the unitialized ones. + */ +netstack_t * +netstack_find_by_stackid(netstackid_t stackid) +{ + netstack_t *ns; + + mutex_enter(&netstack_g_lock); + for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) { + mutex_enter(&ns->netstack_lock); + if (ns->netstack_stackid == stackid && + !(ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))) { + mutex_exit(&ns->netstack_lock); + netstack_hold(ns); + mutex_exit(&netstack_g_lock); + return (ns); + } + mutex_exit(&ns->netstack_lock); + } + mutex_exit(&netstack_g_lock); + return (NULL); +} + +void +netstack_rele(netstack_t *ns) +{ + netstack_t **nsp; + boolean_t found; + int refcnt, numzones; + + mutex_enter(&ns->netstack_lock); + ASSERT(ns->netstack_refcnt > 0); + ns->netstack_refcnt--; + /* + * As we drop the lock additional netstack_rele()s can come in + * and decrement the refcnt to zero and free the netstack_t. + * Store pointers in local variables and if we were not the last + * then don't reference the netstack_t after that. 
+ */ + refcnt = ns->netstack_refcnt; + numzones = ns->netstack_numzones; + DTRACE_PROBE1(netstack__dec__ref, netstack_t *, ns); + mutex_exit(&ns->netstack_lock); + + if (refcnt == 0 && numzones == 0) { + /* + * Time to call the destroy functions and free up + * the structure + */ + netstack_stack_inactive(ns); + + /* Finally remove from list of netstacks */ + mutex_enter(&netstack_g_lock); + found = B_FALSE; + for (nsp = &netstack_head; *nsp != NULL; + nsp = &(*nsp)->netstack_next) { + if (*nsp == ns) { + *nsp = ns->netstack_next; + ns->netstack_next = NULL; + found = B_TRUE; + break; + } + } + ASSERT(found); + mutex_exit(&netstack_g_lock); + + ASSERT(ns->netstack_flags & NSF_CLOSING); + kmem_free(ns, sizeof (*ns)); + } +} + +void +netstack_hold(netstack_t *ns) +{ + mutex_enter(&ns->netstack_lock); + ns->netstack_refcnt++; + ASSERT(ns->netstack_refcnt > 0); + mutex_exit(&ns->netstack_lock); + DTRACE_PROBE1(netstack__inc__ref, netstack_t *, ns); +} + +/* + * To support kstat_create_netstack() using kstat_zone_add we need + * to track both + * - all zoneids that use the global/shared stack + * - all kstats that have been added for the shared stack + */ +kstat_t * +kstat_create_netstack(char *ks_module, int ks_instance, char *ks_name, + char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags, + netstackid_t ks_netstackid) +{ + kstat_t *ks; + + if (ks_netstackid == GLOBAL_NETSTACKID) { + ks = kstat_create_zone(ks_module, ks_instance, ks_name, + ks_class, ks_type, ks_ndata, ks_flags, GLOBAL_ZONEID); + if (ks != NULL) + netstack_shared_kstat_add(ks); + return (ks); + } else { + zoneid_t zoneid = ks_netstackid; + + return (kstat_create_zone(ks_module, ks_instance, ks_name, + ks_class, ks_type, ks_ndata, ks_flags, zoneid)); + } +} + +void +kstat_delete_netstack(kstat_t *ks, netstackid_t ks_netstackid) +{ + if (ks_netstackid == GLOBAL_NETSTACKID) { + netstack_shared_kstat_remove(ks); + } + kstat_delete(ks); +} + +static void +netstack_shared_zone_add(zoneid_t zoneid) +{ + struct shared_zone_list *sz; + struct shared_kstat_list *sk; + + sz = (struct shared_zone_list *)kmem_zalloc(sizeof (*sz), KM_SLEEP); + sz->sz_zoneid = zoneid; + + /* Insert in list */ + mutex_enter(&netstack_shared_lock); + sz->sz_next = netstack_shared_zones; + netstack_shared_zones = sz; + + /* + * Perform kstat_zone_add for each existing shared stack kstat. + * Note: Holds netstack_shared_lock lock across kstat_zone_add. + */ + for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) { + kstat_zone_add(sk->sk_kstat, zoneid); + } + mutex_exit(&netstack_shared_lock); +} + +static void +netstack_shared_zone_remove(zoneid_t zoneid) +{ + struct shared_zone_list **szp, *sz; + struct shared_kstat_list *sk; + + /* Find in list */ + mutex_enter(&netstack_shared_lock); + sz = NULL; + for (szp = &netstack_shared_zones; *szp != NULL; + szp = &((*szp)->sz_next)) { + if ((*szp)->sz_zoneid == zoneid) { + sz = *szp; + break; + } + } + /* We must find it */ + ASSERT(sz != NULL); + *szp = sz->sz_next; + sz->sz_next = NULL; + + /* + * Perform kstat_zone_remove for each existing shared stack kstat. + * Note: Holds netstack_shared_lock lock across kstat_zone_remove. 
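/*
 * Illustrative sketch, not part of this changeset: a per-stack module
 * creating its kstat through the kstat_create_netstack() wrapper above,
 * so that every zone sharing the global stack picks it up through
 * kstat_zone_add().  The "foomod"/"foostat" names and the bar_stack_t
 * type are invented for the example.
 */
typedef struct bar_stack {
        kstat_t         *bs_ksp;
        kstat_named_t   bs_packets;
} bar_stack_t;

/* ARGSUSED */
static void *
bar_stack_create(netstackid_t stackid, netstack_t *ns)
{
        bar_stack_t *bs;

        bs = kmem_zalloc(sizeof (*bs), KM_SLEEP);
        bs->bs_ksp = kstat_create_netstack("foomod", 0, "foostat", "net",
            KSTAT_TYPE_NAMED, 1, KSTAT_FLAG_VIRTUAL, stackid);
        if (bs->bs_ksp != NULL) {
                kstat_named_init(&bs->bs_packets, "packets",
                    KSTAT_DATA_UINT64);
                bs->bs_ksp->ks_data = &bs->bs_packets;
                kstat_install(bs->bs_ksp);
        }
        return (bs);
}

/* ARGSUSED */
static void
bar_stack_destroy(netstackid_t stackid, void *arg)
{
        bar_stack_t *bs = arg;

        if (bs->bs_ksp != NULL)
                kstat_delete_netstack(bs->bs_ksp, stackid);
        kmem_free(bs, sizeof (*bs));
}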
+ */ + for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) { + kstat_zone_remove(sk->sk_kstat, zoneid); + } + mutex_exit(&netstack_shared_lock); + + kmem_free(sz, sizeof (*sz)); +} + +static void +netstack_shared_kstat_add(kstat_t *ks) +{ + struct shared_zone_list *sz; + struct shared_kstat_list *sk; + + sk = (struct shared_kstat_list *)kmem_zalloc(sizeof (*sk), KM_SLEEP); + sk->sk_kstat = ks; + + /* Insert in list */ + mutex_enter(&netstack_shared_lock); + sk->sk_next = netstack_shared_kstats; + netstack_shared_kstats = sk; + + /* + * Perform kstat_zone_add for each existing shared stack zone. + * Note: Holds netstack_shared_lock lock across kstat_zone_add. + */ + for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) { + kstat_zone_add(ks, sz->sz_zoneid); + } + mutex_exit(&netstack_shared_lock); +} + +static void +netstack_shared_kstat_remove(kstat_t *ks) +{ + struct shared_zone_list *sz; + struct shared_kstat_list **skp, *sk; + + /* Find in list */ + mutex_enter(&netstack_shared_lock); + sk = NULL; + for (skp = &netstack_shared_kstats; *skp != NULL; + skp = &((*skp)->sk_next)) { + if ((*skp)->sk_kstat == ks) { + sk = *skp; + break; + } + } + /* Must find it */ + ASSERT(sk != NULL); + *skp = sk->sk_next; + sk->sk_next = NULL; + + /* + * Perform kstat_zone_remove for each existing shared stack kstat. + * Note: Holds netstack_shared_lock lock across kstat_zone_remove. + */ + for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) { + kstat_zone_remove(ks, sz->sz_zoneid); + } + mutex_exit(&netstack_shared_lock); + kmem_free(sk, sizeof (*sk)); +} + +/* + * If a zoneid is part of the shared zone, return true + */ +static boolean_t +netstack_find_shared_zoneid(zoneid_t zoneid) +{ + struct shared_zone_list *sz; + + mutex_enter(&netstack_shared_lock); + for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) { + if (sz->sz_zoneid == zoneid) { + mutex_exit(&netstack_shared_lock); + return (B_TRUE); + } + } + mutex_exit(&netstack_shared_lock); + return (B_FALSE); +} + +/* + * Hide the fact that zoneids and netstackids are allocated from + * the same space in the current implementation. + * XXX could add checks that the stackid/zoneids are valid... + */ +zoneid_t +netstackid_to_zoneid(netstackid_t stackid) +{ + return (stackid); +} + +netstackid_t +zoneid_to_netstackid(zoneid_t zoneid) +{ + if (netstack_find_shared_zoneid(zoneid)) + return (GLOBAL_ZONEID); + else + return (zoneid); +} + +/* + * Simplistic support for walking all the handles. 
+ * Example usage: + * netstack_handle_t nh; + * netstack_t *ns; + * + * netstack_next_init(&nh); + * while ((ns = netstack_next(&nh)) != NULL) { + * do something; + * netstack_rele(ns); + * } + * netstack_next_fini(&nh); + */ +void +netstack_next_init(netstack_handle_t *handle) +{ + *handle = 0; +} + +/* ARGSUSED */ +void +netstack_next_fini(netstack_handle_t *handle) +{ +} + +netstack_t * +netstack_next(netstack_handle_t *handle) +{ + netstack_t *ns; + int i, end; + + end = *handle; + /* Walk skipping *handle number of instances */ + + /* Look if there is a matching stack instance */ + mutex_enter(&netstack_g_lock); + ns = netstack_head; + for (i = 0; i < end; i++) { + if (ns == NULL) + break; + ns = ns->netstack_next; + } + /* skip those with that aren't really here */ + while (ns != NULL) { + mutex_enter(&ns->netstack_lock); + if ((ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING)) == 0) { + mutex_exit(&ns->netstack_lock); + break; + } + mutex_exit(&ns->netstack_lock); + end++; + ns = ns->netstack_next; + } + if (ns != NULL) { + *handle = end + 1; + netstack_hold(ns); + } + mutex_exit(&netstack_g_lock); + return (ns); +} diff --git a/usr/src/uts/common/os/policy.c b/usr/src/uts/common/os/policy.c index 8ea071b09b..5800e1e96f 100644 --- a/usr/src/uts/common/os/policy.c +++ b/usr/src/uts/common/os/policy.c @@ -1507,6 +1507,56 @@ secpolicy_net_config(const cred_t *cr, boolean_t checkonly) /* + * PRIV_SYS_NET_CONFIG has a superset of PRIV_SYS_IP_CONFIG. + * + * There are a few rare cases where the kernel generates ioctls() from + * interrupt context with a credential of kcred rather than NULL. + * In those cases, we take the safe and cheap test. + */ +int +secpolicy_ip_config(const cred_t *cr, boolean_t checkonly) +{ + if (PRIV_POLICY_ONLY(cr, PRIV_SYS_NET_CONFIG, B_FALSE)) + return (secpolicy_net_config(cr, checkonly)); + + if (checkonly) { + return (PRIV_POLICY_ONLY(cr, PRIV_SYS_IP_CONFIG, B_FALSE) ? + 0 : EPERM); + } else { + return (PRIV_POLICY(cr, PRIV_SYS_IP_CONFIG, B_FALSE, EPERM, + NULL)); + } +} + + +/* + * Map IP pseudo privileges to actual privileges. + * So we don't need to recompile IP when we change the privileges. + */ +int +secpolicy_ip(const cred_t *cr, int netpriv, boolean_t checkonly) +{ + int priv = PRIV_ALL; + + switch (netpriv) { + case OP_CONFIG: + priv = PRIV_SYS_IP_CONFIG; + break; + case OP_RAW: + priv = PRIV_NET_RAWACCESS; + break; + case OP_PRIVPORT: + priv = PRIV_NET_PRIVADDR; + break; + } + ASSERT(priv != PRIV_ALL); + if (checkonly) + return (PRIV_POLICY_ONLY(cr, priv, B_FALSE) ? 0 : EPERM); + else + return (PRIV_POLICY(cr, priv, B_FALSE, EPERM, NULL)); +} + +/* * Map network pseudo privileges to actual privileges. * So we don't need to recompile IP when we change the privileges. */ diff --git a/usr/src/uts/common/os/priv_defs b/usr/src/uts/common/os/priv_defs index 08d1b40eb6..a39896e73a 100644 --- a/usr/src/uts/common/os/priv_defs +++ b/usr/src/uts/common/os/priv_defs @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. * INSERT COMMENT @@ -363,16 +363,20 @@ privilege PRIV_SYS_MOUNT only control mounts performed from within said zone. Outside the global zone, the "nodevices" option is always forced. -privilege PRIV_SYS_NET_CONFIG +privilege PRIV_SYS_IP_CONFIG Allows a process to configure a system's network interfaces and routes. Allows a process to configure network parameters using ndd. 
Allows a process access to otherwise restricted information using ndd. + Allows a process to configure IPsec. + Allows a process to pop anchored STREAMs modules with matching zoneid. + +privilege PRIV_SYS_NET_CONFIG + + Allows all that PRIV_SYS_IP_CONFIG allows. Allows a process to push the rpcmod STREAMs module. - Allows a process to pop anchored STREAMs modules. Allows a process to INSERT/REMOVE STREAMs modules on locations other than the top of the module stack. - Allows a process to configure IPsec. privilege PRIV_SYS_NFS diff --git a/usr/src/uts/common/os/space.c b/usr/src/uts/common/os/space.c index c91cd5c8be..30f102e93c 100644 --- a/usr/src/uts/common/os/space.c +++ b/usr/src/uts/common/os/space.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -176,7 +176,6 @@ char ti_statetbl[TE_NOEVENTS][TS_NOSTATES] = { }; -#include <sys/sad.h> #include <sys/tty.h> #include <sys/ptyvar.h> @@ -188,7 +187,6 @@ static void store_fetch_initspace(); void space_init(void) { - sad_initspace(); pty_initspace(); store_fetch_initspace(); } @@ -221,16 +219,6 @@ dev_t uconsdev = NODEV; int cn_conf; /* - * Moved from sad_conf.c because of the usual in loadable modules - */ - -#ifndef NSTRPHASH -#define NSTRPHASH 128 -#endif -struct autopush **strpcache; -int strpmask = NSTRPHASH - 1; - -/* * Flag whether console fb output is using PROM/PROM emulation * terminal emulator, or is using the kernel terminal emulator. */ diff --git a/usr/src/uts/common/os/streamio.c b/usr/src/uts/common/os/streamio.c index 37b666c199..8c04a0754f 100644 --- a/usr/src/uts/common/os/streamio.c +++ b/usr/src/uts/common/os/streamio.c @@ -23,7 +23,7 @@ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -52,6 +52,7 @@ #include <sys/uio.h> #include <sys/cmn_err.h> #include <sys/sad.h> +#include <sys/netstack.h> #include <sys/priocntl.h> #include <sys/jioctl.h> #include <sys/procset.h> @@ -76,6 +77,7 @@ #include <sys/sunldi_impl.h> #include <sys/autoconf.h> #include <sys/policy.h> +#include <sys/zone.h> /* @@ -190,7 +192,7 @@ static boolean_t msghasdata(mblk_t *bp); static int push_mod(queue_t *qp, dev_t *devp, struct stdata *stp, const char *name, - int anchor, cred_t *crp) + int anchor, cred_t *crp, uint_t anchor_zoneid) { int error; fmodsw_impl_t *fp; @@ -219,8 +221,10 @@ push_mod(queue_t *qp, dev_t *devp, struct stdata *stp, const char *name, * put at this place in the stream, and add if so. */ mutex_enter(&stp->sd_lock); - if (anchor == stp->sd_pushcnt) + if (anchor == stp->sd_pushcnt) { stp->sd_anchor = stp->sd_pushcnt; + stp->sd_anchorzone = anchor_zoneid; + } mutex_exit(&stp->sd_lock); return (0); @@ -242,6 +246,9 @@ stropen(vnode_t *vp, dev_t *devp, int flag, cred_t *crp) int cloneopen; queue_t *brq; major_t major; + str_stack_t *ss; + zoneid_t zoneid; + uint_t anchor; #ifdef C2_AUDIT if (audit_active) @@ -464,23 +471,41 @@ ckreturn: major = getmajor(*devp); if (push_drcompat && cloneopen && NETWORK_DRV(major) && ((brq->q_flag & _QASSOCIATED) == 0)) { - if (push_mod(qp, &dummydev, stp, DRMODNAME, 0, crp) != 0) + if (push_mod(qp, &dummydev, stp, DRMODNAME, 0, crp, 0) != 0) cmn_err(CE_WARN, "cannot push " DRMODNAME " streams module"); } /* - * check for modules that need to be autopushed + * Check for autopush. Start with the global zone. 
If not found + * check in the local zone. */ - if ((ap = sad_ap_find_by_dev(*devp)) == NULL) + zoneid = GLOBAL_ZONEID; +retryap: + ss = netstack_find_by_stackid(zoneid_to_netstackid(zoneid))-> + netstack_str; + if ((ap = sad_ap_find_by_dev(*devp, ss)) == NULL) { + netstack_rele(ss->ss_netstack); + if (zoneid == GLOBAL_ZONEID) { + /* + * None found. Also look in the zone's autopush table. + */ + zoneid = crgetzoneid(crp); + if (zoneid != GLOBAL_ZONEID) + goto retryap; + } goto opendone; + } + anchor = ap->ap_anchor; + zoneid = crgetzoneid(crp); for (s = 0; s < ap->ap_npush; s++) { error = push_mod(qp, &dummydev, stp, ap->ap_list[s], - ap->ap_anchor, crp); + anchor, crp, zoneid); if (error != 0) break; } - sad_ap_rele(ap); + sad_ap_rele(ap, ss); + netstack_rele(ss->ss_netstack); /* * let specfs know that open failed part way through @@ -623,7 +648,16 @@ strclose(struct vnode *vp, int flag, cred_t *crp) /* Check if an I_LINK was ever done on this stream */ if (stp->sd_flag & STRHASLINKS) { - (void) munlinkall(stp, LINKCLOSE|LINKNORMAL, crp, &rval); + netstack_t *ns; + str_stack_t *ss; + + ns = netstack_find_by_cred(crp); + ASSERT(ns != NULL); + ss = ns->netstack_str; + ASSERT(ss != NULL); + + (void) munlinkall(stp, LINKCLOSE|LINKNORMAL, crp, &rval, ss); + netstack_rele(ss->ss_netstack); } while (_SAMESTR(qp)) { @@ -3754,7 +3788,8 @@ strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag, * privileges; take the cheapest (non-locking) check * first. */ - if (secpolicy_net_config(crp, B_TRUE) != 0) { + if (secpolicy_ip_config(crp, B_TRUE) != 0 || + (stp->sd_anchorzone != crgetzoneid(crp))) { mutex_enter(&stp->sd_lock); /* * Anchors only apply if there's at least one @@ -3765,8 +3800,10 @@ strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag, stp->sd_vnode->v_type != VFIFO) { strendplumb(stp); mutex_exit(&stp->sd_lock); + if (stp->sd_anchorzone != crgetzoneid(crp)) + return (EINVAL); /* Audit and report error */ - return (secpolicy_net_config(crp, B_FALSE)); + return (secpolicy_ip_config(crp, B_FALSE)); } mutex_exit(&stp->sd_lock); } @@ -3809,9 +3846,10 @@ strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag, mutex_exit(QLOCK(wrq)); /* If we popped through the anchor, then reset the anchor. */ - if (stp->sd_pushcnt < stp->sd_anchor) + if (stp->sd_pushcnt < stp->sd_anchor) { stp->sd_anchor = 0; - + stp->sd_anchorzone = 0; + } strendplumb(stp); mutex_exit(&stp->sd_lock); return (error); @@ -3841,6 +3879,8 @@ strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag, int fd; linkinfo_t *linkp; struct file *fp; + netstack_t *ns; + str_stack_t *ss; /* * Do not allow the wildcard muxid. 
This ioctl is not @@ -3850,15 +3890,22 @@ strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag, return (EINVAL); } + ns = netstack_find_by_cred(crp); + ASSERT(ns != NULL); + ss = ns->netstack_str; + ASSERT(ss != NULL); + mutex_enter(&muxifier); - linkp = findlinks(vp->v_stream, muxid, LINKPERSIST); + linkp = findlinks(vp->v_stream, muxid, LINKPERSIST, ss); if (linkp == NULL) { mutex_exit(&muxifier); + netstack_rele(ss->ss_netstack); return (EINVAL); } if ((fd = ufalloc(0)) == -1) { mutex_exit(&muxifier); + netstack_rele(ss->ss_netstack); return (EMFILE); } fp = linkp->li_fpdown; @@ -3868,6 +3915,7 @@ strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag, mutex_exit(&muxifier); setf(fd, fp); *rvalp = fd; + netstack_rele(ss->ss_netstack); return (0); } @@ -3876,7 +3924,9 @@ strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag, /* * To insert a module to a given position in a stream. * In the first release, only allow privileged user - * to use this ioctl. + * to use this ioctl. Furthermore, the insert is only allowed + * below an anchor if the zoneid is the same as the zoneid + * which created the anchor. * * Note that we do not plan to support this ioctl * on pipes in the first release. We want to learn more @@ -3905,6 +3955,9 @@ strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag, return (EINVAL); if ((error = secpolicy_net_config(crp, B_FALSE)) != 0) return (error); + if (stp->sd_anchor != 0 && + stp->sd_anchorzone != crgetzoneid(crp)) + return (EINVAL); error = strcopyin((void *)arg, STRUCT_BUF(strmodinsert), STRUCT_SIZE(strmodinsert), copyflag); @@ -3950,6 +4003,22 @@ strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag, mutex_exit(&stp->sd_lock); return (EINVAL); } + if (stp->sd_anchor != 0) { + /* + * Is this insert below the anchor? + * Pushcnt hasn't been increased yet hence + * we test for greater than here, and greater or + * equal after qattach. + */ + if (pos > (stp->sd_pushcnt - stp->sd_anchor) && + stp->sd_anchorzone != crgetzoneid(crp)) { + fmodsw_rele(fp); + strendplumb(stp); + mutex_exit(&stp->sd_lock); + return (EPERM); + } + } + mutex_exit(&stp->sd_lock); /* @@ -4026,6 +4095,9 @@ strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag, * the ambiguity of removal if a module is inserted/pushed * multiple times in a stream. In the first release, only * allow privileged user to use this ioctl. + * Furthermore, the remove is only allowed + * below an anchor if the zoneid is the same as the zoneid + * which created the anchor. * * Note that we do not plan to support this ioctl * on pipes in the first release. We want to learn more @@ -4055,6 +4127,9 @@ strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag, return (EINVAL); if ((error = secpolicy_net_config(crp, B_FALSE)) != 0) return (error); + if (stp->sd_anchor != 0 && + stp->sd_anchorzone != crgetzoneid(crp)) + return (EINVAL); error = strcopyin((void *)arg, STRUCT_BUF(strmodremove), STRUCT_SIZE(strmodremove), copyflag); @@ -4089,6 +4164,22 @@ strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag, return (EINVAL); } + /* + * If the position is at or below an anchor, then the zoneid + * must match the zoneid that created the anchor. 
+ */ + if (stp->sd_anchor != 0) { + pos = STRUCT_FGET(strmodremove, pos); + if (pos >= (stp->sd_pushcnt - stp->sd_anchor) && + stp->sd_anchorzone != crgetzoneid(crp)) { + mutex_enter(&stp->sd_lock); + strendplumb(stp); + mutex_exit(&stp->sd_lock); + return (EPERM); + } + } + + ASSERT(!(q->q_flag & QREADR)); qdetach(_RD(q), 1, flag, crp, is_remove); @@ -4132,7 +4223,7 @@ strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag, */ if (stp->sd_anchor != 0) { pos = STRUCT_FGET(strmodremove, pos); - if (pos == 0) + if (pos == stp->sd_pushcnt - stp->sd_anchor + 1) stp->sd_anchor = 0; else if (pos > (stp->sd_pushcnt - stp->sd_anchor + 1)) stp->sd_anchor--; @@ -4156,9 +4247,14 @@ strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag, mutex_exit(&stp->sd_lock); return (EINVAL); } - + /* Only allow the same zoneid to update the anchor */ + if (stp->sd_anchor != 0 && + stp->sd_anchorzone != crgetzoneid(crp)) { + mutex_exit(&stp->sd_lock); + return (EINVAL); + } stp->sd_anchor = stp->sd_pushcnt; - + stp->sd_anchorzone = crgetzoneid(crp); mutex_exit(&stp->sd_lock); return (0); @@ -4185,7 +4281,8 @@ strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag, /* * Link a multiplexor. */ - return (mlink(vp, cmd, (int)arg, crp, rvalp, 0)); + error = mlink(vp, cmd, (int)arg, crp, rvalp, 0); + return (error); case _I_PLINK_LH: /* @@ -4207,6 +4304,8 @@ strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag, struct linkinfo *linkp; int native_arg = (int)arg; int type; + netstack_t *ns; + str_stack_t *ss; TRACE_1(TR_FAC_STREAMS_FR, TR_I_UNLINK, "I_UNLINK/I_PUNLINK:%p", stp); @@ -4220,18 +4319,25 @@ strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag, if (native_arg == 0) { return (EINVAL); } + ns = netstack_find_by_cred(crp); + ASSERT(ns != NULL); + ss = ns->netstack_str; + ASSERT(ss != NULL); + if (native_arg == MUXID_ALL) - error = munlinkall(stp, type, crp, rvalp); + error = munlinkall(stp, type, crp, rvalp, ss); else { mutex_enter(&muxifier); - if (!(linkp = findlinks(stp, (int)arg, type))) { + if (!(linkp = findlinks(stp, (int)arg, type, ss))) { /* invalid user supplied index number */ mutex_exit(&muxifier); + netstack_rele(ss->ss_netstack); return (EINVAL); } /* munlink drops the muxifier lock */ - error = munlink(stp, linkp, type, crp, rvalp); + error = munlink(stp, linkp, type, crp, rvalp, ss); } + netstack_rele(ss->ss_netstack); return (error); } diff --git a/usr/src/uts/common/os/strsubr.c b/usr/src/uts/common/os/strsubr.c index db039e241e..37302b67e4 100644 --- a/usr/src/uts/common/os/strsubr.c +++ b/usr/src/uts/common/os/strsubr.c @@ -23,7 +23,7 @@ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
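/*
 * Illustrative sketch, not part of this changeset: the usual userland
 * I_PUSH/I_ANCHOR sequence whose semantics the strioctl() changes above
 * refine.  With this patch the anchor also records sd_anchorzone, so a
 * later I_POP through the anchor, or an _I_INSERT/_I_REMOVE at or below
 * it, issued from a different zone is rejected.  "foomod" is a
 * placeholder module name.
 */
#include <fcntl.h>
#include <stropts.h>
#include <unistd.h>

static int
push_and_anchor(const char *dev, const char *mod)
{
        int fd;

        if ((fd = open(dev, O_RDWR)) < 0)
                return (-1);
        if (ioctl(fd, I_PUSH, mod) < 0 || ioctl(fd, I_ANCHOR, 0) < 0) {
                (void) close(fd);
                return (-1);
        }
        return (fd);    /* the anchor lives for the life of this stream */
}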
*/ @@ -77,6 +77,10 @@ #include <sys/strft.h> #include <sys/fs/snode.h> #include <sys/zone.h> +#include <sys/open.h> +#include <sys/sunldi.h> +#include <sys/sad.h> +#include <sys/netstack.h> #define O_SAMESTR(q) (((q)->q_next) && \ (((q)->q_flag & QREADR) == ((q)->q_next->q_flag & QREADR))) @@ -199,6 +203,10 @@ kthread_t *bc_bkgrnd_thread; /* Thread to service bufcall requests */ kmutex_t strresources; /* protects global resources */ kmutex_t muxifier; /* single-threads multiplexor creation */ +static void *str_stack_init(netstackid_t stackid, netstack_t *ns); +static void str_stack_shutdown(netstackid_t stackid, void *arg); +static void str_stack_fini(netstackid_t stackid, void *arg); + extern void time_to_wait(clock_t *, clock_t); /* @@ -228,8 +236,6 @@ int max_n_ciputctrl = 16; */ int min_n_ciputctrl = 2; -static struct mux_node *mux_nodes; /* mux info for cycle checking */ - /* * Per-driver/module syncqs * ======================== @@ -835,16 +841,8 @@ ciputctrl_destructor(void *buf, void *cdrarg) void strinit(void) { - int i; int ncpus = ((boot_max_ncpus == -1) ? max_ncpus : boot_max_ncpus); - /* - * Set up mux_node structures. - */ - mux_nodes = kmem_zalloc((sizeof (struct mux_node) * devcnt), KM_SLEEP); - for (i = 0; i < devcnt; i++) - mux_nodes[i].mn_imaj = i; - stream_head_cache = kmem_cache_create("stream_head_cache", sizeof (stdata_t), 0, stream_head_constructor, stream_head_destructor, NULL, @@ -904,6 +902,16 @@ strinit(void) * TPI support routine initialisation. */ tpi_init(); + + /* + * Handle to have autopush and persistent link information per + * zone. + * Note: uses shutdown hook instead of destroy hook so that the + * persistent links can be torn down before the destroy hooks + * in the TCP/IP stack are called. + */ + netstack_register(NS_STR, str_stack_init, str_stack_shutdown, + str_stack_fini); } void @@ -1544,7 +1552,7 @@ lbfree(linkinfo_t *linkp) * and 0 otherwise. */ int -linkcycle(stdata_t *upstp, stdata_t *lostp) +linkcycle(stdata_t *upstp, stdata_t *lostp, str_stack_t *ss) { struct mux_node *np; struct mux_edge *ep; @@ -1558,13 +1566,13 @@ linkcycle(stdata_t *upstp, stdata_t *lostp) if (lostp->sd_vnode->v_type == VFIFO) return (0); - for (i = 0; i < devcnt; i++) { - np = &mux_nodes[i]; + for (i = 0; i < ss->ss_devcnt; i++) { + np = &ss->ss_mux_nodes[i]; MUX_CLEAR(np); } lomaj = getmajor(lostp->sd_vnode->v_rdev); upmaj = getmajor(upstp->sd_vnode->v_rdev); - np = &mux_nodes[lomaj]; + np = &ss->ss_mux_nodes[lomaj]; for (;;) { if (!MUX_DIDVISIT(np)) { if (np->mn_imaj == upmaj) @@ -1607,7 +1615,7 @@ linkcycle(stdata_t *upstp, stdata_t *lostp) * Find linkinfo entry corresponding to the parameters. 
*/ linkinfo_t * -findlinks(stdata_t *stp, int index, int type) +findlinks(stdata_t *stp, int index, int type, str_stack_t *ss) { linkinfo_t *linkp; struct mux_edge *mep; @@ -1626,7 +1634,7 @@ findlinks(stdata_t *stp, int index, int type) } } else { ASSERT((type & LINKTYPEMASK) == LINKPERSIST); - mnp = &mux_nodes[getmajor(stp->sd_vnode->v_rdev)]; + mnp = &ss->ss_mux_nodes[getmajor(stp->sd_vnode->v_rdev)]; mep = mnp->mn_outp; while (mep) { if ((index == 0) || (index == mep->me_muxid)) @@ -1724,6 +1732,8 @@ mlink_file(vnode_t *vp, int cmd, struct file *fpdown, cred_t *crp, int *rvalp, uint32_t sqtype; perdm_t *dmp; int error = 0; + netstack_t *ns; + str_stack_t *ss; stp = vp->v_stream; TRACE_1(TR_FAC_STREAMS_FR, @@ -1746,12 +1756,19 @@ mlink_file(vnode_t *vp, int cmd, struct file *fpdown, cred_t *crp, int *rvalp, if (fpdown == NULL) { return (EBADF); } - if (getmajor(stp->sd_vnode->v_rdev) >= devcnt) { + ns = netstack_find_by_cred(crp); + ASSERT(ns != NULL); + ss = ns->netstack_str; + ASSERT(ss != NULL); + + if (getmajor(stp->sd_vnode->v_rdev) >= ss->ss_devcnt) { + netstack_rele(ss->ss_netstack); return (EINVAL); } mutex_enter(&muxifier); if (stp->sd_flag & STPLEX) { mutex_exit(&muxifier); + netstack_rele(ss->ss_netstack); return (ENXIO); } @@ -1767,9 +1784,10 @@ mlink_file(vnode_t *vp, int cmd, struct file *fpdown, cred_t *crp, int *rvalp, (stpdown == stp) || (stpdown->sd_flag & (STPLEX|STRHUP|STRDERR|STWRERR|IOCWAIT|STRPLUMB)) || ((stpdown->sd_vnode->v_type != VFIFO) && - (getmajor(stpdown->sd_vnode->v_rdev) >= devcnt)) || - linkcycle(stp, stpdown)) { + (getmajor(stpdown->sd_vnode->v_rdev) >= ss->ss_devcnt)) || + linkcycle(stp, stpdown, ss)) { mutex_exit(&muxifier); + netstack_rele(ss->ss_netstack); return (EINVAL); } TRACE_1(TR_FAC_STREAMS_FR, @@ -1899,6 +1917,7 @@ mlink_file(vnode_t *vp, int cmd, struct file *fpdown, cred_t *crp, int *rvalp, mutex_exit(&stpdown->sd_lock); mutex_exit(&muxifier); + netstack_rele(ss->ss_netstack); return (error); } mutex_enter(&fpdown->f_tlock); @@ -1919,7 +1938,7 @@ mlink_file(vnode_t *vp, int cmd, struct file *fpdown, cred_t *crp, int *rvalp, link_rempassthru(passq); - mux_addedge(stp, stpdown, linkp->li_lblk.l_index); + mux_addedge(stp, stpdown, linkp->li_lblk.l_index, ss); /* * Mark the upper stream as having dependent links @@ -1944,6 +1963,7 @@ mlink_file(vnode_t *vp, int cmd, struct file *fpdown, cred_t *crp, int *rvalp, mutex_exit(&stpdown->sd_lock); mutex_exit(&muxifier); *rvalp = linkp->li_lblk.l_index; + netstack_rele(ss->ss_netstack); return (0); } @@ -1979,7 +1999,8 @@ mlink(vnode_t *vp, int cmd, int arg, cred_t *crp, int *rvalp, int lhlink) * re-blocked. */ int -munlink(stdata_t *stp, linkinfo_t *linkp, int flag, cred_t *crp, int *rvalp) +munlink(stdata_t *stp, linkinfo_t *linkp, int flag, cred_t *crp, int *rvalp, + str_stack_t *ss) { struct strioctl strioc; struct stdata *stpdown; @@ -2037,7 +2058,7 @@ munlink(stdata_t *stp, linkinfo_t *linkp, int flag, cred_t *crp, int *rvalp) } } - mux_rmvedge(stp, linkp->li_lblk.l_index); + mux_rmvedge(stp, linkp->li_lblk.l_index, ss); fpdown = linkp->li_fpdown; lbfree(linkp); @@ -2225,17 +2246,17 @@ munlink(stdata_t *stp, linkinfo_t *linkp, int flag, cred_t *crp, int *rvalp) * Return 0, or a non-zero errno on failure. 
*/ int -munlinkall(stdata_t *stp, int flag, cred_t *crp, int *rvalp) +munlinkall(stdata_t *stp, int flag, cred_t *crp, int *rvalp, str_stack_t *ss) { linkinfo_t *linkp; int error = 0; mutex_enter(&muxifier); - while (linkp = findlinks(stp, 0, flag)) { + while (linkp = findlinks(stp, 0, flag, ss)) { /* * munlink() releases the muxifier lock. */ - if (error = munlink(stp, linkp, flag, crp, rvalp)) + if (error = munlink(stp, linkp, flag, crp, rvalp, ss)) return (error); mutex_enter(&muxifier); } @@ -2248,7 +2269,7 @@ munlinkall(stdata_t *stp, int flag, cred_t *crp, int *rvalp) * edge to the directed graph. */ void -mux_addedge(stdata_t *upstp, stdata_t *lostp, int muxid) +mux_addedge(stdata_t *upstp, stdata_t *lostp, int muxid, str_stack_t *ss) { struct mux_node *np; struct mux_edge *ep; @@ -2257,7 +2278,7 @@ mux_addedge(stdata_t *upstp, stdata_t *lostp, int muxid) upmaj = getmajor(upstp->sd_vnode->v_rdev); lomaj = getmajor(lostp->sd_vnode->v_rdev); - np = &mux_nodes[upmaj]; + np = &ss->ss_mux_nodes[upmaj]; if (np->mn_outp) { ep = np->mn_outp; while (ep->me_nextp) @@ -2270,10 +2291,18 @@ mux_addedge(stdata_t *upstp, stdata_t *lostp, int muxid) } ep->me_nextp = NULL; ep->me_muxid = muxid; + /* + * Save the dev_t for the purposes of str_stack_shutdown. + * str_stack_shutdown assumes that the device allows reopen, since + * this dev_t is the one after any cloning by xx_open(). + * Would prefer finding the dev_t from before any cloning, + * but specfs doesn't retain that. + */ + ep->me_dev = upstp->sd_vnode->v_rdev; if (lostp->sd_vnode->v_type == VFIFO) ep->me_nodep = NULL; else - ep->me_nodep = &mux_nodes[lomaj]; + ep->me_nodep = &ss->ss_mux_nodes[lomaj]; } /* @@ -2281,7 +2310,7 @@ mux_addedge(stdata_t *upstp, stdata_t *lostp, int muxid) * edge in the directed graph. */ void -mux_rmvedge(stdata_t *upstp, int muxid) +mux_rmvedge(stdata_t *upstp, int muxid, str_stack_t *ss) { struct mux_node *np; struct mux_edge *ep; @@ -2289,7 +2318,7 @@ mux_rmvedge(stdata_t *upstp, int muxid) major_t upmaj; upmaj = getmajor(upstp->sd_vnode->v_rdev); - np = &mux_nodes[upmaj]; + np = &ss->ss_mux_nodes[upmaj]; ASSERT(np->mn_outp != NULL); ep = np->mn_outp; while (ep) { @@ -4057,9 +4086,11 @@ backenable(queue_t *q, uchar_t pri) * or with the stream frozen (the latter occurs when a module * calls rmvq with the stream frozen.) If the stream is frozen * by the caller the caller will hold all qlocks in the stream. + * Note that a frozen stream doesn't freeze a mated stream, + * so we explicitly check for that. */ freezer = STREAM(q)->sd_freezer; - if (freezer != curthread) { + if (freezer != curthread || STREAM(q) != STREAM(nq)) { mutex_enter(QLOCK(nq)); } #ifdef DEBUG @@ -4071,7 +4102,7 @@ backenable(queue_t *q, uchar_t pri) #endif setqback(nq, pri); qenable_locked(nq); - if (freezer != curthread) + if (freezer != curthread || STREAM(q) != STREAM(nq)) mutex_exit(QLOCK(nq)); } releasestr(q); @@ -8464,3 +8495,103 @@ void queuerun(void) { } + +/* + * Initialize the STR stack instance, which tracks autopush and persistent + * links. + */ +/* ARGSUSED */ +static void * +str_stack_init(netstackid_t stackid, netstack_t *ns) +{ + str_stack_t *ss; + int i; + + ss = (str_stack_t *)kmem_zalloc(sizeof (*ss), KM_SLEEP); + ss->ss_netstack = ns; + + /* + * set up autopush + */ + sad_initspace(ss); + + /* + * set up mux_node structures. 
+ */ + ss->ss_devcnt = devcnt; /* In case it should change before free */ + ss->ss_mux_nodes = kmem_zalloc((sizeof (struct mux_node) * + ss->ss_devcnt), KM_SLEEP); + for (i = 0; i < ss->ss_devcnt; i++) + ss->ss_mux_nodes[i].mn_imaj = i; + return (ss); +} + +/* + * Note: run at zone shutdown and not destroy so that the PLINKs are + * gone by the time other cleanup happens from the destroy callbacks. + */ +static void +str_stack_shutdown(netstackid_t stackid, void *arg) +{ + str_stack_t *ss = (str_stack_t *)arg; + int i; + cred_t *cr; + + cr = zone_get_kcred(netstackid_to_zoneid(stackid)); + ASSERT(cr != NULL); + + /* Undo all the I_PLINKs for this zone */ + for (i = 0; i < ss->ss_devcnt; i++) { + struct mux_edge *ep; + ldi_handle_t lh; + ldi_ident_t li; + int ret; + int rval; + dev_t rdev; + + ep = ss->ss_mux_nodes[i].mn_outp; + if (ep == NULL) + continue; + ret = ldi_ident_from_major((major_t)i, &li); + if (ret != 0) { + continue; + } + rdev = ep->me_dev; + ret = ldi_open_by_dev(&rdev, OTYP_CHR, FREAD|FWRITE, + cr, &lh, li); + if (ret != 0) { + ldi_ident_release(li); + continue; + } + + ret = ldi_ioctl(lh, I_PUNLINK, (intptr_t)MUXID_ALL, FKIOCTL, + cr, &rval); + if (ret) { + (void) ldi_close(lh, FREAD|FWRITE, cr); + ldi_ident_release(li); + continue; + } + (void) ldi_close(lh, FREAD|FWRITE, cr); + + /* Close layered handles */ + ldi_ident_release(li); + } + crfree(cr); + + sad_freespace(ss); + + kmem_free(ss->ss_mux_nodes, sizeof (struct mux_node) * ss->ss_devcnt); + ss->ss_mux_nodes = NULL; +} + +/* + * Free the structure; str_stack_shutdown did the other cleanup work. + */ +/* ARGSUSED */ +static void +str_stack_fini(netstackid_t stackid, void *arg) +{ + str_stack_t *ss = (str_stack_t *)arg; + + kmem_free(ss, sizeof (*ss)); +} diff --git a/usr/src/uts/common/os/zone.c b/usr/src/uts/common/os/zone.c index da658c8a48..d33c712f4f 100644 --- a/usr/src/uts/common/os/zone.c +++ b/usr/src/uts/common/os/zone.c @@ -239,8 +239,7 @@ #include <sys/fss.h> #include <sys/brand.h> #include <sys/zone.h> -#include <sys/tsol/label.h> - +#include <net/if.h> #include <vm/seg.h> /* @@ -345,6 +344,10 @@ static kmutex_t mount_lock; const char * const zone_default_initname = "/sbin/init"; static char * const zone_prefix = "/zone/"; static int zone_shutdown(zoneid_t zoneid); +static int zone_add_datalink(zoneid_t, char *); +static int zone_remove_datalink(zoneid_t, char *); +static int zone_check_datalink(zoneid_t *, char *); +static int zone_list_datalink(zoneid_t, int *, char *); /* * Bump this number when you alter the zone syscall interfaces; this is @@ -361,8 +364,9 @@ static int zone_shutdown(zoneid_t zoneid); * Trusted Extensions. * Version 5 alters the zone_boot system call, and converts its old * bootargs parameter to be set by the zone_setattr API instead. + * Version 6 adds the flag argument to zone_create. 
*/ -static const int ZONE_SYSCALL_API_VERSION = 5; +static const int ZONE_SYSCALL_API_VERSION = 6; /* * Certain filesystems (such as NFS and autofs) need to know which zone @@ -3196,7 +3200,8 @@ zone_create(const char *zone_name, const char *zone_root, const priv_set_t *zone_privs, size_t zone_privssz, caddr_t rctlbuf, size_t rctlbufsz, caddr_t zfsbuf, size_t zfsbufsz, int *extended_error, - int match, uint32_t doi, const bslabel_t *label) + int match, uint32_t doi, const bslabel_t *label, + int flags) { struct zsched_arg zarg; nvlist_t *rctls = NULL; @@ -3238,6 +3243,10 @@ zone_create(const char *zone_name, const char *zone_root, offsetof(zone_dataset_t, zd_linkage)); rw_init(&zone->zone_mlps.mlpl_rwlock, NULL, RW_DEFAULT, NULL); + if (flags & ZCF_NET_EXCL) { + zone->zone_flags |= ZF_NET_EXCL; + } + if ((error = zone_set_name(zone, zone_name)) != 0) { zone_free(zone); return (zone_create_error(error, 0, extended_error)); @@ -3826,6 +3835,7 @@ zone_destroy(zoneid_t zoneid) */ zone_status_wait(zone, ZONE_IS_DEAD); zone_zsd_callbacks(zone, ZSD_DESTROY); + zone->zone_netstack = NULL; uniqid = zone->zone_uniqid; zone_rele(zone); zone = NULL; /* potentially free'd */ @@ -3923,6 +3933,7 @@ zone_getattr(zoneid_t zoneid, int attr, void *buf, size_t bufsize) pid_t initpid; boolean_t global = (curproc->p_zone == global_zone); boolean_t curzone = (curproc->p_zone->zone_id == zoneid); + ushort_t flags; mutex_enter(&zonehash_lock); if ((zone = zone_find_all_by_id(zoneid)) == NULL) { @@ -4021,6 +4032,15 @@ zone_getattr(zoneid_t zoneid, int attr, void *buf, size_t bufsize) copyout(&zone_status, buf, bufsize) != 0) error = EFAULT; break; + case ZONE_ATTR_FLAGS: + size = sizeof (zone->zone_flags); + if (bufsize > size) + bufsize = size; + flags = zone->zone_flags; + if (buf != NULL && + copyout(&flags, buf, bufsize) != 0) + error = EFAULT; + break; case ZONE_ATTR_PRIVSET: size = sizeof (priv_set_t); if (bufsize > size) @@ -4877,6 +4897,7 @@ zone(int cmd, void *arg1, void *arg2, void *arg3, void *arg4) zs.match = zs32.match; zs.doi = zs32.doi; zs.label = (const bslabel_t *)(uintptr_t)zs32.label; + zs.flags = zs32.flags; #else panic("get_udatamodel() returned bogus result\n"); #endif @@ -4887,7 +4908,7 @@ zone(int cmd, void *arg1, void *arg2, void *arg3, void *arg4) (caddr_t)zs.rctlbuf, zs.rctlbufsz, (caddr_t)zs.zfsbuf, zs.zfsbufsz, zs.extended_error, zs.match, zs.doi, - zs.label)); + zs.label, zs.flags)); case ZONE_BOOT: return (zone_boot((zoneid_t)(uintptr_t)arg1)); case ZONE_DESTROY: @@ -4908,6 +4929,17 @@ zone(int cmd, void *arg1, void *arg2, void *arg3, void *arg4) return (zone_lookup((const char *)arg1)); case ZONE_VERSION: return (zone_version((int *)arg1)); + case ZONE_ADD_DATALINK: + return (zone_add_datalink((zoneid_t)(uintptr_t)arg1, + (char *)arg2)); + case ZONE_DEL_DATALINK: + return (zone_remove_datalink((zoneid_t)(uintptr_t)arg1, + (char *)arg2)); + case ZONE_CHECK_DATALINK: + return (zone_check_datalink((zoneid_t *)arg1, (char *)arg2)); + case ZONE_LIST_DATALINK: + return (zone_list_datalink((zoneid_t)(uintptr_t)arg1, + (int *)arg2, (char *)arg3)); default: return (set_errno(EINVAL)); } @@ -5298,3 +5330,251 @@ zone_find_by_any_path(const char *path, boolean_t treat_abs) mutex_exit(&zonehash_lock); return (zone); } + +/* List of data link names which are accessible from the zone */ +struct dlnamelist { + char dlnl_name[LIFNAMSIZ]; + struct dlnamelist *dlnl_next; +}; + + +/* + * Check whether the datalink name (dlname) itself is present. + * Return true if found. 
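/*
 * Illustrative sketch, not part of this changeset: a userland management
 * process using the new ZONE_ATTR_FLAGS attribute to ask whether a
 * running zone was created with an exclusive IP instance.  This assumes
 * the existing zone_getattr() libc wrapper, that the attribute is the
 * ushort_t flags word shown above, and that ZF_NET_EXCL is visible to
 * the caller via <sys/zone.h>.
 */
#include <sys/types.h>
#include <zone.h>

static boolean_t
zone_has_exclusive_ip(zoneid_t zoneid)
{
        ushort_t flags;

        if (zone_getattr(zoneid, ZONE_ATTR_FLAGS, &flags,
            sizeof (flags)) < 0)
                return (B_FALSE);       /* treat lookup failure as shared */
        return ((flags & ZF_NET_EXCL) != 0);
}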
+ */ +static boolean_t +zone_dlname(zone_t *zone, char *dlname) +{ + struct dlnamelist *dlnl; + boolean_t found = B_FALSE; + + mutex_enter(&zone->zone_lock); + for (dlnl = zone->zone_dl_list; dlnl != NULL; dlnl = dlnl->dlnl_next) { + if (strncmp(dlnl->dlnl_name, dlname, LIFNAMSIZ) == 0) { + found = B_TRUE; + break; + } + } + mutex_exit(&zone->zone_lock); + return (found); +} + +/* + * Add an data link name for the zone. Does not check for duplicates. + */ +static int +zone_add_datalink(zoneid_t zoneid, char *dlname) +{ + struct dlnamelist *dlnl; + zone_t *zone; + zone_t *thiszone; + int err; + + dlnl = kmem_zalloc(sizeof (struct dlnamelist), KM_SLEEP); + if ((err = copyinstr(dlname, dlnl->dlnl_name, LIFNAMSIZ, NULL)) != 0) { + kmem_free(dlnl, sizeof (struct dlnamelist)); + return (set_errno(err)); + } + + thiszone = zone_find_by_id(zoneid); + if (thiszone == NULL) { + kmem_free(dlnl, sizeof (struct dlnamelist)); + return (set_errno(ENXIO)); + } + + /* + * Verify that the datalink name isn't already used by a different + * zone while allowing duplicate entries for the same zone (e.g. due + * to both using IPv4 and IPv6 on an interface) + */ + mutex_enter(&zonehash_lock); + for (zone = list_head(&zone_active); zone != NULL; + zone = list_next(&zone_active, zone)) { + if (zone->zone_id == zoneid) + continue; + + if (zone_dlname(zone, dlnl->dlnl_name)) { + mutex_exit(&zonehash_lock); + zone_rele(thiszone); + kmem_free(dlnl, sizeof (struct dlnamelist)); + return (set_errno(EPERM)); + } + } + mutex_enter(&thiszone->zone_lock); + dlnl->dlnl_next = thiszone->zone_dl_list; + thiszone->zone_dl_list = dlnl; + mutex_exit(&thiszone->zone_lock); + mutex_exit(&zonehash_lock); + zone_rele(thiszone); + return (0); +} + +static int +zone_remove_datalink(zoneid_t zoneid, char *dlname) +{ + struct dlnamelist *dlnl, *odlnl, **dlnlp; + zone_t *zone; + int err; + + dlnl = kmem_zalloc(sizeof (struct dlnamelist), KM_SLEEP); + if ((err = copyinstr(dlname, dlnl->dlnl_name, LIFNAMSIZ, NULL)) != 0) { + kmem_free(dlnl, sizeof (struct dlnamelist)); + return (set_errno(err)); + } + zone = zone_find_by_id(zoneid); + if (zone == NULL) { + kmem_free(dlnl, sizeof (struct dlnamelist)); + return (set_errno(EINVAL)); + } + + mutex_enter(&zone->zone_lock); + /* Look for match */ + dlnlp = &zone->zone_dl_list; + while (*dlnlp != NULL) { + if (strncmp(dlnl->dlnl_name, (*dlnlp)->dlnl_name, + LIFNAMSIZ) == 0) + goto found; + dlnlp = &((*dlnlp)->dlnl_next); + } + mutex_exit(&zone->zone_lock); + zone_rele(zone); + kmem_free(dlnl, sizeof (struct dlnamelist)); + return (set_errno(ENXIO)); + +found: + odlnl = *dlnlp; + *dlnlp = (*dlnlp)->dlnl_next; + kmem_free(odlnl, sizeof (struct dlnamelist)); + + mutex_exit(&zone->zone_lock); + zone_rele(zone); + kmem_free(dlnl, sizeof (struct dlnamelist)); + return (0); +} + +/* + * Using the zoneidp as ALL_ZONES, we can lookup which zone is using datalink + * name (dlname); otherwise we just check if the specified zoneidp has access + * to the datalink name. + */ +static int +zone_check_datalink(zoneid_t *zoneidp, char *dlname) +{ + zoneid_t id; + char *dln; + zone_t *zone; + int err = 0; + boolean_t allzones = B_FALSE; + + if (copyin(zoneidp, &id, sizeof (id)) != 0) { + return (set_errno(EFAULT)); + } + dln = kmem_zalloc(LIFNAMSIZ, KM_SLEEP); + if ((err = copyinstr(dlname, dln, LIFNAMSIZ, NULL)) != 0) { + kmem_free(dln, LIFNAMSIZ); + return (set_errno(err)); + } + + if (id == ALL_ZONES) + allzones = B_TRUE; + + /* + * Check whether datalink name is already used. 
+ */ + mutex_enter(&zonehash_lock); + for (zone = list_head(&zone_active); zone != NULL; + zone = list_next(&zone_active, zone)) { + if (allzones || (id == zone->zone_id)) { + if (!zone_dlname(zone, dln)) + continue; + if (allzones) + err = copyout(&zone->zone_id, zoneidp, + sizeof (*zoneidp)); + + mutex_exit(&zonehash_lock); + kmem_free(dln, LIFNAMSIZ); + return (err ? set_errno(EFAULT) : 0); + } + } + + /* datalink name is not found in any active zone. */ + mutex_exit(&zonehash_lock); + kmem_free(dln, LIFNAMSIZ); + return (set_errno(ENXIO)); +} + +/* + * Get the names of the datalinks assigned to a zone. + * Here *nump is the number of datalinks, and the assumption + * is that the caller will gurantee that the the supplied buffer is + * big enough to hold at least #*nump datalink names, that is, + * LIFNAMSIZ X *nump + * On return, *nump will be the "new" number of datalinks, if it + * ever changed. + */ +static int +zone_list_datalink(zoneid_t zoneid, int *nump, char *buf) +{ + int num, dlcount; + zone_t *zone; + struct dlnamelist *dlnl; + char *ptr; + + if (copyin(nump, &dlcount, sizeof (dlcount)) != 0) + return (set_errno(EFAULT)); + + zone = zone_find_by_id(zoneid); + if (zone == NULL) { + return (set_errno(ENXIO)); + } + + num = 0; + mutex_enter(&zone->zone_lock); + ptr = buf; + for (dlnl = zone->zone_dl_list; dlnl != NULL; dlnl = dlnl->dlnl_next) { + /* + * If the list changed and the new number is bigger + * than what the caller supplied, just count, don't + * do copyout + */ + if (++num > dlcount) + continue; + if (copyout(dlnl->dlnl_name, ptr, LIFNAMSIZ) != 0) { + mutex_exit(&zone->zone_lock); + zone_rele(zone); + return (set_errno(EFAULT)); + } + ptr += LIFNAMSIZ; + } + mutex_exit(&zone->zone_lock); + zone_rele(zone); + + /* Increased or decreased, caller should be notified. */ + if (num != dlcount) { + if (copyout(&num, nump, sizeof (num)) != 0) { + return (set_errno(EFAULT)); + } + } + return (0); +} + +/* + * Public interface for looking up a zone by zoneid. It's a customized version + * for netstack_zone_create(), it: + * 1. Doesn't acquire the zonehash_lock, since it is called from + * zone_key_create() or zone_zsd_configure(), lock already held. + * 2. Doesn't check the status of the zone. + * 3. It will be called even before zone_init is called, in that case the + * address of zone0 is returned directly, and netstack_zone_create() + * will only assign a value to zone0.zone_netstack, won't break anything. + */ +zone_t * +zone_find_by_id_nolock(zoneid_t zoneid) +{ + ASSERT(MUTEX_HELD(&zonehash_lock)); + + if (zonehashbyid == NULL) + return (&zone0); + else + return (zone_find_all_by_id(zoneid)); +} diff --git a/usr/src/uts/common/sys/Makefile b/usr/src/uts/common/sys/Makefile index c0947b12a5..62f1efac65 100644 --- a/usr/src/uts/common/sys/Makefile +++ b/usr/src/uts/common/sys/Makefile @@ -346,6 +346,7 @@ CHKHDRS= \ ndi_impldefs.h \ netconfig.h \ neti.h \ + netstack.h \ nexusdefs.h \ note.h \ nvpair.h \ diff --git a/usr/src/uts/common/sys/condvar_impl.h b/usr/src/uts/common/sys/condvar_impl.h index 15adcedb93..75c5867375 100644 --- a/usr/src/uts/common/sys/condvar_impl.h +++ b/usr/src/uts/common/sys/condvar_impl.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -102,6 +102,11 @@ typedef struct cvwaitlock_s { mutex_exit(&(_c)->cvw_lock); \ } +#define CVW_DESTROY(_c) { \ + mutex_destroy(&(_c)->cvw_lock); \ + cv_destroy(&(_c)->cvw_waiter); \ +} + #ifdef __cplusplus } #endif diff --git a/usr/src/uts/common/sys/dld.h b/usr/src/uts/common/sys/dld.h index 783f581e68..8f02bafc0f 100644 --- a/usr/src/uts/common/sys/dld.h +++ b/usr/src/uts/common/sys/dld.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -108,6 +108,12 @@ typedef struct dld_vlan_info { char dvi_name[IFNAMSIZ]; } dld_vlan_info_t; +typedef struct dld_hold_vlan { + char dhv_name[IFNAMSIZ]; + zoneid_t dhv_zid; + boolean_t dhv_docheck; +} dld_hold_vlan_t; + /* * Secure objects ioctls */ @@ -142,6 +148,17 @@ typedef struct dld_ioc_secobj_unset { char su_name[DLD_SECOBJ_NAME_MAX]; } dld_ioc_secobj_unset_t; +/* + * DLDIOCHOLDVLAN/DLDIOCRELEVLAN are added to support a "hold/release" + * operation on a VLAN. A hold will cause a VLAN to be created or the + * reference count will be increased, release will do the reverse. + */ +#define DLDIOCHOLDVLAN (DLDIOC | 0x08) + +#define DLDIOCRELEVLAN (DLDIOC | 0x09) + +#define DLDIOCZIDGET (DLDIOC | 0x0a) + #ifdef _KERNEL int dld_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **); int dld_open(queue_t *, dev_t *, int, int, cred_t *); diff --git a/usr/src/uts/common/sys/dls.h b/usr/src/uts/common/sys/dls.h index f43cb816de..ebaed461a5 100644 --- a/usr/src/uts/common/sys/dls.h +++ b/usr/src/uts/common/sys/dls.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -58,6 +58,11 @@ extern "C" { #define DLS_PPA2MINOR(ppa) ((minor_t)((DLS_PPA2INST(ppa)) + 1)) /* + * Maps a (VID, INST) pair to ppa + */ +#define DLS_VIDINST2PPA(vid, inst) ((minor_t)((vid) * 1000 + (inst))) + +/* * Converts a minor to an instance#; makes sense only when minor <= 1000. */ #define DLS_MINOR2INST(minor) ((int)((minor) - 1)) @@ -104,6 +109,8 @@ extern mblk_t *dls_tx(dls_channel_t, mblk_t *); extern boolean_t dls_active_set(dls_channel_t); extern void dls_active_clear(dls_channel_t); +extern dev_info_t *dls_finddevinfo(dev_t); +extern int dls_ppa_from_minor(minor_t, t_uscalar_t *); #endif /* _KERNEL */ #ifdef __cplusplus diff --git a/usr/src/uts/common/sys/dls_impl.h b/usr/src/uts/common/sys/dls_impl.h index 0b5dec3fb1..6d9ca66df5 100644 --- a/usr/src/uts/common/sys/dls_impl.h +++ b/usr/src/uts/common/sys/dls_impl.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -70,17 +70,21 @@ struct dls_link_s { kmutex_t dl_lock; }; +typedef struct dls_impl_s dls_impl_t; +typedef struct dls_head_s dls_head_t; + typedef struct dls_vlan_s { char dv_name[IFNAMSIZ]; uint_t dv_ref; dls_link_t *dv_dlp; uint16_t dv_id; kstat_t *dv_ksp; + minor_t dv_minor; + t_uscalar_t dv_ppa; + zoneid_t dv_zid; + dls_impl_t *dv_impl_list; } dls_vlan_t; -typedef struct dls_impl_s dls_impl_t; -typedef struct dls_head_s dls_head_t; - struct dls_impl_s { dls_impl_t *di_nextp; dls_head_t *di_headp; @@ -103,6 +107,8 @@ struct dls_impl_s { soft_ring_t **di_soft_ring_list; uint_t di_soft_ring_size; int di_soft_ring_fanout_type; + zoneid_t di_zid; + dls_impl_t *di_next_impl; }; struct dls_head_s { @@ -133,6 +139,15 @@ extern int dls_vlan_destroy(const char *); extern int dls_vlan_hold(const char *, dls_vlan_t **, boolean_t); extern void dls_vlan_rele(dls_vlan_t *); extern int dls_vlan_walk(int (*)(dls_vlan_t *, void *), void *); +extern dev_info_t *dls_vlan_finddevinfo(dev_t); +extern int dls_vlan_ppa_from_minor(minor_t, t_uscalar_t *); +extern int dls_vlan_rele_by_name(const char *); +extern minor_t dls_minor_hold(boolean_t); +extern void dls_minor_rele(minor_t); +extern int dls_vlan_setzoneid(char *, zoneid_t, boolean_t); +extern int dls_vlan_getzoneid(char *, zoneid_t *); +extern void dls_vlan_add_impl(dls_vlan_t *, dls_impl_t *); +extern void dls_vlan_remove_impl(dls_vlan_t *, dls_impl_t *); extern void dls_init(void); extern int dls_fini(void); diff --git a/usr/src/uts/common/sys/hook.h b/usr/src/uts/common/sys/hook.h index c7a6779dc5..00b0048093 100644 --- a/usr/src/uts/common/sys/hook.h +++ b/usr/src/uts/common/sys/hook.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -33,6 +33,7 @@ #pragma ident "%Z%%M% %I% %E% SMI" #include <sys/queue.h> +#include <sys/netstack.h> #ifdef __cplusplus extern "C" { @@ -49,7 +50,7 @@ typedef uintptr_t hook_data_t; struct hook_event_int; typedef struct hook_event_int *hook_event_token_t; -typedef int (* hook_func_t)(hook_event_token_t, hook_data_t); +typedef int (* hook_func_t)(hook_event_token_t, hook_data_t, netstack_t *); /* * Hook @@ -107,7 +108,6 @@ typedef struct hook_event { _NOTE(CONSTCOND) \ } while (0) - #ifdef __cplusplus } #endif diff --git a/usr/src/uts/common/sys/hook_impl.h b/usr/src/uts/common/sys/hook_impl.h index d8e169b2ae..08112e4144 100644 --- a/usr/src/uts/common/sys/hook_impl.h +++ b/usr/src/uts/common/sys/hook_impl.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
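With the hook_func_t change above, every hook callback now receives the netstack_t of the IP instance that raised the event, so consumers can keep per-instance state rather than globals. A sketch of a callback against the new signature; the body and the meaning of the return value are illustrative only:

#include <sys/hook.h>
#include <sys/netstack.h>

/*ARGSUSED*/
static int
sample_hook(hook_event_token_t tok, hook_data_t info, netstack_t *ns)
{
    /*
     * A real consumer would first look up its per-stack state through
     * ns (e.g. one of the netstack_* accessors) before examining info.
     */
    return (0);
}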
*/ @@ -34,6 +34,7 @@ #include <sys/hook.h> #include <sys/condvar_impl.h> +#include <sys/netstack.h> #ifdef __cplusplus extern "C" { @@ -106,6 +107,7 @@ typedef struct hook_family_int { SLIST_ENTRY(hook_family_int) hfi_entry; hook_event_int_head_t hfi_head; hook_family_t hfi_family; + void *hfi_ptr; } hook_family_int_t; /* @@ -115,19 +117,29 @@ SLIST_HEAD(hook_family_int_head, hook_family_int); typedef struct hook_family_int_head hook_family_int_head_t; /* + * hook stack instances + */ +struct hook_stack { + cvwaitlock_t hks_familylock; /* global lock */ + hook_family_int_head_t hks_familylist; /* family list head */ + netstack_t *hk_netstack; +}; +typedef struct hook_stack hook_stack_t; + +/* * Names of hooks families currently defined by Solaris */ #define Hn_ARP "arp" #define Hn_IPV4 "inet" #define Hn_IPV6 "inet6" -extern hook_family_int_t *hook_family_add(hook_family_t *); +extern hook_family_int_t *hook_family_add(hook_family_t *, hook_stack_t *); extern int hook_family_remove(hook_family_int_t *); extern hook_event_int_t *hook_event_add(hook_family_int_t *, hook_event_t *); extern int hook_event_remove(hook_family_int_t *, hook_event_t *); extern int hook_register(hook_family_int_t *, char *, hook_t *); extern int hook_unregister(hook_family_int_t *, char *, hook_t *); -extern int hook_run(hook_event_token_t, hook_data_t); +extern int hook_run(hook_event_token_t, hook_data_t, netstack_t *); #ifdef __cplusplus } diff --git a/usr/src/uts/common/sys/mac.h b/usr/src/uts/common/sys/mac.h index fd4c89150c..c3b9b8fa1e 100644 --- a/usr/src/uts/common/sys/mac.h +++ b/usr/src/uts/common/sys/mac.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -554,6 +554,9 @@ extern mactype_register_t *mactype_alloc(uint_t); extern void mactype_free(mactype_register_t *); extern int mactype_register(mactype_register_t *); extern int mactype_unregister(const char *); +extern int mac_vlan_create(mac_handle_t, const char *, + minor_t); +extern void mac_vlan_remove(mac_handle_t, const char *); #endif /* _KERNEL */ diff --git a/usr/src/uts/common/sys/neti.h b/usr/src/uts/common/sys/neti.h index 552ac25e93..ea6c843158 100644 --- a/usr/src/uts/common/sys/neti.h +++ b/usr/src/uts/common/sys/neti.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -32,6 +32,7 @@ #include <sys/int_types.h> #include <sys/queue.h> #include <sys/hook_impl.h> +#include <sys/netstack.h> #ifdef __cplusplus extern "C" { @@ -118,16 +119,17 @@ typedef struct net_inject { typedef struct net_info { int neti_version; char *neti_protocol; - int (*neti_getifname)(phy_if_t, char *, const size_t); - int (*neti_getmtu)(phy_if_t, lif_if_t); - int (*neti_getpmtuenabled)(void); + int (*neti_getifname)(phy_if_t, char *, const size_t, + netstack_t *); + int (*neti_getmtu)(phy_if_t, lif_if_t, netstack_t *); + int (*neti_getpmtuenabled)(netstack_t *); int (*neti_getlifaddr)(phy_if_t, lif_if_t, size_t, - net_ifaddr_t [], void *); - phy_if_t (*neti_phygetnext)(phy_if_t); - phy_if_t (*neti_phylookup)(const char *); - lif_if_t (*neti_lifgetnext)(phy_if_t, lif_if_t); - int (*neti_inject)(inject_t, net_inject_t *); - phy_if_t (*neti_routeto)(struct sockaddr *); + net_ifaddr_t [], void *, netstack_t *); + phy_if_t (*neti_phygetnext)(phy_if_t, netstack_t *); + phy_if_t (*neti_phylookup)(const char *, netstack_t *); + lif_if_t (*neti_lifgetnext)(phy_if_t, lif_if_t, netstack_t *); + int (*neti_inject)(inject_t, net_inject_t *, netstack_t *); + phy_if_t (*neti_routeto)(struct sockaddr *, netstack_t *); int (*neti_ispartialchecksum)(mblk_t *); int (*neti_isvalidchecksum)(mblk_t *); } net_info_t; @@ -141,12 +143,14 @@ struct net_data { net_info_t netd_info; int netd_refcnt; hook_family_int_t *netd_hooks; + netstack_t *netd_netstack; }; typedef struct injection_s { net_inject_t inj_data; boolean_t inj_isv6; + void * inj_ptr; } injection_t; /* @@ -160,13 +164,29 @@ typedef struct injection_s { /* + * neti stack instances + */ +struct neti_stack { + krwlock_t nts_netlock; + + /* list of net_data_t */ + LIST_HEAD(netd_listhead, net_data) nts_netd_head; + netstack_t *nts_netstack; +}; +typedef struct neti_stack neti_stack_t; + + +/* * Data management functions */ -extern net_data_t net_register(const net_info_t *); +extern net_data_t net_register(const net_info_t *, netstackid_t); +extern net_data_t net_register_impl(const net_info_t *, netstack_t *); extern int net_unregister(net_data_t); -extern net_data_t net_lookup(const char *); +extern net_data_t net_lookup(const char *, netstackid_t); +extern net_data_t net_lookup_impl(const char *, netstack_t *); extern int net_release(net_data_t); -extern net_data_t net_walk(net_data_t); +extern net_data_t net_walk(net_data_t, netstackid_t); +extern net_data_t net_walk_impl(net_data_t, netstack_t *); /* * Accessor functions diff --git a/usr/src/uts/common/sys/netstack.h b/usr/src/uts/common/sys/netstack.h new file mode 100644 index 0000000000..07ad7b5c30 --- /dev/null +++ b/usr/src/uts/common/sys/netstack.h @@ -0,0 +1,239 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
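The net_register()/net_lookup()/net_walk() prototypes above all gain a netstackid_t (or a netstack_t * in the _impl variants), so a netinfo provider now registers once per IP instance instead of once per system. A hedged sketch of the new registration call; my_netinfo is a placeholder net_info_t supplied by the caller, not something this patch defines:

#include <sys/neti.h>
#include <sys/netstack.h>

static net_data_t
register_for_instance(const net_info_t *my_netinfo, netstack_t *ns)
{
    /* One net_data_t per stack instance instead of one global one. */
    return (net_register(my_netinfo, ns->netstack_stackid));
}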
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ +#ifndef _SYS_NETSTACK_H +#define _SYS_NETSTACK_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/kstat.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * This allows various pieces in and around IP to have a separate instance + * for each instance of IP. This is used to support zones that have an + * exclusive stack. + * Pieces of software far removed from IP (e.g., kernel software + * sitting on top of TCP or UDP) probably should not use the netstack + * support; if such software wants to support separate zones it + * can do that using the zones framework (zone_key_create() etc) + * whether there is a shared IP stack or an exclusive IP stack underneath. + */ + +/* + * Each netstack has an identifier. We reuse the zoneid allocation for + * this but have a separate typedef. Thus the shared stack (used by + * the global zone and other shared stack zones) has a zero ID, and + * the exclusive stacks have a netstackid that is the same as their zoneid. + */ +typedef id_t netstackid_t; + +#define GLOBAL_NETSTACKID 0 + +/* + * One for each module which uses netstack support. + * Used in netstack_register(). + * + * The order of these is important for some modules both for + * the creation (which is done in ascending order) and destruction (which is + * done in descending order). + */ +#define NS_HOOK 0 +#define NS_NETI 1 +#define NS_ARP 2 +#define NS_IP 3 +#define NS_ICMP 4 +#define NS_UDP 5 +#define NS_TCP 6 +#define NS_SCTP 7 +#define NS_RTS 8 +#define NS_IPSEC 9 +#define NS_KEYSOCK 10 +#define NS_SPDSOCK 11 +#define NS_IPSECAH 12 +#define NS_IPSECESP 13 +#define NS_TUN 14 +#define NS_IPF 15 +#define NS_STR 16 /* autopush list etc */ +#define NS_MAX (NS_STR+1) + +/* + * One for every netstack in the system. + * We use a union so that the compiler and lint can provide type checking - + * in principle we could have + * #define netstack_arp netstack_modules[NS_ARP] + * etc, but that would imply void * types hence no type checking by the + * compiler. + * + * All the fields in netstack_t except netstack_next are protected by + * netstack_lock. netstack_next is protected by netstack_g_lock. 
+ */ +struct netstack { + union { + void *nu_modules[NS_MAX]; + struct { + struct hook_stack *nu_hook; + struct neti_stack *nu_neti; + struct arp_stack *nu_arp; + struct ip_stack *nu_ip; + struct icmp_stack *nu_icmp; + struct udp_stack *nu_udp; + struct tcp_stack *nu_tcp; + struct sctp_stack *nu_sctp; + struct rts_stack *nu_rts; + struct ipsec_stack *nu_ipsec; + struct keysock_stack *nu_keysock; + struct spd_stack *nu_spdsock; + struct ipsecah_stack *nu_ipsecah; + struct ipsecesp_stack *nu_ipsecesp; + struct tun_stack *nu_tun; + struct ipf_stack *nu_ipf; + struct str_stack *nu_str; + } nu_s; + } netstack_u; +#define netstack_modules netstack_u.nu_modules +#define netstack_hook netstack_u.nu_s.nu_hook +#define netstack_neti netstack_u.nu_s.nu_neti +#define netstack_arp netstack_u.nu_s.nu_arp +#define netstack_ip netstack_u.nu_s.nu_ip +#define netstack_icmp netstack_u.nu_s.nu_icmp +#define netstack_udp netstack_u.nu_s.nu_udp +#define netstack_tcp netstack_u.nu_s.nu_tcp +#define netstack_sctp netstack_u.nu_s.nu_sctp +#define netstack_rts netstack_u.nu_s.nu_rts +#define netstack_ipsec netstack_u.nu_s.nu_ipsec +#define netstack_keysock netstack_u.nu_s.nu_keysock +#define netstack_spdsock netstack_u.nu_s.nu_spdsock +#define netstack_ipsecah netstack_u.nu_s.nu_ipsecah +#define netstack_ipsecesp netstack_u.nu_s.nu_ipsecesp +#define netstack_tun netstack_u.nu_s.nu_tun +#define netstack_ipf netstack_u.nu_s.nu_ipf +#define netstack_str netstack_u.nu_s.nu_str + + uint16_t netstack_m_state[NS_MAX]; /* module state */ + + kmutex_t netstack_lock; + struct netstack *netstack_next; + netstackid_t netstack_stackid; + int netstack_numzones; /* Number of zones using this */ + int netstack_refcnt; /* Number of hold-rele */ + int netstack_flags; /* See below */ +}; +typedef struct netstack netstack_t; + +/* netstack_flags values */ +#define NSF_UNINIT 0x01 /* Not initialized */ +#define NSF_CLOSING 0x02 /* Going away */ + +/* + * State for each module for each stack - netstack_m_state[moduleid] + * Keeps track of pending actions to avoid holding locks when + * calling into the create/shutdown/destroy functions in the module. + */ +#define NSS_CREATE_NEEDED 0x0001 +#define NSS_CREATE_INPROGRESS 0x0002 +#define NSS_CREATE_COMPLETED 0x0004 +#define NSS_SHUTDOWN_NEEDED 0x0010 +#define NSS_SHUTDOWN_INPROGRESS 0x0020 +#define NSS_SHUTDOWN_COMPLETED 0x0040 +#define NSS_DESTROY_NEEDED 0x0100 +#define NSS_DESTROY_INPROGRESS 0x0200 +#define NSS_DESTROY_COMPLETED 0x0400 + +#define NSS_CREATE_ALL \ + (NSS_CREATE_NEEDED|NSS_CREATE_INPROGRESS|NSS_CREATE_COMPLETED) +#define NSS_SHUTDOWN_ALL \ + (NSS_SHUTDOWN_NEEDED|NSS_SHUTDOWN_INPROGRESS|NSS_SHUTDOWN_COMPLETED) +#define NSS_DESTROY_ALL \ + (NSS_DESTROY_NEEDED|NSS_DESTROY_INPROGRESS|NSS_DESTROY_COMPLETED) + +/* + * One for each of the NS_* values. 
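The union and the netstack_* accessor macros above give each module a typed view of its slot in netstack_modules[]; for instance, netstack_tcp names the same storage as netstack_modules[NS_TCP]. A one-line sketch of how a consumer reaches its per-instance state (tcp_stack_t itself stays opaque here):

#include <sys/netstack.h>

static struct tcp_stack *
tcp_stack_of(netstack_t *ns)
{
    /* Typed alias of ns->netstack_modules[NS_TCP], per the union above. */
    return (ns->netstack_tcp);
}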
+ */ +struct netstack_registry { + int nr_flags; /* 0 if nothing registered */ + void *(*nr_create)(netstackid_t, netstack_t *); + void (*nr_shutdown)(netstackid_t, void *); + void (*nr_destroy)(netstackid_t, void *); +}; + +/* nr_flags values */ +#define NRF_REGISTERED 0x01 + +/* + * To support kstat_create_netstack() using kstat_add_zone we need + * to track both + * - all zoneids that use the global/shared stack + * - all kstats that have been added for the shared stack + */ + +extern void netstack_init(void); +extern void netstack_hold(netstack_t *); +extern void netstack_rele(netstack_t *); +extern netstack_t *netstack_find_by_cred(const cred_t *); +extern netstack_t *netstack_find_by_stackid(netstackid_t); +extern netstack_t *netstack_find_by_zoneid(zoneid_t); + +extern zoneid_t netstackid_to_zoneid(netstackid_t); +extern netstackid_t zoneid_to_netstackid(zoneid_t); + +/* + * Register interest in changes to the set of netstacks. + * The createfn and destroyfn are required, but the shutdownfn can be + * NULL. + * Note that due to the current zsd implementation, when the create + * function is called the zone isn't fully present, thus functions + * like zone_find_by_* will fail, hence the create function can not + * use many zones kernel functions including zcmn_err(). + */ +extern void netstack_register(int, + void *(*)(netstackid_t, netstack_t *), + void (*)(netstackid_t, void *), + void (*)(netstackid_t, void *)); +extern void netstack_unregister(int); +extern kstat_t *kstat_create_netstack(char *, int, char *, char *, uchar_t, + uint_t, uchar_t, netstackid_t); +extern void kstat_delete_netstack(kstat_t *, netstackid_t); + +/* + * Simple support for walking all the netstacks. + * The caller of netstack_next() needs to call netstack_rele() when + * done with a netstack. + */ +typedef int netstack_handle_t; + +extern void netstack_next_init(netstack_handle_t *); +extern void netstack_next_fini(netstack_handle_t *); +extern netstack_t *netstack_next(netstack_handle_t *); + +#ifdef __cplusplus +} +#endif + + +#endif /* _SYS_NETSTACK_H */ diff --git a/usr/src/uts/common/sys/policy.h b/usr/src/uts/common/sys/policy.h index 1b86a5507f..89636cf86d 100644 --- a/usr/src/uts/common/sys/policy.h +++ b/usr/src/uts/common/sys/policy.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -95,6 +95,8 @@ int secpolicy_fs_config(const cred_t *, const struct vfs *); int secpolicy_fs_linkdir(const cred_t *, const struct vfs *); int secpolicy_fs_minfree(const cred_t *, const struct vfs *); int secpolicy_fs_quota(const cred_t *, const struct vfs *); +int secpolicy_ip(const cred_t *, int, boolean_t); +int secpolicy_ip_config(const cred_t *, boolean_t); int secpolicy_ipc_access(const cred_t *, const struct kipc_perm *, mode_t); int secpolicy_ipc_config(const cred_t *); int secpolicy_ipc_owner(const cred_t *, const struct kipc_perm *); diff --git a/usr/src/uts/common/sys/sad.h b/usr/src/uts/common/sys/sad.h index 6fac90accc..d7f10f40cc 100644 --- a/usr/src/uts/common/sys/sad.h +++ b/usr/src/uts/common/sys/sad.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
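The netstack_register() comment above describes the per-module lifecycle: a required create function, an optional shutdown function, and a required destroy function, invoked for every netstack in NS_* order. A minimal sketch of that pattern; NS_FOO and the foo_* names are placeholders (a real module owns one of the NS_* slots defined earlier in this header):

#include <sys/kmem.h>
#include <sys/netstack.h>

#define NS_FOO  NS_STR              /* placeholder slot, sketch only */

/* Per-stack state a hypothetical module would keep instead of globals. */
typedef struct foo_stack {
    netstackid_t fs_stackid;
} foo_stack_t;

/*ARGSUSED*/
static void *
foo_stack_create(netstackid_t stackid, netstack_t *ns)
{
    foo_stack_t *fs = kmem_zalloc(sizeof (*fs), KM_SLEEP);

    fs->fs_stackid = stackid;
    return (fs);
}

/*ARGSUSED*/
static void
foo_stack_destroy(netstackid_t stackid, void *arg)
{
    kmem_free(arg, sizeof (foo_stack_t));
}

void
foo_init(void)
{
    /* The shutdown callback may be NULL, per the comment above. */
    netstack_register(NS_FOO, foo_stack_create, NULL, foo_stack_destroy);
}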
*/ @@ -33,6 +33,9 @@ #pragma ident "%Z%%M% %I% %E% SMI" /* SVr4.0 1.5 */ #include <sys/types.h> +#ifdef _KERNEL +#include <sys/strsubr.h> +#endif #include <sys/modhash.h> #ifdef __cplusplus @@ -168,6 +171,7 @@ struct saddev { queue_t *sa_qp; /* pointer to read queue */ caddr_t sa_addr; /* saved address for copyout */ int sa_flags; /* see below */ + str_stack_t *sa_ss; }; /* @@ -201,11 +205,6 @@ struct autopush { #define ap_npush ap_common.apc_npush #define ap_anchor ap_data.apd_anchor -extern struct saddev *saddev; /* sad device array */ -extern int sadcnt; /* number of elements in saddev */ - -extern kmutex_t sad_lock; /* protects sad ap data store */ - /* * function prototypes */ @@ -220,27 +219,28 @@ void audit_fdsend(int, struct file *, int); void audit_fdrecv(int, struct file *); #endif -extern void sad_initspace(void); +extern void sad_initspace(str_stack_t *); +extern void sad_freespace(str_stack_t *); /* - * The following interfaces do not care about sad_lock. + * The following interfaces do not care about ss_sad_lock. */ extern struct autopush *sad_ap_alloc(void); extern int sad_apc_verify(struct apcommon *); extern int sad_ap_verify(struct autopush *); /* - * The following interfaces attempt to acquire sad_lock. + * The following interfaces attempt to acquire ss_sad_lock. */ -extern void sad_ap_rele(struct autopush *); -extern struct autopush *sad_ap_find_by_dev(dev_t); +extern void sad_ap_rele(struct autopush *, str_stack_t *); +extern struct autopush *sad_ap_find_by_dev(dev_t, str_stack_t *); /* - * The following interfaces require sad_lock to be held when invoked. + * The following interfaces require ss_sad_lock to be held when invoked. */ -extern void sad_ap_insert(struct autopush *); -extern void sad_ap_remove(struct autopush *); -extern struct autopush *sad_ap_find(struct apcommon *); +extern void sad_ap_insert(struct autopush *, str_stack_t *); +extern void sad_ap_remove(struct autopush *, str_stack_t *); +extern struct autopush *sad_ap_find(struct apcommon *, str_stack_t *); #endif /* _KERNEL */ diff --git a/usr/src/uts/common/sys/strsubr.h b/usr/src/uts/common/sys/strsubr.h index 10ce2e4012..34e05d799c 100644 --- a/usr/src/uts/common/sys/strsubr.h +++ b/usr/src/uts/common/sys/strsubr.h @@ -23,7 +23,7 @@ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -44,6 +44,8 @@ #include <sys/kstat.h> #include <sys/uio.h> #include <sys/proc.h> +#include <sys/netstack.h> +#include <sys/modhash.h> #ifdef __cplusplus extern "C" { @@ -241,6 +243,7 @@ typedef struct stdata { kcondvar_t sd_qcv; /* Waiters for qhead to become empty */ kcondvar_t sd_zcopy_wait; uint_t sd_copyflag; /* copy-related flags */ + zoneid_t sd_anchorzone; /* Allow removal from same zone only */ } stdata_t; /* @@ -687,6 +690,7 @@ struct mux_edge { struct mux_node *me_nodep; /* edge leads to this node */ struct mux_edge *me_nextp; /* next edge */ int me_muxid; /* id of link */ + dev_t me_dev; /* dev_t - used for kernel PUNLINK */ }; /* @@ -780,6 +784,20 @@ enum jcaccess { JCGETP /* get ctty parameters */ }; +struct str_stack { + netstack_t *ss_netstack; /* Common netstack */ + + kmutex_t ss_sad_lock; /* autopush lock */ + mod_hash_t *ss_sad_hash; + size_t ss_sad_hash_nchains; + struct saddev *ss_saddev; /* sad device array */ + int ss_sadcnt; /* number of sad devices */ + + int ss_devcnt; /* number of mux_nodes */ + struct mux_node *ss_mux_nodes; /* mux info for cycle checking */ +}; +typedef struct str_stack str_stack_t; + /* * Finding related queues */ @@ -1070,15 +1088,16 @@ extern int putiocd(mblk_t *, caddr_t, int, cred_t *); extern int getiocd(mblk_t *, caddr_t, int); extern struct linkinfo *alloclink(queue_t *, queue_t *, struct file *); extern void lbfree(struct linkinfo *); -extern int linkcycle(stdata_t *, stdata_t *); -extern struct linkinfo *findlinks(stdata_t *, int, int); +extern int linkcycle(stdata_t *, stdata_t *, str_stack_t *); +extern struct linkinfo *findlinks(stdata_t *, int, int, str_stack_t *); extern queue_t *getendq(queue_t *); extern int mlink(vnode_t *, int, int, cred_t *, int *, int); extern int mlink_file(vnode_t *, int, struct file *, cred_t *, int *, int); -extern int munlink(struct stdata *, struct linkinfo *, int, cred_t *, int *); -extern int munlinkall(struct stdata *, int, cred_t *, int *); -extern void mux_addedge(stdata_t *, stdata_t *, int); -extern void mux_rmvedge(stdata_t *, int); +extern int munlink(struct stdata *, struct linkinfo *, int, cred_t *, int *, + str_stack_t *); +extern int munlinkall(struct stdata *, int, cred_t *, int *, str_stack_t *); +extern void mux_addedge(stdata_t *, stdata_t *, int, str_stack_t *); +extern void mux_rmvedge(stdata_t *, int, str_stack_t *); extern int devflg_to_qflag(struct streamtab *, uint32_t, uint32_t *, uint32_t *); extern void setq(queue_t *, struct qinit *, struct qinit *, perdm_t *, diff --git a/usr/src/uts/common/sys/syscall.h b/usr/src/uts/common/sys/syscall.h index eedadfa0c0..71d4164ff9 100644 --- a/usr/src/uts/common/sys/syscall.h +++ b/usr/src/uts/common/sys/syscall.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -471,8 +471,13 @@ extern "C" { * zone_list(...) :: zone(ZONE_LIST, ...) * zone_shutdown(...) :: zone(ZONE_SHUTDOWN, ...) * zone_lookup(...) :: zone(ZONE_LOOKUP, ...) + * zone_boot(...) :: zone(ZONE_BOOT, ...) + * zone_version(...) :: zone(ZONE_VERSION, ...) * zone_setattr(...) :: zone(ZONE_SETATTR, ...) - * zone_getattr(...) :: zone(ZONE_GETATTR, ...) + * zone_add_datalink(...) :: zone(ZONE_ADD_DATALINK, ...) + * zone_remove_datalink(...) :: zone(ZONE_DEL_DATALINK, ...) + * zone_check_datalink(...) :: zone(ZONE_CHECK_DATALINK, ...) + * zone_list_datalink(...) :: zone(ZONE_LIST_DATALINK, ...) 
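The str_stack_t above gathers the formerly global STREAMS state (the sad autopush store, the mux node table) under its owning netstack, and the sad.h prototypes in the previous hunk now take a str_stack_t *. A sketch of how a caller gets from a netstack to that state; the helper name is ours:

#include <sys/sad.h>
#include <sys/strsubr.h>
#include <sys/netstack.h>

static struct autopush *
autopush_for_dev(dev_t dev, netstack_t *ns)
{
    str_stack_t *ss = ns->netstack_str;

    /* sad_ap_find_by_dev() acquires ss->ss_sad_lock itself. */
    return (sad_ap_find_by_dev(dev, ss));
}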
*/ #define SYS_autofssys 228 #define SYS_getcwd 229 diff --git a/usr/src/uts/common/sys/tsol/tnet.h b/usr/src/uts/common/sys/tsol/tnet.h index 75eadb48ce..906302cfce 100644 --- a/usr/src/uts/common/sys/tsol/tnet.h +++ b/usr/src/uts/common/sys/tsol/tnet.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. * * from "tnet.h 7.44 02/10/09 SMI; TSOL 2.x" @@ -48,11 +48,14 @@ extern "C" { extern int tsol_tnrh_chk(tsol_tpent_t *, bslabel_t *, int); extern tsol_tnrhc_t *find_rhc(const void *, uchar_t, boolean_t); -extern int tsol_compute_label(const cred_t *, ipaddr_t, uchar_t *, boolean_t); +extern int tsol_compute_label(const cred_t *, ipaddr_t, uchar_t *, boolean_t, + ip_stack_t *); extern int tsol_compute_label_v6(const cred_t *, const in6_addr_t *, uchar_t *, - boolean_t); -extern int tsol_check_label(const cred_t *, mblk_t **, int *, boolean_t); -extern int tsol_check_label_v6(const cred_t *, mblk_t **, int *, boolean_t); + boolean_t, ip_stack_t *); +extern int tsol_check_label(const cred_t *, mblk_t **, int *, boolean_t, + ip_stack_t *); +extern int tsol_check_label_v6(const cred_t *, mblk_t **, int *, boolean_t, + ip_stack_t *); extern int tsol_prepend_option(uchar_t *, ipha_t *, int); extern int tsol_prepend_option_v6(uchar_t *, ip6_t *, int); extern int tsol_remove_secopt(ipha_t *, int); @@ -82,7 +85,8 @@ extern int tsol_rtsa_init(rt_msghdr_t *, tsol_rtsecattr_t *, caddr_t); extern int tsol_ire_init_gwattr(ire_t *, uchar_t, tsol_gc_t *, tsol_gcgrp_t *); extern mblk_t *tsol_ip_forward(ire_t *, mblk_t *); -extern mlp_type_t tsol_mlp_addr_type(zoneid_t, uchar_t, const void *); +extern mlp_type_t tsol_mlp_addr_type(zoneid_t, uchar_t, const void *, + ip_stack_t *); extern boolean_t tsol_check_interface_address(const ipif_t *); #endif /* _KERNEL */ diff --git a/usr/src/uts/common/sys/zone.h b/usr/src/uts/common/sys/zone.h index 94646bc976..9983e8ec85 100644 --- a/usr/src/uts/common/sys/zone.h +++ b/usr/src/uts/common/sys/zone.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -35,6 +35,8 @@ #include <sys/ipc_rctl.h> #include <sys/pset.h> #include <sys/tsol/label.h> +#include <sys/cred.h> +#include <sys/netstack.h> #include <sys/uadmin.h> #ifdef __cplusplus @@ -65,16 +67,20 @@ extern "C" { #define ALL_ZONES (-1) /* system call subcodes */ -#define ZONE_CREATE 0 -#define ZONE_DESTROY 1 -#define ZONE_GETATTR 2 -#define ZONE_ENTER 3 -#define ZONE_LIST 4 -#define ZONE_SHUTDOWN 5 -#define ZONE_LOOKUP 6 -#define ZONE_BOOT 7 -#define ZONE_VERSION 8 -#define ZONE_SETATTR 9 +#define ZONE_CREATE 0 +#define ZONE_DESTROY 1 +#define ZONE_GETATTR 2 +#define ZONE_ENTER 3 +#define ZONE_LIST 4 +#define ZONE_SHUTDOWN 5 +#define ZONE_LOOKUP 6 +#define ZONE_BOOT 7 +#define ZONE_VERSION 8 +#define ZONE_SETATTR 9 +#define ZONE_ADD_DATALINK 10 +#define ZONE_DEL_DATALINK 11 +#define ZONE_CHECK_DATALINK 12 +#define ZONE_LIST_DATALINK 13 /* zone attributes */ #define ZONE_ATTR_ROOT 1 @@ -90,6 +96,7 @@ extern "C" { #define ZONE_ATTR_BRAND 11 #define ZONE_ATTR_PHYS_MCAP 12 #define ZONE_ATTR_SCHED_CLASS 13 +#define ZONE_ATTR_FLAGS 14 /* Start of the brand-specific attribute namespace */ #define ZONE_ATTR_BRAND_ATTRS 32768 @@ -166,6 +173,7 @@ typedef struct { int match; /* match level */ uint32_t doi; /* DOI for label */ caddr32_t label; /* label associated with zone */ + int flags; } zone_def32; #endif typedef struct { @@ -181,6 +189,7 @@ typedef struct { int match; /* match level */ uint32_t doi; /* DOI for label */ const bslabel_t *label; /* label associated with zone */ + int flags; } zone_def; /* extended error information */ @@ -257,6 +266,15 @@ typedef struct zone_cmd_rval { */ #define ZONE_DOOR_PATH ZONES_TMPDIR "/%s.zoneadmd_door" +/* zone_flags */ +#define ZF_DESTROYED 0x1 /* ZSD destructor callbacks run */ +#define ZF_HASHED_LABEL 0x2 /* zone has a unique label */ +#define ZF_IS_SCRATCH 0x4 /* scratch zone */ +#define ZF_NET_EXCL 0x8 /* Zone has an exclusive IP stack */ + +/* zone_create flags */ +#define ZCF_NET_EXCL 0x1 /* Create a zone with exclusive IP */ + #ifdef _KERNEL /* * We need to protect the definition of 'list_t' from userland applications and @@ -266,13 +284,9 @@ typedef struct zone_cmd_rval { #define GLOBAL_ZONEUNIQID 0 /* uniqid of the global zone */ -/* zone_flags */ -#define ZF_DESTROYED 0x1 /* ZSD destructor callbacks run */ -#define ZF_HASHED_LABEL 0x2 /* zone has a unique label */ -#define ZF_IS_SCRATCH 0x4 /* scratch zone */ - struct pool; struct brand; +struct dlnamelist; /* * Structure to record list of ZFS datasets exported to a zone. @@ -397,6 +411,11 @@ typedef struct zone { id_t zone_defaultcid; /* dflt scheduling class id */ kstat_t *zone_swapresv_kstat; kstat_t *zone_lockedmem_kstat; + /* + * zone_dl_list is protected by zone_lock + */ + struct dlnamelist *zone_dl_list; + netstack_t *zone_netstack; } zone_t; /* @@ -404,6 +423,7 @@ typedef struct zone { */ #define ZONE_PS_INVAL PS_MYID + extern zone_t zone0; extern zone_t *global_zone; extern uint_t maxzones; @@ -424,6 +444,7 @@ extern zone_t *zone_find_by_name(char *); extern zone_t *zone_find_by_any_path(const char *, boolean_t); extern zone_t *zone_find_by_path(const char *); extern zoneid_t getzoneid(void); +extern zone_t *zone_find_by_id_nolock(zoneid_t); /* * Zone-specific data (ZSD) APIs diff --git a/usr/src/uts/intel/arp/Makefile b/usr/src/uts/intel/arp/Makefile index ec1052094d..ae82f7f07d 100644 --- a/usr/src/uts/intel/arp/Makefile +++ b/usr/src/uts/intel/arp/Makefile @@ -21,7 +21,7 @@ # # uts/intel/arp/Makefile # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. 
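ZCF_NET_EXCL in the zone_create flags and ZF_NET_EXCL in the zone flags above are how an exclusive-IP zone is requested and later recognised. A small kernel-side sketch of the check; the helper is illustrative and assumes zone_t keeps its existing zone_flags field:

#include <sys/zone.h>

static boolean_t
zone_has_exclusive_stack(const zone_t *zone)
{
    return ((zone->zone_flags & ZF_NET_EXCL) != 0);
}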
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # ident "%Z%%M% %I% %E% SMI" @@ -103,6 +103,14 @@ install: $(INSTALL_DEPS) $(ROOTLINK): $(ROOT_STRMOD_DIR) $(ROOTMODULE) -$(RM) $@; ln $(ROOTMODULE) $@ +sis_check: sis_check.obj +sis_check.obj: $(ALL_DEPS) + @$(GREP) -v '#' $(MODULE).objt-symbols.obj64 |$(GREP) . |$(SORT) -u > \ + $(MODULE).symbols.tmp + @$(NM) obj64/$(MODULE) |$(GREP) OBJT |$(GREP) -v UNDEF | \ + $(CUT) -d'|' -f8 |$(SORT) -u > $(MODULE).symbols.tmp.new + -@$(DIFF) $(MODULE).symbols.tmp $(MODULE).symbols.tmp.new + # # Include common targets. # diff --git a/usr/src/uts/intel/arp/arp.objt-symbols.obj64 b/usr/src/uts/intel/arp/arp.objt-symbols.obj64 new file mode 100644 index 0000000000..1e349eca81 --- /dev/null +++ b/usr/src/uts/intel/arp/arp.objt-symbols.obj64 @@ -0,0 +1,42 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" + +ar_cmd_tbl +ar_m_tbl +arpinfo +arp_param_arr +arp_netinfo +cb_inet_devops +fsw +inet_dev_info +inet_devops +info +modldrv +modlinkage +modlstrmod +netdev_privs +rinit +winit diff --git a/usr/src/uts/intel/hook/Makefile b/usr/src/uts/intel/hook/Makefile index 9fa6442003..aeacb069ba 100644 --- a/usr/src/uts/intel/hook/Makefile +++ b/usr/src/uts/intel/hook/Makefile @@ -21,7 +21,7 @@ # # uts/intel/hook/Makefile # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # #ident "%Z%%M% %I% %E% SMI" @@ -79,6 +79,14 @@ clean.lint: $(CLEAN_LINT_DEPS) install: $(INSTALL_DEPS) +sis_check: sis_check.obj +sis_check.obj: $(ALL_DEPS) + @$(GREP) -v '#' $(MODULE).objt-symbols.obj64 |$(GREP) . |$(SORT) -u \ + > $(MODULE).symbols.tmp + @$(NM) obj64/$(MODULE) |$(GREP) OBJT |$(GREP) -v UNDEF | \ + $(CUT) -d'|' -f8 |$(GREP) -v '^___const_' |$(SORT) -u \ + > $(MODULE).symbols.tmp.new + -@$(DIFF) $(MODULE).symbols.tmp $(MODULE).symbols.tmp.new # # Include common targets. diff --git a/usr/src/uts/intel/hook/hook.objt-symbols.obj64 b/usr/src/uts/intel/hook/hook.objt-symbols.obj64 new file mode 100644 index 0000000000..c813224003 --- /dev/null +++ b/usr/src/uts/intel/hook/hook.objt-symbols.obj64 @@ -0,0 +1,28 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. 
+# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" + +modlinkage +modlmisc diff --git a/usr/src/uts/intel/icmp/Makefile b/usr/src/uts/intel/icmp/Makefile index 985fc0dec7..bc9480662e 100644 --- a/usr/src/uts/intel/icmp/Makefile +++ b/usr/src/uts/intel/icmp/Makefile @@ -21,7 +21,7 @@ # # uts/intel/icmp/Makefile # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # ident "%Z%%M% %I% %E% SMI" @@ -103,6 +103,14 @@ install: $(INSTALL_DEPS) $(ROOTLINK): $(ROOT_STRMOD_DIR) $(ROOTMODULE) -$(RM) $@; ln $(ROOTMODULE) $@ +sis_check: sis_check.obj +sis_check.obj: $(ALL_DEPS) + @$(GREP) -v '#' $(MODULE).objt-symbols.obj64 |$(GREP) . |$(SORT) -u \ + > $(MODULE).symbols.tmp + @$(NM) obj64/$(MODULE) |$(GREP) OBJT |$(GREP) -v UNDEF | \ + $(CUT) -d'|' -f8 |$(SORT) -u > $(MODULE).symbols.tmp.new + -@$(DIFF) $(MODULE).symbols.tmp $(MODULE).symbols.tmp.new + # # Include common targets. # diff --git a/usr/src/uts/intel/icmp/icmp.objt-symbols.obj64 b/usr/src/uts/intel/icmp/icmp.objt-symbols.obj64 new file mode 100644 index 0000000000..2cb9d3484b --- /dev/null +++ b/usr/src/uts/intel/icmp/icmp.objt-symbols.obj64 @@ -0,0 +1,47 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" + +cb_inet_devops +fsw +ICMP6_MAJ +icmp_g_t_info_ack +icmpinfo +icmp_max_optsize +icmp_opt_arr +icmp_opt_obj +icmp_param_arr +icmp_valid_levels_arr +inet_dev_info +inet_devops +info +modldrv +modlinkage +modlstrmod +netdev_privs +rinit +sin6_null +sin_null +winit diff --git a/usr/src/uts/intel/ip/Makefile b/usr/src/uts/intel/ip/Makefile index ff8947d635..11f048352c 100644 --- a/usr/src/uts/intel/ip/Makefile +++ b/usr/src/uts/intel/ip/Makefile @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. 
# #ident "%Z%%M% %I% %E% SMI" @@ -104,6 +104,20 @@ install: $(INSTALL_DEPS) $(ROOTLINK): $(ROOT_STRMOD_DIR) $(ROOTMODULE) -$(RM) $@; ln $(ROOTMODULE) $@ +sis_check: sis_check.obj sis_check.debug +sis_check.obj: $(ALL_DEPS) + @$(GREP) -v '#' $(MODULE).objt-symbols.obj64 |$(GREP) . |$(SORT) -u \ + > $(MODULE).symbols.tmp + @$(NM) obj64/$(MODULE) |$(GREP) OBJT |$(GREP) -v UNDEF | \ + $(CUT) -d'|' -f8 |$(SORT) -u > $(MODULE).symbols.tmp.new + -@$(DIFF) $(MODULE).symbols.tmp $(MODULE).symbols.tmp.new +sis_check.debug: $(ALL_DEPS) + @$(GREP) -v '#' $(MODULE).objt-symbols.debug64 |$(GREP) . |$(SORT) -u \ + > $(MODULE).symbols.dbg.tmp + @$(NM) debug64/$(MODULE) |$(GREP) OBJT |$(GREP) -v UNDEF | \ + $(CUT) -d'|' -f8 |$(SORT) -u > $(MODULE).symbols.dbg.tmp.new + -@$(DIFF) $(MODULE).symbols.dbg.tmp $(MODULE).symbols.dbg.tmp.new + # # Include common targets. # diff --git a/usr/src/uts/intel/ip/ip.objt-symbols.debug64 b/usr/src/uts/intel/ip/ip.objt-symbols.debug64 new file mode 100644 index 0000000000..0417803e80 --- /dev/null +++ b/usr/src/uts/intel/ip/ip.objt-symbols.debug64 @@ -0,0 +1,312 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" + +IP_MAJ +SCTP6_MAJ +SCTP_MAJ +TCP6_MAJ +TCP_MAJ +UDP6_MAJ +cb_inet_devops +cl_inet_bind +cl_inet_connect +cl_inet_disconnect +cl_inet_ipident +cl_inet_isclusterwide +cl_inet_listen +cl_inet_unbind +cl_inet_unlisten +cl_sctp_assoc_change +cl_sctp_check_addrs +cl_sctp_connect +cl_sctp_disconnect +cl_sctp_listen +cl_sctp_unlisten +conn_drain_nthreads +crctab +default_ip6_asp_table +do_tcp_direct_sockfs +do_tcp_fusion +do_tcpzcopy +dohwcksum +eventq_queue_in +eventq_queue_nic +eventq_queue_out +fsw +gcdb_hash +gcdb_hash_size +gcdb_lock +gcgrp4_hash +gcgrp6_hash +gcgrp_hash_size +gcgrp_lock +icmp_frag_size_table +icmp_ipha +ill_no_arena +ill_null +inet_dev_info +inet_devops +inet_maxminor +ip6_area_template +ip6_ared_template +ip6_cache_table_size +ip6_ftable_hash_size +ip6_ire_max_bucket_cnt +ip6_ire_min_bucket_cnt +ip6_max_cache_table_size +ip6opt_ls +ip_ard_template +ip_area_template +ip_ared_template +ip_areq_template +ip_aresq_template +ip_arma_multi_template +ip_aroff_template +ip_aron_template +ip_aru_template +ip_cache_table_size +ip_cgtp_filter +ip_cgtp_filter_ops +ip_cgtp_filter_rev +ip_debug +ip_g_all_ones +ip_input_proc +ip_ioctl_ftbl +ip_ire_cpu_ratio +ip_ire_max_bucket_cnt +ip_ire_mem_ratio +ip_ire_min_bucket_cnt +ip_loopback_mtu +ip_loopback_mtu_v6plus +ip_loopback_mtuplus +ip_m_tbl +ip_max_cache_table_size +ip_max_frag_dups +ip_min_frag_prune_time +ip_minor_arena +ip_misc_ioctl_count +ip_misc_ioctl_table +ip_mod_info +ip_modclose_ackwait_ms +ip_ndx_ioctl_count +ip_ndx_ioctl_table +ip_opt_arr +ip_opt_obj +ip_poll_normal_ms +ip_poll_normal_ticks +ip_rput_pullups +ip_six_byte_all_ones +ip_soft_rings_cnt +ip_squeue_bind +ip_squeue_create_callback +ip_squeue_enter +ip_squeue_enter_unbound +ip_squeue_fanout +ip_squeue_profile +ip_squeue_worker_wait +ip_squeues_per_cpu +ip_wput_frag_mdt_min +ipcl_bind_fanout_size +ipcl_conn_cache +ipcl_conn_hash_maxsize +ipcl_conn_hash_memfactor +ipcl_conn_hash_size +ipcl_debug_level +ipcl_raw_fanout_size +ipcl_tcpconn_cache +ipcl_udp_fanout_size +ipclassifier_version +ipif_loopback_name +ipif_nv_tbl +ipif_zero +ipinfo +iplrinit +iplwinit +iprinit +ipsec_action_cache +ipsec_hdr_pullup_needed +ipsec_info_cache +ipsec_pol_cache +ipsec_policy_failure_msgs +ipsec_sel_cache +ipsec_weird_null_inbound_policy +ipsechw_debug +ipv4_forward_suffix +ipv4info +ipv6_all_hosts_mcast +ipv6_all_ones +ipv6_all_rtrs_mcast +ipv6_all_v2rtrs_mcast +ipv6_all_zeros +ipv6_areq_template +ipv6_forward_suffix +ipv6_ll_template +ipv6_loopback +ipv6_solicited_node_mcast +ipv6_unspecified_group +ipv6info +ipwinit +ire_cache +ire_gw_secattr_cache +ire_idle_cutoff_interval +ire_null +ire_nv_arr +ire_nv_tbl +ire_uinfo_null +lcl_ndp_arr +lcl_param_arr +lcl_sctp_param_arr +lcl_sctp_wroff_xtra_param +lcl_tcp_mdt_head_param +lcl_tcp_mdt_max_pbufs_param +lcl_tcp_mdt_tail_param +lcl_tcp_param_arr +lcl_tcp_wroff_xtra_param +log_format +mask_rnhead +max_keylen +modldrv +modlinkage +modlstrmod +multicast_encap_iphdr +netdev_privs +netmasks +prov_update_handle +radix_mask_cache +radix_node_cache +recvq_call +recvq_loop_cnt +req_arr +rinit_ipv6 +rn_mkfreelist +rn_ones +rn_zeros +rr_max_blank_ratio +rr_max_pkt_cnt_ratio +rr_min_blank_ratio +rr_min_pkt_cnt_ratio +rt_entry_cache +sctp_asconf_default_dispatch +sctp_asconf_dispatch_tbl +sctp_conn_cache +sctp_conn_hash_size +sctp_kmem_faddr_cache +sctp_kmem_ftsn_set_cache +sctp_kmem_set_cache +sctp_recvq_tq_task_max +sctp_recvq_tq_task_min +sctp_recvq_tq_thr_max +sctp_recvq_tq_thr_min +sctp_sin6_null +sctp_taskq 
+sctpdebug +sendq_collision +sendq_empty +sendq_loop_cnt +sin6_null +sin_null +skip_sctp_cksum +sqset_global_list +sqset_global_size +squeue_cache +squeue_intrdrain_ms +squeue_intrdrain_ns +squeue_kstat +squeue_kstat_lock +squeue_profile +squeue_worker_poll_min +squeue_workerdrain_ms +squeue_workerdrain_ns +squeue_workerwait_ms +squeue_workerwait_tick +squeue_writerdrain_ms +squeue_writerdrain_ns +tcp_acceptor_rinit +tcp_acceptor_winit +tcp_conn_hash_size +tcp_drop_ack_unsent_cnt +tcp_free_list_max_cnt +tcp_fusion_rcv_unread_min +tcp_g_kstat +tcp_g_statistics +tcp_g_t_info_ack +tcp_g_t_info_ack_v6 +tcp_icmp_source_quench +tcp_iphc_cache +tcp_loopback_rinit +tcp_max_optsize +tcp_mdt_chain +tcp_mdt_smss_threshold +tcp_mod_rinit +tcp_mod_winit +tcp_opt_arr +tcp_opt_obj +tcp_random_anon_port +tcp_random_end_ptr +tcp_random_fptr +tcp_random_lock +tcp_random_rptr +tcp_random_state +tcp_randtbl +tcp_report_header +tcp_rinfo +tcp_rinit +tcp_sack_info_cache +tcp_sock_winit +tcp_squeue_close +tcp_squeue_close_proc +tcp_squeue_wput +tcp_squeue_wput_proc +tcp_static_maxpsz +tcp_taskq +tcp_timercache +tcp_tx_pull_len +tcp_valid_levels_arr +tcp_version +tcp_winfo +tcp_winit +tcpinfo +tsol_strict_error +udp_bind_fanout_size +udp_cache +udp_count +udp_g_t_info_ack_ipv4 +udp_g_t_info_ack_ipv6 +udp_info +udp_max_optsize +udp_opt_arr +udp_opt_obj +udp_param_arr +udp_random_anon_port +udp_rinit +udp_snmp_rinit +udp_snmp_winit +udp_valid_levels_arr +udp_version +udp_winit +udpinfo +winit_ipv6 +zero_info diff --git a/usr/src/uts/intel/ip/ip.objt-symbols.obj64 b/usr/src/uts/intel/ip/ip.objt-symbols.obj64 new file mode 100644 index 0000000000..dcc6c81167 --- /dev/null +++ b/usr/src/uts/intel/ip/ip.objt-symbols.obj64 @@ -0,0 +1,300 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" + +IP_MAJ +SCTP6_MAJ +SCTP_MAJ +TCP6_MAJ +TCP_MAJ +UDP6_MAJ +cb_inet_devops +cl_inet_bind +cl_inet_connect +cl_inet_disconnect +cl_inet_ipident +cl_inet_isclusterwide +cl_inet_listen +cl_inet_unbind +cl_inet_unlisten +cl_sctp_assoc_change +cl_sctp_check_addrs +cl_sctp_connect +cl_sctp_disconnect +cl_sctp_listen +cl_sctp_unlisten +conn_drain_nthreads +crctab +default_ip6_asp_table +do_tcp_direct_sockfs +do_tcp_fusion +do_tcpzcopy +dohwcksum +eventq_queue_in +eventq_queue_nic +eventq_queue_out +fsw +gcdb_hash +gcdb_hash_size +gcdb_lock +gcgrp4_hash +gcgrp6_hash +gcgrp_hash_size +gcgrp_lock +icmp_frag_size_table +icmp_ipha +ill_no_arena +ill_null +inet_dev_info +inet_devops +inet_maxminor +ip6_area_template +ip6_ared_template +ip6_cache_table_size +ip6_ftable_hash_size +ip6_ire_max_bucket_cnt +ip6_ire_min_bucket_cnt +ip6_max_cache_table_size +ip6opt_ls +ip_ard_template +ip_area_template +ip_ared_template +ip_areq_template +ip_aresq_template +ip_arma_multi_template +ip_aroff_template +ip_aron_template +ip_aru_template +ip_cache_table_size +ip_cgtp_filter +ip_cgtp_filter_ops +ip_cgtp_filter_rev +ip_debug +ip_g_all_ones +ip_input_proc +ip_ioctl_ftbl +ip_ire_cpu_ratio +ip_ire_max_bucket_cnt +ip_ire_mem_ratio +ip_ire_min_bucket_cnt +ip_loopback_mtu +ip_loopback_mtu_v6plus +ip_loopback_mtuplus +ip_m_tbl +ip_max_cache_table_size +ip_max_frag_dups +ip_min_frag_prune_time +ip_minor_arena +ip_misc_ioctl_count +ip_misc_ioctl_table +ip_mod_info +ip_modclose_ackwait_ms +ip_ndx_ioctl_count +ip_ndx_ioctl_table +ip_opt_arr +ip_opt_obj +ip_poll_normal_ms +ip_poll_normal_ticks +ip_rput_pullups +ip_six_byte_all_ones +ip_soft_rings_cnt +ip_squeue_bind +ip_squeue_create_callback +ip_squeue_enter +ip_squeue_enter_unbound +ip_squeue_fanout +ip_squeue_profile +ip_squeue_worker_wait +ip_squeues_per_cpu +ip_wput_frag_mdt_min +ipcl_bind_fanout_size +ipcl_conn_cache +ipcl_conn_hash_maxsize +ipcl_conn_hash_memfactor +ipcl_conn_hash_size +ipcl_raw_fanout_size +ipcl_tcpconn_cache +ipcl_udp_fanout_size +ipclassifier_version +ipif_loopback_name +ipif_nv_tbl +ipif_zero +ipinfo +iplrinit +iplwinit +iprinit +ipsec_action_cache +ipsec_hdr_pullup_needed +ipsec_info_cache +ipsec_pol_cache +ipsec_policy_failure_msgs +ipsec_sel_cache +ipsec_weird_null_inbound_policy +ipv4_forward_suffix +ipv4info +ipv6_all_hosts_mcast +ipv6_all_ones +ipv6_all_rtrs_mcast +ipv6_all_v2rtrs_mcast +ipv6_all_zeros +ipv6_areq_template +ipv6_forward_suffix +ipv6_ll_template +ipv6_loopback +ipv6_solicited_node_mcast +ipv6_unspecified_group +ipv6info +ipwinit +ire_cache +ire_gw_secattr_cache +ire_idle_cutoff_interval +ire_null +ire_nv_arr +ire_nv_tbl +ire_uinfo_null +lcl_ndp_arr +lcl_param_arr +lcl_sctp_param_arr +lcl_sctp_wroff_xtra_param +lcl_tcp_mdt_head_param +lcl_tcp_mdt_max_pbufs_param +lcl_tcp_mdt_tail_param +lcl_tcp_param_arr +lcl_tcp_wroff_xtra_param +log_format +mask_rnhead +max_keylen +modldrv +modlinkage +modlstrmod +multicast_encap_iphdr +netdev_privs +netmasks +prov_update_handle +radix_mask_cache +radix_node_cache +req_arr +rinit_ipv6 +rn_mkfreelist +rn_ones +rn_zeros +rr_max_blank_ratio +rr_max_pkt_cnt_ratio +rr_min_blank_ratio +rr_min_pkt_cnt_ratio +rt_entry_cache +sctp_asconf_default_dispatch +sctp_asconf_dispatch_tbl +sctp_conn_cache +sctp_conn_hash_size +sctp_kmem_faddr_cache +sctp_kmem_ftsn_set_cache +sctp_kmem_set_cache +sctp_recvq_tq_task_max +sctp_recvq_tq_task_min +sctp_recvq_tq_thr_max +sctp_recvq_tq_thr_min +sctp_sin6_null +sctp_taskq +sctpdebug +sin6_null +sin_null +sqset_global_list 
+sqset_global_size +squeue_cache +squeue_intrdrain_ms +squeue_intrdrain_ns +squeue_worker_poll_min +squeue_workerdrain_ms +squeue_workerdrain_ns +squeue_workerwait_ms +squeue_workerwait_tick +squeue_writerdrain_ms +squeue_writerdrain_ns +tcp_acceptor_rinit +tcp_acceptor_winit +tcp_conn_hash_size +tcp_drop_ack_unsent_cnt +tcp_free_list_max_cnt +tcp_fusion_rcv_unread_min +tcp_g_kstat +tcp_g_statistics +tcp_g_t_info_ack +tcp_g_t_info_ack_v6 +tcp_icmp_source_quench +tcp_iphc_cache +tcp_loopback_rinit +tcp_max_optsize +tcp_mdt_chain +tcp_mdt_smss_threshold +tcp_mod_rinit +tcp_mod_winit +tcp_opt_arr +tcp_opt_obj +tcp_random_anon_port +tcp_random_end_ptr +tcp_random_fptr +tcp_random_lock +tcp_random_rptr +tcp_random_state +tcp_randtbl +tcp_report_header +tcp_rinfo +tcp_rinit +tcp_sack_info_cache +tcp_sock_winit +tcp_squeue_close +tcp_squeue_close_proc +tcp_squeue_wput +tcp_squeue_wput_proc +tcp_static_maxpsz +tcp_taskq +tcp_timercache +tcp_tx_pull_len +tcp_valid_levels_arr +tcp_version +tcp_winfo +tcp_winit +tcpinfo +tsol_strict_error +udp_bind_fanout_size +udp_cache +udp_g_t_info_ack_ipv4 +udp_g_t_info_ack_ipv6 +udp_info +udp_max_optsize +udp_opt_arr +udp_opt_obj +udp_param_arr +udp_random_anon_port +udp_rinit +udp_snmp_rinit +udp_snmp_winit +udp_valid_levels_arr +udp_version +udp_winit +udpinfo +winit_ipv6 +zero_info diff --git a/usr/src/uts/intel/ipf/Makefile b/usr/src/uts/intel/ipf/Makefile index 95079db4f3..8bd851efbe 100644 --- a/usr/src/uts/intel/ipf/Makefile +++ b/usr/src/uts/intel/ipf/Makefile @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # #ident "%Z%%M% %I% %E% SMI" @@ -96,6 +96,14 @@ clean.lint: $(CLEAN_LINT_DEPS) install: $(INSTALL_DEPS) +sis_check: sis_check.obj +sis_check.obj: $(ALL_DEPS) + @$(GREP) -v '#' $(MODULE).objt-symbols.obj64 |$(GREP) . |$(SORT) -u \ + > $(MODULE).symbols.tmp + @$(NM) obj64/$(MODULE) |$(GREP) OBJT |$(GREP) -v UNDEF | \ + $(CUT) -d'|' -f8 |$(SORT) -u > $(MODULE).symbols.tmp.new + -@$(DIFF) $(MODULE).symbols.tmp $(MODULE).symbols.tmp.new + # # Include common targets. # diff --git a/usr/src/uts/intel/ipf/ipf.objt-symbols.obj64 b/usr/src/uts/intel/ipf/ipf.objt-symbols.obj64 new file mode 100644 index 0000000000..a9f7284419 --- /dev/null +++ b/usr/src/uts/intel/ipf/ipf.objt-symbols.obj64 @@ -0,0 +1,55 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" + +fr_availfuncs +fr_features +fr_objbytes +hdrsizes +icmpreplytype4 +icmpreplytype6 +icmptoicmp6types +icmptoicmp6unreach +ip6exthdr +ipf_cb_ops +ipf_dev_info +ipf_devfiles +ipf_kstat_tmp +ipf_ops +ipf_proxy_debug +ipfilter_version +ipl_magic +iplmod +ipopts +ippr_irc_dcctypes +ippr_pptp_debug +ippr_pptp_gretimeout +lcl_ap_proxies +lcl_ipf_tuneables +modlink1 +rcsid +sccsid +secopt +tcpopts diff --git a/usr/src/uts/intel/ipsecah/Makefile b/usr/src/uts/intel/ipsecah/Makefile index 824a82a2a9..d67282cca7 100644 --- a/usr/src/uts/intel/ipsecah/Makefile +++ b/usr/src/uts/intel/ipsecah/Makefile @@ -20,7 +20,7 @@ # # # uts/intel/ipsecah/Makefile -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # #ident "%Z%%M% %I% %E% SMI" @@ -97,6 +97,14 @@ install: $(INSTALL_DEPS) $(ROOTLINK): $(ROOT_STRMOD_DIR) $(ROOTMODULE) -$(RM) $@; ln $(ROOTMODULE) $@ +sis_check: sis_check.obj +sis_check.obj: $(ALL_DEPS) + @$(GREP) -v '#' $(MODULE).objt-symbols.obj64 |$(GREP) . |$(SORT) -u \ + > $(MODULE).symbols.tmp + @$(NM) obj64/$(MODULE) |$(GREP) OBJT |$(GREP) -v UNDEF | \ + $(CUT) -d'|' -f8 |$(SORT) -u > $(MODULE).symbols.tmp.new + -@$(DIFF) $(MODULE).symbols.tmp $(MODULE).symbols.tmp.new + # # Include common targets. # diff --git a/usr/src/uts/intel/ipsecah/ipsecah.objt-symbols.obj64 b/usr/src/uts/intel/ipsecah/ipsecah.objt-symbols.obj64 new file mode 100644 index 0000000000..9c1b759006 --- /dev/null +++ b/usr/src/uts/intel/ipsecah/ipsecah.objt-symbols.obj64 @@ -0,0 +1,42 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" + +ah_hash_size +ah_taskq +cb_inet_devops +fsw +inet_dev_info +inet_devops +info +ipsacq_maxpackets +ipsecahinfo +lcl_param_arr +modldrv +modlinkage +modlstrmod +netdev_privs +rinit +winit diff --git a/usr/src/uts/intel/ipsecesp/Makefile b/usr/src/uts/intel/ipsecesp/Makefile index c9ab588067..2bb0f080ed 100644 --- a/usr/src/uts/intel/ipsecesp/Makefile +++ b/usr/src/uts/intel/ipsecesp/Makefile @@ -20,7 +20,7 @@ # # # uts/intel/ipsecesp/Makefile -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # #ident "%Z%%M% %I% %E% SMI" @@ -96,6 +96,14 @@ install: $(INSTALL_DEPS) $(ROOTLINK): $(ROOT_STRMOD_DIR) $(ROOTMODULE) -$(RM) $@; ln $(ROOTMODULE) $@ +sis_check: sis_check.obj +sis_check.obj: $(ALL_DEPS) + @$(GREP) -v '#' $(MODULE).objt-symbols.obj64 |$(GREP) . 
|$(SORT) -u \ + > $(MODULE).symbols.tmp + @$(NM) obj64/$(MODULE) |$(GREP) OBJT |$(GREP) -v UNDEF | \ + $(CUT) -d'|' -f8 |$(SORT) -u > $(MODULE).symbols.tmp.new + -@$(DIFF) $(MODULE).symbols.tmp $(MODULE).symbols.tmp.new + # # Include common targets. # diff --git a/usr/src/uts/intel/ipsecesp/ipsecesp.objt-symbols.obj64 b/usr/src/uts/intel/ipsecesp/ipsecesp.objt-symbols.obj64 new file mode 100644 index 0000000000..a6f2c78e5f --- /dev/null +++ b/usr/src/uts/intel/ipsecesp/ipsecesp.objt-symbols.obj64 @@ -0,0 +1,41 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" + +cb_inet_devops +esp_hash_size +esp_taskq +fsw +inet_dev_info +inet_devops +info +ipsecespinfo +lcl_param_arr +modldrv +modlinkage +modlstrmod +netdev_privs +rinit +winit diff --git a/usr/src/uts/intel/keysock/Makefile b/usr/src/uts/intel/keysock/Makefile index 7e8b19ca3f..22a34f769e 100644 --- a/usr/src/uts/intel/keysock/Makefile +++ b/usr/src/uts/intel/keysock/Makefile @@ -20,7 +20,7 @@ # # # uts/intel/keysock/Makefile -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # #ident "%Z%%M% %I% %E% SMI" @@ -97,6 +97,14 @@ install: $(INSTALL_DEPS) $(ROOTLINK): $(ROOT_STRMOD_DIR) $(ROOTMODULE) -$(RM) $@; ln $(ROOTMODULE) $@ +sis_check: sis_check.obj +sis_check.obj: $(ALL_DEPS) + @$(GREP) -v '#' $(MODULE).objt-symbols.obj64 |$(GREP) . |$(SORT) -u \ + > $(MODULE).symbols.tmp + @$(NM) obj64/$(MODULE) |$(GREP) OBJT |$(GREP) -v UNDEF | \ + $(CUT) -d'|' -f8 |$(SORT) -u > $(MODULE).symbols.tmp.new + -@$(DIFF) $(MODULE).symbols.tmp $(MODULE).symbols.tmp.new + # # Include common targets. # diff --git a/usr/src/uts/intel/keysock/keysock.objt-symbols.obj64 b/usr/src/uts/intel/keysock/keysock.objt-symbols.obj64 new file mode 100644 index 0000000000..428b56455e --- /dev/null +++ b/usr/src/uts/intel/keysock/keysock.objt-symbols.obj64 @@ -0,0 +1,53 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" + +cb_inet_devops +fsw +inet_dev_info +inet_devops +info +IP6DEV +IPSECAH +IPSECAHDEV +IPSECESP +IPSECESPDEV +KEYSOCK +keysock_g_t_info_ack +keysockinfo +keysock_max_optsize +keysock_modlp +keysock_opt_arr +keysock_opt_obj +keysock_valid_levels_arr +keysock_vmem +lcl_param_arr +modldrv +modlinkage +modlstrmod +netdev_privs +rinit +STRMOD +winit diff --git a/usr/src/uts/intel/neti/Makefile b/usr/src/uts/intel/neti/Makefile index 069cacc8d2..306200001d 100644 --- a/usr/src/uts/intel/neti/Makefile +++ b/usr/src/uts/intel/neti/Makefile @@ -22,7 +22,7 @@ # # uts/intel/neti/Makefile # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # #ident "%Z%%M% %I% %E% SMI" @@ -80,6 +80,15 @@ clean.lint: $(CLEAN_LINT_DEPS) install: $(INSTALL_DEPS) +sis_check: sis_check.obj +sis_check.obj: $(ALL_DEPS) + @$(GREP) -v '#' $(MODULE).objt-symbols.obj64 |$(GREP) . |$(SORT) -u \ + > $(MODULE).symbols.tmp + @$(NM) obj64/$(MODULE) |$(GREP) OBJT |$(GREP) -v UNDEF | \ + $(CUT) -d'|' -f8 |$(GREP) -v '^___const_' |$(SORT) -u \ + > $(MODULE).symbols.tmp.new + -@$(DIFF) $(MODULE).symbols.tmp $(MODULE).symbols.tmp.new + # # Include common targets. # diff --git a/usr/src/uts/intel/neti/neti.objt-symbols.obj64 b/usr/src/uts/intel/neti/neti.objt-symbols.obj64 new file mode 100644 index 0000000000..c813224003 --- /dev/null +++ b/usr/src/uts/intel/neti/neti.objt-symbols.obj64 @@ -0,0 +1,28 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" + +modlinkage +modlmisc diff --git a/usr/src/uts/intel/os/device_policy b/usr/src/uts/intel/os/device_policy index 184ac3454d..8d0009fc70 100644 --- a/usr/src/uts/intel/os/device_policy +++ b/usr/src/uts/intel/os/device_policy @@ -1,5 +1,5 @@ # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. 
# # CDDL HEADER START @@ -49,10 +49,10 @@ icmp read_priv_set=net_icmpaccess write_priv_set=net_icmpaccess icmp6 read_priv_set=net_icmpaccess write_priv_set=net_icmpaccess ip read_priv_set=net_rawaccess write_priv_set=net_rawaccess ip6 read_priv_set=net_rawaccess write_priv_set=net_rawaccess -keysock read_priv_set=sys_net_config write_priv_set=sys_net_config -ipsecah read_priv_set=sys_net_config write_priv_set=sys_net_config -ipsecesp read_priv_set=sys_net_config write_priv_set=sys_net_config -spdsock read_priv_set=sys_net_config write_priv_set=sys_net_config +keysock read_priv_set=sys_ip_config write_priv_set=sys_ip_config +ipsecah read_priv_set=sys_ip_config write_priv_set=sys_ip_config +ipsecesp read_priv_set=sys_ip_config write_priv_set=sys_ip_config +spdsock read_priv_set=sys_ip_config write_priv_set=sys_ip_config # # Raw network interface access permissions # @@ -83,5 +83,5 @@ aggr:ctl read_priv_set=sys_net_config write_priv_set=sys_net_config # # IP Filter # -ipf read_priv_set=sys_net_config write_priv_set=sys_net_config +ipf read_priv_set=sys_ip_config write_priv_set=sys_ip_config diff --git a/usr/src/uts/intel/rts/Makefile b/usr/src/uts/intel/rts/Makefile index 2ced9936ce..3e61ad2f20 100644 --- a/usr/src/uts/intel/rts/Makefile +++ b/usr/src/uts/intel/rts/Makefile @@ -21,7 +21,7 @@ # # uts/intel/rts/Makefile # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # ident "%Z%%M% %I% %E% SMI" @@ -103,6 +103,14 @@ install: $(INSTALL_DEPS) $(ROOTLINK): $(ROOT_STRMOD_DIR) $(ROOTMODULE) -$(RM) $@; ln $(ROOTMODULE) $@ +sis_check: sis_check.obj +sis_check.obj: $(ALL_DEPS) + @$(GREP) -v '#' $(MODULE).objt-symbols.obj64 |$(GREP) . |$(SORT) -u \ + > $(MODULE).symbols.tmp + @$(NM) obj64/$(MODULE) |$(GREP) OBJT |$(GREP) -v UNDEF | \ + $(CUT) -d'|' -f8 |$(SORT) -u > $(MODULE).symbols.tmp.new + -@$(DIFF) $(MODULE).symbols.tmp $(MODULE).symbols.tmp.new + # # Include common targets. # diff --git a/usr/src/uts/intel/rts/rts.objt-symbols.obj64 b/usr/src/uts/intel/rts/rts.objt-symbols.obj64 new file mode 100644 index 0000000000..31673cdf39 --- /dev/null +++ b/usr/src/uts/intel/rts/rts.objt-symbols.obj64 @@ -0,0 +1,44 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" + +cb_inet_devops +fsw +inet_dev_info +inet_devops +info +lcl_param_arr +modldrv +modlinkage +modlstrmod +netdev_privs +rinit +rts_g_t_info_ack +rtsinfo +rts_max_optsize +rts_opt_arr +rts_opt_obj +rts_valid_levels_arr +winit diff --git a/usr/src/uts/intel/spdsock/Makefile b/usr/src/uts/intel/spdsock/Makefile index 1f081bd96a..ba918cb9d2 100644 --- a/usr/src/uts/intel/spdsock/Makefile +++ b/usr/src/uts/intel/spdsock/Makefile @@ -20,7 +20,7 @@ # # # uts/intel/spdsock/Makefile -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # #ident "%Z%%M% %I% %E% SMI" @@ -92,6 +92,14 @@ clean.lint: $(CLEAN_LINT_DEPS) install: $(INSTALL_DEPS) +sis_check: sis_check.obj +sis_check.obj: $(ALL_DEPS) + @$(GREP) -v '#' $(MODULE).objt-symbols.obj64 |$(GREP) . |$(SORT) -u \ + > $(MODULE).symbols.tmp + @$(NM) obj64/$(MODULE) |$(GREP) OBJT |$(GREP) -v UNDEF | \ + $(CUT) -d'|' -f8 |$(SORT) -u > $(MODULE).symbols.tmp.new + -@$(DIFF) $(MODULE).symbols.tmp $(MODULE).symbols.tmp.new + # # Include common targets. # diff --git a/usr/src/uts/intel/spdsock/spdsock.objt-symbols.obj64 b/usr/src/uts/intel/spdsock/spdsock.objt-symbols.obj64 new file mode 100644 index 0000000000..c63f15a2c4 --- /dev/null +++ b/usr/src/uts/intel/spdsock/spdsock.objt-symbols.obj64 @@ -0,0 +1,52 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" + +algattr +algproto +bad_ext_diag +cb_inet_devops +defbitsattr +dup_ext_diag +execmodes +incrbitsattr +inet_dev_info +inet_devops +info +lcl_param_arr +maxbitsattr +minbitsattr +modldrv +modlinkage +netdev_privs +rinit +spdsock_g_t_info_ack +spdsockinfo +spdsock_max_optsize +spdsock_opt_arr +spdsock_opt_obj +spdsock_valid_levels_arr +spdsock_vmem +winit diff --git a/usr/src/uts/intel/tun/Makefile b/usr/src/uts/intel/tun/Makefile index bb8138582d..63fbaf0ca8 100644 --- a/usr/src/uts/intel/tun/Makefile +++ b/usr/src/uts/intel/tun/Makefile @@ -21,7 +21,7 @@ # # uts/intel/tun/Makefile # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # #ident "%Z%%M% %I% %E% SMI" @@ -99,6 +99,14 @@ clean.lint: $(CLEAN_LINT_DEPS) install: $(INSTALL_DEPS) +sis_check: sis_check.obj +sis_check.obj: $(ALL_DEPS) + @$(GREP) -v '#' $(MODULE).objt-symbols.obj64 |$(GREP) . 
|$(SORT) -u \ + > $(MODULE).symbols.tmp + @$(NM) obj64/$(MODULE) |$(GREP) OBJT |$(GREP) -v UNDEF | \ + $(CUT) -d'|' -f8 |$(SORT) -u > $(MODULE).symbols.tmp.new + -@$(DIFF) $(MODULE).symbols.tmp $(MODULE).symbols.tmp.new + # # Include common targets. # diff --git a/usr/src/uts/intel/tun/tun.objt-symbols.obj64 b/usr/src/uts/intel/tun/tun.objt-symbols.obj64 new file mode 100644 index 0000000000..99d06ddee0 --- /dev/null +++ b/usr/src/uts/intel/tun/tun.objt-symbols.obj64 @@ -0,0 +1,41 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" + +IP6_MAJ +IP_MAJ +bindack +info +infoack +modlinkage +modlstrmod +tun_debug +tun_do_fastpath +tun_fmodsw +tun_limit_init_upper_v4 +tun_limit_init_upper_v6 +tuninfo +tunrinit +tunwinit diff --git a/usr/src/uts/sparc/arp/Makefile b/usr/src/uts/sparc/arp/Makefile index a394f7534b..83f6128b17 100644 --- a/usr/src/uts/sparc/arp/Makefile +++ b/usr/src/uts/sparc/arp/Makefile @@ -20,7 +20,7 @@ # # # uts/sparc/arp/Makefile -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # #ident "%Z%%M% %I% %E% SMI" @@ -107,6 +107,15 @@ install: $(INSTALL_DEPS) $(ROOTLINK): $(ROOT_STRMOD_DIR) $(ROOTMODULE) -$(RM) $@; ln $(ROOTMODULE) $@ +sis_check: sis_check.obj +sis_check.obj: $(ALL_DEPS) + @$(GREP) -v '#' $(MODULE).objt-symbols.obj64 |$(GREP) . |$(SORT) -u \ + > $(MODULE).symbols.tmp + @$(NM) obj64/$(MODULE) |$(GREP) OBJT |$(GREP) -v UNDEF | \ + $(CUT) -d'|' -f8 |$(GREP) -v '^___const_' |$(SORT) -u \ + > $(MODULE).symbols.tmp.new + -@$(DIFF) $(MODULE).symbols.tmp $(MODULE).symbols.tmp.new + # # Include common targets. # diff --git a/usr/src/uts/sparc/arp/arp.objt-symbols.obj64 b/usr/src/uts/sparc/arp/arp.objt-symbols.obj64 new file mode 100644 index 0000000000..ac7f04c356 --- /dev/null +++ b/usr/src/uts/sparc/arp/arp.objt-symbols.obj64 @@ -0,0 +1,42 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" + +ar_cmd_tbl +ar_m_tbl +arp_netinfo +arp_param_arr +arpinfo +cb_inet_devops +fsw +inet_dev_info +inet_devops +info +modldrv +modlinkage +modlstrmod +netdev_privs +rinit +winit diff --git a/usr/src/uts/sparc/hook/Makefile b/usr/src/uts/sparc/hook/Makefile index 53a95c8d1b..c15b8ae613 100644 --- a/usr/src/uts/sparc/hook/Makefile +++ b/usr/src/uts/sparc/hook/Makefile @@ -21,7 +21,7 @@ # # uts/sparc/hook/Makefile # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # #ident "%Z%%M% %I% %E% SMI" @@ -82,6 +82,15 @@ clean.lint: $(CLEAN_LINT_DEPS) install: $(INSTALL_DEPS) +sis_check: sis_check.obj +sis_check.obj: $(ALL_DEPS) + @$(GREP) -v '#' $(MODULE).objt-symbols.obj64 |$(GREP) . |$(SORT) -u \ + > $(MODULE).symbols.tmp + @$(NM) obj64/$(MODULE) |$(GREP) OBJT |$(GREP) -v UNDEF | \ + $(CUT) -d'|' -f8 |$(GREP) -v '^___const_' |$(SORT) -u \ + > $(MODULE).symbols.tmp.new + -@$(DIFF) $(MODULE).symbols.tmp $(MODULE).symbols.tmp.new + # # Include common targets. # diff --git a/usr/src/uts/sparc/hook/hook.objt-symbols.obj64 b/usr/src/uts/sparc/hook/hook.objt-symbols.obj64 new file mode 100644 index 0000000000..c813224003 --- /dev/null +++ b/usr/src/uts/sparc/hook/hook.objt-symbols.obj64 @@ -0,0 +1,28 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" + +modlinkage +modlmisc diff --git a/usr/src/uts/sparc/icmp/Makefile b/usr/src/uts/sparc/icmp/Makefile index 4980c4916e..180117c414 100644 --- a/usr/src/uts/sparc/icmp/Makefile +++ b/usr/src/uts/sparc/icmp/Makefile @@ -20,7 +20,7 @@ # # # uts/sparc/icmp/Makefile -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # #ident "%Z%%M% %I% %E% SMI" @@ -107,6 +107,15 @@ install: $(INSTALL_DEPS) $(ROOTLINK): $(ROOT_STRMOD_DIR) $(ROOTMODULE) -$(RM) $@; ln $(ROOTMODULE) $@ +sis_check: sis_check.obj +sis_check.obj: $(ALL_DEPS) + @$(GREP) -v '#' $(MODULE).objt-symbols.obj64 |$(GREP) . 
|$(SORT) -u \ + > $(MODULE).symbols.tmp + @$(NM) obj64/$(MODULE) |$(GREP) OBJT |$(GREP) -v UNDEF | \ + $(CUT) -d'|' -f8 |$(GREP) -v '^___const_' |$(SORT) -u \ + > $(MODULE).symbols.tmp.new + -@$(DIFF) $(MODULE).symbols.tmp $(MODULE).symbols.tmp.new + # # Include common targets. # diff --git a/usr/src/uts/sparc/icmp/icmp.objt-symbols.obj64 b/usr/src/uts/sparc/icmp/icmp.objt-symbols.obj64 new file mode 100644 index 0000000000..e63dd0ec2e --- /dev/null +++ b/usr/src/uts/sparc/icmp/icmp.objt-symbols.obj64 @@ -0,0 +1,47 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" + +ICMP6_MAJ +cb_inet_devops +fsw +icmp_g_t_info_ack +icmp_max_optsize +icmp_opt_arr +icmp_opt_obj +icmp_param_arr +icmp_valid_levels_arr +icmpinfo +inet_dev_info +inet_devops +info +modldrv +modlinkage +modlstrmod +netdev_privs +rinit +sin6_null +sin_null +winit diff --git a/usr/src/uts/sparc/ip/Makefile b/usr/src/uts/sparc/ip/Makefile index 7131fa0ade..85b0c8af64 100644 --- a/usr/src/uts/sparc/ip/Makefile +++ b/usr/src/uts/sparc/ip/Makefile @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # #ident "%Z%%M% %I% %E% SMI" @@ -106,6 +106,23 @@ install: $(INSTALL_DEPS) $(ROOTLINK): $(ROOT_STRMOD_DIR) $(ROOTMODULE) -$(RM) $@; ln $(ROOTMODULE) $@ +sis_check: sis_check.obj sis_check.debug +sis_check.obj: $(ALL_DEPS) + @$(GREP) -v '#' $(MODULE).objt-symbols.obj64 |$(GREP) . |$(SORT) -u \ + > $(MODULE).symbols.tmp + @$(NM) obj64/$(MODULE) |$(GREP) OBJT |$(GREP) -v UNDEF | \ + $(CUT) -d'|' -f8 |$(GREP) -v '^___const_' |$(SORT) -u \ + > $(MODULE).symbols.tmp.new + -@$(DIFF) $(MODULE).symbols.tmp $(MODULE).symbols.tmp.new + +sis_check.debug: $(ALL_DEPS) + @$(GREP) -v '#' $(MODULE).objt-symbols.debug64 |$(GREP) . |$(SORT) -u \ + > $(MODULE).symbols.dbg.tmp + @$(NM) -n debug64/$(MODULE) |$(GREP) OBJT |$(GREP) -v UNDEF | \ + $(CUT) -d'|' -f8 |$(GREP) -v '^___const_' |$(SORT) -u \ + > $(MODULE).symbols.dbg.tmp.new + -@$(DIFF) $(MODULE).symbols.dbg.tmp $(MODULE).symbols.dbg.tmp.new + # # Include common targets. # diff --git a/usr/src/uts/sparc/ip/ip.objt-symbols.debug64 b/usr/src/uts/sparc/ip/ip.objt-symbols.debug64 new file mode 100644 index 0000000000..0417803e80 --- /dev/null +++ b/usr/src/uts/sparc/ip/ip.objt-symbols.debug64 @@ -0,0 +1,312 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. 
+# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" + +IP_MAJ +SCTP6_MAJ +SCTP_MAJ +TCP6_MAJ +TCP_MAJ +UDP6_MAJ +cb_inet_devops +cl_inet_bind +cl_inet_connect +cl_inet_disconnect +cl_inet_ipident +cl_inet_isclusterwide +cl_inet_listen +cl_inet_unbind +cl_inet_unlisten +cl_sctp_assoc_change +cl_sctp_check_addrs +cl_sctp_connect +cl_sctp_disconnect +cl_sctp_listen +cl_sctp_unlisten +conn_drain_nthreads +crctab +default_ip6_asp_table +do_tcp_direct_sockfs +do_tcp_fusion +do_tcpzcopy +dohwcksum +eventq_queue_in +eventq_queue_nic +eventq_queue_out +fsw +gcdb_hash +gcdb_hash_size +gcdb_lock +gcgrp4_hash +gcgrp6_hash +gcgrp_hash_size +gcgrp_lock +icmp_frag_size_table +icmp_ipha +ill_no_arena +ill_null +inet_dev_info +inet_devops +inet_maxminor +ip6_area_template +ip6_ared_template +ip6_cache_table_size +ip6_ftable_hash_size +ip6_ire_max_bucket_cnt +ip6_ire_min_bucket_cnt +ip6_max_cache_table_size +ip6opt_ls +ip_ard_template +ip_area_template +ip_ared_template +ip_areq_template +ip_aresq_template +ip_arma_multi_template +ip_aroff_template +ip_aron_template +ip_aru_template +ip_cache_table_size +ip_cgtp_filter +ip_cgtp_filter_ops +ip_cgtp_filter_rev +ip_debug +ip_g_all_ones +ip_input_proc +ip_ioctl_ftbl +ip_ire_cpu_ratio +ip_ire_max_bucket_cnt +ip_ire_mem_ratio +ip_ire_min_bucket_cnt +ip_loopback_mtu +ip_loopback_mtu_v6plus +ip_loopback_mtuplus +ip_m_tbl +ip_max_cache_table_size +ip_max_frag_dups +ip_min_frag_prune_time +ip_minor_arena +ip_misc_ioctl_count +ip_misc_ioctl_table +ip_mod_info +ip_modclose_ackwait_ms +ip_ndx_ioctl_count +ip_ndx_ioctl_table +ip_opt_arr +ip_opt_obj +ip_poll_normal_ms +ip_poll_normal_ticks +ip_rput_pullups +ip_six_byte_all_ones +ip_soft_rings_cnt +ip_squeue_bind +ip_squeue_create_callback +ip_squeue_enter +ip_squeue_enter_unbound +ip_squeue_fanout +ip_squeue_profile +ip_squeue_worker_wait +ip_squeues_per_cpu +ip_wput_frag_mdt_min +ipcl_bind_fanout_size +ipcl_conn_cache +ipcl_conn_hash_maxsize +ipcl_conn_hash_memfactor +ipcl_conn_hash_size +ipcl_debug_level +ipcl_raw_fanout_size +ipcl_tcpconn_cache +ipcl_udp_fanout_size +ipclassifier_version +ipif_loopback_name +ipif_nv_tbl +ipif_zero +ipinfo +iplrinit +iplwinit +iprinit +ipsec_action_cache +ipsec_hdr_pullup_needed +ipsec_info_cache +ipsec_pol_cache +ipsec_policy_failure_msgs +ipsec_sel_cache +ipsec_weird_null_inbound_policy +ipsechw_debug +ipv4_forward_suffix +ipv4info +ipv6_all_hosts_mcast +ipv6_all_ones +ipv6_all_rtrs_mcast +ipv6_all_v2rtrs_mcast +ipv6_all_zeros +ipv6_areq_template +ipv6_forward_suffix +ipv6_ll_template +ipv6_loopback +ipv6_solicited_node_mcast +ipv6_unspecified_group +ipv6info +ipwinit +ire_cache +ire_gw_secattr_cache +ire_idle_cutoff_interval +ire_null +ire_nv_arr +ire_nv_tbl +ire_uinfo_null +lcl_ndp_arr +lcl_param_arr +lcl_sctp_param_arr +lcl_sctp_wroff_xtra_param +lcl_tcp_mdt_head_param +lcl_tcp_mdt_max_pbufs_param 
+lcl_tcp_mdt_tail_param +lcl_tcp_param_arr +lcl_tcp_wroff_xtra_param +log_format +mask_rnhead +max_keylen +modldrv +modlinkage +modlstrmod +multicast_encap_iphdr +netdev_privs +netmasks +prov_update_handle +radix_mask_cache +radix_node_cache +recvq_call +recvq_loop_cnt +req_arr +rinit_ipv6 +rn_mkfreelist +rn_ones +rn_zeros +rr_max_blank_ratio +rr_max_pkt_cnt_ratio +rr_min_blank_ratio +rr_min_pkt_cnt_ratio +rt_entry_cache +sctp_asconf_default_dispatch +sctp_asconf_dispatch_tbl +sctp_conn_cache +sctp_conn_hash_size +sctp_kmem_faddr_cache +sctp_kmem_ftsn_set_cache +sctp_kmem_set_cache +sctp_recvq_tq_task_max +sctp_recvq_tq_task_min +sctp_recvq_tq_thr_max +sctp_recvq_tq_thr_min +sctp_sin6_null +sctp_taskq +sctpdebug +sendq_collision +sendq_empty +sendq_loop_cnt +sin6_null +sin_null +skip_sctp_cksum +sqset_global_list +sqset_global_size +squeue_cache +squeue_intrdrain_ms +squeue_intrdrain_ns +squeue_kstat +squeue_kstat_lock +squeue_profile +squeue_worker_poll_min +squeue_workerdrain_ms +squeue_workerdrain_ns +squeue_workerwait_ms +squeue_workerwait_tick +squeue_writerdrain_ms +squeue_writerdrain_ns +tcp_acceptor_rinit +tcp_acceptor_winit +tcp_conn_hash_size +tcp_drop_ack_unsent_cnt +tcp_free_list_max_cnt +tcp_fusion_rcv_unread_min +tcp_g_kstat +tcp_g_statistics +tcp_g_t_info_ack +tcp_g_t_info_ack_v6 +tcp_icmp_source_quench +tcp_iphc_cache +tcp_loopback_rinit +tcp_max_optsize +tcp_mdt_chain +tcp_mdt_smss_threshold +tcp_mod_rinit +tcp_mod_winit +tcp_opt_arr +tcp_opt_obj +tcp_random_anon_port +tcp_random_end_ptr +tcp_random_fptr +tcp_random_lock +tcp_random_rptr +tcp_random_state +tcp_randtbl +tcp_report_header +tcp_rinfo +tcp_rinit +tcp_sack_info_cache +tcp_sock_winit +tcp_squeue_close +tcp_squeue_close_proc +tcp_squeue_wput +tcp_squeue_wput_proc +tcp_static_maxpsz +tcp_taskq +tcp_timercache +tcp_tx_pull_len +tcp_valid_levels_arr +tcp_version +tcp_winfo +tcp_winit +tcpinfo +tsol_strict_error +udp_bind_fanout_size +udp_cache +udp_count +udp_g_t_info_ack_ipv4 +udp_g_t_info_ack_ipv6 +udp_info +udp_max_optsize +udp_opt_arr +udp_opt_obj +udp_param_arr +udp_random_anon_port +udp_rinit +udp_snmp_rinit +udp_snmp_winit +udp_valid_levels_arr +udp_version +udp_winit +udpinfo +winit_ipv6 +zero_info diff --git a/usr/src/uts/sparc/ip/ip.objt-symbols.obj64 b/usr/src/uts/sparc/ip/ip.objt-symbols.obj64 new file mode 100644 index 0000000000..dcc6c81167 --- /dev/null +++ b/usr/src/uts/sparc/ip/ip.objt-symbols.obj64 @@ -0,0 +1,300 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" + +IP_MAJ +SCTP6_MAJ +SCTP_MAJ +TCP6_MAJ +TCP_MAJ +UDP6_MAJ +cb_inet_devops +cl_inet_bind +cl_inet_connect +cl_inet_disconnect +cl_inet_ipident +cl_inet_isclusterwide +cl_inet_listen +cl_inet_unbind +cl_inet_unlisten +cl_sctp_assoc_change +cl_sctp_check_addrs +cl_sctp_connect +cl_sctp_disconnect +cl_sctp_listen +cl_sctp_unlisten +conn_drain_nthreads +crctab +default_ip6_asp_table +do_tcp_direct_sockfs +do_tcp_fusion +do_tcpzcopy +dohwcksum +eventq_queue_in +eventq_queue_nic +eventq_queue_out +fsw +gcdb_hash +gcdb_hash_size +gcdb_lock +gcgrp4_hash +gcgrp6_hash +gcgrp_hash_size +gcgrp_lock +icmp_frag_size_table +icmp_ipha +ill_no_arena +ill_null +inet_dev_info +inet_devops +inet_maxminor +ip6_area_template +ip6_ared_template +ip6_cache_table_size +ip6_ftable_hash_size +ip6_ire_max_bucket_cnt +ip6_ire_min_bucket_cnt +ip6_max_cache_table_size +ip6opt_ls +ip_ard_template +ip_area_template +ip_ared_template +ip_areq_template +ip_aresq_template +ip_arma_multi_template +ip_aroff_template +ip_aron_template +ip_aru_template +ip_cache_table_size +ip_cgtp_filter +ip_cgtp_filter_ops +ip_cgtp_filter_rev +ip_debug +ip_g_all_ones +ip_input_proc +ip_ioctl_ftbl +ip_ire_cpu_ratio +ip_ire_max_bucket_cnt +ip_ire_mem_ratio +ip_ire_min_bucket_cnt +ip_loopback_mtu +ip_loopback_mtu_v6plus +ip_loopback_mtuplus +ip_m_tbl +ip_max_cache_table_size +ip_max_frag_dups +ip_min_frag_prune_time +ip_minor_arena +ip_misc_ioctl_count +ip_misc_ioctl_table +ip_mod_info +ip_modclose_ackwait_ms +ip_ndx_ioctl_count +ip_ndx_ioctl_table +ip_opt_arr +ip_opt_obj +ip_poll_normal_ms +ip_poll_normal_ticks +ip_rput_pullups +ip_six_byte_all_ones +ip_soft_rings_cnt +ip_squeue_bind +ip_squeue_create_callback +ip_squeue_enter +ip_squeue_enter_unbound +ip_squeue_fanout +ip_squeue_profile +ip_squeue_worker_wait +ip_squeues_per_cpu +ip_wput_frag_mdt_min +ipcl_bind_fanout_size +ipcl_conn_cache +ipcl_conn_hash_maxsize +ipcl_conn_hash_memfactor +ipcl_conn_hash_size +ipcl_raw_fanout_size +ipcl_tcpconn_cache +ipcl_udp_fanout_size +ipclassifier_version +ipif_loopback_name +ipif_nv_tbl +ipif_zero +ipinfo +iplrinit +iplwinit +iprinit +ipsec_action_cache +ipsec_hdr_pullup_needed +ipsec_info_cache +ipsec_pol_cache +ipsec_policy_failure_msgs +ipsec_sel_cache +ipsec_weird_null_inbound_policy +ipv4_forward_suffix +ipv4info +ipv6_all_hosts_mcast +ipv6_all_ones +ipv6_all_rtrs_mcast +ipv6_all_v2rtrs_mcast +ipv6_all_zeros +ipv6_areq_template +ipv6_forward_suffix +ipv6_ll_template +ipv6_loopback +ipv6_solicited_node_mcast +ipv6_unspecified_group +ipv6info +ipwinit +ire_cache +ire_gw_secattr_cache +ire_idle_cutoff_interval +ire_null +ire_nv_arr +ire_nv_tbl +ire_uinfo_null +lcl_ndp_arr +lcl_param_arr +lcl_sctp_param_arr +lcl_sctp_wroff_xtra_param +lcl_tcp_mdt_head_param +lcl_tcp_mdt_max_pbufs_param +lcl_tcp_mdt_tail_param +lcl_tcp_param_arr +lcl_tcp_wroff_xtra_param +log_format +mask_rnhead +max_keylen +modldrv +modlinkage +modlstrmod +multicast_encap_iphdr +netdev_privs +netmasks +prov_update_handle +radix_mask_cache +radix_node_cache +req_arr +rinit_ipv6 +rn_mkfreelist +rn_ones +rn_zeros +rr_max_blank_ratio +rr_max_pkt_cnt_ratio +rr_min_blank_ratio +rr_min_pkt_cnt_ratio +rt_entry_cache +sctp_asconf_default_dispatch +sctp_asconf_dispatch_tbl +sctp_conn_cache +sctp_conn_hash_size +sctp_kmem_faddr_cache +sctp_kmem_ftsn_set_cache +sctp_kmem_set_cache +sctp_recvq_tq_task_max +sctp_recvq_tq_task_min +sctp_recvq_tq_thr_max +sctp_recvq_tq_thr_min +sctp_sin6_null +sctp_taskq +sctpdebug +sin6_null +sin_null +sqset_global_list 
+sqset_global_size +squeue_cache +squeue_intrdrain_ms +squeue_intrdrain_ns +squeue_worker_poll_min +squeue_workerdrain_ms +squeue_workerdrain_ns +squeue_workerwait_ms +squeue_workerwait_tick +squeue_writerdrain_ms +squeue_writerdrain_ns +tcp_acceptor_rinit +tcp_acceptor_winit +tcp_conn_hash_size +tcp_drop_ack_unsent_cnt +tcp_free_list_max_cnt +tcp_fusion_rcv_unread_min +tcp_g_kstat +tcp_g_statistics +tcp_g_t_info_ack +tcp_g_t_info_ack_v6 +tcp_icmp_source_quench +tcp_iphc_cache +tcp_loopback_rinit +tcp_max_optsize +tcp_mdt_chain +tcp_mdt_smss_threshold +tcp_mod_rinit +tcp_mod_winit +tcp_opt_arr +tcp_opt_obj +tcp_random_anon_port +tcp_random_end_ptr +tcp_random_fptr +tcp_random_lock +tcp_random_rptr +tcp_random_state +tcp_randtbl +tcp_report_header +tcp_rinfo +tcp_rinit +tcp_sack_info_cache +tcp_sock_winit +tcp_squeue_close +tcp_squeue_close_proc +tcp_squeue_wput +tcp_squeue_wput_proc +tcp_static_maxpsz +tcp_taskq +tcp_timercache +tcp_tx_pull_len +tcp_valid_levels_arr +tcp_version +tcp_winfo +tcp_winit +tcpinfo +tsol_strict_error +udp_bind_fanout_size +udp_cache +udp_g_t_info_ack_ipv4 +udp_g_t_info_ack_ipv6 +udp_info +udp_max_optsize +udp_opt_arr +udp_opt_obj +udp_param_arr +udp_random_anon_port +udp_rinit +udp_snmp_rinit +udp_snmp_winit +udp_valid_levels_arr +udp_version +udp_winit +udpinfo +winit_ipv6 +zero_info diff --git a/usr/src/uts/sparc/ipf/Makefile b/usr/src/uts/sparc/ipf/Makefile index 18f0430ebd..c94f69b3f3 100644 --- a/usr/src/uts/sparc/ipf/Makefile +++ b/usr/src/uts/sparc/ipf/Makefile @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # #ident "%Z%%M% %I% %E% SMI" @@ -101,6 +101,15 @@ clean.lint: $(CLEAN_LINT_DEPS) install: $(INSTALL_DEPS) +sis_check: sis_check.obj +sis_check.obj: $(ALL_DEPS) + @$(GREP) -v '#' $(MODULE).objt-symbols.obj64 |$(GREP) . |$(SORT) -u \ + > $(MODULE).symbols.tmp + @$(NM) obj64/$(MODULE) |$(GREP) OBJT |$(GREP) -v UNDEF | \ + $(CUT) -d'|' -f8 |$(GREP) -v '^___const_' |$(SORT) -u \ + > $(MODULE).symbols.tmp.new + -@$(DIFF) $(MODULE).symbols.tmp $(MODULE).symbols.tmp.new + # # Include common targets. # diff --git a/usr/src/uts/sparc/ipf/ipf.objt-symbols.obj64 b/usr/src/uts/sparc/ipf/ipf.objt-symbols.obj64 new file mode 100644 index 0000000000..a9f7284419 --- /dev/null +++ b/usr/src/uts/sparc/ipf/ipf.objt-symbols.obj64 @@ -0,0 +1,55 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" + +fr_availfuncs +fr_features +fr_objbytes +hdrsizes +icmpreplytype4 +icmpreplytype6 +icmptoicmp6types +icmptoicmp6unreach +ip6exthdr +ipf_cb_ops +ipf_dev_info +ipf_devfiles +ipf_kstat_tmp +ipf_ops +ipf_proxy_debug +ipfilter_version +ipl_magic +iplmod +ipopts +ippr_irc_dcctypes +ippr_pptp_debug +ippr_pptp_gretimeout +lcl_ap_proxies +lcl_ipf_tuneables +modlink1 +rcsid +sccsid +secopt +tcpopts diff --git a/usr/src/uts/sparc/ipsecah/Makefile b/usr/src/uts/sparc/ipsecah/Makefile index f1ad94ab6d..0b8cc97477 100644 --- a/usr/src/uts/sparc/ipsecah/Makefile +++ b/usr/src/uts/sparc/ipsecah/Makefile @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # ident "%Z%%M% %I% %E% SMI" @@ -101,6 +101,15 @@ install: $(INSTALL_DEPS) $(ROOTLINK): $(ROOT_STRMOD_DIR) $(ROOTMODULE) -$(RM) $@; ln $(ROOTMODULE) $@ +sis_check: sis_check.obj +sis_check.obj: $(ALL_DEPS) + @$(GREP) -v '#' $(MODULE).objt-symbols.obj64 |$(GREP) . |$(SORT) -u \ + > $(MODULE).symbols.tmp + @$(NM) obj64/$(MODULE) |$(GREP) OBJT |$(GREP) -v UNDEF | \ + $(CUT) -d'|' -f8 |$(GREP) -v '^___const_' |$(SORT) -u \ + > $(MODULE).symbols.tmp.new + -@$(DIFF) $(MODULE).symbols.tmp $(MODULE).symbols.tmp.new + # # Include common targets. # diff --git a/usr/src/uts/sparc/ipsecah/ipsecah.objt-symbols.obj64 b/usr/src/uts/sparc/ipsecah/ipsecah.objt-symbols.obj64 new file mode 100644 index 0000000000..9c1b759006 --- /dev/null +++ b/usr/src/uts/sparc/ipsecah/ipsecah.objt-symbols.obj64 @@ -0,0 +1,42 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" + +ah_hash_size +ah_taskq +cb_inet_devops +fsw +inet_dev_info +inet_devops +info +ipsacq_maxpackets +ipsecahinfo +lcl_param_arr +modldrv +modlinkage +modlstrmod +netdev_privs +rinit +winit diff --git a/usr/src/uts/sparc/ipsecesp/Makefile b/usr/src/uts/sparc/ipsecesp/Makefile index 749da84c0e..61253ae4a4 100644 --- a/usr/src/uts/sparc/ipsecesp/Makefile +++ b/usr/src/uts/sparc/ipsecesp/Makefile @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # ident "%Z%%M% %I% %E% SMI" @@ -100,6 +100,15 @@ install: $(INSTALL_DEPS) $(ROOTLINK): $(ROOT_STRMOD_DIR) $(ROOTMODULE) -$(RM) $@; ln $(ROOTMODULE) $@ +sis_check: sis_check.obj +sis_check.obj: $(ALL_DEPS) + @$(GREP) -v '#' $(MODULE).objt-symbols.obj64 |$(GREP) . 
|$(SORT) -u \ + > $(MODULE).symbols.tmp + @$(NM) obj64/$(MODULE) |$(GREP) OBJT |$(GREP) -v UNDEF | \ + $(CUT) -d'|' -f8 |$(GREP) -v '^___const_' |$(SORT) -u \ + > $(MODULE).symbols.tmp.new + -@$(DIFF) $(MODULE).symbols.tmp $(MODULE).symbols.tmp.new + # # Include common targets. # diff --git a/usr/src/uts/sparc/ipsecesp/ipsecesp.objt-symbols.obj64 b/usr/src/uts/sparc/ipsecesp/ipsecesp.objt-symbols.obj64 new file mode 100644 index 0000000000..a6f2c78e5f --- /dev/null +++ b/usr/src/uts/sparc/ipsecesp/ipsecesp.objt-symbols.obj64 @@ -0,0 +1,41 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" + +cb_inet_devops +esp_hash_size +esp_taskq +fsw +inet_dev_info +inet_devops +info +ipsecespinfo +lcl_param_arr +modldrv +modlinkage +modlstrmod +netdev_privs +rinit +winit diff --git a/usr/src/uts/sparc/keysock/Makefile b/usr/src/uts/sparc/keysock/Makefile index 8bd28e437b..e2dce24dad 100644 --- a/usr/src/uts/sparc/keysock/Makefile +++ b/usr/src/uts/sparc/keysock/Makefile @@ -20,7 +20,7 @@ # # # uts/sparc/keysock/Makefile -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # #ident "%Z%%M% %I% %E% SMI" @@ -102,6 +102,15 @@ install: $(INSTALL_DEPS) $(ROOTLINK): $(ROOT_STRMOD_DIR) $(ROOTMODULE) -$(RM) $@; ln $(ROOTMODULE) $@ +sis_check: sis_check.obj +sis_check.obj: $(ALL_DEPS) + @$(GREP) -v '#' $(MODULE).objt-symbols.obj64 |$(GREP) . |$(SORT) -u \ + > $(MODULE).symbols.tmp + @$(NM) obj64/$(MODULE) |$(GREP) OBJT |$(GREP) -v UNDEF | \ + $(CUT) -d'|' -f8 |$(GREP) -v '^___const_' |$(SORT) -u \ + > $(MODULE).symbols.tmp.new + -@$(DIFF) $(MODULE).symbols.tmp $(MODULE).symbols.tmp.new + # # Include common targets. # diff --git a/usr/src/uts/sparc/keysock/keysock.objt-symbols.obj64 b/usr/src/uts/sparc/keysock/keysock.objt-symbols.obj64 new file mode 100644 index 0000000000..c07de512be --- /dev/null +++ b/usr/src/uts/sparc/keysock/keysock.objt-symbols.obj64 @@ -0,0 +1,53 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" + +IP6DEV +IPSECAH +IPSECAHDEV +IPSECESP +IPSECESPDEV +KEYSOCK +STRMOD +cb_inet_devops +fsw +inet_dev_info +inet_devops +info +keysock_g_t_info_ack +keysock_max_optsize +keysock_modlp +keysock_opt_arr +keysock_opt_obj +keysock_valid_levels_arr +keysock_vmem +keysockinfo +lcl_param_arr +modldrv +modlinkage +modlstrmod +netdev_privs +rinit +winit diff --git a/usr/src/uts/sparc/neti/Makefile b/usr/src/uts/sparc/neti/Makefile index 9b5985c734..2d43cc6c01 100644 --- a/usr/src/uts/sparc/neti/Makefile +++ b/usr/src/uts/sparc/neti/Makefile @@ -21,7 +21,7 @@ # # uts/sparc/neti/Makefile # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # #ident "%Z%%M% %I% %E% SMI" @@ -84,6 +84,15 @@ clean.lint: $(CLEAN_LINT_DEPS) install: $(INSTALL_DEPS) +sis_check: sis_check.obj +sis_check.obj: $(ALL_DEPS) + @$(GREP) -v '#' $(MODULE).objt-symbols.obj64 |$(GREP) . |$(SORT) -u \ + > $(MODULE).symbols.tmp + @$(NM) obj64/$(MODULE) |$(GREP) OBJT |$(GREP) -v UNDEF | \ + $(CUT) -d'|' -f8 |$(GREP) -v '^___const_' |$(SORT) -u \ + > $(MODULE).symbols.tmp.new + -@$(DIFF) $(MODULE).symbols.tmp $(MODULE).symbols.tmp.new + # # Include common targets. # diff --git a/usr/src/uts/sparc/neti/neti.objt-symbols.obj64 b/usr/src/uts/sparc/neti/neti.objt-symbols.obj64 new file mode 100644 index 0000000000..c813224003 --- /dev/null +++ b/usr/src/uts/sparc/neti/neti.objt-symbols.obj64 @@ -0,0 +1,28 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" + +modlinkage +modlmisc diff --git a/usr/src/uts/sparc/os/device_policy b/usr/src/uts/sparc/os/device_policy index 0e8e3f5b13..e177ee37c0 100644 --- a/usr/src/uts/sparc/os/device_policy +++ b/usr/src/uts/sparc/os/device_policy @@ -1,5 +1,5 @@ # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. 
# # CDDL HEADER START @@ -51,10 +51,10 @@ icmp read_priv_set=net_icmpaccess write_priv_set=net_icmpaccess icmp6 read_priv_set=net_icmpaccess write_priv_set=net_icmpaccess ip read_priv_set=net_rawaccess write_priv_set=net_rawaccess ip6 read_priv_set=net_rawaccess write_priv_set=net_rawaccess -keysock read_priv_set=sys_net_config write_priv_set=sys_net_config -ipsecah read_priv_set=sys_net_config write_priv_set=sys_net_config -ipsecesp read_priv_set=sys_net_config write_priv_set=sys_net_config -spdsock read_priv_set=sys_net_config write_priv_set=sys_net_config +keysock read_priv_set=sys_ip_config write_priv_set=sys_ip_config +ipsecah read_priv_set=sys_ip_config write_priv_set=sys_ip_config +ipsecesp read_priv_set=sys_ip_config write_priv_set=sys_ip_config +spdsock read_priv_set=sys_ip_config write_priv_set=sys_ip_config # # Raw network interface access permissions # @@ -89,5 +89,5 @@ aggr:ctl read_priv_set=sys_net_config write_priv_set=sys_net_config # # IP Filter # -ipf read_priv_set=sys_net_config write_priv_set=sys_net_config +ipf read_priv_set=sys_ip_config write_priv_set=sys_ip_config diff --git a/usr/src/uts/sparc/rts/Makefile b/usr/src/uts/sparc/rts/Makefile index fc45c9d8aa..6bfe8f01f0 100644 --- a/usr/src/uts/sparc/rts/Makefile +++ b/usr/src/uts/sparc/rts/Makefile @@ -20,7 +20,7 @@ # # # uts/sparc/rts/Makefile -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # #ident "%Z%%M% %I% %E% SMI" @@ -107,6 +107,15 @@ install: $(INSTALL_DEPS) $(ROOTLINK): $(ROOT_STRMOD_DIR) $(ROOTMODULE) -$(RM) $@; ln $(ROOTMODULE) $@ +sis_check: sis_check.obj +sis_check.obj: $(ALL_DEPS) + @$(GREP) -v '#' $(MODULE).objt-symbols.obj64 |$(GREP) . |$(SORT) -u \ + > $(MODULE).symbols.tmp + @$(NM) obj64/$(MODULE) |$(GREP) OBJT |$(GREP) -v UNDEF | \ + $(CUT) -d'|' -f8 |$(GREP) -v '^___const_' |$(SORT) -u \ + > $(MODULE).symbols.tmp.new + -@$(DIFF) $(MODULE).symbols.tmp $(MODULE).symbols.tmp.new + # # Include common targets. # diff --git a/usr/src/uts/sparc/rts/rts.objt-symbols.obj64 b/usr/src/uts/sparc/rts/rts.objt-symbols.obj64 new file mode 100644 index 0000000000..f6d7f5a6ad --- /dev/null +++ b/usr/src/uts/sparc/rts/rts.objt-symbols.obj64 @@ -0,0 +1,44 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" + +cb_inet_devops +fsw +inet_dev_info +inet_devops +info +lcl_param_arr +modldrv +modlinkage +modlstrmod +netdev_privs +rinit +rts_g_t_info_ack +rts_max_optsize +rts_opt_arr +rts_opt_obj +rts_valid_levels_arr +rtsinfo +winit diff --git a/usr/src/uts/sparc/spdsock/Makefile b/usr/src/uts/sparc/spdsock/Makefile index 2ee4fb9e8f..0c9ebae937 100644 --- a/usr/src/uts/sparc/spdsock/Makefile +++ b/usr/src/uts/sparc/spdsock/Makefile @@ -20,7 +20,7 @@ # # # uts/sparc/keysock/Makefile -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # ident "%Z%%M% %I% %E% SMI" @@ -97,6 +97,15 @@ clean.lint: $(CLEAN_LINT_DEPS) install: $(INSTALL_DEPS) +sis_check: sis_check.obj +sis_check.obj: $(ALL_DEPS) + @$(GREP) -v '#' $(MODULE).objt-symbols.obj64 |$(GREP) . |$(SORT) -u \ + > $(MODULE).symbols.tmp + @$(NM) obj64/$(MODULE) |$(GREP) OBJT |$(GREP) -v UNDEF | \ + $(CUT) -d'|' -f8 |$(GREP) -v '^___const_' |$(SORT) -u \ + > $(MODULE).symbols.tmp.new + -@$(DIFF) $(MODULE).symbols.tmp $(MODULE).symbols.tmp.new + # # Include common targets. # diff --git a/usr/src/uts/sparc/spdsock/spdsock.objt-symbols.obj64 b/usr/src/uts/sparc/spdsock/spdsock.objt-symbols.obj64 new file mode 100644 index 0000000000..c294cf4517 --- /dev/null +++ b/usr/src/uts/sparc/spdsock/spdsock.objt-symbols.obj64 @@ -0,0 +1,52 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" + +algattr +algproto +bad_ext_diag +cb_inet_devops +defbitsattr +dup_ext_diag +execmodes +incrbitsattr +inet_dev_info +inet_devops +info +lcl_param_arr +maxbitsattr +minbitsattr +modldrv +modlinkage +netdev_privs +rinit +spdsock_g_t_info_ack +spdsock_max_optsize +spdsock_opt_arr +spdsock_opt_obj +spdsock_valid_levels_arr +spdsock_vmem +spdsockinfo +winit diff --git a/usr/src/uts/sparc/tun/Makefile b/usr/src/uts/sparc/tun/Makefile index ceaa355bb2..98a698d8ad 100644 --- a/usr/src/uts/sparc/tun/Makefile +++ b/usr/src/uts/sparc/tun/Makefile @@ -20,7 +20,7 @@ # # # uts/sparc/tun/Makefile -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # #ident "%Z%%M% %I% %E% SMI" @@ -103,6 +103,14 @@ clean.lint: $(CLEAN_LINT_DEPS) install: $(INSTALL_DEPS) +sis_check: sis_check.obj +sis_check.obj: $(ALL_DEPS) + @$(GREP) -v '#' $(MODULE).objt-symbols.obj64 |$(GREP) . 
|$(SORT) -u \ + > $(MODULE).symbols.tmp + @$(NM) obj64/$(MODULE) |$(GREP) OBJT |$(GREP) -v UNDEF | \ + $(CUT) -d'|' -f8 |$(SORT) -u > $(MODULE).symbols.tmp.new + -@$(DIFF) $(MODULE).symbols.tmp $(MODULE).symbols.tmp.new + # # Include common targets. # diff --git a/usr/src/uts/sparc/tun/tun.objt-symbols.obj64 b/usr/src/uts/sparc/tun/tun.objt-symbols.obj64 new file mode 100644 index 0000000000..99d06ddee0 --- /dev/null +++ b/usr/src/uts/sparc/tun/tun.objt-symbols.obj64 @@ -0,0 +1,41 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" + +IP6_MAJ +IP_MAJ +bindack +info +infoack +modlinkage +modlstrmod +tun_debug +tun_do_fastpath +tun_fmodsw +tun_limit_init_upper_v4 +tun_limit_init_upper_v6 +tuninfo +tunrinit +tunwinit
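
The sis_check / sis_check.obj targets added to these module Makefiles all follow one pattern: each <module>.objt-symbols.obj64 file is a checked-in baseline of the module's global data symbols, and the target compares it against what nm reports for the freshly built 64-bit object (OBJT entries that are not UNDEF, with compiler-generated ___const_* names filtered out in most of the targets). The shell below is a minimal standalone sketch of that comparison for a single module, run by hand; the module name, tool paths and workspace layout are taken from the recipes above but are otherwise illustrative, not the build system's own code.

    #!/bin/sh
    #
    # Standalone sketch of the sis_check.obj recipe: compare a module's
    # checked-in OBJT (global data) symbol baseline against the symbols
    # in the freshly built 64-bit object.  MODULE and the layout are
    # illustrative; run from a module build directory such as
    # usr/src/uts/sparc/<module>.

    MODULE=ipsecah                        # any module with a baseline file
    BASELINE=$MODULE.objt-symbols.obj64   # checked-in baseline, one symbol per line
    BUILT=obj64/$MODULE                   # freshly built module

    # Baseline: drop comment and blank lines, sort uniquely.
    grep -v '#' "$BASELINE" | grep . | sort -u > "$MODULE.symbols.tmp"

    # Built object: keep defined OBJT (data) symbols, take the name field
    # of nm's '|'-separated output, drop compiler-generated ___const_*
    # names, sort uniquely.
    /usr/ccs/bin/nm "$BUILT" | grep OBJT | grep -v UNDEF | \
        cut -d'|' -f8 | grep -v '^___const_' | sort -u > "$MODULE.symbols.tmp.new"

    # Any diff output means the module's set of global data symbols no
    # longer matches the baseline.
    diff "$MODULE.symbols.tmp" "$MODULE.symbols.tmp.new"

In the Makefiles the comparison is run as "-@$(DIFF) ...", so a mismatch is reported but does not fail the build; presumably the point is to make newly introduced module-global data visible, since for IP Instances such state generally has to move into per-stack structures rather than remain global.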
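
Separately, the device_policy updates (intel and sparc) move the keysock, ipsecah, ipsecesp, spdsock and ipf entries from sys_net_config to the finer-grained sys_ip_config privilege. To see what an installed system actually enforces, something like the following should work; the /etc/security/device_policy path and the getdevpolicy invocation are the conventional ways to inspect device policy on Solaris/illumos, but they are assumptions here, not part of this change.

    #!/bin/sh
    # Inspect the device-policy entries affected by this change on a
    # running system.  Path and device name are assumptions, not part
    # of the diff.

    grep -E '^(keysock|ipsecah|ipsecesp|spdsock|ipf)[[:space:]]' \
        /etc/security/device_policy

    # Query the kernel's in-effect policy for one of the devices.
    getdevpolicy /dev/ipf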