diff options
Diffstat (limited to 'usr/src/uts/common/inet/ip/ip_attr.c')
-rw-r--r-- | usr/src/uts/common/inet/ip/ip_attr.c | 1338 |
1 files changed, 1338 insertions, 0 deletions
diff --git a/usr/src/uts/common/inet/ip/ip_attr.c b/usr/src/uts/common/inet/ip/ip_attr.c new file mode 100644 index 0000000000..a46a82c85f --- /dev/null +++ b/usr/src/uts/common/inet/ip/ip_attr.c @@ -0,0 +1,1338 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ +/* Copyright (c) 1990 Mentat Inc. */ + +#include <sys/types.h> +#include <sys/stream.h> +#include <sys/strsun.h> +#include <sys/zone.h> +#include <sys/ddi.h> +#include <sys/sunddi.h> +#include <sys/cmn_err.h> +#include <sys/debug.h> +#include <sys/atomic.h> + +#include <sys/systm.h> +#include <sys/param.h> +#include <sys/kmem.h> +#include <sys/sdt.h> +#include <sys/socket.h> +#include <sys/mac.h> +#include <net/if.h> +#include <net/if_arp.h> +#include <net/route.h> +#include <sys/sockio.h> +#include <netinet/in.h> +#include <net/if_dl.h> + +#include <inet/common.h> +#include <inet/mi.h> +#include <inet/mib2.h> +#include <inet/nd.h> +#include <inet/arp.h> +#include <inet/snmpcom.h> +#include <inet/kstatcom.h> + +#include <netinet/igmp_var.h> +#include <netinet/ip6.h> +#include <netinet/icmp6.h> +#include <netinet/sctp.h> + +#include <inet/ip.h> +#include <inet/ip_impl.h> +#include <inet/ip6.h> +#include <inet/ip6_asp.h> +#include <inet/tcp.h> +#include <inet/ip_multi.h> +#include <inet/ip_if.h> +#include <inet/ip_ire.h> +#include <inet/ip_ftable.h> +#include <inet/ip_rts.h> +#include <inet/optcom.h> +#include <inet/ip_ndp.h> +#include <inet/ip_listutils.h> +#include <netinet/igmp.h> +#include <netinet/ip_mroute.h> +#include <inet/ipp_common.h> + +#include <net/pfkeyv2.h> +#include <inet/sadb.h> +#include <inet/ipsec_impl.h> +#include <inet/ipdrop.h> +#include <inet/ip_netinfo.h> +#include <sys/squeue_impl.h> +#include <sys/squeue.h> + +#include <inet/ipclassifier.h> +#include <inet/sctp_ip.h> +#include <inet/sctp/sctp_impl.h> +#include <inet/udp_impl.h> +#include <sys/sunddi.h> + +#include <sys/tsol/label.h> +#include <sys/tsol/tnet.h> + +/* + * Release a reference on ip_xmit_attr. + * The reference is acquired by conn_get_ixa() + */ +#define IXA_REFRELE(ixa) \ +{ \ + if (atomic_add_32_nv(&(ixa)->ixa_refcnt, -1) == 0) \ + ixa_inactive(ixa); \ +} + +#define IXA_REFHOLD(ixa) \ +{ \ + ASSERT((ixa)->ixa_refcnt != 0); \ + atomic_add_32(&(ixa)->ixa_refcnt, 1); \ +} + +/* + * When we need to handle a transmit side asynchronous operation, then we need + * to save sufficient information so that we can call the fragment and postfrag + * functions. That information is captured in an mblk containing this structure. + * + * Since this is currently only used for IPsec, we include information for + * the kernel crypto framework. + */ +typedef struct ixamblk_s { + boolean_t ixm_inbound; /* B_FALSE */ + iaflags_t ixm_flags; /* ixa_flags */ + netstackid_t ixm_stackid; /* Verify it didn't go away */ + uint_t ixm_ifindex; /* Used to find the nce */ + in6_addr_t ixm_nceaddr_v6; /* Used to find nce */ +#define ixm_nceaddr_v4 V4_PART_OF_V6(ixm_nceaddr_v6) + uint32_t ixm_fragsize; + uint_t ixm_pktlen; + uint16_t ixm_ip_hdr_length; /* Points to ULP header */ + uint8_t ixm_protocol; /* Protocol number for ULP cksum */ + pfirepostfrag_t ixm_postfragfn; + + zoneid_t ixm_zoneid; /* Needed for ipobs */ + zoneid_t ixm_no_loop_zoneid; /* IXAF_NO_LOOP_ZONEID_SET */ + + uint_t ixm_scopeid; /* For IPv6 link-locals */ + + uint32_t ixm_ident; /* For IPv6 fragment header */ + uint32_t ixm_xmit_hint; + + cred_t *ixm_cred; /* For getpeerucred - refhold if set */ + pid_t ixm_cpid; /* For getpeerucred */ + + ts_label_t *ixm_tsl; /* Refhold if set. */ + + /* + * When the pointers below are set they have a refhold on the struct. + */ + ipsec_latch_t *ixm_ipsec_latch; + struct ipsa_s *ixm_ipsec_ah_sa; /* SA for AH */ + struct ipsa_s *ixm_ipsec_esp_sa; /* SA for ESP */ + struct ipsec_policy_s *ixm_ipsec_policy; /* why are we here? */ + struct ipsec_action_s *ixm_ipsec_action; /* For reflected packets */ + + ipsa_ref_t ixm_ipsec_ref[2]; /* Soft reference to SA */ + + /* Need these while waiting for SA */ + uint16_t ixm_ipsec_src_port; /* Source port number of d-gram. */ + uint16_t ixm_ipsec_dst_port; /* Destination port number of d-gram. */ + uint8_t ixm_ipsec_icmp_type; /* ICMP type of d-gram */ + uint8_t ixm_ipsec_icmp_code; /* ICMP code of d-gram */ + + sa_family_t ixm_ipsec_inaf; /* Inner address family */ + uint32_t ixm_ipsec_insrc[IXA_MAX_ADDRLEN]; /* Inner src address */ + uint32_t ixm_ipsec_indst[IXA_MAX_ADDRLEN]; /* Inner dest address */ + uint8_t ixm_ipsec_insrcpfx; /* Inner source prefix */ + uint8_t ixm_ipsec_indstpfx; /* Inner destination prefix */ + + uint8_t ixm_ipsec_proto; /* IP protocol number for d-gram. */ +} ixamblk_t; + + +/* + * When we need to handle a receive side asynchronous operation, then we need + * to save sufficient information so that we can call ip_fanout. + * That information is captured in an mblk containing this structure. + * + * Since this is currently only used for IPsec, we include information for + * the kernel crypto framework. + */ +typedef struct iramblk_s { + boolean_t irm_inbound; /* B_TRUE */ + iaflags_t irm_flags; /* ira_flags */ + netstackid_t irm_stackid; /* Verify it didn't go away */ + uint_t irm_ifindex; /* To find ira_ill */ + + uint_t irm_rifindex; /* ira_rifindex */ + uint_t irm_ruifindex; /* ira_ruifindex */ + uint_t irm_pktlen; + uint16_t irm_ip_hdr_length; /* Points to ULP header */ + uint8_t irm_protocol; /* Protocol number for ULP cksum */ + zoneid_t irm_zoneid; /* ALL_ZONES unless local delivery */ + + squeue_t *irm_sqp; + ill_rx_ring_t *irm_ring; + + ipaddr_t irm_mroute_tunnel; /* IRAF_MROUTE_TUNNEL_SET */ + zoneid_t irm_no_loop_zoneid; /* IRAF_NO_LOOP_ZONEID_SET */ + uint32_t irm_esp_udp_ports; /* IRAF_ESP_UDP_PORTS */ + + char irm_l2src[IRA_L2SRC_SIZE]; /* If IRAF_L2SRC_SET */ + + cred_t *irm_cred; /* For getpeerucred - refhold if set */ + pid_t irm_cpid; /* For getpeerucred */ + + ts_label_t *irm_tsl; /* Refhold if set. */ + + /* + * When set these correspond to a refhold on the object. + */ + struct ipsa_s *irm_ipsec_ah_sa; /* SA for AH */ + struct ipsa_s *irm_ipsec_esp_sa; /* SA for ESP */ + struct ipsec_action_s *irm_ipsec_action; /* For reflected packets */ +} iramblk_t; + + +/* + * Take the information in ip_xmit_attr_t and stick it in an mblk + * that can later be passed to ip_xmit_attr_from_mblk to recreate the + * ip_xmit_attr_t. + * + * Returns NULL on memory allocation failure. + */ +mblk_t * +ip_xmit_attr_to_mblk(ip_xmit_attr_t *ixa) +{ + mblk_t *ixamp; + ixamblk_t *ixm; + nce_t *nce = ixa->ixa_nce; + + ASSERT(nce != NULL); + ixamp = allocb(sizeof (*ixm), BPRI_MED); + if (ixamp == NULL) + return (NULL); + + ixamp->b_datap->db_type = M_BREAK; + ixamp->b_wptr += sizeof (*ixm); + ixm = (ixamblk_t *)ixamp->b_rptr; + + bzero(ixm, sizeof (*ixm)); + ixm->ixm_inbound = B_FALSE; + ixm->ixm_flags = ixa->ixa_flags; + ixm->ixm_stackid = ixa->ixa_ipst->ips_netstack->netstack_stackid; + ixm->ixm_ifindex = nce->nce_ill->ill_phyint->phyint_ifindex; + ixm->ixm_nceaddr_v6 = nce->nce_addr; + ixm->ixm_fragsize = ixa->ixa_fragsize; + ixm->ixm_pktlen = ixa->ixa_pktlen; + ixm->ixm_ip_hdr_length = ixa->ixa_ip_hdr_length; + ixm->ixm_protocol = ixa->ixa_protocol; + ixm->ixm_postfragfn = ixa->ixa_postfragfn; + ixm->ixm_zoneid = ixa->ixa_zoneid; + ixm->ixm_no_loop_zoneid = ixa->ixa_no_loop_zoneid; + ixm->ixm_scopeid = ixa->ixa_scopeid; + ixm->ixm_ident = ixa->ixa_ident; + ixm->ixm_xmit_hint = ixa->ixa_xmit_hint; + + if (ixa->ixa_tsl != NULL) { + ixm->ixm_tsl = ixa->ixa_tsl; + label_hold(ixm->ixm_tsl); + } + if (ixa->ixa_cred != NULL) { + ixm->ixm_cred = ixa->ixa_cred; + crhold(ixa->ixa_cred); + } + ixm->ixm_cpid = ixa->ixa_cpid; + + if (ixa->ixa_flags & IXAF_IPSEC_SECURE) { + if (ixa->ixa_ipsec_ah_sa != NULL) { + ixm->ixm_ipsec_ah_sa = ixa->ixa_ipsec_ah_sa; + IPSA_REFHOLD(ixa->ixa_ipsec_ah_sa); + } + if (ixa->ixa_ipsec_esp_sa != NULL) { + ixm->ixm_ipsec_esp_sa = ixa->ixa_ipsec_esp_sa; + IPSA_REFHOLD(ixa->ixa_ipsec_esp_sa); + } + if (ixa->ixa_ipsec_policy != NULL) { + ixm->ixm_ipsec_policy = ixa->ixa_ipsec_policy; + IPPOL_REFHOLD(ixa->ixa_ipsec_policy); + } + if (ixa->ixa_ipsec_action != NULL) { + ixm->ixm_ipsec_action = ixa->ixa_ipsec_action; + IPACT_REFHOLD(ixa->ixa_ipsec_action); + } + if (ixa->ixa_ipsec_latch != NULL) { + ixm->ixm_ipsec_latch = ixa->ixa_ipsec_latch; + IPLATCH_REFHOLD(ixa->ixa_ipsec_latch); + } + ixm->ixm_ipsec_ref[0] = ixa->ixa_ipsec_ref[0]; + ixm->ixm_ipsec_ref[1] = ixa->ixa_ipsec_ref[1]; + ixm->ixm_ipsec_src_port = ixa->ixa_ipsec_src_port; + ixm->ixm_ipsec_dst_port = ixa->ixa_ipsec_dst_port; + ixm->ixm_ipsec_icmp_type = ixa->ixa_ipsec_icmp_type; + ixm->ixm_ipsec_icmp_code = ixa->ixa_ipsec_icmp_code; + ixm->ixm_ipsec_inaf = ixa->ixa_ipsec_inaf; + ixm->ixm_ipsec_insrc[0] = ixa->ixa_ipsec_insrc[0]; + ixm->ixm_ipsec_insrc[1] = ixa->ixa_ipsec_insrc[1]; + ixm->ixm_ipsec_insrc[2] = ixa->ixa_ipsec_insrc[2]; + ixm->ixm_ipsec_insrc[3] = ixa->ixa_ipsec_insrc[3]; + ixm->ixm_ipsec_indst[0] = ixa->ixa_ipsec_indst[0]; + ixm->ixm_ipsec_indst[1] = ixa->ixa_ipsec_indst[1]; + ixm->ixm_ipsec_indst[2] = ixa->ixa_ipsec_indst[2]; + ixm->ixm_ipsec_indst[3] = ixa->ixa_ipsec_indst[3]; + ixm->ixm_ipsec_insrcpfx = ixa->ixa_ipsec_insrcpfx; + ixm->ixm_ipsec_indstpfx = ixa->ixa_ipsec_indstpfx; + ixm->ixm_ipsec_proto = ixa->ixa_ipsec_proto; + } + return (ixamp); +} + +/* + * Extract the ip_xmit_attr_t from the mblk, checking that the + * ip_stack_t, ill_t, and nce_t still exist. Returns B_FALSE if that is + * not the case. + * + * Otherwise ixa is updated. + * Caller needs to release references on the ixa by calling ixa_refrele() + * which will imediately call ixa_inactive to release the references. + */ +boolean_t +ip_xmit_attr_from_mblk(mblk_t *ixamp, ip_xmit_attr_t *ixa) +{ + ixamblk_t *ixm; + netstack_t *ns; + ip_stack_t *ipst; + ill_t *ill; + nce_t *nce; + + /* We assume the caller hasn't initialized ixa */ + bzero(ixa, sizeof (*ixa)); + + ASSERT(DB_TYPE(ixamp) == M_BREAK); + ASSERT(ixamp->b_cont == NULL); + + ixm = (ixamblk_t *)ixamp->b_rptr; + ASSERT(!ixm->ixm_inbound); + + /* Verify the netstack is still around */ + ns = netstack_find_by_stackid(ixm->ixm_stackid); + if (ns == NULL) { + /* Disappeared on us */ + (void) ip_xmit_attr_free_mblk(ixamp); + return (B_FALSE); + } + ipst = ns->netstack_ip; + + /* Verify the ill is still around */ + ill = ill_lookup_on_ifindex(ixm->ixm_ifindex, + !(ixm->ixm_flags & IXAF_IS_IPV4), ipst); + + /* We have the ill, hence the netstack can't go away */ + netstack_rele(ns); + if (ill == NULL) { + /* Disappeared on us */ + (void) ip_xmit_attr_free_mblk(ixamp); + return (B_FALSE); + } + /* + * Find the nce. We don't load-spread (only lookup nce's on the ill) + * because we want to find the same nce as the one we had when + * ip_xmit_attr_to_mblk was called. + */ + if (ixm->ixm_flags & IXAF_IS_IPV4) { + nce = nce_lookup_v4(ill, &ixm->ixm_nceaddr_v4); + } else { + nce = nce_lookup_v6(ill, &ixm->ixm_nceaddr_v6); + } + + /* We have the nce, hence the ill can't go away */ + ill_refrele(ill); + if (nce == NULL) { + /* + * Since this is unusual and we don't know what type of + * nce it was, we drop the packet. + */ + (void) ip_xmit_attr_free_mblk(ixamp); + return (B_FALSE); + } + + ixa->ixa_flags = ixm->ixm_flags; + ixa->ixa_refcnt = 1; + ixa->ixa_ipst = ipst; + ixa->ixa_fragsize = ixm->ixm_fragsize; + ixa->ixa_pktlen = ixm->ixm_pktlen; + ixa->ixa_ip_hdr_length = ixm->ixm_ip_hdr_length; + ixa->ixa_protocol = ixm->ixm_protocol; + ixa->ixa_nce = nce; + ixa->ixa_postfragfn = ixm->ixm_postfragfn; + ixa->ixa_zoneid = ixm->ixm_zoneid; + ixa->ixa_no_loop_zoneid = ixm->ixm_no_loop_zoneid; + ixa->ixa_scopeid = ixm->ixm_scopeid; + ixa->ixa_ident = ixm->ixm_ident; + ixa->ixa_xmit_hint = ixm->ixm_xmit_hint; + + if (ixm->ixm_tsl != NULL) { + ixa->ixa_tsl = ixm->ixm_tsl; + ixa->ixa_free_flags |= IXA_FREE_TSL; + } + if (ixm->ixm_cred != NULL) { + ixa->ixa_cred = ixm->ixm_cred; + ixa->ixa_free_flags |= IXA_FREE_CRED; + } + ixa->ixa_cpid = ixm->ixm_cpid; + + ixa->ixa_ipsec_ah_sa = ixm->ixm_ipsec_ah_sa; + ixa->ixa_ipsec_esp_sa = ixm->ixm_ipsec_esp_sa; + ixa->ixa_ipsec_policy = ixm->ixm_ipsec_policy; + ixa->ixa_ipsec_action = ixm->ixm_ipsec_action; + ixa->ixa_ipsec_latch = ixm->ixm_ipsec_latch; + + ixa->ixa_ipsec_ref[0] = ixm->ixm_ipsec_ref[0]; + ixa->ixa_ipsec_ref[1] = ixm->ixm_ipsec_ref[1]; + ixa->ixa_ipsec_src_port = ixm->ixm_ipsec_src_port; + ixa->ixa_ipsec_dst_port = ixm->ixm_ipsec_dst_port; + ixa->ixa_ipsec_icmp_type = ixm->ixm_ipsec_icmp_type; + ixa->ixa_ipsec_icmp_code = ixm->ixm_ipsec_icmp_code; + ixa->ixa_ipsec_inaf = ixm->ixm_ipsec_inaf; + ixa->ixa_ipsec_insrc[0] = ixm->ixm_ipsec_insrc[0]; + ixa->ixa_ipsec_insrc[1] = ixm->ixm_ipsec_insrc[1]; + ixa->ixa_ipsec_insrc[2] = ixm->ixm_ipsec_insrc[2]; + ixa->ixa_ipsec_insrc[3] = ixm->ixm_ipsec_insrc[3]; + ixa->ixa_ipsec_indst[0] = ixm->ixm_ipsec_indst[0]; + ixa->ixa_ipsec_indst[1] = ixm->ixm_ipsec_indst[1]; + ixa->ixa_ipsec_indst[2] = ixm->ixm_ipsec_indst[2]; + ixa->ixa_ipsec_indst[3] = ixm->ixm_ipsec_indst[3]; + ixa->ixa_ipsec_insrcpfx = ixm->ixm_ipsec_insrcpfx; + ixa->ixa_ipsec_indstpfx = ixm->ixm_ipsec_indstpfx; + ixa->ixa_ipsec_proto = ixm->ixm_ipsec_proto; + + freeb(ixamp); + return (B_TRUE); +} + +/* + * Free the ixm mblk and any references it holds + * Returns b_cont. + */ +mblk_t * +ip_xmit_attr_free_mblk(mblk_t *ixamp) +{ + ixamblk_t *ixm; + mblk_t *mp; + + /* Consume mp */ + ASSERT(DB_TYPE(ixamp) == M_BREAK); + mp = ixamp->b_cont; + + ixm = (ixamblk_t *)ixamp->b_rptr; + ASSERT(!ixm->ixm_inbound); + + if (ixm->ixm_ipsec_ah_sa != NULL) { + IPSA_REFRELE(ixm->ixm_ipsec_ah_sa); + ixm->ixm_ipsec_ah_sa = NULL; + } + if (ixm->ixm_ipsec_esp_sa != NULL) { + IPSA_REFRELE(ixm->ixm_ipsec_esp_sa); + ixm->ixm_ipsec_esp_sa = NULL; + } + if (ixm->ixm_ipsec_policy != NULL) { + IPPOL_REFRELE(ixm->ixm_ipsec_policy); + ixm->ixm_ipsec_policy = NULL; + } + if (ixm->ixm_ipsec_action != NULL) { + IPACT_REFRELE(ixm->ixm_ipsec_action); + ixm->ixm_ipsec_action = NULL; + } + if (ixm->ixm_ipsec_latch) { + IPLATCH_REFRELE(ixm->ixm_ipsec_latch); + ixm->ixm_ipsec_latch = NULL; + } + + if (ixm->ixm_tsl != NULL) { + label_rele(ixm->ixm_tsl); + ixm->ixm_tsl = NULL; + } + if (ixm->ixm_cred != NULL) { + crfree(ixm->ixm_cred); + ixm->ixm_cred = NULL; + } + freeb(ixamp); + return (mp); +} + +/* + * Take the information in ip_recv_attr_t and stick it in an mblk + * that can later be passed to ip_recv_attr_from_mblk to recreate the + * ip_recv_attr_t. + * + * Returns NULL on memory allocation failure. + */ +mblk_t * +ip_recv_attr_to_mblk(ip_recv_attr_t *ira) +{ + mblk_t *iramp; + iramblk_t *irm; + ill_t *ill = ira->ira_ill; + + ASSERT(ira->ira_ill != NULL || ira->ira_ruifindex != 0); + + iramp = allocb(sizeof (*irm), BPRI_MED); + if (iramp == NULL) + return (NULL); + + iramp->b_datap->db_type = M_BREAK; + iramp->b_wptr += sizeof (*irm); + irm = (iramblk_t *)iramp->b_rptr; + + bzero(irm, sizeof (*irm)); + irm->irm_inbound = B_TRUE; + irm->irm_flags = ira->ira_flags; + if (ill != NULL) { + /* Internal to IP - preserve ip_stack_t, ill and rill */ + irm->irm_stackid = + ill->ill_ipst->ips_netstack->netstack_stackid; + irm->irm_ifindex = ira->ira_ill->ill_phyint->phyint_ifindex; + ASSERT(ira->ira_rill->ill_phyint->phyint_ifindex == + ira->ira_rifindex); + } else { + /* Let ip_recv_attr_from_stackid know there isn't one */ + irm->irm_stackid = -1; + } + irm->irm_rifindex = ira->ira_rifindex; + irm->irm_ruifindex = ira->ira_ruifindex; + irm->irm_pktlen = ira->ira_pktlen; + irm->irm_ip_hdr_length = ira->ira_ip_hdr_length; + irm->irm_protocol = ira->ira_protocol; + + irm->irm_sqp = ira->ira_sqp; + irm->irm_ring = ira->ira_ring; + + irm->irm_zoneid = ira->ira_zoneid; + irm->irm_mroute_tunnel = ira->ira_mroute_tunnel; + irm->irm_no_loop_zoneid = ira->ira_no_loop_zoneid; + irm->irm_esp_udp_ports = ira->ira_esp_udp_ports; + + if (ira->ira_tsl != NULL) { + irm->irm_tsl = ira->ira_tsl; + label_hold(irm->irm_tsl); + } + if (ira->ira_cred != NULL) { + irm->irm_cred = ira->ira_cred; + crhold(ira->ira_cred); + } + irm->irm_cpid = ira->ira_cpid; + + if (ira->ira_flags & IRAF_L2SRC_SET) + bcopy(ira->ira_l2src, irm->irm_l2src, IRA_L2SRC_SIZE); + + if (ira->ira_flags & IRAF_IPSEC_SECURE) { + if (ira->ira_ipsec_ah_sa != NULL) { + irm->irm_ipsec_ah_sa = ira->ira_ipsec_ah_sa; + IPSA_REFHOLD(ira->ira_ipsec_ah_sa); + } + if (ira->ira_ipsec_esp_sa != NULL) { + irm->irm_ipsec_esp_sa = ira->ira_ipsec_esp_sa; + IPSA_REFHOLD(ira->ira_ipsec_esp_sa); + } + if (ira->ira_ipsec_action != NULL) { + irm->irm_ipsec_action = ira->ira_ipsec_action; + IPACT_REFHOLD(ira->ira_ipsec_action); + } + } + return (iramp); +} + +/* + * Extract the ip_recv_attr_t from the mblk. If we are used inside IP + * then irm_stackid is not -1, in which case we check that the + * ip_stack_t and ill_t still exist. Returns B_FALSE if that is + * not the case. + * If irm_stackid is zero then we are used by an ULP (e.g., squeue_enter) + * and we just proceed with ira_ill and ira_rill as NULL. + * + * The caller needs to release any references on the pointers inside the ire + * by calling ira_cleanup. + */ +boolean_t +ip_recv_attr_from_mblk(mblk_t *iramp, ip_recv_attr_t *ira) +{ + iramblk_t *irm; + netstack_t *ns; + ip_stack_t *ipst = NULL; + ill_t *ill = NULL, *rill = NULL; + + /* We assume the caller hasn't initialized ira */ + bzero(ira, sizeof (*ira)); + + ASSERT(DB_TYPE(iramp) == M_BREAK); + ASSERT(iramp->b_cont == NULL); + + irm = (iramblk_t *)iramp->b_rptr; + ASSERT(irm->irm_inbound); + + if (irm->irm_stackid != -1) { + /* Verify the netstack is still around */ + ns = netstack_find_by_stackid(irm->irm_stackid); + if (ns == NULL) { + /* Disappeared on us */ + (void) ip_recv_attr_free_mblk(iramp); + return (B_FALSE); + } + ipst = ns->netstack_ip; + + /* Verify the ill is still around */ + ill = ill_lookup_on_ifindex(irm->irm_ifindex, + !(irm->irm_flags & IRAF_IS_IPV4), ipst); + + if (irm->irm_ifindex == irm->irm_rifindex) { + rill = ill; + } else { + rill = ill_lookup_on_ifindex(irm->irm_rifindex, + !(irm->irm_flags & IRAF_IS_IPV4), ipst); + } + + /* We have the ill, hence the netstack can't go away */ + netstack_rele(ns); + if (ill == NULL || rill == NULL) { + /* Disappeared on us */ + if (ill != NULL) + ill_refrele(ill); + if (rill != NULL && rill != ill) + ill_refrele(rill); + (void) ip_recv_attr_free_mblk(iramp); + return (B_FALSE); + } + } + + ira->ira_flags = irm->irm_flags; + /* Caller must ill_refele(ira_ill) by using ira_cleanup() */ + ira->ira_ill = ill; + ira->ira_rill = rill; + + ira->ira_rifindex = irm->irm_rifindex; + ira->ira_ruifindex = irm->irm_ruifindex; + ira->ira_pktlen = irm->irm_pktlen; + ira->ira_ip_hdr_length = irm->irm_ip_hdr_length; + ira->ira_protocol = irm->irm_protocol; + + ira->ira_sqp = irm->irm_sqp; + /* The rest of IP assumes that the rings never go away. */ + ira->ira_ring = irm->irm_ring; + + ira->ira_zoneid = irm->irm_zoneid; + ira->ira_mroute_tunnel = irm->irm_mroute_tunnel; + ira->ira_no_loop_zoneid = irm->irm_no_loop_zoneid; + ira->ira_esp_udp_ports = irm->irm_esp_udp_ports; + + if (irm->irm_tsl != NULL) { + ira->ira_tsl = irm->irm_tsl; + ira->ira_free_flags |= IRA_FREE_TSL; + } + if (irm->irm_cred != NULL) { + ira->ira_cred = irm->irm_cred; + ira->ira_free_flags |= IRA_FREE_CRED; + } + ira->ira_cpid = irm->irm_cpid; + + if (ira->ira_flags & IRAF_L2SRC_SET) + bcopy(irm->irm_l2src, ira->ira_l2src, IRA_L2SRC_SIZE); + + ira->ira_ipsec_ah_sa = irm->irm_ipsec_ah_sa; + ira->ira_ipsec_esp_sa = irm->irm_ipsec_esp_sa; + ira->ira_ipsec_action = irm->irm_ipsec_action; + + freeb(iramp); + return (B_TRUE); +} + +/* + * Free the irm mblk and any references it holds + * Returns b_cont. + */ +mblk_t * +ip_recv_attr_free_mblk(mblk_t *iramp) +{ + iramblk_t *irm; + mblk_t *mp; + + /* Consume mp */ + ASSERT(DB_TYPE(iramp) == M_BREAK); + mp = iramp->b_cont; + + irm = (iramblk_t *)iramp->b_rptr; + ASSERT(irm->irm_inbound); + + if (irm->irm_ipsec_ah_sa != NULL) { + IPSA_REFRELE(irm->irm_ipsec_ah_sa); + irm->irm_ipsec_ah_sa = NULL; + } + if (irm->irm_ipsec_esp_sa != NULL) { + IPSA_REFRELE(irm->irm_ipsec_esp_sa); + irm->irm_ipsec_esp_sa = NULL; + } + if (irm->irm_ipsec_action != NULL) { + IPACT_REFRELE(irm->irm_ipsec_action); + irm->irm_ipsec_action = NULL; + } + if (irm->irm_tsl != NULL) { + label_rele(irm->irm_tsl); + irm->irm_tsl = NULL; + } + if (irm->irm_cred != NULL) { + crfree(irm->irm_cred); + irm->irm_cred = NULL; + } + + freeb(iramp); + return (mp); +} + +/* + * Returns true if the mblk contains an ip_recv_attr_t + * For now we just check db_type. + */ +boolean_t +ip_recv_attr_is_mblk(mblk_t *mp) +{ + /* + * Need to handle the various forms of tcp_timermp which are tagged + * with b_wptr and might have a NULL b_datap. + */ + if (mp->b_wptr == NULL || mp->b_wptr == (uchar_t *)-1) + return (B_FALSE); + +#ifdef DEBUG + iramblk_t *irm; + + if (DB_TYPE(mp) != M_BREAK) + return (B_FALSE); + + irm = (iramblk_t *)mp->b_rptr; + ASSERT(irm->irm_inbound); + return (B_TRUE); +#else + return (DB_TYPE(mp) == M_BREAK); +#endif +} + +static ip_xmit_attr_t * +conn_get_ixa_impl(conn_t *connp, boolean_t replace, int kmflag) +{ + ip_xmit_attr_t *ixa; + ip_xmit_attr_t *oldixa; + + mutex_enter(&connp->conn_lock); + ixa = connp->conn_ixa; + + /* At least one references for the conn_t */ + ASSERT(ixa->ixa_refcnt >= 1); + if (atomic_add_32_nv(&ixa->ixa_refcnt, 1) == 2) { + /* No other thread using conn_ixa */ + mutex_exit(&connp->conn_lock); + return (ixa); + } + ixa = kmem_alloc(sizeof (*ixa), kmflag); + if (ixa == NULL) { + mutex_exit(&connp->conn_lock); + ixa_refrele(connp->conn_ixa); + return (NULL); + } + ixa_safe_copy(connp->conn_ixa, ixa); + + /* Make sure we drop conn_lock before any refrele */ + if (replace) { + ixa->ixa_refcnt++; /* No atomic needed - not visible */ + oldixa = connp->conn_ixa; + connp->conn_ixa = ixa; + mutex_exit(&connp->conn_lock); + IXA_REFRELE(oldixa); /* Undo refcnt from conn_t */ + } else { + oldixa = connp->conn_ixa; + mutex_exit(&connp->conn_lock); + } + IXA_REFRELE(oldixa); /* Undo above atomic_add_32_nv */ + + return (ixa); +} + +/* + * Return an ip_xmit_attr_t to use with a conn_t that ensures that only + * the caller can access the ip_xmit_attr_t. + * + * If nobody else is using conn_ixa we return it. + * Otherwise we make a "safe" copy of conn_ixa + * and return it. The "safe" copy has the pointers set to NULL + * (since the pointers might be changed by another thread using + * conn_ixa). The caller needs to check for NULL pointers to see + * if ip_set_destination needs to be called to re-establish the pointers. + * + * If 'replace' is set then we replace conn_ixa with the new ip_xmit_attr_t. + * That is used when we connect() the ULP. + */ +ip_xmit_attr_t * +conn_get_ixa(conn_t *connp, boolean_t replace) +{ + return (conn_get_ixa_impl(connp, replace, KM_NOSLEEP)); +} + +/* + * Used only when the option is to have the kernel hang due to not + * cleaning up ixa references on ills etc. + */ +ip_xmit_attr_t * +conn_get_ixa_tryhard(conn_t *connp, boolean_t replace) +{ + return (conn_get_ixa_impl(connp, replace, KM_SLEEP)); +} + +/* + * Replace conn_ixa with the ixa argument. + * + * The caller must hold conn_lock. + * + * We return the old ixa; the caller must ixa_refrele that after conn_lock + * has been dropped. + */ +ip_xmit_attr_t * +conn_replace_ixa(conn_t *connp, ip_xmit_attr_t *ixa) +{ + ip_xmit_attr_t *oldixa; + + ASSERT(MUTEX_HELD(&connp->conn_lock)); + + oldixa = connp->conn_ixa; + IXA_REFHOLD(ixa); + connp->conn_ixa = ixa; + return (oldixa); +} + +/* + * Return a ip_xmit_attr_t to use with a conn_t that is based on but + * separate from conn_ixa. + * + * This "safe" copy has the pointers set to NULL + * (since the pointers might be changed by another thread using + * conn_ixa). The caller needs to check for NULL pointers to see + * if ip_set_destination needs to be called to re-establish the pointers. + */ +ip_xmit_attr_t * +conn_get_ixa_exclusive(conn_t *connp) +{ + ip_xmit_attr_t *ixa; + + mutex_enter(&connp->conn_lock); + ixa = connp->conn_ixa; + + /* At least one references for the conn_t */ + ASSERT(ixa->ixa_refcnt >= 1); + + /* Make sure conn_ixa doesn't disappear while we copy it */ + atomic_add_32(&ixa->ixa_refcnt, 1); + + ixa = kmem_alloc(sizeof (*ixa), KM_NOSLEEP); + if (ixa == NULL) { + mutex_exit(&connp->conn_lock); + ixa_refrele(connp->conn_ixa); + return (NULL); + } + ixa_safe_copy(connp->conn_ixa, ixa); + mutex_exit(&connp->conn_lock); + IXA_REFRELE(connp->conn_ixa); + return (ixa); +} + +void +ixa_safe_copy(ip_xmit_attr_t *src, ip_xmit_attr_t *ixa) +{ + bcopy(src, ixa, sizeof (*ixa)); + ixa->ixa_refcnt = 1; + /* + * Clear any pointers that have references and might be changed + * by ip_set_destination or the ULP + */ + ixa->ixa_ire = NULL; + ixa->ixa_nce = NULL; + ixa->ixa_dce = NULL; + ixa->ixa_ire_generation = IRE_GENERATION_VERIFY; + ixa->ixa_dce_generation = DCE_GENERATION_VERIFY; +#ifdef DEBUG + ixa->ixa_curthread = NULL; +#endif + /* Clear all the IPsec pointers and the flag as well. */ + ixa->ixa_flags &= ~IXAF_IPSEC_SECURE; + + ixa->ixa_ipsec_latch = NULL; + ixa->ixa_ipsec_ah_sa = NULL; + ixa->ixa_ipsec_esp_sa = NULL; + ixa->ixa_ipsec_policy = NULL; + ixa->ixa_ipsec_action = NULL; + + /* + * We leave ixa_tsl unchanged, but if it has a refhold we need + * to get an extra refhold. + */ + if (ixa->ixa_free_flags & IXA_FREE_TSL) + label_hold(ixa->ixa_tsl); + + /* + * We leave ixa_cred unchanged, but if it has a refhold we need + * to get an extra refhold. + */ + if (ixa->ixa_free_flags & IXA_FREE_CRED) + crhold(ixa->ixa_cred); +} + +/* + * Duplicate an ip_xmit_attr_t. + * Assumes that the caller controls the ixa, hence we do not need to use + * a safe copy. We just have to increase the refcnt on any pointers. + */ +ip_xmit_attr_t * +ip_xmit_attr_duplicate(ip_xmit_attr_t *src_ixa) +{ + ip_xmit_attr_t *ixa; + + ixa = kmem_alloc(sizeof (*ixa), KM_NOSLEEP); + if (ixa == NULL) + return (NULL); + bcopy(src_ixa, ixa, sizeof (*ixa)); + ixa->ixa_refcnt = 1; + + if (ixa->ixa_ire != NULL) + ire_refhold_notr(ixa->ixa_ire); + if (ixa->ixa_nce != NULL) + nce_refhold(ixa->ixa_nce); + if (ixa->ixa_dce != NULL) + dce_refhold_notr(ixa->ixa_dce); + +#ifdef DEBUG + ixa->ixa_curthread = NULL; +#endif + + if (ixa->ixa_ipsec_latch != NULL) + IPLATCH_REFHOLD(ixa->ixa_ipsec_latch); + if (ixa->ixa_ipsec_ah_sa != NULL) + IPSA_REFHOLD(ixa->ixa_ipsec_ah_sa); + if (ixa->ixa_ipsec_esp_sa != NULL) + IPSA_REFHOLD(ixa->ixa_ipsec_esp_sa); + if (ixa->ixa_ipsec_policy != NULL) + IPPOL_REFHOLD(ixa->ixa_ipsec_policy); + if (ixa->ixa_ipsec_action != NULL) + IPACT_REFHOLD(ixa->ixa_ipsec_action); + + if (ixa->ixa_tsl != NULL) { + label_hold(ixa->ixa_tsl); + ixa->ixa_free_flags |= IXA_FREE_TSL; + } + if (ixa->ixa_cred != NULL) { + crhold(ixa->ixa_cred); + ixa->ixa_free_flags |= IXA_FREE_CRED; + } + return (ixa); +} + +/* + * Used to replace the ixa_label field. + * The caller should have a reference on the label, which we transfer to + * the attributes so that when the attribute is freed/cleaned up + * we will release that reference. + */ +void +ip_xmit_attr_replace_tsl(ip_xmit_attr_t *ixa, ts_label_t *tsl) +{ + ASSERT(tsl != NULL); + + if (ixa->ixa_free_flags & IXA_FREE_TSL) { + ASSERT(ixa->ixa_tsl != NULL); + label_rele(ixa->ixa_tsl); + } else { + ixa->ixa_free_flags |= IXA_FREE_TSL; + } + ixa->ixa_tsl = tsl; +} + +/* + * Replace the ip_recv_attr_t's label. + * Due to kernel RPC's use of db_credp we also need to replace ira_cred; + * TCP/UDP uses ira_cred to set db_credp for non-socket users. + * This can fail (and return B_FALSE) due to lack of memory. + */ +boolean_t +ip_recv_attr_replace_label(ip_recv_attr_t *ira, ts_label_t *tsl) +{ + cred_t *newcr; + + if (ira->ira_free_flags & IRA_FREE_TSL) { + ASSERT(ira->ira_tsl != NULL); + label_rele(ira->ira_tsl); + } + label_hold(tsl); + ira->ira_tsl = tsl; + ira->ira_free_flags |= IRA_FREE_TSL; + + /* + * Reset zoneid if we have a shared address. That allows + * ip_fanout_tx_v4/v6 to determine the zoneid again. + */ + if (ira->ira_flags & IRAF_TX_SHARED_ADDR) + ira->ira_zoneid = ALL_ZONES; + + /* We update ira_cred for RPC */ + newcr = copycred_from_tslabel(ira->ira_cred, ira->ira_tsl, KM_NOSLEEP); + if (newcr == NULL) + return (B_FALSE); + if (ira->ira_free_flags & IRA_FREE_CRED) + crfree(ira->ira_cred); + ira->ira_cred = newcr; + ira->ira_free_flags |= IRA_FREE_CRED; + return (B_TRUE); +} + +/* + * This needs to be called after ip_set_destination/tsol_check_dest might + * have changed ixa_tsl to be specific for a destination, and we now want to + * send to a different destination. + * We have to restart with crgetlabel() since ip_set_destination/ + * tsol_check_dest will start with ixa_tsl. + */ +void +ip_xmit_attr_restore_tsl(ip_xmit_attr_t *ixa, cred_t *cr) +{ + if (!is_system_labeled()) + return; + + if (ixa->ixa_free_flags & IXA_FREE_TSL) { + ASSERT(ixa->ixa_tsl != NULL); + label_rele(ixa->ixa_tsl); + ixa->ixa_free_flags &= ~IXA_FREE_TSL; + } + ixa->ixa_tsl = crgetlabel(cr); +} + +void +ixa_refrele(ip_xmit_attr_t *ixa) +{ + IXA_REFRELE(ixa); +} + +void +ixa_inactive(ip_xmit_attr_t *ixa) +{ + ASSERT(ixa->ixa_refcnt == 0); + + ixa_cleanup(ixa); + kmem_free(ixa, sizeof (*ixa)); +} + +/* + * Release any references contained in the ixa. + * Also clear any fields that are not controlled by ixa_flags. + */ +void +ixa_cleanup(ip_xmit_attr_t *ixa) +{ + if (ixa->ixa_ire != NULL) { + ire_refrele_notr(ixa->ixa_ire); + ixa->ixa_ire = NULL; + } + if (ixa->ixa_dce != NULL) { + dce_refrele_notr(ixa->ixa_dce); + ixa->ixa_dce = NULL; + } + if (ixa->ixa_nce != NULL) { + nce_refrele(ixa->ixa_nce); + ixa->ixa_nce = NULL; + } + ixa->ixa_ire_generation = IRE_GENERATION_VERIFY; + ixa->ixa_dce_generation = DCE_GENERATION_VERIFY; + if (ixa->ixa_flags & IXAF_IPSEC_SECURE) { + ipsec_out_release_refs(ixa); + } + if (ixa->ixa_free_flags & IXA_FREE_TSL) { + ASSERT(ixa->ixa_tsl != NULL); + label_rele(ixa->ixa_tsl); + ixa->ixa_tsl = NULL; + ixa->ixa_free_flags &= ~IXA_FREE_TSL; + } + if (ixa->ixa_free_flags & IXA_FREE_CRED) { + ASSERT(ixa->ixa_cred != NULL); + crfree(ixa->ixa_cred); + ixa->ixa_cred = NULL; + ixa->ixa_free_flags &= ~IXA_FREE_CRED; + } + ixa->ixa_src_preferences = 0; + ixa->ixa_ifindex = 0; + ixa->ixa_multicast_ifindex = 0; + ixa->ixa_multicast_ifaddr = INADDR_ANY; +} + +/* + * Release any references contained in the ira. + * Callers which use ip_recv_attr_from_mblk() would pass B_TRUE as the second + * argument. + */ +void +ira_cleanup(ip_recv_attr_t *ira, boolean_t refrele_ill) +{ + if (ira->ira_ill != NULL) { + if (ira->ira_rill != ira->ira_ill) { + /* Caused by async processing */ + ill_refrele(ira->ira_rill); + } + if (refrele_ill) + ill_refrele(ira->ira_ill); + } + if (ira->ira_flags & IRAF_IPSEC_SECURE) { + ipsec_in_release_refs(ira); + } + if (ira->ira_free_flags & IRA_FREE_TSL) { + ASSERT(ira->ira_tsl != NULL); + label_rele(ira->ira_tsl); + ira->ira_tsl = NULL; + ira->ira_free_flags &= ~IRA_FREE_TSL; + } + if (ira->ira_free_flags & IRA_FREE_CRED) { + ASSERT(ira->ira_cred != NULL); + crfree(ira->ira_cred); + ira->ira_cred = NULL; + ira->ira_free_flags &= ~IRA_FREE_CRED; + } +} + +/* + * Function to help release any IRE, NCE, or DCEs that + * have been deleted and are marked as condemned. + * The caller is responsible for any serialization which is different + * for TCP, SCTP, and others. + */ +static void +ixa_cleanup_stale(ip_xmit_attr_t *ixa) +{ + ire_t *ire; + nce_t *nce; + dce_t *dce; + + ire = ixa->ixa_ire; + nce = ixa->ixa_nce; + dce = ixa->ixa_dce; + + if (ire != NULL && IRE_IS_CONDEMNED(ire)) { + ire_refrele_notr(ire); + ire = ire_blackhole(ixa->ixa_ipst, + !(ixa->ixa_flags & IXAF_IS_IPV4)); + ASSERT(ire != NULL); +#ifdef DEBUG + ire_refhold_notr(ire); + ire_refrele(ire); +#endif + ixa->ixa_ire = ire; + ixa->ixa_ire_generation = IRE_GENERATION_VERIFY; + } + if (nce != NULL && nce->nce_is_condemned) { + /* Can make it NULL as long as we set IRE_GENERATION_VERIFY */ + nce_refrele(nce); + ixa->ixa_nce = NULL; + ixa->ixa_ire_generation = IRE_GENERATION_VERIFY; + } + if (dce != NULL && DCE_IS_CONDEMNED(dce)) { + dce_refrele_notr(dce); + dce = dce_get_default(ixa->ixa_ipst); + ASSERT(dce != NULL); +#ifdef DEBUG + dce_refhold_notr(dce); + dce_refrele(dce); +#endif + ixa->ixa_dce = dce; + ixa->ixa_dce_generation = DCE_GENERATION_VERIFY; + } +} + +/* + * Used to run ixa_cleanup_stale inside the tcp squeue. + * When done we hand the mp back by assigning it to tcps_ixa_cleanup_mp + * and waking up the caller. + */ +/* ARGSUSED2 */ +static void +tcp_ixa_cleanup(void *arg, mblk_t *mp, void *arg2, + ip_recv_attr_t *dummy) +{ + conn_t *connp = (conn_t *)arg; + tcp_stack_t *tcps; + + tcps = connp->conn_netstack->netstack_tcp; + + ixa_cleanup_stale(connp->conn_ixa); + + mutex_enter(&tcps->tcps_ixa_cleanup_lock); + ASSERT(tcps->tcps_ixa_cleanup_mp == NULL); + tcps->tcps_ixa_cleanup_mp = mp; + cv_signal(&tcps->tcps_ixa_cleanup_cv); + mutex_exit(&tcps->tcps_ixa_cleanup_lock); +} + + +/* + * ipcl_walk() function to help release any IRE, NCE, or DCEs that + * have been deleted and are marked as condemned. + * Note that we can't cleanup the pointers since there can be threads + * in conn_ip_output() sending while we are called. + */ +void +conn_ixa_cleanup(conn_t *connp, void *arg) +{ + boolean_t tryhard = (boolean_t)arg; + + if (IPCL_IS_TCP(connp)) { + mblk_t *mp; + tcp_stack_t *tcps; + + tcps = connp->conn_netstack->netstack_tcp; + + mutex_enter(&tcps->tcps_ixa_cleanup_lock); + while ((mp = tcps->tcps_ixa_cleanup_mp) == NULL) { + /* + * Multiple concurrent cleanups; need to have the last + * one run since it could be an unplumb. + */ + cv_wait(&tcps->tcps_ixa_cleanup_cv, + &tcps->tcps_ixa_cleanup_lock); + } + tcps->tcps_ixa_cleanup_mp = NULL; + mutex_exit(&tcps->tcps_ixa_cleanup_lock); + + if (connp->conn_sqp->sq_run == curthread) { + /* Already on squeue */ + tcp_ixa_cleanup(connp, mp, NULL, NULL); + } else { + CONN_INC_REF(connp); + SQUEUE_ENTER_ONE(connp->conn_sqp, mp, tcp_ixa_cleanup, + connp, NULL, SQ_PROCESS, SQTAG_TCP_IXA_CLEANUP); + + /* Wait until tcp_ixa_cleanup has run */ + mutex_enter(&tcps->tcps_ixa_cleanup_lock); + while (tcps->tcps_ixa_cleanup_mp == NULL) { + cv_wait(&tcps->tcps_ixa_cleanup_cv, + &tcps->tcps_ixa_cleanup_lock); + } + mutex_exit(&tcps->tcps_ixa_cleanup_lock); + } + } else if (IPCL_IS_SCTP(connp)) { + sctp_t *sctp; + sctp_faddr_t *fp; + + sctp = CONN2SCTP(connp); + RUN_SCTP(sctp); + ixa_cleanup_stale(connp->conn_ixa); + for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->next) + ixa_cleanup_stale(fp->ixa); + WAKE_SCTP(sctp); + } else { + ip_xmit_attr_t *ixa; + + /* + * If there is a different thread using conn_ixa then we get a + * new copy and cut the old one loose from conn_ixa. Otherwise + * we use conn_ixa and prevent any other thread from + * using/changing it. Anybody using conn_ixa (e.g., a thread in + * conn_ip_output) will do an ixa_refrele which will remove any + * references on the ire etc. + * + * Once we are done other threads can use conn_ixa since the + * refcnt will be back at one. + * + * We are called either because an ill is going away, or + * due to memory reclaim. In the former case we wait for + * memory since we must remove the refcnts on the ill. + */ + if (tryhard) { + ixa = conn_get_ixa_tryhard(connp, B_TRUE); + ASSERT(ixa != NULL); + } else { + ixa = conn_get_ixa(connp, B_TRUE); + if (ixa == NULL) { + /* + * Somebody else was using it and kmem_alloc + * failed! Next memory reclaim will try to + * clean up. + */ + DTRACE_PROBE1(conn__ixa__cleanup__bail, + conn_t *, connp); + return; + } + } + ixa_cleanup_stale(ixa); + ixa_refrele(ixa); + } +} + +/* + * ixa needs to be an exclusive copy so that no one changes the cookie + * or the ixa_nce. + */ +boolean_t +ixa_check_drain_insert(conn_t *connp, ip_xmit_attr_t *ixa) +{ + uintptr_t cookie = ixa->ixa_cookie; + ill_dld_direct_t *idd; + idl_tx_list_t *idl_txl; + ill_t *ill = ixa->ixa_nce->nce_ill; + boolean_t inserted = B_FALSE; + + idd = &(ill)->ill_dld_capab->idc_direct; + idl_txl = &ixa->ixa_ipst->ips_idl_tx_list[IDLHASHINDEX(cookie)]; + if (cookie == 0) { + /* + * ip_xmit failed the canputnext check + */ + connp->conn_did_putbq = 1; + ASSERT(cookie == 0); + conn_drain_insert(connp, idl_txl); + if (!IPCL_IS_NONSTR(connp)) + noenable(connp->conn_wq); + return (B_TRUE); + } + ASSERT(ILL_DIRECT_CAPABLE(ill)); + mutex_enter(&idl_txl->txl_lock); + if (connp->conn_direct_blocked || + (idd->idd_tx_fctl_df(idd->idd_tx_fctl_dh, cookie) == 0)) { + DTRACE_PROBE1(ill__tx__not__blocked, boolean, + connp->conn_direct_blocked); + } else if (idl_txl->txl_cookie != NULL && + idl_txl->txl_cookie != ixa->ixa_cookie) { + DTRACE_PROBE2(ill__send__tx__collision, uintptr_t, cookie, + uintptr_t, idl_txl->txl_cookie); + /* bump kstat for cookie collision */ + } else { + connp->conn_direct_blocked = B_TRUE; + idl_txl->txl_cookie = cookie; + conn_drain_insert(connp, idl_txl); + if (!IPCL_IS_NONSTR(connp)) + noenable(connp->conn_wq); + inserted = B_TRUE; + } + mutex_exit(&idl_txl->txl_lock); + return (inserted); +} |