summary | refs | log | tree | commit | diff
path: root/usr/src/uts/common/inet/ip_impl.h
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/uts/common/inet/ip_impl.h')
-rw-r--r-- usr/src/uts/common/inet/ip_impl.h 410
1 files changed, 30 insertions, 380 deletions
diff --git a/usr/src/uts/common/inet/ip_impl.h b/usr/src/uts/common/inet/ip_impl.h
index 5f9d674e17..694f7a63b0 100644
--- a/usr/src/uts/common/inet/ip_impl.h
+++ b/usr/src/uts/common/inet/ip_impl.h
@@ -50,10 +50,12 @@ extern "C" {
#define IP_HDR_CSUM_TTL_ADJUST 256
#define IP_TCP_CSUM_COMP IPPROTO_TCP
#define IP_UDP_CSUM_COMP IPPROTO_UDP
+#define IP_ICMPV6_CSUM_COMP IPPROTO_ICMPV6
#else
#define IP_HDR_CSUM_TTL_ADJUST 1
#define IP_TCP_CSUM_COMP (IPPROTO_TCP << 8)
#define IP_UDP_CSUM_COMP (IPPROTO_UDP << 8)
+#define IP_ICMPV6_CSUM_COMP (IPPROTO_ICMPV6 << 8)
#endif
#define TCP_CHECKSUM_OFFSET 16
@@ -62,240 +64,20 @@ extern "C" {
#define UDP_CHECKSUM_OFFSET 6
#define UDP_CHECKSUM_SIZE 2
+#define ICMPV6_CHECKSUM_OFFSET 2
+#define ICMPV6_CHECKSUM_SIZE 2
+
#define IPH_TCPH_CHECKSUMP(ipha, hlen) \
((uint16_t *)(((uchar_t *)(ipha)) + ((hlen) + TCP_CHECKSUM_OFFSET)))
#define IPH_UDPH_CHECKSUMP(ipha, hlen) \
((uint16_t *)(((uchar_t *)(ipha)) + ((hlen) + UDP_CHECKSUM_OFFSET)))
+#define IPH_ICMPV6_CHECKSUMP(ipha, hlen) \
+ ((uint16_t *)(((uchar_t *)(ipha)) + ((hlen) + ICMPV6_CHECKSUM_OFFSET)))
+
#define ILL_HCKSUM_CAPABLE(ill) \
(((ill)->ill_capabilities & ILL_CAPAB_HCKSUM) != 0)
-/*
- * Macro that performs software checksum calculation on the IP header.
- */
-#define IP_HDR_CKSUM(ipha, sum, v_hlen_tos_len, ttl_protocol) { \
- (sum) += (ttl_protocol) + (ipha)->ipha_ident + \
- ((v_hlen_tos_len) >> 16) + \
- ((v_hlen_tos_len) & 0xFFFF) + \
- (ipha)->ipha_fragment_offset_and_flags; \
- (sum) = (((sum) & 0xFFFF) + ((sum) >> 16)); \
- (sum) = ~((sum) + ((sum) >> 16)); \
- (ipha)->ipha_hdr_checksum = (uint16_t)(sum); \
-}
-
-#define IS_IP_HDR_HWCKSUM(ipsec, mp, ill) \
- ((!ipsec) && (DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM) && \
- ILL_HCKSUM_CAPABLE(ill) && dohwcksum)
-
-/*
- * This macro acts as a wrapper around IP_CKSUM_XMIT_FAST, and it performs
- * several checks on the IRE and ILL (among other things) in order to see
- * whether or not hardware checksum offload is allowed for the outgoing
- * packet. It assumes that the caller has held a reference to the IRE.
- */
-#define IP_CKSUM_XMIT(ill, ire, mp, ihp, up, proto, start, end, \
- max_frag, ipsec_len, pseudo) { \
- uint32_t _hck_flags; \
- /* \
- * We offload checksum calculation to hardware when IPsec isn't \
- * present and if fragmentation isn't required. We also check \
- * if M_DATA fastpath is safe to be used on the corresponding \
- * IRE; this check is performed without grabbing ire_lock but \
- * instead by holding a reference to it. This is sufficient \
- * for IRE_CACHE; for IRE_BROADCAST on non-Ethernet links, the \
- * DL_NOTE_FASTPATH_FLUSH indication could come up from the \
- * driver and trigger the IRE (hence fp_mp) deletion. This is \
- * why only IRE_CACHE type is eligible for offload. \
- * \
- * The presence of IP options also forces the network stack to \
- * calculate the checksum in software. This is because: \
- * \
- * Wrap around: certain partial-checksum NICs (eri, ce) limit \
- * the size of "start offset" width to 6-bit. This effectively \
- * sets the largest value of the offset to 64-bytes, starting \
- * from the MAC header. When the cumulative MAC and IP headers \
- * exceed such limit, the offset will wrap around. This causes \
- * the checksum to be calculated at the wrong place. \
- * \
- * IPv4 source routing: none of the full-checksum capable NICs \
- * is capable of correctly handling the IPv4 source-routing \
- * option for purposes of calculating the pseudo-header; the \
- * actual destination is different from the destination in the \
- * header which is that of the next-hop. (This case may not be \
- * true for NICs which can parse IPv6 extension headers, but \
- * we choose to simplify the implementation by not offloading \
- * checksum when they are present.) \
- * \
- */ \
- if ((ill) != NULL && ILL_HCKSUM_CAPABLE(ill) && \
- !((ire)->ire_flags & RTF_MULTIRT) && \
- (!((ire)->ire_type & IRE_BROADCAST) || \
- (ill)->ill_type == IFT_ETHER) && \
- (ipsec_len) == 0 && \
- (((ire)->ire_ipversion == IPV4_VERSION && \
- (start) == IP_SIMPLE_HDR_LENGTH && \
- ((ire)->ire_nce != NULL && \
- (ire)->ire_nce->nce_fp_mp != NULL && \
- MBLKHEAD(mp) >= MBLKL((ire)->ire_nce->nce_fp_mp))) || \
- ((ire)->ire_ipversion == IPV6_VERSION && \
- (start) == IPV6_HDR_LEN && \
- (ire)->ire_nce->nce_fp_mp != NULL && \
- MBLKHEAD(mp) >= MBLKL((ire)->ire_nce->nce_fp_mp))) && \
- (max_frag) >= (uint_t)((end) + (ipsec_len)) && \
- dohwcksum) { \
- _hck_flags = (ill)->ill_hcksum_capab->ill_hcksum_txflags; \
- } else { \
- _hck_flags = 0; \
- } \
- IP_CKSUM_XMIT_FAST((ire)->ire_ipversion, _hck_flags, mp, ihp, \
- up, proto, start, end, pseudo); \
-}
-
-/*
- * Based on the device capabilities, this macro either marks an outgoing
- * packet with hardware checksum offload information or calculates the
- * checksum in software. If the latter is performed, the checksum field
- * of the dblk is cleared; otherwise it will be non-zero and contain the
- * necessary flag(s) for the driver.
- */
-#define IP_CKSUM_XMIT_FAST(ipver, hck_flags, mp, ihp, up, proto, start, \
- end, pseudo) { \
- uint32_t _sum; \
- /* \
- * Underlying interface supports hardware checksum offload for \
- * the payload; leave the payload checksum for the hardware to \
- * calculate. N.B: We only need to set up checksum info on the \
- * first mblk. \
- */ \
- DB_CKSUMFLAGS(mp) = 0; \
- if (((ipver) == IPV4_VERSION && \
- ((hck_flags) & HCKSUM_INET_FULL_V4)) || \
- ((ipver) == IPV6_VERSION && \
- ((hck_flags) & HCKSUM_INET_FULL_V6))) { \
- /* \
- * Hardware calculates pseudo-header, header and the \
- * payload checksums, so clear the checksum field in \
- * the protocol header. \
- */ \
- *(up) = 0; \
- DB_CKSUMFLAGS(mp) |= HCK_FULLCKSUM; \
- } else if ((hck_flags) & HCKSUM_INET_PARTIAL) { \
- /* \
- * Partial checksum offload has been enabled. Fill \
- * the checksum field in the protocol header with the \
- * pseudo-header checksum value. \
- */ \
- _sum = ((proto) == IPPROTO_UDP) ? \
- IP_UDP_CSUM_COMP : IP_TCP_CSUM_COMP; \
- _sum += *(up) + (pseudo); \
- _sum = (_sum & 0xFFFF) + (_sum >> 16); \
- *(up) = (_sum & 0xFFFF) + (_sum >> 16); \
- /* \
- * Offsets are relative to beginning of IP header. \
- */ \
- DB_CKSUMSTART(mp) = (start); \
- DB_CKSUMSTUFF(mp) = ((proto) == IPPROTO_UDP) ? \
- (start) + UDP_CHECKSUM_OFFSET : \
- (start) + TCP_CHECKSUM_OFFSET; \
- DB_CKSUMEND(mp) = (end); \
- DB_CKSUMFLAGS(mp) |= HCK_PARTIALCKSUM; \
- } else { \
- /* \
- * Software checksumming. \
- */ \
- _sum = ((proto) == IPPROTO_UDP) ? \
- IP_UDP_CSUM_COMP : IP_TCP_CSUM_COMP; \
- _sum += (pseudo); \
- _sum = IP_CSUM(mp, start, _sum); \
- *(up) = (uint16_t)(((proto) == IPPROTO_UDP) ? \
- (_sum ? _sum : ~_sum) : _sum); \
- } \
- /* \
- * Hardware supports IP header checksum offload; clear the \
- * contents of IP header checksum field as expected by NIC. \
- * Do this only if we offloaded either full or partial sum. \
- */ \
- if ((ipver) == IPV4_VERSION && DB_CKSUMFLAGS(mp) != 0 && \
- ((hck_flags) & HCKSUM_IPHDRCKSUM)) { \
- DB_CKSUMFLAGS(mp) |= HCK_IPV4_HDRCKSUM; \
- ((ipha_t *)(ihp))->ipha_hdr_checksum = 0; \
- } \
-}
-
-/*
- * Macro to inspect the checksum of a fully-reassembled incoming datagram.
- */
-#define IP_CKSUM_RECV_REASS(hck_flags, off, pseudo, sum, err) { \
- (err) = B_FALSE; \
- if ((hck_flags) & HCK_FULLCKSUM) { \
- /* \
- * The sum of all fragment checksums should \
- * result in -0 (0xFFFF) or otherwise invalid. \
- */ \
- if ((sum) != 0xFFFF) \
- (err) = B_TRUE; \
- } else if ((hck_flags) & HCK_PARTIALCKSUM) { \
- (sum) += (pseudo); \
- (sum) = ((sum) & 0xFFFF) + ((sum) >> 16); \
- (sum) = ((sum) & 0xFFFF) + ((sum) >> 16); \
- if (~(sum) & 0xFFFF) \
- (err) = B_TRUE; \
- } else if (((sum) = IP_CSUM(mp, off, pseudo)) != 0) { \
- (err) = B_TRUE; \
- } \
-}
-
-/*
- * This macro inspects an incoming packet to see if the checksum value
- * contained in it is valid; if the hardware has provided the information,
- * the value is verified, otherwise it performs software checksumming.
- * The checksum value is returned to caller.
- */
-#define IP_CKSUM_RECV(hck_flags, sum, cksum_start, ulph_off, mp, mp1, err) { \
- int32_t _len; \
- \
- (err) = B_FALSE; \
- if ((hck_flags) & HCK_FULLCKSUM) { \
- /* \
- * Full checksum has been computed by the hardware \
- * and has been attached. If the driver wants us to \
- * verify the correctness of the attached value, in \
- * order to protect against faulty hardware, compare \
- * it against -0 (0xFFFF) to see if it's valid. \
- */ \
- (sum) = DB_CKSUM16(mp); \
- if (!((hck_flags) & HCK_FULLCKSUM_OK) && (sum) != 0xFFFF) \
- (err) = B_TRUE; \
- } else if (((hck_flags) & HCK_PARTIALCKSUM) && \
- ((mp1) == NULL || (mp1)->b_cont == NULL) && \
- (ulph_off) >= DB_CKSUMSTART(mp) && \
- ((_len = (ulph_off) - DB_CKSUMSTART(mp)) & 1) == 0) { \
- uint32_t _adj; \
- /* \
- * Partial checksum has been calculated by hardware \
- * and attached to the packet; in addition, any \
- * prepended extraneous data is even byte aligned, \
- * and there are at most two mblks associated with \
- * the packet. If any such data exists, we adjust \
- * the checksum; also take care of any postpended data. \
- */ \
- IP_ADJCKSUM_PARTIAL(cksum_start, mp, mp1, _len, _adj); \
- /* \
- * One's complement subtract extraneous checksum \
- */ \
- (sum) += DB_CKSUM16(mp); \
- if (_adj >= (sum)) \
- (sum) = ~(_adj - (sum)) & 0xFFFF; \
- else \
- (sum) -= _adj; \
- (sum) = ((sum) & 0xFFFF) + ((int)(sum) >> 16); \
- (sum) = ((sum) & 0xFFFF) + ((int)(sum) >> 16); \
- if (~(sum) & 0xFFFF) \
- (err) = B_TRUE; \
- } else if (((sum) = IP_CSUM(mp, ulph_off, sum)) != 0) { \
- (err) = B_TRUE; \
- } \
-}
/*
* Macro to adjust a given checksum value depending on any prepended
@@ -338,98 +120,37 @@ extern "C" {
} \
}
-#define ILL_MDT_CAPABLE(ill) \
- (((ill)->ill_capabilities & ILL_CAPAB_MDT) != 0)
-
-/*
- * ioctl identifier and structure for Multidata Transmit update
- * private M_CTL communication from IP to ULP.
- */
-#define MDT_IOC_INFO_UPDATE (('M' << 8) + 1020)
-
-typedef struct ip_mdt_info_s {
- uint_t mdt_info_id; /* MDT_IOC_INFO_UPDATE */
- ill_mdt_capab_t mdt_capab; /* ILL MDT capabilities */
-} ip_mdt_info_t;
+#define IS_SIMPLE_IPH(ipha) \
+ ((ipha)->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION)
/*
- * Macro that determines whether or not a given ILL is allowed for MDT.
+ * Currently supported flags for LSO.
*/
-#define ILL_MDT_USABLE(ill) \
- (ILL_MDT_CAPABLE(ill) && \
- ill->ill_mdt_capab != NULL && \
- ill->ill_mdt_capab->ill_mdt_version == MDT_VERSION_2 && \
- ill->ill_mdt_capab->ill_mdt_on != 0)
+#define LSO_BASIC_TCP_IPV4 DLD_LSO_BASIC_TCP_IPV4
+#define LSO_BASIC_TCP_IPV6 DLD_LSO_BASIC_TCP_IPV6
-#define ILL_LSO_CAPABLE(ill) \
- (((ill)->ill_capabilities & ILL_CAPAB_DLD_LSO) != 0)
+#define ILL_LSO_CAPABLE(ill) \
+ (((ill)->ill_capabilities & ILL_CAPAB_LSO) != 0)
-/*
- * ioctl identifier and structure for Large Segment Offload
- * private M_CTL communication from IP to ULP.
- */
-#define LSO_IOC_INFO_UPDATE (('L' << 24) + ('S' << 16) + ('O' << 8))
-
-typedef struct ip_lso_info_s {
- uint_t lso_info_id; /* LSO_IOC_INFO_UPDATE */
- ill_lso_capab_t lso_capab; /* ILL LSO capabilities */
-} ip_lso_info_t;
-
-/*
- * Macro that determines whether or not a given ILL is allowed for LSO.
- */
#define ILL_LSO_USABLE(ill) \
(ILL_LSO_CAPABLE(ill) && \
- ill->ill_lso_capab != NULL && \
- ill->ill_lso_capab->ill_lso_on != 0)
+ ill->ill_lso_capab != NULL)
-#define ILL_LSO_TCP_USABLE(ill) \
+#define ILL_LSO_TCP_IPV4_USABLE(ill) \
(ILL_LSO_USABLE(ill) && \
- ill->ill_lso_capab->ill_lso_flags & DLD_LSO_TX_BASIC_TCP_IPV4)
+ ill->ill_lso_capab->ill_lso_flags & LSO_BASIC_TCP_IPV4)
-/*
- * Macro that determines whether or not a given CONN may be considered
- * for fast path prior to proceeding further with LSO or Multidata.
- */
-#define CONN_IS_LSO_MD_FASTPATH(connp) \
- ((connp)->conn_dontroute == 0 && /* SO_DONTROUTE */ \
- !((connp)->conn_nexthop_set) && /* IP_NEXTHOP */ \
- (connp)->conn_outgoing_ill == NULL) /* IP{V6}_BOUND_IF */
-
-/* Definitions for fragmenting IP packets using MDT. */
-
-/*
- * Smaller and private version of pdescinfo_t used specifically for IP,
- * which allows for only a single payload span per packet.
- */
-typedef struct ip_pdescinfo_s PDESCINFO_STRUCT(2) ip_pdescinfo_t;
+#define ILL_LSO_TCP_IPV6_USABLE(ill) \
+ (ILL_LSO_USABLE(ill) && \
+ ill->ill_lso_capab->ill_lso_flags & LSO_BASIC_TCP_IPV6)
-/*
- * Macro version of ip_can_frag_mdt() which avoids the function call if we
- * only examine a single message block.
- */
-#define IP_CAN_FRAG_MDT(mp, hdr_len, len) \
- (((mp)->b_cont == NULL) ? \
- (MBLKL(mp) >= ((hdr_len) + ip_wput_frag_mdt_min)) : \
- ip_can_frag_mdt((mp), (hdr_len), (len)))
+#define ILL_ZCOPY_CAPABLE(ill) \
+ (((ill)->ill_capabilities & ILL_CAPAB_ZEROCOPY) != 0)
-/*
- * Macro that determines whether or not a given IPC requires
- * outbound IPSEC processing.
- */
-#define CONN_IPSEC_OUT_ENCAPSULATED(connp) \
- ((connp)->conn_out_enforce_policy || \
- ((connp)->conn_latch != NULL && \
- (connp)->conn_latch->ipl_out_policy != NULL))
+#define ILL_ZCOPY_USABLE(ill) \
+ (ILL_ZCOPY_CAPABLE(ill) && (ill->ill_zerocopy_capab != NULL) && \
+ (ill->ill_zerocopy_capab->ill_zerocopy_flags != 0))
-/*
- * Macro that checks whether or not a particular UDP conn is
- * flow-controlling on the read-side.
- *
- * Note that this check is done after the conn is found in
- * the UDP fanout table.
- */
-#define CONN_UDP_FLOWCTLD(connp) !canputnext((connp)->conn_rq)
/* Macros that follow the definitions of flags for mac_tx() (see mac_client.h) */
#define IP_DROP_ON_NO_DESC 0x01 /* Equivalent to MAC_DROP_ON_NO_DESC */
@@ -437,74 +158,7 @@ typedef struct ip_pdescinfo_s PDESCINFO_STRUCT(2) ip_pdescinfo_t;
#define ILL_DIRECT_CAPABLE(ill) \
(((ill)->ill_capabilities & ILL_CAPAB_DLD_DIRECT) != 0)
-#define ILL_SEND_TX(ill, ire, hint, mp, flag, connp) { \
- if (ILL_DIRECT_CAPABLE(ill) && DB_TYPE(mp) == M_DATA) { \
- ill_dld_direct_t *idd; \
- uintptr_t cookie; \
- conn_t *udp_connp = (conn_t *)connp; \
- \
- idd = &(ill)->ill_dld_capab->idc_direct; \
- /* \
- * Send the packet directly to DLD, where it \
- * may be queued depending on the availability \
- * of transmit resources at the media layer. \
- * Ignore the returned value for the time being. \
- * In future, we may want to take this into \
- * account and flow control the TCP. \
- */ \
- cookie = idd->idd_tx_df(idd->idd_tx_dh, mp, \
- (uintptr_t)(hint), flag); \
- \
- /* \
- * non-NULL cookie indicates flow control situation \
- * and the cookie itself identifies this specific \
- * Tx ring that is blocked. This cookie is used to \
- * block the UDP conn that is sending packets over \
- * this specific Tx ring. \
- */ \
- if ((cookie != NULL) && (udp_connp != NULL) && \
- (udp_connp->conn_ulp == IPPROTO_UDP)) { \
- idl_tx_list_t *idl_txl; \
- ip_stack_t *ipst; \
- \
- /* \
- * Flow controlled. \
- */ \
- DTRACE_PROBE2(ill__send__tx__cookie, \
- uintptr_t, cookie, conn_t *, udp_connp); \
- ipst = udp_connp->conn_netstack->netstack_ip; \
- idl_txl = \
- &ipst->ips_idl_tx_list[IDLHASHINDEX(cookie)];\
- mutex_enter(&idl_txl->txl_lock); \
- if (udp_connp->conn_direct_blocked || \
- (idd->idd_tx_fctl_df(idd->idd_tx_fctl_dh, \
- cookie) == 0)) { \
- DTRACE_PROBE1(ill__tx__not__blocked, \
- boolean, \
- udp_connp->conn_direct_blocked); \
- } else if (idl_txl->txl_cookie != NULL && \
- idl_txl->txl_cookie != cookie) { \
- udp_t *udp = udp_connp->conn_udp; \
- udp_stack_t *us = udp->udp_us; \
- \
- DTRACE_PROBE2(ill__send__tx__collision, \
- uintptr_t, cookie, \
- uintptr_t, idl_txl->txl_cookie); \
- UDP_STAT(us, udp_cookie_coll); \
- } else { \
- udp_connp->conn_direct_blocked = B_TRUE;\
- idl_txl->txl_cookie = cookie; \
- conn_drain_insert(udp_connp, idl_txl); \
- DTRACE_PROBE1(ill__send__tx__insert, \
- conn_t *, udp_connp); \
- } \
- mutex_exit(&idl_txl->txl_lock); \
- } \
- } else { \
- putnext((ire)->ire_stq, mp); \
- } \
-}
-
+/* This macro is used by the mac layer */
#define MBLK_RX_FANOUT_SLOWPATH(mp, ipha) \
(DB_TYPE(mp) != M_DATA || DB_REF(mp) != 1 || !OK_32PTR(ipha) || \
(((uchar_t *)ipha + IP_SIMPLE_HDR_LENGTH) >= (mp)->b_wptr))
@@ -520,13 +174,11 @@ typedef struct ip_pdescinfo_s PDESCINFO_STRUCT(2) ip_pdescinfo_t;
netstackid_to_zoneid((ipst)->ips_netstack->netstack_stackid) : \
(zoneid))
-extern int ip_wput_frag_mdt_min;
-extern boolean_t ip_can_frag_mdt(mblk_t *, ssize_t, ssize_t);
-extern mblk_t *ip_prepend_zoneid(mblk_t *, zoneid_t, ip_stack_t *);
extern void ill_flow_enable(void *, ip_mac_tx_cookie_t);
-extern zoneid_t ip_get_zoneid_v4(ipaddr_t, mblk_t *, ip_stack_t *, zoneid_t);
+extern zoneid_t ip_get_zoneid_v4(ipaddr_t, mblk_t *, ip_recv_attr_t *,
+ zoneid_t);
extern zoneid_t ip_get_zoneid_v6(in6_addr_t *, mblk_t *, const ill_t *,
- ip_stack_t *, zoneid_t);
+ ip_recv_attr_t *, zoneid_t);
/*
* flag passed in by IP based protocols to get a private ip stream with
@@ -542,8 +194,6 @@ extern zoneid_t ip_get_zoneid_v6(in6_addr_t *, mblk_t *, const ill_t *,
#define DEV_IP "/devices/pseudo/ip@0:ip"
#define DEV_IP6 "/devices/pseudo/ip6@0:ip6"
-extern struct kmem_cache *ip_helper_stream_cache;
-
#endif /* _KERNEL */
#ifdef __cplusplus