diff options
author | priyanka <none@none> | 2005-12-12 15:24:29 -0800 |
---|---|---|
committer | priyanka <none@none> | 2005-12-12 15:24:29 -0800 |
commit | 43d18f1c320355e93c47399bea0b2e022fe06364 (patch) | |
tree | b34f2864b862c5ac66d6014d591939f3e9f6704d /usr/src/uts | |
parent | 8fbd927ce8f563deec0dfab8fbb461dd1bfff20c (diff) | |
download | illumos-joyent-43d18f1c320355e93c47399bea0b2e022fe06364.tar.gz |
PSARC 2005/603 IP_NEXTHOP socket option
6264845 Need Policy Based Routing support in Solaris
Diffstat (limited to 'usr/src/uts')
-rw-r--r-- | usr/src/uts/common/inet/ip.h | 9 | ||||
-rw-r--r-- | usr/src/uts/common/inet/ip/ip.c | 273 | ||||
-rw-r--r-- | usr/src/uts/common/inet/ip/ip_ire.c | 19 | ||||
-rw-r--r-- | usr/src/uts/common/inet/ip_impl.h | 1 | ||||
-rw-r--r-- | usr/src/uts/common/inet/ip_ire.h | 4 | ||||
-rw-r--r-- | usr/src/uts/common/inet/ipclassifier.h | 5 | ||||
-rw-r--r-- | usr/src/uts/common/inet/ipsec_info.h | 5 | ||||
-rw-r--r-- | usr/src/uts/common/inet/sctp/sctp_opt_data.c | 41 | ||||
-rw-r--r-- | usr/src/uts/common/inet/tcp/tcp.c | 264 | ||||
-rw-r--r-- | usr/src/uts/common/inet/tcp/tcp_opt_data.c | 3 | ||||
-rw-r--r-- | usr/src/uts/common/inet/udp/udp.c | 5 | ||||
-rw-r--r-- | usr/src/uts/common/inet/udp/udp_opt_data.c | 3 | ||||
-rw-r--r-- | usr/src/uts/common/netinet/in.h | 3 |
13 files changed, 448 insertions, 187 deletions
diff --git a/usr/src/uts/common/inet/ip.h b/usr/src/uts/common/inet/ip.h index 507dfad5d6..f286253080 100644 --- a/usr/src/uts/common/inet/ip.h +++ b/usr/src/uts/common/inet/ip.h @@ -618,6 +618,15 @@ typedef struct ip_m_s { */ #define IRE_MARK_USESRC_CHECK 0x0020 +/* + * IRE_MARK_PRIVATE_ADDR is used for IP_NEXTHOP. When IP_NEXTHOP is set, the + * routing table lookup for the destination is bypassed and the packet is + * sent directly to the specified nexthop. The associated IRE_CACHE entries + * should be marked with IRE_MARK_PRIVATE_ADDR flag so that they don't show up + * in regular ire cache lookups. + */ +#define IRE_MARK_PRIVATE_ADDR 0x0040 + /* Flags with ire_expire routine */ #define FLUSH_ARP_TIME 0x0001 /* ARP info potentially stale timer */ #define FLUSH_REDIRECT_TIME 0x0002 /* Redirects potentially stale */ diff --git a/usr/src/uts/common/inet/ip/ip.c b/usr/src/uts/common/inet/ip/ip.c index a988b67cbb..17884e9d59 100644 --- a/usr/src/uts/common/inet/ip/ip.c +++ b/usr/src/uts/common/inet/ip/ip.c @@ -4113,23 +4113,30 @@ ip_bind_connected(conn_t *connp, mblk_t *mp, ipaddr_t *src_addrp, MATCH_IRE_RJ_BHOLE)); } else { /* - * If conn_dontroute is set, and onlink ipif is not found - * set ENETUNREACH error + * If conn_dontroute is set or if conn_nexthop_set is set, + * and onlink ipif is not found set ENETUNREACH error. */ - if (connp->conn_dontroute) { + if (connp->conn_dontroute || connp->conn_nexthop_set) { ipif_t *ipif; - ipif = ipif_lookup_onlink_addr(dst_addr, zoneid); + ipif = ipif_lookup_onlink_addr(connp->conn_dontroute ? 
+ dst_addr : connp->conn_nexthop_v4, zoneid); if (ipif == NULL) { error = ENETUNREACH; goto bad_addr; } ipif_refrele(ipif); } - dst_ire = ire_route_lookup(dst_addr, 0, 0, 0, NULL, &sire, - zoneid, - (MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | - MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE)); + + if (connp->conn_nexthop_set) { + dst_ire = ire_route_lookup(connp->conn_nexthop_v4, 0, + 0, 0, NULL, NULL, zoneid, 0); + } else { + dst_ire = ire_route_lookup(dst_addr, 0, 0, 0, NULL, + &sire, zoneid, + (MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | + MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE)); + } } /* * dst_ire can't be a broadcast when not ire_requested. @@ -6691,6 +6698,7 @@ ip_newroute(queue_t *q, mblk_t *mp, ipaddr_t dst, ill_t *in_ill, conn_t *connp) ire_t *ire = NULL; mblk_t *res_mp; ipaddr_t *addrp; + ipaddr_t nexthop_addr; ipif_t *src_ipif = NULL; ill_t *dst_ill = NULL; ipha_t *ipha; @@ -6712,6 +6720,7 @@ ip_newroute(queue_t *q, mblk_t *mp, ipaddr_t dst, ill_t *in_ill, conn_t *connp) boolean_t multirt_is_resolvable; boolean_t multirt_resolve_next; boolean_t do_attach_ill = B_FALSE; + boolean_t ip_nexthop = B_FALSE; zoneid_t zoneid; if (ip_debug > 2) { @@ -6760,6 +6769,10 @@ ip_newroute(queue_t *q, mblk_t *mp, ipaddr_t dst, ill_t *in_ill, conn_t *connp) if (ill_is_probeonly(attach_ill)) ire_marks = IRE_MARK_HIDDEN; } + if (mctl_present && io->ipsec_out_ip_nexthop) { + ip_nexthop = B_TRUE; + nexthop_addr = io->ipsec_out_nexthop_addr; + } /* * If this IRE is created for forwarding or it is not for * traffic for congestion controlled protocols, mark it as temporary. 
@@ -6788,6 +6801,28 @@ ip_newroute(queue_t *q, mblk_t *mp, ipaddr_t dst, ill_t *in_ill, conn_t *connp) if (in_ill != NULL) { ire = ire_srcif_table_lookup(dst, IRE_IF_RESOLVER, NULL, in_ill, MATCH_IRE_TYPE); + } else if (ip_nexthop) { + /* + * The first time we come here, we look for an IRE_INTERFACE + * entry for the specified nexthop, set the dst to be the + * nexthop address and create an IRE_CACHE entry for the + * nexthop. The next time around, we are able to find an + * IRE_CACHE entry for the nexthop, set the gateway to be the + * nexthop address and create an IRE_CACHE entry for the + * destination address via the specified nexthop. + */ + ire = ire_cache_lookup(nexthop_addr, zoneid); + if (ire != NULL) { + gw = nexthop_addr; + ire_marks |= IRE_MARK_PRIVATE_ADDR; + } else { + ire = ire_ftable_lookup(nexthop_addr, 0, 0, + IRE_INTERFACE, NULL, NULL, zoneid, 0, + MATCH_IRE_TYPE); + if (ire != NULL) { + dst = nexthop_addr; + } + } } else if (attach_ill == NULL) { ire = ire_ftable_lookup(dst, 0, 0, 0, NULL, &sire, zoneid, 0, @@ -7211,7 +7246,6 @@ ip_newroute(queue_t *q, mblk_t *mp, ipaddr_t dst, ill_t *in_ill, conn_t *connp) ire_t *ipif_ire; mblk_t *ire_fp_mp; - ASSERT(sire != NULL); if (gw == 0) gw = ire->ire_gateway_addr; /* @@ -7219,7 +7253,8 @@ ip_newroute(queue_t *q, mblk_t *mp, ipaddr_t dst, ill_t *in_ill, conn_t *connp) * off-link destination from the cache ire of the * gateway. * - * 1. The prefix ire 'sire' + * 1. The prefix ire 'sire' (Note that this does + * not apply to the conn_nexthop_set case) * 2. The cache ire of the gateway 'ire' * 3. The interface ire 'ipif_ire' * @@ -7227,9 +7262,14 @@ ip_newroute(queue_t *q, mblk_t *mp, ipaddr_t dst, ill_t *in_ill, conn_t *connp) * * If there is no interface route to the gateway, * it is a race condition, where we found the cache - * but the inteface route has been deleted. + * but the interface route has been deleted. 
*/ - ipif_ire = ire_ihandle_lookup_offlink(ire, sire); + if (ip_nexthop) { + ipif_ire = ire_ihandle_lookup_onlink(ire); + } else { + ipif_ire = + ire_ihandle_lookup_offlink(ire, sire); + } if (ipif_ire == NULL) { ip1dbg(("ip_newroute: " "ire_ihandle_lookup_offlink failed\n")); @@ -7268,19 +7308,21 @@ ip_newroute(queue_t *q, mblk_t *mp, ipaddr_t dst, ill_t *in_ill, conn_t *connp) save_ire->ire_dlureq_mp, src_ipif, in_ill, /* incoming ill */ - sire->ire_mask, /* Parent mask */ - sire->ire_phandle, /* Parent handle */ + (sire != NULL) ? + sire->ire_mask : 0, /* Parent mask */ + (sire != NULL) ? + sire->ire_phandle : 0, /* Parent handle */ ipif_ire->ire_ihandle, /* Interface handle */ - sire->ire_flags & - (RTF_SETSRC | RTF_MULTIRT), /* flags if any */ - &(sire->ire_uinfo)); + (sire != NULL) ? (sire->ire_flags & + (RTF_SETSRC | RTF_MULTIRT)) : 0, /* flags */ + (sire != NULL) ? + &(sire->ire_uinfo) : &(save_ire->ire_uinfo)); if (ire == NULL) { ire_refrele(ipif_ire); ire_refrele(save_ire); break; } - ire->ire_marks |= ire_marks; /* @@ -7288,20 +7330,23 @@ ip_newroute(queue_t *q, mblk_t *mp, ipaddr_t dst, ill_t *in_ill, conn_t *connp) * The newly created ire is tied to both of them via * the phandle and ihandle respectively. */ - IRB_REFHOLD(sire->ire_bucket); - /* Has it been removed already ? */ - if (sire->ire_marks & IRE_MARK_CONDEMNED) { - IRB_REFRELE(sire->ire_bucket); - ire_refrele(ipif_ire); - ire_refrele(save_ire); - break; + if (sire != NULL) { + IRB_REFHOLD(sire->ire_bucket); + /* Has it been removed already ? */ + if (sire->ire_marks & IRE_MARK_CONDEMNED) { + IRB_REFRELE(sire->ire_bucket); + ire_refrele(ipif_ire); + ire_refrele(save_ire); + break; + } } IRB_REFHOLD(ipif_ire->ire_bucket); /* Has it been removed already ? 
*/ if (ipif_ire->ire_marks & IRE_MARK_CONDEMNED) { IRB_REFRELE(ipif_ire->ire_bucket); - IRB_REFRELE(sire->ire_bucket); + if (sire != NULL) + IRB_REFRELE(sire->ire_bucket); ire_refrele(ipif_ire); ire_refrele(save_ire); break; @@ -7325,8 +7370,10 @@ ip_newroute(queue_t *q, mblk_t *mp, ipaddr_t dst, ill_t *in_ill, conn_t *connp) ire_refrele(save_ire); /* Assert that sire is not deleted yet. */ - ASSERT(sire->ire_ptpn != NULL); - IRB_REFRELE(sire->ire_bucket); + if (sire != NULL) { + ASSERT(sire->ire_ptpn != NULL); + IRB_REFRELE(sire->ire_bucket); + } /* Assert that ipif_ire is not deleted yet. */ ASSERT(ipif_ire->ire_ptpn != NULL); @@ -7349,8 +7396,8 @@ ip_newroute(queue_t *q, mblk_t *mp, ipaddr_t dst, ill_t *in_ill, conn_t *connp) multirt_resolve_next = B_TRUE; continue; } - - ire_refrele(sire); + if (sire != NULL) + ire_refrele(sire); ipif_refrele(src_ipif); ill_refrele(dst_ill); return; @@ -9086,12 +9133,19 @@ ip_opt_set_ipif(conn_t *connp, ipaddr_t addr, boolean_t checkonly, int option, if (addr != INADDR_ANY || checkonly) { ASSERT(connp != NULL); - ipif = ipif_lookup_addr(addr, NULL, connp->conn_zoneid, - CONNP_TO_WQ(connp), first_mp, ip_restart_optmgmt, &error); + if (option == IP_NEXTHOP) { + ipif = + ipif_lookup_onlink_addr(addr, connp->conn_zoneid); + } else { + ipif = ipif_lookup_addr(addr, NULL, connp->conn_zoneid, + CONNP_TO_WQ(connp), first_mp, ip_restart_optmgmt, + &error); + } if (ipif == NULL) { if (error == EINPROGRESS) return (error); - else if (option == IP_MULTICAST_IF) + else if ((option == IP_MULTICAST_IF) || + (option == IP_NEXTHOP)) return (EHOSTUNREACH); else return (EINVAL); @@ -9156,6 +9210,10 @@ ip_opt_set_ipif(conn_t *connp, ipaddr_t addr, boolean_t checkonly, int option, case IP_MULTICAST_IF: connp->conn_multicast_ipif = ipif; break; + case IP_NEXTHOP: + connp->conn_nexthop_v4 = addr; + connp->conn_nexthop_set = B_TRUE; + break; } if (ipif != NULL) { @@ -9472,6 +9530,7 @@ ip_opt_set(queue_t *q, uint_t optset_context, int level, int name, 
break; case IPPROTO_IP: switch (name) { + case IP_NEXTHOP: case IP_MULTICAST_IF: case IP_DONTFAILOVER_IF: { ipaddr_t addr = *i1; @@ -10268,6 +10327,12 @@ ip_opt_get(queue_t *q, int level, int name, uchar_t *ptr) return (sizeof (int)); case IP_SEC_OPT: return (ipsec_req_from_conn(connp, req, IPSEC_AF_V4)); + case IP_NEXTHOP: + if (connp->conn_nexthop_set) { + *(ipaddr_t *)ptr = connp->conn_nexthop_v4; + return (sizeof (ipaddr_t)); + } else + return (0); default: break; } @@ -17748,6 +17813,9 @@ ip_output(void *arg, mblk_t *mp, void *arg2, int caller) zoneid_t zoneid; boolean_t need_decref = B_FALSE; boolean_t ignore_dontroute = B_FALSE; + boolean_t ignore_nexthop = B_FALSE; + boolean_t ip_nexthop = B_FALSE; + ipaddr_t nexthop_addr; #ifdef _BIG_ENDIAN #define V_HLEN (v_hlen_tos_len >> 24) @@ -17850,20 +17918,21 @@ ip_output(void *arg, mblk_t *mp, void *arg2, int caller) if (CLASSD(dst)) goto multicast; - if ((connp->conn_dontroute) || (connp->conn_xmit_if_ill != NULL)) { + if ((connp->conn_dontroute) || (connp->conn_xmit_if_ill != NULL) || + (connp->conn_nexthop_set)) { /* * If the destination is a broadcast or a loopback - * address, both SO_DONTROUTE and IP_XMIT_IF go + * address, SO_DONTROUTE, IP_XMIT_IF and IP_NEXTHOP go * through the standard path. But in the case of local - * destination only SO_DONTROUTE goes through the - * standard path not IP_XMIT_IF. + * destination only SO_DONTROUTE and IP_NEXTHOP go through + * the standard path not IP_XMIT_IF. */ ire = ire_cache_lookup(dst, zoneid); if ((ire == NULL) || ((ire->ire_type != IRE_BROADCAST) && (ire->ire_type != IRE_LOOPBACK))) { - - if ((connp->conn_dontroute) && (ire != NULL) && - (ire->ire_type == IRE_LOCAL)) + if ((connp->conn_dontroute || + connp->conn_nexthop_set) && (ire != NULL) && + (ire->ire_type == IRE_LOCAL)) goto standard_path; if (ire != NULL) { @@ -17875,8 +17944,13 @@ ip_output(void *arg, mblk_t *mp, void *arg2, int caller) * bypass routing checks and go directly to * interface. 
*/ - if (connp->conn_dontroute) + if (connp->conn_dontroute) { goto dontroute; + } else if (connp->conn_nexthop_set) { + ip_nexthop = B_TRUE; + nexthop_addr = connp->conn_nexthop_v4; + goto send_from_ill; + } /* * If IP_XMIT_IF socket option is set, @@ -18227,52 +18301,69 @@ qnext: io = (ipsec_out_t *)first_mp->b_rptr; if (io->ipsec_out_attach_if || - io->ipsec_out_xmit_if) { + io->ipsec_out_xmit_if || + io->ipsec_out_ip_nexthop) { ill_t *ill; - ASSERT(io->ipsec_out_ill_index != 0); - ifindex = io->ipsec_out_ill_index; - ill = ill_lookup_on_ifindex(ifindex, B_FALSE, - NULL, NULL, NULL, NULL); /* - * ipsec_out_xmit_if bit is used to tell - * ip_wput to use the ill to send outgoing data - * as we have no conn when data comes from ICMP - * error msg routines. Currently this feature is - * only used by ip_mrtun_forward routine. + * We may have lost the conn context if we are + * coming here from ip_newroute(). Copy the + * nexthop information. */ - if (io->ipsec_out_xmit_if) { - xmit_ill = ill; - if (xmit_ill == NULL) { - ip1dbg(("ip_wput: bad ifindex for" - "xmit_ill %d\n", ifindex)); - freemsg(first_mp); - BUMP_MIB(&ip_mib, ipOutDiscards); - ASSERT(!need_decref); - return; - } - /* Free up the ipsec_out_t mblk */ - ASSERT(first_mp->b_cont == mp); - first_mp->b_cont = NULL; - freeb(first_mp); - /* Just send the IP header+ICMP+data */ - first_mp = mp; + if (io->ipsec_out_ip_nexthop) { + ip_nexthop = B_TRUE; + nexthop_addr = io->ipsec_out_nexthop_addr; + ipha = (ipha_t *)mp->b_rptr; dst = ipha->ipha_dst; goto send_from_ill; - } else { - attach_ill = ill; - } + ASSERT(io->ipsec_out_ill_index != 0); + ifindex = io->ipsec_out_ill_index; + ill = ill_lookup_on_ifindex(ifindex, B_FALSE, + NULL, NULL, NULL, NULL); + /* + * ipsec_out_xmit_if bit is used to tell + * ip_wput to use the ill to send outgoing data + * as we have no conn when data comes from ICMP + * error msg routines. Currently this feature is + * only used by ip_mrtun_forward routine. 
+ */ + if (io->ipsec_out_xmit_if) { + xmit_ill = ill; + if (xmit_ill == NULL) { + ip1dbg(("ip_output:bad ifindex " + "for xmit_ill %d\n", + ifindex)); + freemsg(first_mp); + BUMP_MIB(&ip_mib, + ipOutDiscards); + ASSERT(!need_decref); + return; + } + /* Free up the ipsec_out_t mblk */ + ASSERT(first_mp->b_cont == mp); + first_mp->b_cont = NULL; + freeb(first_mp); + /* Just send the IP header+ICMP+data */ + first_mp = mp; + ipha = (ipha_t *)mp->b_rptr; + dst = ipha->ipha_dst; + goto send_from_ill; + } else { + attach_ill = ill; + } - if (attach_ill == NULL) { - ASSERT(xmit_ill == NULL); - ip1dbg(("ip_wput : bad ifindex for " - "(BIND TO IPIF_NOFAILOVER) %d\n", ifindex)); - freemsg(first_mp); - BUMP_MIB(&ip_mib, ipOutDiscards); - ASSERT(!need_decref); - return; + if (attach_ill == NULL) { + ASSERT(xmit_ill == NULL); + ip1dbg(("ip_output: bad ifindex for " + "(BIND TO IPIF_NOFAILOVER) %d\n", + ifindex)); + freemsg(first_mp); + BUMP_MIB(&ip_mib, ipOutDiscards); + ASSERT(!need_decref); + return; + } } } } @@ -18711,6 +18802,7 @@ qnext: if ((ire != NULL) && (ire->ire_type & (IRE_BROADCAST | IRE_LOCAL | IRE_LOOPBACK))) { ignore_dontroute = B_TRUE; + ignore_nexthop = B_TRUE; } if (ire != NULL) { ire_refrele(ire); @@ -18853,6 +18945,16 @@ send_from_ill: if (need_decref) CONN_DEC_REF(connp); return; + } else if (ip_nexthop || (connp != NULL && + (connp->conn_nexthop_set)) && !ignore_nexthop) { + if (!ip_nexthop) { + ip_nexthop = B_TRUE; + nexthop_addr = connp->conn_nexthop_v4; + } + match_flags = MATCH_IRE_MARK_PRIVATE_ADDR | + MATCH_IRE_GW; + ire = ire_ctable_lookup(dst, nexthop_addr, 0, + NULL, zoneid, match_flags); } else { ire = ire_cache_lookup(dst, zoneid); } @@ -18861,7 +18963,8 @@ send_from_ill: * Make sure we don't load spread if this * is IPIF_NOFAILOVER case. 
*/ - if (attach_ill != NULL) { + if ((attach_ill != NULL) || + (ip_nexthop && !ignore_nexthop)) { if (mctl_present) { io = (ipsec_out_t *)first_mp->b_rptr; ASSERT(first_mp->b_datap->db_type == @@ -18890,9 +18993,15 @@ send_from_ill: first_mp->b_cont = mp; mctl_present = B_TRUE; } - io->ipsec_out_ill_index = attach_ill-> - ill_phyint->phyint_ifindex; - io->ipsec_out_attach_if = B_TRUE; + if (attach_ill != NULL) { + io->ipsec_out_ill_index = attach_ill-> + ill_phyint->phyint_ifindex; + io->ipsec_out_attach_if = B_TRUE; + } else { + io->ipsec_out_ip_nexthop = ip_nexthop; + io->ipsec_out_nexthop_addr = + nexthop_addr; + } } noirefound: /* diff --git a/usr/src/uts/common/inet/ip/ip_ire.c b/usr/src/uts/common/inet/ip/ip_ire.c index ed4c13a8e2..c1c903ff25 100644 --- a/usr/src/uts/common/inet/ip/ip_ire.c +++ b/usr/src/uts/common/inet/ip/ip_ire.c @@ -4393,6 +4393,18 @@ ire_match_args(ire_t *ire, ipaddr_t addr, ipaddr_t mask, ipaddr_t gateway, (ire->ire_marks & IRE_MARK_HIDDEN)) return (B_FALSE); + /* + * MATCH_IRE_MARK_PRIVATE_ADDR is set when IP_NEXTHOP option + * is used. In that case the routing table is bypassed and the + * packets are sent directly to the specified nexthop. The + * IRE_CACHE entry representing this route should be marked + * with IRE_MARK_PRIVATE_ADDR. 
+ */ + + if (!(match_flags & MATCH_IRE_MARK_PRIVATE_ADDR) && + (ire->ire_marks & IRE_MARK_PRIVATE_ADDR)) + return (B_FALSE); + if (zoneid != ALL_ZONES && zoneid != ire->ire_zoneid) { /* * If MATCH_IRE_ZONEONLY has been set and the supplied zoneid is @@ -4498,6 +4510,9 @@ ire_match_args(ire_t *ire, ipaddr_t addr, ipaddr_t mask, ipaddr_t gateway, ((!(match_flags & MATCH_IRE_MARK_HIDDEN)) || (ire->ire_type != IRE_CACHE || ire->ire_marks & IRE_MARK_HIDDEN)) && + ((!(match_flags & MATCH_IRE_MARK_PRIVATE_ADDR)) || + (ire->ire_type != IRE_CACHE || + ire->ire_marks & IRE_MARK_PRIVATE_ADDR)) && ((!(match_flags & MATCH_IRE_ILL)) || (ire_ill == ipif_ill)) && ((!(match_flags & MATCH_IRE_IHANDLE)) || @@ -5005,8 +5020,10 @@ ire_cache_lookup(ipaddr_t addr, zoneid_t zoneid) irb_ptr = &ip_cache_table[IRE_ADDR_HASH(addr, ip_cache_table_size)]; rw_enter(&irb_ptr->irb_lock, RW_READER); for (ire = irb_ptr->irb_ire; ire != NULL; ire = ire->ire_next) { - if (ire->ire_marks & (IRE_MARK_CONDEMNED | IRE_MARK_HIDDEN)) + if (ire->ire_marks & (IRE_MARK_CONDEMNED | + IRE_MARK_HIDDEN | IRE_MARK_PRIVATE_ADDR)) { continue; + } if (ire->ire_addr == addr) { if (zoneid == ALL_ZONES || ire->ire_zoneid == zoneid || ire->ire_type == IRE_LOCAL) { diff --git a/usr/src/uts/common/inet/ip_impl.h b/usr/src/uts/common/inet/ip_impl.h index f55bb7d6ce..8a9f611fab 100644 --- a/usr/src/uts/common/inet/ip_impl.h +++ b/usr/src/uts/common/inet/ip_impl.h @@ -363,6 +363,7 @@ typedef struct ip_mdt_info_s { */ #define CONN_IS_MD_FASTPATH(connp) \ ((connp)->conn_dontroute == 0 && /* SO_DONTROUTE */ \ + !((connp)->conn_nexthop_set) && /* IP_NEXTHOP */ \ (connp)->conn_nofailover_ill == NULL && /* IPIF_NOFAILOVER */ \ (connp)->conn_xmit_if_ill == NULL && /* IP_XMIT_IF */ \ (connp)->conn_outgoing_pill == NULL && /* IP{V6}_BOUND_PIF */ \ diff --git a/usr/src/uts/common/inet/ip_ire.h b/usr/src/uts/common/inet/ip_ire.h index 5ed3cff368..2e4a3b99db 100644 --- a/usr/src/uts/common/inet/ip_ire.h +++ 
b/usr/src/uts/common/inet/ip_ire.h @@ -20,7 +20,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* Copyright (c) 1990 Mentat Inc. */ @@ -126,6 +126,8 @@ extern "C" { #define MATCH_IRE_ZONEONLY 0x4000 /* Match IREs in specified zone, ie */ /* don't match IRE_LOCALs from other */ /* zones or shared IREs */ +#define MATCH_IRE_MARK_PRIVATE_ADDR 0x8000 /* Match IRE ire_marks with */ + /* IRE_MARK_PRIVATE_ADDR. */ /* Structure for ire_cache_count() */ typedef struct { diff --git a/usr/src/uts/common/inet/ipclassifier.h b/usr/src/uts/common/inet/ipclassifier.h index a5148c57c0..85302c350b 100644 --- a/usr/src/uts/common/inet/ipclassifier.h +++ b/usr/src/uts/common/inet/ipclassifier.h @@ -174,7 +174,8 @@ struct conn_s { conn_recvif : 1, /* IP_RECVIF option */ conn_recvslla : 1, /* IP_RECVSLLA option */ conn_mdt_ok : 1, /* MDT is permitted */ - pad_to_bit_31 : 2; + conn_nexthop_set : 1, + pad_to_bit_31 : 1; tcp_t *conn_tcp; /* Pointer to the tcp struct */ udp_t *conn_udp; /* Pointer to the udp struct */ @@ -257,6 +258,8 @@ struct conn_s { /* mtuinfo from IPV6_PACKET_TOO_BIG conditional on conn_pathmtu_valid */ struct ip6_mtuinfo mtuinfo; zoneid_t conn_zoneid; /* zone connection is in */ + in6_addr_t conn_nexthop_v6; /* nexthop IP address */ +#define conn_nexthop_v4 V4_PART_OF_V6(conn_nexthop_v6) #ifdef CONN_DEBUG #define CONN_TRACE_MAX 10 int conn_trace_last; /* ndx of last used tracebuf */ diff --git a/usr/src/uts/common/inet/ipsec_info.h b/usr/src/uts/common/inet/ipsec_info.h index 554dcdf0c1..f83f4e216b 100644 --- a/usr/src/uts/common/inet/ipsec_info.h +++ b/usr/src/uts/common/inet/ipsec_info.h @@ -219,7 +219,8 @@ typedef struct ipsec_out_s { * messages are to be trusted by all receivers. 
*/ ipsec_out_icmp_loopback: 1, - ipsec_out_pad_bits : 12; + ipsec_out_ip_nexthop : 1, /* IP_NEXTHOP option is set */ + ipsec_out_pad_bits : 11; cred_t *ipsec_out_cred; uint32_t ipsec_out_capab_ill_index; @@ -235,6 +236,8 @@ typedef struct ipsec_out_s { crypto_data_t ipsec_out_crypto_mac; /* to store the MAC */ zoneid_t ipsec_out_zoneid; /* source zone for the datagram */ + in6_addr_t ipsec_out_nexthop_v6; /* nexthop IP address */ +#define ipsec_out_nexthop_addr V4_PART_OF_V6(ipsec_out_nexthop_v6) } ipsec_out_t; /* diff --git a/usr/src/uts/common/inet/sctp/sctp_opt_data.c b/usr/src/uts/common/inet/sctp/sctp_opt_data.c index 0bbd1cf47b..8299f6cd7b 100644 --- a/usr/src/uts/common/inet/sctp/sctp_opt_data.c +++ b/usr/src/uts/common/inet/sctp/sctp_opt_data.c @@ -44,6 +44,7 @@ #include <netinet/ip6.h> #include <inet/ip.h> #include <inet/ip_ire.h> +#include <inet/ip_if.h> #include <inet/ipclassifier.h> #include <inet/ipsec_impl.h> @@ -796,6 +797,7 @@ sctp_get_opt(sctp_t *sctp, int level, int name, void *ptr, socklen_t *optlen) int *i1 = (int *)ptr; int retval = 0; int buflen = *optlen; + conn_t *connp = sctp->sctp_connp; ip6_pkt_t *ipp = &sctp->sctp_sticky_ipp; /* In most cases, the return buffer is just an int */ *optlen = sizeof (int32_t); @@ -1041,6 +1043,14 @@ sctp_get_opt(sctp_t *sctp, int level, int name, void *ptr, socklen_t *optlen) case IP_TTL: *i1 = (int)sctp->sctp_ipha->ipha_ttl; break; + case IP_NEXTHOP: + if (connp->conn_nexthop_set) { + *(ipaddr_t *)ptr = connp->conn_nexthop_v4; + *optlen = sizeof (ipaddr_t); + } else { + *optlen = 0; + } + break; default: retval = EINVAL; break; @@ -1487,6 +1497,37 @@ sctp_set_opt(sctp_t *sctp, int level, int name, const void *invalp, case IP_UNSPEC_SRC: connp->conn_unspec_src = onoff; break; + case IP_NEXTHOP: { + ipaddr_t addr = *i1; + ipif_t *ipif = NULL; + ill_t *ill; + + if (secpolicy_net(CRED(), OP_CONFIG, B_TRUE) == 0) { + ipif = + ipif_lookup_onlink_addr(addr, + connp->conn_zoneid); + if (ipif == NULL) { + retval = 
EHOSTUNREACH; + break; + } + ill = ipif->ipif_ill; + mutex_enter(&ill->ill_lock); + if ((ill->ill_state_flags & ILL_CONDEMNED) || + (ipif->ipif_state_flags & IPIF_CONDEMNED)) { + mutex_exit(&ill->ill_lock); + ipif_refrele(ipif); + retval = EHOSTUNREACH; + break; + } + mutex_exit(&ill->ill_lock); + ipif_refrele(ipif); + mutex_enter(&connp->conn_lock); + connp->conn_nexthop_v4 = addr; + connp->conn_nexthop_set = B_TRUE; + mutex_exit(&connp->conn_lock); + } + break; + } default: retval = EINVAL; break; diff --git a/usr/src/uts/common/inet/tcp/tcp.c b/usr/src/uts/common/inet/tcp/tcp.c index a9ebe742ae..61495f4705 100644 --- a/usr/src/uts/common/inet/tcp/tcp.c +++ b/usr/src/uts/common/inet/tcp/tcp.c @@ -27,7 +27,7 @@ #pragma ident "%Z%%M% %I% %E% SMI" -const char tcp_version[] = "%Z%%M% %I% %E% SMI"; +const char tcp_version[] = "@(#)tcp.c 1.490 05/11/29 SMI"; #include <sys/types.h> #include <sys/stream.h> @@ -2468,7 +2468,7 @@ tcp_adapt_ire(tcp_t *tcp, mblk_t *ire_mp) tcp_hsp_t *hsp; ire_t *ire; ire_t *sire = NULL; - iulp_t *ire_uinfo; + iulp_t *ire_uinfo = NULL; uint32_t mss_max; uint32_t mss; boolean_t tcp_detached = TCP_IS_DETACHED(tcp); @@ -2486,32 +2486,58 @@ tcp_adapt_ire(tcp_t *tcp, mblk_t *ire_mp) BUMP_MIB(&ip_mib, ipInDiscards); return (0); } - - ire = ire_cache_lookup(tcp->tcp_connp->conn_rem, zoneid); - if (ire != NULL) { - ire_cacheable = B_TRUE; - ire_uinfo = (ire_mp != NULL) ? - &((ire_t *)ire_mp->b_rptr)->ire_uinfo: - &ire->ire_uinfo; - - } else { - if (ire_mp == NULL) { + /* + * If IP_NEXTHOP is set, then look for an IRE_CACHE + * for the destination with the nexthop as gateway. + * ire_ctable_lookup() is used because this particular + * ire, if it exists, will be marked private. + * If that is not available, use the interface ire + * for the nexthop. 
+ */ + if (tcp->tcp_connp->conn_nexthop_set) { + ire = ire_ctable_lookup(tcp->tcp_connp->conn_rem, + tcp->tcp_connp->conn_nexthop_v4, 0, NULL, zoneid, + MATCH_IRE_MARK_PRIVATE_ADDR | MATCH_IRE_GW); + if (ire == NULL) { ire = ire_ftable_lookup( - tcp->tcp_connp->conn_rem, - 0, 0, 0, NULL, &sire, zoneid, 0, - (MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT)); + tcp->tcp_connp->conn_nexthop_v4, + 0, 0, IRE_INTERFACE, NULL, NULL, zoneid, 0, + MATCH_IRE_TYPE); if (ire == NULL) return (0); - ire_uinfo = (sire != NULL) ? &sire->ire_uinfo : + } else { + ire_uinfo = &ire->ire_uinfo; + } + } else { + ire = ire_cache_lookup(tcp->tcp_connp->conn_rem, + zoneid); + if (ire != NULL) { + ire_cacheable = B_TRUE; + ire_uinfo = (ire_mp != NULL) ? + &((ire_t *)ire_mp->b_rptr)->ire_uinfo: &ire->ire_uinfo; + } else { - ire = (ire_t *)ire_mp->b_rptr; - ire_uinfo = - &((ire_t *)ire_mp->b_rptr)->ire_uinfo; + if (ire_mp == NULL) { + ire = ire_ftable_lookup( + tcp->tcp_connp->conn_rem, + 0, 0, 0, NULL, &sire, zoneid, 0, + (MATCH_IRE_RECURSIVE | + MATCH_IRE_DEFAULT)); + if (ire == NULL) + return (0); + ire_uinfo = (sire != NULL) ? + &sire->ire_uinfo : + &ire->ire_uinfo; + } else { + ire = (ire_t *)ire_mp->b_rptr; + ire_uinfo = + &((ire_t *) + ire_mp->b_rptr)->ire_uinfo; + } } } ASSERT(ire != NULL); - ASSERT(ire_uinfo != NULL); if ((ire->ire_src_addr == INADDR_ANY) || (ire->ire_type & IRE_BROADCAST)) { @@ -2550,7 +2576,19 @@ tcp_adapt_ire(tcp_t *tcp, mblk_t *ire_mp) tcp->tcp_ipha->ipha_fragment_offset_and_flags = htons(IPH_DF); } - tcp->tcp_localnet = (ire->ire_gateway_addr == 0); + /* + * If ire_uinfo is NULL, this is the IRE_INTERFACE case + * for IP_NEXTHOP. No cache ire has been found for the + * destination and we are working with the nexthop's + * interface ire. Since we need to forward all packets + * to the nexthop first, we "blindly" set tcp_localnet + * to false, even though the destination may also be + * onlink. 
+ */ + if (ire_uinfo == NULL) + tcp->tcp_localnet = 0; + else + tcp->tcp_localnet = (ire->ire_gateway_addr == 0); } else { /* * For incoming connection ire_mp = NULL @@ -2662,94 +2700,100 @@ tcp_adapt_ire(tcp_t *tcp, mblk_t *ire_mp) * Make use of the cached rtt and rtt_sd values to calculate the * initial RTO. Note that they are already initialized in * tcp_init_values(). - */ - if (ire_uinfo->iulp_rtt != 0) { - clock_t rto; - - tcp->tcp_rtt_sa = ire_uinfo->iulp_rtt; - tcp->tcp_rtt_sd = ire_uinfo->iulp_rtt_sd; - rto = (tcp->tcp_rtt_sa >> 3) + tcp->tcp_rtt_sd + - tcp_rexmit_interval_extra + (tcp->tcp_rtt_sa >> 5); - - if (rto > tcp_rexmit_interval_max) { - tcp->tcp_rto = tcp_rexmit_interval_max; - } else if (rto < tcp_rexmit_interval_min) { - tcp->tcp_rto = tcp_rexmit_interval_min; - } else { - tcp->tcp_rto = rto; + * If ire_uinfo is NULL, i.e., we do not have a cache ire for + * IP_NEXTHOP, but instead are using the interface ire for the + * nexthop, then we do not use the ire_uinfo from that ire to + * do any initializations. 
+ */ + if (ire_uinfo != NULL) { + if (ire_uinfo->iulp_rtt != 0) { + clock_t rto; + + tcp->tcp_rtt_sa = ire_uinfo->iulp_rtt; + tcp->tcp_rtt_sd = ire_uinfo->iulp_rtt_sd; + rto = (tcp->tcp_rtt_sa >> 3) + tcp->tcp_rtt_sd + + tcp_rexmit_interval_extra + (tcp->tcp_rtt_sa >> 5); + + if (rto > tcp_rexmit_interval_max) { + tcp->tcp_rto = tcp_rexmit_interval_max; + } else if (rto < tcp_rexmit_interval_min) { + tcp->tcp_rto = tcp_rexmit_interval_min; + } else { + tcp->tcp_rto = rto; + } + } + if (ire_uinfo->iulp_ssthresh != 0) + tcp->tcp_cwnd_ssthresh = ire_uinfo->iulp_ssthresh; + else + tcp->tcp_cwnd_ssthresh = TCP_MAX_LARGEWIN; + if (ire_uinfo->iulp_spipe > 0) { + tcp->tcp_xmit_hiwater = MIN(ire_uinfo->iulp_spipe, + tcp_max_buf); + if (tcp_snd_lowat_fraction != 0) + tcp->tcp_xmit_lowater = tcp->tcp_xmit_hiwater / + tcp_snd_lowat_fraction; + (void) tcp_maxpsz_set(tcp, B_TRUE); } - } - if (ire_uinfo->iulp_ssthresh != 0) - tcp->tcp_cwnd_ssthresh = ire_uinfo->iulp_ssthresh; - else - tcp->tcp_cwnd_ssthresh = TCP_MAX_LARGEWIN; - if (ire_uinfo->iulp_spipe > 0) { - tcp->tcp_xmit_hiwater = MIN(ire_uinfo->iulp_spipe, - tcp_max_buf); - if (tcp_snd_lowat_fraction != 0) - tcp->tcp_xmit_lowater = tcp->tcp_xmit_hiwater / - tcp_snd_lowat_fraction; - (void) tcp_maxpsz_set(tcp, B_TRUE); - } - /* - * Note that up till now, acceptor always inherits receive - * window from the listener. But if there is a metrics associated - * with a host, we should use that instead of inheriting it from - * listener. Thus we need to pass this info back to the caller. - */ - if (ire_uinfo->iulp_rpipe > 0) { - tcp->tcp_rwnd = MIN(ire_uinfo->iulp_rpipe, tcp_max_buf); - } else { /* - * For passive open, set tcp_rwnd to 0 so that the caller - * knows that there is no rpipe metric for this connection. + * Note that up till now, acceptor always inherits receive + * window from the listener. But if there is a metrics + * associated with a host, we should use that instead of + * inheriting it from listener. 
Thus we need to pass this + * info back to the caller. */ - if (tcp_detached) - tcp->tcp_rwnd = 0; - } - if (ire_uinfo->iulp_rtomax > 0) { - tcp->tcp_second_timer_threshold = ire_uinfo->iulp_rtomax; - } + if (ire_uinfo->iulp_rpipe > 0) { + tcp->tcp_rwnd = MIN(ire_uinfo->iulp_rpipe, tcp_max_buf); + } + + if (ire_uinfo->iulp_rtomax > 0) { + tcp->tcp_second_timer_threshold = + ire_uinfo->iulp_rtomax; + } - /* - * Use the metric option settings, iulp_tstamp_ok and iulp_wscale_ok, - * only for active open. What this means is that if the other side - * uses timestamp or window scale option, TCP will also use those - * options. That is for passive open. If the application sets a - * large window, window scale is enabled regardless of the value in - * iulp_wscale_ok. This is the behavior since 2.6. So we keep it. - * The only case left in passive open processing is the check for SACK. - * - * For ECN, it should probably be like SACK. But the current - * value is binary, so we treat it like the other cases. The - * metric only controls active open. For passive open, the ndd - * param, tcp_ecn_permitted, controls the behavior. - */ - if (!tcp_detached) { - /* - * The if check means that the following can only be turned - * on by the metrics only IRE, but not off. - */ - if (ire_uinfo->iulp_tstamp_ok) - tcp->tcp_snd_ts_ok = B_TRUE; - if (ire_uinfo->iulp_wscale_ok) - tcp->tcp_snd_ws_ok = B_TRUE; - if (ire_uinfo->iulp_sack == 2) - tcp->tcp_snd_sack_ok = B_TRUE; - if (ire_uinfo->iulp_ecn_ok) - tcp->tcp_ecn_ok = B_TRUE; - } else { /* - * Passive open. - * - * As above, the if check means that SACK can only be - * turned on by the metric only IRE. + * Use the metric option settings, iulp_tstamp_ok and + * iulp_wscale_ok, only for active open. What this means + * is that if the other side uses timestamp or window + * scale option, TCP will also use those options. That + * is for passive open. 
If the application sets a + large window, window scale is enabled regardless of + the value in iulp_wscale_ok. This is the behavior + since 2.6. So we keep it. + * The only case left in passive open processing is the + * check for SACK. + * For ECN, it should probably be like SACK. But the + * current value is binary, so we treat it like the other + * cases. The metric only controls active open. For passive + * open, the ndd param, tcp_ecn_permitted, controls the + * behavior. */ - if (ire_uinfo->iulp_sack > 0) { - tcp->tcp_snd_sack_ok = B_TRUE; + if (!tcp_detached) { + /* + * The if check means that the following can only + * be turned on by the metrics only IRE, but not off. + */ + if (ire_uinfo->iulp_tstamp_ok) + tcp->tcp_snd_ts_ok = B_TRUE; + if (ire_uinfo->iulp_wscale_ok) + tcp->tcp_snd_ws_ok = B_TRUE; + if (ire_uinfo->iulp_sack == 2) + tcp->tcp_snd_sack_ok = B_TRUE; + if (ire_uinfo->iulp_ecn_ok) + tcp->tcp_ecn_ok = B_TRUE; + } else { + /* + * Passive open. + * + * As above, the if check means that SACK can only be + * turned on by the metric only IRE. + */ + if (ire_uinfo->iulp_sack > 0) { + tcp->tcp_snd_sack_ok = B_TRUE; + } } } + /* * XXX: Note that currently, ire_max_frag can be as small as 68 * because of PMTUd. So tcp_mss may go to negative if combined @@ -5308,11 +5352,24 @@ tcp_conn_request(void *arg, mblk_t *mp, void *arg2) tcp->tcp_second_ctimer_threshold; /* + * tcp_adapt_ire() may change tcp_rwnd according to the ire metrics. + * If it does not, the eager's receive window will be set to the + * listener's receive window later in this function. + */ + eager->tcp_rwnd = 0; + + /* * Zones: tcp_adapt_ire() and tcp_send_data() both need the * zone id before the accept is completed in tcp_wput_accept(). 
*/ econnp->conn_zoneid = connp->conn_zoneid; + /* Copy nexthop information from listener to eager */ + if (connp->conn_nexthop_set) { + econnp->conn_nexthop_set = connp->conn_nexthop_set; + econnp->conn_nexthop_v4 = connp->conn_nexthop_v4; + } + eager->tcp_hard_binding = B_TRUE; tcp_bind_hash_insert(&tcp_bind_fanout[ @@ -7662,6 +7719,7 @@ tcp_init_values(tcp_t *tcp) tcp->tcp_ms_we_have_waited = 0; tcp->tcp_last_recv_time = lbolt; tcp->tcp_cwnd_max = tcp_cwnd_max_; + tcp->tcp_cwnd_ssthresh = TCP_MAX_LARGEWIN; tcp->tcp_snd_burst = TCP_CWND_INFINITE; tcp->tcp_maxpsz = tcp_maxpsz_multiplier; @@ -9362,6 +9420,9 @@ tcp_opt_get(queue_t *q, int level, int name, uchar_t *ptr) case IP_TTL: *i1 = (int)tcp->tcp_ipha->ipha_ttl; break; + case IP_NEXTHOP: + /* Handled at IP level */ + return (-EINVAL); default: return (-1); } @@ -9912,6 +9973,7 @@ tcp_opt_set(queue_t *q, uint_t optset_context, int level, int name, } break; case IP_BOUND_IF: + case IP_NEXTHOP: /* Handled at the IP level */ return (-EINVAL); case IP_SEC_OPT: @@ -17725,6 +17787,7 @@ tcp_zcopy_check(tcp_t *tcp) IPCL_IS_CONNECTED(connp) && (connp->conn_flags & IPCL_CHECK_POLICY) == 0 && connp->conn_dontroute == 0 && + !connp->conn_nexthop_set && connp->conn_xmit_if_ill == NULL && connp->conn_nofailover_ill == NULL && do_tcpzcopy == 1) { @@ -17895,6 +17958,7 @@ tcp_send_data(tcp_t *tcp, queue_t *q, mblk_t *mp) !IPCL_IS_CONNECTED(connp) || (connp->conn_flags & IPCL_CHECK_POLICY) != 0 || connp->conn_dontroute || + connp->conn_nexthop_set || connp->conn_xmit_if_ill != NULL || connp->conn_nofailover_ill != NULL || ipha->ipha_ident == IP_HDR_INCLUDED || diff --git a/usr/src/uts/common/inet/tcp/tcp_opt_data.c b/usr/src/uts/common/inet/tcp/tcp_opt_data.c index d0ab3d8130..2dd6db079e 100644 --- a/usr/src/uts/common/inet/tcp/tcp_opt_data.c +++ b/usr/src/uts/common/inet/tcp/tcp_opt_data.c @@ -140,6 +140,9 @@ opdes_t tcp_opt_arr[] = { { IPV6_BOUND_IF, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_PASSNEXT, sizeof (int), 0 /* no ifindex 
*/ }, +{ IP_NEXTHOP, IPPROTO_IP, OA_RW, OA_RW, OP_CONFIG, OP_PASSNEXT, + sizeof (in_addr_t), -1 /* not initialized */ }, + { IPV6_BOUND_PIF, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_PASSNEXT, sizeof (int), 0 /* no ifindex */ }, diff --git a/usr/src/uts/common/inet/udp/udp.c b/usr/src/uts/common/inet/udp/udp.c index d804018911..c13d7c485f 100644 --- a/usr/src/uts/common/inet/udp/udp.c +++ b/usr/src/uts/common/inet/udp/udp.c @@ -2989,6 +2989,9 @@ udp_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) case IP_TTL: *i1 = (int)udp->udp_ttl; break; /* goto sizeof (int) option return */ + case IP_NEXTHOP: + /* Handled at IP level */ + return (-EINVAL); case IP_MULTICAST_IF: /* 0 address if not set */ *(ipaddr_t *)ptr = udp->udp_multicast_if_addr; @@ -3418,6 +3421,7 @@ udp_opt_set(queue_t *q, uint_t optset_context, int level, case MCAST_JOIN_SOURCE_GROUP: case MCAST_LEAVE_SOURCE_GROUP: case IP_SEC_OPT: + case IP_NEXTHOP: /* * "soft" error (negative) * option not handled at this level @@ -6054,6 +6058,7 @@ udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha) (ire->ire_flags & RTF_MULTIRT) || ire->ire_stq == NULL || ire->ire_max_frag < ntohs(ipha->ipha_length) || (ire_fp_mp = ire->ire_fp_mp) == NULL || + (connp->conn_nexthop_set) || (ire_fp_mp_len = MBLKL(ire_fp_mp)) > MBLKHEAD(mp)) { if (ipif != NULL) ipif_refrele(ipif); diff --git a/usr/src/uts/common/inet/udp/udp_opt_data.c b/usr/src/uts/common/inet/udp/udp_opt_data.c index 33429cc59a..328f2cb44b 100644 --- a/usr/src/uts/common/inet/udp/udp_opt_data.c +++ b/usr/src/uts/common/inet/udp/udp_opt_data.c @@ -137,6 +137,9 @@ opdes_t udp_opt_arr[] = { { IP_UNSPEC_SRC, IPPROTO_IP, OA_R, OA_RW, OP_RAW, OP_PASSNEXT, sizeof (int), 0 }, +{ IP_NEXTHOP, IPPROTO_IP, OA_RW, OA_RW, OP_CONFIG, OP_PASSNEXT, + sizeof (in_addr_t), -1 /* not initialized */ }, + { MCAST_JOIN_GROUP, IPPROTO_IP, OA_X, OA_X, OP_NP, (OP_PASSNEXT|OP_NODEFAULT), sizeof (struct group_req), -1 /* not initialized */ }, diff --git 
a/usr/src/uts/common/netinet/in.h b/usr/src/uts/common/netinet/in.h index 6e7f4066bb..7bed8be4b9 100644 --- a/usr/src/uts/common/netinet/in.h +++ b/usr/src/uts/common/netinet/in.h @@ -807,7 +807,8 @@ struct sockaddr_in6 { #define IP_BLOCK_SOURCE 0x15 /* block mcast pkts from source */ #define IP_UNBLOCK_SOURCE 0x16 /* unblock mcast pkts from source */ #define IP_ADD_SOURCE_MEMBERSHIP 0x17 /* add mcast group/source pair */ -#define IP_DROP_SOURCE_MEMBERSHIP 0x18 /* drop mcast gruop/source pair */ +#define IP_DROP_SOURCE_MEMBERSHIP 0x18 /* drop mcast group/source pair */ +#define IP_NEXTHOP 0x19 /* send directly to next hop */ #if !defined(_XPG4_2) || defined(__EXTENSIONS__) /* |