diff options
Diffstat (limited to 'usr/src/uts/common/inet/ip/ip_rts.c')
-rw-r--r-- | usr/src/uts/common/inet/ip/ip_rts.c | 923 |
1 files changed, 520 insertions, 403 deletions
diff --git a/usr/src/uts/common/inet/ip/ip_rts.c b/usr/src/uts/common/inet/ip/ip_rts.c index 70c8bd2ea1..228c7581a3 100644 --- a/usr/src/uts/common/inet/ip/ip_rts.c +++ b/usr/src/uts/common/inet/ip/ip_rts.c @@ -81,24 +81,33 @@ static size_t rts_copyfromsockaddr(struct sockaddr *sa, in6_addr_t *addrp); static void rts_fill_msg(int type, int rtm_addrs, ipaddr_t dst, ipaddr_t mask, ipaddr_t gateway, ipaddr_t src_addr, ipaddr_t brd_addr, - ipaddr_t author, const ipif_t *ipif, mblk_t *mp, uint_t, const tsol_gc_t *); + ipaddr_t author, ipaddr_t ifaddr, const ill_t *ill, mblk_t *mp, + const tsol_gc_t *); static int rts_getaddrs(rt_msghdr_t *rtm, in6_addr_t *dst_addrp, in6_addr_t *gw_addrp, in6_addr_t *net_maskp, in6_addr_t *authorp, in6_addr_t *if_addrp, in6_addr_t *src_addrp, ushort_t *indexp, sa_family_t *afp, tsol_rtsecattr_t *rtsecattr, int *error); static void rts_getifdata(if_data_t *if_data, const ipif_t *ipif); static int rts_getmetrics(ire_t *ire, rt_metrics_t *metrics); -static mblk_t *rts_rtmget(mblk_t *mp, ire_t *ire, ire_t *sire, - sa_family_t af); +static mblk_t *rts_rtmget(mblk_t *mp, ire_t *ire, ire_t *ifire, + const in6_addr_t *setsrc, tsol_ire_gw_secattr_t *attrp, sa_family_t af); static void rts_setmetrics(ire_t *ire, uint_t which, rt_metrics_t *metrics); -static void ip_rts_request_retry(ipsq_t *, queue_t *q, mblk_t *mp, void *); +static ire_t *ire_lookup_v4(ipaddr_t dst_addr, ipaddr_t net_mask, + ipaddr_t gw_addr, const ill_t *ill, zoneid_t zoneid, + const ts_label_t *tsl, int match_flags, ip_stack_t *ipst, ire_t **pifire, + ipaddr_t *v4setsrcp, tsol_ire_gw_secattr_t **gwattrp); +static ire_t *ire_lookup_v6(const in6_addr_t *dst_addr_v6, + const in6_addr_t *net_mask_v6, const in6_addr_t *gw_addr_v6, + const ill_t *ill, zoneid_t zoneid, const ts_label_t *tsl, int match_flags, + ip_stack_t *ipst, ire_t **pifire, + in6_addr_t *v6setsrcp, tsol_ire_gw_secattr_t **gwattrp); /* * Send `mp' to all eligible routing queues. A queue is ineligible if: * * 1. SO_USELOOPBACK is off and it is not the originating queue. - * 2. RTAW_UNDER_IPMP is on and RTSQ_UNDER_IPMP is clear in `flags'. - * 3. RTAW_UNDER_IPMP is off and RTSQ_NORMAL is clear in `flags'. + * 2. RTA_UNDER_IPMP is on and RTSQ_UNDER_IPMP is not set in `flags'. + * 3. RTA_UNDER_IPMP is off and RTSQ_NORMAL is not set in `flags'. * 4. It is not the same address family as `af', and `af' isn't AF_UNSPEC. */ void @@ -110,7 +119,7 @@ rts_queue_input(mblk_t *mp, conn_t *o_connp, sa_family_t af, uint_t flags, /* * Since we don't have an ill_t here, RTSQ_DEFAULT must already be - * resolved to one or more of RTSQ_NORMAL|RTSQ_UNDER_IPMP by now. + * resolved to one or more of RTSQ_NORMAL|RTSQ_UNDER_IPMP at this point. */ ASSERT(!(flags & RTSQ_DEFAULT)); @@ -119,7 +128,6 @@ rts_queue_input(mblk_t *mp, conn_t *o_connp, sa_family_t af, uint_t flags, for (; connp != NULL; connp = next_connp) { next_connp = connp->conn_next; - /* * If there was a family specified when this routing socket was * created and it doesn't match the family of the message to @@ -139,28 +147,27 @@ rts_queue_input(mblk_t *mp, conn_t *o_connp, sa_family_t af, uint_t flags, if (!(flags & RTSQ_NORMAL)) continue; } - /* * For the originating queue, we only copy the message upstream * if loopback is set. For others reading on the routing * socket, we check if there is room upstream for a copy of the * message. */ - if ((o_connp == connp) && connp->conn_loopback == 0) { + if ((o_connp == connp) && connp->conn_useloopback == 0) { connp = connp->conn_next; continue; } CONN_INC_REF(connp); mutex_exit(&ipst->ips_rts_clients->connf_lock); /* Pass to rts_input */ - if ((IPCL_IS_NONSTR(connp) && !PROTO_FLOW_CNTRLD(connp))|| - (!IPCL_IS_NONSTR(connp) && - canputnext(CONNP_TO_RQ(connp)))) { + if (IPCL_IS_NONSTR(connp) ? !connp->conn_flow_cntrld : + canputnext(connp->conn_rq)) { mp1 = dupmsg(mp); if (mp1 == NULL) mp1 = copymsg(mp); + /* Note that we pass a NULL ira to rts_input */ if (mp1 != NULL) - (connp->conn_recv)(connp, mp1, NULL); + (connp->conn_recv)(connp, mp1, NULL, NULL); } mutex_enter(&ipst->ips_rts_clients->connf_lock); @@ -176,7 +183,7 @@ rts_queue_input(mblk_t *mp, conn_t *o_connp, sa_family_t af, uint_t flags, * Takes an ire and sends an ack to all the routing sockets. This * routine is used * - when a route is created/deleted through the ioctl interface. - * - when ire_expire deletes a stale redirect + * - when a stale redirect is deleted */ void ip_rts_rtmsg(int type, ire_t *ire, int error, ip_stack_t *ipst) @@ -192,6 +199,8 @@ ip_rts_rtmsg(int type, ire_t *ire, int error, ip_stack_t *ipst) ASSERT(ire->ire_ipversion == IPV4_VERSION || ire->ire_ipversion == IPV6_VERSION); + ASSERT(!(ire->ire_type & IRE_IF_CLONE)); + if (ire->ire_flags & RTF_SETSRC) rtm_addrs |= RTA_SRC; @@ -202,8 +211,8 @@ ip_rts_rtmsg(int type, ire_t *ire, int error, ip_stack_t *ipst) if (mp == NULL) return; rts_fill_msg(type, rtm_addrs, ire->ire_addr, ire->ire_mask, - ire->ire_gateway_addr, ire->ire_src_addr, 0, 0, NULL, mp, - 0, NULL); + ire->ire_gateway_addr, ire->ire_setsrc_addr, 0, 0, 0, NULL, + mp, NULL); break; case IPV6_VERSION: af = AF_INET6; @@ -215,8 +224,8 @@ ip_rts_rtmsg(int type, ire_t *ire, int error, ip_stack_t *ipst) mutex_exit(&ire->ire_lock); rts_fill_msg_v6(type, rtm_addrs, &ire->ire_addr_v6, &ire->ire_mask_v6, &gw_addr_v6, - &ire->ire_src_addr_v6, &ipv6_all_zeros, &ipv6_all_zeros, - NULL, mp, 0, NULL); + &ire->ire_setsrc_addr_v6, &ipv6_all_zeros, &ipv6_all_zeros, + &ipv6_all_zeros, NULL, mp, NULL); break; } rtm = (rt_msghdr_t *)mp->b_rptr; @@ -230,13 +239,6 @@ ip_rts_rtmsg(int type, ire_t *ire, int error, ip_stack_t *ipst) rts_queue_input(mp, NULL, af, RTSQ_ALL, ipst); } -/* ARGSUSED */ -static void -ip_rts_request_retry(ipsq_t *dummy_sq, queue_t *q, mblk_t *mp, void *dummy) -{ - (void) ip_rts_request(q, mp, msg_getcred(mp, NULL)); -} - /* * This is a call from the RTS module * indicating that this is a Routing Socket @@ -248,7 +250,7 @@ ip_rts_register(conn_t *connp) { ip_stack_t *ipst = connp->conn_netstack->netstack_ip; - connp->conn_loopback = 1; + connp->conn_useloopback = 1; ipcl_hash_insert_wildcard(ipst->ips_rts_clients, connp); } @@ -269,18 +271,9 @@ ip_rts_unregister(conn_t *connp) * * In general, this function does not consume the message supplied but rather * sends the message upstream with an appropriate UNIX errno. - * - * We may need to restart this operation if the ipif cannot be looked up - * due to an exclusive operation that is currently in progress. The restart - * entry point is ip_rts_request_retry. While the request is enqueud in the - * ipsq the ioctl could be aborted and the conn close. To ensure that we don't - * have stale conn pointers, ip_wput_ioctl does a conn refhold. This is - * released at the completion of the rts ioctl at the end of this function - * by calling CONN_OPER_PENDING_DONE or when the ioctl is aborted and - * conn close occurs in conn_ioctl_cleanup. */ int -ip_rts_request_common(queue_t *q, mblk_t *mp, conn_t *connp, cred_t *ioc_cr) +ip_rts_request_common(mblk_t *mp, conn_t *connp, cred_t *ioc_cr) { rt_msghdr_t *rtm = NULL; in6_addr_t dst_addr_v6; @@ -289,9 +282,12 @@ ip_rts_request_common(queue_t *q, mblk_t *mp, conn_t *connp, cred_t *ioc_cr) in6_addr_t net_mask_v6; in6_addr_t author_v6; in6_addr_t if_addr_v6; - mblk_t *mp1, *ioc_mp = mp; + mblk_t *mp1; ire_t *ire = NULL; - ire_t *sire = NULL; + ire_t *ifire = NULL; + ipaddr_t v4setsrc; + in6_addr_t v6setsrc = ipv6_all_zeros; + tsol_ire_gw_secattr_t *gwattr = NULL; int error = 0; int match_flags = MATCH_IRE_DSTONLY; int match_flags_local = MATCH_IRE_TYPE | MATCH_IRE_GW; @@ -302,9 +298,6 @@ ip_rts_request_common(queue_t *q, mblk_t *mp, conn_t *connp, cred_t *ioc_cr) ipaddr_t src_addr; ipaddr_t net_mask; ushort_t index; - ipif_t *ipif = NULL; - ipif_t *tmp_ipif = NULL; - IOCP iocp = (IOCP)mp->b_rptr; boolean_t gcgrp_xtraref = B_FALSE; tsol_gcgrp_addr_t ga; tsol_rtsecattr_t rtsecattr; @@ -314,42 +307,11 @@ ip_rts_request_common(queue_t *q, mblk_t *mp, conn_t *connp, cred_t *ioc_cr) ts_label_t *tsl = NULL; zoneid_t zoneid; ip_stack_t *ipst; - - ip1dbg(("ip_rts_request: mp is %x\n", DB_TYPE(mp))); + ill_t *ill = NULL; zoneid = connp->conn_zoneid; ipst = connp->conn_netstack->netstack_ip; - ASSERT(mp->b_cont != NULL); - /* ioc_mp holds mp */ - mp = mp->b_cont; - - /* - * The Routing Socket data starts on - * next block. If there is no next block - * this is an indication from routing module - * that it is a routing socket stream queue. - * We need to support that for compatibility with SDP since - * it has a contract private interface to use IP_IOC_RTS_REQUEST. - */ - if (mp->b_cont == NULL) { - /* - * This is a message from SDP - * indicating that this is a Routing Socket - * Stream. Insert this conn_t in routing - * socket client list. - */ - connp->conn_loopback = 1; - ipcl_hash_insert_wildcard(ipst->ips_rts_clients, connp); - goto done; - } - mp1 = dupmsg(mp->b_cont); - if (mp1 == NULL) { - error = ENOBUFS; - goto done; - } - mp = mp1; - if (mp->b_cont != NULL && !pullupmsg(mp, -1)) { freemsg(mp); error = EINVAL; @@ -446,20 +408,13 @@ ip_rts_request_common(queue_t *q, mblk_t *mp, conn_t *connp, cred_t *ioc_cr) */ ASSERT(af == AF_INET || af == AF_INET6); + /* Handle RTA_IFP */ if (index != 0) { - ill_t *ill; + ipif_t *ipif; lookup: - /* - * IPC must be refheld somewhere in ip_wput_nondata or - * ip_wput_ioctl etc... and cleaned up if ioctl is killed. - * If ILL_CHANGING the request is queued in the ipsq. - */ - ill = ill_lookup_on_ifindex(index, af == AF_INET6, - CONNP_TO_WQ(connp), ioc_mp, ip_rts_request_retry, &error, - ipst); + ill = ill_lookup_on_ifindex(index, af == AF_INET6, ipst); if (ill == NULL) { - if (error != EINPROGRESS) - error = EINVAL; + error = EINVAL; goto done; } @@ -474,13 +429,13 @@ lookup: switch (rtm->rtm_type) { case RTM_CHANGE: case RTM_DELETE: - ill_refrele(ill); error = EINVAL; goto done; case RTM_ADD: index = ipmp_ill_get_ipmp_ifindex(ill); ill_refrele(ill); if (index == 0) { + ill = NULL; /* already refrele'd */ error = EINVAL; goto done; } @@ -488,9 +443,18 @@ lookup: } } - ipif = ipif_get_next_ipif(NULL, ill); - ill_refrele(ill); match_flags |= MATCH_IRE_ILL; + /* + * This provides the same zoneid as in Solaris 10 + * that -ifp picks the zoneid from the first ipif on the ill. + * But it might not be useful since the first ipif will always + * have the same zoneid as the ill. + */ + ipif = ipif_get_next_ipif(NULL, ill); + if (ipif != NULL) { + zoneid = ipif->ipif_zoneid; + ipif_refrele(ipif); + } } /* @@ -545,6 +509,8 @@ lookup: switch (af) { case AF_INET: if (src_addr != INADDR_ANY) { + uint_t type; + /* * The RTF_SETSRC flag is present, check that * the supplied src address is not the loopback @@ -556,20 +522,11 @@ lookup: } /* * Also check that the supplied address is a - * valid, local one. + * valid, local one. Only allow IFF_UP ones */ - tmp_ipif = ipif_lookup_addr(src_addr, NULL, - ALL_ZONES, CONNP_TO_WQ(connp), ioc_mp, - ip_rts_request_retry, &error, ipst); - if (tmp_ipif == NULL) { - if (error != EINPROGRESS) - error = EADDRNOTAVAIL; - goto done; - } - if (!(tmp_ipif->ipif_flags & IPIF_UP) || - (tmp_ipif->ipif_flags & - (IPIF_NOLOCAL | IPIF_ANYCAST))) { - error = EINVAL; + type = ip_type_v4(src_addr, ipst); + if (!(type & (IRE_LOCAL|IRE_LOOPBACK))) { + error = EADDRNOTAVAIL; goto done; } } else { @@ -584,14 +541,15 @@ lookup: } error = ip_rt_add(dst_addr, net_mask, gw_addr, src_addr, - rtm->rtm_flags, ipif, &ire, B_FALSE, - WR(q), ioc_mp, ip_rts_request_retry, - rtsap, ipst); - if (ipif != NULL) - ASSERT(!MUTEX_HELD(&ipif->ipif_ill->ill_lock)); + rtm->rtm_flags, ill, &ire, B_FALSE, + rtsap, ipst, zoneid); + if (ill != NULL) + ASSERT(!MUTEX_HELD(&ill->ill_lock)); break; case AF_INET6: if (!IN6_IS_ADDR_UNSPECIFIED(&src_addr_v6)) { + uint_t type; + /* * The RTF_SETSRC flag is present, check that * the supplied src address is not the loopback @@ -603,28 +561,17 @@ lookup: } /* * Also check that the supplied address is a - * valid, local one. + * valid, local one. Only allow UP ones. */ - tmp_ipif = ipif_lookup_addr_v6(&src_addr_v6, - NULL, ALL_ZONES, CONNP_TO_WQ(connp), ioc_mp, - ip_rts_request_retry, &error, ipst); - if (tmp_ipif == NULL) { - if (error != EINPROGRESS) - error = EADDRNOTAVAIL; - goto done; - } - - if (!(tmp_ipif->ipif_flags & IPIF_UP) || - (tmp_ipif->ipif_flags & - (IPIF_NOLOCAL | IPIF_ANYCAST))) { - error = EINVAL; + type = ip_type_v6(&src_addr_v6, ipst); + if (!(type & (IRE_LOCAL|IRE_LOOPBACK))) { + error = EADDRNOTAVAIL; goto done; } error = ip_rt_add_v6(&dst_addr_v6, &net_mask_v6, &gw_addr_v6, &src_addr_v6, rtm->rtm_flags, - ipif, &ire, WR(q), ioc_mp, - ip_rts_request_retry, rtsap, ipst); + ill, &ire, rtsap, ipst, zoneid); break; } /* @@ -637,10 +584,9 @@ lookup: } error = ip_rt_add_v6(&dst_addr_v6, &net_mask_v6, &gw_addr_v6, NULL, rtm->rtm_flags, - ipif, &ire, WR(q), ioc_mp, - ip_rts_request_retry, rtsap, ipst); - if (ipif != NULL) - ASSERT(!MUTEX_HELD(&ipif->ipif_ill->ill_lock)); + ill, &ire, rtsap, ipst, zoneid); + if (ill != NULL) + ASSERT(!MUTEX_HELD(&ill->ill_lock)); break; } if (error != 0) @@ -666,13 +612,13 @@ lookup: switch (af) { case AF_INET: error = ip_rt_delete(dst_addr, net_mask, gw_addr, - found_addrs, rtm->rtm_flags, ipif, B_FALSE, - WR(q), ioc_mp, ip_rts_request_retry, ipst); + found_addrs, rtm->rtm_flags, ill, B_FALSE, + ipst, zoneid); break; case AF_INET6: error = ip_rt_delete_v6(&dst_addr_v6, &net_mask_v6, - &gw_addr_v6, found_addrs, rtm->rtm_flags, ipif, - WR(q), ioc_mp, ip_rts_request_retry, ipst); + &gw_addr_v6, found_addrs, rtm->rtm_flags, ill, + ipst, zoneid); break; } break; @@ -680,8 +626,7 @@ lookup: case RTM_CHANGE: /* * In the case of RTM_GET, the forwarding table should be - * searched recursively with default being matched if the - * specific route doesn't exist. Also, if a gateway was + * searched recursively. Also, if a gateway was * specified then the gateway address must also be matched. * * In the case of RTM_CHANGE, the gateway address (if supplied) @@ -706,9 +651,7 @@ lookup: } if (rtm->rtm_type == RTM_GET) { - match_flags |= - (MATCH_IRE_DEFAULT | MATCH_IRE_RECURSIVE | - MATCH_IRE_SECATTR); + match_flags |= MATCH_IRE_SECATTR; match_flags_local |= MATCH_IRE_SECATTR; if ((found_addrs & RTA_GATEWAY) != 0) match_flags |= MATCH_IRE_GW; @@ -749,57 +692,34 @@ lookup: * IRE_LOCAL entry. * * If we didn't check for or find an IRE_LOOPBACK or IRE_LOCAL - * entry, then look in the forwarding table. + * entry, then look for any other type of IRE. */ switch (af) { case AF_INET: if (net_mask == IP_HOST_MASK) { - ire = ire_ctable_lookup(dst_addr, gw_addr, + ire = ire_ftable_lookup_v4(dst_addr, 0, gw_addr, IRE_LOCAL | IRE_LOOPBACK, NULL, zoneid, - tsl, match_flags_local, ipst); - /* - * If we found an IRE_LOCAL, make sure - * it is one that would be used by this - * zone to send packets. - */ - if (ire != NULL && - ire->ire_type == IRE_LOCAL && - ipst->ips_ip_restrict_interzone_loopback && - !ire_local_ok_across_zones(ire, - zoneid, &dst_addr, tsl, ipst)) { - ire_refrele(ire); - ire = NULL; - } + tsl, match_flags_local, 0, ipst, NULL); } if (ire == NULL) { - ire = ire_ftable_lookup(dst_addr, net_mask, - gw_addr, 0, ipif, &sire, zoneid, 0, - tsl, match_flags, ipst); + ire = ire_lookup_v4(dst_addr, net_mask, + gw_addr, ill, zoneid, tsl, match_flags, + ipst, &ifire, &v4setsrc, &gwattr); + IN6_IPADDR_TO_V4MAPPED(v4setsrc, &v6setsrc); } break; case AF_INET6: if (IN6_ARE_ADDR_EQUAL(&net_mask_v6, &ipv6_all_ones)) { - ire = ire_ctable_lookup_v6(&dst_addr_v6, + ire = ire_ftable_lookup_v6(&dst_addr_v6, NULL, &gw_addr_v6, IRE_LOCAL | IRE_LOOPBACK, NULL, - zoneid, tsl, match_flags_local, ipst); - /* - * If we found an IRE_LOCAL, make sure - * it is one that would be used by this - * zone to send packets. - */ - if (ire != NULL && - ire->ire_type == IRE_LOCAL && - ipst->ips_ip_restrict_interzone_loopback && - !ire_local_ok_across_zones(ire, - zoneid, (void *)&dst_addr_v6, tsl, ipst)) { - ire_refrele(ire); - ire = NULL; - } + zoneid, tsl, match_flags_local, 0, ipst, + NULL); } if (ire == NULL) { - ire = ire_ftable_lookup_v6(&dst_addr_v6, - &net_mask_v6, &gw_addr_v6, 0, ipif, &sire, - zoneid, 0, tsl, match_flags, ipst); + ire = ire_lookup_v6(&dst_addr_v6, + &net_mask_v6, &gw_addr_v6, ill, zoneid, + tsl, match_flags, ipst, &ifire, &v6setsrc, + &gwattr); } break; } @@ -810,10 +730,21 @@ lookup: error = ESRCH; goto done; } + /* + * Want to return failure if we get an IRE_NOROUTE from + * ire_route_recursive + */ + if (ire->ire_type & IRE_NOROUTE) { + ire_refrele(ire); + ire = NULL; + error = ESRCH; + goto done; + } + /* we know the IRE before we come here */ switch (rtm->rtm_type) { case RTM_GET: - mp1 = rts_rtmget(mp, ire, sire, af); + mp1 = rts_rtmget(mp, ire, ifire, &v6setsrc, gwattr, af); if (mp1 == NULL) { error = ENOBUFS; goto done; @@ -843,7 +774,6 @@ lookup: */ switch (af) { case AF_INET: - ire_flush_cache_v4(ire, IRE_FLUSH_DELETE); if ((found_addrs & RTA_GATEWAY) != 0 && (ire->ire_gateway_addr != gw_addr)) { ire->ire_gateway_addr = gw_addr; @@ -863,9 +793,10 @@ lookup: if ((found_addrs & RTA_SRC) != 0 && (rtm->rtm_flags & RTF_SETSRC) != 0 && - (ire->ire_src_addr != src_addr)) { - + (ire->ire_setsrc_addr != src_addr)) { if (src_addr != INADDR_ANY) { + uint_t type; + /* * The RTF_SETSRC flag is * present, check that the @@ -880,50 +811,47 @@ lookup: goto done; } /* - * Also check that the the + * Also check that the * supplied addr is a valid * local address. */ - tmp_ipif = ipif_lookup_addr( - src_addr, NULL, ALL_ZONES, - WR(q), ioc_mp, - ip_rts_request_retry, - &error, ipst); - if (tmp_ipif == NULL) { - error = (error == - EINPROGRESS) ? - error : - EADDRNOTAVAIL; - goto done; - } - - if (!(tmp_ipif->ipif_flags & - IPIF_UP) || - (tmp_ipif->ipif_flags & - (IPIF_NOLOCAL | - IPIF_ANYCAST))) { - error = EINVAL; + type = ip_type_v4(src_addr, + ipst); + if (!(type & + (IRE_LOCAL|IRE_LOOPBACK))) { + error = EADDRNOTAVAIL; goto done; } ire->ire_flags |= RTF_SETSRC; + ire->ire_setsrc_addr = + src_addr; } else { ire->ire_flags &= ~RTF_SETSRC; + ire->ire_setsrc_addr = + INADDR_ANY; } - ire->ire_src_addr = src_addr; + /* + * Let conn_ixa caching know that + * source address selection changed + */ + ip_update_source_selection(ipst); } + ire_flush_cache_v4(ire, IRE_FLUSH_GWCHANGE); break; case AF_INET6: - ire_flush_cache_v6(ire, IRE_FLUSH_DELETE); mutex_enter(&ire->ire_lock); if ((found_addrs & RTA_GATEWAY) != 0 && !IN6_ARE_ADDR_EQUAL( &ire->ire_gateway_addr_v6, &gw_addr_v6)) { ire->ire_gateway_addr_v6 = gw_addr_v6; } + mutex_exit(&ire->ire_lock); if (rtsap != NULL) { ga.ga_af = AF_INET6; + mutex_enter(&ire->ire_lock); ga.ga_addr = ire->ire_gateway_addr_v6; + mutex_exit(&ire->ire_lock); gcgrp = gcgrp_lookup(&ga, B_TRUE); if (gcgrp == NULL) { @@ -935,10 +863,11 @@ lookup: if ((found_addrs & RTA_SRC) != 0 && (rtm->rtm_flags & RTF_SETSRC) != 0 && !IN6_ARE_ADDR_EQUAL( - &ire->ire_src_addr_v6, &src_addr_v6)) { - + &ire->ire_setsrc_addr_v6, &src_addr_v6)) { if (!IN6_IS_ADDR_UNSPECIFIED( &src_addr_v6)) { + uint_t type; + /* * The RTF_SETSRC flag is * present, check that the @@ -949,54 +878,44 @@ lookup: */ if (IN6_IS_ADDR_LOOPBACK( &src_addr_v6)) { - mutex_exit( - &ire->ire_lock); error = EINVAL; goto done; } /* - * Also check that the the + * Also check that the * supplied addr is a valid * local address. */ - tmp_ipif = ipif_lookup_addr_v6( - &src_addr_v6, NULL, - ALL_ZONES, - CONNP_TO_WQ(connp), ioc_mp, - ip_rts_request_retry, - &error, ipst); - if (tmp_ipif == NULL) { - mutex_exit( - &ire->ire_lock); - error = (error == - EINPROGRESS) ? - error : - EADDRNOTAVAIL; - goto done; - } - if (!(tmp_ipif->ipif_flags & - IPIF_UP) || - (tmp_ipif->ipif_flags & - (IPIF_NOLOCAL | - IPIF_ANYCAST))) { - mutex_exit( - &ire->ire_lock); - error = EINVAL; + type = ip_type_v6(&src_addr_v6, + ipst); + if (!(type & + (IRE_LOCAL|IRE_LOOPBACK))) { + error = EADDRNOTAVAIL; goto done; } + mutex_enter(&ire->ire_lock); ire->ire_flags |= RTF_SETSRC; + ire->ire_setsrc_addr_v6 = + src_addr_v6; + mutex_exit(&ire->ire_lock); } else { + mutex_enter(&ire->ire_lock); ire->ire_flags &= ~RTF_SETSRC; + ire->ire_setsrc_addr_v6 = + ipv6_all_zeros; + mutex_exit(&ire->ire_lock); } - ire->ire_src_addr_v6 = src_addr_v6; + /* + * Let conn_ixa caching know that + * source address selection changed + */ + ip_update_source_selection(ipst); } - mutex_exit(&ire->ire_lock); + ire_flush_cache_v6(ire, IRE_FLUSH_GWCHANGE); break; } if (rtsap != NULL) { - in_addr_t ga_addr4; - ASSERT(gcgrp != NULL); /* @@ -1010,7 +929,7 @@ lookup: gc = gc_create(rtsap, gcgrp, &gcgrp_xtraref); if (gc == NULL || (error = tsol_ire_init_gwattr(ire, - ire->ire_ipversion, gc, NULL)) != 0) { + ire->ire_ipversion, gc)) != 0) { if (gc != NULL) { GC_REFRELE(gc); } else { @@ -1019,21 +938,6 @@ lookup: } goto done; } - - /* - * Now delete any existing gateway IRE caches - * as well as all caches using the gateway, - * and allow them to be created on demand - * through ip_newroute{_v6}. - */ - IN6_V4MAPPED_TO_IPADDR(&ga.ga_addr, ga_addr4); - if (af == AF_INET) { - ire_clookup_delete_cache_gw( - ga_addr4, ALL_ZONES, ipst); - } else { - ire_clookup_delete_cache_gw_v6( - &ga.ga_addr, ALL_ZONES, ipst); - } } rts_setmetrics(ire, rtm->rtm_inits, &rtm->rtm_rmx); break; @@ -1046,21 +950,14 @@ lookup: done: if (ire != NULL) ire_refrele(ire); - if (sire != NULL) - ire_refrele(sire); - if (ipif != NULL) - ipif_refrele(ipif); - if (tmp_ipif != NULL) - ipif_refrele(tmp_ipif); + if (ifire != NULL) + ire_refrele(ifire); + if (ill != NULL) + ill_refrele(ill); if (gcgrp_xtraref) GCGRP_REFRELE(gcgrp); - if (error == EINPROGRESS) { - if (rtm != NULL) - freemsg(mp); - return (error); - } if (rtm != NULL) { ASSERT(mp->b_wptr <= mp->b_datap->db_lim); if (error != 0) { @@ -1074,12 +971,190 @@ done: } rts_queue_input(mp, connp, af, RTSQ_ALL, ipst); } + return (error); +} + +/* + * Helper function that can do recursive lookups including when + * MATCH_IRE_GW and/or MATCH_IRE_MASK is set. + */ +static ire_t * +ire_lookup_v4(ipaddr_t dst_addr, ipaddr_t net_mask, ipaddr_t gw_addr, + const ill_t *ill, zoneid_t zoneid, const ts_label_t *tsl, + int match_flags, ip_stack_t *ipst, ire_t **pifire, ipaddr_t *v4setsrcp, + tsol_ire_gw_secattr_t **gwattrp) +{ + ire_t *ire; + ire_t *ifire = NULL; + uint_t ire_type; + + *pifire = NULL; + *v4setsrcp = INADDR_ANY; + *gwattrp = NULL; + + /* Skip IRE_IF_CLONE */ + match_flags |= MATCH_IRE_TYPE; + ire_type = (IRE_ONLINK|IRE_OFFLINK) & ~IRE_IF_CLONE; + + /* + * ire_route_recursive can't match gateway or mask thus if they are + * set we have to do two steps of lookups + */ + if (match_flags & (MATCH_IRE_GW|MATCH_IRE_MASK)) { + ire = ire_ftable_lookup_v4(dst_addr, net_mask, gw_addr, + ire_type, ill, zoneid, tsl, match_flags, 0, ipst, NULL); + + if (ire == NULL ||(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) + return (ire); + + if (ire->ire_type & IRE_ONLINK) + return (ire); + + if (ire->ire_flags & RTF_SETSRC) { + ASSERT(ire->ire_setsrc_addr != INADDR_ANY); + *v4setsrcp = ire->ire_setsrc_addr; + v4setsrcp = NULL; + } + + /* The first ire_gw_secattr is passed back */ + if (ire->ire_gw_secattr != NULL) { + *gwattrp = ire->ire_gw_secattr; + gwattrp = NULL; + } + + /* Look for an interface ire recursively based on the gateway */ + dst_addr = ire->ire_gateway_addr; + match_flags &= ~(MATCH_IRE_GW|MATCH_IRE_MASK); + ifire = ire_route_recursive_v4(dst_addr, ire_type, ill, zoneid, + tsl, match_flags, B_FALSE, 0, ipst, v4setsrcp, gwattrp, + NULL); + } else { + ire = ire_route_recursive_v4(dst_addr, ire_type, ill, zoneid, + tsl, match_flags, B_FALSE, 0, ipst, v4setsrcp, gwattrp, + NULL); + } + *pifire = ifire; + return (ire); +} + +static ire_t * +ire_lookup_v6(const in6_addr_t *dst_addr_v6, + const in6_addr_t *net_mask_v6, const in6_addr_t *gw_addr_v6, + const ill_t *ill, zoneid_t zoneid, const ts_label_t *tsl, int match_flags, + ip_stack_t *ipst, ire_t **pifire, + in6_addr_t *v6setsrcp, tsol_ire_gw_secattr_t **gwattrp) +{ + ire_t *ire; + ire_t *ifire = NULL; + uint_t ire_type; + + *pifire = NULL; + *v6setsrcp = ipv6_all_zeros; + *gwattrp = NULL; + + /* Skip IRE_IF_CLONE */ + match_flags |= MATCH_IRE_TYPE; + ire_type = (IRE_ONLINK|IRE_OFFLINK) & ~IRE_IF_CLONE; + + /* + * ire_route_recursive can't match gateway or mask thus if they are + * set we have to do two steps of lookups + */ + if (match_flags & (MATCH_IRE_GW|MATCH_IRE_MASK)) { + in6_addr_t dst; + + ire = ire_ftable_lookup_v6(dst_addr_v6, net_mask_v6, + gw_addr_v6, ire_type, ill, zoneid, tsl, match_flags, 0, + ipst, NULL); + + if (ire == NULL ||(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) + return (ire); + + if (ire->ire_type & IRE_ONLINK) + return (ire); + + if (ire->ire_flags & RTF_SETSRC) { + ASSERT(!IN6_IS_ADDR_UNSPECIFIED( + &ire->ire_setsrc_addr_v6)); + *v6setsrcp = ire->ire_setsrc_addr_v6; + v6setsrcp = NULL; + } + + /* The first ire_gw_secattr is passed back */ + if (ire->ire_gw_secattr != NULL) { + *gwattrp = ire->ire_gw_secattr; + gwattrp = NULL; + } + + mutex_enter(&ire->ire_lock); + dst = ire->ire_gateway_addr_v6; + mutex_exit(&ire->ire_lock); + match_flags &= ~(MATCH_IRE_GW|MATCH_IRE_MASK); + ifire = ire_route_recursive_v6(&dst, ire_type, ill, zoneid, tsl, + match_flags, B_FALSE, 0, ipst, v6setsrcp, gwattrp, NULL); + } else { + ire = ire_route_recursive_v6(dst_addr_v6, ire_type, ill, zoneid, + tsl, match_flags, B_FALSE, 0, ipst, v6setsrcp, gwattrp, + NULL); + } + *pifire = ifire; + return (ire); +} + + +/* + * Handle IP_IOC_RTS_REQUEST ioctls + */ +int +ip_rts_request(queue_t *q, mblk_t *mp, cred_t *ioc_cr) +{ + conn_t *connp = Q_TO_CONN(q); + IOCP iocp = (IOCP)mp->b_rptr; + mblk_t *mp1, *ioc_mp = mp; + int error = 0; + ip_stack_t *ipst; + ipst = connp->conn_netstack->netstack_ip; + + ASSERT(mp->b_cont != NULL); + /* ioc_mp holds mp */ + mp = mp->b_cont; + + /* + * The Routing Socket data starts on + * next block. If there is no next block + * this is an indication from routing module + * that it is a routing socket stream queue. + * We need to support that for compatibility with SDP since + * it has a contract private interface to use IP_IOC_RTS_REQUEST. + * Note: SDP no longer uses IP_IOC_RTS_REQUEST - we can remove this. + */ + if (mp->b_cont == NULL) { + /* + * This is a message from SDP + * indicating that this is a Routing Socket + * Stream. Insert this conn_t in routing + * socket client list. + */ + connp->conn_useloopback = 1; + ipcl_hash_insert_wildcard(ipst->ips_rts_clients, connp); + goto done; + } + mp1 = dupmsg(mp->b_cont); + if (mp1 == NULL) { + error = ENOBUFS; + goto done; + } + mp = mp1; + + error = ip_rts_request_common(mp, connp, ioc_cr); +done: iocp->ioc_error = error; ioc_mp->b_datap->db_type = M_IOCACK; if (iocp->ioc_error != 0) iocp->ioc_count = 0; - (connp->conn_recv)(connp, ioc_mp, NULL); + /* Note that we pass a NULL ira to rts_input */ + (connp->conn_recv)(connp, ioc_mp, NULL, NULL); /* conn was refheld in ip_wput_ioctl. */ CONN_OPER_PENDING_DONE(connp); @@ -1087,12 +1162,6 @@ done: return (error); } -int -ip_rts_request(queue_t *q, mblk_t *mp, cred_t *ioc_cr) -{ - return (ip_rts_request_common(q, mp, Q_TO_CONN(q), ioc_cr)); -} - /* * Build a reply to the RTM_GET request contained in the given message block * using the retrieved IRE of the destination address, the parent IRE (if it @@ -1102,26 +1171,34 @@ ip_rts_request(queue_t *q, mblk_t *mp, cred_t *ioc_cr) * otherwise NULL is returned. */ static mblk_t * -rts_rtmget(mblk_t *mp, ire_t *ire, ire_t *sire, sa_family_t af) +rts_rtmget(mblk_t *mp, ire_t *ire, ire_t *ifire, const in6_addr_t *setsrc, + tsol_ire_gw_secattr_t *attrp, sa_family_t af) { rt_msghdr_t *rtm; rt_msghdr_t *new_rtm; mblk_t *new_mp; int rtm_addrs; int rtm_flags; - in6_addr_t gw_addr_v6; - tsol_ire_gw_secattr_t *attrp = NULL; tsol_gc_t *gc = NULL; tsol_gcgrp_t *gcgrp = NULL; - int sacnt = 0; + ill_t *ill; + ipif_t *ipif = NULL; + ipaddr_t brdaddr; /* IFF_POINTOPOINT destination */ + ipaddr_t ifaddr; + in6_addr_t brdaddr6; /* IFF_POINTOPOINT destination */ + in6_addr_t ifaddr6; + ipaddr_t v4setsrc; - ASSERT(ire->ire_ipif != NULL); rtm = (rt_msghdr_t *)mp->b_rptr; - if (sire != NULL && sire->ire_gw_secattr != NULL) - attrp = sire->ire_gw_secattr; - else if (ire->ire_gw_secattr != NULL) - attrp = ire->ire_gw_secattr; + /* + * Find the ill used to send packets. This will be NULL in case + * of a reject or blackhole. + */ + if (ifire != NULL) + ill = ire_nexthop_ill(ifire); + else + ill = ire_nexthop_ill(ire); if (attrp != NULL) { mutex_enter(&attrp->igsa_lock); @@ -1129,29 +1206,9 @@ rts_rtmget(mblk_t *mp, ire_t *ire, ire_t *sire, sa_family_t af) gcgrp = gc->gc_grp; ASSERT(gcgrp != NULL); rw_enter(&gcgrp->gcgrp_rwlock, RW_READER); - sacnt = 1; - } else if ((gcgrp = attrp->igsa_gcgrp) != NULL) { - rw_enter(&gcgrp->gcgrp_rwlock, RW_READER); - gc = gcgrp->gcgrp_head; - sacnt = gcgrp->gcgrp_count; } mutex_exit(&attrp->igsa_lock); - - /* do nothing if there's no gc to report */ - if (gc == NULL) { - ASSERT(sacnt == 0); - if (gcgrp != NULL) { - /* we might as well drop the lock now */ - rw_exit(&gcgrp->gcgrp_rwlock); - gcgrp = NULL; - } - attrp = NULL; - } - - ASSERT(gc == NULL || (gcgrp != NULL && - RW_LOCK_HELD(&gcgrp->gcgrp_rwlock))); } - ASSERT(sacnt == 0 || gc != NULL); /* * Always return RTA_DST, RTA_GATEWAY and RTA_NETMASK. @@ -1162,16 +1219,36 @@ rts_rtmget(mblk_t *mp, ire_t *ire, ire_t *sire, sa_family_t af) * point-to-point. */ rtm_addrs = (RTA_DST | RTA_GATEWAY | RTA_NETMASK); - if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) { + if ((rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) && ill != NULL) { rtm_addrs |= (RTA_IFP | RTA_IFA); - if (ire->ire_ipif->ipif_flags & IPIF_POINTOPOINT) - rtm_addrs |= RTA_BRD; + /* + * We associate an IRE with an ILL, hence we don't exactly + * know what might make sense for RTA_IFA and RTA_BRD. We + * pick the first ipif on the ill. + */ + ipif = ipif_get_next_ipif(NULL, ill); + if (ipif != NULL) { + if (ipif->ipif_isv6) + ifaddr6 = ipif->ipif_v6lcl_addr; + else + ifaddr = ipif->ipif_lcl_addr; + if (ipif->ipif_flags & IPIF_POINTOPOINT) { + rtm_addrs |= RTA_BRD; + if (ipif->ipif_isv6) + brdaddr6 = ipif->ipif_v6pp_dst_addr; + else + brdaddr = ipif->ipif_pp_dst_addr; + } + ipif_refrele(ipif); + } } - new_mp = rts_alloc_msg(RTM_GET, rtm_addrs, af, sacnt); + new_mp = rts_alloc_msg(RTM_GET, rtm_addrs, af, gc != NULL ? 1 : 0); if (new_mp == NULL) { if (gcgrp != NULL) rw_exit(&gcgrp->gcgrp_rwlock); + if (ill != NULL) + ill_refrele(ill); return (NULL); } @@ -1187,49 +1264,24 @@ rts_rtmget(mblk_t *mp, ire_t *ire, ire_t *sire, sa_family_t af) ASSERT(af == AF_INET || af == AF_INET6); switch (af) { case AF_INET: - if (sire == NULL) { - rtm_flags = ire->ire_flags; - rts_fill_msg(RTM_GET, rtm_addrs, ire->ire_addr, - ire->ire_mask, ire->ire_src_addr, ire->ire_src_addr, - ire->ire_ipif->ipif_pp_dst_addr, 0, ire->ire_ipif, - new_mp, sacnt, gc); - } else { - if (sire->ire_flags & RTF_SETSRC) - rtm_addrs |= RTA_SRC; - - rtm_flags = sire->ire_flags; - rts_fill_msg(RTM_GET, rtm_addrs, sire->ire_addr, - sire->ire_mask, sire->ire_gateway_addr, - (sire->ire_flags & RTF_SETSRC) ? - sire->ire_src_addr : ire->ire_src_addr, - ire->ire_ipif->ipif_pp_dst_addr, - 0, ire->ire_ipif, new_mp, sacnt, gc); - } + IN6_V4MAPPED_TO_IPADDR(setsrc, v4setsrc); + if (v4setsrc != INADDR_ANY) + rtm_addrs |= RTA_SRC; + + rtm_flags = ire->ire_flags; + rts_fill_msg(RTM_GET, rtm_addrs, ire->ire_addr, + ire->ire_mask, ire->ire_gateway_addr, v4setsrc, + brdaddr, 0, ifaddr, ill, new_mp, gc); break; case AF_INET6: - if (sire == NULL) { - rtm_flags = ire->ire_flags; - rts_fill_msg_v6(RTM_GET, rtm_addrs, &ire->ire_addr_v6, - &ire->ire_mask_v6, &ire->ire_src_addr_v6, - &ire->ire_src_addr_v6, - &ire->ire_ipif->ipif_v6pp_dst_addr, - &ipv6_all_zeros, ire->ire_ipif, new_mp, - sacnt, gc); - } else { - if (sire->ire_flags & RTF_SETSRC) - rtm_addrs |= RTA_SRC; - - rtm_flags = sire->ire_flags; - mutex_enter(&sire->ire_lock); - gw_addr_v6 = sire->ire_gateway_addr_v6; - mutex_exit(&sire->ire_lock); - rts_fill_msg_v6(RTM_GET, rtm_addrs, &sire->ire_addr_v6, - &sire->ire_mask_v6, &gw_addr_v6, - (sire->ire_flags & RTF_SETSRC) ? - &sire->ire_src_addr_v6 : &ire->ire_src_addr_v6, - &ire->ire_ipif->ipif_v6pp_dst_addr, &ipv6_all_zeros, - ire->ire_ipif, new_mp, sacnt, gc); - } + if (!IN6_IS_ADDR_UNSPECIFIED(setsrc)) + rtm_addrs |= RTA_SRC; + + rtm_flags = ire->ire_flags; + rts_fill_msg_v6(RTM_GET, rtm_addrs, &ire->ire_addr_v6, + &ire->ire_mask_v6, &ire->ire_gateway_addr_v6, + setsrc, &brdaddr6, &ipv6_all_zeros, + &ifaddr6, ill, new_mp, gc); break; } @@ -1259,11 +1311,9 @@ rts_rtmget(mblk_t *mp, ire_t *ire, ire_t *sire, sa_family_t af) new_rtm->rtm_use = rtm->rtm_use; new_rtm->rtm_addrs = rtm_addrs; new_rtm->rtm_flags = rtm_flags; - if (sire == NULL) - new_rtm->rtm_inits = rts_getmetrics(ire, &new_rtm->rtm_rmx); - else - new_rtm->rtm_inits = rts_getmetrics(sire, &new_rtm->rtm_rmx); - + new_rtm->rtm_inits = rts_getmetrics(ire, &new_rtm->rtm_rmx); + if (ill != NULL) + ill_refrele(ill); return (new_mp); } @@ -1273,10 +1323,11 @@ rts_rtmget(mblk_t *mp, ire_t *ire, ire_t *sire, sa_family_t af) static void rts_getifdata(if_data_t *if_data, const ipif_t *ipif) { - if_data->ifi_type = ipif->ipif_type; /* ethernet, tokenring, etc */ + if_data->ifi_type = ipif->ipif_ill->ill_type; + /* ethernet, tokenring, etc */ if_data->ifi_addrlen = 0; /* media address length */ if_data->ifi_hdrlen = 0; /* media header length */ - if_data->ifi_mtu = ipif->ipif_mtu; /* maximum transmission unit */ + if_data->ifi_mtu = ipif->ipif_ill->ill_mtu; /* mtu */ if_data->ifi_metric = ipif->ipif_metric; /* metric (external only) */ if_data->ifi_baudrate = 0; /* linespeed */ @@ -1302,18 +1353,19 @@ rts_setmetrics(ire_t *ire, uint_t which, rt_metrics_t *metrics) { clock_t rtt; clock_t rtt_sd; - ipif_t *ipif; + ill_t *ill; ifrt_t *ifrt; mblk_t *mp; in6_addr_t gw_addr_v6; + /* Need to add back some metrics to the IRE? */ /* - * Bypass obtaining the lock and searching ipif_saved_ire_mp in the + * Bypass obtaining the lock and searching ill_saved_ire_mp in the * common case of no metrics. */ if (which == 0) return; - ire->ire_uinfo.iulp_set = B_TRUE; + ire->ire_metrics.iulp_set = B_TRUE; /* * iulp_rtt and iulp_rtt_sd are in milliseconds, but 4.4BSD-Lite2's @@ -1330,42 +1382,41 @@ rts_setmetrics(ire_t *ire, uint_t which, rt_metrics_t *metrics) */ mutex_enter(&ire->ire_lock); if (which & RTV_MTU) - ire->ire_max_frag = metrics->rmx_mtu; + ire->ire_metrics.iulp_mtu = metrics->rmx_mtu; if (which & RTV_RTT) - ire->ire_uinfo.iulp_rtt = rtt; + ire->ire_metrics.iulp_rtt = rtt; if (which & RTV_SSTHRESH) - ire->ire_uinfo.iulp_ssthresh = metrics->rmx_ssthresh; + ire->ire_metrics.iulp_ssthresh = metrics->rmx_ssthresh; if (which & RTV_RTTVAR) - ire->ire_uinfo.iulp_rtt_sd = rtt_sd; + ire->ire_metrics.iulp_rtt_sd = rtt_sd; if (which & RTV_SPIPE) - ire->ire_uinfo.iulp_spipe = metrics->rmx_sendpipe; + ire->ire_metrics.iulp_spipe = metrics->rmx_sendpipe; if (which & RTV_RPIPE) - ire->ire_uinfo.iulp_rpipe = metrics->rmx_recvpipe; + ire->ire_metrics.iulp_rpipe = metrics->rmx_recvpipe; mutex_exit(&ire->ire_lock); /* - * Search through the ifrt_t chain hanging off the IPIF in order to + * Search through the ifrt_t chain hanging off the ILL in order to * reflect the metric change there. */ - ipif = ire->ire_ipif; - if (ipif == NULL) + ill = ire->ire_ill; + if (ill == NULL) return; - ASSERT((ipif->ipif_isv6 && ire->ire_ipversion == IPV6_VERSION) || - ((!ipif->ipif_isv6 && ire->ire_ipversion == IPV4_VERSION))); - if (ipif->ipif_isv6) { + ASSERT((ill->ill_isv6 && ire->ire_ipversion == IPV6_VERSION) || + ((!ill->ill_isv6 && ire->ire_ipversion == IPV4_VERSION))); + if (ill->ill_isv6) { mutex_enter(&ire->ire_lock); gw_addr_v6 = ire->ire_gateway_addr_v6; mutex_exit(&ire->ire_lock); } - mutex_enter(&ipif->ipif_saved_ire_lock); - for (mp = ipif->ipif_saved_ire_mp; mp != NULL; mp = mp->b_cont) { + mutex_enter(&ill->ill_saved_ire_lock); + for (mp = ill->ill_saved_ire_mp; mp != NULL; mp = mp->b_cont) { /* - * On a given ipif, the triple of address, gateway and mask is - * unique for each saved IRE (in the case of ordinary interface - * routes, the gateway address is all-zeroes). + * On a given ill, the tuple of address, gateway, mask, + * ire_type and zoneid unique for each saved IRE. */ ifrt = (ifrt_t *)mp->b_rptr; - if (ipif->ipif_isv6) { + if (ill->ill_isv6) { if (!IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6addr, &ire->ire_addr_v6) || !IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6gateway_addr, @@ -1379,23 +1430,36 @@ rts_setmetrics(ire_t *ire, uint_t which, rt_metrics_t *metrics) ifrt->ifrt_mask != ire->ire_mask) continue; } + if (ifrt->ifrt_zoneid != ire->ire_zoneid || + ifrt->ifrt_type != ire->ire_type) + continue; + if (which & RTV_MTU) - ifrt->ifrt_max_frag = metrics->rmx_mtu; + ifrt->ifrt_metrics.iulp_mtu = metrics->rmx_mtu; if (which & RTV_RTT) - ifrt->ifrt_iulp_info.iulp_rtt = rtt; + ifrt->ifrt_metrics.iulp_rtt = rtt; if (which & RTV_SSTHRESH) { - ifrt->ifrt_iulp_info.iulp_ssthresh = + ifrt->ifrt_metrics.iulp_ssthresh = metrics->rmx_ssthresh; } if (which & RTV_RTTVAR) - ifrt->ifrt_iulp_info.iulp_rtt_sd = metrics->rmx_rttvar; + ifrt->ifrt_metrics.iulp_rtt_sd = metrics->rmx_rttvar; if (which & RTV_SPIPE) - ifrt->ifrt_iulp_info.iulp_spipe = metrics->rmx_sendpipe; + ifrt->ifrt_metrics.iulp_spipe = metrics->rmx_sendpipe; if (which & RTV_RPIPE) - ifrt->ifrt_iulp_info.iulp_rpipe = metrics->rmx_recvpipe; + ifrt->ifrt_metrics.iulp_rpipe = metrics->rmx_recvpipe; break; } - mutex_exit(&ipif->ipif_saved_ire_lock); + mutex_exit(&ill->ill_saved_ire_lock); + + /* + * Update any IRE_IF_CLONE hanging created from this IRE_IF so they + * get any new iulp_mtu. + * We do that by deleting them; ire_create_if_clone will pick + * up the new metrics. + */ + if ((ire->ire_type & IRE_INTERFACE) && ire->ire_dep_children != 0) + ire_dep_delete_if_clone(ire); } /* @@ -1407,27 +1471,69 @@ rts_getmetrics(ire_t *ire, rt_metrics_t *metrics) int metrics_set = 0; bzero(metrics, sizeof (rt_metrics_t)); + /* * iulp_rtt and iulp_rtt_sd are in milliseconds, but 4.4BSD-Lite2's * <net/route.h> says: rmx_rtt and rmx_rttvar are stored as * microseconds. */ - metrics->rmx_rtt = ire->ire_uinfo.iulp_rtt * 1000; + metrics->rmx_rtt = ire->ire_metrics.iulp_rtt * 1000; metrics_set |= RTV_RTT; - metrics->rmx_mtu = ire->ire_max_frag; + metrics->rmx_mtu = ire->ire_metrics.iulp_mtu; metrics_set |= RTV_MTU; - metrics->rmx_ssthresh = ire->ire_uinfo.iulp_ssthresh; + metrics->rmx_ssthresh = ire->ire_metrics.iulp_ssthresh; metrics_set |= RTV_SSTHRESH; - metrics->rmx_rttvar = ire->ire_uinfo.iulp_rtt_sd * 1000; + metrics->rmx_rttvar = ire->ire_metrics.iulp_rtt_sd * 1000; metrics_set |= RTV_RTTVAR; - metrics->rmx_sendpipe = ire->ire_uinfo.iulp_spipe; + metrics->rmx_sendpipe = ire->ire_metrics.iulp_spipe; metrics_set |= RTV_SPIPE; - metrics->rmx_recvpipe = ire->ire_uinfo.iulp_rpipe; + metrics->rmx_recvpipe = ire->ire_metrics.iulp_rpipe; metrics_set |= RTV_RPIPE; return (metrics_set); } /* + * Given two sets of metrics (src and dst), use the dst values if they are + * set. If a dst value is not set but the src value is set, then we use + * the src value. + * dst is updated with the new values. + * This is used to merge information from a dce_t and ire_metrics, where the + * dce values takes precedence. + */ +void +rts_merge_metrics(iulp_t *dst, const iulp_t *src) +{ + if (!src->iulp_set) + return; + + if (dst->iulp_ssthresh == 0) + dst->iulp_ssthresh = src->iulp_ssthresh; + if (dst->iulp_rtt == 0) + dst->iulp_rtt = src->iulp_rtt; + if (dst->iulp_rtt_sd == 0) + dst->iulp_rtt_sd = src->iulp_rtt_sd; + if (dst->iulp_spipe == 0) + dst->iulp_spipe = src->iulp_spipe; + if (dst->iulp_rpipe == 0) + dst->iulp_rpipe = src->iulp_rpipe; + if (dst->iulp_rtomax == 0) + dst->iulp_rtomax = src->iulp_rtomax; + if (dst->iulp_sack == 0) + dst->iulp_sack = src->iulp_sack; + if (dst->iulp_tstamp_ok == 0) + dst->iulp_tstamp_ok = src->iulp_tstamp_ok; + if (dst->iulp_wscale_ok == 0) + dst->iulp_wscale_ok = src->iulp_wscale_ok; + if (dst->iulp_ecn_ok == 0) + dst->iulp_ecn_ok = src->iulp_ecn_ok; + if (dst->iulp_pmtud_ok == 0) + dst->iulp_pmtud_ok = src->iulp_pmtud_ok; + if (dst->iulp_mtu == 0) + dst->iulp_mtu = src->iulp_mtu; +} + + +/* * Takes a pointer to a routing message and extracts necessary info by looking * at the rtm->rtm_addrs bits and store the requested sockaddrs in the pointers * passed (all of which must be valid). @@ -1552,7 +1658,8 @@ rts_getaddrs(rt_msghdr_t *rtm, in6_addr_t *dst_addrp, in6_addr_t *gw_addrp, static void rts_fill_msg(int type, int rtm_addrs, ipaddr_t dst, ipaddr_t mask, ipaddr_t gateway, ipaddr_t src_addr, ipaddr_t brd_addr, ipaddr_t author, - const ipif_t *ipif, mblk_t *mp, uint_t sacnt, const tsol_gc_t *gc) + ipaddr_t ifaddr, const ill_t *ill, mblk_t *mp, + const tsol_gc_t *gc) { rt_msghdr_t *rtm; sin_t *sin; @@ -1561,7 +1668,6 @@ rts_fill_msg(int type, int rtm_addrs, ipaddr_t dst, ipaddr_t mask, int i; ASSERT(mp != NULL); - ASSERT(sacnt == 0 || gc != NULL); /* * First find the type of the message * and its length. @@ -1571,7 +1677,7 @@ rts_fill_msg(int type, int rtm_addrs, ipaddr_t dst, ipaddr_t mask, * Now find the size of the data * that follows the message header. */ - data_size = rts_data_msg_size(rtm_addrs, AF_INET, sacnt); + data_size = rts_data_msg_size(rtm_addrs, AF_INET, gc != NULL ? 1 : 0); rtm = (rt_msghdr_t *)mp->b_rptr; mp->b_wptr = &mp->b_rptr[header_size]; @@ -1596,9 +1702,13 @@ rts_fill_msg(int type, int rtm_addrs, ipaddr_t dst, ipaddr_t mask, cp += sizeof (sin_t); break; case RTA_IFP: - cp += ill_dls_info((struct sockaddr_dl *)cp, ipif); + cp += ill_dls_info((struct sockaddr_dl *)cp, ill); break; case RTA_IFA: + sin->sin_addr.s_addr = ifaddr; + sin->sin_family = AF_INET; + cp += sizeof (sin_t); + break; case RTA_SRC: sin->sin_addr.s_addr = src_addr; sin->sin_family = AF_INET; @@ -1625,24 +1735,20 @@ rts_fill_msg(int type, int rtm_addrs, ipaddr_t dst, ipaddr_t mask, rtm_ext_t *rtm_ext; struct rtsa_s *rp_dst; tsol_rtsecattr_t *rsap; - int i; ASSERT(gc->gc_grp != NULL); ASSERT(RW_LOCK_HELD(&gc->gc_grp->gcgrp_rwlock)); - ASSERT(sacnt > 0); rtm_ext = (rtm_ext_t *)cp; rtm_ext->rtmex_type = RTMEX_GATEWAY_SECATTR; - rtm_ext->rtmex_len = TSOL_RTSECATTR_SIZE(sacnt); + rtm_ext->rtmex_len = TSOL_RTSECATTR_SIZE(1); rsap = (tsol_rtsecattr_t *)(rtm_ext + 1); - rsap->rtsa_cnt = sacnt; + rsap->rtsa_cnt = 1; rp_dst = rsap->rtsa_attr; - for (i = 0; i < sacnt; i++, gc = gc->gc_next, rp_dst++) { - ASSERT(gc->gc_db != NULL); - bcopy(&gc->gc_db->gcdb_attr, rp_dst, sizeof (*rp_dst)); - } + ASSERT(gc->gc_db != NULL); + bcopy(&gc->gc_db->gcdb_attr, rp_dst, sizeof (*rp_dst)); cp = (uchar_t *)rp_dst; } @@ -1659,6 +1765,7 @@ rts_fill_msg(int type, int rtm_addrs, ipaddr_t dst, ipaddr_t mask, /* * Allocates and initializes a routing socket message. + * Note that sacnt is either zero or one. */ mblk_t * rts_alloc_msg(int type, int rtm_addrs, sa_family_t af, uint_t sacnt) @@ -1755,7 +1862,7 @@ ip_rts_change(int type, ipaddr_t dst_addr, ipaddr_t gw_addr, ipaddr_t net_mask, if (mp == NULL) return; rts_fill_msg(type, rtm_addrs, dst_addr, net_mask, gw_addr, source, 0, - author, NULL, mp, 0, NULL); + author, 0, NULL, mp, NULL); rtm = (rt_msghdr_t *)mp->b_rptr; rtm->rtm_flags = flags; rtm->rtm_errno = error; @@ -1784,12 +1891,12 @@ ip_rts_xifmsg(const ipif_t *ipif, uint64_t set, uint64_t clear, uint_t flags) ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; /* - * This message should be generated only when the physical interface - * is changing state. + * This message should be generated only + * when the physical device is changing + * state. */ if (ipif->ipif_id != 0) return; - if (ipif->ipif_isv6) { af = AF_INET6; mp = rts_alloc_msg(RTM_IFINFO, RTA_IFP, af, 0); @@ -1797,14 +1904,15 @@ ip_rts_xifmsg(const ipif_t *ipif, uint64_t set, uint64_t clear, uint_t flags) return; rts_fill_msg_v6(RTM_IFINFO, RTA_IFP, &ipv6_all_zeros, &ipv6_all_zeros, &ipv6_all_zeros, &ipv6_all_zeros, - &ipv6_all_zeros, &ipv6_all_zeros, ipif, mp, 0, NULL); + &ipv6_all_zeros, &ipv6_all_zeros, &ipv6_all_zeros, + ipif->ipif_ill, mp, NULL); } else { af = AF_INET; mp = rts_alloc_msg(RTM_IFINFO, RTA_IFP, af, 0); if (mp == NULL) return; - rts_fill_msg(RTM_IFINFO, RTA_IFP, 0, 0, 0, 0, 0, 0, ipif, mp, - 0, NULL); + rts_fill_msg(RTM_IFINFO, RTA_IFP, 0, 0, 0, 0, 0, 0, 0, + ipif->ipif_ill, mp, NULL); } ifm = (if_msghdr_t *)mp->b_rptr; ifm->ifm_index = ipif->ipif_ill->ill_phyint->phyint_ifindex; @@ -1843,6 +1951,12 @@ ip_rts_newaddrmsg(int cmd, int error, const ipif_t *ipif, uint_t flags) sa_family_t af; ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; + /* + * Let conn_ixa caching know that source address selection + * changed + */ + ip_update_source_selection(ipst); + if (ipif->ipif_isv6) af = AF_INET6; else @@ -1875,15 +1989,17 @@ ip_rts_newaddrmsg(int cmd, int error, const ipif_t *ipif, uint_t flags) case AF_INET: rts_fill_msg(ncmd, rtm_addrs, 0, ipif->ipif_net_mask, 0, ipif->ipif_lcl_addr, - ipif->ipif_pp_dst_addr, 0, ipif, mp, - 0, NULL); + ipif->ipif_pp_dst_addr, 0, + ipif->ipif_lcl_addr, ipif->ipif_ill, + mp, NULL); break; case AF_INET6: rts_fill_msg_v6(ncmd, rtm_addrs, &ipv6_all_zeros, &ipif->ipif_v6net_mask, &ipv6_all_zeros, &ipif->ipif_v6lcl_addr, &ipif->ipif_v6pp_dst_addr, &ipv6_all_zeros, - ipif, mp, 0, NULL); + &ipif->ipif_v6lcl_addr, ipif->ipif_ill, + mp, NULL); break; } ifam = (ifa_msghdr_t *)mp->b_rptr; @@ -1904,14 +2020,15 @@ ip_rts_newaddrmsg(int cmd, int error, const ipif_t *ipif, uint_t flags) case AF_INET: rts_fill_msg(cmd, rtm_addrs, ipif->ipif_lcl_addr, ipif->ipif_net_mask, 0, - 0, 0, 0, NULL, mp, 0, NULL); + 0, 0, 0, 0, NULL, mp, NULL); break; case AF_INET6: rts_fill_msg_v6(cmd, rtm_addrs, &ipif->ipif_v6lcl_addr, &ipif->ipif_v6net_mask, &ipv6_all_zeros, &ipv6_all_zeros, &ipv6_all_zeros, - &ipv6_all_zeros, NULL, mp, 0, NULL); + &ipv6_all_zeros, &ipv6_all_zeros, + NULL, mp, NULL); break; } rtm = (rt_msghdr_t *)mp->b_rptr; |