diff options
Diffstat (limited to 'usr/src/cmd/cmd-inet/usr.sbin/in.routed/table.c')
-rw-r--r-- | usr/src/cmd/cmd-inet/usr.sbin/in.routed/table.c | 2795 |
1 files changed, 2795 insertions, 0 deletions
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/in.routed/table.c b/usr/src/cmd/cmd-inet/usr.sbin/in.routed/table.c new file mode 100644 index 0000000000..1ad9e14e25 --- /dev/null +++ b/usr/src/cmd/cmd-inet/usr.sbin/in.routed/table.c @@ -0,0 +1,2795 @@ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 1983, 1988, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgment: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sbin/routed/table.c,v 1.15 2000/08/11 08:24:38 sheldonh Exp $ + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include "defs.h" +#include <fcntl.h> +#include <stropts.h> +#include <sys/tihdr.h> +#include <inet/mib2.h> +#include <inet/ip.h> + +/* This structure is used to store a disassembled routing socket message. */ +struct rt_addrinfo { + int rti_addrs; + struct sockaddr_storage *rti_info[RTAX_MAX]; +}; + +static struct rt_spare *rts_better(struct rt_entry *); +static struct rt_spare rts_empty = EMPTY_RT_SPARE; +static void set_need_flash(void); +static void rtbad(struct rt_entry *, struct interface *); +static int rt_xaddrs(struct rt_addrinfo *, struct sockaddr_storage *, + char *, int); +static struct interface *gwkludge_iflookup(in_addr_t, in_addr_t, in_addr_t); + +struct radix_node_head *rhead; /* root of the radix tree */ + +/* Flash update needed. _B_TRUE to suppress the 1st. */ +boolean_t need_flash = _B_TRUE; + +struct timeval age_timer; /* next check of old routes */ +struct timeval need_kern = { /* need to update kernel table */ + EPOCH+MIN_WAITTIME-1, 0 +}; + +static uint32_t total_routes; + +#define ROUNDUP_LONG(a) \ + ((a) > 0 ? 
(1 + (((a) - 1) | (sizeof (long) - 1))) : sizeof (long)) + +/* + * It is desirable to "aggregate" routes, to combine differing routes of + * the same metric and next hop into a common route with a smaller netmask + * or to suppress redundant routes, routes that add no information to + * routes with smaller netmasks. + * + * A route is redundant if and only if any and all routes with smaller + * but matching netmasks and nets are the same. Since routes are + * kept sorted in the radix tree, redundant routes always come second. + * + * There are two kinds of aggregations. First, two routes of the same bit + * mask and differing only in the least significant bit of the network + * number can be combined into a single route with a coarser mask. + * + * Second, a route can be suppressed in favor of another route with a more + * coarse mask provided no incompatible routes with intermediate masks + * are present. The second kind of aggregation involves suppressing routes. + * A route must not be suppressed if an incompatible route exists with + * an intermediate mask, since the suppressed route would be covered + * by the intermediate. + * + * This code relies on the radix tree walk encountering routes + * sorted first by address, with the smallest address first. + */ + +static struct ag_info ag_slots[NUM_AG_SLOTS], *ag_avail, *ag_corsest, + *ag_finest; + +#ifdef DEBUG_AG +#define CHECK_AG() do { int acnt = 0; struct ag_info *cag; \ + for (cag = ag_avail; cag != NULL; cag = cag->ag_fine) \ + acnt++; \ + for (cag = ag_corsest; cag != NULL; cag = cag->ag_fine) \ + acnt++; \ + if (acnt != NUM_AG_SLOTS) \ + abort(); \ +} while (_B_FALSE) +#else +#define CHECK_AG() (void)0 +#endif + + +/* + * Output the contents of an aggregation table slot. + * This function must always be immediately followed with the deletion + * of the target slot. 
+ */ +static void +ag_out(struct ag_info *ag, void (*out)(struct ag_info *)) +{ + struct ag_info *ag_cors; + uint32_t bit; + + + /* Forget it if this route should not be output for split-horizon. */ + if (ag->ag_state & AGS_SPLIT_HZ) + return; + + /* + * If we output both the even and odd twins, then the immediate parent, + * if it is present, is redundant, unless the parent manages to + * aggregate into something coarser. + * On successive calls, this code detects the even and odd twins, + * and marks the parent. + * + * Note that the order in which the radix tree code emits routes + * ensures that the twins are seen before the parent is emitted. + */ + ag_cors = ag->ag_cors; + if (ag_cors != NULL && + ag_cors->ag_mask == (ag->ag_mask << 1) && + ag_cors->ag_dst_h == (ag->ag_dst_h & ag_cors->ag_mask)) { + ag_cors->ag_state |= ((ag_cors->ag_dst_h == ag->ag_dst_h) ? + AGS_REDUN0 : AGS_REDUN1); + } + + /* + * Skip it if this route is itself redundant. + * + * It is ok to change the contents of the slot here, since it is + * always deleted next. + */ + if (ag->ag_state & AGS_REDUN0) { + if (ag->ag_state & AGS_REDUN1) + return; /* quit if fully redundant */ + /* make it finer if it is half-redundant */ + bit = (-ag->ag_mask) >> 1; + ag->ag_dst_h |= bit; + ag->ag_mask |= bit; + + } else if (ag->ag_state & AGS_REDUN1) { + /* make it finer if it is half-redundant */ + bit = (-ag->ag_mask) >> 1; + ag->ag_mask |= bit; + } + out(ag); +} + + +static void +ag_del(struct ag_info *ag) +{ + CHECK_AG(); + + if (ag->ag_cors == NULL) + ag_corsest = ag->ag_fine; + else + ag->ag_cors->ag_fine = ag->ag_fine; + + if (ag->ag_fine == NULL) + ag_finest = ag->ag_cors; + else + ag->ag_fine->ag_cors = ag->ag_cors; + + ag->ag_fine = ag_avail; + ag_avail = ag; + + CHECK_AG(); +} + + +/* Look for a route that can suppress the given route. 
 */
static struct ag_info *
ag_find_suppressor(struct ag_info *ag)
{
	struct ag_info *ag_cors;
	in_addr_t dst_h = ag->ag_dst_h;

	/* Walk from the next coarser slot toward the coarsest one. */
	for (ag_cors = ag->ag_cors; ag_cors != NULL;
	    ag_cors = ag_cors->ag_cors) {

		if ((dst_h & ag_cors->ag_mask) == ag_cors->ag_dst_h) {
			/*
			 * We found a route with a coarser mask that covers
			 * the given target.  It can suppress the target
			 * only if it has a good enough metric and it
			 * either has the same (gateway, ifp), or if its state
			 * includes AGS_CORS_GATE or the target's state
			 * includes AGS_FINE_GATE.
			 */
			if (ag_cors->ag_pref <= ag->ag_pref &&
			    (((ag->ag_nhop == ag_cors->ag_nhop) &&
			    (ag->ag_ifp == ag_cors->ag_ifp)) ||
			    ag_cors->ag_state & AGS_CORS_GATE ||
			    ag->ag_state & AGS_FINE_GATE)) {
				return (ag_cors);
			}
		}
	}

	/* No covering route qualified. */
	return (NULL);
}


/*
 * Flush routes waiting for aggregation.
 * This must not suppress a route unless it is known that among all routes
 * with coarser masks that match it, the one with the longest mask is
 * appropriate.  This is ensured by scanning the routes in lexical order,
 * and with the most restrictive mask first among routes to the same
 * destination.
 *
 * Every slot whose mask is at least lim_mask and whose destination matches
 * lim_dst_h under lim_mask is either handed to ag_out() or suppressed, and
 * is then released with ag_del().
 */
void
ag_flush(in_addr_t lim_dst_h,		/* flush routes to here */
    in_addr_t lim_mask,			/* matching this mask */
    void (*out)(struct ag_info *))
{
	struct ag_info *ag, *ag_cors, *ag_supr;
	in_addr_t dst_h;


	/* Scan from the finest slot toward coarser ones. */
	for (ag = ag_finest; ag != NULL && ag->ag_mask >= lim_mask;
	    ag = ag_cors) {
		/* Get the next route now, before we delete ag. */
		ag_cors = ag->ag_cors;

		/* Work on only the specified routes. */
		dst_h = ag->ag_dst_h;
		if ((dst_h & lim_mask) != lim_dst_h)
			continue;

		/*
		 * Don't try to suppress the route if its state doesn't
		 * include AGS_SUPPRESS.
		 */
		if (!(ag->ag_state & AGS_SUPPRESS)) {
			ag_out(ag, out);
			ag_del(ag);
			continue;
		}

		ag_supr = ag_find_suppressor(ag);
		if (ag_supr == NULL) {
			/*
			 * We didn't find a route which suppresses the
			 * target, so the target can go out.
			 */
			ag_out(ag, out);
		} else {
			/*
			 * We found a route which suppresses the target, so
			 * don't output the target.
			 */
			if (TRACEACTIONS) {
				trace_misc("aggregated away %s",
				    rtname(htonl(ag->ag_dst_h), ag->ag_mask,
				    ag->ag_nhop));
				trace_misc("on coarser route %s",
				    rtname(htonl(ag_supr->ag_dst_h),
				    ag_supr->ag_mask, ag_supr->ag_nhop));
			}
			/*
			 * If the suppressed target was redundant, then
			 * mark the suppressor as redundant.
			 */
			if (AG_IS_REDUN(ag->ag_state) &&
			    ag_supr->ag_mask == (ag->ag_mask<<1)) {
				if (ag_supr->ag_dst_h == dst_h)
					ag_supr->ag_state |= AGS_REDUN0;
				else
					ag_supr->ag_state |= AGS_REDUN1;
			}
			/*
			 * The suppressor now stands for both routes, so a
			 * tag or next hop that differs between them is
			 * cleared on the suppressor.
			 */
			if (ag->ag_tag != ag_supr->ag_tag)
				ag_supr->ag_tag = 0;
			if (ag->ag_nhop != ag_supr->ag_nhop)
				ag_supr->ag_nhop = 0;
		}

		/* The route has either been output or suppressed */
		ag_del(ag);
	}

	CHECK_AG();
}


/*
 * Try to aggregate a route with previous routes.
 *
 * dst is converted with ntohl() on entry and compared with the ag_dst_h
 * fields from then on.  The new route is either emitted straight through
 * out() (non-contiguous mask), merged into an existing slot, or stored in
 * a slot taken from the ag_avail free list; slots it displaces are flushed
 * through out() via ag_flush().
 */
void
ag_check(in_addr_t dst,
    in_addr_t mask,
    in_addr_t gate,
    struct interface *ifp,
    in_addr_t nhop,
    uint8_t metric,
    uint8_t pref,
    uint32_t seqno,
    uint16_t tag,
    uint16_t state,
    void (*out)(struct ag_info *))	/* output using this */
{
	struct ag_info *ag, *nag, *ag_cors;
	in_addr_t xaddr;
	int tmp;
	struct interface *xifp;

	dst = ntohl(dst);

	/*
	 * Don't bother trying to aggregate routes with non-contiguous
	 * subnet masks.
	 *
	 * (X & -X) contains a single bit if and only if X is a power of 2.
	 * (X + (X & -X)) == 0 if and only if X is a power of 2.
	 */
	if ((mask & -mask) + mask != 0) {
		/* Emit the route unchanged from a temporary slot. */
		struct ag_info nc_ag;

		nc_ag.ag_dst_h = dst;
		nc_ag.ag_mask = mask;
		nc_ag.ag_gate = gate;
		nc_ag.ag_ifp = ifp;
		nc_ag.ag_nhop = nhop;
		nc_ag.ag_metric = metric;
		nc_ag.ag_pref = pref;
		nc_ag.ag_tag = tag;
		nc_ag.ag_state = state;
		nc_ag.ag_seqno = seqno;
		out(&nc_ag);
		return;
	}

	/* Search for the right slot in the aggregation table. */
	ag_cors = NULL;
	ag = ag_corsest;
	while (ag != NULL) {
		if (ag->ag_mask >= mask)
			break;

		/*
		 * Suppress old routes (i.e. combine with compatible routes
		 * with coarser masks) as we look for the right slot in the
		 * aggregation table for the new route.
		 * A route to an address less than the current destination
		 * will not be affected by the current route or any route
		 * seen hereafter.  That means it is safe to suppress it.
		 * This check keeps poor routes (e.g. with large hop counts)
		 * from preventing suppression of finer routes.
		 */
		if (ag_cors != NULL && ag->ag_dst_h < dst &&
		    (ag->ag_state & AGS_SUPPRESS) &&
		    ag_cors->ag_pref <= ag->ag_pref &&
		    (ag->ag_dst_h & ag_cors->ag_mask) == ag_cors->ag_dst_h &&
		    ((ag_cors->ag_nhop == ag->ag_nhop &&
		    (ag_cors->ag_ifp == ag->ag_ifp))||
		    (ag->ag_state & AGS_FINE_GATE) ||
		    (ag_cors->ag_state & AGS_CORS_GATE))) {
			/*
			 * If the suppressed target was redundant,
			 * then mark the suppressor redundant.
			 *
			 * NOTE(review): the test below compares against the
			 * new route's dst, whereas ag_flush() and ag_out()
			 * compare the coarser route with the suppressed
			 * route's own destination.  Because this branch
			 * requires ag_cors->ag_dst_h <= ag->ag_dst_h < dst,
			 * the equality looks like it can never hold here, so
			 * AGS_REDUN1 would always be chosen -- confirm intent.
			 */
			if (AG_IS_REDUN(ag->ag_state) &&
			    ag_cors->ag_mask == (ag->ag_mask << 1)) {
				if (ag_cors->ag_dst_h == dst)
					ag_cors->ag_state |= AGS_REDUN0;
				else
					ag_cors->ag_state |= AGS_REDUN1;
			}
			if (ag->ag_tag != ag_cors->ag_tag)
				ag_cors->ag_tag = 0;
			if (ag->ag_nhop != ag_cors->ag_nhop)
				ag_cors->ag_nhop = 0;
			ag_del(ag);
			CHECK_AG();
		} else {
			ag_cors = ag;
		}
		/*
		 * ag_cors is non-NULL here: either it was set just above or
		 * the suppression branch required it.
		 */
		ag = ag_cors->ag_fine;
	}

	/*
	 * If we find the even/odd twin of the new route, and if the
	 * masks and so forth are equal, we can aggregate them.
	 * We can probably promote one of the pair.
	 *
	 * Since the routes are encountered in lexical order,
	 * the new route must be odd.  However, the second or later
	 * times around this loop, it could be the even twin promoted
	 * from the even/odd pair of twins of the finer route.
	 */
	while (ag != NULL && ag->ag_mask == mask &&
	    ((ag->ag_dst_h ^ dst) & (mask<<1)) == 0) {

		/*
		 * Here we know the target route and the route in the current
		 * slot have the same netmasks and differ by at most the
		 * last bit.  They are either for the same destination, or
		 * for an even/odd pair of destinations.
		 */
		if (ag->ag_dst_h == dst) {
			if (ag->ag_nhop == nhop && ag->ag_ifp == ifp) {
				/*
				 * We have two routes to the same destination,
				 * with the same nexthop and interface.
				 * Routes are encountered in lexical order,
				 * so a route is never promoted until the
				 * parent route is already present.  So we
				 * know that the new route is a promoted (or
				 * aggregated) pair and the route already in
				 * the slot is the explicit route.
				 *
				 * Prefer the best route if their metrics
				 * differ, or the aggregated one if not,
				 * following a sort of longest-match rule.
				 */
				if (pref <= ag->ag_pref) {
					ag->ag_gate = gate;
					ag->ag_ifp = ifp;
					ag->ag_nhop = nhop;
					ag->ag_tag = tag;
					ag->ag_metric = metric;
					ag->ag_pref = pref;
					if (seqno > ag->ag_seqno)
						ag->ag_seqno = seqno;
					/* swap so "state" holds the loser's */
					tmp = ag->ag_state;
					ag->ag_state = state;
					state = tmp;
				}

				/*
				 * Some bits are set if they are set on
				 * either route, except when the route is
				 * for an interface.
				 */
				if (!(ag->ag_state & AGS_IF))
					ag->ag_state |=
					    (state & (AGS_AGGREGATE_EITHER |
					    AGS_REDUN0 | AGS_REDUN1));

				return;
			} else {
				/*
				 * multiple routes to same dest/mask with
				 * differing gate nexthop/or ifp. Flush
				 * both out.
				 */
				break;
			}
		}

		/*
		 * If one of the routes can be promoted and the other can
		 * be suppressed, it may be possible to combine them or
		 * worthwhile to promote one.
		 *
		 * Any route that can be promoted is always
		 * marked to be eligible to be suppressed.
		 */
		if (!((state & AGS_AGGREGATE) &&
		    (ag->ag_state & AGS_SUPPRESS)) &&
		    !((ag->ag_state & AGS_AGGREGATE) && (state & AGS_SUPPRESS)))
			break;

		/*
		 * A pair of even/odd twin routes can be combined
		 * if either is redundant, or if they are via the
		 * same gateway and have the same metric.
		 */
		if (AG_IS_REDUN(ag->ag_state) || AG_IS_REDUN(state) ||
		    (ag->ag_nhop == nhop && ag->ag_ifp == ifp &&
		    ag->ag_pref == pref &&
		    (state & ag->ag_state & AGS_AGGREGATE) != 0)) {

			/*
			 * We have both the even and odd pairs.
			 * Since the routes are encountered in order,
			 * the route in the slot must be the even twin.
			 *
			 * Combine and promote (aggregate) the pair of routes.
			 */
			if (seqno < ag->ag_seqno)
				seqno = ag->ag_seqno;
			if (!AG_IS_REDUN(state))
				state &= ~AGS_REDUN1;
			if (AG_IS_REDUN(ag->ag_state))
				state |= AGS_REDUN0;
			else
				state &= ~AGS_REDUN0;
			state |= (ag->ag_state & AGS_AGGREGATE_EITHER);
			if (ag->ag_tag != tag)
				tag = 0;
			if (ag->ag_nhop != nhop)
				nhop = 0;

			/*
			 * Get rid of the even twin that was already
			 * in the slot.
			 */
			ag_del(ag);

		} else if (ag->ag_pref >= pref &&
		    (ag->ag_state & AGS_AGGREGATE)) {
			/*
			 * If we cannot combine the pair, maybe the route
			 * with the worse metric can be promoted.
			 *
			 * Promote the old, even twin, by giving its slot
			 * in the table to the new, odd twin.
			 */
			ag->ag_dst_h = dst;

			/*
			 * Exchange every attribute: the slot takes the new
			 * route's values and the locals take the old ones.
			 */
			xaddr = ag->ag_gate;
			ag->ag_gate = gate;
			gate = xaddr;

			xifp = ag->ag_ifp;
			ag->ag_ifp = ifp;
			ifp = xifp;

			xaddr = ag->ag_nhop;
			ag->ag_nhop = nhop;
			nhop = xaddr;

			tmp = ag->ag_tag;
			ag->ag_tag = tag;
			tag = tmp;

			/*
			 * The promoted route is even-redundant only if the
			 * even twin was fully redundant.  It is not
			 * odd-redundant because the odd-twin will still be
			 * in the table.
			 */
			tmp = ag->ag_state;
			if (!AG_IS_REDUN(tmp))
				tmp &= ~AGS_REDUN0;
			tmp &= ~AGS_REDUN1;
			ag->ag_state = state;
			state = tmp;

			tmp = ag->ag_metric;
			ag->ag_metric = metric;
			metric = tmp;

			tmp = ag->ag_pref;
			ag->ag_pref = pref;
			pref = tmp;

			/* take the newest sequence number */
			if (seqno <= ag->ag_seqno)
				seqno = ag->ag_seqno;
			else
				ag->ag_seqno = seqno;

		} else {
			if (!(state & AGS_AGGREGATE))
				break;	/* cannot promote either twin */

			/*
			 * Promote the new, odd twin by shaving its
			 * mask and address.
			 * The promoted route is odd-redundant only if the
			 * odd twin was fully redundant.  It is not
			 * even-redundant because the even twin is still in
			 * the table.
			 */
			if (!AG_IS_REDUN(state))
				state &= ~AGS_REDUN1;
			state &= ~AGS_REDUN0;
			if (seqno < ag->ag_seqno)
				seqno = ag->ag_seqno;
			else
				ag->ag_seqno = seqno;
		}

		/* The route carried in the locals is now one bit coarser. */
		mask <<= 1;
		dst &= mask;

		/* Step one slot toward the coarse end and try again. */
		if (ag_cors == NULL) {
			ag = ag_corsest;
			break;
		}
		ag = ag_cors;
		ag_cors = ag->ag_cors;
	}

	/*
	 * When we can no longer promote and combine routes,
	 * flush the old route in the target slot.  Also flush
	 * any finer routes that we know will never be aggregated by
	 * the new route.
	 *
	 * In case we moved toward coarser masks,
	 * get back where we belong
	 */
	if (ag != NULL && ag->ag_mask < mask) {
		ag_cors = ag;
		ag = ag->ag_fine;
	}

	/* Empty the target slot */
	if (ag != NULL && ag->ag_mask == mask) {
		ag_flush(ag->ag_dst_h, ag->ag_mask, out);
		/* ag may have been deleted by the flush; re-derive it. */
		ag = (ag_cors == NULL) ? ag_corsest : ag_cors->ag_fine;
	}

#ifdef DEBUG_AG
	/* ag_cors and ag must be adjacent list neighbours at this point. */
	if (ag == NULL && ag_cors != ag_finest)
		abort();
	if (ag_cors == NULL && ag != ag_corsest)
		abort();
	if (ag != NULL && ag->ag_cors != ag_cors)
		abort();
	if (ag_cors != NULL && ag_cors->ag_fine != ag)
		abort();
	CHECK_AG();
#endif

	/*
	 * Save the new route on the end of the table.
	 *
	 * NOTE(review): nag is used without a NULL check, so this assumes
	 * the free list always has a slot left here -- confirm against
	 * NUM_AG_SLOTS and the free-list initialization.
	 */
	nag = ag_avail;
	ag_avail = nag->ag_fine;

	nag->ag_dst_h = dst;
	nag->ag_mask = mask;
	nag->ag_ifp = ifp;
	nag->ag_gate = gate;
	nag->ag_nhop = nhop;
	nag->ag_metric = metric;
	nag->ag_pref = pref;
	nag->ag_tag = tag;
	nag->ag_state = state;
	nag->ag_seqno = seqno;

	/* Link the new slot between ag_cors (coarser) and ag (finer). */
	nag->ag_fine = ag;
	if (ag != NULL)
		ag->ag_cors = nag;
	else
		ag_finest = nag;
	nag->ag_cors = ag_cors;
	if (ag_cors == NULL)
		ag_corsest = nag;
	else
		ag_cors->ag_fine = nag;
	CHECK_AG();
}


/*
 * Map a routing socket message type to a printable name.
 *
 * Types 1 through the size of the table index rtm_types[type - 1].  Any
 * other value is formatted into a static buffer, so that result is
 * overwritten by the next call that takes the same path.
 */
static const char *
rtm_type_name(uchar_t type)
{
	static const char *rtm_types[] = {
		"RTM_ADD",
		"RTM_DELETE",
		"RTM_CHANGE",
		"RTM_GET",
		"RTM_LOSING",
		"RTM_REDIRECT",
		"RTM_MISS",
		"RTM_LOCK",
		"RTM_OLDADD",
		"RTM_OLDDEL",
		"RTM_RESOLVE",
		"RTM_NEWADDR",
		"RTM_DELADDR",
		"RTM_IFINFO",
		"RTM_NEWMADDR",
		"RTM_DELMADDR"
	};
#define	NEW_RTM_PAT	"RTM type %#x"
	static char name0[sizeof (NEW_RTM_PAT) + 2];

	/* type is 1-based, so a value equal to the element count is valid */
	if (type > sizeof (rtm_types) / sizeof (rtm_types[0]) || type == 0) {
		(void) snprintf(name0, sizeof (name0), NEW_RTM_PAT, type);
		return (name0);
	} else {
		return (rtm_types[type-1]);
	}
#undef NEW_RTM_PAT
}


/*
 * Trace a routing socket message: a summary line, the header fields for
 * the header layout selected by rtm_type, and then the bytes following
 * the header as hex, sixteen per trace line.
 *
 * act is a caller-supplied label used in the summary line; mlen is the
 * number of bytes the caller has for the message.
 */
static void
dump_rt_msg(const char *act, struct rt_msghdr *rtm, int mlen)
{
	const char *mtype;
	uchar_t *cp;
	int i, j;
	char buffer[16*3 + 1], *ibs;	/* 16 x " XX" plus the terminator */
	struct ifa_msghdr *ifam;
	struct if_msghdr *ifm;

	/* Choose the label matching the header layout used below. */
	switch (rtm->rtm_type) {
	case RTM_NEWADDR:
	case RTM_DELADDR:
		mtype = "ifam";
		break;
	case RTM_IFINFO:
		mtype = "ifm";
		break;
	default:
		mtype = "rtm";
		break;
	}
	trace_misc("%s %s %d bytes", act, mtype, mlen);
	/* Never dump past the length the message claims for itself. */
	if (mlen > rtm->rtm_msglen) {
		trace_misc("%s: extra %d bytes ignored", mtype,
		    mlen - rtm->rtm_msglen);
		mlen = rtm->rtm_msglen;
	} else if (mlen < rtm->rtm_msglen) {
		trace_misc("%s: truncated by %d bytes", mtype,
		    rtm->rtm_msglen - mlen);
	}
	/* Print the header and leave cp just past it. */
	switch (rtm->rtm_type) {
	case RTM_NEWADDR:
	case RTM_DELADDR:
		ifam = (struct ifa_msghdr *)rtm;
		trace_misc("ifam: msglen %d version %d type %d addrs %X",
		    ifam->ifam_msglen, ifam->ifam_version, ifam->ifam_type,
		    ifam->ifam_addrs);
		trace_misc("ifam: flags %X index %d metric %d",
		    ifam->ifam_flags, ifam->ifam_index, ifam->ifam_metric);
		cp = (uchar_t *)(ifam + 1);
		break;
	case RTM_IFINFO:
		ifm = (struct if_msghdr *)rtm;
		trace_misc("ifm: msglen %d version %d type %d addrs %X",
		    ifm->ifm_msglen, ifm->ifm_version, ifm->ifm_type,
		    ifm->ifm_addrs);
		/* ibs is freed below, so if_bit_string() allocates it. */
		ibs = if_bit_string(ifm->ifm_flags, _B_TRUE);
		if (ibs == NULL) {
			trace_misc("ifm: flags %#x index %d", ifm->ifm_flags,
			    ifm->ifm_index);
		} else {
			trace_misc("ifm: flags %s index %d", ibs,
			    ifm->ifm_index);
			free(ibs);
		}
		cp = (uchar_t *)(ifm + 1);
		break;
	default:
		trace_misc("rtm: msglen %d version %d type %d index %d",
		    rtm->rtm_msglen, rtm->rtm_version, rtm->rtm_type,
		    rtm->rtm_index);
		trace_misc("rtm: flags %X addrs %X pid %d seq %d",
		    rtm->rtm_flags, rtm->rtm_addrs, rtm->rtm_pid, rtm->rtm_seq);
		trace_misc("rtm: errno %d use %d inits %X", rtm->rtm_errno,
		    rtm->rtm_use, rtm->rtm_inits);
		cp = (uchar_t *)(rtm + 1);
		break;
	}
	/* i counts the bytes that follow the header; may be <= 0. */
	i = mlen - (cp - (uint8_t *)rtm);
	while (i > 0) {
		buffer[0] = '\0';
		ibs = buffer;
		/* at most 16 bytes of 3 characters each fit in buffer */
		for (j = 0; j < 16 && i > 0; j++, i--)
			ibs += sprintf(ibs, " %02X", *cp++);
		trace_misc("addr%s", buffer);
	}
}

/*
 * Tell the kernel to add, delete or change a route
 * Pass k_state from khash in for diagnostic info.
 */
static void
rtioctl(int action,			/* RTM_DELETE, etc */
    in_addr_t dst,
    in_addr_t gate,
    in_addr_t mask,
    struct interface *ifp,
    uint8_t metric,
    int flags)
{
	static int rt_sock_seqno = 0;
	/*
	 * The message written to rt_sock: header, destination, gateway,
	 * then the optional netmask and interface sockaddrs in w_space.
	 */
	struct {
		struct rt_msghdr w_rtm;
		struct sockaddr_in w_dst;
		struct sockaddr_in w_gate;
		uint8_t w_space[512];
	} w;
	struct sockaddr_in w_mask;
	struct sockaddr_dl w_ifp;
	uint8_t *cp;			/* next free byte in w.w_space */
	long cc;
	/*
	 * Shared tail of every diagnostic below.  ARGS names this function's
	 * parameters directly, so it reflects a retried "action" as well.
	 */
#define	PAT	" %-10s %s metric=%d flags=%#x"
#define	ARGS	rtm_type_name(action), rtname(dst, mask, gate), metric, flags

again:
	/* Rebuild the message from scratch; this is also the retry point. */
	(void) memset(&w, 0, sizeof (w));
	(void) memset(&w_mask, 0, sizeof (w_mask));
	(void) memset(&w_ifp, 0, sizeof (w_ifp));
	cp = w.w_space;
	/* Base length covers the header plus w_dst and w_gate. */
	w.w_rtm.rtm_msglen = sizeof (struct rt_msghdr) +
	    2 * ROUNDUP_LONG(sizeof (struct sockaddr_in));
	w.w_rtm.rtm_version = RTM_VERSION;
	w.w_rtm.rtm_type = action;
	w.w_rtm.rtm_flags = flags;
	w.w_rtm.rtm_seq = ++rt_sock_seqno;
	w.w_rtm.rtm_addrs = RTA_DST|RTA_GATEWAY;
	/* A zero metric is only passed along explicitly for RTM_CHANGE. */
	if (metric != 0 || action == RTM_CHANGE) {
		w.w_rtm.rtm_rmx.rmx_hopcount = metric;
		w.w_rtm.rtm_inits |= RTV_HOPCOUNT;
	}
	w.w_dst.sin_family = AF_INET;
	w.w_dst.sin_addr.s_addr = dst;
	w.w_gate.sin_family = AF_INET;
	w.w_gate.sin_addr.s_addr = gate;
	if (mask == HOST_MASK) {
		/* Host routes are flagged instead of carrying a netmask. */
		w.w_rtm.rtm_flags |= RTF_HOST;
	} else {
		w.w_rtm.rtm_addrs |= RTA_NETMASK;
		w_mask.sin_family = AF_INET;
		w_mask.sin_addr.s_addr = htonl(mask);
		(void) memmove(cp, &w_mask, sizeof (w_mask));
		cp += ROUNDUP_LONG(sizeof (struct sockaddr_in));
		w.w_rtm.rtm_msglen += ROUNDUP_LONG(sizeof (struct sockaddr_in));
	}
	/* With no interface given, look one up from the gateway address. */
	if (ifp == NULL)
		ifp = iflookup(gate);

	/* Name the interface when one is known; otherwise send without it. */
	if ((ifp == NULL) || (ifp->int_phys == NULL)) {
		trace_misc("no ifp for" PAT, ARGS);
	} else {
		if (ifp->int_phys->phyi_index > UINT16_MAX) {
			trace_misc("ifindex %d is too big for sdl_index",
			    ifp->int_phys->phyi_index);
		} else {
			w_ifp.sdl_family = AF_LINK;
			w.w_rtm.rtm_addrs |= RTA_IFP;
			w_ifp.sdl_index = ifp->int_phys->phyi_index;
			(void) memmove(cp, &w_ifp, sizeof (w_ifp));
			w.w_rtm.rtm_msglen +=
			    ROUNDUP_LONG(sizeof (struct sockaddr_dl));
		}
	}


	/* When no_install is set nothing is written to the socket. */
	if (!no_install) {
		if (TRACERTS)
			dump_rt_msg("write", &w.w_rtm, w.w_rtm.rtm_msglen);
		cc = write(rt_sock, &w, w.w_rtm.rtm_msglen);
		if (cc < 0) {
			/*
			 * ESRCH on a change or delete is only traced: a
			 * change is retried as an add, a delete is dropped.
			 */
			if (errno == ESRCH && (action == RTM_CHANGE ||
			    action == RTM_DELETE)) {
				trace_act("route disappeared before" PAT, ARGS);
				if (action == RTM_CHANGE) {
					action = RTM_ADD;
					goto again;
				}
				return;
			}
			writelog(LOG_WARNING, "write(rt_sock)" PAT ": %s ",
			    ARGS, rip_strerror(errno));
			return;
		} else if (cc != w.w_rtm.rtm_msglen) {
			/* short write */
			msglog("write(rt_sock) wrote %ld instead of %d for" PAT,
			    cc, w.w_rtm.rtm_msglen, ARGS);
			return;
		}
	}
	if (TRACEKERNEL)
		trace_misc("write kernel" PAT, ARGS);
#undef PAT
#undef ARGS
}


/* Hash table containing our image of the kernel forwarding table. */
#define	KHASH_SIZE 71			/* should be prime */
/* Bucket head for a (destination, mask) pair; yields an lvalue. */
#define	KHASH(a, m) khash_bins[((a) ^ (m)) % KHASH_SIZE]
static struct khash *khash_bins[KHASH_SIZE];

#define	K_KEEP_LIM 30			/* k_keep */

/*
 * Look up the entry for (dst, mask) in the kernel-table image.
 *
 * A gate of 0 or an ifp of NULL acts as a wildcard for that field.
 * Returns the first matching entry, or NULL.  When ppk is not NULL, *ppk
 * receives the address of the link that points at the returned entry, or
 * of the chain's terminating NULL link when nothing matched.
 */
static struct khash *
kern_find(in_addr_t dst, in_addr_t mask, in_addr_t gate,
    struct interface *ifp, struct khash ***ppk)
{
	struct khash *k, **pk;

	for (pk = &KHASH(dst, mask); (k = *pk) != NULL; pk = &k->k_next) {
		if (k->k_dst == dst && k->k_mask == mask &&
		    (gate == 0 || k->k_gate == gate) &&
		    (ifp == NULL || k->k_ifp == ifp)) {
			break;
		}
	}
	if (ppk != NULL)
		*ppk = pk;
	return (k);
}


/*
 * Find out if there is an alternate route to a given destination
 * off of a given interface.
+ */ +static struct khash * +kern_alternate(in_addr_t dst, in_addr_t mask, in_addr_t gate, + struct interface *ifp, struct khash ***ppk) +{ + struct khash *k, **pk; + + for (pk = &KHASH(dst, mask); (k = *pk) != NULL; pk = &k->k_next) { + if (k->k_dst == dst && k->k_mask == mask && + (k->k_gate != gate) && + (k->k_ifp == ifp)) { + break; + } + } + if (ppk != NULL) + *ppk = pk; + return (k); +} + +static struct khash * +kern_add(in_addr_t dst, uint32_t mask, in_addr_t gate, struct interface *ifp) +{ + struct khash *k, **pk; + + k = kern_find(dst, mask, gate, ifp, &pk); + if (k != NULL) + return (k); + + k = rtmalloc(sizeof (*k), "kern_add"); + + (void) memset(k, 0, sizeof (*k)); + k->k_dst = dst; + k->k_mask = mask; + k->k_state = KS_NEW; + k->k_keep = now.tv_sec; + k->k_gate = gate; + k->k_ifp = ifp; + *pk = k; + + return (k); +} + +/* delete all khash entries that are wired through the interface ifp */ +void +kern_flush_ifp(struct interface *ifp) +{ + struct khash *k, *kprev, *knext; + int i; + + for (i = 0; i < KHASH_SIZE; i++) { + kprev = NULL; + for (k = khash_bins[i]; k != NULL; k = knext) { + knext = k->k_next; + if (k->k_ifp == ifp) { + if (kprev != NULL) + kprev->k_next = k->k_next; + else + khash_bins[i] = k->k_next; + free(k); + continue; + } + kprev = k; + } + } +} + +/* + * rewire khash entries that currently go through oldifp to + * go through newifp. + */ +void +kern_rewire_ifp(struct interface *oldifp, struct interface *newifp) +{ + struct khash *k; + int i; + + for (i = 0; i < KHASH_SIZE; i++) { + for (k = khash_bins[i]; k; k = k->k_next) { + if (k->k_ifp == oldifp) { + k->k_ifp = newifp; + trace_misc("kern_rewire_ifp k 0x%lx " + "from %s to %s", k, oldifp->int_name, + newifp->int_name); + } + } + } +} + + +/* + * Check that a static route it is still in the daemon table, and not + * deleted by interfaces coming and going. This is also the routine + * responsible for adding new static routes to the daemon table. 
+ */ +static void +kern_check_static(struct khash *k, struct interface *ifp) +{ + struct rt_entry *rt; + struct rt_spare new; + uint16_t rt_state = RS_STATIC; + + (void) memset(&new, 0, sizeof (new)); + new.rts_ifp = ifp; + new.rts_gate = k->k_gate; + new.rts_router = (ifp != NULL) ? ifp->int_addr : loopaddr; + new.rts_metric = k->k_metric; + new.rts_time = now.tv_sec; + new.rts_origin = RO_STATIC; + + rt = rtget(k->k_dst, k->k_mask); + if ((ifp != NULL && !IS_IFF_ROUTING(ifp->int_if_flags)) || + (k->k_state & KS_PRIVATE)) + rt_state |= RS_NOPROPAGATE; + + if (rt != NULL) { + if ((rt->rt_state & RS_STATIC) == 0) { + /* + * We are already tracking this dest/mask + * via RIP/RDISC. Ignore the static route, + * because we don't currently have a good + * way to compare metrics on static routes + * with rip metrics, and therefore cannot + * mix and match the two. + */ + return; + } + rt_state |= rt->rt_state; + if (rt->rt_state != rt_state) + rtchange(rt, rt_state, &new, 0); + } else { + rtadd(k->k_dst, k->k_mask, rt_state, &new); + } +} + + +/* operate on a kernel entry */ +static void +kern_ioctl(struct khash *k, + int action, /* RTM_DELETE, etc */ + int flags) +{ + if (((k->k_state & (KS_IF|KS_PASSIVE)) == KS_IF) || + (k->k_state & KS_DEPRE_IF)) { + /* + * Prevent execution of RTM_DELETE, RTM_ADD or + * RTM_CHANGE of interface routes + */ + trace_act("Blocking execution of %s %s --> %s ", + rtm_type_name(action), + addrname(k->k_dst, k->k_mask, 0), naddr_ntoa(k->k_gate)); + return; + } + + switch (action) { + case RTM_DELETE: + k->k_state &= ~KS_DYNAMIC; + if (k->k_state & KS_DELETED) + return; + k->k_state |= KS_DELETED; + break; + case RTM_ADD: + k->k_state &= ~KS_DELETED; + break; + case RTM_CHANGE: + if (k->k_state & KS_DELETED) { + action = RTM_ADD; + k->k_state &= ~KS_DELETED; + } + break; + } + + rtioctl(action, k->k_dst, k->k_gate, k->k_mask, k->k_ifp, + k->k_metric, flags); +} + + +/* add a route the kernel told us */ +static void +rtm_add(struct rt_msghdr 
*rtm, + struct rt_addrinfo *info, + time_t keep, + boolean_t interf_route, + struct interface *ifptr) +{ + struct khash *k; + struct interface *ifp = ifptr; + in_addr_t mask, gate = 0; + static struct msg_limit msg_no_ifp; + + if (rtm->rtm_flags & RTF_HOST) { + mask = HOST_MASK; + } else if (INFO_MASK(info) != 0) { + mask = ntohl(S_ADDR(INFO_MASK(info))); + } else { + writelog(LOG_WARNING, + "ignore %s without mask", rtm_type_name(rtm->rtm_type)); + return; + } + + /* + * Find the interface toward the gateway. + */ + if (INFO_GATE(info) != NULL) + gate = S_ADDR(INFO_GATE(info)); + + if (ifp == NULL) { + if (INFO_GATE(info) != NULL) + ifp = iflookup(gate); + if (ifp == NULL) + msglim(&msg_no_ifp, gate, + "route %s --> %s nexthop is not directly connected", + addrname(S_ADDR(INFO_DST(info)), mask, 0), + naddr_ntoa(gate)); + } + + k = kern_add(S_ADDR(INFO_DST(info)), mask, gate, ifp); + + if (k->k_state & KS_NEW) + k->k_keep = now.tv_sec+keep; + if (INFO_GATE(info) == 0) { + trace_act("note %s without gateway", + rtm_type_name(rtm->rtm_type)); + k->k_metric = HOPCNT_INFINITY; + } else if (INFO_GATE(info)->ss_family != AF_INET) { + trace_act("note %s with gateway AF=%d", + rtm_type_name(rtm->rtm_type), + INFO_GATE(info)->ss_family); + k->k_metric = HOPCNT_INFINITY; + } else { + k->k_gate = S_ADDR(INFO_GATE(info)); + k->k_metric = rtm->rtm_rmx.rmx_hopcount; + if (k->k_metric < 0) + k->k_metric = 0; + else if (k->k_metric > HOPCNT_INFINITY-1) + k->k_metric = HOPCNT_INFINITY-1; + } + + if ((k->k_state & KS_NEW) && interf_route) { + if (k->k_gate != 0 && findifaddr(k->k_gate) == NULL) + k->k_state |= KS_DEPRE_IF; + else + k->k_state |= KS_IF; + } + + k->k_state &= ~(KS_NEW | KS_DELETE | KS_ADD | KS_CHANGE | KS_DEL_ADD | + KS_STATIC | KS_GATEWAY | KS_DELETED | KS_PRIVATE | KS_CHECK); + if (rtm->rtm_flags & RTF_GATEWAY) + k->k_state |= KS_GATEWAY; + if (rtm->rtm_flags & RTF_STATIC) + k->k_state |= KS_STATIC; + if (rtm->rtm_flags & RTF_PRIVATE) + k->k_state |= KS_PRIVATE; + + 
+ if (rtm->rtm_flags & (RTF_DYNAMIC | RTF_MODIFIED)) { + if (INFO_AUTHOR(info) != 0 && + INFO_AUTHOR(info)->ss_family == AF_INET) + ifp = iflookup(S_ADDR(INFO_AUTHOR(info))); + else + ifp = NULL; + if (should_supply(ifp) && (ifp == NULL || + !(ifp->int_state & IS_REDIRECT_OK))) { + /* + * Routers are not supposed to listen to redirects, + * so delete it if it came via an unknown interface + * or the interface does not have special permission. + */ + k->k_state &= ~KS_DYNAMIC; + k->k_state |= KS_DELETE; + LIM_SEC(need_kern, 0); + trace_act("mark for deletion redirected %s --> %s" + " via %s", + addrname(k->k_dst, k->k_mask, 0), + naddr_ntoa(k->k_gate), + ifp ? ifp->int_name : "unknown interface"); + } else { + k->k_state |= KS_DYNAMIC; + k->k_redirect_time = now.tv_sec; + trace_act("accept redirected %s --> %s via %s", + addrname(k->k_dst, k->k_mask, 0), + naddr_ntoa(k->k_gate), + ifp ? ifp->int_name : "unknown interface"); + } + return; + } + + /* + * If it is not a static route, quit until the next comparison + * between the kernel and daemon tables, when it will be deleted. + */ + if (!(k->k_state & KS_STATIC)) { + if (!(k->k_state & (KS_IF|KS_DEPRE_IF|KS_FILE))) + k->k_state |= KS_DELETE; + LIM_SEC(need_kern, k->k_keep); + return; + } + + /* + * Put static routes with real metrics into the daemon table so + * they can be advertised. + */ + + kern_check_static(k, ifp); +} + + +/* deal with packet loss */ +static void +rtm_lose(struct rt_msghdr *rtm, struct rt_addrinfo *info) +{ + if (INFO_GATE(info) == NULL || INFO_GATE(info)->ss_family != AF_INET) { + trace_act("ignore %s without gateway", + rtm_type_name(rtm->rtm_type)); + age(0); + return; + } + + if (rdisc_ok) + rdisc_age(S_ADDR(INFO_GATE(info))); + age(S_ADDR(INFO_GATE(info))); +} + + +/* + * Make the gateway slot of an info structure point to something + * useful. If it is not already useful, but it specifies an interface, + * then fill in the sockaddr_in provided and point it there. 
+ */ +static int +get_info_gate(struct sockaddr_storage **ssp, struct sockaddr_in *sin) +{ + struct sockaddr_dl *sdl = (struct sockaddr_dl *)*ssp; + struct interface *ifp; + + if (sdl == NULL) + return (0); + if ((sdl)->sdl_family == AF_INET) + return (1); + if ((sdl)->sdl_family != AF_LINK) + return (0); + + ifp = ifwithindex(sdl->sdl_index, _B_TRUE); + if (ifp == NULL) + return (0); + + sin->sin_addr.s_addr = ifp->int_addr; + sin->sin_family = AF_INET; + /* LINTED */ + *ssp = (struct sockaddr_storage *)sin; + + return (1); +} + + +/* + * Clean the kernel table by copying it to the daemon image. + * Eventually the daemon will delete any extra routes. + */ +void +sync_kern(void) +{ + int i; + struct khash *k; + struct { + struct T_optmgmt_req req; + struct opthdr hdr; + } req; + union { + struct T_optmgmt_ack ack; + unsigned char space[64]; + } ack; + struct opthdr *rh; + struct strbuf cbuf, dbuf; + int ipfd, nroutes, flags, r; + mib2_ipRouteEntry_t routes[8]; + mib2_ipRouteEntry_t *rp; + struct rt_msghdr rtm; + struct rt_addrinfo info; + struct sockaddr_in sin_dst; + struct sockaddr_in sin_gate; + struct sockaddr_in sin_mask; + struct sockaddr_in sin_author; + struct interface *ifp; + char ifname[LIFNAMSIZ + 1]; + + for (i = 0; i < KHASH_SIZE; i++) { + for (k = khash_bins[i]; k != NULL; k = k->k_next) { + if (!(k->k_state & (KS_IF|KS_DEPRE_IF))) + k->k_state |= KS_CHECK; + } + } + + ipfd = open(IP_DEV_NAME, O_RDWR); + if (ipfd == -1) { + msglog("open " IP_DEV_NAME ": %s", rip_strerror(errno)); + goto hash_clean; + } + + req.req.PRIM_type = T_OPTMGMT_REQ; + req.req.OPT_offset = (caddr_t)&req.hdr - (caddr_t)&req; + req.req.OPT_length = sizeof (req.hdr); + req.req.MGMT_flags = T_CURRENT; + + req.hdr.level = MIB2_IP; + req.hdr.name = 0; + req.hdr.len = 0; + + cbuf.buf = (caddr_t)&req; + cbuf.len = sizeof (req); + + if (putmsg(ipfd, &cbuf, NULL, 0) == -1) { + msglog("T_OPTMGMT_REQ putmsg: %s", rip_strerror(errno)); + goto hash_clean; + } + + for (;;) { + cbuf.buf = 
(caddr_t)&ack; + cbuf.maxlen = sizeof (ack); + dbuf.buf = (caddr_t)routes; + dbuf.maxlen = sizeof (routes); + flags = 0; + r = getmsg(ipfd, &cbuf, &dbuf, &flags); + if (r == -1) { + msglog("T_OPTMGMT_REQ getmsg: %s", rip_strerror(errno)); + goto hash_clean; + } + + if (cbuf.len < sizeof (struct T_optmgmt_ack) || + ack.ack.PRIM_type != T_OPTMGMT_ACK || + ack.ack.MGMT_flags != T_SUCCESS || + ack.ack.OPT_length < sizeof (struct opthdr)) { + msglog("bad T_OPTMGMT response; len=%d prim=%d " + "flags=%d optlen=%d", cbuf.len, ack.ack.PRIM_type, + ack.ack.MGMT_flags, ack.ack.OPT_length); + goto hash_clean; + } + /* LINTED */ + rh = (struct opthdr *)((caddr_t)&ack + ack.ack.OPT_offset); + if (rh->level == 0 && rh->name == 0) { + break; + } + if (rh->level != MIB2_IP || rh->name != MIB2_IP_21) { + while (r == MOREDATA) { + r = getmsg(ipfd, NULL, &dbuf, &flags); + } + continue; + } + break; + } + + (void) memset(&rtm, 0, sizeof (rtm)); + (void) memset(&info, 0, sizeof (info)); + (void) memset(&sin_dst, 0, sizeof (sin_dst)); + (void) memset(&sin_gate, 0, sizeof (sin_gate)); + (void) memset(&sin_mask, 0, sizeof (sin_mask)); + (void) memset(&sin_author, 0, sizeof (sin_author)); + sin_dst.sin_family = AF_INET; + /* LINTED */ + info.rti_info[RTAX_DST] = (struct sockaddr_storage *)&sin_dst; + sin_gate.sin_family = AF_INET; + /* LINTED */ + info.rti_info[RTAX_GATEWAY] = (struct sockaddr_storage *)&sin_gate; + sin_mask.sin_family = AF_INET; + /* LINTED */ + info.rti_info[RTAX_NETMASK] = (struct sockaddr_storage *)&sin_mask; + sin_dst.sin_family = AF_INET; + /* LINTED */ + info.rti_info[RTAX_AUTHOR] = (struct sockaddr_storage *)&sin_author; + + for (;;) { + nroutes = dbuf.len / sizeof (mib2_ipRouteEntry_t); + for (rp = routes; nroutes > 0; ++rp, nroutes--) { + + /* + * Ignore IRE cache, broadcast, and local address + * entries; they're not subject to routing socket + * control. 
+ */ + if (rp->ipRouteInfo.re_ire_type & + (IRE_BROADCAST | IRE_CACHE | IRE_LOCAL)) + continue; + + /* ignore multicast addresses */ + if (IN_MULTICAST(ntohl(rp->ipRouteDest))) + continue; + + +#ifdef DEBUG_KERNEL_ROUTE_READ + (void) fprintf(stderr, "route type %d, ire type %08X, " + "flags %08X: %s", rp->ipRouteType, + rp->ipRouteInfo.re_ire_type, + rp->ipRouteInfo.re_flags, + naddr_ntoa(rp->ipRouteDest)); + (void) fprintf(stderr, " %s", + naddr_ntoa(rp->ipRouteMask)); + (void) fprintf(stderr, " %s\n", + naddr_ntoa(rp->ipRouteNextHop)); +#endif + + /* Fake up the needed entries */ + rtm.rtm_flags = rp->ipRouteInfo.re_flags; + rtm.rtm_type = RTM_GET; + rtm.rtm_rmx.rmx_hopcount = rp->ipRouteMetric1; + + (void) memset(ifname, 0, sizeof (ifname)); + if (rp->ipRouteIfIndex.o_length < + sizeof (rp->ipRouteIfIndex.o_bytes)) + rp->ipRouteIfIndex.o_bytes[ + rp->ipRouteIfIndex.o_length] = '\0'; + (void) strncpy(ifname, + rp->ipRouteIfIndex.o_bytes, + sizeof (ifname)); + + /* + * First try to match up on gwkludge entries + * before trying to match ifp by name. + */ + if ((ifp = gwkludge_iflookup(rp->ipRouteDest, + rp->ipRouteNextHop, rp->ipRouteMask)) == NULL) + ifp = ifwithname(ifname); + + info.rti_addrs = RTA_DST | RTA_GATEWAY | RTA_NETMASK; + if (rp->ipRouteInfo.re_ire_type & IRE_HOST_REDIRECT) + info.rti_addrs |= RTA_AUTHOR; + sin_dst.sin_addr.s_addr = rp->ipRouteDest; + sin_gate.sin_addr.s_addr = rp->ipRouteNextHop; + sin_mask.sin_addr.s_addr = rp->ipRouteMask; + sin_author.sin_addr.s_addr = + rp->ipRouteInfo.re_src_addr; + + /* + * Note static routes and interface routes, and also + * preload the image of the kernel table so that + * we can later clean it, as well as avoid making + * unneeded changes. Keep the old kernel routes for a + * few seconds to allow a RIP or router-discovery + * response to be heard. 
+ */ + rtm_add(&rtm, &info, MAX_WAITTIME, + ((rp->ipRouteInfo.re_ire_type & + (IRE_INTERFACE|IRE_LOOPBACK)) != 0), ifp); + } + if (r == 0) { + break; + } + r = getmsg(ipfd, NULL, &dbuf, &flags); + } + +hash_clean: + if (ipfd != -1) + (void) close(ipfd); + for (i = 0; i < KHASH_SIZE; i++) { + for (k = khash_bins[i]; k != NULL; k = k->k_next) { + + /* + * KS_DELETED routes have been removed from the + * kernel, but we keep them around for reasons + * stated in del_static(), so we skip the check + * for KS_DELETED routes here. + */ + if ((k->k_state & (KS_CHECK|KS_DELETED)) == KS_CHECK) { + + if (!(k->k_state & KS_DYNAMIC)) + writelog(LOG_WARNING, + "%s --> %s disappeared from kernel", + addrname(k->k_dst, k->k_mask, 0), + naddr_ntoa(k->k_gate)); + del_static(k->k_dst, k->k_mask, k->k_gate, + k->k_ifp, 1); + + } + } + } +} + + +/* Listen to announcements from the kernel */ +void +read_rt(void) +{ + long cc; + struct interface *ifp; + struct sockaddr_in gate_sin; + in_addr_t mask, gate; + union { + struct { + struct rt_msghdr rtm; + struct sockaddr_storage addrs[RTA_NUMBITS]; + } r; + struct if_msghdr ifm; + } m; + char str[100], *strp; + struct rt_addrinfo info; + + + for (;;) { + cc = read(rt_sock, &m, sizeof (m)); + if (cc <= 0) { + if (cc < 0 && errno != EWOULDBLOCK) + LOGERR("read(rt_sock)"); + return; + } + + if (TRACERTS) + dump_rt_msg("read", &m.r.rtm, cc); + + if (cc < m.r.rtm.rtm_msglen) { + msglog("routing message truncated (%d < %d)", + cc, m.r.rtm.rtm_msglen); + } + + if (m.r.rtm.rtm_version != RTM_VERSION) { + msglog("bogus routing message version %d", + m.r.rtm.rtm_version); + continue; + } + + ifp = NULL; + + if (m.r.rtm.rtm_type == RTM_IFINFO || + m.r.rtm.rtm_type == RTM_NEWADDR || + m.r.rtm.rtm_type == RTM_DELADDR) { + strp = if_bit_string(m.ifm.ifm_flags, _B_TRUE); + if (strp == NULL) { + strp = str; + (void) sprintf(str, "%#x", m.ifm.ifm_flags); + } + ifp = ifwithindex(m.ifm.ifm_index, + m.r.rtm.rtm_type != RTM_DELADDR); + if (ifp == NULL) { + char 
ifname[LIFNAMSIZ], *ifnamep; + + ifnamep = if_indextoname(m.ifm.ifm_index, + ifname); + if (ifnamep == NULL) { + trace_act("note %s with flags %s" + " for unknown interface index #%d", + rtm_type_name(m.r.rtm.rtm_type), + strp, m.ifm.ifm_index); + } else { + trace_act("note %s with flags %s" + " for unknown interface %s", + rtm_type_name(m.r.rtm.rtm_type), + strp, ifnamep); + } + } else { + trace_act("note %s with flags %s for %s", + rtm_type_name(m.r.rtm.rtm_type), + strp, ifp->int_name); + } + if (strp != str) + free(strp); + + /* + * After being informed of a change to an interface, + * check them all now if the check would otherwise + * be a long time from now, if the interface is + * not known, or if the interface has been turned + * off or on. + */ + if (ifscan_timer.tv_sec-now.tv_sec >= + CHECK_BAD_INTERVAL || ifp == NULL || + ((ifp->int_if_flags ^ m.ifm.ifm_flags) & + IFF_UP) != 0) + ifscan_timer.tv_sec = now.tv_sec; + continue; + } else { + if (m.r.rtm.rtm_index != 0) + ifp = ifwithindex(m.r.rtm.rtm_index, 1); + } + + (void) strlcpy(str, rtm_type_name(m.r.rtm.rtm_type), + sizeof (str)); + strp = &str[strlen(str)]; + if (m.r.rtm.rtm_type <= RTM_CHANGE) + strp += snprintf(strp, sizeof (str) - (strp - str), + " from pid %d", (int)m.r.rtm.rtm_pid); + + /* LINTED */ + (void) rt_xaddrs(&info, (struct sockaddr_storage *)(&m.r.rtm + + 1), (char *)&m + cc, m.r.rtm.rtm_addrs); + + if (INFO_DST(&info) == 0) { + trace_act("ignore %s without dst", str); + continue; + } + + if (INFO_DST(&info)->ss_family != AF_INET) { + trace_act("ignore %s for AF %d", str, + INFO_DST(&info)->ss_family); + continue; + } + + mask = ((INFO_MASK(&info) != 0) ? + ntohl(S_ADDR(INFO_MASK(&info))) : + (m.r.rtm.rtm_flags & RTF_HOST) ? 
+ HOST_MASK : std_mask(S_ADDR(INFO_DST(&info)))); + + strp += snprintf(strp, sizeof (str) - (strp - str), ": %s", + addrname(S_ADDR(INFO_DST(&info)), mask, 0)); + + if (IN_MULTICAST(ntohl(S_ADDR(INFO_DST(&info))))) { + trace_act("ignore multicast %s", str); + continue; + } + + if (m.r.rtm.rtm_flags & RTF_LLINFO) { + trace_act("ignore ARP %s", str); + continue; + } + + if (get_info_gate(&INFO_GATE(&info), &gate_sin)) { + gate = S_ADDR(INFO_GATE(&info)); + strp += snprintf(strp, sizeof (str) - (strp - str), + " --> %s", naddr_ntoa(gate)); + } else { + gate = 0; + } + + if (INFO_AUTHOR(&info) != 0) + strp += snprintf(strp, sizeof (str) - (strp - str), + " by authority of %s", + saddr_ntoa(INFO_AUTHOR(&info))); + + switch (m.r.rtm.rtm_type) { + case RTM_ADD: + case RTM_CHANGE: + case RTM_REDIRECT: + if (m.r.rtm.rtm_errno != 0) { + trace_act("ignore %s with \"%s\" error", + str, rip_strerror(m.r.rtm.rtm_errno)); + } else { + trace_act("%s", str); + rtm_add(&m.r.rtm, &info, 0, + !(m.r.rtm.rtm_flags & RTF_GATEWAY) && + m.r.rtm.rtm_type != RTM_REDIRECT, ifp); + + } + break; + + case RTM_DELETE: + if (m.r.rtm.rtm_errno != 0 && + m.r.rtm.rtm_errno != ESRCH) { + trace_act("ignore %s with \"%s\" error", + str, rip_strerror(m.r.rtm.rtm_errno)); + } else { + trace_act("%s", str); + del_static(S_ADDR(INFO_DST(&info)), mask, + gate, ifp, 1); + } + break; + + case RTM_LOSING: + trace_act("%s", str); + rtm_lose(&m.r.rtm, &info); + break; + + default: + trace_act("ignore %s", str); + break; + } + } +} + + +/* + * Disassemble a routing message. The result is an array of pointers + * to sockaddr_storage structures stored in the info argument. + * + * ss is a pointer to the beginning of the data following the + * rt_msghdr contained in the routing socket message, which consists + * of a string of concatenated sockaddr structure of different types. 
+ */ +static int +rt_xaddrs(struct rt_addrinfo *info, + struct sockaddr_storage *ss, + char *lim, + int addrs) +{ + int retv = 0; + int i; + int abit; + int complaints; + static int prev_complaints; + +#define XBAD_AF 0x1 +#define XBAD_SHORT 0x2 +#define XBAD_LONG 0x4 + + (void) memset(info, 0, sizeof (*info)); + info->rti_addrs = addrs; + complaints = 0; + for (i = 0, abit = 1; i < RTAX_MAX && (char *)ss < lim; + i++, abit <<= 1) { + if ((addrs & abit) == 0) + continue; + info->rti_info[i] = ss; + /* Horrible interface here */ + switch (ss->ss_family) { + case AF_UNIX: + /* LINTED */ + ss = (struct sockaddr_storage *)( + (struct sockaddr_un *)ss + 1); + break; + case AF_INET: + /* LINTED */ + ss = (struct sockaddr_storage *)( + (struct sockaddr_in *)ss + 1); + break; + case AF_LINK: + /* LINTED */ + ss = (struct sockaddr_storage *)( + (struct sockaddr_dl *)ss + 1); + break; + case AF_INET6: + /* LINTED */ + ss = (struct sockaddr_storage *)( + (struct sockaddr_in6 *)ss + 1); + break; + default: + if (!(prev_complaints & XBAD_AF)) + writelog(LOG_WARNING, + "unknown address family %d " + "encountered", ss->ss_family); + if (complaints & XBAD_AF) + goto xaddr_done; + /* LINTED */ + ss = (struct sockaddr_storage *)( + (struct sockaddr *)ss + 1); + complaints |= XBAD_AF; + info->rti_addrs &= abit - 1; + addrs = info->rti_addrs; + retv = -1; + break; + } + if ((char *)ss > lim) { + if (!(prev_complaints & XBAD_SHORT)) + msglog("sockaddr %d too short by %d " + "bytes", i + 1, (char *)ss - lim); + complaints |= XBAD_SHORT; + info->rti_info[i] = NULL; + info->rti_addrs &= abit - 1; + retv = -1; + goto xaddr_done; + } + } + if ((char *)ss != lim) { + if (!(prev_complaints & XBAD_LONG)) + msglog("%d bytes of routing message left over", + lim - (char *)ss); + complaints |= XBAD_LONG; + retv = -1; + } +xaddr_done: + prev_complaints = complaints; + return (retv); +} + + +/* after aggregating, note routes that belong in the kernel */ +static void +kern_out(struct ag_info *ag) +{ 
+ struct khash *k; + + /* + * Do not install bad routes if they are not already present. + * This includes routes that had RS_NET_SYN for interfaces that + * recently died. + */ + if (ag->ag_metric == HOPCNT_INFINITY) { + k = kern_find(htonl(ag->ag_dst_h), ag->ag_mask, + ag->ag_nhop, ag->ag_ifp, NULL); + if (k == NULL) + return; + } else { + k = kern_add(htonl(ag->ag_dst_h), ag->ag_mask, ag->ag_nhop, + ag->ag_ifp); + } + + if (k->k_state & KS_NEW) { + /* will need to add new entry to the kernel table */ + k->k_state = KS_ADD; + if (ag->ag_state & AGS_GATEWAY) + k->k_state |= KS_GATEWAY; + if (ag->ag_state & AGS_IF) + k->k_state |= KS_IF; + if (ag->ag_state & AGS_PASSIVE) + k->k_state |= KS_PASSIVE; + if (ag->ag_state & AGS_FILE) + k->k_state |= KS_FILE; + k->k_gate = ag->ag_nhop; + k->k_ifp = ag->ag_ifp; + k->k_metric = ag->ag_metric; + return; + } + + if ((k->k_state & (KS_STATIC|KS_DEPRE_IF)) || + ((k->k_state & (KS_IF|KS_PASSIVE)) == KS_IF)) { + return; + } + + /* modify existing kernel entry if necessary */ + if (k->k_gate == ag->ag_nhop && k->k_ifp == ag->ag_ifp && + k->k_metric != ag->ag_metric) { + /* + * Must delete bad interface routes etc. + * to change them. + */ + if (k->k_metric == HOPCNT_INFINITY) + k->k_state |= KS_DEL_ADD; + k->k_gate = ag->ag_nhop; + k->k_metric = ag->ag_metric; + k->k_state |= KS_CHANGE; + } + + /* + * If the daemon thinks the route should exist, forget + * about any redirections. + * If the daemon thinks the route should exist, eventually + * override manual intervention by the operator. 
+ */ + if ((k->k_state & (KS_DYNAMIC | KS_DELETED)) != 0) { + k->k_state &= ~KS_DYNAMIC; + k->k_state |= (KS_ADD | KS_DEL_ADD); + } + + if ((k->k_state & KS_GATEWAY) && !(ag->ag_state & AGS_GATEWAY)) { + k->k_state &= ~KS_GATEWAY; + k->k_state |= (KS_ADD | KS_DEL_ADD); + } else if (!(k->k_state & KS_GATEWAY) && (ag->ag_state & AGS_GATEWAY)) { + k->k_state |= KS_GATEWAY; + k->k_state |= (KS_ADD | KS_DEL_ADD); + } + + /* + * Deleting-and-adding is necessary to change aspects of a route. + * Just delete instead of deleting and then adding a bad route. + * Otherwise, we want to keep the route in the kernel. + */ + if (k->k_metric == HOPCNT_INFINITY && (k->k_state & KS_DEL_ADD)) + k->k_state |= KS_DELETE; + else + k->k_state &= ~KS_DELETE; +#undef RT +} + +/* + * Update our image of the kernel forwarding table using the given + * route from our internal routing table. + */ + +/*ARGSUSED1*/ +static int +walk_kern(struct radix_node *rn, void *argp) +{ +#define RT ((struct rt_entry *)rn) + uint8_t metric, pref; + uint_t ags = 0; + int i; + struct rt_spare *rts; + + /* Do not install synthetic routes */ + if (RT->rt_state & RS_NET_SYN) + return (0); + + /* + * Do not install static routes here. Only + * read_rt->rtm_add->kern_add should install those + */ + if ((RT->rt_state & RS_STATIC) && + (RT->rt_spares[0].rts_origin != RO_FILE)) + return (0); + + /* Do not clobber kernel if this is a route for a dead interface */ + if (RT->rt_state & RS_BADIF) + return (0); + + if (!(RT->rt_state & RS_IF)) { + /* This is an ordinary route, not for an interface. */ + + /* + * aggregate, ordinary good routes without regard to + * their metric + */ + pref = 1; + ags |= (AGS_GATEWAY | AGS_SUPPRESS | AGS_AGGREGATE); + + /* + * Do not install host routes directly to hosts, to avoid + * interfering with ARP entries in the kernel table. + */ + if (RT_ISHOST(RT) && ntohl(RT->rt_dst) == RT->rt_gate) + return (0); + + } else { + /* + * This is an interface route. 
+ * Do not install routes for "external" remote interfaces. + */ + if (RT->rt_ifp != NULL && (RT->rt_ifp->int_state & IS_EXTERNAL)) + return (0); + + /* Interfaces should override received routes. */ + pref = 0; + ags |= (AGS_IF | AGS_CORS_GATE); + if (RT->rt_ifp != NULL && + !(RT->rt_ifp->int_if_flags & IFF_LOOPBACK) && + (RT->rt_ifp->int_state & (IS_PASSIVE|IS_ALIAS)) == + IS_PASSIVE) { + ags |= AGS_PASSIVE; + } + + /* + * If it is not an interface, or an alias for an interface, + * it must be a "gateway." + * + * If it is a "remote" interface, it is also a "gateway" to + * the kernel if is not a alias. + */ + if (RT->rt_ifp == NULL || (RT->rt_ifp->int_state & IS_REMOTE)) + ags |= (AGS_GATEWAY | AGS_SUPPRESS | AGS_AGGREGATE); + } + + metric = RT->rt_metric; + if (metric == HOPCNT_INFINITY) { + /* If the route is dead, try hard to aggregate. */ + pref = HOPCNT_INFINITY; + ags |= (AGS_FINE_GATE | AGS_SUPPRESS); + ags &= ~(AGS_IF | AGS_CORS_GATE); + } + + /* + * dump all routes that have the same metric as rt_spares[0] + * into the kern_table, to be added to the kernel. + */ + for (i = 0; i < RT->rt_num_spares; i++) { + rts = &RT->rt_spares[i]; + + /* Do not install external routes */ + if (rts->rts_flags & RTS_EXTERNAL) + continue; + + if (rts->rts_metric == metric) { + ag_check(RT->rt_dst, RT->rt_mask, + rts->rts_router, rts->rts_ifp, rts->rts_gate, + metric, pref, 0, 0, + (rts->rts_origin & RO_FILE) ? (ags|AGS_FILE) : ags, + kern_out); + } + } + return (0); +#undef RT +} + + +/* Update the kernel table to match the daemon table. */ +static void +fix_kern(void) +{ + int i; + struct khash *k, *pk, *knext; + + + need_kern = age_timer; + + /* Walk daemon table, updating the copy of the kernel table. 
*/ + (void) rn_walktree(rhead, walk_kern, NULL); + ag_flush(0, 0, kern_out); + + for (i = 0; i < KHASH_SIZE; i++) { + pk = NULL; + for (k = khash_bins[i]; k != NULL; k = knext) { + knext = k->k_next; + + /* Do not touch local interface routes */ + if ((k->k_state & KS_DEPRE_IF) || + (k->k_state & (KS_IF|KS_PASSIVE)) == KS_IF) { + pk = k; + continue; + } + + /* Do not touch static routes */ + if (k->k_state & KS_STATIC) { + kern_check_static(k, 0); + pk = k; + continue; + } + + /* check hold on routes deleted by the operator */ + if (k->k_keep > now.tv_sec) { + /* ensure we check when the hold is over */ + LIM_SEC(need_kern, k->k_keep); + pk = k; + continue; + } + + if ((k->k_state & KS_DELETE) && + !(k->k_state & KS_DYNAMIC)) { + if ((k->k_dst == RIP_DEFAULT) && + (k->k_ifp != NULL) && + (kern_alternate(RIP_DEFAULT, + k->k_mask, k->k_gate, k->k_ifp, + NULL) == NULL)) + rdisc_restore(k->k_ifp); + kern_ioctl(k, RTM_DELETE, 0); + if (pk != NULL) + pk->k_next = knext; + else + khash_bins[i] = knext; + free(k); + continue; + } + + if (k->k_state & KS_DEL_ADD) + kern_ioctl(k, RTM_DELETE, 0); + + if (k->k_state & KS_ADD) { + if ((k->k_dst == RIP_DEFAULT) && + (k->k_ifp != NULL)) + rdisc_suppress(k->k_ifp); + kern_ioctl(k, RTM_ADD, + ((0 != (k->k_state & (KS_GATEWAY | + KS_DYNAMIC))) ? RTF_GATEWAY : 0)); + } else if (k->k_state & KS_CHANGE) { + /* + * Should be using RTM_CHANGE here, but + * since RTM_CHANGE is currently + * not multipath-aware, and assumes + * that RTF_GATEWAY implies the gateway + * of the route for dst has to be + * changed, we play safe, and do a del + add. + */ + kern_ioctl(k, RTM_DELETE, 0); + kern_ioctl(k, RTM_ADD, + ((0 != (k->k_state & (KS_GATEWAY | + KS_DYNAMIC))) ? RTF_GATEWAY : 0)); + } + k->k_state &= ~(KS_ADD|KS_CHANGE|KS_DEL_ADD); + + /* + * Mark this route to be deleted in the next cycle. 
+ * This deletes routes that disappear from the + * daemon table, since the normal aging code + * will clear the bit for routes that have not + * disappeared from the daemon table. + */ + k->k_state |= KS_DELETE; + pk = k; + } + } +} + + +/* Delete a static route in the image of the kernel table. */ +void +del_static(in_addr_t dst, in_addr_t mask, in_addr_t gate, + struct interface *ifp, int gone) +{ + struct khash *k; + struct rt_entry *rt; + + /* + * Just mark it in the table to be deleted next time the kernel + * table is updated. + * If it has already been deleted, mark it as such, and set its + * keep-timer so that it will not be deleted again for a while. + * This lets the operator delete a route added by the daemon + * and add a replacement. + */ + k = kern_find(dst, mask, gate, ifp, NULL); + if (k != NULL && (gate == 0 || k->k_gate == gate)) { + k->k_state &= ~(KS_STATIC | KS_DYNAMIC | KS_CHECK); + k->k_state |= KS_DELETE; + if (gone) { + k->k_state |= KS_DELETED; + k->k_keep = now.tv_sec + K_KEEP_LIM; + } + } + + rt = rtget(dst, mask); + if (rt != NULL && (rt->rt_state & RS_STATIC)) + rtbad(rt, NULL); +} + + +/* + * Delete all routes generated from ICMP Redirects that use a given gateway, + * as well as old redirected routes. + */ +void +del_redirects(in_addr_t bad_gate, time_t old) +{ + int i; + struct khash *k; + boolean_t dosupply = should_supply(NULL); + + for (i = 0; i < KHASH_SIZE; i++) { + for (k = khash_bins[i]; k != NULL; k = k->k_next) { + if (!(k->k_state & KS_DYNAMIC) || + (k->k_state & (KS_STATIC|KS_IF|KS_DEPRE_IF))) + continue; + + if (k->k_gate != bad_gate && k->k_redirect_time > old && + !dosupply) + continue; + + k->k_state |= KS_DELETE; + k->k_state &= ~KS_DYNAMIC; + need_kern.tv_sec = now.tv_sec; + trace_act("mark redirected %s --> %s for deletion", + addrname(k->k_dst, k->k_mask, 0), + naddr_ntoa(k->k_gate)); + } + } +} + +/* Start the daemon tables. 
*/ +void +rtinit(void) +{ + int i; + struct ag_info *ag; + + /* Initialize the radix trees */ + rn_init(); + (void) rn_inithead((void**)&rhead, 32); + + /* mark all of the slots in the table free */ + ag_avail = ag_slots; + for (ag = ag_slots, i = 1; i < NUM_AG_SLOTS; i++) { + ag->ag_fine = ag+1; + ag++; + } +} + + +static struct sockaddr_in dst_sock = {AF_INET}; +static struct sockaddr_in mask_sock = {AF_INET}; + + +static void +set_need_flash(void) +{ + if (!need_flash) { + need_flash = _B_TRUE; + /* + * Do not send the flash update immediately. Wait a little + * while to hear from other routers. + */ + no_flash.tv_sec = now.tv_sec + MIN_WAITTIME; + } +} + + +/* Get a particular routing table entry */ +struct rt_entry * +rtget(in_addr_t dst, in_addr_t mask) +{ + struct rt_entry *rt; + + dst_sock.sin_addr.s_addr = dst; + mask_sock.sin_addr.s_addr = htonl(mask); + rt = (struct rt_entry *)rhead->rnh_lookup(&dst_sock, &mask_sock, rhead); + if (rt == NULL || rt->rt_dst != dst || rt->rt_mask != mask) + return (NULL); + + return (rt); +} + + +/* Find a route to dst as the kernel would. */ +struct rt_entry * +rtfind(in_addr_t dst) +{ + dst_sock.sin_addr.s_addr = dst; + return ((struct rt_entry *)rhead->rnh_matchaddr(&dst_sock, rhead)); +} + + +/* add a route to the table */ +void +rtadd(in_addr_t dst, + in_addr_t mask, + uint16_t state, /* rt_state for the entry */ + struct rt_spare *new) +{ + struct rt_entry *rt; + in_addr_t smask; + int i; + struct rt_spare *rts; + + /* This is the only function that increments total_routes. 
*/ + if (total_routes == MAX_ROUTES) { + msglog("have maximum (%d) routes", total_routes); + return; + } + + rt = rtmalloc(sizeof (*rt), "rtadd"); + (void) memset(rt, 0, sizeof (*rt)); + rt->rt_spares = rtmalloc(SPARE_INC * sizeof (struct rt_spare), + "rtadd"); + rt->rt_num_spares = SPARE_INC; + (void) memset(rt->rt_spares, 0, SPARE_INC * sizeof (struct rt_spare)); + for (rts = rt->rt_spares, i = rt->rt_num_spares; i != 0; i--, rts++) + rts->rts_metric = HOPCNT_INFINITY; + + rt->rt_nodes->rn_key = (uint8_t *)&rt->rt_dst_sock; + rt->rt_dst = dst; + rt->rt_dst_sock.sin_family = AF_INET; + if (mask != HOST_MASK) { + smask = std_mask(dst); + if ((smask & ~mask) == 0 && mask > smask) + state |= RS_SUBNET; + } + mask_sock.sin_addr.s_addr = htonl(mask); + rt->rt_mask = mask; + rt->rt_spares[0] = *new; + rt->rt_state = state; + rt->rt_time = now.tv_sec; + rt->rt_poison_metric = HOPCNT_INFINITY; + rt->rt_seqno = update_seqno; + + if (TRACEACTIONS) + trace_add_del("Add", rt); + + need_kern.tv_sec = now.tv_sec; + set_need_flash(); + + if (NULL == rhead->rnh_addaddr(&rt->rt_dst_sock, &mask_sock, rhead, + rt->rt_nodes)) { + msglog("rnh_addaddr() failed for %s mask=%s", + naddr_ntoa(dst), naddr_ntoa(htonl(mask))); + free(rt); + } + + total_routes++; +} + + +/* notice a changed route */ +void +rtchange(struct rt_entry *rt, + uint16_t state, /* new state bits */ + struct rt_spare *new, + char *label) +{ + if (rt->rt_metric != new->rts_metric) { + /* + * Fix the kernel immediately if it seems the route + * has gone bad, since there may be a working route that + * aggregates this route. 
+ */ + if (new->rts_metric == HOPCNT_INFINITY) { + need_kern.tv_sec = now.tv_sec; + if (new->rts_time >= now.tv_sec - EXPIRE_TIME) + new->rts_time = now.tv_sec - EXPIRE_TIME; + } + rt->rt_seqno = update_seqno; + set_need_flash(); + } + + if (rt->rt_gate != new->rts_gate) { + need_kern.tv_sec = now.tv_sec; + rt->rt_seqno = update_seqno; + set_need_flash(); + } + + state |= (rt->rt_state & RS_SUBNET); + + /* Keep various things from deciding ageless routes are stale. */ + if (!AGE_RT(state, rt->rt_spares[0].rts_origin, new->rts_ifp)) + new->rts_time = now.tv_sec; + + if (TRACEACTIONS) + trace_change(rt, state, new, + label ? label : "Chg "); + + rt->rt_state = state; + /* + * If the interface state of the new primary route is good, + * turn off RS_BADIF flag + */ + if ((rt->rt_state & RS_BADIF) && + IS_IFF_UP(new->rts_ifp->int_if_flags) && + !(new->rts_ifp->int_state & (IS_BROKE | IS_SICK))) + rt->rt_state &= ~(RS_BADIF); + + rt->rt_spares[0] = *new; +} + + +/* check for a better route among the spares */ +static struct rt_spare * +rts_better(struct rt_entry *rt) +{ + struct rt_spare *rts, *rts1; + int i; + + /* find the best alternative among the spares */ + rts = rt->rt_spares+1; + for (i = rt->rt_num_spares, rts1 = rts+1; i > 2; i--, rts1++) { + if (BETTER_LINK(rt, rts1, rts)) + rts = rts1; + } + + return (rts); +} + + +/* switch to a backup route */ +void +rtswitch(struct rt_entry *rt, + struct rt_spare *rts) +{ + struct rt_spare swap; + char label[10]; + + /* Do not change permanent routes */ + if (0 != (rt->rt_state & (RS_MHOME | RS_STATIC | + RS_NET_SYN | RS_IF))) + return; + + /* find the best alternative among the spares */ + if (rts == NULL) + rts = rts_better(rt); + + /* Do not bother if it is not worthwhile. 
*/ + if (!BETTER_LINK(rt, rts, rt->rt_spares)) + return; + + swap = rt->rt_spares[0]; + (void) snprintf(label, sizeof (label), "Use #%d", + (int)(rts - rt->rt_spares)); + rtchange(rt, rt->rt_state & ~(RS_NET_SYN), rts, label); + + if (swap.rts_metric == HOPCNT_INFINITY) { + *rts = rts_empty; + } else { + *rts = swap; + } + +} + + +void +rtdelete(struct rt_entry *rt) +{ + struct rt_entry *deleted_rt; + struct rt_spare *rts; + int i; + in_addr_t gate = rt->rt_gate; /* for debugging */ + + if (TRACEACTIONS) + trace_add_del("Del", rt); + + for (i = 0; i < rt->rt_num_spares; i++) { + rts = &rt->rt_spares[i]; + rts_delete(rt, rts); + } + + dst_sock.sin_addr.s_addr = rt->rt_dst; + mask_sock.sin_addr.s_addr = htonl(rt->rt_mask); + if (rt != (deleted_rt = + ((struct rt_entry *)rhead->rnh_deladdr(&dst_sock, &mask_sock, + rhead)))) { + msglog("rnh_deladdr(%s) failed; found rt 0x%lx", + rtname(rt->rt_dst, rt->rt_mask, gate), deleted_rt); + if (deleted_rt != NULL) + free(deleted_rt); + } + total_routes--; + free(rt); + + if (dst_sock.sin_addr.s_addr == RIP_DEFAULT) { + /* + * we just deleted the default route. Trigger rdisc_sort + * so that we can recover from any rdisc information that + * is valid + */ + rdisc_timer.tv_sec = 0; + } +} + +void +rts_delete(struct rt_entry *rt, struct rt_spare *rts) +{ + struct khash *k; + + trace_upslot(rt, rts, &rts_empty); + k = kern_find(rt->rt_dst, rt->rt_mask, + rts->rts_gate, rts->rts_ifp, NULL); + if (k != NULL && + !(k->k_state & KS_DEPRE_IF) && + ((k->k_state & (KS_IF|KS_PASSIVE)) != KS_IF)) { + k->k_state |= KS_DELETE; + need_kern.tv_sec = now.tv_sec; + } + + *rts = rts_empty; +} + +/* + * Get rid of a bad route, and try to switch to a replacement. + * If the route has gone bad because of a bad interface, + * the information about the dead interface is available in badifp + * for the purpose of sanity checks, if_flags checks etc. 
+ */ +static void +rtbad(struct rt_entry *rt, struct interface *badifp) +{ + struct rt_spare new; + uint16_t rt_state; + + + if (badifp == NULL || (rt->rt_spares[0].rts_ifp == badifp)) { + /* Poison the route */ + new = rt->rt_spares[0]; + new.rts_metric = HOPCNT_INFINITY; + rt_state = rt->rt_state & ~(RS_IF | RS_LOCAL | RS_STATIC); + } + + if (badifp != NULL) { + /* + * Dont mark the rtentry bad unless the ifp for the primary + * route is the bad ifp + */ + if (rt->rt_spares[0].rts_ifp != badifp) + return; + /* + * badifp has just gone bad. We want to keep this + * rt_entry around so that we tell our rip-neighbors + * about the bad route, but we can't do anything + * to the kernel itself, so mark it as RS_BADIF + */ + trace_misc("rtbad:Setting RS_BADIF (%s)", badifp->int_name); + rt_state |= RS_BADIF; + new.rts_ifp = &dummy_ifp; + } + rtchange(rt, rt_state, &new, 0); + rtswitch(rt, 0); +} + + +/* + * Junk a RS_NET_SYN or RS_LOCAL route, + * unless it is needed by another interface. + */ +void +rtbad_sub(struct rt_entry *rt, struct interface *badifp) +{ + struct interface *ifp, *ifp1; + struct intnet *intnetp; + uint_t state; + + + ifp1 = NULL; + state = 0; + + if (rt->rt_state & RS_LOCAL) { + /* + * Is this the route through loopback for the interface? + * If so, see if it is used by any other interfaces, such + * as a point-to-point interface with the same local address. + */ + for (ifp = ifnet; ifp != NULL; ifp = ifp->int_next) { + /* Retain it if another interface needs it. */ + if (ifp->int_addr == rt->rt_ifp->int_addr) { + state |= RS_LOCAL; + ifp1 = ifp; + break; + } + } + + } + + if (!(state & RS_LOCAL)) { + /* + * Retain RIPv1 logical network route if there is another + * interface that justifies it. 
+ */ + if (rt->rt_state & RS_NET_SYN) { + for (ifp = ifnet; ifp != NULL; ifp = ifp->int_next) { + if ((ifp->int_state & IS_NEED_NET_SYN) && + rt->rt_mask == ifp->int_std_mask && + rt->rt_dst == ifp->int_std_addr) { + state |= RS_NET_SYN; + ifp1 = ifp; + break; + } + } + } + + /* or if there is an authority route that needs it. */ + for (intnetp = intnets; intnetp != NULL; + intnetp = intnetp->intnet_next) { + if (intnetp->intnet_addr == rt->rt_dst && + intnetp->intnet_mask == rt->rt_mask) { + state |= (RS_NET_SYN | RS_NET_INT); + break; + } + } + } + + if (ifp1 != NULL || (state & RS_NET_SYN)) { + struct rt_spare new = rt->rt_spares[0]; + new.rts_ifp = ifp1; + rtchange(rt, ((rt->rt_state & ~(RS_NET_SYN|RS_LOCAL)) | state), + &new, 0); + } else { + rtbad(rt, badifp); + } +} + +/* + * Called while walking the table looking for sick interfaces + * or after a time change. + */ +int +walk_bad(struct radix_node *rn, + void *argp) +{ +#define RT ((struct rt_entry *)rn) + struct rt_spare *rts; + int i, j = -1; + + /* fix any spare routes through the interface */ + for (i = 1; i < RT->rt_num_spares; i++) { + rts = &((struct rt_entry *)rn)->rt_spares[i]; + + if (rts->rts_metric < HOPCNT_INFINITY && + (rts->rts_ifp == NULL || + (rts->rts_ifp->int_state & IS_BROKE))) + rts_delete(RT, rts); + else { + if (rts->rts_origin != RO_NONE) + j = i; + } + } + + /* + * Deal with the main route + * finished if it has been handled before or if its interface is ok + */ + if (RT->rt_ifp == NULL || !(RT->rt_ifp->int_state & IS_BROKE)) + return (0); + + /* Bad routes for other than interfaces are easy. */ + if (!(RT->rt_state & (RS_IF | RS_NET_SYN | RS_LOCAL))) { + if (j > 0) + rtswitch(RT, NULL); + else + rtbad(RT, (struct interface *)argp); + return (0); + } + + rtbad_sub(RT, (struct interface *)argp); + return (0); +#undef RT +} + +/* + * Called while walking the table to replace a duplicate interface + * with a backup. 
+ */ +int +walk_rewire(struct radix_node *rn, void *argp) +{ + struct rt_entry *RT = (struct rt_entry *)rn; + struct rewire_data *wire = (struct rewire_data *)argp; + struct rt_spare *rts; + int i; + + /* fix any spare routes through the interface */ + rts = RT->rt_spares; + for (i = RT->rt_num_spares; i > 0; i--, rts++) { + if (rts->rts_ifp == wire->if_old) { + rts->rts_ifp = wire->if_new; + if ((RT->rt_dst == RIP_DEFAULT) && + (wire->if_old->int_state & IS_SUPPRESS_RDISC)) + rdisc_suppress(rts->rts_ifp); + if ((rts->rts_metric += wire->metric_delta) > + HOPCNT_INFINITY) + rts->rts_metric = HOPCNT_INFINITY; + + /* + * If the main route is getting a worse metric, + * then it may be time to switch to a backup. + */ + if (i == RT->rt_num_spares && wire->metric_delta > 0) { + rtswitch(RT, NULL); + } + } + } + + return (0); +} + +/* Check the age of an individual route. */ +static int +walk_age(struct radix_node *rn, void *argp) +{ +#define RT ((struct rt_entry *)rn) + struct interface *ifp; + struct rt_spare *rts; + int i; + in_addr_t age_bad_gate = *(in_addr_t *)argp; + + + /* + * age all of the spare routes, including the primary route + * currently in use + */ + rts = RT->rt_spares; + for (i = RT->rt_num_spares; i != 0; i--, rts++) { + + ifp = rts->rts_ifp; + if (i == RT->rt_num_spares) { + if (!AGE_RT(RT->rt_state, rts->rts_origin, ifp)) { + /* + * Keep various things from deciding ageless + * routes are stale + */ + rts->rts_time = now.tv_sec; + continue; + } + + /* forget RIP routes after RIP has been turned off. 
*/ + if (rip_sock < 0) { + rts->rts_time = now_stale + 1; + } + } + + /* age failing routes */ + if (age_bad_gate == rts->rts_gate && + rts->rts_time >= now_stale) { + rts->rts_time -= SUPPLY_INTERVAL; + } + + /* trash the spare routes when they go bad */ + if (rts->rts_origin == RO_RIP && + ((rip_sock < 0) || + (rts->rts_metric < HOPCNT_INFINITY && + now_garbage > rts->rts_time)) && + i != RT->rt_num_spares) { + rts_delete(RT, rts); + } + } + + + /* finished if the active route is still fresh */ + if (now_stale <= RT->rt_time) + return (0); + + /* try to switch to an alternative */ + rtswitch(RT, NULL); + + /* Delete a dead route after it has been publically mourned. */ + if (now_garbage > RT->rt_time) { + rtdelete(RT); + return (0); + } + + /* Start poisoning a bad route before deleting it. */ + if (now.tv_sec - RT->rt_time > EXPIRE_TIME) { + struct rt_spare new = RT->rt_spares[0]; + + new.rts_metric = HOPCNT_INFINITY; + rtchange(RT, RT->rt_state, &new, 0); + } + return (0); +} + + +/* Watch for dead routes and interfaces. */ +void +age(in_addr_t bad_gate) +{ + struct interface *ifp; + int need_query = 0; + + /* + * If not listening to RIP, there is no need to age the routes in + * the table. + */ + age_timer.tv_sec = (now.tv_sec + + ((rip_sock < 0) ? NEVER : SUPPLY_INTERVAL)); + + /* + * Check for dead IS_REMOTE interfaces by timing their + * transmissions. 
+ */ + for (ifp = ifnet; ifp; ifp = ifp->int_next) { + if (!(ifp->int_state & IS_REMOTE)) + continue; + + /* ignore unreachable remote interfaces */ + if (!check_remote(ifp)) + continue; + + /* Restore remote interface that has become reachable */ + if (ifp->int_state & IS_BROKE) + if_ok(ifp, "remote ", _B_FALSE); + + if (ifp->int_act_time != NEVER && + now.tv_sec - ifp->int_act_time > EXPIRE_TIME) { + writelog(LOG_NOTICE, + "remote interface %s to %s timed out after" + " %ld:%ld", + ifp->int_name, + naddr_ntoa(ifp->int_dstaddr), + (now.tv_sec - ifp->int_act_time)/60, + (now.tv_sec - ifp->int_act_time)%60); + if_sick(ifp, _B_FALSE); + } + + /* + * If we have not heard from the other router + * recently, ask it. + */ + if (now.tv_sec >= ifp->int_query_time) { + ifp->int_query_time = NEVER; + need_query = 1; + } + } + + /* Age routes. */ + (void) rn_walktree(rhead, walk_age, &bad_gate); + + /* + * delete old redirected routes to keep the kernel table small + * and prevent blackholes + */ + del_redirects(bad_gate, now.tv_sec-STALE_TIME); + + /* Update the kernel routing table. */ + fix_kern(); + + /* poke reticent remote gateways */ + if (need_query) + rip_query(); +} + +void +kern_dump(void) +{ + int i; + struct khash *k; + + for (i = 0; i < KHASH_SIZE; i++) { + for (k = khash_bins[i]; k != NULL; k = k->k_next) + trace_khash(k); + } +} + + +static struct interface * +gwkludge_iflookup(in_addr_t dstaddr, in_addr_t addr, in_addr_t mask) +{ + uint32_t int_state; + struct interface *ifp; + + for (ifp = ifnet; ifp != NULL; ifp = ifp->int_next) { + int_state = ifp->int_state; + + if (!(int_state & IS_REMOTE)) + continue; + + if (ifp->int_dstaddr == dstaddr && ifp->int_addr == addr && + ifp->int_mask == mask) + return (ifp); + } + return (NULL); +} |