summaryrefslogtreecommitdiff
path: root/usr/src/uts/common/inet/ip.h
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/uts/common/inet/ip.h')
-rw-r--r--usr/src/uts/common/inet/ip.h688
1 files changed, 392 insertions, 296 deletions
diff --git a/usr/src/uts/common/inet/ip.h b/usr/src/uts/common/inet/ip.h
index 323c8fd0de..41595280cb 100644
--- a/usr/src/uts/common/inet/ip.h
+++ b/usr/src/uts/common/inet/ip.h
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/* Copyright (c) 1990 Mentat Inc. */
@@ -56,6 +56,7 @@ extern "C" {
#include <net/route.h>
#include <sys/systm.h>
#include <sys/multidata.h>
+#include <sys/list.h>
#include <net/radix.h>
#include <sys/modhash.h>
@@ -565,15 +566,21 @@ typedef struct ipha_s {
#define IPH_ECN_ECT0 0x2 /* ECN-Capable Transport, ECT(0) */
#define IPH_ECN_CE 0x3 /* ECN-Congestion Experienced (CE) */
+struct ill_s;
+
+typedef boolean_t ip_v6intfid_func_t(struct ill_s *, in6_addr_t *);
+typedef boolean_t ip_v6mapinfo_func_t(uint_t, uint8_t *, uint8_t *, uint32_t *,
+ in6_addr_t *);
+typedef boolean_t ip_v4mapinfo_func_t(uint_t, uint8_t *, uint8_t *, uint32_t *,
+ ipaddr_t *);
+
/* IP Mac info structure */
typedef struct ip_m_s {
- t_uscalar_t ip_m_mac_type; /* From <sys/dlpi.h> */
- int ip_m_type; /* From <net/if_types.h> */
- boolean_t (*ip_m_v4mapinfo)(uint_t, uint8_t *, uint8_t *,
- uint32_t *, ipaddr_t *);
- boolean_t (*ip_m_v6mapinfo)(uint_t, uint8_t *, uint8_t *,
- uint32_t *, in6_addr_t *);
- boolean_t (*ip_m_v6intfid)(uint_t, uint8_t *, in6_addr_t *);
+ t_uscalar_t ip_m_mac_type; /* From <sys/dlpi.h> */
+ int ip_m_type; /* From <net/if_types.h> */
+ ip_v4mapinfo_func_t *ip_m_v4mapinfo;
+ ip_v6mapinfo_func_t *ip_m_v6mapinfo;
+ ip_v6intfid_func_t *ip_m_v6intfid;
} ip_m_t;
/*
@@ -583,18 +590,22 @@ typedef struct ip_m_s {
* layer multicast address range.
* b. map from IPv6 multicast address range (ff00::/8) to the link
* layer multicast address range.
- * c. derive the default IPv6 interface identifier from the link layer
- * address.
+ * c. derive the default IPv6 interface identifier from the interface.
+ * d. derive the default IPv6 destination interface identifier from
+ * the interface (point-to-point only).
*/
#define MEDIA_V4MINFO(ip_m, plen, bphys, maddr, hwxp, v4ptr) \
(((ip_m)->ip_m_v4mapinfo != NULL) && \
(*(ip_m)->ip_m_v4mapinfo)(plen, bphys, maddr, hwxp, v4ptr))
-#define MEDIA_V6INTFID(ip_m, plen, phys, v6ptr) \
- (((ip_m)->ip_m_v6intfid != NULL) && \
- (*(ip_m)->ip_m_v6intfid)(plen, phys, v6ptr))
#define MEDIA_V6MINFO(ip_m, plen, bphys, maddr, hwxp, v6ptr) \
(((ip_m)->ip_m_v6mapinfo != NULL) && \
(*(ip_m)->ip_m_v6mapinfo)(plen, bphys, maddr, hwxp, v6ptr))
+#define MEDIA_V6INTFID(ip_m, ill, v6ptr) \
+ (((ip_m)->ip_m_v6intfid != NULL) && \
+ (*(ip_m)->ip_m_v6intfid)(ill, v6ptr))
+#define MEDIA_V6DESTINTFID(ip_m, ill, v6ptr) \
+ (((ip_m)->ip_m_v6destintfid != NULL) && \
+ (*(ip_m)->ip_m_v6destintfid)(ill, v6ptr))
/* Router entry types */
#define IRE_BROADCAST 0x0001 /* Route entry for broadcast address */
@@ -621,18 +632,12 @@ typedef struct ip_m_s {
* the bucket should delete this IRE from this bucket.
*/
#define IRE_MARK_CONDEMNED 0x0001
+
/*
- * If a broadcast IRE is marked with IRE_MARK_NORECV, ip_rput will drop the
- * broadcast packets received on that interface. This is marked only
- * on broadcast ires. Employed by IPMP, where we have multiple NICs on the
- * same subnet receiving the same broadcast packet.
- */
-#define IRE_MARK_NORECV 0x0002
-/*
- * IRE_CACHE marked this way won't be returned by ire_cache_lookup. Need
- * to look specifically using MATCH_IRE_MARK_HIDDEN. Used by IPMP.
+ * An IRE with IRE_MARK_TESTHIDDEN is used by in.mpathd for test traffic. It
+ * can only be looked up by requesting MATCH_IRE_MARK_TESTHIDDEN.
*/
-#define IRE_MARK_HIDDEN 0x0004 /* Typically Used by in.mpathd */
+#define IRE_MARK_TESTHIDDEN 0x0004
/*
* An IRE with IRE_MARK_NOADD is created in ip_newroute_ipif when the outgoing
@@ -788,45 +793,18 @@ typedef struct mrec_s {
* ilm records the state of multicast memberships with the driver and is
* maintained per interface.
*
- * Notes :
- *
- * 1) There is no direct link between a given ilg and ilm. If the
- * application has joined a group G with ifindex I, we will have
- * an ilg with ilg_v6group and ilg_ill. There will be a corresponding
- * ilm with ilm_ill/ilm_v6addr recording the multicast membership.
- * To delete the membership,
- *
- * a) Search for ilg matching on G and I with ilg_v6group
- * and ilg_ill. Delete ilg_ill.
- * b) Search the corresponding ilm matching on G and I with
- * ilm_v6addr and ilm_ill. Delete ilm.
- *
- * In IPv4, the only difference is, we look using ipifs instead of
- * ills.
- *
- * 2) With IP multipathing, we want to keep receiving even after the
- * interface has failed. We do this by moving multicast memberships
- * to a new_ill within the group. This is achieved by sending
- * DL_DISABMULTI_REQS on ilg_ill/ilm_ill and sending DL_ENABMULTIREQS
- * on the new_ill and changing ilg_ill/ilm_ill to new_ill. But, we
- * need to be able to delete memberships which will still come down
- * with the ifindex of the old ill which is what the application
- * knows of. Thus we store the ilm_/ilg_orig_ifindex to keep track
- * of where we joined initially so that we can lookup even after we
- * moved the membership. It is also used for moving back the membership
- * when the old ill has been repaired. This is done by looking up for
- * ilms with ilm_orig_ifindex matching on the old ill's ifindex. Only
- * ilms actually move from old ill to new ill. ilgs don't move (just
- * the ilg_ill is changed when it moves) as it just records the state
- * of the application that has joined a group G where as ilm records
- * the state joined with the driver. Thus when we send DL_XXXMULTI_REQs
- * we also need to keep the ilm in the right ill.
- *
- * In IPv4, as ipifs move from old ill to new_ill, ilgs and ilms move
- * implicitly as we use only ipifs in IPv4. Thus, one can always lookup
- * a given ilm/ilg even after it fails without the support of
- * orig_ifindex. We move ilms still to record the driver state as
- * mentioned above.
+ * There is no direct link between a given ilg and ilm. If the
+ * application has joined a group G with ifindex I, we will have
+ * an ilg with ilg_v6group and ilg_ill. There will be a corresponding
+ * ilm with ilm_ill/ilm_v6addr recording the multicast membership.
+ * To delete the membership:
+ *
+ * a) Search for ilg matching on G and I with ilg_v6group
+ * and ilg_ill. Delete ilg_ill.
+ * b) Search the corresponding ilm matching on G and I with
+ * ilm_v6addr and ilm_ill. Delete ilm.
+ *
+ * For IPv4 the only difference is that we look using ipifs, not ills.
*/
/*
@@ -839,7 +817,6 @@ typedef struct ilg_s {
in6_addr_t ilg_v6group;
struct ipif_s *ilg_ipif; /* Logical interface we are member on */
struct ill_s *ilg_ill; /* Used by IPv6 */
- int ilg_orig_ifindex; /* Interface originally joined on */
uint_t ilg_flags;
mcast_record_t ilg_fmode; /* MODE_IS_INCLUDE/MODE_IS_EXCLUDE */
slist_t *ilg_filter;
@@ -866,9 +843,7 @@ typedef struct ilm_s {
struct ilm_s *ilm_next; /* Linked list for each ill */
uint_t ilm_state; /* state of the membership */
struct ill_s *ilm_ill; /* Back pointer to ill for IPv6 */
- int ilm_orig_ifindex; /* V6_MULTICAST_IF/ilm_ipif index */
uint_t ilm_flags;
- boolean_t ilm_is_new; /* new ilm */
boolean_t ilm_notify_driver; /* Need to notify the driver */
zoneid_t ilm_zoneid;
int ilm_no_ilg_cnt; /* number of joins w/ no ilg */
@@ -881,28 +856,11 @@ typedef struct ilm_s {
#define ilm_addr V4_PART_OF_V6(ilm_v6addr)
-/*
- * ilm_walker_cleanup needs to execute when the ilm_walker_cnt goes down to
- * zero. In addition it needs to block new walkers while it is unlinking ilm's
- * from the list. Thus simple atomics for the ill_ilm_walker_cnt don't suffice.
- */
-#define ILM_WALKER_HOLD(ill) { \
- mutex_enter(&(ill)->ill_lock); \
- ill->ill_ilm_walker_cnt++; \
- mutex_exit(&(ill)->ill_lock); \
-}
-
-/*
- * ilm_walker_cleanup releases ill_lock
- */
-#define ILM_WALKER_RELE(ill) { \
- mutex_enter(&(ill)->ill_lock); \
- (ill)->ill_ilm_walker_cnt--; \
- if ((ill)->ill_ilm_walker_cnt == 0 && (ill)->ill_ilm_cleanup_reqd) \
- ilm_walker_cleanup(ill); \
- else \
- mutex_exit(&(ill)->ill_lock); \
-}
+typedef struct ilm_walker {
+ struct ill_s *ilw_ill; /* associated ill */
+ struct ill_s *ilw_ipmp_ill; /* associated ipmp ill (if any) */
+ struct ill_s *ilw_walk_ill; /* current ill being walked */
+} ilm_walker_t;
/*
* Soft reference to an IPsec SA.
@@ -1047,11 +1005,8 @@ typedef struct conn_s conn_t;
* ipc_acking_unbind conn_acking_unbind
* ipc_pad_to_bit_31 conn_pad_to_bit_31
*
- * ipc_nofailover_ill conn_nofailover_ill
- *
* ipc_proto conn_proto
* ipc_incoming_ill conn_incoming_ill
- * ipc_outgoing_pill conn_outgoing_pill
* ipc_pending_ill conn_pending_ill
* ipc_unbind_mp conn_unbind_mp
* ipc_ilg conn_ilg
@@ -1061,8 +1016,6 @@ typedef struct conn_s conn_t;
* ipc_refcv conn_refcv
* ipc_multicast_ipif conn_multicast_ipif
* ipc_multicast_ill conn_multicast_ill
- * ipc_orig_bound_ifindex conn_orig_bound_ifindex
- * ipc_orig_multicast_ifindex conn_orig_multicast_ifindex
* ipc_drain_next conn_drain_next
* ipc_drain_prev conn_drain_prev
* ipc_idl conn_idl
@@ -1263,7 +1216,6 @@ typedef struct th_hash_s {
/* The following are ipif_state_flags */
#define IPIF_CONDEMNED 0x1 /* The ipif is being removed */
#define IPIF_CHANGING 0x2 /* A critcal ipif field is changing */
-#define IPIF_MOVING 0x8 /* The ipif is being moved */
#define IPIF_SET_LINKLOCAL 0x10 /* transient flag during bringup */
#define IPIF_ZERO_SOURCE 0x20 /* transient flag during bringup */
@@ -1273,7 +1225,6 @@ typedef struct ipif_s {
struct ill_s *ipif_ill; /* Back pointer to our ill */
int ipif_id; /* Logical unit number */
uint_t ipif_mtu; /* Starts at ipif_ill->ill_max_frag */
- uint_t ipif_saved_mtu; /* Save of mtu during ipif_move() */
in6_addr_t ipif_v6lcl_addr; /* Local IP address for this if. */
in6_addr_t ipif_v6src_addr; /* Source IP address for this if. */
in6_addr_t ipif_v6subnet; /* Subnet prefix for this if. */
@@ -1306,17 +1257,15 @@ typedef struct ipif_s {
uint_t ipif_ob_pkt_count; /* Outbound packets to our dead IREs */
/* Exclusive bit fields, protected by ipsq_t */
unsigned int
- ipif_multicast_up : 1, /* We have joined the allhosts group */
- ipif_replace_zero : 1, /* Replacement for zero */
+ ipif_multicast_up : 1, /* ipif_multicast_up() successful */
ipif_was_up : 1, /* ipif was up before */
ipif_addr_ready : 1, /* DAD is done */
-
ipif_was_dup : 1, /* DAD had failed */
+
+ ipif_joined_allhosts : 1, /* allhosts joined */
ipif_pad_to_31 : 27;
- int ipif_orig_ifindex; /* ifindex before SLIFFAILOVER */
uint_t ipif_seqid; /* unique index across all ills */
- uint_t ipif_orig_ipifid; /* ipif_id before SLIFFAILOVER */
uint_t ipif_state_flags; /* See IPIF_* flag defs above */
uint_t ipif_refcnt; /* active consistent reader cnt */
@@ -1328,6 +1277,16 @@ typedef struct ipif_s {
zoneid_t ipif_zoneid; /* zone ID number */
timeout_id_t ipif_recovery_id; /* Timer for DAD recovery */
boolean_t ipif_trace_disable; /* True when alloc fails */
+ /*
+ * For an IPMP interface, ipif_bound_ill tracks the ill whose hardware
+ * information this ipif is associated with via ARP/NDP. We can use
+ * an ill pointer (rather than an index) because only ills that are
+ * part of a group will be pointed to, and an ill cannot disappear
+ * while it's in a group.
+ */
+ struct ill_s *ipif_bound_ill;
+ struct ipif_s *ipif_bound_next; /* bound ipif chain */
+ boolean_t ipif_bound; /* B_TRUE if we successfully bound */
} ipif_t;
/*
@@ -1405,8 +1364,6 @@ typedef struct ipif_s {
*
* bit fields ill_lock ill_lock
*
- * ipif_orig_ifindex ipsq None
- * ipif_orig_ipifid ipsq None
* ipif_seqid ipsq Write once
*
* ipif_state_flags ill_lock ill_lock
@@ -1414,6 +1371,10 @@ typedef struct ipif_s {
* ipif_ire_cnt ill_lock ill_lock
* ipif_ilm_cnt ill_lock ill_lock
* ipif_saved_ire_cnt
+ *
+ * ipif_bound_ill ipsq + ipmp_lock ipsq OR ipmp_lock
+ * ipif_bound_next ipsq ipsq
+ * ipif_bound ipsq ipsq
*/
#define IP_TR_HASH(tid) ((((uintptr_t)tid) >> 6) & (IP_TR_HASH_MAX - 1))
@@ -1457,103 +1418,154 @@ typedef struct ipif_s {
#define IPI2MODE(ipi) ((ipi)->ipi_flags & IPI_GET_CMD ? COPYOUT : NO_COPYOUT)
/*
- * The IP-MT design revolves around the serialization object ipsq_t.
- * It is associated with an IPMP group. If IPMP is not enabled, there is
- * 1 ipsq_t per phyint. Eg. an ipsq_t would cover both hme0's IPv4 stream
- *
- * ipsq_lock protects
- * ipsq_reentry_cnt, ipsq_writer, ipsq_xopq_mphead, ipsq_xopq_mptail,
- * ipsq_mphead, ipsq_mptail, ipsq_split
- *
- * ipsq_pending_ipif, ipsq_current_ipif, ipsq_pending_mp, ipsq_flags,
- * ipsq_waitfor
- *
- * The fields in the last line above below are set mostly by a writer thread
- * But there is an exception in the last call to ipif_ill_refrele_tail which
- * could also race with a conn close which could be cleaning up the
- * fields. So we choose to protect using ipsq_lock instead of depending on
- * the property of the writer.
- * ill_g_lock protects
- * ipsq_refs, ipsq_phyint_list
- */
-typedef struct ipsq_s {
- kmutex_t ipsq_lock;
- int ipsq_reentry_cnt;
- kthread_t *ipsq_writer; /* current owner (thread id) */
- int ipsq_flags;
- mblk_t *ipsq_xopq_mphead; /* list of excl ops mostly ioctls */
- mblk_t *ipsq_xopq_mptail;
- mblk_t *ipsq_mphead; /* msgs on ipsq linked thru b_next */
- mblk_t *ipsq_mptail; /* msgs on ipsq linked thru b_next */
- int ipsq_current_ioctl; /* current ioctl, or 0 if no ioctl */
- boolean_t ipsq_current_done; /* is the current op done? */
- ipif_t *ipsq_current_ipif; /* ipif associated with current op */
- ipif_t *ipsq_pending_ipif; /* ipif associated w. ipsq_pending_mp */
- mblk_t *ipsq_pending_mp; /* current ioctl mp while waiting for */
- /* response from another module */
- struct ipsq_s *ipsq_next; /* list of all syncq's (ipsq_g_list) */
- uint_t ipsq_refs; /* Number of phyints on this ipsq */
- struct phyint *ipsq_phyint_list; /* List of phyints on this ipsq */
- boolean_t ipsq_split; /* ipsq may need to be split */
- int ipsq_waitfor; /* Values encoded below */
- char ipsq_name[LIFNAMSIZ+1]; /* same as phyint_groupname */
- ip_stack_t *ipsq_ipst; /* Does not have a netstack_hold */
-
+ * The IP-MT design revolves around the serialization objects ipsq_t (IPSQ)
+ * and ipxop_t (exclusive operation or "xop"). Becoming "writer" on an IPSQ
+ * ensures that no other threads can become "writer" on any IPSQs sharing that
+ * IPSQ's xop until the writer thread is done.
+ *
+ * Each phyint points to one IPSQ that remains fixed over the phyint's life.
+ * Each IPSQ points to one xop that can change over the IPSQ's life. If a
+ * phyint is *not* in an IPMP group, then its IPSQ will refer to the IPSQ's
+ * "own" xop (ipsq_ownxop). If a phyint *is* part of an IPMP group, then its
+ * IPSQ will refer to the "group" xop, which is shorthand for the xop of the
+ * IPSQ of the IPMP meta-interface's phyint. Thus, all phyints that are part
+ * of the same IPMP group will have their IPSQ's point to the group xop, and
+ * thus becoming "writer" on any phyint in the group will prevent any other
+ * writer on any other phyint in the group. All IPSQs sharing the same xop
+ * are chained together through ipsq_next (in the degenerate common case,
+ * ipsq_next simply refers to itself). Note that the group xop is guaranteed
+ * to exist at least as long as there are members in the group, since the IPMP
+ * meta-interface can only be destroyed if the group is empty.
+ *
+ * Incoming exclusive operation requests are enqueued on the IPSQ they arrived
+ * on rather than the xop. This makes switching xop's (as would happen when a
+ * phyint leaves an IPMP group) simple, because after the phyint leaves the
+ * group, any operations enqueued on its IPSQ can be safely processed with
+ * respect to its new xop, and any operations enqueued on the IPSQs of its
+ * former group can be processed with respect to their existing group xop.
+ * Even so, switching xops is a subtle dance; see ipsq_dq() for details.
+ *
+ * An IPSQ's "own" xop is embedded within the IPSQ itself since they have have
+ * identical lifetimes, and because doing so simplifies pointer management.
+ * While each phyint and IPSQ point to each other, it is not possible to free
+ * the IPSQ when the phyint is freed, since we may still *inside* the IPSQ
+ * when the phyint is being freed. Thus, ipsq_phyint is set to NULL when the
+ * phyint is freed, and the IPSQ free is later done in ipsq_exit().
+ *
+ * ipsq_t synchronization: read write
+ *
+ * ipsq_xopq_mphead ipx_lock ipx_lock
+ * ipsq_xopq_mptail ipx_lock ipx_lock
+ * ipsq_xop_switch_mp ipsq_lock ipsq_lock
+ * ipsq_phyint write once write once
+ * ipsq_next RW_READER ill_g_lock RW_WRITER ill_g_lock
+ * ipsq_xop ipsq_lock or ipsq ipsq_lock + ipsq
+ * ipsq_swxop ipsq ipsq
+ * ipsq_ownxop see ipxop_t see ipxop_t
+ * ipsq_ipst write once write once
+ *
+ * ipxop_t synchronization: read write
+ *
+ * ipx_writer ipx_lock ipx_lock
+ * ipx_xop_queued ipx_lock ipx_lock
+ * ipx_mphead ipx_lock ipx_lock
+ * ipx_mptail ipx_lock ipx_lock
+ * ipx_ipsq write once write once
+ * ips_ipsq_queued ipx_lock ipx_lock
+ * ipx_waitfor ipsq or ipx_lock ipsq + ipx_lock
+ * ipx_reentry_cnt ipsq or ipx_lock ipsq + ipx_lock
+ * ipx_current_done ipsq ipsq
+ * ipx_current_ioctl ipsq ipsq
+ * ipx_current_ipif ipsq or ipx_lock ipsq + ipx_lock
+ * ipx_pending_ipif ipsq or ipx_lock ipsq + ipx_lock
+ * ipx_pending_mp ipsq or ipx_lock ipsq + ipx_lock
+ * ipx_forced ipsq ipsq
+ * ipx_depth ipsq ipsq
+ * ipx_stack ipsq ipsq
+ */
+typedef struct ipxop_s {
+ kmutex_t ipx_lock; /* see above */
+ kthread_t *ipx_writer; /* current owner */
+ mblk_t *ipx_mphead; /* messages tied to this op */
+ mblk_t *ipx_mptail;
+ struct ipsq_s *ipx_ipsq; /* associated ipsq */
+ boolean_t ipx_ipsq_queued; /* ipsq using xop has queued op */
+ int ipx_waitfor; /* waiting; values encoded below */
+ int ipx_reentry_cnt;
+ boolean_t ipx_current_done; /* is the current operation done? */
+ int ipx_current_ioctl; /* current ioctl, or 0 if no ioctl */
+ ipif_t *ipx_current_ipif; /* ipif for current op */
+ ipif_t *ipx_pending_ipif; /* ipif for ipsq_pending_mp */
+ mblk_t *ipx_pending_mp; /* current ioctl mp while waiting */
+ boolean_t ipx_forced; /* debugging aid */
#ifdef DEBUG
- int ipsq_depth; /* debugging aid */
-#define IPSQ_STACK_DEPTH 15
- pc_t ipsq_stack[IPSQ_STACK_DEPTH]; /* debugging aid */
+ int ipx_depth; /* debugging aid */
+#define IPX_STACK_DEPTH 15
+ pc_t ipx_stack[IPX_STACK_DEPTH]; /* debugging aid */
#endif
-} ipsq_t;
+} ipxop_t;
-/* ipsq_flags */
-#define IPSQ_GROUP 0x1 /* This ipsq belongs to an IPMP group */
+typedef struct ipsq_s {
+ kmutex_t ipsq_lock; /* see above */
+ mblk_t *ipsq_switch_mp; /* op to handle right after switch */
+ mblk_t *ipsq_xopq_mphead; /* list of excl ops (mostly ioctls) */
+ mblk_t *ipsq_xopq_mptail;
+ struct phyint *ipsq_phyint; /* associated phyint */
+ struct ipsq_s *ipsq_next; /* next ipsq sharing ipsq_xop */
+ struct ipxop_s *ipsq_xop; /* current xop synchronization info */
+ struct ipxop_s *ipsq_swxop; /* switch xop to on ipsq_exit() */
+ struct ipxop_s ipsq_ownxop; /* our own xop (may not be in-use) */
+ ip_stack_t *ipsq_ipst; /* does not have a netstack_hold */
+} ipsq_t;
/*
- * ipsq_waitfor:
- *
- * IPIF_DOWN 1 ipif_down waiting for refcnts to drop
- * ILL_DOWN 2 ill_down waiting for refcnts to drop
- * IPIF_FREE 3 ipif_free waiting for refcnts to drop
- * ILL_FREE 4 ill unplumb waiting for refcnts to drop
- * ILL_MOVE_OK 5 failover waiting for refcnts to drop
+ * ipx_waitfor values:
*/
+enum {
+ IPIF_DOWN = 1, /* ipif_down() waiting for refcnts to drop */
+ ILL_DOWN, /* ill_down() waiting for refcnts to drop */
+ IPIF_FREE, /* ipif_free() waiting for refcnts to drop */
+ ILL_FREE /* ill unplumb waiting for refcnts to drop */
+};
-enum { IPIF_DOWN = 1, ILL_DOWN, IPIF_FREE, ILL_FREE, ILL_MOVE_OK };
+/* Operation types for ipsq_try_enter() */
+#define CUR_OP 0 /* request writer within current operation */
+#define NEW_OP 1 /* request writer for a new operation */
+#define SWITCH_OP 2 /* request writer once IPSQ XOP switches */
-/* Flags passed to ipsq_try_enter */
-#define CUR_OP 0 /* Current ioctl continuing again */
-#define NEW_OP 1 /* New ioctl starting afresh */
+/*
+ * Kstats tracked on each IPMP meta-interface. Order here must match
+ * ipmp_kstats[] in ip/ipmp.c.
+ */
+enum {
+ IPMP_KSTAT_OBYTES, IPMP_KSTAT_OBYTES64, IPMP_KSTAT_RBYTES,
+ IPMP_KSTAT_RBYTES64, IPMP_KSTAT_OPACKETS, IPMP_KSTAT_OPACKETS64,
+ IPMP_KSTAT_OERRORS, IPMP_KSTAT_IPACKETS, IPMP_KSTAT_IPACKETS64,
+ IPMP_KSTAT_IERRORS, IPMP_KSTAT_MULTIRCV, IPMP_KSTAT_MULTIXMT,
+ IPMP_KSTAT_BRDCSTRCV, IPMP_KSTAT_BRDCSTXMT, IPMP_KSTAT_LINK_UP,
+ IPMP_KSTAT_MAX /* keep last */
+};
/*
* phyint represents state that is common to both IPv4 and IPv6 interfaces.
* There is a separate ill_t representing IPv4 and IPv6 which has a
* backpointer to the phyint structure for accessing common state.
- *
- * NOTE : It just stores the group name as there is only one name for
- * IPv4 and IPv6 i.e it is a underlying link property. Actually
- * IPv4 and IPv6 ill are grouped together when their phyints have
- * the same name.
*/
typedef struct phyint {
struct ill_s *phyint_illv4;
struct ill_s *phyint_illv6;
- uint_t phyint_ifindex; /* SIOCLSLIFINDEX */
- char *phyint_groupname; /* SIOCSLIFGROUPNAME */
- uint_t phyint_groupname_len;
+ uint_t phyint_ifindex; /* SIOCSLIFINDEX */
uint64_t phyint_flags;
avl_node_t phyint_avl_by_index; /* avl tree by index */
avl_node_t phyint_avl_by_name; /* avl tree by name */
kmutex_t phyint_lock;
struct ipsq_s *phyint_ipsq; /* back pointer to ipsq */
- struct phyint *phyint_ipsq_next; /* phyint list on this ipsq */
- /* Once Clearview IPMP is added the follow two fields can be removed */
- uint_t phyint_group_ifindex; /* index assigned to group */
- uint_t phyint_hook_ifindex; /* index used with neti/hook */
+ struct ipmp_grp_s *phyint_grp; /* associated IPMP group */
+ char phyint_name[LIFNAMSIZ]; /* physical interface name */
+ uint64_t phyint_kstats0[IPMP_KSTAT_MAX]; /* baseline kstats */
} phyint_t;
#define CACHE_ALIGN_SIZE 64
-
#define CACHE_ALIGN(align_struct) P2ROUNDUP(sizeof (struct align_struct),\
CACHE_ALIGN_SIZE)
struct _phyint_list_s_ {
@@ -1568,34 +1580,6 @@ typedef union phyint_list_u {
#define phyint_list_avl_by_index phyint_list_s.phyint_list_avl_by_index
#define phyint_list_avl_by_name phyint_list_s.phyint_list_avl_by_name
-/*
- * ILL groups. We group ills,
- *
- * - if the ills have the same group name. (New way)
- *
- * ill_group locking notes:
- *
- * illgrp_lock protects ill_grp_ill_schednext.
- *
- * ill_g_lock protects ill_grp_next, illgrp_ill, illgrp_ill_count.
- * Holding ill_g_lock freezes the memberships of ills in IPMP groups.
- * It also freezes the global list of ills and all ipifs in all ills.
- *
- * To remove an ipif from the linked list of ipifs of that ill ipif_free_tail
- * holds both ill_g_lock, and ill_lock. Similarly to remove an ill from the
- * global list of ills, ill_glist_delete() holds ill_g_lock as writer.
- * This simplifies things for ipif_select_source, illgrp_scheduler etc.
- * that need to walk the members of an illgrp. They just hold ill_g_lock
- * as reader to do the walk.
- *
- */
-typedef struct ill_group {
- kmutex_t illgrp_lock;
- struct ill_group *illgrp_next; /* Next ill_group */
- struct ill_s *illgrp_ill_schednext; /* Next ill to be scheduled */
- struct ill_s *illgrp_ill; /* First ill in the group */
- int illgrp_ill_count;
-} ill_group_t;
/*
* Fragmentation hash bucket
@@ -1792,6 +1776,108 @@ typedef struct ill_lso_capab_s ill_lso_capab_t;
#define IS_LOOPBACK(ill) \
((ill)->ill_phyint->phyint_flags & PHYI_LOOPBACK)
+/* Is this an IPMP meta-interface ILL? */
+#define IS_IPMP(ill) \
+ ((ill)->ill_phyint->phyint_flags & PHYI_IPMP)
+
+/* Is this ILL under an IPMP meta-interface? (aka "in a group?") */
+#define IS_UNDER_IPMP(ill) \
+ ((ill)->ill_grp != NULL && !IS_IPMP(ill))
+
+/* Is ill1 in the same illgrp as ill2? */
+#define IS_IN_SAME_ILLGRP(ill1, ill2) \
+ ((ill1)->ill_grp != NULL && ((ill1)->ill_grp == (ill2)->ill_grp))
+
+/* Is ill1 on the same LAN as ill2? */
+#define IS_ON_SAME_LAN(ill1, ill2) \
+ ((ill1) == (ill2) || IS_IN_SAME_ILLGRP(ill1, ill2))
+
+#define ILL_OTHER(ill) \
+ ((ill)->ill_isv6 ? (ill)->ill_phyint->phyint_illv4 : \
+ (ill)->ill_phyint->phyint_illv6)
+
+/*
+ * IPMP group ILL state structure -- up to two per IPMP group (V4 and V6).
+ * Created when the V4 and/or V6 IPMP meta-interface is I_PLINK'd. It is
+ * guaranteed to persist while there are interfaces of that type in the group.
+ * In general, most fields are accessed outside of the IPSQ (e.g., in the
+ * datapath), and thus use locks in addition to the IPSQ for protection.
+ *
+ * synchronization: read write
+ *
+ * ig_if ipsq or ill_g_lock ipsq and ill_g_lock
+ * ig_actif ipsq or ipmp_lock ipsq and ipmp_lock
+ * ig_nactif ipsq or ipmp_lock ipsq and ipmp_lock
+ * ig_next_ill ipsq or ipmp_lock ipsq and ipmp_lock
+ * ig_ipmp_ill write once write once
+ * ig_cast_ill ipsq or ipmp_lock ipsq and ipmp_lock
+ * ig_arpent ipsq ipsq
+ * ig_mtu ipsq ipsq
+ */
+typedef struct ipmp_illgrp_s {
+ list_t ig_if; /* list of all interfaces */
+ list_t ig_actif; /* list of active interfaces */
+ uint_t ig_nactif; /* number of active interfaces */
+ struct ill_s *ig_next_ill; /* next active interface to use */
+ struct ill_s *ig_ipmp_ill; /* backpointer to IPMP meta-interface */
+ struct ill_s *ig_cast_ill; /* nominated ill for multi/broadcast */
+ list_t ig_arpent; /* list of ARP entries */
+ uint_t ig_mtu; /* ig_ipmp_ill->ill_max_mtu */
+} ipmp_illgrp_t;
+
+/*
+ * IPMP group state structure -- one per IPMP group. Created when the
+ * IPMP meta-interface is plumbed; it is guaranteed to persist while there
+ * are interfaces in it.
+ *
+ * ipmp_grp_t synchronization: read write
+ *
+ * gr_name ipmp_lock ipmp_lock
+ * gr_ifname write once write once
+ * gr_mactype ipmp_lock ipmp_lock
+ * gr_phyint write once write once
+ * gr_nif ipmp_lock ipmp_lock
+ * gr_nactif ipsq ipsq
+ * gr_v4 ipmp_lock ipmp_lock
+ * gr_v6 ipmp_lock ipmp_lock
+ * gr_nv4 ipmp_lock ipmp_lock
+ * gr_nv6 ipmp_lock ipmp_lock
+ * gr_pendv4 ipmp_lock ipmp_lock
+ * gr_pendv6 ipmp_lock ipmp_lock
+ * gr_linkdownmp ipsq ipsq
+ * gr_ksp ipmp_lock ipmp_lock
+ * gr_kstats0 atomic atomic
+ */
+typedef struct ipmp_grp_s {
+ char gr_name[LIFGRNAMSIZ]; /* group name */
+ char gr_ifname[LIFNAMSIZ]; /* interface name */
+ t_uscalar_t gr_mactype; /* DLPI mactype of group */
+ phyint_t *gr_phyint; /* IPMP group phyint */
+ uint_t gr_nif; /* number of interfaces in group */
+ uint_t gr_nactif; /* number of active interfaces */
+ ipmp_illgrp_t *gr_v4; /* V4 group information */
+ ipmp_illgrp_t *gr_v6; /* V6 group information */
+ uint_t gr_nv4; /* number of ills in V4 group */
+ uint_t gr_nv6; /* number of ills in V6 group */
+ uint_t gr_pendv4; /* number of pending ills in V4 group */
+ uint_t gr_pendv6; /* number of pending ills in V6 group */
+ mblk_t *gr_linkdownmp; /* message used to bring link down */
+ kstat_t *gr_ksp; /* group kstat pointer */
+ uint64_t gr_kstats0[IPMP_KSTAT_MAX]; /* baseline group kstats */
+} ipmp_grp_t;
+
+/*
+ * IPMP ARP entry -- one per SIOCS*ARP entry tied to the group. Used to keep
+ * ARP up-to-date as the active set of interfaces in the group changes.
+ */
+typedef struct ipmp_arpent_s {
+ mblk_t *ia_area_mp; /* AR_ENTRY_ADD pointer */
+ ipaddr_t ia_ipaddr; /* IP address for this entry */
+ boolean_t ia_proxyarp; /* proxy ARP entry? */
+ boolean_t ia_notified; /* ARP notified about this entry? */
+ list_node_t ia_node; /* next ARP entry in list */
+} ipmp_arpent_t;
+
/*
* IP Lower level Structure.
* Instance data structure in ip_open when there is a device below us.
@@ -1851,6 +1937,7 @@ typedef struct ill_s {
mblk_t *ill_unbind_mp; /* unbind mp from ill_dl_up() */
mblk_t *ill_promiscoff_mp; /* for ill_leave_allmulti() */
mblk_t *ill_dlpi_deferred; /* b_next chain of control messages */
+ mblk_t *ill_ardeact_mp; /* deact mp from ipmp_ill_activate() */
mblk_t *ill_phys_addr_mp; /* mblk which holds ill_phys_addr */
#define ill_last_mp_to_free ill_phys_addr_mp
@@ -1867,21 +1954,19 @@ typedef struct ill_s {
ill_dlpi_style_set : 1,
ill_ifname_pending : 1,
- ill_move_in_progress : 1, /* FAILOVER/FAILBACK in progress */
ill_join_allmulti : 1,
ill_logical_down : 1,
-
ill_is_6to4tun : 1, /* Interface is a 6to4 tunnel */
+
ill_promisc_on_phys : 1, /* phys interface in promisc mode */
ill_dl_up : 1,
ill_up_ipifs : 1,
-
ill_note_link : 1, /* supports link-up notification */
+
ill_capab_reneg : 1, /* capability renegotiation to be done */
ill_dld_capab_inprog : 1, /* direct dld capab call in prog */
ill_need_recover_multicast : 1,
-
- ill_pad_to_bit_31 : 16;
+ ill_pad_to_bit_31 : 17;
/* Following bit fields protected by ill_lock */
uint_t
@@ -1891,10 +1976,8 @@ typedef struct ill_s {
ill_arp_closing : 1,
ill_arp_bringup_pending : 1,
- ill_mtu_userspecified : 1, /* SIOCSLIFLNKINFO has set the mtu */
ill_arp_extend : 1, /* ARP has DAD extensions */
-
- ill_pad_bit_31 : 25;
+ ill_pad_bit_31 : 26;
/*
* Used in SIOCSIFMUXID and SIOCGIFMUXID for 'ifconfig unplumb'.
@@ -1931,6 +2014,7 @@ typedef struct ill_s {
*/
uint8_t ill_max_hops; /* Maximum hops for any logical interface */
uint_t ill_max_mtu; /* Maximum MTU for any logical interface */
+ uint_t ill_user_mtu; /* User-specified MTU via SIOCSLIFLNKINFO */
uint32_t ill_reachable_time; /* Value for ND algorithm in msec */
uint32_t ill_reachable_retrans_time; /* Value for ND algorithm msec */
uint_t ill_max_buf; /* Max # of req to buffer for ND */
@@ -1953,13 +2037,9 @@ typedef struct ill_s {
* of the ipif.
*/
mblk_t *ill_arp_on_mp;
- /* Peer ill of an IPMP move operation */
- struct ill_s *ill_move_peer;
phyint_t *ill_phyint;
uint64_t ill_flags;
- ill_group_t *ill_group;
- struct ill_s *ill_group_next;
kmutex_t ill_lock; /* Please see table below */
/*
@@ -2005,6 +2085,18 @@ typedef struct ill_s {
void *ill_flownotify_mh; /* Tx flow ctl, mac cb handle */
uint_t ill_ilm_cnt; /* ilms referencing this ill */
uint_t ill_ipallmulti_cnt; /* ip_join_allmulti() calls */
+ /*
+ * IPMP fields.
+ */
+ ipmp_illgrp_t *ill_grp; /* IPMP group information */
+ list_node_t ill_actnode; /* next active ill in group */
+ list_node_t ill_grpnode; /* next ill in group */
+ ipif_t *ill_src_ipif; /* source address selection rotor */
+ ipif_t *ill_move_ipif; /* ipif awaiting move to new ill */
+ boolean_t ill_nom_cast; /* nominated for mcast/bcast */
+ uint_t ill_bound_cnt; /* # of data addresses bound to ill */
+ ipif_t *ill_bound_ipif; /* ipif chain bound to ill */
+ timeout_id_t ill_refresh_tid; /* ill refresh retry timeout id */
} ill_t;
/*
@@ -2088,6 +2180,7 @@ typedef struct ill_s {
*
* ill_max_mtu
*
+ * ill_user_mtu ipsq + ill_lock ill_lock
* ill_reachable_time ipsq + ill_lock ill_lock
* ill_reachable_retrans_time ipsq + ill_lock ill_lock
* ill_max_buf ipsq + ill_lock ill_lock
@@ -2102,12 +2195,9 @@ typedef struct ill_s {
* ill_arp_down_mp ipsq ipsq
* ill_arp_del_mapping_mp ipsq ipsq
* ill_arp_on_mp ipsq ipsq
- * ill_move_peer ipsq ipsq
*
* ill_phyint ipsq, ill_g_lock, ill_lock Any of them
* ill_flags ill_lock ill_lock
- * ill_group ipsq, ill_g_lock, ill_lock Any of them
- * ill_group_next ipsq, ill_g_lock, ill_lock Any of them
* ill_nd_lla_mp ipsq + down ill only when ill is up
* ill_nd_lla ipsq + down ill only when ill is up
* ill_nd_lla_len ipsq + down ill only when ill is up
@@ -2122,11 +2212,26 @@ typedef struct ill_s {
* ill_ilm_walker_cnt ill_lock ill_lock
* ill_nce_cnt ill_lock ill_lock
* ill_ilm_cnt ill_lock ill_lock
+ * ill_src_ipif ill_g_lock ill_g_lock
* ill_trace ill_lock ill_lock
* ill_usesrc_grp_next ill_g_usesrc_lock ill_g_usesrc_lock
* ill_dhcpinit atomics atomics
* ill_flownotify_mh write once write once
* ill_capab_pending_cnt ipsq ipsq
+ *
+ * ill_bound_cnt ipsq ipsq
+ * ill_bound_ipif ipsq ipsq
+ * ill_actnode ipsq + ipmp_lock ipsq OR ipmp_lock
+ * ill_grpnode ipsq + ill_g_lock ipsq OR ill_g_lock
+ * ill_src_ipif ill_g_lock ill_g_lock
+ * ill_move_ipif ipsq ipsq
+ * ill_nom_cast ipsq ipsq OR advisory
+ * ill_refresh_tid ill_lock ill_lock
+ * ill_grp (for IPMP ill) write once write once
+ * ill_grp (for underlying ill) ipsq + ill_g_lock ipsq OR ill_g_lock
+ *
+ * NOTE: It's OK to make heuristic decisions on an underlying interface
+ * by using IS_UNDER_IPMP() or comparing ill_grp's raw pointer value.
*/
/*
@@ -2167,7 +2272,7 @@ enum { IF_CMD = 1, LIF_CMD, TUN_CMD, ARP_CMD, XARP_CMD, MSFILT_CMD, MISC_CMD };
#define IPI_MODOK 0x2 /* Permitted on mod instance of IP */
#define IPI_WR 0x4 /* Need to grab writer access */
#define IPI_GET_CMD 0x8 /* branch to mi_copyout on success */
-#define IPI_REPL 0x10 /* valid for replacement ipif created in MOVE */
+/* unused 0x10 */
#define IPI_NULL_BCONT 0x20 /* ioctl has not data and hence no b_cont */
#define IPI_PASS_DOWN 0x40 /* pass this ioctl down when a module only */
@@ -2176,17 +2281,6 @@ extern ip_ioctl_cmd_t ip_misc_ioctl_table[];
extern int ip_ndx_ioctl_count;
extern int ip_misc_ioctl_count;
-#define ILL_CLEAR_MOVE(ill) { \
- ill_t *peer_ill; \
- \
- peer_ill = (ill)->ill_move_peer; \
- ASSERT(peer_ill != NULL); \
- (ill)->ill_move_in_progress = B_FALSE; \
- peer_ill->ill_move_in_progress = B_FALSE; \
- (ill)->ill_move_peer = NULL; \
- peer_ill->ill_move_peer = NULL; \
-}
-
/* Passed down by ARP to IP during I_PLINK/I_PUNLINK */
typedef struct ipmx_s {
char ipmx_name[LIFNAMSIZ]; /* if name */
@@ -2799,19 +2893,11 @@ typedef struct ip_pktinfo {
(!((ipif)->ipif_state_flags & (IPIF_CONDEMNED)) || \
IAM_WRITER_IPIF(ipif))
-/*
- * These macros are used by critical set ioctls and failover ioctls to
- * mark the ipif appropriately before starting the operation and to clear the
- * marks after completing the operation.
- */
-#define IPIF_UNMARK_MOVING(ipif) \
- (ipif)->ipif_state_flags &= ~IPIF_MOVING & ~IPIF_CHANGING;
-
#define ILL_UNMARK_CHANGING(ill) \
(ill)->ill_state_flags &= ~ILL_CHANGING;
/* Macros used to assert that this thread is a writer */
-#define IAM_WRITER_IPSQ(ipsq) ((ipsq)->ipsq_writer == curthread)
+#define IAM_WRITER_IPSQ(ipsq) ((ipsq)->ipsq_xop->ipx_writer == curthread)
#define IAM_WRITER_ILL(ill) IAM_WRITER_IPSQ((ill)->ill_phyint->phyint_ipsq)
#define IAM_WRITER_IPIF(ipif) IAM_WRITER_ILL((ipif)->ipif_ill)
@@ -2837,9 +2923,9 @@ typedef struct ip_pktinfo {
#define RELEASE_ILL_LOCKS(ill_1, ill_2) \
{ \
if (ill_1 != NULL) \
- mutex_exit(&(ill_1)->ill_lock); \
+ mutex_exit(&(ill_1)->ill_lock); \
if (ill_2 != NULL && ill_2 != ill_1) \
- mutex_exit(&(ill_2)->ill_lock); \
+ mutex_exit(&(ill_2)->ill_lock); \
}
/* Get the other protocol instance ill */
@@ -2847,14 +2933,9 @@ typedef struct ip_pktinfo {
((ill)->ill_isv6 ? (ill)->ill_phyint->phyint_illv4 : \
(ill)->ill_phyint->phyint_illv6)
-#define MATCH_V4_ONLY 0x1
-#define MATCH_V6_ONLY 0x2
-#define MATCH_ILL_ONLY 0x4
-
/* ioctl command info: Ioctl properties extracted and stored in here */
typedef struct cmd_info_s
{
- char ci_groupname[LIFNAMSIZ + 1]; /* SIOCSLIFGROUPNAME */
ipif_t *ci_ipif; /* ipif associated with [l]ifreq ioctl's */
sin_t *ci_sin; /* the sin struct passed down */
sin6_t *ci_sin6; /* the sin6_t struct passed down */
@@ -2990,10 +3071,8 @@ extern struct module_info ip_mod_info;
((ipst)->ips_ip6_loopback_out_event.he_interested)
/*
- * Hooks marcos used inside of ip
+ * Hooks macros used inside of ip
*/
-#define IPHA_VHL ipha_version_and_hdr_length
-
#define FW_HOOKS(_hook, _event, _ilp, _olp, _iph, _fm, _m, _llm, ipst) \
\
if ((_hook).he_interested) { \
@@ -3002,21 +3081,8 @@ extern struct module_info ip_mod_info;
_NOTE(CONSTCOND) \
ASSERT((_ilp != NULL) || (_olp != NULL)); \
\
- _NOTE(CONSTCOND) \
- if ((_ilp != NULL) && \
- (((ill_t *)(_ilp))->ill_phyint != NULL)) \
- info.hpe_ifp = (phy_if_t)((ill_t *) \
- (_ilp))->ill_phyint->phyint_hook_ifindex; \
- else \
- info.hpe_ifp = 0; \
- \
- _NOTE(CONSTCOND) \
- if ((_olp != NULL) && \
- (((ill_t *)(_olp))->ill_phyint != NULL)) \
- info.hpe_ofp = (phy_if_t)((ill_t *) \
- (_olp))->ill_phyint->phyint_hook_ifindex; \
- else \
- info.hpe_ofp = 0; \
+ FW_SET_ILL_INDEX(info.hpe_ifp, (ill_t *)_ilp); \
+ FW_SET_ILL_INDEX(info.hpe_ofp, (ill_t *)_olp); \
info.hpe_protocol = ipst->ips_ipv4_net_data; \
info.hpe_hdr = _iph; \
info.hpe_mp = &(_fm); \
@@ -3026,10 +3092,8 @@ extern struct module_info ip_mod_info;
_event, (hook_data_t)&info) != 0) { \
ip2dbg(("%s hook dropped mblk chain %p hdr %p\n",\
(_hook).he_name, (void *)_fm, (void *)_m)); \
- if (_fm != NULL) { \
- freemsg(_fm); \
- _fm = NULL; \
- } \
+ freemsg(_fm); \
+ _fm = NULL; \
_iph = NULL; \
_m = NULL; \
} else { \
@@ -3046,21 +3110,8 @@ extern struct module_info ip_mod_info;
_NOTE(CONSTCOND) \
ASSERT((_ilp != NULL) || (_olp != NULL)); \
\
- _NOTE(CONSTCOND) \
- if ((_ilp != NULL) && \
- (((ill_t *)(_ilp))->ill_phyint != NULL)) \
- info.hpe_ifp = (phy_if_t)((ill_t *) \
- (_ilp))->ill_phyint->phyint_hook_ifindex; \
- else \
- info.hpe_ifp = 0; \
- \
- _NOTE(CONSTCOND) \
- if ((_olp != NULL) && \
- (((ill_t *)(_olp))->ill_phyint != NULL)) \
- info.hpe_ofp = (phy_if_t)((ill_t *) \
- (_olp))->ill_phyint->phyint_hook_ifindex; \
- else \
- info.hpe_ofp = 0; \
+ FW_SET_ILL_INDEX(info.hpe_ifp, (ill_t *)_ilp); \
+ FW_SET_ILL_INDEX(info.hpe_ofp, (ill_t *)_olp); \
info.hpe_protocol = ipst->ips_ipv6_net_data; \
info.hpe_hdr = _iph; \
info.hpe_mp = &(_fm); \
@@ -3070,10 +3121,8 @@ extern struct module_info ip_mod_info;
_event, (hook_data_t)&info) != 0) { \
ip2dbg(("%s hook dropped mblk chain %p hdr %p\n",\
(_hook).he_name, (void *)_fm, (void *)_m)); \
- if (_fm != NULL) { \
- freemsg(_fm); \
- _fm = NULL; \
- } \
+ freemsg(_fm); \
+ _fm = NULL; \
_iph = NULL; \
_m = NULL; \
} else { \
@@ -3082,6 +3131,17 @@ extern struct module_info ip_mod_info;
} \
}
+#define FW_SET_ILL_INDEX(fp, ill) \
+ _NOTE(CONSTCOND) \
+ if ((ill) == NULL || (ill)->ill_phyint == NULL) { \
+ (fp) = 0; \
+ _NOTE(CONSTCOND) \
+ } else if (IS_UNDER_IPMP(ill)) { \
+ (fp) = ipmp_ill_get_ipmp_ifindex(ill); \
+ } else { \
+ (fp) = (ill)->ill_phyint->phyint_ifindex; \
+ }
+
/*
* Network byte order macros
*/
@@ -3146,16 +3206,15 @@ struct ipsec_out_s;
struct mac_header_info_s;
-extern boolean_t ip_assign_ifindex(uint_t *, ip_stack_t *);
extern void ill_frag_timer(void *);
extern ill_t *ill_first(int, int, ill_walk_context_t *, ip_stack_t *);
extern ill_t *ill_next(ill_walk_context_t *, ill_t *);
extern void ill_frag_timer_start(ill_t *);
extern void ill_nic_event_dispatch(ill_t *, lif_if_t, nic_event_t,
nic_event_data_t, size_t);
-extern void ill_nic_event_plumb(ill_t *, boolean_t);
extern mblk_t *ip_carve_mp(mblk_t **, ssize_t);
extern mblk_t *ip_dlpi_alloc(size_t, t_uscalar_t);
+extern mblk_t *ip_dlnotify_alloc(uint_t, uint_t);
extern char *ip_dot_addr(ipaddr_t, char *);
extern const char *mac_colon_addr(const uint8_t *, size_t, char *, size_t);
extern void ip_lwput(queue_t *, mblk_t *);
@@ -3239,8 +3298,49 @@ extern int ip_hdr_complete(ipha_t *, zoneid_t, ip_stack_t *);
extern struct qinit iprinitv6;
extern struct qinit ipwinitv6;
-extern void conn_drain_insert(conn_t *connp);
-extern int conn_ipsec_length(conn_t *connp);
+extern void ipmp_init(ip_stack_t *);
+extern void ipmp_destroy(ip_stack_t *);
+extern ipmp_grp_t *ipmp_grp_create(const char *, phyint_t *);
+extern void ipmp_grp_destroy(ipmp_grp_t *);
+extern void ipmp_grp_info(const ipmp_grp_t *, lifgroupinfo_t *);
+extern int ipmp_grp_rename(ipmp_grp_t *, const char *);
+extern ipmp_grp_t *ipmp_grp_lookup(const char *, ip_stack_t *);
+extern int ipmp_grp_vet_phyint(ipmp_grp_t *, phyint_t *);
+extern ipmp_illgrp_t *ipmp_illgrp_create(ill_t *);
+extern void ipmp_illgrp_destroy(ipmp_illgrp_t *);
+extern ill_t *ipmp_illgrp_add_ipif(ipmp_illgrp_t *, ipif_t *);
+extern void ipmp_illgrp_del_ipif(ipmp_illgrp_t *, ipif_t *);
+extern ill_t *ipmp_illgrp_next_ill(ipmp_illgrp_t *);
+extern ill_t *ipmp_illgrp_hold_next_ill(ipmp_illgrp_t *);
+extern ill_t *ipmp_illgrp_cast_ill(ipmp_illgrp_t *);
+extern ill_t *ipmp_illgrp_hold_cast_ill(ipmp_illgrp_t *);
+extern ill_t *ipmp_illgrp_ipmp_ill(ipmp_illgrp_t *);
+extern void ipmp_illgrp_refresh_mtu(ipmp_illgrp_t *);
+extern ipmp_arpent_t *ipmp_illgrp_create_arpent(ipmp_illgrp_t *, mblk_t *,
+ boolean_t);
+extern void ipmp_illgrp_destroy_arpent(ipmp_illgrp_t *, ipmp_arpent_t *);
+extern ipmp_arpent_t *ipmp_illgrp_lookup_arpent(ipmp_illgrp_t *, ipaddr_t *);
+extern void ipmp_illgrp_refresh_arpent(ipmp_illgrp_t *);
+extern void ipmp_illgrp_mark_arpent(ipmp_illgrp_t *, ipmp_arpent_t *);
+extern ill_t *ipmp_illgrp_find_ill(ipmp_illgrp_t *, uchar_t *, uint_t);
+extern void ipmp_illgrp_link_grp(ipmp_illgrp_t *, ipmp_grp_t *);
+extern int ipmp_illgrp_unlink_grp(ipmp_illgrp_t *);
+extern uint_t ipmp_ill_get_ipmp_ifindex(const ill_t *);
+extern void ipmp_ill_join_illgrp(ill_t *, ipmp_illgrp_t *);
+extern void ipmp_ill_leave_illgrp(ill_t *);
+extern ill_t *ipmp_ill_hold_ipmp_ill(ill_t *);
+extern boolean_t ipmp_ill_is_active(ill_t *);
+extern void ipmp_ill_refresh_active(ill_t *);
+extern void ipmp_phyint_join_grp(phyint_t *, ipmp_grp_t *);
+extern void ipmp_phyint_leave_grp(phyint_t *);
+extern void ipmp_phyint_refresh_active(phyint_t *);
+extern ill_t *ipmp_ipif_bound_ill(const ipif_t *);
+extern ill_t *ipmp_ipif_hold_bound_ill(const ipif_t *);
+extern boolean_t ipmp_ipif_is_dataaddr(const ipif_t *);
+extern boolean_t ipmp_ipif_is_stubaddr(const ipif_t *);
+
+extern void conn_drain_insert(conn_t *connp);
+extern int conn_ipsec_length(conn_t *connp);
extern void ip_wput_ipsec_out(queue_t *, mblk_t *, ipha_t *, ill_t *,
ire_t *);
extern ipaddr_t ip_get_dst(ipha_t *);
@@ -3274,9 +3374,6 @@ extern int ip_srcid_report(queue_t *, mblk_t *, caddr_t, cred_t *);
extern uint8_t ipoptp_next(ipoptp_t *);
extern uint8_t ipoptp_first(ipoptp_t *, ipha_t *);
extern int ip_opt_get_user(const ipha_t *, uchar_t *);
-extern ill_t *ip_grab_attach_ill(ill_t *, mblk_t *, int, boolean_t,
- ip_stack_t *);
-extern ire_t *conn_set_outgoing_ill(conn_t *, ire_t *, ill_t **);
extern int ipsec_req_from_conn(conn_t *, ipsec_req_t *, int);
extern int ip_snmp_get(queue_t *q, mblk_t *mctl, int level);
extern int ip_snmp_set(queue_t *q, int, int, uchar_t *, int);
@@ -3295,7 +3392,6 @@ extern void ip_savebuf(void **, uint_t *, boolean_t, const void *, uint_t);
extern boolean_t ipsq_pending_mp_cleanup(ill_t *, conn_t *);
extern void conn_ioctl_cleanup(conn_t *);
extern ill_t *conn_get_held_ill(conn_t *, ill_t **, int *);
-extern ill_t *ip_newroute_get_dst_ill(ill_t *);
struct multidata_s;
struct pdesc_s;
@@ -3314,9 +3410,6 @@ extern boolean_t ip_md_zcopy_attr(struct multidata_s *, struct pdesc_s *,
uint_t);
extern void ip_unbind(conn_t *connp);
-extern phyint_t *phyint_lookup_group(char *, boolean_t, ip_stack_t *);
-extern phyint_t *phyint_lookup_group_ifindex(uint_t, ip_stack_t *);
-
extern void tnet_init(void);
extern void tnet_fini(void);
@@ -3434,6 +3527,8 @@ typedef struct ipobs_cb {
* ihd_ifindex Interface index that the packet was received/sent over.
* For local packets, this is the index of the interface
* associated with the local destination address.
+ * ihd_grifindex IPMP group interface index (zero unless ihd_ifindex
+ * is an IPMP underlying interface).
* ihd_stack Netstack the packet is from.
*/
typedef struct ipobs_hook_data {
@@ -3443,6 +3538,7 @@ typedef struct ipobs_hook_data {
ipobs_hook_type_t ihd_htype;
uint16_t ihd_ipver;
uint64_t ihd_ifindex;
+ uint64_t ihd_grifindex;
netstack_t *ihd_stack;
} ipobs_hook_data_t;