summaryrefslogtreecommitdiff
path: root/usr/src/uts/common/sys/mac_flow_impl.h
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/uts/common/sys/mac_flow_impl.h')
-rw-r--r--usr/src/uts/common/sys/mac_flow_impl.h537
1 files changed, 537 insertions, 0 deletions
diff --git a/usr/src/uts/common/sys/mac_flow_impl.h b/usr/src/uts/common/sys/mac_flow_impl.h
new file mode 100644
index 0000000000..6029873930
--- /dev/null
+++ b/usr/src/uts/common/sys/mac_flow_impl.h
@@ -0,0 +1,537 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _MAC_FLOW_IMPL_H
+#define _MAC_FLOW_IMPL_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/param.h>
+#include <sys/atomic.h>
+#include <sys/ksynch.h>
+#include <sys/mac_flow.h>
+#include <sys/stream.h>
+#include <sys/sdt.h>
+#include <net/if.h>
+
+/*
+ * Macros to increment/decrement the reference count on a flow_entry_t.
+ */
+#define FLOW_REFHOLD(flent) { \
+ DTRACE_PROBE1(flow_refhold, flow_entry_t *, (flent)); \
+ mutex_enter(&(flent)->fe_lock); \
+ (flent)->fe_refcnt++; \
+ mutex_exit(&(flent)->fe_lock); \
+}
+
+/*
+ * Data paths must not use a flow entry marked INCIPIENT (set up is not yet
+ * complete, so the data path could stumble on inconsistent structures) or
+ * QUIESCE (a control operation is waiting for quiescence so that it can
+ * change callbacks or other structures without locks). CONDEMNED and the
+ * *_NO_DATAPATH flags are refused as well; err is set to -1 on refusal.
+ */
+#define FLOW_TRY_REFHOLD(flent, err) { \
+ DTRACE_PROBE1(flow_refhold, flow_entry_t *, (flent)); \
+ (err) = 0; \
+ mutex_enter(&(flent)->fe_lock); \
+ if ((flent)->fe_flags & (FE_INCIPIENT | FE_QUIESCE | FE_CONDEMNED | \
+ FE_UF_NO_DATAPATH | FE_MC_NO_DATAPATH)) \
+ (err) = -1; \
+ else \
+ (flent)->fe_refcnt++; \
+ mutex_exit(&(flent)->fe_lock); \
+}
+
+#define FLOW_REFRELE(flent) { /* drop one fe_refcnt reference */ \
+ DTRACE_PROBE1(flow_refrele, flow_entry_t *, (flent)); \
+ mutex_enter(&(flent)->fe_lock); \
+ ASSERT((flent)->fe_refcnt != 0); \
+ (flent)->fe_refcnt--; \
+ if ((flent)->fe_flags & FE_WAITER) { \
+ ASSERT((flent)->fe_refcnt != 0); /* not the last ref */ \
+ cv_signal(&(flent)->fe_cv); /* wake the fe_cv waiter */ \
+ mutex_exit(&(flent)->fe_lock); \
+ } else if ((flent)->fe_refcnt == 0) { \
+ mac_flow_destroy(flent); /* called with fe_lock held */ \
+ } else { \
+ mutex_exit(&(flent)->fe_lock); \
+ } \
+}
+
+#define FLOW_USER_REFHOLD(flent) { \
+ mutex_enter(&(flent)->fe_lock); \
+ (flent)->fe_user_refcnt++; \
+ mutex_exit(&(flent)->fe_lock); \
+}
+
+#define FLOW_USER_REFRELE(flent) { \
+ mutex_enter(&(flent)->fe_lock); \
+ ASSERT((flent)->fe_user_refcnt != 0); \
+ if (--(flent)->fe_user_refcnt == 0 && \
+ ((flent)->fe_flags & FE_WAITER)) \
+ cv_signal(&(flent)->fe_cv); \
+ mutex_exit(&(flent)->fe_lock); \
+}
+
+#define FLOW_FINAL_REFRELE(flent) { /* release the only remaining ref */ \
+ ASSERT((flent)->fe_refcnt == 1 && (flent)->fe_user_refcnt == 0); \
+ FLOW_REFRELE(flent); \
+}
+
+/*
+ * Mark or unmark the flent with a bit flag
+ */
+#define FLOW_MARK(flent, flag) { /* set flag bits under fe_lock */ \
+ mutex_enter(&(flent)->fe_lock); \
+ (flent)->fe_flags |= (flag); \
+ mutex_exit(&(flent)->fe_lock); \
+}
+
+#define FLOW_UNMARK(flent, flag) { /* clear flag bits under fe_lock */ \
+ mutex_enter(&(flent)->fe_lock); \
+ (flent)->fe_flags &= ~(flag); /* parens: flag may be A | B */ \
+ mutex_exit(&(flent)->fe_lock); \
+}
+
+#define FLENT_TO_MIP(flent) /* via fe_mbg when set, else via fe_mcip */ \
+ ((flent)->fe_mbg != NULL ? mac_bcast_grp_mip((flent)->fe_mbg) : \
+ ((mac_client_impl_t *)(flent)->fe_mcip)->mci_mip)
+
+/* Convert a bandwidth expressed in bps to a number of bytes per tick. */
+#define FLOW_BYTES_PER_TICK(bps) (((bps) >> 3) / hz)
+
+/*
+ * Given an underlying range and a priority level, obtain the minimum for the
+ * new range.
+ */
+#define FLOW_MIN_PRIORITY(min, max, pri) \
+ ((min) + ((((max) - (min)) / MRP_PRIORITY_LEVELS) * (pri)))
+
+/*
+ * Given an underlying range and a minimum level (base), obtain the maximum
+ * for the new range.
+ */
+#define FLOW_MAX_PRIORITY(min, max, base) \
+ ((base) + (((max) - (min)) / MRP_PRIORITY_LEVELS))
+
+/*
+ * Given an underlying range and a priority level, get the absolute
+ * priority value. For now there are just 3 values, high, low and
+ * medium so we can just return max, min or min + (max - min) / 2.
+ * If there are more than three we need to change this computation.
+ */
+#define FLOW_PRIORITY(min, max, pri) /* whole expansion parenthesized */ \
+ ((pri) == MPL_HIGH ? (max) : \
+ (pri) == MPL_LOW ? (min) : \
+ ((min) + (((max) - (min)) / 2)))
+
+#define MAC_FLOW_TAB_SIZE 500
+
+typedef struct flow_entry_s flow_entry_t;
+typedef struct flow_tab_s flow_tab_t;
+typedef struct flow_state_s flow_state_t;
+struct mac_impl_s;
+struct mac_client_impl_s;
+
+/*
+ * Classification flags used to lookup the flow.
+ */
+#define FLOW_INBOUND 0x01
+#define FLOW_OUTBOUND 0x02
+/* Don't compare VID when classifying the packets, see mac_rx_classify() */
+#define FLOW_IGNORE_VLAN 0x04
+
+/* Generic flow client function signature */
+typedef void (*flow_fn_t)(void *, void *, mblk_t *, boolean_t);
+
+/* Flow state */
+typedef enum {
+ FLOW_DRIVER_UPCALL,
+ FLOW_USER_REF
+} mac_flow_state_t;
+
+/* Matches a flow_entry_t using the extracted flow_state_t info */
+typedef boolean_t (*flow_match_fn_t)(flow_tab_t *, flow_entry_t *,
+ flow_state_t *);
+
+/* fe_flags */
+#define FE_QUIESCE 0x01 /* Quiesce the flow */
+#define FE_WAITER 0x02 /* Flow has a waiter */
+#define FE_FLOW_TAB 0x04 /* Flow is in the flow tab list */
+#define FE_G_FLOW_HASH 0x08 /* Flow is in the global flow hash */
+#define FE_INCIPIENT 0x10 /* Being setup */
+#define FE_CONDEMNED 0x20 /* Being deleted */
+#define FE_UF_NO_DATAPATH 0x40 /* No datapath setup for User flow */
+#define FE_MC_NO_DATAPATH 0x80 /* No datapath setup for mac client */
+
+/* fe_type */
+#define FLOW_PRIMARY_MAC 0x01 /* NIC primary MAC address */
+#define FLOW_VNIC_MAC 0x02 /* VNIC flow */
+#define FLOW_MCAST 0x04 /* Multicast (and broadcast) */
+#define FLOW_OTHER 0x08 /* Other flows configured */
+#define FLOW_USER 0x10 /* User defined flow */
+#define FLOW_VNIC FLOW_VNIC_MAC
+#define FLOW_NO_STATS 0x20 /* Don't create stats for the flow */
+
+/*
+ * Shared Bandwidth control counters between the soft ring set and its
+ * associated soft rings. In case the flow associated with NIC/VNIC
+ * has a group of Rx rings assigned to it, we have the same
+ * number of soft ring sets as we have the Rx ring in the group
+ * and each individual SRS (and its soft rings) decide when to
+ * poll their Rx ring independently. But if there is a B/W limit
+ * associated with the NIC/VNIC, then the B/W control counter is
+ * shared across all the SRS in the group and their associated
+ * soft rings.
+ *
+ * There is a many to 1 mapping between the SRS and
+ * mac_bw_ctl if the flow has a group of Rx rings associated with
+ * it.
+ */
+typedef struct mac_bw_ctl_s {
+ kmutex_t mac_bw_lock;
+ uint32_t mac_bw_state;
+ size_t mac_bw_sz; /* ?? Is it needed */
+ size_t mac_bw_limit; /* Max bytes to process per tick */
+ size_t mac_bw_used; /* Bytes processed in current tick */
+ size_t mac_bw_drop_threshold; /* Max queue length */
+ size_t mac_bw_drop_bytes;
+ size_t mac_bw_polled;
+ size_t mac_bw_intr;
+ clock_t mac_bw_curr_time;
+} mac_bw_ctl_t;
+
+struct flow_entry_s { /* Protected by */
+ struct flow_entry_s *fe_next; /* ft_lock */
+
+ datalink_id_t fe_link_id; /* WO */
+
+ /* Properties as specified for this flow */
+ mac_resource_props_t fe_resource_props; /* SL */
+
+ /* Properties actually effective at run time for this flow */
+ mac_resource_props_t fe_effective_props; /* SL */
+
+ kmutex_t fe_lock;
+ char fe_flow_name[MAXFLOWNAME]; /* fe_lock */
+ flow_desc_t fe_flow_desc; /* fe_lock */
+ kcondvar_t fe_cv; /* fe_lock */
+ /*
+ * Initial flow ref is 1 on creation. A thread that looks up the
+ * flent, typically via mac_flow_lookup(), dynamically holds a ref.
+ * If the ref is 1, it means there aren't any upcalls from the driver
+ * or downcalls from the stack using this flent. Structures pointing
+ * to the flent or flent inserted in lists don't count towards this
+ * refcnt. Instead they are tracked using fe_flags. Only a control
+ * thread doing a teardown operation deletes the flent, after waiting
+ * for upcalls to finish synchronously. The fe_refcnt tracks
+ * the number of upcall refs
+ */
+ uint32_t fe_refcnt; /* fe_lock */
+
+ /*
+ * This tracks lookups done using the global hash list for user
+ * generated flows. This refcnt only protects the flent itself
+ * from disappearing and helps walkers to read the flent info such
+ * as flow spec. However the flent may be quiesced and the SRS could
+ * be deleted. The fe_user_refcnt tracks the number of global flow
+ * hash refs.
+ */
+ uint32_t fe_user_refcnt; /* fe_lock */
+ uint_t fe_flags; /* fe_lock */
+
+ /*
+ * Function/args to invoke for delivering matching packets
+ * Only the function fe_cb_fn may be changed dynamically and atomically.
+ * The fe_cb_arg1 and fe_cb_arg2 are set at creation time and may not
+ * be changed.
+ */
+ flow_fn_t fe_cb_fn; /* fe_lock */
+ void *fe_cb_arg1; /* fe_lock */
+ void *fe_cb_arg2; /* fe_lock */
+
+ void *fe_client_cookie; /* WO */
+ void *fe_rx_ring_group; /* SL */
+ void *fe_rx_srs[MAX_RINGS_PER_GROUP]; /* fe_lock */
+ int fe_rx_srs_cnt; /* fe_lock */
+ void *fe_tx_srs; /* WO */
+
+ /*
+ * This is a unicast flow, and is a mac_client_impl_t
+ */
+ void *fe_mcip; /* WO */
+
+ /*
+ * Used by mci_flent_list of mac_client_impl_t to track flows sharing
+ * the same mac_client_impl_t.
+ */
+ struct flow_entry_s *fe_client_next;
+
+ /*
+ * This is a broadcast or multicast flow and is a mac_bcast_grp_t
+ */
+ void *fe_mbg; /* WO */
+ uint_t fe_type; /* WO */
+
+ /*
+ * BW control info.
+ */
+ mac_bw_ctl_t fe_tx_bw;
+ mac_bw_ctl_t fe_rx_bw;
+
+ /*
+ * Used by flow table lookup code
+ */
+ flow_match_fn_t fe_match;
+
+ /*
+ * Used by mac_flow_remove().
+ */
+ int fe_index;
+ flow_tab_t *fe_flow_tab;
+
+ kstat_t *fe_ksp;
+ flow_stats_t fe_flowstats;
+ boolean_t fe_desc_logged;
+ zoneid_t fe_zoneid;
+ uint64_t fe_nic_speed;
+};
+
+/*
+ * Various structures used by the flows framework for keeping track
+ * of packet state information.
+ */
+
+/* Layer 2 */
+typedef struct flow_l2info_s {
+ uchar_t *l2_start;
+ uint8_t *l2_daddr;
+ uint16_t l2_vid;
+ uint32_t l2_sap;
+ uint_t l2_hdrsize;
+} flow_l2info_t;
+
+/* Layer 3 */
+typedef struct flow_l3info_s {
+ uchar_t *l3_start;
+ uint8_t l3_protocol;
+ uint8_t l3_version;
+ boolean_t l3_dst_or_src;
+ uint_t l3_hdrsize;
+ boolean_t l3_fragmented;
+} flow_l3info_t;
+
+/* Layer 4 */
+typedef struct flow_l4info_s {
+ uchar_t *l4_start;
+ uint16_t l4_src_port;
+ uint16_t l4_dst_port;
+ uint16_t l4_hash_port;
+} flow_l4info_t;
+
+/*
+ * Combined state structure.
+ * Holds flow direction and an mblk_t pointer.
+ */
+struct flow_state_s {
+ uint_t fs_flags;
+ mblk_t *fs_mp;
+ flow_l2info_t fs_l2info;
+ flow_l3info_t fs_l3info;
+ flow_l4info_t fs_l4info;
+};
+
+/*
+ * Flow ops vector.
+ * There are two groups of functions. The ones ending with _fe are
+ * called when a flow is being added. The others (hash, accept) are
+ * called at flow lookup time.
+ */
+#define FLOW_MAX_ACCEPT 16
+typedef struct flow_ops_s {
+ /*
+ * fo_accept_fe():
+ * Validates the contents of the flow and checks whether
+ * it's compatible with the flow table. Sets the fe_match
+ * function of the flow.
+ */
+ int (*fo_accept_fe)(flow_tab_t *, flow_entry_t *);
+ /*
+ * fo_hash_fe():
+ * Generates a hash index to the flow table. This function
+ * must use the same algorithm as fo_hash(), which is used
+ * by the flow lookup code path.
+ */
+ uint32_t (*fo_hash_fe)(flow_tab_t *, flow_entry_t *);
+ /*
+ * fo_match_fe():
+ * This is used for finding identical flows.
+ */
+ boolean_t (*fo_match_fe)(flow_tab_t *, flow_entry_t *,
+ flow_entry_t *);
+ /*
+ * fo_insert_fe():
+ * Used for inserting a flow to a flow chain.
+ * Protocols that have special ordering requirements would
+ * need to implement this. For those that don't,
+ * flow_generic_insert_fe() may be used.
+ */
+ int (*fo_insert_fe)(flow_tab_t *, flow_entry_t **,
+ flow_entry_t *);
+
+ /*
+ * Calculates the flow hash index based on the accumulated
+ * state in flow_state_t. Must use the same algorithm as
+ * fo_hash_fe().
+ */
+ uint32_t (*fo_hash)(flow_tab_t *, flow_state_t *);
+
+ /*
+ * Array of accept functions.
+ * Each function in the array will accumulate enough state
+ * (header length, protocol) to allow the next function to
+ * proceed. We support up to FLOW_MAX_ACCEPT functions which
+ * should be sufficient for all practical purposes.
+ */
+ int (*fo_accept[FLOW_MAX_ACCEPT])(flow_tab_t *,
+ flow_state_t *);
+} flow_ops_t;
+
+/*
+ * Generic flow table.
+ */
+struct flow_tab_s {
+ krwlock_t ft_lock;
+ /*
+ * Contains a list of functions (described above)
+ * specific to this table type.
+ */
+ flow_ops_t ft_ops;
+
+ /*
+ * Indicates what types of flows are supported.
+ */
+ flow_mask_t ft_mask;
+
+ /*
+ * An array of flow_entry_t * of size ft_size.
+ * Each element is the beginning of a hash chain.
+ */
+ flow_entry_t **ft_table;
+ uint_t ft_size;
+
+ /*
+ * The number of flows inserted into ft_table.
+ */
+ uint_t ft_flow_count;
+ struct mac_impl_s *ft_mip;
+ struct mac_client_impl_s *ft_mcip;
+};
+
+/*
+ * This is used for describing what type of flow table can be created.
+ * mac_flow.c contains a list of these structures.
+ */
+typedef struct flow_tab_info_s {
+ flow_ops_t *fti_ops;
+ flow_mask_t fti_mask;
+ uint_t fti_size;
+} flow_tab_info_t;
+
+#define FLOW_TAB_EMPTY(ft) ((ft) == NULL || (ft)->ft_flow_count == 0)
+
+/*
+ * This is used by mac_tx_send.
+ */
+typedef struct mac_tx_stats_s {
+ uint_t ts_opackets;
+ uint_t ts_obytes;
+ uint_t ts_oerrors;
+} mac_tx_stats_t;
+
+#define FLOW_STAT_UPDATE(f, s, c) { /* fe_flowstats.fs_<s> += c */ \
+ ((flow_entry_t *)(f))->fe_flowstats.fs_##s += ((uint64_t)(c)); \
+} /* s is token-pasted onto fs_; the add is a plain, unlocked += */
+
+#define FLOW_TX_STATS_UPDATE(f, s) { \
+ FLOW_STAT_UPDATE((f), opackets, (s)->ts_opackets); \
+ FLOW_STAT_UPDATE((f), obytes, (s)->ts_obytes); \
+ FLOW_STAT_UPDATE((f), oerrors, (s)->ts_oerrors); \
+}
+
+extern void mac_flow_init(void);
+extern void mac_flow_fini(void);
+extern int mac_flow_create(flow_desc_t *, mac_resource_props_t *,
+ char *, void *, uint_t, flow_entry_t **);
+
+extern int mac_flow_add(flow_tab_t *, flow_entry_t *);
+extern int mac_flow_add_subflow(mac_client_handle_t, flow_entry_t *,
+ boolean_t);
+extern int mac_flow_hash_add(flow_entry_t *);
+extern int mac_flow_lookup_byname(char *, flow_entry_t **);
+extern int mac_flow_lookup(flow_tab_t *, mblk_t *, uint_t,
+ flow_entry_t **);
+
+extern int mac_flow_walk(flow_tab_t *, int (*)(flow_entry_t *, void *),
+ void *);
+
+extern int mac_flow_walk_nolock(flow_tab_t *,
+ int (*)(flow_entry_t *, void *), void *);
+
+extern void mac_flow_modify(flow_tab_t *, flow_entry_t *,
+ mac_resource_props_t *);
+
+extern void *mac_flow_get_client_cookie(flow_entry_t *);
+
+extern uint32_t mac_flow_modify_props(flow_entry_t *, mac_resource_props_t *);
+
+extern int mac_flow_update(flow_tab_t *, flow_entry_t *, flow_desc_t *);
+extern void mac_flow_get_desc(flow_entry_t *, flow_desc_t *);
+extern void mac_flow_set_desc(flow_entry_t *, flow_desc_t *);
+
+extern void mac_flow_remove(flow_tab_t *, flow_entry_t *, boolean_t);
+extern void mac_flow_hash_remove(flow_entry_t *);
+extern void mac_flow_wait(flow_entry_t *, mac_flow_state_t);
+extern void mac_flow_quiesce(flow_entry_t *);
+extern void mac_flow_restart(flow_entry_t *);
+extern void mac_flow_cleanup(flow_entry_t *);
+extern void mac_flow_destroy(flow_entry_t *);
+
+extern void mac_flow_tab_create(flow_ops_t *, flow_mask_t, uint_t,
+ struct mac_impl_s *, flow_tab_t **);
+extern void mac_flow_l2tab_create(struct mac_impl_s *, flow_tab_t **);
+extern void mac_flow_tab_destroy(flow_tab_t *);
+extern void mac_flow_drop(void *, void *, mblk_t *);
+extern void flow_stat_destroy(flow_entry_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _MAC_FLOW_IMPL_H */