diff options
Diffstat (limited to 'usr/src/uts/common/sys/mac_flow_impl.h')
| -rw-r--r-- | usr/src/uts/common/sys/mac_flow_impl.h | 537 |
1 files changed, 537 insertions, 0 deletions
diff --git a/usr/src/uts/common/sys/mac_flow_impl.h b/usr/src/uts/common/sys/mac_flow_impl.h new file mode 100644 index 0000000000..6029873930 --- /dev/null +++ b/usr/src/uts/common/sys/mac_flow_impl.h @@ -0,0 +1,537 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _MAC_FLOW_IMPL_H +#define _MAC_FLOW_IMPL_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include <sys/param.h> +#include <sys/atomic.h> +#include <sys/ksynch.h> +#include <sys/mac_flow.h> +#include <sys/stream.h> +#include <sys/sdt.h> +#include <net/if.h> + +/* + * Macros to increment/decrement the reference count on a flow_entry_t. + */ +#define FLOW_REFHOLD(flent) { \ + DTRACE_PROBE1(flow_refhold, flow_entry_t *, (flent)); \ + mutex_enter(&(flent)->fe_lock); \ + (flent)->fe_refcnt++; \ + mutex_exit(&(flent)->fe_lock); \ +} + +/* + * Data paths must not attempt to use a flow entry if it is marked INCIPIENT + * or QUIESCE. In the former case the set up is not yet complete and the + * data path could stumble on inconsistent data structures. In the latter + * case a control operation is waiting for quiescence so that it can + * change callbacks or other structures without the use of locks. + */ +#define FLOW_TRY_REFHOLD(flent, err) { \ + DTRACE_PROBE1(flow_refhold, flow_entry_t *, (flent)); \ + (err) = 0; \ + mutex_enter(&(flent)->fe_lock); \ + if ((flent)->fe_flags & (FE_INCIPIENT | FE_QUIESCE | FE_CONDEMNED | \ + FE_UF_NO_DATAPATH | FE_MC_NO_DATAPATH)) \ + (err) = -1; \ + else \ + (flent)->fe_refcnt++; \ + mutex_exit(&(flent)->fe_lock); \ +} + +#define FLOW_REFRELE(flent) { \ + DTRACE_PROBE1(flow_refrele, flow_entry_t *, (flent)); \ + mutex_enter(&(flent)->fe_lock); \ + ASSERT((flent)->fe_refcnt != 0); \ + (flent)->fe_refcnt--; \ + if ((flent)->fe_flags & FE_WAITER) { \ + ASSERT((flent)->fe_refcnt != 0); \ + cv_signal(&(flent)->fe_cv); \ + mutex_exit(&(flent)->fe_lock); \ + } else if ((flent)->fe_refcnt == 0) { \ + mac_flow_destroy(flent); \ + } else { \ + mutex_exit(&(flent)->fe_lock); \ + } \ +} + +#define FLOW_USER_REFHOLD(flent) { \ + mutex_enter(&(flent)->fe_lock); \ + (flent)->fe_user_refcnt++; \ + mutex_exit(&(flent)->fe_lock); \ +} + +#define FLOW_USER_REFRELE(flent) { \ + mutex_enter(&(flent)->fe_lock); \ + ASSERT((flent)->fe_user_refcnt != 0); \ + if (--(flent)->fe_user_refcnt == 0 && \ + ((flent)->fe_flags & FE_WAITER)) \ + cv_signal(&(flent)->fe_cv); \ + mutex_exit(&(flent)->fe_lock); \ +} + +#define FLOW_FINAL_REFRELE(flent) { \ + ASSERT(flent->fe_refcnt == 1 && flent->fe_user_refcnt == 0); \ + FLOW_REFRELE(flent); \ +} + +/* + * Mark or unmark the flent with a bit flag + */ +#define FLOW_MARK(flent, flag) { \ + mutex_enter(&(flent)->fe_lock); \ + (flent)->fe_flags |= flag; \ + mutex_exit(&(flent)->fe_lock); \ +} + +#define FLOW_UNMARK(flent, flag) { \ + mutex_enter(&(flent)->fe_lock); \ + (flent)->fe_flags &= ~flag; \ + mutex_exit(&(flent)->fe_lock); \ +} + +#define FLENT_TO_MIP(flent) \ + (flent->fe_mbg != NULL ? mac_bcast_grp_mip(flent->fe_mbg) : \ + ((mac_client_impl_t *)flent->fe_mcip)->mci_mip) + +/* Convert a bandwidth expressed in bps to a number of bytes per tick. */ +#define FLOW_BYTES_PER_TICK(bps) (((bps) >> 3) / hz) + +/* + * Given an underlying range and a priority level, obtain the minimum for the + * new range. + */ +#define FLOW_MIN_PRIORITY(min, max, pri) \ + ((min) + ((((max) - (min)) / MRP_PRIORITY_LEVELS) * (pri))) + +/* + * Given an underlying range and a minimum level (base), obtain the maximum + * for the new range. + */ +#define FLOW_MAX_PRIORITY(min, max, base) \ + ((base) + (((max) - (min)) / MRP_PRIORITY_LEVELS)) + +/* + * Given an underlying range and a priority level, get the absolute + * priority value. For now there are just 3 values, high, low and + * medium so we can just return max, min or min + (max - min) / 2. + * If there are more than three we need to change this computation. + */ +#define FLOW_PRIORITY(min, max, pri) \ + (pri) == MPL_HIGH ? (max) : \ + (pri) == MPL_LOW ? (min) : \ + ((min) + (((max) - (min)) / 2)) + +#define MAC_FLOW_TAB_SIZE 500 + +typedef struct flow_entry_s flow_entry_t; +typedef struct flow_tab_s flow_tab_t; +typedef struct flow_state_s flow_state_t; +struct mac_impl_s; +struct mac_client_impl_s; + +/* + * Classification flags used to lookup the flow. + */ +#define FLOW_INBOUND 0x01 +#define FLOW_OUTBOUND 0x02 +/* Don't compare VID when classifying the packets, see mac_rx_classify() */ +#define FLOW_IGNORE_VLAN 0x04 + +/* Generic flow client function signature */ +typedef void (*flow_fn_t)(void *, void *, mblk_t *, boolean_t); + +/* Flow state */ +typedef enum { + FLOW_DRIVER_UPCALL, + FLOW_USER_REF +} mac_flow_state_t; + +/* Matches a flow_entry_t using the extracted flow_state_t info */ +typedef boolean_t (*flow_match_fn_t)(flow_tab_t *, flow_entry_t *, + flow_state_t *); + +/* fe_flags */ +#define FE_QUIESCE 0x01 /* Quiesce the flow */ +#define FE_WAITER 0x02 /* Flow has a waiter */ +#define FE_FLOW_TAB 0x04 /* Flow is in the flow tab list */ +#define FE_G_FLOW_HASH 0x08 /* Flow is in the global flow hash */ +#define FE_INCIPIENT 0x10 /* Being setup */ +#define FE_CONDEMNED 0x20 /* Being deleted */ +#define FE_UF_NO_DATAPATH 0x40 /* No datapath setup for User flow */ +#define FE_MC_NO_DATAPATH 0x80 /* No datapath setup for mac client */ + +/* fe_type */ +#define FLOW_PRIMARY_MAC 0x01 /* NIC primary MAC address */ +#define FLOW_VNIC_MAC 0x02 /* VNIC flow */ +#define FLOW_MCAST 0x04 /* Multicast (and broadcast) */ +#define FLOW_OTHER 0x08 /* Other flows configured */ +#define FLOW_USER 0x10 /* User defined flow */ +#define FLOW_VNIC FLOW_VNIC_MAC +#define FLOW_NO_STATS 0x20 /* Don't create stats for the flow */ + +/* + * Shared Bandwidth control counters between the soft ring set and its + * associated soft rings. In case the flow associated with NIC/VNIC + * has a group of Rx rings assigned to it, we have the same + * number of soft ring sets as we have the Rx ring in the group + * and each individual SRS (and its soft rings) decide when to + * poll their Rx ring independently. But if there is a B/W limit + * associated with the NIC/VNIC, then the B/W control counter is + * shared across all the SRS in the group and their associated + * soft rings. + * + * There is a many to 1 mapping between the SRS and + * mac_bw_ctl if the flow has a group of Rx rings associated with + * it. + */ +typedef struct mac_bw_ctl_s { + kmutex_t mac_bw_lock; + uint32_t mac_bw_state; + size_t mac_bw_sz; /* ?? Is it needed */ + size_t mac_bw_limit; /* Max bytes to process per tick */ + size_t mac_bw_used; /* Bytes processed in current tick */ + size_t mac_bw_drop_threshold; /* Max queue length */ + size_t mac_bw_drop_bytes; + size_t mac_bw_polled; + size_t mac_bw_intr; + clock_t mac_bw_curr_time; +} mac_bw_ctl_t; + +struct flow_entry_s { /* Protected by */ + struct flow_entry_s *fe_next; /* ft_lock */ + + datalink_id_t fe_link_id; /* WO */ + + /* Properties as specified for this flow */ + mac_resource_props_t fe_resource_props; /* SL */ + + /* Properties actually effective at run time for this flow */ + mac_resource_props_t fe_effective_props; /* SL */ + + kmutex_t fe_lock; + char fe_flow_name[MAXFLOWNAME]; /* fe_lock */ + flow_desc_t fe_flow_desc; /* fe_lock */ + kcondvar_t fe_cv; /* fe_lock */ + /* + * Initial flow ref is 1 on creation. A thread that lookups the + * flent typically by a mac_flow_lookup() dynamically holds a ref. + * If the ref is 1, it means there arent' any upcalls from the driver + * or downcalls from the stack using this flent. Structures pointing + * to the flent or flent inserted in lists don't count towards this + * refcnt. Instead they are tracked using fe_flags. Only a control + * thread doing a teardown operation deletes the flent, after waiting + * for upcalls to finish synchronously. The fe_refcnt tracks + * the number of upcall refs + */ + uint32_t fe_refcnt; /* fe_lock */ + + /* + * This tracks lookups done using the global hash list for user + * generated flows. This refcnt only protects the flent itself + * from disappearing and helps walkers to read the flent info such + * as flow spec. However the flent may be quiesced and the SRS could + * be deleted. The fe_user_refcnt tracks the number of global flow + * has refs. + */ + uint32_t fe_user_refcnt; /* fe_lock */ + uint_t fe_flags; /* fe_lock */ + + /* + * Function/args to invoke for delivering matching packets + * Only the function ff_fn may be changed dynamically and atomically. + * The ff_arg1 and ff_arg2 are set at creation time and may not + * be changed. + */ + flow_fn_t fe_cb_fn; /* fe_lock */ + void *fe_cb_arg1; /* fe_lock */ + void *fe_cb_arg2; /* fe_lock */ + + void *fe_client_cookie; /* WO */ + void *fe_rx_ring_group; /* SL */ + void *fe_rx_srs[MAX_RINGS_PER_GROUP]; /* fe_lock */ + int fe_rx_srs_cnt; /* fe_lock */ + void *fe_tx_srs; /* WO */ + + /* + * This is a unicast flow, and is a mac_client_impl_t + */ + void *fe_mcip; /* WO */ + + /* + * Used by mci_flent_list of mac_client_impl_t to track flows sharing + * the same mac_client_impl_t. + */ + struct flow_entry_s *fe_client_next; + + /* + * This is a broadcast or multicast flow and is a mac_bcast_grp_t + */ + void *fe_mbg; /* WO */ + uint_t fe_type; /* WO */ + + /* + * BW control info. + */ + mac_bw_ctl_t fe_tx_bw; + mac_bw_ctl_t fe_rx_bw; + + /* + * Used by flow table lookup code + */ + flow_match_fn_t fe_match; + + /* + * Used by mac_flow_remove(). + */ + int fe_index; + flow_tab_t *fe_flow_tab; + + kstat_t *fe_ksp; + flow_stats_t fe_flowstats; + boolean_t fe_desc_logged; + zoneid_t fe_zoneid; + uint64_t fe_nic_speed; +}; + +/* + * Various structures used by the flows framework for keeping track + * of packet state information. + */ + +/* Layer 2 */ +typedef struct flow_l2info_s { + uchar_t *l2_start; + uint8_t *l2_daddr; + uint16_t l2_vid; + uint32_t l2_sap; + uint_t l2_hdrsize; +} flow_l2info_t; + +/* Layer 3 */ +typedef struct flow_l3info_s { + uchar_t *l3_start; + uint8_t l3_protocol; + uint8_t l3_version; + boolean_t l3_dst_or_src; + uint_t l3_hdrsize; + boolean_t l3_fragmented; +} flow_l3info_t; + +/* Layer 4 */ +typedef struct flow_l4info_s { + uchar_t *l4_start; + uint16_t l4_src_port; + uint16_t l4_dst_port; + uint16_t l4_hash_port; +} flow_l4info_t; + +/* + * Combined state structure. + * Holds flow direction and an mblk_t pointer. + */ +struct flow_state_s { + uint_t fs_flags; + mblk_t *fs_mp; + flow_l2info_t fs_l2info; + flow_l3info_t fs_l3info; + flow_l4info_t fs_l4info; +}; + +/* + * Flow ops vector. + * There are two groups of functions. The ones ending with _fe are + * called when a flow is being added. The others (hash, accept) are + * called at flow lookup time. + */ +#define FLOW_MAX_ACCEPT 16 +typedef struct flow_ops_s { + /* + * fo_accept_fe(): + * Validates the contents of the flow and checks whether + * it's compatible with the flow table. sets the fe_match + * function of the flow. + */ + int (*fo_accept_fe)(flow_tab_t *, flow_entry_t *); + /* + * fo_hash_fe(): + * Generates a hash index to the flow table. This function + * must use the same algorithm as fo_hash(), which is used + * by the flow lookup code path. + */ + uint32_t (*fo_hash_fe)(flow_tab_t *, flow_entry_t *); + /* + * fo_match_fe(): + * This is used for finding identical flows. + */ + boolean_t (*fo_match_fe)(flow_tab_t *, flow_entry_t *, + flow_entry_t *); + /* + * fo_insert_fe(): + * Used for inserting a flow to a flow chain. + * Protocols that have special ordering requirements would + * need to implement this. For those that don't, + * flow_generic_insert_fe() may be used. + */ + int (*fo_insert_fe)(flow_tab_t *, flow_entry_t **, + flow_entry_t *); + + /* + * Calculates the flow hash index based on the accumulated + * state in flow_state_t. Must use the same algorithm as + * fo_hash_fe(). + */ + uint32_t (*fo_hash)(flow_tab_t *, flow_state_t *); + + /* + * Array of accept fuctions. + * Each function in the array will accumulate enough state + * (header length, protocol) to allow the next function to + * proceed. We support up to FLOW_MAX_ACCEPT functions which + * should be sufficient for all practical purposes. + */ + int (*fo_accept[FLOW_MAX_ACCEPT])(flow_tab_t *, + flow_state_t *); +} flow_ops_t; + +/* + * Generic flow table. + */ +struct flow_tab_s { + krwlock_t ft_lock; + /* + * Contains a list of functions (described above) + * specific to this table type. + */ + flow_ops_t ft_ops; + + /* + * Indicates what types of flows are supported. + */ + flow_mask_t ft_mask; + + /* + * An array of flow_entry_t * of size ft_size. + * Each element is the beginning of a hash chain. + */ + flow_entry_t **ft_table; + uint_t ft_size; + + /* + * The number of flows inserted into ft_table. + */ + uint_t ft_flow_count; + struct mac_impl_s *ft_mip; + struct mac_client_impl_s *ft_mcip; +}; + +/* + * This is used for describing what type of flow table can be created. + * mac_flow.c contains a list of these structures. + */ +typedef struct flow_tab_info_s { + flow_ops_t *fti_ops; + flow_mask_t fti_mask; + uint_t fti_size; +} flow_tab_info_t; + +#define FLOW_TAB_EMPTY(ft) ((ft) == NULL || (ft)->ft_flow_count == 0) + +/* + * This is used by mac_tx_send. + */ +typedef struct mac_tx_stats_s { + uint_t ts_opackets; + uint_t ts_obytes; + uint_t ts_oerrors; +} mac_tx_stats_t; + +#define FLOW_STAT_UPDATE(f, s, c) { \ + ((flow_entry_t *)(f))->fe_flowstats.fs_##s += ((uint64_t)(c)); \ +} + +#define FLOW_TX_STATS_UPDATE(f, s) { \ + FLOW_STAT_UPDATE((f), opackets, (s)->ts_opackets); \ + FLOW_STAT_UPDATE((f), obytes, (s)->ts_obytes); \ + FLOW_STAT_UPDATE((f), oerrors, (s)->ts_oerrors); \ +} + +extern void mac_flow_init(); +extern void mac_flow_fini(); +extern int mac_flow_create(flow_desc_t *, mac_resource_props_t *, + char *, void *, uint_t, flow_entry_t **); + +extern int mac_flow_add(flow_tab_t *, flow_entry_t *); +extern int mac_flow_add_subflow(mac_client_handle_t, flow_entry_t *, + boolean_t); +extern int mac_flow_hash_add(flow_entry_t *); +extern int mac_flow_lookup_byname(char *, flow_entry_t **); +extern int mac_flow_lookup(flow_tab_t *, mblk_t *, uint_t, + flow_entry_t **); + +extern int mac_flow_walk(flow_tab_t *, int (*)(flow_entry_t *, void *), + void *); + +extern int mac_flow_walk_nolock(flow_tab_t *, + int (*)(flow_entry_t *, void *), void *); + +extern void mac_flow_modify(flow_tab_t *, flow_entry_t *, + mac_resource_props_t *); + +extern void *mac_flow_get_client_cookie(flow_entry_t *); + +extern uint32_t mac_flow_modify_props(flow_entry_t *, mac_resource_props_t *); + +extern int mac_flow_update(flow_tab_t *, flow_entry_t *, flow_desc_t *); +extern void mac_flow_get_desc(flow_entry_t *, flow_desc_t *); +extern void mac_flow_set_desc(flow_entry_t *, flow_desc_t *); + +extern void mac_flow_remove(flow_tab_t *, flow_entry_t *, boolean_t); +extern void mac_flow_hash_remove(flow_entry_t *); +extern void mac_flow_wait(flow_entry_t *, mac_flow_state_t); +extern void mac_flow_quiesce(flow_entry_t *); +extern void mac_flow_restart(flow_entry_t *); +extern void mac_flow_cleanup(flow_entry_t *); +extern void mac_flow_destroy(flow_entry_t *); + +extern void mac_flow_tab_create(flow_ops_t *, flow_mask_t, uint_t, + struct mac_impl_s *, flow_tab_t **); +extern void mac_flow_l2tab_create(struct mac_impl_s *, flow_tab_t **); +extern void mac_flow_tab_destroy(flow_tab_t *); +extern void mac_flow_drop(void *, void *, mblk_t *); +extern void flow_stat_destroy(flow_entry_t *); + +#ifdef __cplusplus +} +#endif + +#endif /* _MAC_FLOW_IMPL_H */ |
