diff options
author | Joshua M. Clulow <jmc@joyent.com> | 2015-11-09 18:17:06 +0000 |
---|---|---|
committer | Joshua M. Clulow <jmc@joyent.com> | 2015-11-09 18:17:06 +0000 |
commit | 1b7bb3212990f7ea79c8665261b26e041c34e3b4 (patch) | |
tree | f5f11f7771ce6f440f416f6b32f8f9f756fd3b49 | |
parent | 9c193627e4eeb1f60921ac952878adcccb7a5698 (diff) | |
download | illumos-joyent-jclulow_pf.tar.gz |
XXX more pf work; starting to transform BSD queue/tree.h into list/avl (jclulow_pf)
-rw-r--r-- | usr/src/uts/common/inet/pf/README.txt | 125 | ||||
-rw-r--r-- | usr/src/uts/common/inet/pf/pf.c | 26 | ||||
-rw-r--r-- | usr/src/uts/common/inet/pf/pf_ioctl.c | 49 | ||||
-rw-r--r-- | usr/src/uts/common/inet/pf/pfvar.h | 98 |
4 files changed, 239 insertions, 59 deletions
diff --git a/usr/src/uts/common/inet/pf/README.txt b/usr/src/uts/common/inet/pf/README.txt index c4f55650b5..825dc908ce 100644 --- a/usr/src/uts/common/inet/pf/README.txt +++ b/usr/src/uts/common/inet/pf/README.txt @@ -67,32 +67,143 @@ SLIST_ is single-link, but hell, start with a "list_t" to begin with. RB_TREE --> avl_tree_t + ** Define the type: + + RB_HEAD(HEADNAME, ELEM_TYPE) + HEADNAME as in "struct HEADNAME" + ELEM_TYPE is the element type + + --> Instead, we just use the "avl_tree_t" type. + + ** Forward declaration of implementation functions: + RB_PROTOTYPE(HEADNAME, ELEM_TYPE, FIELD, COMPARATOR) + ** Function definition: + RB_GENERATE(HEADNAME, ELEM_TYPE, FIELD, COMPARATOR) + + --> Instead, we provide details in "avl_create()" + + ** Define linkage structure (node): + + RB_ENTRY(ELEM_TYPE) + + --> embed "avl_node_t", size/offset provided to "avl_create()" + --------------------- "struct mbuf" -> "mblk_t" + - allocb(), dupb(), freeb(), freemsg(), etc + Used in "pf.c": - m_adj(): + m_adj(struct mbuf *mp, int req_len): + Trims req_len bytes of data from the mbuf chain pointed to by mp. + If req_len is positive, the data will be trimmed from the head of + the mbuf chain and if it is negative, it will be trimmed from the + tail of the mbuf chain. + + --> int adjmsg(mblk_t *mp, ssize_t len) - m_pulldown(): + m_pulldown(struct mbuf *m, int off, int len, int *offp): + Ensure that the data in the mbuf chain starting at off and ending + at off+len will be put in a continuous memory region. len must + be smaller or equal than MCLBYTES. The pointer returned points + to an mbuf in the chain and the new offset for data in this mbuf + is *offp. If this function fails, m is freed. - m_split(): + *** THIS IS ONLY USED IN ONE PLACE, and we could probably + just "pullupmsg()" or "msgpullup()" here... 
- m_cat(): + m_split(struct mbuf *m0, int len0, int wait): + Split an mbuf chain in two pieces, returning a pointer to the + tail (which is made of the previous mbuf chain except the first + len0 bytes). - m_copyback(): + m_cat(struct mbuf *m, struct mbuf *n): + Concatenate the mbuf chain pointed to by n to the mbuf chain + pointed to by m. The mbuf chains must be of the same type. + + --> "linkb()" ? + (sticks "n" into "b_cont" on last in chain "m") + + m_copyback(struct mbuf *m0, int off, int len, caddr_t cp): + Copy data from a buffer pointed to by cp back into the mbuf chain + pointed to by m0 starting at off bytes from the beginning, ex- + tending the mbuf chain if necessary. The mbuf chain must be ini- + tialized properly, including setting m_len. + + --> "mb_copyback()" from "uts/common/inet/ipf/misc.c" ?? m_gethdr(M_DONTWAIT, MT_HEADER): + Return a pointer to an mbuf of the type specified after initial- + izing it to contain a packet header. See m_get() for a descrip- + tion of how. + m_get(int how, int type) + Return a pointer to an mbuf of the type specified. + If the how argument is M_WAITOK, the function may + call tsleep(9) to await resources. + If how is M_DONTWAIT and resources are not available, + m_get() returns NULL. + + ** THIS SEEMS TO JUST BE allocation of mbufs of various types. m_copym(m, 0, M_COPYALL, M_NOWAIT): + m_copym(struct mbuf *m, int off, int len, int wait): + Copy an mbuf chain starting at off bytes from the beginning and + continuing for len bytes. If off is zero and m has the M_PKTHDR + flag set, the header is copied. If len is M_COPYALL the whole + mbuf is copied. The wait parameter can be M_WAIT or M_DONTWAIT. + It does not copy clusters, it just increases their reference + count. + m_copym2(struct mbuf *m, int off, int len, int wait): + The same as m_copym() except that it copies cluster mbufs, where- + as m_copym() just increases the reference count of the clusters. 
- m_copym2(): + ** WE DO NOT USE this in any form other than M_COPYALL, so + this probably devolves to something like "dupmsg()" - m_freem(): + m_freem(struct mbuf *m): + Free the mbuf chain pointed to by m. + + --> freeb() or freemsg()? m_tag_find(): ? m_tag_prepend(): ? +--------------------- + +tsleep() appears to be used to have "pf_purge_thread()" wake up every second to +do work. This could just be "ddi_periodic_add()" or whatever. + + +--------------------- + +struct rwlock / RWLOCK_INITIALIZER() + - pf_ioctl.c -- pf_consistency_lock + + +--------------------- + +FROM GLOBALS TO PER-ZONE STATE: + + One "pf_netstack_t" object will exist for each zone's netstack. + +--------------------- + +POOLS (pool_put, pool_get) BECOME kmem_cache_alloc/free()... + + "pf_state_pl" --> "struct pf_state" + + +--------------------- + + "struct pf_state" + - pf_create_state() + +MODULE INIT: + + pfattach() in "pf_ioctl.c" + + diff --git a/usr/src/uts/common/inet/pf/pf.c b/usr/src/uts/common/inet/pf/pf.c index 9ed6342783..be8ce11e97 100644 --- a/usr/src/uts/common/inet/pf/pf.c +++ b/usr/src/uts/common/inet/pf/pf.c @@ -307,20 +307,10 @@ struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = { } while (0) static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *); -static __inline int pf_state_compare_key(struct pf_state_key *, - struct pf_state_key *); -static __inline int pf_state_compare_id(struct pf_state *, - struct pf_state *); struct pf_src_tree tree_src_tracking; -struct pf_state_tree_id tree_id; -struct pf_state_queue state_list; - RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare); -RB_GENERATE(pf_state_tree, pf_state_key, entry, pf_state_compare_key); -RB_GENERATE(pf_state_tree_id, pf_state, - entry_id, pf_state_compare_id); __inline int pf_addr_compare(struct pf_addr *a, struct pf_addr *b, sa_family_t af) @@ -649,9 +639,11 @@ pf_state_rm_src_node(struct pf_state *s, struct pf_src_node *sn) /* state table stuff */ -static __inline int 
-pf_state_compare_key(struct pf_state_key *a, struct pf_state_key *b) +int +pf_state_compare_key(const void *aa, const void *bb) { + pf_state_key_t *a = aa; + pf_state_key_t *b = bb; int diff; if ((diff = a->proto - b->proto) != 0) @@ -671,9 +663,12 @@ pf_state_compare_key(struct pf_state_key *a, struct pf_state_key *b) return (0); } -static __inline int -pf_state_compare_id(struct pf_state *a, struct pf_state *b) +int +pf_state_compare_id(const void *aa, const void *bb) { + pf_state_t *a = aa; + pf_state_t *b = bb; + if (a->id > b->id) return (1); if (a->id < b->id) @@ -978,6 +973,7 @@ pf_state_insert(pf_netstack_t *pfns, struct pfi_kif *kif, pf_detach_state(s); return (-1); } + list_insert_tail(&pfns->pfns_state_list, s); TAILQ_INSERT_TAIL(&state_list, s, entry_list); pfns->pfns_status.fcounters[FCNT_STATE_INSERT]++; pfns->pfns_status.states++; @@ -993,7 +989,7 @@ pf_find_state_byid(pf_netstack_t *pfns, struct pf_state_cmp *key) { pfns->pfns_status.fcounters[FCNT_STATE_SEARCH]++; - return (RB_FIND(pf_state_tree_id, &tree_id, (struct pf_state *)key)); + return (avl_find(&pfns->pfns_tree_id, key)); } int diff --git a/usr/src/uts/common/inet/pf/pf_ioctl.c b/usr/src/uts/common/inet/pf/pf_ioctl.c index 6939a4357e..6f27ced8a4 100644 --- a/usr/src/uts/common/inet/pf/pf_ioctl.c +++ b/usr/src/uts/common/inet/pf/pf_ioctl.c @@ -104,7 +104,6 @@ void pf_qid2qname(u_int16_t, char *); void pf_qid_unref(u_int16_t); struct pf_rule pf_default_rule, pf_default_rule_new; -struct rwlock pf_consistency_lock = RWLOCK_INITIALIZER("pfcnslk"); struct { char statusif[IFNAMSIZ]; @@ -133,6 +132,46 @@ int pf_rtlabel_add(struct pf_addr_wrap *); void pf_rtlabel_remove(struct pf_addr_wrap *); void pf_rtlabel_copyout(struct pf_addr_wrap *); +/* + * XXX Do the things that "pfattach()" did globally: + */ +int +pf_netstack_attach(void) +{ + pf_netstack_t *pfns = kmem_zalloc(sizeof (*pfns), KM_SLEEP); + + mutex_init(&pfns->pfns_lock, NULL, MUTEX_DRIVER, NULL); + + /* + * Replacing the global 
"state_list" and "tree_id": + */ + list_create(&pfns->pfns_state_list, sizeof (pf_state_t), + offsetof(pf_state_t, entry_list)); + avl_create(&pfns->pfns_tree_id, pf_state_compare_id, + sizeof (pf_state_t), offsetof(pf_state_t, entry_id)); + + /* + * And "pf_statetbl": + */ + avl_create(&pfns->pfns_statetbl, pf_state_compare_key, + sizeof (pf_state_key_t), offsetof(pf_state_key_t, entry)); + + /* + * XXX pf queues... is this a bandwidth limiting / qos thing? + */ + list_create(&pfns->pfns_queues[0], sizeof (pf_queuespec_t), + offsetof(pf_queuespec_t, entries)); + list_create(&pfns->pfns_queues[1], sizeof (pf_queuespec_t), + offsetof(pf_queuespec_t, entries)); + pfns->pfns_queues_active = &pfns->pfns_queues[0]; + pfns->pfns_queues_inactive = &pfns->pfns_queues[1]; + + /* + * XXX Is this lock to be used in a (soft) interrupt handler? + * (see rwlock(9F)). + */ + rw_init(&pfns->pfns_consistency_lock, NULL, RW_DRIVER, NULL); +} void pfattach(int num) @@ -170,10 +209,12 @@ pfattach(int num) RB_INIT(&tree_src_tracking); RB_INIT(&pf_anchors); pf_init_ruleset(&pf_main_ruleset); +#if 0 /* NOW IN pf_netstack_attach */ TAILQ_INIT(&pf_queues[0]); TAILQ_INIT(&pf_queues[1]); pf_queues_active = &pf_queues[0]; pf_queues_inactive = &pf_queues[1]; +#endif TAILQ_INIT(&state_list); /* default rule should never be garbage collected */ @@ -906,6 +947,9 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) return (EACCES); } + /* + * XXX these need to operate on "pfns->pfns_consistency_lock". + */ if (flags & FWRITE) rw_enter_write(&pf_consistency_lock); else @@ -2306,6 +2350,9 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } fail: splx(s); + /* + * XXX these need to operate on "pfns->pfns_consistency_lock".
+ */ if (flags & FWRITE) rw_exit_write(&pf_consistency_lock); else diff --git a/usr/src/uts/common/inet/pf/pfvar.h b/usr/src/uts/common/inet/pf/pfvar.h index eb07abef76..8f7c25a831 100644 --- a/usr/src/uts/common/inet/pf/pfvar.h +++ b/usr/src/uts/common/inet/pf/pfvar.h @@ -623,7 +623,7 @@ SLIST_HEAD(pf_rule_slist, pf_rule_item); enum pf_sn_types { PF_SN_NONE, PF_SN_NAT, PF_SN_RDR, PF_SN_ROUTE, PF_SN_MAX }; struct pf_src_node { - RB_ENTRY(pf_src_node) entry; + avl_node_t entry; struct pf_addr addr; struct pf_addr raddr; union pf_rule_ptr rule; @@ -684,8 +684,6 @@ struct pf_state_peer { u_int8_t pad[1]; }; -TAILQ_HEAD(pf_state_queue, pf_state); - /* keep synced with struct pf_state_key, used in RB_FIND */ struct pf_state_key_cmp { struct pf_addr addr[2]; @@ -709,7 +707,7 @@ struct pf_state_key { sa_family_t af; u_int8_t proto; - RB_ENTRY(pf_state_key) entry; + avl_node_t entry; struct pf_statelisthead states; struct pf_state_key *reverse; struct inpcb *inp; @@ -726,15 +724,21 @@ struct pf_state_cmp { u_int8_t pad[3]; }; -struct pf_state { +typedef struct pf_state { u_int64_t id; u_int32_t creatorid; u_int8_t direction; u_int8_t pad[3]; - TAILQ_ENTRY(pf_state) sync_list; - TAILQ_ENTRY(pf_state) entry_list; - RB_ENTRY(pf_state) entry_id; + /* + * XXX this one is for pfsync apparently... 
+ */ + list_node_t sync_list; + /* + * This one is for "pfns_state_list": + */ + list_node_t entry_list; + avl_node_t entry_id; struct pf_state_peer src; struct pf_state_peer dst; struct pf_rule_slist match_rules; @@ -779,7 +783,7 @@ struct pf_state { u_int16_t if_index_in; u_int16_t if_index_out; u_int8_t pad2[2]; -}; +} pf_state_t; /* * Unified state structures for pulling states out of the kernel @@ -925,8 +929,8 @@ struct pf_ruleset { RB_HEAD(pf_anchor_global, pf_anchor); RB_HEAD(pf_anchor_node, pf_anchor); struct pf_anchor { - RB_ENTRY(pf_anchor) entry_global; - RB_ENTRY(pf_anchor) entry_node; + avl_node_t entry_global; + avl_node_t entry_node; struct pf_anchor *parent; struct pf_anchor_node children; char name[PF_ANCHOR_NAME_SIZE]; @@ -1089,7 +1093,7 @@ SLIST_HEAD(pfr_ktableworkq, pfr_ktable); RB_HEAD(pfr_ktablehead, pfr_ktable); struct pfr_ktable { struct pfr_tstats pfrkt_ts; - RB_ENTRY(pfr_ktable) pfrkt_tree; + avl_node_t pfrkt_tree; SLIST_ENTRY(pfr_ktable) pfrkt_workq; struct radix_node_head *pfrkt_ip4; struct radix_node_head *pfrkt_ip6; @@ -1117,18 +1121,12 @@ struct pfr_ktable { typedef struct pf_state_tree pf_state_tree_t; -RB_HEAD(pf_state_tree, pf_state_key); -RB_PROTOTYPE(pf_state_tree, pf_state_key, entry, pf_state_compare_key) - RB_HEAD(pf_state_tree_ext_gwy, pf_state_key); RB_PROTOTYPE(pf_state_tree_ext_gwy, pf_state_key, entry_ext_gwy, pf_state_compare_ext_gwy) RB_HEAD(pfi_ifhead, pfi_kif); -/* state tables */ -extern struct pf_state_tree pf_statetbl; - /* keep synced with pfi_kif, used in RB_FIND */ struct pfi_kif_cmp { char pfik_name[IFNAMSIZ]; @@ -1139,7 +1137,7 @@ struct ifg_group; struct pfi_kif { char pfik_name[IFNAMSIZ]; - RB_ENTRY(pfi_kif) pfik_tree; + avl_node_t pfik_tree; u_int64_t pfik_packets[2][2][2]; u_int64_t pfik_bytes[2][2][2]; time_t pfik_tzero; @@ -1379,8 +1377,8 @@ struct pf_queue_scspec { u_int d; }; -struct pf_queuespec { - TAILQ_ENTRY(pf_queuespec) entries; +typedef struct pf_queuespec { + list_node_t entries; char 
qname[PF_QNAME_SIZE]; char parent[PF_QNAME_SIZE]; char ifname[IFNAMSIZ]; @@ -1392,7 +1390,7 @@ struct pf_queuespec { u_int qlimit; u_int32_t qid; u_int32_t parent_qid; -}; +} pf_queuespec_t; struct cbq_opts { u_int minburst; @@ -1669,16 +1667,12 @@ RB_HEAD(pf_src_tree, pf_src_node); RB_PROTOTYPE(pf_src_tree, pf_src_node, entry, pf_src_compare); extern struct pf_src_tree tree_src_tracking; -RB_HEAD(pf_state_tree_id, pf_state); -RB_PROTOTYPE(pf_state_tree_id, pf_state, - entry_id, pf_state_compare_id); -extern struct pf_state_tree_id tree_id; -extern struct pf_state_queue state_list; -typedef struct pf_queuehead pf_queuehead_t; -TAILQ_HEAD(pf_queuehead, pf_queuespec); -extern struct pf_queuehead pf_queues[2]; -extern struct pf_queuehead *pf_queues_active, *pf_queues_inactive; +/* + * XXX AVL Comparators: + */ +extern int pf_state_compare_id(const void *, const void *); +extern int pf_state_compare_key(const void *, const void *); /* * Track pf state. This structure is the per-netstack analogue of various @@ -1690,11 +1684,44 @@ typedef struct pf_netstack { /* * Formerly globals in "pf.c": */ - pf_state_tree_t pfns_statetbl; - pf_queuehead_t pfns_queues[2]; - pf_queuehead_t *pfns_queues_active; - pf_queuehead_t *pfns_queues_inactive; pf_status_t pfns_status; + + /* + * Formerly "struct pf_state_queue state_list", a TAILQ. + */ + list_t pfns_state_list; + + /* + * Formerly "RB_HEAD(pf_state_tree_id, pf_state)" and + * "RB_PROTOTYPE(pf_state_tree_id, pf_state, entry_id, + * pf_state_compare_id)". Instantiated as + * + * struct pf_state_tree_id tree_id; + */ + avl_tree_t pfns_tree_id; + + /* + * Formerly "RB_HEAD(pf_state_tree, pf_state_key)" and + * "RB_PROTOTYPE(pf_state_tree, pf_state_key, entry, + * pf_state_compare_key)". 
Instantiated as + * + * extern struct pf_state_tree pf_statetbl; + */ + avl_tree_t pfns_statetbl; + + /* + * Formerly "pf_queues" (also, "pf_queues_active" and + * "pf_queues_inactive") + */ + list_t pfns_queues[2]; + list_t *pfns_queues_active; + list_t *pfns_queues_inactive; + + /* + * Formerly "pf_consistency_lock" global. + */ + krwlock_t pfns_consistency_lock; + } pf_netstack_t; extern u_int32_t ticket_pabuf; @@ -1885,7 +1912,6 @@ int pf_addr_compare(struct pf_addr *, struct pf_addr *, extern struct pf_status pf_status; extern struct pool pf_frent_pl, pf_frag_pl; -extern struct rwlock pf_consistency_lock; struct pf_pool_limit { void *pp; |