summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoshua M. Clulow <jmc@joyent.com>2015-11-09 18:17:06 +0000
committerJoshua M. Clulow <jmc@joyent.com>2015-11-09 18:17:06 +0000
commit1b7bb3212990f7ea79c8665261b26e041c34e3b4 (patch)
treef5f11f7771ce6f440f416f6b32f8f9f756fd3b49
parent9c193627e4eeb1f60921ac952878adcccb7a5698 (diff)
downloadillumos-joyent-jclulow_pf.tar.gz
XXX more pf work; starting to transform BSD queue/tree.h into list/avljclulow_pf
-rw-r--r--usr/src/uts/common/inet/pf/README.txt125
-rw-r--r--usr/src/uts/common/inet/pf/pf.c26
-rw-r--r--usr/src/uts/common/inet/pf/pf_ioctl.c49
-rw-r--r--usr/src/uts/common/inet/pf/pfvar.h98
4 files changed, 239 insertions, 59 deletions
diff --git a/usr/src/uts/common/inet/pf/README.txt b/usr/src/uts/common/inet/pf/README.txt
index c4f55650b5..825dc908ce 100644
--- a/usr/src/uts/common/inet/pf/README.txt
+++ b/usr/src/uts/common/inet/pf/README.txt
@@ -67,32 +67,143 @@ SLIST_ is single-link, but hell, start with a "list_t" to begin with.
RB_TREE --> avl_tree_t
+ ** Define the type:
+
+ RB_HEAD(HEADNAME, ELEM_TYPE)
+ HEADNAME as in "struct HEADNAME"
+ ELEM_TYPE is the element type
+
+ --> Instead, we just use the "avl_tree_t" type.
+
+ ** Forward declaration of implementation functions:
+ RB_PROTOTYPE(HEADNAME, ELEM_TYPE, FIELD, COMPARATOR)
+ ** Function definition:
+ RB_GENERATE(HEADNAME, ELEM_TYPE, FIELD, COMPARATOR)
+
+ --> Instead, we provide details in "avl_create()"
+
+ ** Define linkage structure (node):
+
+ RB_ENTRY(ELEM_TYPE)
+
+ --> embed "avl_node_t", size/offset provided to "avl_create()"
+
---------------------
"struct mbuf" -> "mblk_t"
+ - allocb(), dupb(), freeb(), freemsg(), etc
+
Used in "pf.c":
- m_adj():
+ m_adj(struct mbuf *mp, int req_len):
+ Trims req_len bytes of data from the mbuf chain pointed to by mp.
+ If req_len is positive, the data will be trimmed from the head of
+ the mbuf chain and if it is negative, it will be trimmed from the
+ tail of the mbuf chain.
+
+ --> int adjmsg(mblk_t *mp, ssize_t len)
- m_pulldown():
+ m_pulldown(struct mbuf *m, int off, int len, int *offp):
+ Ensure that the data in the mbuf chain starting at off and ending
+ at off+len will be put in a contiguous memory region. len must
+ be less than or equal to MCLBYTES. The pointer returned points
+ to an mbuf in the chain and the new offset for data in this mbuf
+ is *offp. If this function fails, m is freed.
- m_split():
+ *** THIS IS ONLY USED IN ONE PLACE, and we could probably
+ just "pullupmsg()" or "msgpullup()" here...
- m_cat():
+ m_split(struct mbuf *m0, int len0, int wait):
+ Split an mbuf chain in two pieces, returning a pointer to the
+ tail (which is made of the previous mbuf chain except the first
+ len0 bytes).
- m_copyback():
+ m_cat(struct mbuf *m, struct mbuf *n):
+ Concatenate the mbuf chain pointed to by n to the mbuf chain
+ pointed to by m. The mbuf chains must be of the same type.
+
+ --> "linkb()" ?
+ (sticks "n" into "b_cont" on last in chain "m")
+
+ m_copyback(struct mbuf *m0, int off, int len, caddr_t cp):
+ Copy data from a buffer pointed to by cp back into the mbuf chain
+ pointed to by m0 starting at off bytes from the beginning, ex-
+ tending the mbuf chain if necessary. The mbuf chain must be ini-
+ tialized properly, including setting m_len.
+
+ --> "mb_copyback()" from "uts/common/inet/ipf/misc.c" ??
m_gethdr(M_DONTWAIT, MT_HEADER):
+ Return a pointer to an mbuf of the type specified after initial-
+ izing it to contain a packet header. See m_get() for a descrip-
+ tion of how.
+ m_get(int how, int type)
+ Return a pointer to an mbuf of the type specified.
+ If the how argument is M_WAITOK, the function may
+ call tsleep(9) to await resources.
+ If how is M_DONTWAIT and resources are not available,
+ m_get() returns NULL.
+
+ ** THIS SEEMS TO JUST BE allocation of mbufs of various types.
m_copym(m, 0, M_COPYALL, M_NOWAIT):
+ m_copym(struct mbuf *m, int off, int len, int wait):
+ Copy an mbuf chain starting at off bytes from the beginning and
+ continuing for len bytes. If off is zero and m has the M_PKTHDR
+ flag set, the header is copied. If len is M_COPYALL the whole
+ mbuf is copied. The wait parameter can be M_WAIT or M_DONTWAIT.
+ It does not copy clusters, it just increases their reference
+ count.
+ m_copym2(struct mbuf *m, int off, int len, int wait):
+ The same as m_copym() except that it copies cluster mbufs, where-
+ as m_copym() just increases the reference count of the clusters.
- m_copym2():
+ ** WE DO NOT USE this in any form other than M_COPYALL, so
+ this probably devolves to something like "dupmsg()"
- m_freem():
+ m_freem(struct mbuf *m):
+ Free the mbuf chain pointed to by m.
+
+ --> freeb() or freemsg()?
m_tag_find(): ?
m_tag_prepend(): ?
+---------------------
+
+tsleep() appears to be used to have "pf_purge_thread()" wake up every second to
+do work. This could just be "ddi_periodic_add()" or whatever.
+
+
+---------------------
+
+struct rwlock / RWLOCK_INITIALIZER()
+ - pf_ioctl.c -- pf_consistency_lock
+
+
+---------------------
+
+FROM GLOBALS TO PER-ZONE STATE:
+
+ One "pf_netstack_t" object will exist for each zone's netstack.
+
+---------------------
+
+POOLS (pool_put, pool_get) BECOME kmem_cache_alloc/free()...
+
+ "pf_state_pl" --> "struct pf_state"
+
+
+---------------------
+
+ "struct pf_state"
+ - pf_create_state()
+
+MODULE INIT:
+
+ pfattach() in "pf_ioctl.c"
+
+
diff --git a/usr/src/uts/common/inet/pf/pf.c b/usr/src/uts/common/inet/pf/pf.c
index 9ed6342783..be8ce11e97 100644
--- a/usr/src/uts/common/inet/pf/pf.c
+++ b/usr/src/uts/common/inet/pf/pf.c
@@ -307,20 +307,10 @@ struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = {
} while (0)
static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
-static __inline int pf_state_compare_key(struct pf_state_key *,
- struct pf_state_key *);
-static __inline int pf_state_compare_id(struct pf_state *,
- struct pf_state *);
struct pf_src_tree tree_src_tracking;
-struct pf_state_tree_id tree_id;
-struct pf_state_queue state_list;
-
RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare);
-RB_GENERATE(pf_state_tree, pf_state_key, entry, pf_state_compare_key);
-RB_GENERATE(pf_state_tree_id, pf_state,
- entry_id, pf_state_compare_id);
__inline int
pf_addr_compare(struct pf_addr *a, struct pf_addr *b, sa_family_t af)
@@ -649,9 +639,11 @@ pf_state_rm_src_node(struct pf_state *s, struct pf_src_node *sn)
/* state table stuff */
-static __inline int
-pf_state_compare_key(struct pf_state_key *a, struct pf_state_key *b)
+int
+pf_state_compare_key(const void *aa, const void *bb)
{
+ pf_state_key_t *a = aa;
+ pf_state_key_t *b = bb;
int diff;
if ((diff = a->proto - b->proto) != 0)
@@ -671,9 +663,12 @@ pf_state_compare_key(struct pf_state_key *a, struct pf_state_key *b)
return (0);
}
-static __inline int
-pf_state_compare_id(struct pf_state *a, struct pf_state *b)
+int
+pf_state_compare_id(const void *aa, const void *bb)
{
+ pf_state_t *a = aa;
+ pf_state_t *b = bb;
+
if (a->id > b->id)
return (1);
if (a->id < b->id)
@@ -978,6 +973,7 @@ pf_state_insert(pf_netstack_t *pfns, struct pfi_kif *kif,
pf_detach_state(s);
return (-1);
}
+ list_insert_tail(&pfns->pfns_state_list, s);
TAILQ_INSERT_TAIL(&state_list, s, entry_list);
pfns->pfns_status.fcounters[FCNT_STATE_INSERT]++;
pfns->pfns_status.states++;
@@ -993,7 +989,7 @@ pf_find_state_byid(pf_netstack_t *pfns, struct pf_state_cmp *key)
{
pfns->pfns_status.fcounters[FCNT_STATE_SEARCH]++;
- return (RB_FIND(pf_state_tree_id, &tree_id, (struct pf_state *)key));
+ return (avl_find(&pfns->pfns_tree_id, key));
}
int
diff --git a/usr/src/uts/common/inet/pf/pf_ioctl.c b/usr/src/uts/common/inet/pf/pf_ioctl.c
index 6939a4357e..6f27ced8a4 100644
--- a/usr/src/uts/common/inet/pf/pf_ioctl.c
+++ b/usr/src/uts/common/inet/pf/pf_ioctl.c
@@ -104,7 +104,6 @@ void pf_qid2qname(u_int16_t, char *);
void pf_qid_unref(u_int16_t);
struct pf_rule pf_default_rule, pf_default_rule_new;
-struct rwlock pf_consistency_lock = RWLOCK_INITIALIZER("pfcnslk");
struct {
char statusif[IFNAMSIZ];
@@ -133,6 +132,46 @@ int pf_rtlabel_add(struct pf_addr_wrap *);
void pf_rtlabel_remove(struct pf_addr_wrap *);
void pf_rtlabel_copyout(struct pf_addr_wrap *);
+/*
+ * XXX Do the things that "pfattach()" did globally: allocate a per-netstack
+ * "pf_netstack_t" and initialise its lock, state list/trees and queue lists.
+ * Returns 0.  XXX "pfns" is not yet stored or handed back to any caller.
+ */
+int
+pf_netstack_attach(void)
+{
+ pf_netstack_t *pfns = kmem_zalloc(sizeof (*pfns), KM_SLEEP);
+
+ mutex_init(&pfns->pfns_lock, NULL, MUTEX_DRIVER, NULL);
+
+ /* Replacing the global "state_list" and "tree_id": */
+ list_create(&pfns->pfns_state_list, sizeof (pf_state_t),
+ offsetof(pf_state_t, entry_list));
+ avl_create(&pfns->pfns_tree_id, pf_state_compare_id,
+ sizeof (pf_state_t), offsetof(pf_state_t, entry_id));
+
+ /* And "pf_statetbl": */
+ avl_create(&pfns->pfns_statetbl, pf_state_compare_key,
+ sizeof (pf_state_key_t), offsetof(pf_state_key_t, entry));
+
+ /*
+ * XXX pf queues... is this a bandwidth limiting / qos thing?
+ */
+ list_create(&pfns->pfns_queues[0], sizeof (pf_queuespec_t),
+ offsetof(pf_queuespec_t, entries));
+ list_create(&pfns->pfns_queues[1], sizeof (pf_queuespec_t),
+ offsetof(pf_queuespec_t, entries));
+ pfns->pfns_queues_active = &pfns->pfns_queues[0];
+ pfns->pfns_queues_inactive = &pfns->pfns_queues[1];
+
+ /*
+ * XXX Is this lock to be used in a (soft) interrupt handler?
+ * (see rwlock(9F)).
+ */
+ rw_init(&pfns->pfns_consistency_lock, NULL, RW_DRIVER, NULL);
+
+ return (0);
+}
void
pfattach(int num)
@@ -170,10 +209,12 @@ pfattach(int num)
RB_INIT(&tree_src_tracking);
RB_INIT(&pf_anchors);
pf_init_ruleset(&pf_main_ruleset);
+#if 0 /* NOW IN pf_netstack_attach */
TAILQ_INIT(&pf_queues[0]);
TAILQ_INIT(&pf_queues[1]);
pf_queues_active = &pf_queues[0];
pf_queues_inactive = &pf_queues[1];
+#endif
TAILQ_INIT(&state_list);
/* default rule should never be garbage collected */
@@ -906,6 +947,9 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
return (EACCES);
}
+ /*
+ * XXX these need to operate on "pfns->pfns_consistency_lock".
+ */
if (flags & FWRITE)
rw_enter_write(&pf_consistency_lock);
else
@@ -2306,6 +2350,9 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
}
fail:
splx(s);
+ /*
+ * XXX these need to operate on "pfns->pfns_consistency_lock".
+ */
if (flags & FWRITE)
rw_exit_write(&pf_consistency_lock);
else
diff --git a/usr/src/uts/common/inet/pf/pfvar.h b/usr/src/uts/common/inet/pf/pfvar.h
index eb07abef76..8f7c25a831 100644
--- a/usr/src/uts/common/inet/pf/pfvar.h
+++ b/usr/src/uts/common/inet/pf/pfvar.h
@@ -623,7 +623,7 @@ SLIST_HEAD(pf_rule_slist, pf_rule_item);
enum pf_sn_types { PF_SN_NONE, PF_SN_NAT, PF_SN_RDR, PF_SN_ROUTE, PF_SN_MAX };
struct pf_src_node {
- RB_ENTRY(pf_src_node) entry;
+ avl_node_t entry;
struct pf_addr addr;
struct pf_addr raddr;
union pf_rule_ptr rule;
@@ -684,8 +684,6 @@ struct pf_state_peer {
u_int8_t pad[1];
};
-TAILQ_HEAD(pf_state_queue, pf_state);
-
/* keep synced with struct pf_state_key, used in RB_FIND */
struct pf_state_key_cmp {
struct pf_addr addr[2];
@@ -709,7 +707,7 @@ struct pf_state_key {
sa_family_t af;
u_int8_t proto;
- RB_ENTRY(pf_state_key) entry;
+ avl_node_t entry;
struct pf_statelisthead states;
struct pf_state_key *reverse;
struct inpcb *inp;
@@ -726,15 +724,21 @@ struct pf_state_cmp {
u_int8_t pad[3];
};
-struct pf_state {
+typedef struct pf_state {
u_int64_t id;
u_int32_t creatorid;
u_int8_t direction;
u_int8_t pad[3];
- TAILQ_ENTRY(pf_state) sync_list;
- TAILQ_ENTRY(pf_state) entry_list;
- RB_ENTRY(pf_state) entry_id;
+ /*
+ * XXX this one is for pfsync apparently...
+ */
+ list_node_t sync_list;
+ /*
+ * This one is for "pfns_state_list":
+ */
+ list_node_t entry_list;
+ avl_node_t entry_id;
struct pf_state_peer src;
struct pf_state_peer dst;
struct pf_rule_slist match_rules;
@@ -779,7 +783,7 @@ struct pf_state {
u_int16_t if_index_in;
u_int16_t if_index_out;
u_int8_t pad2[2];
-};
+} pf_state_t;
/*
* Unified state structures for pulling states out of the kernel
@@ -925,8 +929,8 @@ struct pf_ruleset {
RB_HEAD(pf_anchor_global, pf_anchor);
RB_HEAD(pf_anchor_node, pf_anchor);
struct pf_anchor {
- RB_ENTRY(pf_anchor) entry_global;
- RB_ENTRY(pf_anchor) entry_node;
+ avl_node_t entry_global;
+ avl_node_t entry_node;
struct pf_anchor *parent;
struct pf_anchor_node children;
char name[PF_ANCHOR_NAME_SIZE];
@@ -1089,7 +1093,7 @@ SLIST_HEAD(pfr_ktableworkq, pfr_ktable);
RB_HEAD(pfr_ktablehead, pfr_ktable);
struct pfr_ktable {
struct pfr_tstats pfrkt_ts;
- RB_ENTRY(pfr_ktable) pfrkt_tree;
+ avl_node_t pfrkt_tree;
SLIST_ENTRY(pfr_ktable) pfrkt_workq;
struct radix_node_head *pfrkt_ip4;
struct radix_node_head *pfrkt_ip6;
@@ -1117,18 +1121,12 @@ struct pfr_ktable {
typedef struct pf_state_tree pf_state_tree_t;
-RB_HEAD(pf_state_tree, pf_state_key);
-RB_PROTOTYPE(pf_state_tree, pf_state_key, entry, pf_state_compare_key)
-
RB_HEAD(pf_state_tree_ext_gwy, pf_state_key);
RB_PROTOTYPE(pf_state_tree_ext_gwy, pf_state_key,
entry_ext_gwy, pf_state_compare_ext_gwy)
RB_HEAD(pfi_ifhead, pfi_kif);
-/* state tables */
-extern struct pf_state_tree pf_statetbl;
-
/* keep synced with pfi_kif, used in RB_FIND */
struct pfi_kif_cmp {
char pfik_name[IFNAMSIZ];
@@ -1139,7 +1137,7 @@ struct ifg_group;
struct pfi_kif {
char pfik_name[IFNAMSIZ];
- RB_ENTRY(pfi_kif) pfik_tree;
+ avl_node_t pfik_tree;
u_int64_t pfik_packets[2][2][2];
u_int64_t pfik_bytes[2][2][2];
time_t pfik_tzero;
@@ -1379,8 +1377,8 @@ struct pf_queue_scspec {
u_int d;
};
-struct pf_queuespec {
- TAILQ_ENTRY(pf_queuespec) entries;
+typedef struct pf_queuespec {
+ list_node_t entries;
char qname[PF_QNAME_SIZE];
char parent[PF_QNAME_SIZE];
char ifname[IFNAMSIZ];
@@ -1392,7 +1390,7 @@ struct pf_queuespec {
u_int qlimit;
u_int32_t qid;
u_int32_t parent_qid;
-};
+} pf_queuespec_t;
struct cbq_opts {
u_int minburst;
@@ -1669,16 +1667,12 @@ RB_HEAD(pf_src_tree, pf_src_node);
RB_PROTOTYPE(pf_src_tree, pf_src_node, entry, pf_src_compare);
extern struct pf_src_tree tree_src_tracking;
-RB_HEAD(pf_state_tree_id, pf_state);
-RB_PROTOTYPE(pf_state_tree_id, pf_state,
- entry_id, pf_state_compare_id);
-extern struct pf_state_tree_id tree_id;
-extern struct pf_state_queue state_list;
-typedef struct pf_queuehead pf_queuehead_t;
-TAILQ_HEAD(pf_queuehead, pf_queuespec);
-extern struct pf_queuehead pf_queues[2];
-extern struct pf_queuehead *pf_queues_active, *pf_queues_inactive;
+/*
+ * XXX AVL Comparators:
+ */
+extern int pf_state_compare_id(const void *, const void *);
+extern int pf_state_compare_key(const void *, const void *);
/*
* Track pf state. This structure is the per-netstack analogue of various
@@ -1690,11 +1684,44 @@ typedef struct pf_netstack {
/*
* Formerly globals in "pf.c":
*/
- pf_state_tree_t pfns_statetbl;
- pf_queuehead_t pfns_queues[2];
- pf_queuehead_t *pfns_queues_active;
- pf_queuehead_t *pfns_queues_inactive;
pf_status_t pfns_status;
+
+ /*
+ * Formerly "struct pf_state_queue state_list", a TAILQ.
+ */
+ list_t pfns_state_list;
+
+ /*
+ * Formerly "RB_HEAD(pf_state_tree_id, pf_state)" and
+ * "RB_PROTOTYPE(pf_state_tree_id, pf_state, entry_id,
+ * pf_state_compare_id)". Instantiated as
+ *
+ * struct pf_state_tree_id tree_id;
+ */
+ avl_tree_t pfns_tree_id;
+
+ /*
+ * Formerly "RB_HEAD(pf_state_tree, pf_state_key)" and
+ * "RB_PROTOTYPE(pf_state_tree, pf_state_key, entry,
+ * pf_state_compare_key)". Instantiated as
+ *
+ * extern struct pf_state_tree pf_statetbl;
+ */
+ avl_tree_t pfns_statetbl;
+
+ /*
+ * Formerly "pf_queues" (also, "pf_queues_active" and
+ * "pf_queues_inactive")
+ */
+ list_t pfns_queues[2];
+ list_t *pfns_queues_active;
+ list_t *pfns_queues_inactive;
+
+ /*
+ * Formerly "pf_consistency_lock" global.
+ */
+ krwlock_t pfns_consistency_lock;
+
} pf_netstack_t;
extern u_int32_t ticket_pabuf;
@@ -1885,7 +1912,6 @@ int pf_addr_compare(struct pf_addr *, struct pf_addr *,
extern struct pf_status pf_status;
extern struct pool pf_frent_pl, pf_frag_pl;
-extern struct rwlock pf_consistency_lock;
struct pf_pool_limit {
void *pp;