Diffstat (limited to 'usr/src/uts/common/inet/ip/spd.c')
-rw-r--r--	usr/src/uts/common/inet/ip/spd.c	2197
1 file changed, 2016 insertions, 181 deletions
diff --git a/usr/src/uts/common/inet/ip/spd.c b/usr/src/uts/common/inet/ip/spd.c
index a7386ee6b3..308ba2bee7 100644
--- a/usr/src/uts/common/inet/ip/spd.c
+++ b/usr/src/uts/common/inet/ip/spd.c
@@ -44,6 +44,7 @@
#include <sys/systm.h>
#include <sys/param.h>
#include <sys/kmem.h>
+#include <sys/ddi.h>
#include <sys/crypto/api.h>
@@ -66,31 +67,47 @@
#include <inet/ipsecesp.h>
#include <inet/ipdrop.h>
#include <inet/ipclassifier.h>
+#include <inet/tun.h>
static void ipsec_update_present_flags();
static ipsec_act_t *ipsec_act_wildcard_expand(ipsec_act_t *, uint_t *);
static void ipsec_out_free(void *);
static void ipsec_in_free(void *);
-static boolean_t ipsec_init_inbound_sel(ipsec_selector_t *, mblk_t *,
- ipha_t *, ip6_t *);
static mblk_t *ipsec_attach_global_policy(mblk_t *, conn_t *,
ipsec_selector_t *);
static mblk_t *ipsec_apply_global_policy(mblk_t *, conn_t *,
ipsec_selector_t *);
static mblk_t *ipsec_check_ipsecin_policy(queue_t *, mblk_t *,
- ipsec_policy_t *, ipha_t *, ip6_t *);
+ ipsec_policy_t *, ipha_t *, ip6_t *, uint64_t);
static void ipsec_in_release_refs(ipsec_in_t *);
static void ipsec_out_release_refs(ipsec_out_t *);
static void ipsec_action_reclaim(void *);
static void ipsid_init(void);
static void ipsid_fini(void);
+
+/* sel_flags values for ipsec_init_inbound_sel(). */
+#define SEL_NONE 0x0000
+#define SEL_PORT_POLICY 0x0001
+#define SEL_IS_ICMP 0x0002
+#define SEL_TUNNEL_MODE 0x0004
+
+/* Return values for ipsec_init_inbound_sel(). */
+typedef enum { SELRET_NOMEM, SELRET_BADPKT, SELRET_SUCCESS, SELRET_TUNFRAG}
+ selret_t;
+
+static selret_t ipsec_init_inbound_sel(ipsec_selector_t *, mblk_t *,
+ ipha_t *, ip6_t *, uint8_t);
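A minimal caller sketch for these return values (handler names are hypothetical; the real dispatch appears in ipsec_tun_inbound() near the end of this change):

	switch (ipsec_init_inbound_sel(&sel, mp, ipha, NULL,
	    SEL_TUNNEL_MODE | SEL_PORT_POLICY)) {
	case SELRET_SUCCESS:
		use_selector(&sel);	/* hypothetical consumer */
		break;
	case SELRET_TUNFRAG:
		queue_fragment(mp);	/* hypothetical frag-gathering queue */
		break;
	case SELRET_BADPKT:
	case SELRET_NOMEM:
		freemsg(mp);		/* malformed, or out of memory */
		break;
	}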
+
static boolean_t ipsec_check_ipsecin_action(struct ipsec_in_s *, mblk_t *,
struct ipsec_action_s *, ipha_t *ipha, ip6_t *ip6h, const char **,
kstat_named_t **);
-static int32_t ipsec_act_ovhd(const ipsec_act_t *act);
static void ipsec_unregister_prov_update(void);
static boolean_t ipsec_compare_action(ipsec_policy_t *, ipsec_policy_t *);
-static uint32_t selector_hash(ipsec_selector_t *);
+static uint32_t selector_hash(ipsec_selector_t *, ipsec_policy_root_t *);
+static int tunnel_compare(const void *, const void *);
+static void ipsec_freemsg_chain(mblk_t *);
+static void ip_drop_packet_chain(mblk_t *, boolean_t, ill_t *, ire_t *,
+ struct kstat_named *, ipdropper_t *);
/*
* Policy rule index generator. We assume this won't wrap in the
@@ -108,8 +125,15 @@ uint64_t ipsec_next_policy_index = 1;
static ipsec_policy_head_t system_policy;
static ipsec_policy_head_t inactive_policy;
+/*
+ * Tunnel policies - AVL tree indexed by tunnel name.
+ */
+krwlock_t tunnel_policy_lock;
+uint64_t tunnel_policy_gen; /* To keep track of updates w/o searches. */
+avl_tree_t tunnel_policies;
+
/* Packet dropper for generic SPD drops. */
-static ipdropper_t spd_dropper;
+ipdropper_t spd_dropper;
/*
* For now, use a trivially sized hash table for actions.
@@ -126,6 +150,11 @@ static ipdropper_t spd_dropper;
#define IPSEC_SPDHASH_DEFAULT 251
uint32_t ipsec_spd_hashsize = 0;
+/* SPD hash-size tunable per tunnel. */
+#define TUN_SPDHASH_DEFAULT 5
+uint32_t tun_spd_hashsize;
+
+
#define IPSEC_SEL_NOHASH ((uint32_t)(~0))
static HASH_HEAD(ipsec_action_s) ipsec_action_hash[IPSEC_ACTION_HASH_SIZE];
@@ -141,12 +170,22 @@ boolean_t ipsec_outbound_v4_policy_present = B_FALSE;
boolean_t ipsec_inbound_v6_policy_present = B_FALSE;
boolean_t ipsec_outbound_v6_policy_present = B_FALSE;
+/* Frag cache prototypes */
+static void ipsec_fragcache_clean(ipsec_fragcache_t *);
+static ipsec_fragcache_entry_t *fragcache_delentry(int,
+ ipsec_fragcache_entry_t *, ipsec_fragcache_t *);
+boolean_t ipsec_fragcache_init(ipsec_fragcache_t *);
+void ipsec_fragcache_uninit(ipsec_fragcache_t *);
+mblk_t *ipsec_fragcache_add(ipsec_fragcache_t *, mblk_t *, mblk_t *, int);
+
/*
* Because policy needs to know what algorithms are supported, keep the
* lists of algorithms here.
*/
kmutex_t alg_lock;
+krwlock_t itp_get_byaddr_rw_lock;
+ipsec_tun_pol_t *(*itp_get_byaddr)(uint32_t *, uint32_t *, int);
uint8_t ipsec_nalgs[IPSEC_NALGTYPES];
ipsec_alginfo_t *ipsec_alglists[IPSEC_NALGTYPES][IPSEC_MAX_ALGS];
uint8_t ipsec_sortlist[IPSEC_NALGTYPES][IPSEC_MAX_ALGS];
@@ -168,10 +207,17 @@ int ipsec_weird_null_inbound_policy = 0;
(((sa1)->ipsa_src_cid == (sa2)->ipsa_src_cid) && \
(((sa1)->ipsa_dst_cid == (sa2)->ipsa_dst_cid))))
-#define IPPOL_UNCHAIN(php, ip) \
- HASHLIST_UNCHAIN((ip), ipsp_hash); \
- avl_remove(&(php)->iph_rulebyid, (ip)); \
- IPPOL_REFRELE(ip);
+/*
+ * IPv4 Fragments
+ */
+#define IS_V4_FRAGMENT(ipha_fragment_offset_and_flags) \
+ (((ntohs(ipha_fragment_offset_and_flags) & IPH_OFFSET) != 0) || \
+ ((ntohs(ipha_fragment_offset_and_flags) & IPH_MF) != 0))
+
+/*
+ * IPv6 Fragments
+ */
+#define IS_V6_FRAGMENT(ipp) (ipp.ipp_fields & IPPF_FRAGHDR)
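A worked example of the IPv4 test, assuming the usual mask values IPH_OFFSET == 0x1fff and IPH_MF == 0x2000; note that DF (0x4000) lies outside both masks, so it never makes a packet look fragmented:

	/* The field is stored in network order, hence the ntohs() above. */
	uint16_t unfragmented = htons(0x4000);	/* DF only -> not a fragment */
	uint16_t first_frag = htons(0x2000);	/* MF set, offset 0 -> fragment */
	uint16_t last_frag = htons(0x00b9);	/* MF clear, offset set -> fragment */

	ASSERT(!IS_V4_FRAGMENT(unfragmented));
	ASSERT(IS_V4_FRAGMENT(first_frag));
	ASSERT(IS_V4_FRAGMENT(last_frag));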
/*
* Policy failure messages.
@@ -227,6 +273,37 @@ hrtime_t ipsec_policy_failure_last = 0;
* entries..
*/
+/* Convenient functions for freeing or dropping a b_next linked mblk chain */
+
+/* Free all messages in an mblk chain */
+static void
+ipsec_freemsg_chain(mblk_t *mp)
+{
+ mblk_t *mpnext;
+ while (mp != NULL) {
+ ASSERT(mp->b_prev == NULL);
+ mpnext = mp->b_next;
+ mp->b_next = NULL;
+ freemsg(mp); /* Always works, even if NULL */
+ mp = mpnext;
+ }
+}
+
+/* ip_drop all messages in an mblk chain */
+static void
+ip_drop_packet_chain(mblk_t *mp, boolean_t inbound, ill_t *arriving,
+ ire_t *outbound_ire, struct kstat_named *counter, ipdropper_t *who_called)
+{
+ mblk_t *mpnext;
+ while (mp != NULL) {
+ ASSERT(mp->b_prev == NULL);
+ mpnext = mp->b_next;
+ mp->b_next = NULL;
+ ip_drop_packet(mp, inbound, arriving, outbound_ire, counter,
+ who_called);
+ mp = mpnext;
+ }
+}
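Both helpers walk b_next (packet-chain) links; each chain element may itself be a multi-mblk message linked through b_cont. A hedged usage sketch mirroring the call sites later in this change:

	/* Drop an entire gathered fragment chain on allocation failure. */
	if (spare_mp == NULL) {
		ip_drop_packet_chain(mp, B_FALSE, NULL, NULL,
		    &ipdrops_spd_nomem, &spd_dropper);
		return (B_FALSE);
	}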
/*
* AVL tree comparison function.
@@ -281,12 +358,10 @@ ipsec_policy_cmpbyid(const void *a, const void *b)
return (0);
}
-static void
+void
ipsec_polhead_free_table(ipsec_policy_head_t *iph)
{
- int dir, nchains;
-
- nchains = ipsec_spd_hashsize;
+ int dir;
for (dir = 0; dir < IPSEC_NTYPES; dir++) {
ipsec_policy_root_t *ipr = &iph->iph_root[dir];
@@ -294,12 +369,12 @@ ipsec_polhead_free_table(ipsec_policy_head_t *iph)
if (ipr->ipr_hash == NULL)
continue;
- kmem_free(ipr->ipr_hash, nchains *
+ kmem_free(ipr->ipr_hash, ipr->ipr_nchains *
sizeof (ipsec_policy_hash_t));
}
}
-static void
+void
ipsec_polhead_destroy(ipsec_policy_head_t *iph)
{
int dir;
@@ -309,10 +384,9 @@ ipsec_polhead_destroy(ipsec_policy_head_t *iph)
for (dir = 0; dir < IPSEC_NTYPES; dir++) {
ipsec_policy_root_t *ipr = &iph->iph_root[dir];
- int nchains = ipr->ipr_nchains;
int chain;
- for (chain = 0; chain < nchains; chain++)
+ for (chain = 0; chain < ipr->ipr_nchains; chain++)
mutex_destroy(&(ipr->ipr_hash[chain].hash_lock));
}
@@ -326,10 +400,27 @@ void
ipsec_policy_destroy(void)
{
int i;
+ void *cookie;
+ ipsec_tun_pol_t *node;
ip_drop_unregister(&spd_dropper);
ip_drop_destroy();
+ rw_enter(&tunnel_policy_lock, RW_WRITER);
+ /*
+ * It's possible we can just ASSERT() the tree is empty. After all,
+ * we aren't called until IP is ready to unload (and presumably all
+ * tunnels have been unplumbed). But we'll play it safe for now, the
+ * loop will just exit immediately if it's empty.
+ */
+ cookie = NULL;
+ while ((node = (ipsec_tun_pol_t *)
+ avl_destroy_nodes(&tunnel_policies, &cookie)) != NULL) {
+ ITP_REFRELE(node);
+ }
+ avl_destroy(&tunnel_policies);
+ rw_exit(&tunnel_policy_lock);
+ rw_destroy(&tunnel_policy_lock);
ipsec_polhead_destroy(&system_policy);
ipsec_polhead_destroy(&inactive_policy);
@@ -373,20 +464,21 @@ ipsec_alloc_tables_failed()
* Attempt to allocate the tables in a single policy head.
* Return nonzero on failure after cleaning up any work in progress.
*/
-static int
-ipsec_alloc_table(ipsec_policy_head_t *iph, int kmflag)
+int
+ipsec_alloc_table(ipsec_policy_head_t *iph, int nchains, int kmflag,
+ boolean_t global_cleanup)
{
- int dir, nchains;
-
- nchains = ipsec_spd_hashsize;
+ int dir;
for (dir = 0; dir < IPSEC_NTYPES; dir++) {
ipsec_policy_root_t *ipr = &iph->iph_root[dir];
+ ipr->ipr_nchains = nchains;
ipr->ipr_hash = kmem_zalloc(nchains *
sizeof (ipsec_policy_hash_t), kmflag);
if (ipr->ipr_hash == NULL)
- return (ipsec_alloc_tables_failed());
+ return (global_cleanup ? ipsec_alloc_tables_failed() :
+ ENOMEM);
}
return (0);
}
@@ -400,11 +492,13 @@ ipsec_alloc_tables(int kmflag)
{
int error;
- error = ipsec_alloc_table(&system_policy, kmflag);
+ error = ipsec_alloc_table(&system_policy, ipsec_spd_hashsize, kmflag,
+ B_TRUE);
if (error != 0)
return (error);
- error = ipsec_alloc_table(&inactive_policy, kmflag);
+ error = ipsec_alloc_table(&inactive_policy, ipsec_spd_hashsize, kmflag,
+ B_TRUE);
if (error != 0)
return (error);
@@ -420,12 +514,10 @@ ipsec_alloc_tables(int kmflag)
/*
* After table allocation, initialize a policy head.
*/
-static void
-ipsec_polhead_init(ipsec_policy_head_t *iph)
+void
+ipsec_polhead_init(ipsec_policy_head_t *iph, int nchains)
{
- int dir, chain, nchains;
-
- nchains = ipsec_spd_hashsize;
+ int dir, chain;
rw_init(&iph->iph_lock, NULL, RW_DEFAULT, NULL);
avl_create(&iph->iph_rulebyid, ipsec_policy_cmpbyid,
@@ -468,9 +560,22 @@ ipsec_policy_init()
(void) ipsec_alloc_tables(KM_SLEEP);
}
+ /* Just set a default for tunnels. */
+ if (tun_spd_hashsize == 0)
+ tun_spd_hashsize = TUN_SPDHASH_DEFAULT;
+
ipsid_init();
- ipsec_polhead_init(&system_policy);
- ipsec_polhead_init(&inactive_policy);
+ /*
+ * Globals need ref == 1 to prevent IPPH_REFRELE() from attempting
+ * to free them.
+ */
+ system_policy.iph_refs = 1;
+ inactive_policy.iph_refs = 1;
+ ipsec_polhead_init(&system_policy, ipsec_spd_hashsize);
+ ipsec_polhead_init(&inactive_policy, ipsec_spd_hashsize);
+ rw_init(&tunnel_policy_lock, NULL, RW_DEFAULT, NULL);
+ avl_create(&tunnel_policies, tunnel_compare, sizeof (ipsec_tun_pol_t),
+ 0);
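The ref == 1 trick works because IPPH_REFRELE() frees a polhead once its count hits zero; seeding the embedded globals with one extra reference means that can never happen to them. A hedged sketch of the macro's shape (its definition lives outside this diff):

	/*
	 * Assumed shape, for illustration only:
	 * if (atomic_add_32_nv(&(iph)->iph_refs, -1) == 0)
	 *	ipsec_polhead_free(iph);
	 */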
for (i = 0; i < IPSEC_ACTION_HASH_SIZE; i++)
mutex_init(&(ipsec_action_hash[i].hash_lock),
@@ -500,6 +605,12 @@ ipsec_policy_init()
ip_drop_init();
ip_drop_register(&spd_dropper, "IPsec SPD");
+
+ /* Set function to dummy until tun is loaded */
+ rw_init(&itp_get_byaddr_rw_lock, NULL, RW_DEFAULT, NULL);
+ rw_enter(&itp_get_byaddr_rw_lock, RW_WRITER);
+ itp_get_byaddr = itp_get_byaddr_dummy;
+ rw_exit(&itp_get_byaddr_rw_lock);
}
/*
@@ -628,52 +739,59 @@ ipsec_inactive_policy(void)
* pointers.
*/
void
-ipsec_swap_policy(void)
+ipsec_swap_policy(ipsec_policy_head_t *active, ipsec_policy_head_t *inactive)
{
int af, dir;
avl_tree_t r1, r2;
- rw_enter(&inactive_policy.iph_lock, RW_WRITER);
- rw_enter(&system_policy.iph_lock, RW_WRITER);
+ rw_enter(&inactive->iph_lock, RW_WRITER);
+ rw_enter(&active->iph_lock, RW_WRITER);
- r1 = system_policy.iph_rulebyid;
- r2 = inactive_policy.iph_rulebyid;
- system_policy.iph_rulebyid = r2;
- inactive_policy.iph_rulebyid = r1;
+ r1 = active->iph_rulebyid;
+ r2 = inactive->iph_rulebyid;
+ active->iph_rulebyid = r2;
+ inactive->iph_rulebyid = r1;
for (dir = 0; dir < IPSEC_NTYPES; dir++) {
ipsec_policy_hash_t *h1, *h2;
- h1 = system_policy.iph_root[dir].ipr_hash;
- h2 = inactive_policy.iph_root[dir].ipr_hash;
- system_policy.iph_root[dir].ipr_hash = h2;
- inactive_policy.iph_root[dir].ipr_hash = h1;
+ h1 = active->iph_root[dir].ipr_hash;
+ h2 = inactive->iph_root[dir].ipr_hash;
+ active->iph_root[dir].ipr_hash = h2;
+ inactive->iph_root[dir].ipr_hash = h1;
for (af = 0; af < IPSEC_NAF; af++) {
ipsec_policy_t *t1, *t2;
- t1 = system_policy.iph_root[dir].ipr_nonhash[af];
- t2 = inactive_policy.iph_root[dir].ipr_nonhash[af];
- system_policy.iph_root[dir].ipr_nonhash[af] = t2;
- inactive_policy.iph_root[dir].ipr_nonhash[af] = t1;
+ t1 = active->iph_root[dir].ipr_nonhash[af];
+ t2 = inactive->iph_root[dir].ipr_nonhash[af];
+ active->iph_root[dir].ipr_nonhash[af] = t2;
+ inactive->iph_root[dir].ipr_nonhash[af] = t1;
if (t1 != NULL) {
t1->ipsp_hash.hash_pp =
- &(inactive_policy.iph_root[dir].
- ipr_nonhash[af]);
+ &(inactive->iph_root[dir].ipr_nonhash[af]);
}
if (t2 != NULL) {
t2->ipsp_hash.hash_pp =
- &(system_policy.iph_root[dir].
- ipr_nonhash[af]);
+ &(active->iph_root[dir].ipr_nonhash[af]);
}
}
}
- system_policy.iph_gen++;
- inactive_policy.iph_gen++;
+ active->iph_gen++;
+ inactive->iph_gen++;
ipsec_update_present_flags();
- rw_exit(&system_policy.iph_lock);
- rw_exit(&inactive_policy.iph_lock);
+ rw_exit(&active->iph_lock);
+ rw_exit(&inactive->iph_lock);
+}
+
+/*
+ * Swap global policy primary/secondary.
+ */
+void
+ipsec_swap_global_policy(void)
+{
+ ipsec_swap_policy(&system_policy, &inactive_policy);
}
/*
@@ -739,7 +857,7 @@ ipsec_copy_chain(ipsec_policy_head_t *dph, ipsec_policy_t *src,
* the source policy head. Note that we only need to read-lock the source
* policy head as we are not changing it.
*/
-static int
+int
ipsec_copy_polhead(ipsec_policy_head_t *sph, ipsec_policy_head_t *dph)
{
int af, dir, chain, nchains;
@@ -793,6 +911,40 @@ ipsec_clone_system_policy(void)
return (ipsec_copy_polhead(&system_policy, &inactive_policy));
}
+/*
+ * Generic "do we have IPvN policy" answer.
+ */
+boolean_t
+iph_ipvN(ipsec_policy_head_t *iph, boolean_t v6)
+{
+ int i, hval;
+ uint32_t valbit;
+ ipsec_policy_root_t *ipr;
+ ipsec_policy_t *ipp;
+
+ if (v6) {
+ valbit = IPSL_IPV6;
+ hval = IPSEC_AF_V6;
+ } else {
+ valbit = IPSL_IPV4;
+ hval = IPSEC_AF_V4;
+ }
+
+ ASSERT(RW_LOCK_HELD(&iph->iph_lock));
+ for (ipr = iph->iph_root; ipr < &(iph->iph_root[IPSEC_NTYPES]); ipr++) {
+ if (ipr->ipr_nonhash[hval] != NULL)
+ return (B_TRUE);
+ for (i = 0; i < ipr->ipr_nchains; i++) {
+ for (ipp = ipr->ipr_hash[i].hash_head; ipp != NULL;
+ ipp = ipp->ipsp_hash.hash_next) {
+ if (ipp->ipsp_sel->ipsl_key.ipsl_valid & valbit)
+ return (B_TRUE);
+ }
+ }
+ }
+
+ return (B_FALSE);
+}
/*
* Extract the string from ipsec_policy_failure_msgs[type] and
@@ -893,12 +1045,14 @@ act_alg_adjust(uint_t algtype, uint_t algid,
*minbits = algp->alg_default_bits;
ASSERT(*minbits >= algp->alg_minbits);
} else {
- *minbits = MAX(*minbits, algp->alg_minbits);
+ *minbits = MAX(MIN(*minbits, algp->alg_maxbits),
+ algp->alg_minbits);
}
if (*maxbits == 0)
*maxbits = algp->alg_maxbits;
else
- *maxbits = MIN(*maxbits, algp->alg_maxbits);
+ *maxbits = MIN(MAX(*maxbits, algp->alg_minbits),
+ algp->alg_maxbits);
ASSERT(*minbits <= *maxbits);
} else {
*minbits = 0;
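The added MIN()/MAX() clamps keep a caller-supplied range inside the algorithm's limits, so the ASSERT(*minbits <= *maxbits) above can no longer fire. A worked example with a hypothetical algorithm (alg_minbits = 128, alg_maxbits = 256) and a caller passing *minbits = 512, *maxbits = 0:

	/*
	 * Old code: *minbits = MAX(512, 128) = 512, *maxbits = 256,
	 *	     and ASSERT(512 <= 256) panics a DEBUG kernel.
	 * New code: *minbits = MAX(MIN(512, 256), 128) = 256,
	 *	     *maxbits = 256, so the range collapses to [256, 256].
	 */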
@@ -1190,7 +1344,7 @@ ipsec_req_from_act(ipsec_action_t *ap, ipsec_req_t *req)
* Convert a new-style action back to an ipsec_req_t (more backwards compat).
* We assume caller has already zero'ed *req for us.
*/
-static int
+int
ipsec_req_from_head(ipsec_policy_head_t *ph, ipsec_req_t *req, int af)
{
ipsec_policy_t *p;
@@ -1201,7 +1355,7 @@ ipsec_req_from_head(ipsec_policy_head_t *ph, ipsec_req_t *req, int af)
for (p = ph->iph_root[IPSEC_INBOUND].ipr_nonhash[af];
p != NULL;
p = p->ipsp_hash.hash_next) {
- if ((p->ipsp_sel->ipsl_key.ipsl_valid&IPSL_WILDCARD) == 0)
+ if ((p->ipsp_sel->ipsl_key.ipsl_valid & IPSL_WILDCARD) == 0)
return (ipsec_req_from_act(p->ipsp_act, req));
}
return (sizeof (*req));
@@ -1325,14 +1479,12 @@ ipsec_check_loopback_policy(queue_t *q, mblk_t *first_mp,
* expected by the SAs it traversed on the way in.
*/
static boolean_t
-ipsec_check_ipsecin_unique(ipsec_in_t *ii, mblk_t *mp,
- ipha_t *ipha, ip6_t *ip6h,
- const char **reason, kstat_named_t **counter)
+ipsec_check_ipsecin_unique(ipsec_in_t *ii, const char **reason,
+ kstat_named_t **counter, uint64_t pkt_unique)
{
- uint64_t pkt_unique, ah_mask, esp_mask;
+ uint64_t ah_mask, esp_mask;
ipsa_t *ah_assoc;
ipsa_t *esp_assoc;
- ipsec_selector_t sel;
ASSERT(ii->ipsec_in_secure);
ASSERT(!ii->ipsec_in_loopback);
@@ -1347,32 +1499,23 @@ ipsec_check_ipsecin_unique(ipsec_in_t *ii, mblk_t *mp,
if ((ah_mask == 0) && (esp_mask == 0))
return (B_TRUE);
- if (!ipsec_init_inbound_sel(&sel, mp, ipha, ip6h)) {
- /*
- * Technically not a policy mismatch, but it is
- * an internal failure.
- */
- *reason = "ipsec_init_inbound_sel";
- *counter = &ipdrops_spd_nomem;
- return (B_FALSE);
- }
-
- pkt_unique = SA_UNIQUE_ID(sel.ips_remote_port, sel.ips_local_port,
- sel.ips_protocol);
+ /*
+ * The pkt_unique check will also check for tunnel mode on the SA
+ * vs. the tunneled_packet boolean. "Be liberal in what you receive"
+ * should not apply in this case. ;)
+ */
- if (ah_mask != 0) {
- if (ah_assoc->ipsa_unique_id != (pkt_unique & ah_mask)) {
- *reason = "AH inner header mismatch";
- *counter = &ipdrops_spd_ah_innermismatch;
- return (B_FALSE);
- }
+ if (ah_mask != 0 &&
+ ah_assoc->ipsa_unique_id != (pkt_unique & ah_mask)) {
+ *reason = "AH inner header mismatch";
+ *counter = &ipdrops_spd_ah_innermismatch;
+ return (B_FALSE);
}
- if (esp_mask != 0) {
- if (esp_assoc->ipsa_unique_id != (pkt_unique & esp_mask)) {
- *reason = "ESP inner header mismatch";
- *counter = &ipdrops_spd_esp_innermismatch;
- return (B_FALSE);
- }
+ if (esp_mask != 0 &&
+ esp_assoc->ipsa_unique_id != (pkt_unique & esp_mask)) {
+ *reason = "ESP inner header mismatch";
+ *counter = &ipdrops_spd_esp_innermismatch;
+ return (B_FALSE);
}
return (B_TRUE);
}
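The one-compare trick above relies on SA_UNIQUE_ID() packing ports and protocols into disjoint bit-fields of a single uint64_t (its definition lives outside this diff), with each SA's mask zeroing the fields it was not latched to:

	/*
	 * Hedged illustration: an SA latched to specific ports and a
	 * protocol stores ipsa_unique_id = SA_UNIQUE_ID(rport, lport,
	 * proto, 0) plus a mask covering just those fields, so one
	 * 64-bit AND-and-compare replaces separate port and protocol
	 * checks.  A mask of 0 matches everything -- hence the early
	 * B_TRUE above when both ah_mask and esp_mask are 0.
	 */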
@@ -1555,12 +1698,59 @@ spd_match_inbound_ids(ipsec_latch_t *ipl, ipsa_t *sa)
}
/*
+ * Takes a latched conn and an inbound packet and returns a unique_id suitable
+ * for SA comparisons. Most of the time we will copy from the conn_t, but
+ * there are cases when the conn_t is latched but it has wildcard selectors,
+ * and then we need to fall back to scooping them out of the packet.
+ *
+ * Assume we'll never have 0 with a conn_t present, so use 0 as a failure. We
+ * can get away with this because we only have non-zero ports/proto for
+ * latched conn_ts.
+ *
+ * Ideal candidate for an "inline" keyword, as we're JUST convoluted enough
+ * to not be a nice macro.
+ */
+static uint64_t
+conn_to_unique(conn_t *connp, mblk_t *data_mp, ipha_t *ipha, ip6_t *ip6h)
+{
+ ipsec_selector_t sel;
+ uint8_t ulp = connp->conn_ulp;
+
+ ASSERT(connp->conn_latch->ipl_in_policy != NULL);
+
+ if ((ulp == IPPROTO_TCP || ulp == IPPROTO_UDP || ulp == IPPROTO_SCTP) &&
+ (connp->conn_fport == 0 || connp->conn_lport == 0)) {
+ /* Slow path - we gotta grab from the packet. */
+ if (ipsec_init_inbound_sel(&sel, data_mp, ipha, ip6h,
+ SEL_NONE) != SELRET_SUCCESS) {
+ /* Failure -> have caller free packet with ENOMEM. */
+ return (0);
+ }
+ return (SA_UNIQUE_ID(sel.ips_remote_port, sel.ips_local_port,
+ sel.ips_protocol, 0));
+ }
+
+#ifdef DEBUG_NOT_UNTIL_6478464
+ if (ipsec_init_inbound_sel(&sel, data_mp, ipha, ip6h, SEL_NONE) ==
+ SELRET_SUCCESS) {
+ ASSERT(sel.ips_local_port == connp->conn_lport);
+ ASSERT(sel.ips_remote_port == connp->conn_fport);
+ ASSERT(sel.ips_protocol == connp->conn_ulp);
+ }
+ ASSERT(connp->conn_ulp != 0);
+#endif
+
+ return (SA_UNIQUE_ID(connp->conn_fport, connp->conn_lport, ulp, 0));
+}
+
+/*
* Called to check policy on a latched connection, both from this file
* and from tcp.c
*/
boolean_t
ipsec_check_ipsecin_latch(ipsec_in_t *ii, mblk_t *mp, ipsec_latch_t *ipl,
- ipha_t *ipha, ip6_t *ip6h, const char **reason, kstat_named_t **counter)
+ ipha_t *ipha, ip6_t *ip6h, const char **reason, kstat_named_t **counter,
+ conn_t *connp)
{
ASSERT(ipl->ipl_ids_latched == B_TRUE);
@@ -1584,8 +1774,13 @@ ipsec_check_ipsecin_latch(ipsec_in_t *ii, mblk_t *mp, ipsec_latch_t *ipl,
return (B_FALSE);
}
- if (!ipsec_check_ipsecin_unique(ii, mp, ipha, ip6h, reason,
- counter)) {
+ /*
+ * Can fudge pkt_unique from connp because we're latched.
+ * In DEBUG kernels (see conn_to_unique()'s implementation),
+ * verify this even if it REALLY slows things down.
+ */
+ if (!ipsec_check_ipsecin_unique(ii, reason, counter,
+ conn_to_unique(connp, mp, ipha, ip6h))) {
return (B_FALSE);
}
}
@@ -1604,7 +1799,7 @@ ipsec_check_ipsecin_latch(ipsec_in_t *ii, mblk_t *mp, ipsec_latch_t *ipl,
*/
static mblk_t *
ipsec_check_ipsecin_policy(queue_t *q, mblk_t *first_mp, ipsec_policy_t *ipsp,
- ipha_t *ipha, ip6_t *ip6h)
+ ipha_t *ipha, ip6_t *ip6h, uint64_t pkt_unique)
{
ipsec_in_t *ii;
ipsec_action_t *ap;
@@ -1643,8 +1838,7 @@ ipsec_check_ipsecin_policy(queue_t *q, mblk_t *first_mp, ipsec_policy_t *ipsp,
goto drop;
}
- if (!ipsec_check_ipsecin_unique(ii, data_mp, ipha, ip6h,
- &reason, &counter))
+ if (!ipsec_check_ipsecin_unique(ii, &reason, &counter, pkt_unique))
goto drop;
/*
@@ -1678,7 +1872,7 @@ drop:
* sleazy prefix-length-based compare.
* another inlining candidate..
*/
-static boolean_t
+boolean_t
ip_addr_match(uint8_t *addr1, int pfxlen, in6_addr_t *addr2p)
{
int offset = pfxlen>>3;
@@ -1774,10 +1968,9 @@ ipsec_find_policy_chain(ipsec_policy_t *best, ipsec_policy_t *chain,
* is not the original "best", we need to release that reference
* before returning.
*/
-static ipsec_policy_t *
-ipsec_find_policy_head(ipsec_policy_t *best,
- ipsec_policy_head_t *head, int direction, ipsec_selector_t *sel,
- int selhash)
+ipsec_policy_t *
+ipsec_find_policy_head(ipsec_policy_t *best, ipsec_policy_head_t *head,
+ int direction, ipsec_selector_t *sel)
{
ipsec_policy_t *curbest;
ipsec_policy_root_t *root;
@@ -1807,7 +2000,8 @@ ipsec_find_policy_head(ipsec_policy_t *best,
if (root->ipr_nchains > 0) {
curbest = ipsec_find_policy_chain(curbest,
- root->ipr_hash[selhash].hash_head, sel, is_icmp_inv_acq);
+ root->ipr_hash[selector_hash(sel, root)].hash_head, sel,
+ is_icmp_inv_acq);
}
curbest = ipsec_find_policy_chain(curbest, root->ipr_nonhash[af], sel,
is_icmp_inv_acq);
@@ -1842,16 +2036,14 @@ ipsec_find_policy(int direction, conn_t *connp, ipsec_out_t *io,
ipsec_selector_t *sel)
{
ipsec_policy_t *p;
- int selhash = selector_hash(sel);
- p = ipsec_find_policy_head(NULL, &system_policy, direction, sel,
- selhash);
+ p = ipsec_find_policy_head(NULL, &system_policy, direction, sel);
if ((connp != NULL) && (connp->conn_policy != NULL)) {
p = ipsec_find_policy_head(p, connp->conn_policy,
- direction, sel, selhash);
+ direction, sel);
} else if ((io != NULL) && (io->ipsec_out_polhead != NULL)) {
p = ipsec_find_policy_head(p, io->ipsec_out_polhead,
- direction, sel, selhash);
+ direction, sel);
}
return (p);
@@ -1881,6 +2073,7 @@ ipsec_check_global_policy(mblk_t *first_mp, conn_t *connp,
boolean_t policy_present;
kstat_named_t *counter;
ipsec_in_t *ii = NULL;
+ uint64_t pkt_unique;
data_mp = mctl_present ? first_mp->b_cont : first_mp;
ipsec_mp = mctl_present ? first_mp : NULL;
@@ -1921,9 +2114,14 @@ ipsec_check_global_policy(mblk_t *first_mp, conn_t *connp,
if (p != NULL) {
IPPOL_REFHOLD(p);
}
+ /*
+ * Fudge sel for UNIQUE_ID setting below.
+ */
+ pkt_unique = conn_to_unique(connp, data_mp, ipha, ip6h);
} else {
/* Initialize the ports in the selector */
- if (!ipsec_init_inbound_sel(&sel, data_mp, ipha, ip6h)) {
+ if (ipsec_init_inbound_sel(&sel, data_mp, ipha, ip6h,
+ SEL_NONE) == SELRET_NOMEM) {
/*
* Technically not a policy mismatch, but it is
* an internal failure.
@@ -1946,6 +2144,8 @@ ipsec_check_global_policy(mblk_t *first_mp, conn_t *connp,
*/
p = ipsec_find_policy(IPSEC_TYPE_INBOUND, connp, NULL, &sel);
+ pkt_unique = SA_UNIQUE_ID(sel.ips_remote_port,
+ sel.ips_local_port, sel.ips_protocol, 0);
}
if (p == NULL) {
@@ -1964,7 +2164,8 @@ ipsec_check_global_policy(mblk_t *first_mp, conn_t *connp,
}
}
if ((ii != NULL) && (ii->ipsec_in_secure))
- return (ipsec_check_ipsecin_policy(q, ipsec_mp, p, ipha, ip6h));
+ return (ipsec_check_ipsecin_policy(q, ipsec_mp, p, ipha, ip6h,
+ pkt_unique));
if (p->ipsp_act->ipa_allow_clear) {
BUMP_MIB(&ip_mib, ipsecInSucceeded);
IPPOL_REFRELE(p);
@@ -2054,8 +2255,13 @@ ipsec_inbound_accept_clear(mblk_t *mp, ipha_t *ipha, ip6_t *ip6h)
/*
* If it is not ICMP, fail this request.
*/
- if (ipha->ipha_protocol != IPPROTO_ICMP)
+ if (ipha->ipha_protocol != IPPROTO_ICMP) {
+#ifdef FRAGCACHE_DEBUG
+ cmn_err(CE_WARN, "Dropping - ipha_proto = %d\n",
+ ipha->ipha_protocol);
+#endif
return (B_FALSE);
+ }
iph_hdr_length = IPH_HDR_LENGTH(ipha);
icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
/*
@@ -2099,6 +2305,9 @@ ipsec_inbound_accept_clear(mblk_t *mp, ipha_t *ipha, ip6_t *ip6h)
* Be in sync with icmp_inbound, where we have
* already set ire_max_frag.
*/
+#ifdef FRAGCACHE_DEBUG
+ cmn_err(CE_WARN, "ICMP frag needed\n");
+#endif
return (B_TRUE);
case ICMP_HOST_UNREACHABLE:
case ICMP_NET_UNREACHABLE:
@@ -2196,6 +2405,7 @@ ipsec_check_inbound_policy(mblk_t *first_mp, conn_t *connp,
mblk_t *mp = mctl_present ? first_mp->b_cont : first_mp;
mblk_t *ipsec_mp = mctl_present ? first_mp : NULL;
ipsec_latch_t *ipl;
+ uint64_t unique_id;
ASSERT(connp != NULL);
ipl = connp->conn_latch;
@@ -2273,8 +2483,7 @@ clear:
* mp->b_cont could be either a M_CTL message
* for icmp errors being sent up or a M_DATA message.
*/
- ASSERT(mp->b_datap->db_type == M_CTL ||
- mp->b_datap->db_type == M_DATA);
+ ASSERT(mp->b_datap->db_type == M_CTL || mp->b_datap->db_type == M_DATA);
ASSERT(ii->ipsec_in_type == IPSEC_IN);
@@ -2294,7 +2503,7 @@ clear:
const char *reason;
kstat_named_t *counter;
if (ipsec_check_ipsecin_latch(ii, mp, ipl,
- ipha, ip6h, &reason, &counter)) {
+ ipha, ip6h, &reason, &counter, connp)) {
BUMP_MIB(&ip_mib, ipsecInSucceeded);
return (first_mp);
}
@@ -2314,9 +2523,10 @@ clear:
return (first_mp);
}
+ unique_id = conn_to_unique(connp, mp, ipha, ip6h);
IPPOL_REFHOLD(ipl->ipl_in_policy);
first_mp = ipsec_check_ipsecin_policy(CONNP_TO_WQ(connp), first_mp,
- ipl->ipl_in_policy, ipha, ip6h);
+ ipl->ipl_in_policy, ipha, ip6h, unique_id);
/*
* NOTE: ipsecIn{Failed,Succeeded} bumped by
* ipsec_check_ipsecin_policy().
@@ -2326,43 +2536,70 @@ clear:
return (first_mp);
}
-boolean_t
-ipsec_init_inbound_sel(ipsec_selector_t *sel, mblk_t *mp,
- ipha_t *ipha, ip6_t *ip6h)
+/*
+ * Returns:
+ *
+ * SELRET_NOMEM --> msgpullup() needed to gather things failed.
+ * SELRET_BADPKT --> If we're being called after tunnel-mode fragment
+ * gathering, the initial fragment is too short for
+ * useful data. Only returned if SEL_TUNNEL_MODE is
+ * set.
+ * SELRET_SUCCESS --> "sel" now has initialized IPsec selector data.
+ * SELRET_TUNFRAG --> This is a fragment in a tunnel-mode packet. Caller
+ * should put this packet in a fragment-gathering queue.
+ * Only returned if SEL_TUNNEL_MODE and SEL_PORT_POLICY
+ * is set.
+ */
+static selret_t
+ipsec_init_inbound_sel(ipsec_selector_t *sel, mblk_t *mp, ipha_t *ipha,
+ ip6_t *ip6h, uint8_t sel_flags)
{
uint16_t *ports;
ushort_t hdr_len;
+ int outer_hdr_len = 0; /* For ICMP tunnel-mode cases... */
mblk_t *spare_mp = NULL;
uint8_t *nexthdrp;
uint8_t nexthdr;
uint8_t *typecode;
uint8_t check_proto;
+ ip6_pkt_t ipp;
+ boolean_t port_policy_present = (sel_flags & SEL_PORT_POLICY);
+ boolean_t is_icmp = (sel_flags & SEL_IS_ICMP);
+ boolean_t tunnel_mode = (sel_flags & SEL_TUNNEL_MODE);
ASSERT((ipha == NULL && ip6h != NULL) ||
(ipha != NULL && ip6h == NULL));
if (ip6h != NULL) {
+ if (is_icmp)
+ outer_hdr_len = ((uint8_t *)ip6h) - mp->b_rptr;
+
check_proto = IPPROTO_ICMPV6;
sel->ips_isv4 = B_FALSE;
sel->ips_local_addr_v6 = ip6h->ip6_dst;
sel->ips_remote_addr_v6 = ip6h->ip6_src;
+ bzero(&ipp, sizeof (ipp));
+ (void) ip_find_hdr_v6(mp, ip6h, &ipp, NULL);
+
nexthdr = ip6h->ip6_nxt;
switch (nexthdr) {
case IPPROTO_HOPOPTS:
case IPPROTO_ROUTING:
case IPPROTO_DSTOPTS:
+ case IPPROTO_FRAGMENT:
/*
* Use ip_hdr_length_nexthdr_v6(). And have a spare
* mblk that's contiguous to feed it
*/
if ((spare_mp = msgpullup(mp, -1)) == NULL)
- return (B_FALSE);
+ return (SELRET_NOMEM);
if (!ip_hdr_length_nexthdr_v6(spare_mp,
- (ip6_t *)spare_mp->b_rptr, &hdr_len, &nexthdrp)) {
- /* Malformed packet - XXX ip_drop_packet()? */
- freemsg(spare_mp);
- return (B_FALSE);
+ (ip6_t *)(spare_mp->b_rptr + outer_hdr_len),
+ &hdr_len, &nexthdrp)) {
+ /* Malformed packet - caller frees. */
+ ipsec_freemsg_chain(spare_mp);
+ return (SELRET_BADPKT);
}
nexthdr = *nexthdrp;
/* We can just extract based on hdr_len now. */
@@ -2371,21 +2608,39 @@ ipsec_init_inbound_sel(ipsec_selector_t *sel, mblk_t *mp,
hdr_len = IPV6_HDR_LEN;
break;
}
+
+ if (port_policy_present && IS_V6_FRAGMENT(ipp) && !is_icmp) {
+ /* IPv6 Fragment */
+ ipsec_freemsg_chain(spare_mp);
+ return (SELRET_TUNFRAG);
+ }
} else {
+ if (is_icmp)
+ outer_hdr_len = ((uint8_t *)ipha) - mp->b_rptr;
check_proto = IPPROTO_ICMP;
sel->ips_isv4 = B_TRUE;
sel->ips_local_addr_v4 = ipha->ipha_dst;
sel->ips_remote_addr_v4 = ipha->ipha_src;
nexthdr = ipha->ipha_protocol;
hdr_len = IPH_HDR_LENGTH(ipha);
+
+ if (port_policy_present &&
+ IS_V4_FRAGMENT(ipha->ipha_fragment_offset_and_flags) &&
+ !is_icmp) {
+ /* IPv4 Fragment */
+ ipsec_freemsg_chain(spare_mp);
+ return (SELRET_TUNFRAG);
+ }
+
}
sel->ips_protocol = nexthdr;
- if (nexthdr != IPPROTO_TCP && nexthdr != IPPROTO_UDP &&
- nexthdr != IPPROTO_SCTP && nexthdr != check_proto) {
+ if ((nexthdr != IPPROTO_TCP && nexthdr != IPPROTO_UDP &&
+ nexthdr != IPPROTO_SCTP && nexthdr != check_proto) ||
+ (!port_policy_present && tunnel_mode)) {
sel->ips_remote_port = sel->ips_local_port = 0;
- freemsg(spare_mp); /* Always works, even if NULL. */
- return (B_TRUE);
+ ipsec_freemsg_chain(spare_mp);
+ return (SELRET_SUCCESS);
}
if (&mp->b_rptr[hdr_len] + 4 > mp->b_wptr) {
@@ -2398,11 +2653,11 @@ ipsec_init_inbound_sel(ipsec_selector_t *sel, mblk_t *mp,
ipsec_hdr_pullup_needed++;
if (spare_mp == NULL &&
(spare_mp = msgpullup(mp, -1)) == NULL) {
- return (B_FALSE);
+ return (SELRET_NOMEM);
}
- ports = (uint16_t *)&spare_mp->b_rptr[hdr_len];
+ ports = (uint16_t *)&spare_mp->b_rptr[hdr_len + outer_hdr_len];
} else {
- ports = (uint16_t *)&mp->b_rptr[hdr_len];
+ ports = (uint16_t *)&mp->b_rptr[hdr_len + outer_hdr_len];
}
if (nexthdr == check_proto) {
@@ -2410,19 +2665,17 @@ ipsec_init_inbound_sel(ipsec_selector_t *sel, mblk_t *mp,
sel->ips_icmp_type = *typecode++;
sel->ips_icmp_code = *typecode;
sel->ips_remote_port = sel->ips_local_port = 0;
- freemsg(spare_mp); /* Always works, even if NULL */
- return (B_TRUE);
+ } else {
+ sel->ips_remote_port = *ports++;
+ sel->ips_local_port = *ports;
}
-
- sel->ips_remote_port = *ports++;
- sel->ips_local_port = *ports;
- freemsg(spare_mp); /* Always works, even if NULL */
- return (B_TRUE);
+ ipsec_freemsg_chain(spare_mp);
+ return (SELRET_SUCCESS);
}
static boolean_t
ipsec_init_outbound_ports(ipsec_selector_t *sel, mblk_t *mp, ipha_t *ipha,
- ip6_t *ip6h)
+ ip6_t *ip6h, int outer_hdr_len)
{
/*
* XXX cut&paste shared with ipsec_init_inbound_sel
@@ -2445,6 +2698,7 @@ ipsec_init_outbound_ports(ipsec_selector_t *sel, mblk_t *mp, ipha_t *ipha,
case IPPROTO_HOPOPTS:
case IPPROTO_ROUTING:
case IPPROTO_DSTOPTS:
+ case IPPROTO_FRAGMENT:
/*
* Use ip_hdr_length_nexthdr_v6(). And have a spare
* mblk that's contiguous to feed it
@@ -2452,11 +2706,12 @@ ipsec_init_outbound_ports(ipsec_selector_t *sel, mblk_t *mp, ipha_t *ipha,
spare_mp = msgpullup(mp, -1);
if (spare_mp == NULL ||
!ip_hdr_length_nexthdr_v6(spare_mp,
- (ip6_t *)spare_mp->b_rptr, &hdr_len,
- &nexthdrp)) {
+ (ip6_t *)(spare_mp->b_rptr + outer_hdr_len),
+ &hdr_len, &nexthdrp)) {
/* Always works, even if NULL. */
- freemsg(spare_mp);
- freemsg(mp);
+ ipsec_freemsg_chain(spare_mp);
+ ip_drop_packet_chain(mp, B_FALSE, NULL, NULL,
+ &ipdrops_spd_nomem, &spd_dropper);
return (B_FALSE);
} else {
nexthdr = *nexthdrp;
@@ -2477,11 +2732,11 @@ ipsec_init_outbound_ports(ipsec_selector_t *sel, mblk_t *mp, ipha_t *ipha,
if (nexthdr != IPPROTO_TCP && nexthdr != IPPROTO_UDP &&
nexthdr != IPPROTO_SCTP && nexthdr != check_proto) {
sel->ips_local_port = sel->ips_remote_port = 0;
- freemsg(spare_mp); /* Always works, even if NULL. */
+ ipsec_freemsg_chain(spare_mp); /* Always works, even if NULL */
return (B_TRUE);
}
- if (&mp->b_rptr[hdr_len] + 4 > mp->b_wptr) {
+ if (&mp->b_rptr[hdr_len] + 4 + outer_hdr_len > mp->b_wptr) {
/* If we didn't pullup a copy already, do so now. */
/*
* XXX performance, will upper-layers frequently split TCP/UDP
@@ -2492,12 +2747,13 @@ ipsec_init_outbound_ports(ipsec_selector_t *sel, mblk_t *mp, ipha_t *ipha,
*/
if (spare_mp == NULL &&
(spare_mp = msgpullup(mp, -1)) == NULL) {
- freemsg(mp);
+ ip_drop_packet_chain(mp, B_FALSE, NULL, NULL,
+ &ipdrops_spd_nomem, &spd_dropper);
return (B_FALSE);
}
- ports = (uint16_t *)&spare_mp->b_rptr[hdr_len];
+ ports = (uint16_t *)&spare_mp->b_rptr[hdr_len + outer_hdr_len];
} else {
- ports = (uint16_t *)&mp->b_rptr[hdr_len];
+ ports = (uint16_t *)&mp->b_rptr[hdr_len + outer_hdr_len];
}
if (nexthdr == check_proto) {
@@ -2505,13 +2761,11 @@ ipsec_init_outbound_ports(ipsec_selector_t *sel, mblk_t *mp, ipha_t *ipha,
sel->ips_icmp_type = *typecode++;
sel->ips_icmp_code = *typecode;
sel->ips_remote_port = sel->ips_local_port = 0;
- freemsg(spare_mp); /* Always works, even if NULL */
- return (B_TRUE);
+ } else {
+ sel->ips_local_port = *ports++;
+ sel->ips_remote_port = *ports;
}
-
- sel->ips_local_port = *ports++;
- sel->ips_remote_port = *ports;
- freemsg(spare_mp); /* Always works, even if NULL */
+ ipsec_freemsg_chain(spare_mp); /* Always works, even if NULL */
return (B_TRUE);
}
@@ -2618,7 +2872,7 @@ ipsec_in_to_out_action(ipsec_in_t *ii)
* effective MTU, yielding the inner payload size which reflects a
* packet with *minimum* ESP padding..
*/
-static int32_t
+int32_t
ipsec_act_ovhd(const ipsec_act_t *act)
{
int32_t overhead = 0;
@@ -2662,8 +2916,8 @@ policy_hash(int size, const void *start, const void *end)
* into trouble from lots of collisions on ::1 addresses and the like
* (seems unlikely).
*/
-#define IPSEC_IPV4_HASH(a) ((a) % ipsec_spd_hashsize)
-#define IPSEC_IPV6_HASH(a) ((a.s6_addr32[3]) % ipsec_spd_hashsize)
+#define IPSEC_IPV4_HASH(a, n) ((a) % (n))
+#define IPSEC_IPV6_HASH(a, n) (((a).s6_addr32[3]) % (n))
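Both hash macros now take the chain count as an argument instead of hard-wiring ipsec_spd_hashsize, since tunnel polheads size their tables from tun_spd_hashsize. Note the IPv6 variant hashes only the low 32 bits of the address:

	/*
	 * Worked example with n == 5 (TUN_SPDHASH_DEFAULT): fe80::1 and
	 * ::1 differ only in their upper 96 bits, so both reduce to
	 * s6_addr32[3] % 5 and land in the same chain -- the "::1
	 * collisions" the comment above mentions.
	 */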
/*
* These two hash functions should produce coordinated values
@@ -2679,22 +2933,25 @@ selkey_hash(const ipsec_selkey_t *selkey)
if (valid & IPSL_IPV4) {
if (selkey->ipsl_remote_pfxlen == 32)
- return (IPSEC_IPV4_HASH(selkey->ipsl_remote.ipsad_v4));
+ return (IPSEC_IPV4_HASH(selkey->ipsl_remote.ipsad_v4,
+ ipsec_spd_hashsize));
}
if (valid & IPSL_IPV6) {
if (selkey->ipsl_remote_pfxlen == 128)
- return (IPSEC_IPV6_HASH(selkey->ipsl_remote.ipsad_v6));
+ return (IPSEC_IPV6_HASH(selkey->ipsl_remote.ipsad_v6,
+ ipsec_spd_hashsize));
}
return (IPSEC_SEL_NOHASH);
}
static uint32_t
-selector_hash(ipsec_selector_t *sel)
+selector_hash(ipsec_selector_t *sel, ipsec_policy_root_t *root)
{
if (sel->ips_isv4) {
- return (IPSEC_IPV4_HASH(sel->ips_remote_addr_v4));
+ return (IPSEC_IPV4_HASH(sel->ips_remote_addr_v4,
+ root->ipr_nchains));
}
- return (IPSEC_IPV6_HASH(sel->ips_remote_addr_v6));
+ return (IPSEC_IPV6_HASH(sel->ips_remote_addr_v6, root->ipr_nchains));
}
/*
@@ -2864,7 +3121,8 @@ ipsec_find_sel(ipsec_selkey_t *selkey)
!(selkey->ipsl_valid & IPSL_IPV6));
hval = selkey_hash(selkey);
- selkey->ipsl_hval = hval;
+ /* Set pol_hval to uninitialized until we put it in a polhead. */
+ selkey->ipsl_sel_hval = hval;
bucket = (hval == IPSEC_SEL_NOHASH) ? 0 : hval;
@@ -2872,7 +3130,8 @@ ipsec_find_sel(ipsec_selkey_t *selkey)
HASH_LOCK(ipsec_sel_hash, bucket);
for (HASH_ITERATE(sp, ipsl_hash, ipsec_sel_hash, bucket)) {
- if (bcmp(&sp->ipsl_key, selkey, sizeof (*selkey)) == 0)
+ if (bcmp(&sp->ipsl_key, selkey,
+ offsetof(ipsec_selkey_t, ipsl_pol_hval)) == 0)
break;
}
if (sp != NULL) {
@@ -2891,6 +3150,11 @@ ipsec_find_sel(ipsec_selkey_t *selkey)
HASH_INSERT(sp, ipsl_hash, ipsec_sel_hash, bucket);
sp->ipsl_refs = 2; /* one for hash table, one for caller */
sp->ipsl_key = *selkey;
+	/* Set to uninitialized and have insertion into polhead fix things. */
+ if (selkey->ipsl_sel_hval != IPSEC_SEL_NOHASH)
+ sp->ipsl_key.ipsl_pol_hval = 0;
+ else
+ sp->ipsl_key.ipsl_pol_hval = IPSEC_SEL_NOHASH;
HASH_UNLOCK(ipsec_sel_hash, bucket);
@@ -2901,7 +3165,7 @@ static void
ipsec_sel_rel(ipsec_sel_t **spp)
{
ipsec_sel_t *sp = *spp;
- int hval = sp->ipsl_key.ipsl_hval;
+ int hval = sp->ipsl_key.ipsl_sel_hval;
*spp = NULL;
if (hval == IPSEC_SEL_NOHASH)
@@ -2942,12 +3206,15 @@ ipsec_policy_free(ipsec_policy_t *ipp)
*/
ipsec_policy_t *
ipsec_policy_create(ipsec_selkey_t *keys, const ipsec_act_t *a,
- int nacts, int prio)
+ int nacts, int prio, uint64_t *index_ptr)
{
ipsec_action_t *ap;
ipsec_sel_t *sp;
ipsec_policy_t *ipp;
+ if (index_ptr == NULL)
+ index_ptr = &ipsec_next_policy_index;
+
ipp = kmem_cache_alloc(ipsec_pol_cache, KM_NOSLEEP);
ap = ipsec_act_find(a, nacts);
sp = ipsec_find_sel(keys);
@@ -2969,7 +3236,8 @@ ipsec_policy_create(ipsec_selkey_t *keys, const ipsec_act_t *a,
ipp->ipsp_sel = sp;
ipp->ipsp_act = ap;
ipp->ipsp_prio = prio; /* rule priority */
- ipp->ipsp_index = ipsec_next_policy_index++;
+ ipp->ipsp_index = *index_ptr;
+ (*index_ptr)++;
return (ipp);
}
@@ -3018,10 +3286,10 @@ ipsec_policy_delete(ipsec_policy_head_t *php, ipsec_selkey_t *keys, int dir)
rw_enter(&php->iph_lock, RW_WRITER);
- if (keys->ipsl_hval == IPSEC_SEL_NOHASH) {
+ if (sp->ipsl_key.ipsl_pol_hval == IPSEC_SEL_NOHASH) {
head = pr->ipr_nonhash[af];
} else {
- head = pr->ipr_hash[keys->ipsl_hval].hash_head;
+ head = pr->ipr_hash[sp->ipsl_key.ipsl_pol_hval].hash_head;
}
for (ip = head; ip != NULL; ip = nip) {
@@ -3096,7 +3364,8 @@ ipsec_policy_delete_index(ipsec_policy_head_t *php, uint64_t policy_index)
/*
* Given a constructed ipsec_policy_t policy rule, see if it can be entered
- * into the correct policy ruleset.
+ * into the correct policy ruleset. As a side-effect, it sets the policy
+ * hash value (ipsl_pol_hval) on "ipp"'s selector key.
*
* Returns B_TRUE if it can be entered, B_FALSE if it can't be (because a
* duplicate policy exists with exactly the same selectors), or an icmp
@@ -3129,10 +3398,17 @@ ipsec_check_policy(ipsec_policy_head_t *php, ipsec_policy_t *ipp, int direction)
* Because selectors are interned below, we need only compare pointers
* for equality.
*/
- if (selkey->ipsl_hval == IPSEC_SEL_NOHASH) {
+ if (selkey->ipsl_sel_hval == IPSEC_SEL_NOHASH) {
head = pr->ipr_nonhash[af];
} else {
- head = pr->ipr_hash[selkey->ipsl_hval].hash_head;
+ selkey->ipsl_pol_hval =
+ (selkey->ipsl_valid & IPSL_IPV4) ?
+ IPSEC_IPV4_HASH(selkey->ipsl_remote.ipsad_v4,
+ pr->ipr_nchains) :
+ IPSEC_IPV6_HASH(selkey->ipsl_remote.ipsad_v6,
+ pr->ipr_nchains);
+
+ head = pr->ipr_hash[selkey->ipsl_pol_hval].hash_head;
}
for (p2 = head; p2 != NULL; p2 = p2->ipsp_hash.hash_next) {
@@ -3275,7 +3551,7 @@ ipsec_enter_policy(ipsec_policy_head_t *php, ipsec_policy_t *ipp, int direction)
ipsec_policy_root_t *pr = &php->iph_root[direction];
ipsec_selkey_t *selkey = &ipp->ipsp_sel->ipsl_key;
uint32_t valid = selkey->ipsl_valid;
- uint32_t hval = selkey->ipsl_hval;
+ uint32_t hval = selkey->ipsl_pol_hval;
int af = -1;
ASSERT(RW_WRITE_HELD(&php->iph_lock));
@@ -3329,7 +3605,6 @@ ipsec_ipr_flush(ipsec_policy_head_t *php, ipsec_policy_root_t *ipr)
}
}
-
void
ipsec_polhead_flush(ipsec_policy_head_t *php)
{
@@ -3346,11 +3621,22 @@ ipsec_polhead_flush(ipsec_policy_head_t *php)
void
ipsec_polhead_free(ipsec_policy_head_t *php)
{
+ int dir;
+
ASSERT(php->iph_refs == 0);
rw_enter(&php->iph_lock, RW_WRITER);
ipsec_polhead_flush(php);
rw_exit(&php->iph_lock);
rw_destroy(&php->iph_lock);
+ for (dir = 0; dir < IPSEC_NTYPES; dir++) {
+ ipsec_policy_root_t *ipr = &php->iph_root[dir];
+ int chain;
+
+ for (chain = 0; chain < ipr->ipr_nchains; chain++)
+ mutex_destroy(&(ipr->ipr_hash[chain].hash_lock));
+
+ }
+ ipsec_polhead_free_table(php);
kmem_free(php, sizeof (*php));
}
@@ -3367,7 +3653,7 @@ ipsec_ipr_init(ipsec_policy_root_t *ipr)
}
}
-extern ipsec_policy_head_t *
+ipsec_policy_head_t *
ipsec_polhead_create(void)
{
ipsec_policy_head_t *php;
@@ -3394,7 +3680,7 @@ ipsec_polhead_create(void)
* old one and return the only reference to the new one.
* If the old one had a refcount of 1, just return it.
*/
-extern ipsec_policy_head_t *
+ipsec_policy_head_t *
ipsec_polhead_split(ipsec_policy_head_t *php)
{
ipsec_policy_head_t *nphp;
@@ -3494,7 +3780,7 @@ ipsec_in_to_out(mblk_t *ipsec_mp, ipha_t *ipha, ip6_t *ip6h)
io->ipsec_out_frtn.free_arg = (char *)io;
io->ipsec_out_act = reflect_action;
- if (!ipsec_init_outbound_ports(&sel, mp, ipha, ip6h))
+ if (!ipsec_init_outbound_ports(&sel, mp, ipha, ip6h, 0))
return (B_FALSE);
io->ipsec_out_src_port = sel.ips_local_port;
@@ -3570,7 +3856,8 @@ ipsec_out_tag(mblk_t *mp, mblk_t *cont)
nmp = ipsec_alloc_ipsec_out();
if (nmp == NULL) {
- freemsg(cont); /* XXX ip_drop_packet() ? */
+ ip_drop_packet_chain(cont, B_FALSE, NULL, NULL,
+ &ipdrops_spd_nomem, &spd_dropper);
return (NULL);
}
ASSERT(nmp->b_datap->db_type == M_CTL);
@@ -3829,8 +4116,8 @@ ipsec_init_ipsec_out(mblk_t *ipsec_mp, conn_t *connp, ipsec_policy_t *pol,
* it from the packet.
*/
- if (!ipsec_init_outbound_ports(&sel, mp, ipha, ip6h)) {
- /* XXX any cleanup required here?? */
+ if (!ipsec_init_outbound_ports(&sel, mp, ipha, ip6h, 0)) {
+ /* Callee did ip_drop_packet(). */
return (NULL);
}
io->ipsec_out_src_port = sel.ips_local_port;
@@ -3854,7 +4141,16 @@ ipsec_init_ipsec_out(mblk_t *ipsec_mp, conn_t *connp, ipsec_policy_t *pol,
IPPH_REFHOLD(connp->conn_policy);
io->ipsec_out_polhead = connp->conn_policy;
}
+ } else {
+ /* Handle explicit drop action. */
+ if (p->ipsp_act->ipa_act.ipa_type == IPSEC_ACT_DISCARD ||
+ p->ipsp_act->ipa_act.ipa_type == IPSEC_ACT_REJECT) {
+ ip_drop_packet(ipsec_mp, B_FALSE, NULL, NULL,
+ &ipdrops_spd_explicit, &spd_dropper);
+ ipsec_mp = NULL;
+ }
}
+
return (ipsec_mp);
}
@@ -4013,6 +4309,7 @@ ip_wput_attach_policy(mblk_t *ipsec_mp, ipha_t *ipha, ip6_t *ip6h, ire_t *ire,
ipsec_mp = mp;
io = NULL;
}
+ ASSERT(io == NULL || !io->ipsec_out_tunnel);
}
if (((io == NULL) || (io->ipsec_out_polhead == NULL)) &&
((connp == NULL) || (connp->conn_policy == NULL)))
@@ -4045,6 +4342,7 @@ ip_wput_attach_policy(mblk_t *ipsec_mp, ipha_t *ipha, ip6_t *ip6h, ire_t *ire,
ipsec_mp = mp;
io = NULL;
}
+ ASSERT(io == NULL || !io->ipsec_out_tunnel);
}
if (ipha != NULL) {
@@ -4104,15 +4402,14 @@ ip_wput_attach_policy(mblk_t *ipsec_mp, ipha_t *ipha, ip6_t *ip6h, ire_t *ire,
}
}
- if (!ipsec_init_outbound_ports(&sel, mp, ipha, ip6h)) {
+ if (!ipsec_init_outbound_ports(&sel, mp, ipha, ip6h, 0)) {
if (ipha != NULL) {
BUMP_MIB(&ip_mib, ipOutDiscards);
} else {
BUMP_MIB(&ip6_mib, ipv6OutDiscards);
}
- ip_drop_packet(ipsec_mp, B_FALSE, NULL, NULL,
- &ipdrops_spd_nomem, &spd_dropper);
+ /* Callee dropped the packet. */
return (NULL);
}
@@ -4832,3 +5129,1541 @@ ipsec_unregister_prov_update(void)
if (prov_update_handle != NULL)
crypto_unnotify_events(prov_update_handle);
}
+
+/*
+ * Tunnel-mode support routines.
+ */
+
+/*
+ * Returns an mblk chain suitable for putnext() if policies match and IPsec
+ * SAs are available. If there's no per-tunnel policy, or a match comes back
+ * with no match, then still return the packet and have global policy take
+ * a crack at it in IP.
+ *
+ * Remember -> we can be forwarding packets. Keep that in mind w.r.t.
+ * inner-packet contents.
+ */
+mblk_t *
+ipsec_tun_outbound(mblk_t *mp, tun_t *atp, ipha_t *inner_ipv4,
+ ip6_t *inner_ipv6, ipha_t *outer_ipv4, ip6_t *outer_ipv6, int outer_hdr_len)
+{
+ ipsec_tun_pol_t *itp = atp->tun_itp;
+ ipsec_policy_head_t *polhead;
+ ipsec_selector_t sel;
+ mblk_t *ipsec_mp, *ipsec_mp_head, *nmp;
+ mblk_t *spare_mp = NULL;
+ ipsec_out_t *io;
+ boolean_t is_fragment;
+ ipsec_policy_t *pol;
+
+ ASSERT(outer_ipv6 != NULL && outer_ipv4 == NULL ||
+ outer_ipv4 != NULL && outer_ipv6 == NULL);
+ /* We take care of inners in a bit. */
+
+ /* No policy on this tunnel - let global policy have at it. */
+ if (itp == NULL || !(itp->itp_flags & ITPF_P_ACTIVE))
+ return (mp);
+ polhead = itp->itp_policy;
+
+ bzero(&sel, sizeof (sel));
+ if (inner_ipv4 != NULL) {
+ ASSERT(inner_ipv6 == NULL);
+ sel.ips_isv4 = B_TRUE;
+ sel.ips_local_addr_v4 = inner_ipv4->ipha_src;
+ sel.ips_remote_addr_v4 = inner_ipv4->ipha_dst;
+ sel.ips_protocol = (uint8_t)inner_ipv4->ipha_protocol;
+ is_fragment =
+ IS_V4_FRAGMENT(inner_ipv4->ipha_fragment_offset_and_flags);
+ } else {
+ ASSERT(inner_ipv6 != NULL);
+ sel.ips_isv4 = B_FALSE;
+ sel.ips_local_addr_v6 = inner_ipv6->ip6_src;
+ /* Use ip_get_dst_v6() just for the fragment bit. */
+ sel.ips_remote_addr_v6 = ip_get_dst_v6(inner_ipv6,
+ &is_fragment);
+ /*
+ * Reset, because we don't care about routing-header dests
+ * in the forwarding/tunnel path.
+ */
+ sel.ips_remote_addr_v6 = inner_ipv6->ip6_dst;
+ }
+
+ if (itp->itp_flags & ITPF_P_PER_PORT_SECURITY) {
+ if (is_fragment) {
+ ipha_t *oiph;
+ ipha_t *iph = NULL;
+ ip6_t *ip6h = NULL;
+ int hdr_len;
+ uint16_t ip6_hdr_length;
+ uint8_t v6_proto;
+ uint8_t *v6_proto_p;
+
+ /*
+ * We have a fragment we need to track!
+ */
+ mp = ipsec_fragcache_add(&itp->itp_fragcache, NULL, mp,
+ outer_hdr_len);
+ if (mp == NULL)
+ return (NULL);
+
+ /*
+ * If we get here, we have a full
+ * fragment chain
+ */
+
+ oiph = (ipha_t *)mp->b_rptr;
+ if (IPH_HDR_VERSION(oiph) == IPV4_VERSION) {
+ hdr_len = ((outer_hdr_len != 0) ?
+ IPH_HDR_LENGTH(oiph) : 0);
+ iph = (ipha_t *)(mp->b_rptr + hdr_len);
+ } else {
+ ASSERT(IPH_HDR_VERSION(oiph) == IPV6_VERSION);
+ if ((spare_mp = msgpullup(mp, -1)) == NULL) {
+ ip_drop_packet_chain(mp, B_FALSE,
+ NULL, NULL, &ipdrops_spd_nomem,
+ &spd_dropper);
+					return (NULL);
+				}
+ ip6h = (ip6_t *)spare_mp->b_rptr;
+ (void) ip_hdr_length_nexthdr_v6(spare_mp, ip6h,
+ &ip6_hdr_length, &v6_proto_p);
+ hdr_len = ip6_hdr_length;
+ }
+ outer_hdr_len = hdr_len;
+
+ if (sel.ips_isv4) {
+ if (iph == NULL) {
+ /* Was v6 outer */
+ iph = (ipha_t *)(mp->b_rptr + hdr_len);
+ }
+ inner_ipv4 = iph;
+ sel.ips_local_addr_v4 = inner_ipv4->ipha_src;
+ sel.ips_remote_addr_v4 = inner_ipv4->ipha_dst;
+ sel.ips_protocol =
+ (uint8_t)inner_ipv4->ipha_protocol;
+ } else {
+ if ((spare_mp == NULL) &&
+ ((spare_mp = msgpullup(mp, -1)) == NULL)) {
+ ip_drop_packet_chain(mp, B_FALSE,
+ NULL, NULL, &ipdrops_spd_nomem,
+ &spd_dropper);
+					return (NULL);
+				}
+ inner_ipv6 = (ip6_t *)(spare_mp->b_rptr +
+ hdr_len);
+ sel.ips_local_addr_v6 = inner_ipv6->ip6_src;
+ sel.ips_remote_addr_v6 = inner_ipv6->ip6_dst;
+ (void) ip_hdr_length_nexthdr_v6(spare_mp,
+ inner_ipv6, &ip6_hdr_length,
+ &v6_proto_p);
+ v6_proto = *v6_proto_p;
+ sel.ips_protocol = v6_proto;
+#ifdef FRAGCACHE_DEBUG
+ cmn_err(CE_WARN, "v6_sel.ips_protocol = %d\n",
+ sel.ips_protocol);
+#endif
+ }
+ /* Ports are extracted below */
+ }
+
+ /* Get ports... */
+ if (spare_mp != NULL) {
+ if (!ipsec_init_outbound_ports(&sel, spare_mp,
+ inner_ipv4, inner_ipv6, outer_hdr_len)) {
+ /*
+ * callee did ip_drop_packet_chain() on
+ * spare_mp
+ */
+ ipsec_freemsg_chain(mp);
+ return (NULL);
+ }
+ } else {
+ if (!ipsec_init_outbound_ports(&sel, mp,
+ inner_ipv4, inner_ipv6, outer_hdr_len)) {
+ /* callee did ip_drop_packet_chain() on mp. */
+ return (NULL);
+ }
+ }
+#ifdef FRAGCACHE_DEBUG
+ if (inner_ipv4 != NULL)
+ cmn_err(CE_WARN,
+ "(v4) sel.ips_protocol = %d, "
+ "sel.ips_local_port = %d, "
+ "sel.ips_remote_port = %d\n",
+ sel.ips_protocol, ntohs(sel.ips_local_port),
+ ntohs(sel.ips_remote_port));
+ if (inner_ipv6 != NULL)
+ cmn_err(CE_WARN,
+ "(v6) sel.ips_protocol = %d, "
+ "sel.ips_local_port = %d, "
+ "sel.ips_remote_port = %d\n",
+ sel.ips_protocol, ntohs(sel.ips_local_port),
+ ntohs(sel.ips_remote_port));
+#endif
+ /* Success so far - done with spare_mp */
+ ipsec_freemsg_chain(spare_mp);
+ }
+ rw_enter(&polhead->iph_lock, RW_READER);
+ pol = ipsec_find_policy_head(NULL, polhead, IPSEC_TYPE_OUTBOUND, &sel);
+ rw_exit(&polhead->iph_lock);
+ if (pol == NULL) {
+ /*
+ * No matching policy on this tunnel, drop the packet.
+ *
+ * NOTE: Tunnel-mode tunnels are different from the
+ * IP global transport mode policy head. For a tunnel-mode
+ * tunnel, we drop the packet in lieu of passing it
+ * along accepted, as a global-policy miss would.
+ *
+ * NOTE2: "negotiate transport" tunnels should match ALL
+ * inbound packets, but we do not uncomment the ASSERT()
+ * below because if/when we open PF_POLICY, a user can
+ * shoot him/her-self in the foot with a 0 priority.
+ */
+
+ /* ASSERT(itp->itp_flags & ITPF_P_TUNNEL); */
+#ifdef FRAGCACHE_DEBUG
+ cmn_err(CE_WARN, "ipsec_tun_outbound(): No matching tunnel "
+ "per-port policy\n");
+#endif
+ ip_drop_packet_chain(mp, B_FALSE, NULL, NULL,
+ &ipdrops_spd_explicit, &spd_dropper);
+ return (NULL);
+ }
+
+#ifdef FRAGCACHE_DEBUG
+ cmn_err(CE_WARN, "Having matching tunnel per-port policy\n");
+#endif
+
+ /* Construct an IPSEC_OUT message. */
+ ipsec_mp = ipsec_mp_head = ipsec_alloc_ipsec_out();
+ if (ipsec_mp == NULL) {
+ IPPOL_REFRELE(pol);
+ ip_drop_packet(mp, B_FALSE, NULL, NULL, &ipdrops_spd_nomem,
+ &spd_dropper);
+ return (NULL);
+ }
+ ipsec_mp->b_cont = mp;
+ io = (ipsec_out_t *)ipsec_mp->b_rptr;
+ IPPH_REFHOLD(polhead);
+ /*
+ * NOTE: free() function of ipsec_out mblk will release polhead and
+ * pol references.
+ */
+ io->ipsec_out_polhead = polhead;
+ io->ipsec_out_policy = pol;
+ io->ipsec_out_zoneid = atp->tun_zoneid;
+ io->ipsec_out_v4 = (outer_ipv4 != NULL);
+ io->ipsec_out_secure = B_TRUE;
+
+ if (!(itp->itp_flags & ITPF_P_TUNNEL)) {
+ /* Set up transport mode for tunnelled packets. */
+ io->ipsec_out_proto = (inner_ipv4 != NULL) ? IPPROTO_ENCAP :
+ IPPROTO_IPV6;
+ return (ipsec_mp);
+ }
+
+ /* Fill in tunnel-mode goodies here. */
+ io->ipsec_out_tunnel = B_TRUE;
+ /* XXX Do I need to fill in all of the goodies here? */
+ if (inner_ipv4) {
+ io->ipsec_out_inaf = AF_INET;
+ io->ipsec_out_insrc[0] =
+ pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v4;
+ io->ipsec_out_indst[0] =
+ pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v4;
+ } else {
+ io->ipsec_out_inaf = AF_INET6;
+ io->ipsec_out_insrc[0] =
+ pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[0];
+ io->ipsec_out_insrc[1] =
+ pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[1];
+ io->ipsec_out_insrc[2] =
+ pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[2];
+ io->ipsec_out_insrc[3] =
+ pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[3];
+ io->ipsec_out_indst[0] =
+ pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[0];
+ io->ipsec_out_indst[1] =
+ pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[1];
+ io->ipsec_out_indst[2] =
+ pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[2];
+ io->ipsec_out_indst[3] =
+ pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[3];
+ }
+ io->ipsec_out_insrcpfx = pol->ipsp_sel->ipsl_key.ipsl_local_pfxlen;
+ io->ipsec_out_indstpfx = pol->ipsp_sel->ipsl_key.ipsl_remote_pfxlen;
+ /* NOTE: These are used for transport mode too. */
+ io->ipsec_out_src_port = pol->ipsp_sel->ipsl_key.ipsl_lport;
+ io->ipsec_out_dst_port = pol->ipsp_sel->ipsl_key.ipsl_rport;
+ io->ipsec_out_proto = pol->ipsp_sel->ipsl_key.ipsl_proto;
+
+ /*
+	 * The mp pointer is still valid here.
+	 * Add an ipsec_out to each fragment;
+	 * the head of the fragment chain already has one.
+ */
+ nmp = mp->b_next;
+ mp->b_next = NULL;
+ mp = nmp;
+ ASSERT(ipsec_mp != NULL);
+ while (mp != NULL) {
+ nmp = mp->b_next;
+ ipsec_mp->b_next = ipsec_out_tag(ipsec_mp_head, mp);
+ if (ipsec_mp->b_next == NULL) {
+ ip_drop_packet_chain(ipsec_mp_head, B_FALSE, NULL, NULL,
+ &ipdrops_spd_nomem, &spd_dropper);
+ ip_drop_packet_chain(mp, B_FALSE, NULL, NULL,
+ &ipdrops_spd_nomem, &spd_dropper);
+ return (NULL);
+ }
+ ipsec_mp = ipsec_mp->b_next;
+ mp->b_next = NULL;
+ mp = nmp;
+ }
+ return (ipsec_mp_head);
+}
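A hedged sketch of the consumer side in the tun module (the real call site lives in tun.c, outside this diff; queue and variable names are assumed):

	/* NULL means the chain was dropped or cached for reassembly. */
	mp = ipsec_tun_outbound(mp, atp, inner_ipv4, inner_ipv6,
	    outer_ipv4, outer_ipv6, outer_hdr_len);
	if (mp != NULL)
		putnext(q, mp);		/* IPSEC_OUT-tagged fragment chain */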
+
+/*
+ * NOTE: The following releases pol's reference and
+ * calls ip_drop_packet() for me on NULL returns.
+ */
+mblk_t *
+ipsec_check_ipsecin_policy_reasm(mblk_t *ipsec_mp, ipsec_policy_t *pol,
+ ipha_t *inner_ipv4, ip6_t *inner_ipv6, uint64_t pkt_unique)
+{
+ /* Assume ipsec_mp is a chain of b_next-linked IPSEC_IN M_CTLs. */
+ mblk_t *data_chain = NULL, *data_tail = NULL;
+ mblk_t *ii_next;
+
+ while (ipsec_mp != NULL) {
+ ii_next = ipsec_mp->b_next;
+ ipsec_mp->b_next = NULL; /* No tripping asserts. */
+
+ /*
+ * Need IPPOL_REFHOLD(pol) for extras because
+ * ipsecin_policy does the refrele.
+ */
+ IPPOL_REFHOLD(pol);
+
+ if (ipsec_check_ipsecin_policy(NULL, ipsec_mp, pol,
+ inner_ipv4, inner_ipv6, pkt_unique) != NULL) {
+ if (data_tail == NULL) {
+ /* First one */
+ data_chain = data_tail = ipsec_mp->b_cont;
+ } else {
+ data_tail->b_next = ipsec_mp->b_cont;
+ data_tail = data_tail->b_next;
+ }
+ freeb(ipsec_mp);
+ } else {
+ /*
+ * ipsec_check_ipsecin_policy() freed ipsec_mp
+ * already. Need to get rid of any extra pol
+ * references, and any remaining bits as well.
+ */
+ IPPOL_REFRELE(pol);
+ ipsec_freemsg_chain(data_chain);
+ ipsec_freemsg_chain(ii_next); /* ipdrop stats? */
+ return (NULL);
+ }
+ ipsec_mp = ii_next;
+ }
+ /*
+ * One last release because either the loop bumped it up, or we never
+ * called ipsec_check_ipsecin_policy().
+ */
+ IPPOL_REFRELE(pol);
+
+ /* data_chain is ready for return to tun module. */
+ return (data_chain);
+}
+
+
+/*
+ * Returns B_TRUE if the inbound packet passed an IPsec policy check. Returns
+ * B_FALSE if it failed or if it is a fragment needing its friends before a
+ * policy check can be performed.
+ *
+ * Expects a non-NULL *data_mp, an optional ipsec_mp, and a non-NULL polhead.
+ * data_mp may be reassigned with a b_next chain of packets if fragments
+ * needed to be collected for a proper policy check.
+ *
+ * Always frees ipsec_mp, but only frees data_mp if returns B_FALSE. This
+ * function calls ip_drop_packet() on data_mp if need be.
+ *
+ * NOTE: outer_hdr_len is signed. If it's a negative value, the caller
+ * is inspecting an ICMP packet.
+ */
+boolean_t
+ipsec_tun_inbound(mblk_t *ipsec_mp, mblk_t **data_mp, ipsec_tun_pol_t *itp,
+ ipha_t *inner_ipv4, ip6_t *inner_ipv6, ipha_t *outer_ipv4,
+ ip6_t *outer_ipv6, int outer_hdr_len)
+{
+ ipsec_policy_head_t *polhead;
+ ipsec_selector_t sel;
+ mblk_t *message = (ipsec_mp == NULL) ? *data_mp : ipsec_mp;
+ ipsec_policy_t *pol;
+ uint16_t tmpport;
+ selret_t rc;
+ boolean_t retval, port_policy_present, is_icmp;
+ in6_addr_t tmpaddr;
+ uint8_t flags;
+
+ sel.ips_is_icmp_inv_acq = 0;
+
+ ASSERT(outer_ipv4 != NULL && outer_ipv6 == NULL ||
+ outer_ipv4 == NULL && outer_ipv6 != NULL);
+ ASSERT(inner_ipv4 != NULL && inner_ipv6 == NULL ||
+ inner_ipv4 == NULL && inner_ipv6 != NULL);
+ ASSERT(message == *data_mp || message->b_cont == *data_mp);
+
+ if (outer_hdr_len < 0) {
+ outer_hdr_len = (-outer_hdr_len);
+ is_icmp = B_TRUE;
+ } else {
+ is_icmp = B_FALSE;
+ }
+
+ if (itp != NULL && (itp->itp_flags & ITPF_P_ACTIVE)) {
+ polhead = itp->itp_policy;
+ /*
+ * We need to perform full Tunnel-Mode enforcement,
+ * and we need to have inner-header data for such enforcement.
+ *
+ * See ipsec_init_inbound_sel() for the 0x80000000 on inbound
+ * and on return.
+ */
+
+ port_policy_present = ((itp->itp_flags &
+ ITPF_P_PER_PORT_SECURITY) ? B_TRUE : B_FALSE);
+ flags = ((port_policy_present ? SEL_PORT_POLICY : SEL_NONE) |
+ (is_icmp ? SEL_IS_ICMP : SEL_NONE) | SEL_TUNNEL_MODE);
+
+ rc = ipsec_init_inbound_sel(&sel, *data_mp, inner_ipv4,
+ inner_ipv6, flags);
+
+ switch (rc) {
+ case SELRET_NOMEM:
+ ip_drop_packet(message, B_TRUE, NULL, NULL,
+ &ipdrops_spd_nomem, &spd_dropper);
+ return (B_FALSE);
+ case SELRET_TUNFRAG:
+ /*
+ * At this point, if we're cleartext, we don't want
+ * to go there.
+ */
+ if (ipsec_mp == NULL) {
+ ip_drop_packet(*data_mp, B_TRUE, NULL, NULL,
+ &ipdrops_spd_got_clear, &spd_dropper);
+ *data_mp = NULL;
+ return (B_FALSE);
+ }
+ ASSERT(((ipsec_in_t *)ipsec_mp->b_rptr)->
+ ipsec_in_secure);
+ message = ipsec_fragcache_add(&itp->itp_fragcache,
+ ipsec_mp, *data_mp, outer_hdr_len);
+
+ if (message == NULL) {
+ /*
+ * Data is cached, fragment chain is not
+ * complete. I consume ipsec_mp and data_mp
+ */
+ return (B_FALSE);
+ }
+
+ /*
+ * If we get here, we have a full fragment chain.
+ * Reacquire headers and selectors from first fragment.
+ */
+ if (inner_ipv4 != NULL) {
+ inner_ipv4 = (ipha_t *)message->b_cont->b_rptr;
+ ASSERT(message->b_cont->b_wptr -
+ message->b_cont->b_rptr > sizeof (ipha_t));
+ } else {
+ inner_ipv6 = (ip6_t *)message->b_cont->b_rptr;
+ ASSERT(message->b_cont->b_wptr -
+ message->b_cont->b_rptr > sizeof (ip6_t));
+ }
+ /* Use SEL_NONE so we always get ports! */
+ rc = ipsec_init_inbound_sel(&sel, message->b_cont,
+ inner_ipv4, inner_ipv6, SEL_NONE);
+ switch (rc) {
+ case SELRET_SUCCESS:
+ /*
+ * Get to same place as first caller's
+ * SELRET_SUCCESS case.
+ */
+ break;
+ case SELRET_NOMEM:
+ ip_drop_packet_chain(message, B_TRUE, NULL,
+ NULL, &ipdrops_spd_nomem, &spd_dropper);
+ return (B_FALSE);
+ case SELRET_BADPKT:
+ ip_drop_packet_chain(message, B_TRUE, NULL,
+ NULL, &ipdrops_spd_malformed_frag,
+ &spd_dropper);
+ return (B_FALSE);
+ case SELRET_TUNFRAG:
+ cmn_err(CE_WARN, "(TUNFRAG on 2nd call...)");
+ /* FALLTHRU */
+ default:
+ cmn_err(CE_WARN, "ipsec_init_inbound_sel(mark2)"
+ " returns bizarro 0x%x", rc);
+ /* Guaranteed panic! */
+ ASSERT(rc == SELRET_NOMEM);
+ return (B_FALSE);
+ }
+ /* FALLTHRU */
+ case SELRET_SUCCESS:
+ /*
+ * Common case:
+ * No per-port policy or a non-fragment. Keep going.
+ */
+ break;
+ case SELRET_BADPKT:
+ /*
+ * We may receive ICMP (with IPv6 inner) packets that
+ * trigger this return value. Send 'em in for
+ * enforcement checking.
+ */
+ cmn_err(CE_NOTE, "ipsec_tun_inbound(): "
+ "sending 'bad packet' in for enforcement");
+ break;
+ default:
+ cmn_err(CE_WARN,
+ "ipsec_init_inbound_sel() returns bizarro 0x%x",
+ rc);
+ ASSERT(rc == SELRET_NOMEM); /* Guaranteed panic! */
+ return (B_FALSE);
+ }
+
+ if (is_icmp) {
+ /*
+ * Swap local/remote because this is an ICMP packet.
+ */
+ tmpaddr = sel.ips_local_addr_v6;
+ sel.ips_local_addr_v6 = sel.ips_remote_addr_v6;
+ sel.ips_remote_addr_v6 = tmpaddr;
+ tmpport = sel.ips_local_port;
+ sel.ips_local_port = sel.ips_remote_port;
+ sel.ips_remote_port = tmpport;
+ }
+
+		/* Search this tunnel's policy head for an inbound match. */
+ rw_enter(&polhead->iph_lock, RW_READER);
+ pol = ipsec_find_policy_head(NULL, polhead, IPSEC_TYPE_INBOUND,
+ &sel);
+ rw_exit(&polhead->iph_lock);
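+		/*
+		 * A matching per-tunnel entry.  For cleartext packets the
+		 * action's ipa_allow_clear bit decides everything;
+		 * protected packets go through full inbound policy
+		 * checking below.
+		 */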
+ if (pol != NULL) {
+ if (ipsec_mp == NULL ||
+ !((ipsec_in_t *)ipsec_mp->b_rptr)->
+ ipsec_in_secure) {
+ retval = pol->ipsp_act->ipa_allow_clear;
+ if (!retval) {
+ /*
+ * XXX should never get here with
+ * tunnel reassembled fragments?
+ */
+ ASSERT(message->b_next == NULL);
+ ip_drop_packet(message, B_TRUE, NULL,
+ NULL, &ipdrops_spd_got_clear,
+ &spd_dropper);
+ } else if (ipsec_mp != NULL) {
+ freeb(ipsec_mp);
+ }
+
+ IPPOL_REFRELE(pol);
+ return (retval);
+ }
+ /*
+ * NOTE: The following releases pol's reference and
+ * calls ip_drop_packet() for me on NULL returns.
+ *
+ * "sel" is still good here, so let's use it!
+ */
+ *data_mp = ipsec_check_ipsecin_policy_reasm(message,
+ pol, inner_ipv4, inner_ipv6, SA_UNIQUE_ID(
+ sel.ips_remote_port, sel.ips_local_port,
+ (inner_ipv4 == NULL) ? IPPROTO_IPV6 :
+ IPPROTO_ENCAP, sel.ips_protocol));
+ return (*data_mp != NULL);
+ }
+
+ /*
+ * Else fallthru and check the global policy on the outer
+ * header(s) if this tunnel is an old-style transport-mode
+ * one. Drop the packet explicitly (no policy entry) for
+ * a new-style tunnel-mode tunnel.
+ */
+ if ((itp->itp_flags & ITPF_P_TUNNEL) && !is_icmp) {
+ ip_drop_packet_chain(message, B_TRUE, NULL,
+ NULL, &ipdrops_spd_explicit, &spd_dropper);
+ return (B_FALSE);
+ }
+ }
+
+ /*
+ * NOTE: If we reach here, we will not have packet chains from
+ * fragcache_add(), because the only way I get chains is on a
+ * tunnel-mode tunnel, which either returns with a pass, or gets
+ * hit by the ip_drop_packet_chain() call right above here.
+ */
+
+ /* If no per-tunnel security, check global policy now. */
+ if (ipsec_mp != NULL &&
+ (((outer_ipv4 != NULL) && !ipsec_inbound_v4_policy_present) ||
+ ((outer_ipv6 != NULL) && !ipsec_inbound_v6_policy_present))) {
+ if (((ipsec_in_t *)(ipsec_mp->b_rptr))->
+ ipsec_in_icmp_loopback) {
+			/*
+			 * This is an ICMP message with an ipsec_mp
+			 * attached; accept it.  (The enclosing test
+			 * guarantees ipsec_mp is non-NULL here, so free
+			 * it unconditionally.)
+			 */
+			freeb(ipsec_mp);
+ return (B_TRUE);
+ }
+
+ ip_drop_packet(ipsec_mp, B_TRUE, NULL, NULL,
+ &ipdrops_spd_got_secure, &spd_dropper);
+ return (B_FALSE);
+ }
+
+ /* NOTE: Frees message if it returns NULL. */
+ if (ipsec_check_global_policy(message, NULL, outer_ipv4, outer_ipv6,
+ (ipsec_mp != NULL)) == NULL) {
+ return (B_FALSE);
+ }
+
+ if (ipsec_mp != NULL)
+ freeb(ipsec_mp);
+
+ /*
+ * At this point, we pretend it's a cleartext accepted
+ * packet.
+ */
+ return (B_TRUE);
+}
+
+/*
+ * AVL comparison routine for the tree of tunnel policies, keyed by
+ * tunnel name.
+ */
+static int
+tunnel_compare(const void *arg1, const void *arg2)
+{
+ ipsec_tun_pol_t *left, *right;
+ int rc;
+
+ left = (ipsec_tun_pol_t *)arg1;
+ right = (ipsec_tun_pol_t *)arg2;
+
+ rc = strncmp(left->itp_name, right->itp_name, LIFNAMSIZ);
+ return (rc == 0 ? rc : (rc > 0 ? 1 : -1));
+}
+
+/*
+ * Free a tunnel policy node.
+ */
+void
+itp_free(ipsec_tun_pol_t *node)
+{
+ IPPH_REFRELE(node->itp_policy);
+ IPPH_REFRELE(node->itp_inactive);
+ mutex_destroy(&node->itp_lock);
+ kmem_free(node, sizeof (*node));
+}
+
+void
+itp_unlink(ipsec_tun_pol_t *node)
+{
+ rw_enter(&tunnel_policy_lock, RW_WRITER);
+ tunnel_policy_gen++;
+ ipsec_fragcache_uninit(&node->itp_fragcache);
+ avl_remove(&tunnel_policies, node);
+ rw_exit(&tunnel_policy_lock);
+ ITP_REFRELE(node);
+}
+
+/*
+ * Public interface to look up a tunnel security policy by name. Used by
+ * spdsock mostly. Returns "node" with a bumped refcnt.
+ */
+ipsec_tun_pol_t *
+get_tunnel_policy(char *name)
+{
+ ipsec_tun_pol_t *node, lookup;
+
+ (void) strncpy(lookup.itp_name, name, LIFNAMSIZ);
+
+ rw_enter(&tunnel_policy_lock, RW_READER);
+ node = (ipsec_tun_pol_t *)avl_find(&tunnel_policies, &lookup, NULL);
+ if (node != NULL) {
+ ITP_REFHOLD(node);
+ }
+ rw_exit(&tunnel_policy_lock);
+
+ return (node);
+}
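+
+/*
+ * A hypothetical caller sketch (not taken from any real consumer) --
+ * the returned reference must be balanced with ITP_REFRELE():
+ *
+ *	ipsec_tun_pol_t *itp = get_tunnel_policy(tunname);
+ *
+ *	if (itp != NULL) {
+ *		... inspect itp->itp_policy ...
+ *		ITP_REFRELE(itp);
+ *	}
+ */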
+
+/*
+ * Public interface to walk all tunnel security policies.  Useful for spdsock
+ * DUMP operations.  iterator() will not consume a reference.
+ */
+void
+itp_walk(void (*iterator)(ipsec_tun_pol_t *, void *), void *arg)
+{
+ ipsec_tun_pol_t *node;
+
+ rw_enter(&tunnel_policy_lock, RW_READER);
+ for (node = avl_first(&tunnel_policies); node != NULL;
+ node = AVL_NEXT(&tunnel_policies, node)) {
+ iterator(node, arg);
+ }
+ rw_exit(&tunnel_policy_lock);
+}
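+
+/*
+ * Hypothetical iterator sketch for itp_walk() -- e.g. counting nodes for
+ * a DUMP size estimate:
+ *
+ *	static void
+ *	itp_counter(ipsec_tun_pol_t *itp, void *arg)
+ *	{
+ *		(*(uint_t *)arg)++;
+ *	}
+ *
+ *	uint_t count = 0;
+ *	itp_walk(itp_counter, &count);
+ */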
+
+/*
+ * Initialize policy head. This can only fail if there's a memory problem.
+ */
+static boolean_t
+tunnel_polhead_init(ipsec_policy_head_t *iph)
+{
+ rw_init(&iph->iph_lock, NULL, RW_DEFAULT, NULL);
+ iph->iph_refs = 1;
+ iph->iph_gen = 0;
+ if (ipsec_alloc_table(iph, tun_spd_hashsize, KM_SLEEP, B_FALSE) != 0) {
+ ipsec_polhead_free_table(iph);
+ return (B_FALSE);
+ }
+ ipsec_polhead_init(iph, tun_spd_hashsize);
+ return (B_TRUE);
+}
+
+/*
+ * Create a tunnel policy node with "name".  Sets *errno to ENOMEM on
+ * allocation failure, or to EEXIST if a node with that name already
+ * exists.
+ */
+ipsec_tun_pol_t *
+create_tunnel_policy(char *name, int *errno, uint64_t *gen)
+{
+ ipsec_tun_pol_t *newbie, *existing;
+ avl_index_t where;
+
+ newbie = kmem_zalloc(sizeof (*newbie), KM_NOSLEEP);
+ if (newbie == NULL) {
+ *errno = ENOMEM;
+ return (NULL);
+ }
+ if (!ipsec_fragcache_init(&newbie->itp_fragcache)) {
+ kmem_free(newbie, sizeof (*newbie));
+ *errno = ENOMEM;
+ return (NULL);
+ }
+
+ (void) strncpy(newbie->itp_name, name, LIFNAMSIZ);
+
+ rw_enter(&tunnel_policy_lock, RW_WRITER);
+ existing = (ipsec_tun_pol_t *)avl_find(&tunnel_policies, newbie,
+ &where);
+	if (existing != NULL) {
+		/*
+		 * newbie is only half-constructed (no polheads, no
+		 * itp_lock yet), so dismantle it by hand instead of
+		 * calling itp_free().
+		 */
+		*errno = EEXIST;
+		rw_exit(&tunnel_policy_lock);
+		ipsec_fragcache_uninit(&newbie->itp_fragcache);
+		kmem_free(newbie, sizeof (*newbie));
+		return (NULL);
+	}
+ tunnel_policy_gen++;
+ *gen = tunnel_policy_gen;
+ newbie->itp_refcnt = 2; /* One for the caller, one for the tree. */
+ newbie->itp_next_policy_index = 1;
+ avl_insert(&tunnel_policies, newbie, where);
+ mutex_init(&newbie->itp_lock, NULL, MUTEX_DEFAULT, NULL);
+ newbie->itp_policy = kmem_zalloc(sizeof (ipsec_policy_head_t),
+ KM_NOSLEEP);
+ if (newbie->itp_policy == NULL)
+ goto nomem;
+ newbie->itp_inactive = kmem_zalloc(sizeof (ipsec_policy_head_t),
+ KM_NOSLEEP);
+	if (newbie->itp_inactive == NULL) {
+		kmem_free(newbie->itp_policy, sizeof (ipsec_policy_head_t));
+		goto nomem;
+	}
+
+ if (!tunnel_polhead_init(newbie->itp_policy)) {
+ kmem_free(newbie->itp_policy, sizeof (ipsec_policy_head_t));
+ kmem_free(newbie->itp_inactive, sizeof (ipsec_policy_head_t));
+ goto nomem;
+ } else if (!tunnel_polhead_init(newbie->itp_inactive)) {
+ IPPH_REFRELE(newbie->itp_policy);
+ kmem_free(newbie->itp_inactive, sizeof (ipsec_policy_head_t));
+ goto nomem;
+ }
+ rw_exit(&tunnel_policy_lock);
+
+ return (newbie);
+nomem:
+	*errno = ENOMEM;
+	avl_remove(&tunnel_policies, newbie);
+	rw_exit(&tunnel_policy_lock);
+	mutex_destroy(&newbie->itp_lock);
+	ipsec_fragcache_uninit(&newbie->itp_fragcache);
+	kmem_free(newbie, sizeof (*newbie));
+	return (NULL);
+}
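+
+/*
+ * A hypothetical creation sketch (spdsock-style); "tunname" is assumed to
+ * name a tunnel with no policy yet.  ENOMEM or EEXIST comes back in err,
+ * and the caller's reference is dropped with ITP_REFRELE():
+ *
+ *	int err;
+ *	uint64_t itp_gen;
+ *	ipsec_tun_pol_t *itp;
+ *
+ *	itp = create_tunnel_policy(tunname, &err, &itp_gen);
+ *	if (itp == NULL)
+ *		return (err);
+ *	...
+ *	ITP_REFRELE(itp);
+ */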
+
+/*
+ * We can't call the tun_t lookup function until the tun module is
+ * loaded, so set up this dummy function to avoid symbol-lookup errors
+ * on boot.
+ */
+/* ARGSUSED */
+ipsec_tun_pol_t *
+itp_get_byaddr_dummy(uint32_t *laddr, uint32_t *faddr, int af)
+{
+ return (NULL); /* Always return NULL. */
+}
+
+/*
+ * Frag cache code, based on SunScreen 3.2 source
+ * screen/kernel/common/screen_fragcache.c
+ */
+
+#define IPSEC_FRAG_TTL_MAX 5
+/*
+ * Note that the following parameters create 256 hash buckets
+ * with 1024 free entries to be distributed.  Entries are reclaimed
+ * periodically, and reclamation is also attempted whenever there is no
+ * free space, but this system errs on the side of dropping packets over
+ * exhausting memory.  We may make the hash factor a tunable if this
+ * proves to be a bad decision.
+ */
+#define IPSEC_FRAG_HASH_SLOTS (1<<8)
+#define IPSEC_FRAG_HASH_FACTOR 4
+#define IPSEC_FRAG_HASH_SIZE (IPSEC_FRAG_HASH_SLOTS * IPSEC_FRAG_HASH_FACTOR)
+
+#define IPSEC_FRAG_HASH_MASK (IPSEC_FRAG_HASH_SLOTS - 1)
+#define IPSEC_FRAG_HASH_FUNC(id) (((id) & IPSEC_FRAG_HASH_MASK) ^ \
+ (((id) / \
+ (ushort_t)IPSEC_FRAG_HASH_SLOTS) & \
+ IPSEC_FRAG_HASH_MASK))
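+
+/*
+ * Worked example: for id 0x1234, (0x1234 & 0xff) = 0x34 and
+ * ((0x1234 / 256) & 0xff) = 0x12, so the packet hashes to bucket
+ * 0x34 ^ 0x12 = 0x26.  Folding the high byte in keeps sequentially
+ * allocated IDs from landing in a straight run of buckets.
+ */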
+
+/* Maximum fragments per packet. 48 bytes payload x 1366 packets > 64KB */
+#define IPSEC_MAX_FRAGS 1366
+
+#define V4_FRAG_OFFSET(ipha) ((ntohs(ipha->ipha_fragment_offset_and_flags) & \
+ IPH_OFFSET) << 3)
+#define V4_MORE_FRAGS(ipha) (ntohs(ipha->ipha_fragment_offset_and_flags) & \
+ IPH_MF)
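+
+/*
+ * IPv4 fragment offsets are carried in 8-byte units, so the shift by
+ * three above converts to a byte offset: an offset field of 185, for
+ * example, means the fragment's data starts at byte 1480.
+ */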
+
+/*
+ * Initialize an ipsec fragcache instance.
+ * Returns B_FALSE if memory allocation fails.
+ */
+boolean_t
+ipsec_fragcache_init(ipsec_fragcache_t *frag)
+{
+ ipsec_fragcache_entry_t *ftemp;
+ int i;
+
+ mutex_init(&frag->itpf_lock, NULL, MUTEX_DEFAULT, NULL);
+	frag->itpf_ptr = (ipsec_fragcache_entry_t **)
+	    kmem_zalloc(sizeof (ipsec_fragcache_entry_t *) *
+	    IPSEC_FRAG_HASH_SLOTS, KM_NOSLEEP);
+ if (frag->itpf_ptr == NULL)
+ return (B_FALSE);
+
+ ftemp = (ipsec_fragcache_entry_t *)
+ kmem_zalloc(sizeof (ipsec_fragcache_entry_t) *
+ IPSEC_FRAG_HASH_SIZE, KM_NOSLEEP);
+ if (ftemp == NULL) {
+ kmem_free(frag->itpf_ptr,
+ sizeof (ipsec_fragcache_entry_t *) *
+ IPSEC_FRAG_HASH_SLOTS);
+ return (B_FALSE);
+ }
+
+ frag->itpf_freelist = NULL;
+
+ for (i = 0; i < IPSEC_FRAG_HASH_SIZE; i++) {
+ ftemp->itpfe_next = frag->itpf_freelist;
+ frag->itpf_freelist = ftemp;
+ ftemp++;
+ }
+
+ frag->itpf_expire_hint = 0;
+
+ return (B_TRUE);
+}
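+
+/*
+ * Note that the push order above leaves the first element of the
+ * entry-array allocation at the tail of the freelist;
+ * ipsec_fragcache_uninit() relies on this to walk back to the base
+ * pointer before freeing.
+ */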
+
+void
+ipsec_fragcache_uninit(ipsec_fragcache_t *frag)
+{
+ ipsec_fragcache_entry_t *fep;
+ int i;
+
+ mutex_enter(&frag->itpf_lock);
+ if (frag->itpf_ptr) {
+ /* Delete any existing fragcache entry chains */
+ for (i = 0; i < IPSEC_FRAG_HASH_SLOTS; i++) {
+ fep = (frag->itpf_ptr)[i];
+ while (fep != NULL) {
+ /* Returned fep is next in chain or NULL */
+ fep = fragcache_delentry(i, fep, frag);
+ }
+ }
+ /*
+ * Chase the pointers back to the beginning
+ * of the memory allocation and then
+ * get rid of the allocated freelist
+ */
+ while (frag->itpf_freelist->itpfe_next != NULL)
+ frag->itpf_freelist = frag->itpf_freelist->itpfe_next;
+ /*
+ * XXX - If we ever dynamically grow the freelist
+ * then we'll have to free entries individually
+ * or determine how many entries or chunks we have
+ * grown since the initial allocation.
+ */
+ kmem_free(frag->itpf_freelist,
+ sizeof (ipsec_fragcache_entry_t) *
+ IPSEC_FRAG_HASH_SIZE);
+ /* Free the fragcache structure */
+ kmem_free(frag->itpf_ptr,
+ sizeof (ipsec_fragcache_entry_t *) *
+ IPSEC_FRAG_HASH_SLOTS);
+ }
+ mutex_exit(&frag->itpf_lock);
+ mutex_destroy(&frag->itpf_lock);
+}
+
+/*
+ * Add a fragment to the fragment cache.  Consumes mp if NULL is returned.
+ * Returns the assembled chain if a whole packet has been reassembled,
+ * NULL otherwise.
+ */
+mblk_t *
+ipsec_fragcache_add(ipsec_fragcache_t *frag, mblk_t *ipsec_mp, mblk_t *mp,
+ int outer_hdr_len)
+{
+ boolean_t is_v4;
+ time_t itpf_time;
+ ipha_t *iph;
+ ipha_t *oiph;
+ ip6_t *ip6h = NULL;
+ uint8_t v6_proto;
+ uint8_t *v6_proto_p;
+ uint16_t ip6_hdr_length;
+ ip6_pkt_t ipp;
+ ip6_frag_t *fraghdr;
+ ipsec_fragcache_entry_t *fep;
+ int i;
+ mblk_t *nmp, *prevmp, *spare_mp = NULL;
+ int firstbyte, lastbyte;
+ int offset;
+ int last;
+ boolean_t inbound = (ipsec_mp != NULL);
+ mblk_t *first_mp = inbound ? ipsec_mp : mp;
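+
+	/*
+	 * Inbound packets arrive as an ipsec_in mblk with the data hanging
+	 * off b_cont, so first_mp is what the cache stores and (eventually)
+	 * b_next-chains; outbound packets are bare data mblks.
+	 */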
+
+ mutex_enter(&frag->itpf_lock);
+
+ oiph = (ipha_t *)mp->b_rptr;
+ iph = (ipha_t *)(mp->b_rptr + outer_hdr_len);
+ if (IPH_HDR_VERSION(iph) == IPV4_VERSION) {
+ is_v4 = B_TRUE;
+ } else {
+ ASSERT(IPH_HDR_VERSION(iph) == IPV6_VERSION);
+ if ((spare_mp = msgpullup(mp, -1)) == NULL) {
+ mutex_exit(&frag->itpf_lock);
+ ip_drop_packet(first_mp, inbound, NULL, NULL,
+ &ipdrops_spd_nomem, &spd_dropper);
+ return (NULL);
+ }
+ ip6h = (ip6_t *)(spare_mp->b_rptr + outer_hdr_len);
+
+		/*
+		 * Find the upper-layer protocol.  If that fails, the
+		 * packet is malformed.
+		 */
+		if (!ip_hdr_length_nexthdr_v6(spare_mp, ip6h, &ip6_hdr_length,
+		    &v6_proto_p)) {
+			mutex_exit(&frag->itpf_lock);
+			ip_drop_packet(first_mp, inbound, NULL, NULL,
+			    &ipdrops_spd_malformed_packet, &spd_dropper);
+			freemsg(spare_mp);
+			return (NULL);
+		}
+		v6_proto = *v6_proto_p;
+
+ bzero(&ipp, sizeof (ipp));
+ (void) ip_find_hdr_v6(spare_mp, ip6h, &ipp, NULL);
+ if (!(ipp.ipp_fields & IPPF_FRAGHDR)) {
+ /*
+ * We think this is a fragment, but didn't find
+ * a fragment header. Something is wrong.
+ */
+ mutex_exit(&frag->itpf_lock);
+ ip_drop_packet(first_mp, inbound, NULL, NULL,
+ &ipdrops_spd_malformed_frag, &spd_dropper);
+ freemsg(spare_mp);
+ return (NULL);
+ }
+ fraghdr = ipp.ipp_fraghdr;
+ is_v4 = B_FALSE;
+ }
+
+	/* Anything to clean up? */
+
+ /*
+ * This cleanup call could be put in a timer loop
+ * but it may actually be just as reasonable a decision to
+ * leave it here. The disadvantage is this only gets called when
+ * frags are added. The advantage is that it is not
+ * susceptible to race conditions like a time-based cleanup
+ * may be.
+ */
+ itpf_time = gethrestime_sec();
+ if (itpf_time >= frag->itpf_expire_hint)
+ ipsec_fragcache_clean(frag);
+
+ /* Lookup to see if there is an existing entry */
+
+ if (is_v4)
+ i = IPSEC_FRAG_HASH_FUNC(iph->ipha_ident);
+ else
+ i = IPSEC_FRAG_HASH_FUNC(fraghdr->ip6f_ident);
+
+ for (fep = (frag->itpf_ptr)[i]; fep; fep = fep->itpfe_next) {
+ if (is_v4) {
+ ASSERT(iph != NULL);
+ if ((fep->itpfe_id == iph->ipha_ident) &&
+ (fep->itpfe_src == iph->ipha_src) &&
+ (fep->itpfe_dst == iph->ipha_dst) &&
+ (fep->itpfe_proto == iph->ipha_protocol))
+ break;
+ } else {
+ ASSERT(fraghdr != NULL);
+ ASSERT(fep != NULL);
+ if ((fep->itpfe_id == fraghdr->ip6f_ident) &&
+ IN6_ARE_ADDR_EQUAL(&fep->itpfe_src6,
+ &ip6h->ip6_src) &&
+ IN6_ARE_ADDR_EQUAL(&fep->itpfe_dst6,
+ &ip6h->ip6_dst) && (fep->itpfe_proto == v6_proto))
+ break;
+ }
+ }
+
+ if (is_v4) {
+ firstbyte = V4_FRAG_OFFSET(iph);
+ lastbyte = firstbyte + ntohs(iph->ipha_length) -
+ IPH_HDR_LENGTH(iph);
+ last = (V4_MORE_FRAGS(iph) == 0);
+#ifdef FRAGCACHE_DEBUG
+ cmn_err(CE_WARN, "V4 fragcache: firstbyte = %d, lastbyte = %d, "
+ "last = %d, id = %d\n", firstbyte, lastbyte, last,
+ iph->ipha_ident);
+#endif
+ } else {
+ firstbyte = ntohs(fraghdr->ip6f_offlg & IP6F_OFF_MASK);
+ lastbyte = firstbyte + ntohs(ip6h->ip6_plen) +
+ sizeof (ip6_t) - ip6_hdr_length;
+ last = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG) == 0;
+#ifdef FRAGCACHE_DEBUG
+ cmn_err(CE_WARN, "V6 fragcache: firstbyte = %d, lastbyte = %d, "
+ "last = %d, id = %d, fraghdr = %p, spare_mp = %p\n",
+ firstbyte, lastbyte, last, fraghdr->ip6f_ident,
+ fraghdr, spare_mp);
+#endif
+ }
+
+	/*
+	 * Check for bogus fragments and delete the entry.  A nonzero
+	 * first-byte offset of eight bytes or less can only be an attempt
+	 * to rewrite the transport header carried in the first fragment,
+	 * since legitimate offsets are multiples of eight.
+	 */
+	if (firstbyte > 0 && firstbyte <= 8) {
+ if (fep != NULL)
+ (void) fragcache_delentry(i, fep, frag);
+ mutex_exit(&frag->itpf_lock);
+ ip_drop_packet(first_mp, inbound, NULL, NULL,
+ &ipdrops_spd_malformed_frag, &spd_dropper);
+ freemsg(spare_mp);
+ return (NULL);
+ }
+
+ /* Not found, allocate a new entry */
+ if (fep == NULL) {
+ if (frag->itpf_freelist == NULL) {
+ /* see if there is some space */
+ ipsec_fragcache_clean(frag);
+ if (frag->itpf_freelist == NULL) {
+ mutex_exit(&frag->itpf_lock);
+ ip_drop_packet(first_mp, inbound, NULL, NULL,
+ &ipdrops_spd_nomem, &spd_dropper);
+ freemsg(spare_mp);
+ return (NULL);
+ }
+ }
+
+ fep = frag->itpf_freelist;
+ frag->itpf_freelist = fep->itpfe_next;
+
+ if (is_v4) {
+ bcopy((caddr_t)&iph->ipha_src, (caddr_t)&fep->itpfe_src,
+ sizeof (struct in_addr));
+ bcopy((caddr_t)&iph->ipha_dst, (caddr_t)&fep->itpfe_dst,
+ sizeof (struct in_addr));
+ fep->itpfe_id = iph->ipha_ident;
+ fep->itpfe_proto = iph->ipha_protocol;
+ i = IPSEC_FRAG_HASH_FUNC(fep->itpfe_id);
+ } else {
+ bcopy((in6_addr_t *)&ip6h->ip6_src,
+ (in6_addr_t *)&fep->itpfe_src6,
+ sizeof (struct in6_addr));
+ bcopy((in6_addr_t *)&ip6h->ip6_dst,
+ (in6_addr_t *)&fep->itpfe_dst6,
+ sizeof (struct in6_addr));
+ fep->itpfe_id = fraghdr->ip6f_ident;
+ fep->itpfe_proto = v6_proto;
+ i = IPSEC_FRAG_HASH_FUNC(fep->itpfe_id);
+ }
+ itpf_time = gethrestime_sec();
+ fep->itpfe_exp = itpf_time + IPSEC_FRAG_TTL_MAX + 1;
+ fep->itpfe_last = 0;
+ fep->itpfe_fraglist = NULL;
+ fep->itpfe_depth = 0;
+ fep->itpfe_next = (frag->itpf_ptr)[i];
+ (frag->itpf_ptr)[i] = fep;
+
+ if (frag->itpf_expire_hint > fep->itpfe_exp)
+ frag->itpf_expire_hint = fep->itpfe_exp;
+
+ }
+ freemsg(spare_mp);
+
+ /* Insert it in the frag list */
+ /* List is in order by starting offset of fragments */
+
+ prevmp = NULL;
+ for (nmp = fep->itpfe_fraglist; nmp; nmp = nmp->b_next) {
+ ipha_t *niph;
+ ipha_t *oniph;
+ ip6_t *nip6h;
+ ip6_pkt_t nipp;
+ ip6_frag_t *nfraghdr;
+ uint16_t nip6_hdr_length;
+ uint8_t *nv6_proto_p;
+ int nfirstbyte, nlastbyte;
+ char *data, *ndata;
+ mblk_t *nspare_mp = NULL;
+ mblk_t *ndata_mp = (inbound ? nmp->b_cont : nmp);
+ int hdr_len;
+
+ oniph = (ipha_t *)mp->b_rptr;
+ nip6h = NULL;
+ niph = NULL;
+
+ /*
+ * Determine outer header type and length and set
+ * pointers appropriately
+ */
+
+ if (IPH_HDR_VERSION(oniph) == IPV4_VERSION) {
+ hdr_len = ((outer_hdr_len != 0) ?
+ IPH_HDR_LENGTH(oiph) : 0);
+ niph = (ipha_t *)(ndata_mp->b_rptr + hdr_len);
+ } else {
+ ASSERT(IPH_HDR_VERSION(oniph) == IPV6_VERSION);
+ if ((nspare_mp = msgpullup(ndata_mp, -1)) == NULL) {
+ mutex_exit(&frag->itpf_lock);
+ ip_drop_packet_chain(nmp, inbound, NULL, NULL,
+ &ipdrops_spd_nomem, &spd_dropper);
+ return (NULL);
+ }
+ nip6h = (ip6_t *)nspare_mp->b_rptr;
+ (void) ip_hdr_length_nexthdr_v6(nspare_mp, nip6h,
+ &nip6_hdr_length, &v6_proto_p);
+ hdr_len = ((outer_hdr_len != 0) ? nip6_hdr_length : 0);
+ }
+
+ /*
+ * Determine inner header type and length and set
+ * pointers appropriately
+ */
+
+ if (is_v4) {
+ if (niph == NULL) {
+ /* Was v6 outer */
+ niph = (ipha_t *)(ndata_mp->b_rptr + hdr_len);
+ }
+ nfirstbyte = V4_FRAG_OFFSET(niph);
+ nlastbyte = nfirstbyte + ntohs(niph->ipha_length) -
+ IPH_HDR_LENGTH(niph);
+ } else {
+ if ((nspare_mp == NULL) &&
+ ((nspare_mp = msgpullup(ndata_mp, -1)) == NULL)) {
+ mutex_exit(&frag->itpf_lock);
+ ip_drop_packet_chain(nmp, inbound, NULL, NULL,
+ &ipdrops_spd_nomem, &spd_dropper);
+ return (NULL);
+ }
+ nip6h = (ip6_t *)(nspare_mp->b_rptr + hdr_len);
+ if (!ip_hdr_length_nexthdr_v6(nspare_mp, nip6h,
+ &nip6_hdr_length, &nv6_proto_p)) {
+ mutex_exit(&frag->itpf_lock);
+ ip_drop_packet_chain(nmp, inbound, NULL, NULL,
+ &ipdrops_spd_malformed_frag, &spd_dropper);
+ ipsec_freemsg_chain(nspare_mp);
+ return (NULL);
+ }
+ bzero(&nipp, sizeof (nipp));
+ (void) ip_find_hdr_v6(nspare_mp, nip6h, &nipp, NULL);
+ nfraghdr = nipp.ipp_fraghdr;
+ nfirstbyte = ntohs(nfraghdr->ip6f_offlg &
+ IP6F_OFF_MASK);
+ nlastbyte = nfirstbyte + ntohs(nip6h->ip6_plen) +
+ sizeof (ip6_t) - nip6_hdr_length;
+ }
+ ipsec_freemsg_chain(nspare_mp);
+
+ /* Check for overlapping fragments */
+		if (firstbyte >= nfirstbyte && firstbyte < nlastbyte) {
+			/*
+			 * Overlap Check:
+			 * ---------~~~~~~    # Check if the newly
+			 * |ndata_mp     ~    # received fragment (mp)
+			 * ---------~~~~~~    # overlaps with the
+			 *     ~~~~---------  # cached fragment
+			 *     ~    mp     |  # (ndata_mp).
+			 *     ~~~~---------
+			 *
+			 * The overlap begins at firstbyte: offset zero
+			 * into mp's data, (firstbyte - nfirstbyte) into
+			 * the cached fragment's data.
+			 */
+			if (is_v4) {
+				data = (char *)iph + IPH_HDR_LENGTH(iph);
+				ndata = (char *)niph + IPH_HDR_LENGTH(niph) +
+				    firstbyte - nfirstbyte;
+			} else {
+				data = (char *)ip6h + ip6_hdr_length;
+				ndata = (char *)nip6h + nip6_hdr_length +
+				    firstbyte - nfirstbyte;
+			}
+ if (bcmp(data, ndata, MIN(lastbyte, nlastbyte)
+ - firstbyte)) {
+ /* Overlapping data does not match */
+ (void) fragcache_delentry(i, fep, frag);
+ mutex_exit(&frag->itpf_lock);
+ ip_drop_packet(first_mp, inbound, NULL, NULL,
+ &ipdrops_spd_overlap_frag, &spd_dropper);
+ return (NULL);
+ }
+ /* Part of defense for jolt2.c fragmentation attack */
+ if (firstbyte >= nfirstbyte && lastbyte <= nlastbyte) {
+ /*
+ * Check for identical or subset fragments:
+ * ---------- ~~~~--------~~~~~
+ * | nmp | or ~ nmp ~
+ * ---------- ~~~~--------~~~~~
+ * ---------- ------
+ * | mp | | mp |
+ * ---------- ------
+ */
+ mutex_exit(&frag->itpf_lock);
+ ip_drop_packet(first_mp, inbound, NULL, NULL,
+ &ipdrops_spd_evil_frag, &spd_dropper);
+ return (NULL);
+ }
+
+ }
+
+ /* Correct location for this fragment? */
+ if (firstbyte <= nfirstbyte) {
+			/*
+			 * Check if the tail end of the new fragment (mp)
+			 * overlaps with the head of the cached fragment
+			 * (nmp).
+			 * --------~~~~~~~
+			 * | mp          ~
+			 * --------~~~~~~~
+			 *    ~~~~~--------
+			 *    ~    | nmp  |
+			 *    ~~~~~--------
+			 *
+			 * The overlap begins at nfirstbyte:
+			 * (nfirstbyte - firstbyte) into mp's data, offset
+			 * zero into the cached fragment's data.
+			 */
+			if (lastbyte > nfirstbyte) {
+				/* Fragments overlap */
+				if (is_v4) {
+					data = (char *)iph +
+					    IPH_HDR_LENGTH(iph) + nfirstbyte -
+					    firstbyte;
+					ndata = (char *)niph +
+					    IPH_HDR_LENGTH(niph);
+				} else {
+					data = (char *)ip6h +
+					    ip6_hdr_length + nfirstbyte -
+					    firstbyte;
+					ndata = (char *)nip6h + nip6_hdr_length;
+				}
+ if (bcmp(data, ndata, MIN(lastbyte, nlastbyte)
+ - nfirstbyte)) {
+ /* Overlap mismatch */
+ (void) fragcache_delentry(i, fep, frag);
+ mutex_exit(&frag->itpf_lock);
+ ip_drop_packet(first_mp, inbound, NULL,
+ NULL, &ipdrops_spd_overlap_frag,
+ &spd_dropper);
+ return (NULL);
+ }
+ }
+
+ /*
+ * Fragment does not illegally overlap and can now
+ * be inserted into the chain
+ */
+ break;
+ }
+
+ prevmp = nmp;
+ }
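+	/* Link first_mp in before nmp (nmp == NULL appends at the tail). */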
+ first_mp->b_next = nmp;
+
+ if (prevmp == NULL) {
+ fep->itpfe_fraglist = first_mp;
+ } else {
+ prevmp->b_next = first_mp;
+ }
+ if (last)
+ fep->itpfe_last = 1;
+
+ /* Part of defense for jolt2.c fragmentation attack */
+ if (++(fep->itpfe_depth) > IPSEC_MAX_FRAGS) {
+ (void) fragcache_delentry(i, fep, frag);
+ mutex_exit(&frag->itpf_lock);
+ ip_drop_packet(first_mp, inbound, NULL, NULL,
+ &ipdrops_spd_max_frags, &spd_dropper);
+ return (NULL);
+ }
+
+ /* Check for complete packet */
+
+ if (!fep->itpfe_last) {
+ mutex_exit(&frag->itpf_lock);
+#ifdef FRAGCACHE_DEBUG
+ cmn_err(CE_WARN, "Fragment cached, not last.\n");
+#endif
+ return (NULL);
+ }
+
+#ifdef FRAGCACHE_DEBUG
+ cmn_err(CE_WARN, "Last fragment cached.\n");
+ cmn_err(CE_WARN, "mp = %p, first_mp = %p.\n", mp, first_mp);
+#endif
+
+ offset = 0;
+ for (mp = fep->itpfe_fraglist; mp; mp = mp->b_next) {
+ mblk_t *data_mp = (inbound ? mp->b_cont : mp);
+ int hdr_len;
+
+ oiph = (ipha_t *)data_mp->b_rptr;
+ ip6h = NULL;
+ iph = NULL;
+
+ spare_mp = NULL;
+ if (IPH_HDR_VERSION(oiph) == IPV4_VERSION) {
+ hdr_len = ((outer_hdr_len != 0) ?
+ IPH_HDR_LENGTH(oiph) : 0);
+ iph = (ipha_t *)(data_mp->b_rptr + hdr_len);
+ } else {
+ ASSERT(IPH_HDR_VERSION(oiph) == IPV6_VERSION);
+ if ((spare_mp = msgpullup(data_mp, -1)) == NULL) {
+ mutex_exit(&frag->itpf_lock);
+ ip_drop_packet_chain(mp, inbound, NULL, NULL,
+ &ipdrops_spd_nomem, &spd_dropper);
+ return (NULL);
+ }
+ ip6h = (ip6_t *)spare_mp->b_rptr;
+ (void) ip_hdr_length_nexthdr_v6(spare_mp, ip6h,
+ &ip6_hdr_length, &v6_proto_p);
+ hdr_len = ((outer_hdr_len != 0) ? ip6_hdr_length : 0);
+ }
+
+ /* Calculate current fragment start/end */
+ if (is_v4) {
+ if (iph == NULL) {
+ /* Was v6 outer */
+ iph = (ipha_t *)(data_mp->b_rptr + hdr_len);
+ }
+ firstbyte = V4_FRAG_OFFSET(iph);
+ lastbyte = firstbyte + ntohs(iph->ipha_length) -
+ IPH_HDR_LENGTH(iph);
+ } else {
+ if ((spare_mp == NULL) &&
+ ((spare_mp = msgpullup(data_mp, -1)) == NULL)) {
+ mutex_exit(&frag->itpf_lock);
+ ip_drop_packet_chain(mp, inbound, NULL, NULL,
+ &ipdrops_spd_nomem, &spd_dropper);
+ return (NULL);
+ }
+ ip6h = (ip6_t *)(spare_mp->b_rptr + hdr_len);
+ if (!ip_hdr_length_nexthdr_v6(spare_mp, ip6h,
+ &ip6_hdr_length, &v6_proto_p)) {
+ mutex_exit(&frag->itpf_lock);
+ ip_drop_packet_chain(mp, inbound, NULL, NULL,
+ &ipdrops_spd_malformed_frag, &spd_dropper);
+ ipsec_freemsg_chain(spare_mp);
+ return (NULL);
+ }
+ v6_proto = *v6_proto_p;
+ bzero(&ipp, sizeof (ipp));
+ (void) ip_find_hdr_v6(spare_mp, ip6h, &ipp, NULL);
+ fraghdr = ipp.ipp_fraghdr;
+ firstbyte = ntohs(fraghdr->ip6f_offlg &
+ IP6F_OFF_MASK);
+ lastbyte = firstbyte + ntohs(ip6h->ip6_plen) +
+ sizeof (ip6_t) - ip6_hdr_length;
+ }
+
+ /*
+ * If this fragment is greater than current offset,
+ * we have a missing fragment so return NULL
+ */
+ if (firstbyte > offset) {
+ mutex_exit(&frag->itpf_lock);
+#ifdef FRAGCACHE_DEBUG
+ /*
+ * Note, this can happen when the last frag
+ * gets sent through because it is smaller
+ * than the MTU. It is not necessarily an
+ * error condition.
+ */
+ cmn_err(CE_WARN, "Frag greater than offset! : "
+ "missing fragment: firstbyte = %d, offset = %d, "
+ "mp = %p\n", firstbyte, offset, mp);
+#endif
+ ipsec_freemsg_chain(spare_mp);
+ return (NULL);
+ }
+
+ /*
+ * If we are at the last fragment, we have the complete
+ * packet, so rechain things and return it to caller
+ * for processing
+ */
+
+ if ((is_v4 && !V4_MORE_FRAGS(iph)) ||
+ (!is_v4 && !(fraghdr->ip6f_offlg & IP6F_MORE_FRAG))) {
+ mp = fep->itpfe_fraglist;
+ fep->itpfe_fraglist = NULL;
+ (void) fragcache_delentry(i, fep, frag);
+ mutex_exit(&frag->itpf_lock);
+
+ if ((is_v4 && (firstbyte + ntohs(iph->ipha_length) >
+ 65535)) || (!is_v4 && (firstbyte +
+ ntohs(ip6h->ip6_plen) > 65535))) {
+				/*
+				 * Invalid "ping-of-death" packet;
+				 * discard it.
+				 */
+ ip_drop_packet_chain(mp, inbound, NULL, NULL,
+ &ipdrops_spd_evil_frag, &spd_dropper);
+ ipsec_freemsg_chain(spare_mp);
+ return (NULL);
+ }
+#ifdef FRAGCACHE_DEBUG
+ cmn_err(CE_WARN, "Fragcache returning mp = %p, "
+ "mp->b_next = %p", mp, mp->b_next);
+#endif
+ ipsec_freemsg_chain(spare_mp);
+ /*
+ * For inbound case, mp has ipsec_in b_next'd chain
+ * For outbound case, it is just data mp chain
+ */
+ return (mp);
+ }
+ ipsec_freemsg_chain(spare_mp);
+
+ /*
+ * Update new ending offset if this
+ * fragment extends the packet
+ */
+ if (offset < lastbyte)
+ offset = lastbyte;
+ }
+
+ mutex_exit(&frag->itpf_lock);
+
+ /* Didn't find last fragment, so return NULL */
+ return (NULL);
+}
+
+static void
+ipsec_fragcache_clean(ipsec_fragcache_t *frag)
+{
+ ipsec_fragcache_entry_t *fep;
+ int i;
+ ipsec_fragcache_entry_t *earlyfep = NULL;
+ time_t itpf_time;
+ int earlyexp;
+ int earlyi = 0;
+
+ ASSERT(MUTEX_HELD(&frag->itpf_lock));
+
+ itpf_time = gethrestime_sec();
+ earlyexp = itpf_time + 10000;
+
+ for (i = 0; i < IPSEC_FRAG_HASH_SLOTS; i++) {
+ fep = (frag->itpf_ptr)[i];
+ while (fep) {
+ if (fep->itpfe_exp < itpf_time) {
+ /* found */
+ fep = fragcache_delentry(i, fep, frag);
+ } else {
+ if (fep->itpfe_exp < earlyexp) {
+ earlyfep = fep;
+ earlyexp = fep->itpfe_exp;
+ earlyi = i;
+ }
+ fep = fep->itpfe_next;
+ }
+ }
+ }
+
+ frag->itpf_expire_hint = earlyexp;
+
+	/*
+	 * If there is still no free space, evict the entry that will
+	 * expire soonest so a new one can be cached.
+	 */
+	if (frag->itpf_freelist == NULL)
+		(void) fragcache_delentry(earlyi, earlyfep, frag);
+}
+
+static ipsec_fragcache_entry_t *
+fragcache_delentry(int slot, ipsec_fragcache_entry_t *fep,
+ ipsec_fragcache_t *frag)
+{
+ ipsec_fragcache_entry_t *targp;
+ ipsec_fragcache_entry_t *nextp = fep->itpfe_next;
+
+ ASSERT(MUTEX_HELD(&frag->itpf_lock));
+
+ /* Free up any fragment list still in cache entry */
+ ipsec_freemsg_chain(fep->itpfe_fraglist);
+
+ targp = (frag->itpf_ptr)[slot];
+	ASSERT(targp != NULL);
+
+ if (targp == fep) {
+ /* unlink from head of hash chain */
+ (frag->itpf_ptr)[slot] = nextp;
+ /* link into free list */
+ fep->itpfe_next = frag->itpf_freelist;
+ frag->itpf_freelist = fep;
+ return (nextp);
+ }
+
+ /* maybe should use double linked list to make update faster */
+ /* must be past front of chain */
+ while (targp) {
+ if (targp->itpfe_next == fep) {
+ /* unlink from hash chain */
+ targp->itpfe_next = nextp;
+ /* link into free list */
+ fep->itpfe_next = frag->itpf_freelist;
+ frag->itpf_freelist = fep;
+ return (nextp);
+ }
+ targp = targp->itpfe_next;
+		ASSERT(targp != NULL);
+ }
+ /* NOTREACHED */
+ return (NULL);
+}