summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- usr/src/uts/common/inet/ip.h 2
-rw-r--r-- usr/src/uts/common/inet/ip/ip_dce.c 21
-rw-r--r-- usr/src/uts/common/inet/ip/ip_tunables.c 6
-rw-r--r-- usr/src/uts/intel/ip/ip.global-objs.debug64 2
-rw-r--r-- usr/src/uts/intel/ip/ip.global-objs.obj64 2
5 files changed, 31 insertions, 2 deletions
diff --git a/usr/src/uts/common/inet/ip.h b/usr/src/uts/common/inet/ip.h
index 42adb4c451..7fd66dab72 100644
--- a/usr/src/uts/common/inet/ip.h
+++ b/usr/src/uts/common/inet/ip.h
@@ -21,6 +21,7 @@
/*
* Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
* Copyright (c) 1990 Mentat Inc.
*/
@@ -3030,6 +3031,7 @@ extern vmem_t *ip_minor_arena_la;
#define ips_ip_strict_src_multihoming ips_propinfo_tbl[80].prop_cur_uval
#define ips_ipv6_strict_src_multihoming ips_propinfo_tbl[81].prop_cur_uval
#define ips_ipv6_drop_inbound_icmpv6 ips_propinfo_tbl[82].prop_cur_bval
+#define ips_ip_dce_reclaim_threshold ips_propinfo_tbl[83].prop_cur_uval
extern int dohwcksum; /* use h/w cksum if supported by the h/w */
#ifdef ZC_TEST
diff --git a/usr/src/uts/common/inet/ip/ip_dce.c b/usr/src/uts/common/inet/ip/ip_dce.c
index 215bc4675f..7ad3ddc9b4 100644
--- a/usr/src/uts/common/inet/ip/ip_dce.c
+++ b/usr/src/uts/common/inet/ip/ip_dce.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
#include <sys/types.h>
@@ -103,6 +104,8 @@ static void dce_make_condemned(dce_t *);
static kmem_cache_t *dce_cache;
+/* DCE hash bucket count; tunable in /etc/system. Must be a power of two. */
+uint_t ip_dce_hash_size = 256;
/* Operates on a uint64_t */
#define RANDOM_HASH(p) ((p) ^ ((p)>>16) ^ ((p)>>32) ^ ((p)>>48))
@@ -117,6 +120,7 @@ dcb_reclaim(dcb_t *dcb, ip_stack_t *ipst, uint_t fraction)
uint_t fraction_pmtu = fraction*4;
uint_t hash;
dce_t *dce, *nextdce;
+ hrtime_t seed = gethrtime();
rw_enter(&dcb->dcb_lock, RW_WRITER);
for (dce = dcb->dcb_dce; dce != NULL; dce = nextdce) {
@@ -132,7 +136,7 @@ dcb_reclaim(dcb_t *dcb, ip_stack_t *ipst, uint_t fraction)
} else {
mutex_exit(&dce->dce_lock);
}
- hash = RANDOM_HASH((uint64_t)(uintptr_t)dce);
+ hash = RANDOM_HASH((uint64_t)((uintptr_t)dce | seed));
if (dce->dce_flags & DCEF_PMTU) {
if (hash % fraction_pmtu != 0)
continue;
@@ -234,7 +238,7 @@ dce_stack_init(ip_stack_t *ipst)
ipst->ips_dce_default->dce_ipst = ipst;
/* This must be a power of two since we are using IRE_ADDR_HASH macro */
- ipst->ips_dce_hashsize = 256;
+ ipst->ips_dce_hashsize = ip_dce_hash_size;
ipst->ips_dce_hash_v4 = kmem_zalloc(ipst->ips_dce_hashsize *
sizeof (dcb_t), KM_SLEEP);
ipst->ips_dce_hash_v6 = kmem_zalloc(ipst->ips_dce_hashsize *
@@ -414,6 +418,12 @@ dce_lookup_and_add_v4(ipaddr_t dst, ip_stack_t *ipst)
hash = IRE_ADDR_HASH(dst, ipst->ips_dce_hashsize);
dcb = &ipst->ips_dce_hash_v4[hash];
+ /*
+ * Assuming that we get fairly even distribution across all of the
+ * buckets, once one bucket is overly full, prune the whole cache.
+ */
+ if (dcb->dcb_cnt > ipst->ips_ip_dce_reclaim_threshold)
+ ip_dce_reclaim_stack(ipst);
rw_enter(&dcb->dcb_lock, RW_WRITER);
for (dce = dcb->dcb_dce; dce != NULL; dce = dce->dce_next) {
if (dce->dce_v4addr == dst) {
@@ -447,6 +457,7 @@ dce_lookup_and_add_v4(ipaddr_t dst, ip_stack_t *ipst)
dce->dce_ptpn = &dcb->dcb_dce;
dcb->dcb_dce = dce;
dce->dce_bucket = dcb;
+ atomic_add_32(&dcb->dcb_cnt, 1);
dce_refhold(dce); /* For the caller */
rw_exit(&dcb->dcb_lock);
@@ -476,6 +487,12 @@ dce_lookup_and_add_v6(const in6_addr_t *dst, uint_t ifindex, ip_stack_t *ipst)
hash = IRE_ADDR_HASH_V6(*dst, ipst->ips_dce_hashsize);
dcb = &ipst->ips_dce_hash_v6[hash];
+ /*
+ * Assuming that we get fairly even distribution across all of the
+ * buckets, once one bucket is overly full, prune the whole cache.
+ */
+ if (dcb->dcb_cnt > ipst->ips_ip_dce_reclaim_threshold)
+ ip_dce_reclaim_stack(ipst);
rw_enter(&dcb->dcb_lock, RW_WRITER);
for (dce = dcb->dcb_dce; dce != NULL; dce = dce->dce_next) {
if (IN6_ARE_ADDR_EQUAL(&dce->dce_v6addr, dst) &&
diff --git a/usr/src/uts/common/inet/ip/ip_tunables.c b/usr/src/uts/common/inet/ip/ip_tunables.c
index 516d6c1a21..aa7ace6a3c 100644
--- a/usr/src/uts/common/inet/ip/ip_tunables.c
+++ b/usr/src/uts/common/inet/ip/ip_tunables.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1990 Mentat Inc. */
@@ -908,6 +909,11 @@ mod_prop_info_t ip_propinfo_tbl[] = {
#else
{ "", 0, NULL, NULL, {0}, {0} },
#endif
+
+ { "_dce_reclaim_threshold", MOD_PROTO_IP,
+ mod_set_uint32, mod_get_uint32,
+ {1, 15000, 1000}, {1000} },
+
{ "mtu", MOD_PROTO_IPV4, NULL, ip_get_mtu, {0}, {0} },
{ "mtu", MOD_PROTO_IPV6, NULL, ip_get_mtu, {0}, {0} },
diff --git a/usr/src/uts/intel/ip/ip.global-objs.debug64 b/usr/src/uts/intel/ip/ip.global-objs.debug64
index 40d5bd498b..71a9a01996 100644
--- a/usr/src/uts/intel/ip/ip.global-objs.debug64
+++ b/usr/src/uts/intel/ip/ip.global-objs.debug64
@@ -21,6 +21,7 @@
#
# Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
# Copyright 2011 Nexenta Systems, Inc. All rights reserved
+# Copyright 2012 Joyent, Inc. All rights reserved
#
arp_m_tbl
@@ -107,6 +108,7 @@ ip6_ftable_hash_size
ip6opt_ls
ip_cgtp_filter_rev
ip_conn_cache
+ip_dce_hash_size
ip_debug
ip_g_all_ones
ip_helper_stream_info
diff --git a/usr/src/uts/intel/ip/ip.global-objs.obj64 b/usr/src/uts/intel/ip/ip.global-objs.obj64
index 5c56fe7efd..2095c7f5a2 100644
--- a/usr/src/uts/intel/ip/ip.global-objs.obj64
+++ b/usr/src/uts/intel/ip/ip.global-objs.obj64
@@ -21,6 +21,7 @@
#
# Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
# Copyright 2011 Nexenta Systems, Inc. All rights reserved
+# Copyright 2012 Joyent, Inc. All rights reserved
#
arp_m_tbl
@@ -107,6 +108,7 @@ ip6_ftable_hash_size
ip6opt_ls
ip_cgtp_filter_rev
ip_conn_cache
+ip_dce_hash_size
ip_debug
ip_g_all_ones
ip_helper_stream_info