From 7c6d7024e51780d3aacf9063d2133c1e957d7eea Mon Sep 17 00:00:00 2001 From: Jerry Jelinek Date: Mon, 13 Feb 2012 19:50:26 +0000 Subject: 3925 IP DCE does not scale Reviewed by: Keith M Wesolowski Reviewed by: Theo Schlossnagle Reviewed by: Sebastien Roy Approved by: Dan McDonald --- usr/src/uts/common/inet/ip.h | 12 +++ usr/src/uts/common/inet/ip/ip_attr.c | 107 ++++++++++++++++++------ usr/src/uts/common/inet/ip/ip_dce.c | 124 ++++++++++++++++++++++++---- usr/src/uts/common/inet/ip/ip_tunables.c | 6 ++ usr/src/uts/common/inet/ip_stack.h | 1 + usr/src/uts/common/inet/squeue.c | 1 + usr/src/uts/common/inet/tcp/tcp.c | 6 +- usr/src/uts/common/inet/tcp_stack.h | 3 +- usr/src/uts/intel/ip/ip.global-objs.debug64 | 8 ++ usr/src/uts/intel/ip/ip.global-objs.obj64 | 8 ++ 10 files changed, 234 insertions(+), 42 deletions(-) diff --git a/usr/src/uts/common/inet/ip.h b/usr/src/uts/common/inet/ip.h index 42adb4c451..bd50364310 100644 --- a/usr/src/uts/common/inet/ip.h +++ b/usr/src/uts/common/inet/ip.h @@ -21,6 +21,7 @@ /* * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, Joyent, Inc. All rights reserved. * Copyright (c) 1990 Mentat Inc. */ @@ -2195,6 +2196,8 @@ struct ip_xmit_attr_s { */ ixa_notify_t ixa_notify; /* Registered upcall notify function */ void *ixa_notify_cookie; /* ULP cookie for ixa_notify */ + + uint_t ixa_tcpcleanup; /* Used by conn_ixa_cleanup */ }; /* @@ -2265,6 +2268,14 @@ struct ip_xmit_attr_s { #define IXA_FREE_CRED 0x00000001 /* ixa_cred needs to be rele */ #define IXA_FREE_TSL 0x00000002 /* ixa_tsl needs to be rele */ +/* + * Trivial state machine used to synchronize IXA cleanup for TCP connections. + * See conn_ixa_cleanup(). + */ +#define IXATC_IDLE 0x00000000 +#define IXATC_INPROGRESS 0x00000001 +#define IXATC_COMPLETE 0x00000002 + /* * Simplistic way to set the ixa_xmit_hint for locally generated traffic * and forwarded traffic. The shift amount are based on the size of the @@ -3030,6 +3041,7 @@ extern vmem_t *ip_minor_arena_la; #define ips_ip_strict_src_multihoming ips_propinfo_tbl[80].prop_cur_uval #define ips_ipv6_strict_src_multihoming ips_propinfo_tbl[81].prop_cur_uval #define ips_ipv6_drop_inbound_icmpv6 ips_propinfo_tbl[82].prop_cur_bval +#define ips_ip_dce_reclaim_threshold ips_propinfo_tbl[83].prop_cur_uval extern int dohwcksum; /* use h/w cksum if supported by the h/w */ #ifdef ZC_TEST diff --git a/usr/src/uts/common/inet/ip/ip_attr.c b/usr/src/uts/common/inet/ip/ip_attr.c index 3197858f8e..6a075639fc 100644 --- a/usr/src/uts/common/inet/ip/ip_attr.c +++ b/usr/src/uts/common/inet/ip/ip_attr.c @@ -1176,6 +1176,59 @@ ixa_cleanup_stale(ip_xmit_attr_t *ixa) } } +static mblk_t * +tcp_ixa_cleanup_getmblk(conn_t *connp) +{ + tcp_stack_t *tcps = connp->conn_netstack->netstack_tcp; + int need_retry; + mblk_t *mp; + + mutex_enter(&tcps->tcps_ixa_cleanup_lock); + + /* + * It's possible that someone else came in and started cleaning up + * another connection between the time we verified this one is not being + * cleaned up and the time we actually get the shared mblk. If that's + * the case, we've dropped the lock, and some other thread may have + * cleaned up this connection again, and is still waiting for + * notification of that cleanup's completion. Therefore we need to + * recheck. + */ + do { + need_retry = 0; + while (connp->conn_ixa->ixa_tcpcleanup != IXATC_IDLE) { + cv_wait(&tcps->tcps_ixa_cleanup_done_cv, + &tcps->tcps_ixa_cleanup_lock); + } + + while ((mp = tcps->tcps_ixa_cleanup_mp) == NULL) { + /* + * Multiple concurrent cleanups; need to have the last + * one run since it could be an unplumb. + */ + need_retry = 1; + cv_wait(&tcps->tcps_ixa_cleanup_ready_cv, + &tcps->tcps_ixa_cleanup_lock); + } + } while (need_retry); + + /* + * We now have the lock and the mblk; now make sure that no one else can + * try to clean up this connection or enqueue it for cleanup, clear the + * mblk pointer for this stack, drop the lock, and return the mblk. + */ + ASSERT(MUTEX_HELD(&tcps->tcps_ixa_cleanup_lock)); + ASSERT(connp->conn_ixa->ixa_tcpcleanup == IXATC_IDLE); + ASSERT(tcps->tcps_ixa_cleanup_mp == mp); + ASSERT(mp != NULL); + + connp->conn_ixa->ixa_tcpcleanup = IXATC_INPROGRESS; + tcps->tcps_ixa_cleanup_mp = NULL; + mutex_exit(&tcps->tcps_ixa_cleanup_lock); + + return (mp); +} + /* * Used to run ixa_cleanup_stale inside the tcp squeue. * When done we hand the mp back by assigning it to tcps_ixa_cleanup_mp @@ -1195,11 +1248,39 @@ tcp_ixa_cleanup(void *arg, mblk_t *mp, void *arg2, mutex_enter(&tcps->tcps_ixa_cleanup_lock); ASSERT(tcps->tcps_ixa_cleanup_mp == NULL); + connp->conn_ixa->ixa_tcpcleanup = IXATC_COMPLETE; tcps->tcps_ixa_cleanup_mp = mp; - cv_signal(&tcps->tcps_ixa_cleanup_cv); + cv_signal(&tcps->tcps_ixa_cleanup_ready_cv); + /* + * It is possible for any number of threads to be waiting for cleanup of + * different connections. Absent a per-connection (or per-IXA) CV, we + * need to wake them all up even though only one can be waiting on this + * particular cleanup. + */ + cv_broadcast(&tcps->tcps_ixa_cleanup_done_cv); mutex_exit(&tcps->tcps_ixa_cleanup_lock); } +static void +tcp_ixa_cleanup_wait_and_finish(conn_t *connp) +{ + tcp_stack_t *tcps = connp->conn_netstack->netstack_tcp; + + mutex_enter(&tcps->tcps_ixa_cleanup_lock); + + ASSERT(connp->conn_ixa->ixa_tcpcleanup != IXATC_IDLE); + + while (connp->conn_ixa->ixa_tcpcleanup == IXATC_INPROGRESS) { + cv_wait(&tcps->tcps_ixa_cleanup_done_cv, + &tcps->tcps_ixa_cleanup_lock); + } + + ASSERT(connp->conn_ixa->ixa_tcpcleanup == IXATC_COMPLETE); + connp->conn_ixa->ixa_tcpcleanup = IXATC_IDLE; + cv_broadcast(&tcps->tcps_ixa_cleanup_done_cv); + + mutex_exit(&tcps->tcps_ixa_cleanup_lock); +} /* * ipcl_walk() function to help release any IRE, NCE, or DCEs that @@ -1214,21 +1295,8 @@ conn_ixa_cleanup(conn_t *connp, void *arg) if (IPCL_IS_TCP(connp)) { mblk_t *mp; - tcp_stack_t *tcps; - - tcps = connp->conn_netstack->netstack_tcp; - mutex_enter(&tcps->tcps_ixa_cleanup_lock); - while ((mp = tcps->tcps_ixa_cleanup_mp) == NULL) { - /* - * Multiple concurrent cleanups; need to have the last - * one run since it could be an unplumb. - */ - cv_wait(&tcps->tcps_ixa_cleanup_cv, - &tcps->tcps_ixa_cleanup_lock); - } - tcps->tcps_ixa_cleanup_mp = NULL; - mutex_exit(&tcps->tcps_ixa_cleanup_lock); + mp = tcp_ixa_cleanup_getmblk(connp); if (connp->conn_sqp->sq_run == curthread) { /* Already on squeue */ @@ -1237,15 +1305,8 @@ conn_ixa_cleanup(conn_t *connp, void *arg) CONN_INC_REF(connp); SQUEUE_ENTER_ONE(connp->conn_sqp, mp, tcp_ixa_cleanup, connp, NULL, SQ_PROCESS, SQTAG_TCP_IXA_CLEANUP); - - /* Wait until tcp_ixa_cleanup has run */ - mutex_enter(&tcps->tcps_ixa_cleanup_lock); - while (tcps->tcps_ixa_cleanup_mp == NULL) { - cv_wait(&tcps->tcps_ixa_cleanup_cv, - &tcps->tcps_ixa_cleanup_lock); - } - mutex_exit(&tcps->tcps_ixa_cleanup_lock); } + tcp_ixa_cleanup_wait_and_finish(connp); } else if (IPCL_IS_SCTP(connp)) { sctp_t *sctp; sctp_faddr_t *fp; diff --git a/usr/src/uts/common/inet/ip/ip_dce.c b/usr/src/uts/common/inet/ip/ip_dce.c index 215bc4675f..a6b9c98cad 100644 --- a/usr/src/uts/common/inet/ip/ip_dce.c +++ b/usr/src/uts/common/inet/ip/ip_dce.c @@ -21,6 +21,7 @@ /* * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, Joyent, Inc. All rights reserved. */ #include @@ -28,10 +29,12 @@ #include #include #include +#include #include #include #include #include +#include #define _SUN_TPI_VERSION 2 #include @@ -102,7 +105,19 @@ static void dce_delete_locked(dcb_t *, dce_t *); static void dce_make_condemned(dce_t *); static kmem_cache_t *dce_cache; +static kthread_t *dce_reclaim_thread; +static kmutex_t dce_reclaim_lock; +static kcondvar_t dce_reclaim_cv; +static int dce_reclaim_shutdown; +/* Global so it can be tuned in /etc/system. This must be a power of two. */ +uint_t ip_dce_hash_size = 1024; + +/* The time in seconds between executions of the IP DCE reclaim worker. */ +uint_t ip_dce_reclaim_interval = 60; + +/* The factor of the DCE threshold at which to start hard reclaims */ +uint_t ip_dce_reclaim_threshold_hard = 2; /* Operates on a uint64_t */ #define RANDOM_HASH(p) ((p) ^ ((p)>>16) ^ ((p)>>32) ^ ((p)>>48)) @@ -117,6 +132,11 @@ dcb_reclaim(dcb_t *dcb, ip_stack_t *ipst, uint_t fraction) uint_t fraction_pmtu = fraction*4; uint_t hash; dce_t *dce, *nextdce; + hrtime_t seed = gethrtime(); + uint_t retained = 0; + uint_t max = ipst->ips_ip_dce_reclaim_threshold; + + max *= ip_dce_reclaim_threshold_hard; rw_enter(&dcb->dcb_lock, RW_WRITER); for (dce = dcb->dcb_dce; dce != NULL; dce = nextdce) { @@ -132,13 +152,21 @@ dcb_reclaim(dcb_t *dcb, ip_stack_t *ipst, uint_t fraction) } else { mutex_exit(&dce->dce_lock); } - hash = RANDOM_HASH((uint64_t)(uintptr_t)dce); - if (dce->dce_flags & DCEF_PMTU) { - if (hash % fraction_pmtu != 0) - continue; - } else { - if (hash % fraction != 0) - continue; + + if (max == 0 || retained < max) { + hash = RANDOM_HASH((uint64_t)((uintptr_t)dce | seed)); + + if (dce->dce_flags & DCEF_PMTU) { + if (hash % fraction_pmtu != 0) { + retained++; + continue; + } + } else { + if (hash % fraction != 0) { + retained++; + continue; + } + } } IP_STAT(ipst, ip_dce_reclaim_deleted); @@ -175,17 +203,19 @@ ip_dce_reclaim_stack(ip_stack_t *ipst) } /* - * Called by the memory allocator subsystem directly, when the system - * is running low on memory. + * Called by dce_reclaim_worker() below, and no one else. Typically this will + * mean that the number of entries in the hash buckets has exceeded a tunable + * threshold. */ -/* ARGSUSED */ -void -ip_dce_reclaim(void *args) +static void +ip_dce_reclaim(void) { netstack_handle_t nh; netstack_t *ns; ip_stack_t *ipst; + ASSERT(curthread == dce_reclaim_thread); + netstack_next_init(&nh); while ((ns = netstack_next(&nh)) != NULL) { /* @@ -196,26 +226,75 @@ ip_dce_reclaim(void *args) netstack_rele(ns); continue; } - ip_dce_reclaim_stack(ipst); + if (atomic_swap_uint(&ipst->ips_dce_reclaim_needed, 0) != 0) + ip_dce_reclaim_stack(ipst); netstack_rele(ns); } netstack_next_fini(&nh); } +/* ARGSUSED */ +static void +dce_reclaim_worker(void *arg) +{ + callb_cpr_t cprinfo; + + CALLB_CPR_INIT(&cprinfo, &dce_reclaim_lock, callb_generic_cpr, + "dce_reclaim_worker"); + + mutex_enter(&dce_reclaim_lock); + while (!dce_reclaim_shutdown) { + CALLB_CPR_SAFE_BEGIN(&cprinfo); + (void) cv_timedwait(&dce_reclaim_cv, &dce_reclaim_lock, + ddi_get_lbolt() + ip_dce_reclaim_interval * hz); + CALLB_CPR_SAFE_END(&cprinfo, &dce_reclaim_lock); + + if (dce_reclaim_shutdown) + break; + + mutex_exit(&dce_reclaim_lock); + ip_dce_reclaim(); + mutex_enter(&dce_reclaim_lock); + } + + ASSERT(MUTEX_HELD(&dce_reclaim_lock)); + dce_reclaim_thread = NULL; + dce_reclaim_shutdown = 0; + cv_broadcast(&dce_reclaim_cv); + CALLB_CPR_EXIT(&cprinfo); /* drops the lock */ + + thread_exit(); +} + void dce_g_init(void) { dce_cache = kmem_cache_create("dce_cache", - sizeof (dce_t), 0, NULL, NULL, ip_dce_reclaim, NULL, NULL, 0); + sizeof (dce_t), 0, NULL, NULL, NULL, NULL, NULL, 0); + + mutex_init(&dce_reclaim_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&dce_reclaim_cv, NULL, CV_DEFAULT, NULL); + + dce_reclaim_thread = thread_create(NULL, 0, dce_reclaim_worker, + NULL, 0, &p0, TS_RUN, minclsyspri); } void dce_g_destroy(void) { + mutex_enter(&dce_reclaim_lock); + dce_reclaim_shutdown = 1; + cv_signal(&dce_reclaim_cv); + while (dce_reclaim_thread != NULL) + cv_wait(&dce_reclaim_cv, &dce_reclaim_lock); + mutex_exit(&dce_reclaim_lock); + + cv_destroy(&dce_reclaim_cv); + mutex_destroy(&dce_reclaim_lock); + kmem_cache_destroy(dce_cache); } - /* * Allocate a default DCE and a hash table for per-IP address DCEs */ @@ -234,7 +313,7 @@ dce_stack_init(ip_stack_t *ipst) ipst->ips_dce_default->dce_ipst = ipst; /* This must be a power of two since we are using IRE_ADDR_HASH macro */ - ipst->ips_dce_hashsize = 256; + ipst->ips_dce_hashsize = ip_dce_hash_size; ipst->ips_dce_hash_v4 = kmem_zalloc(ipst->ips_dce_hashsize * sizeof (dcb_t), KM_SLEEP); ipst->ips_dce_hash_v6 = kmem_zalloc(ipst->ips_dce_hashsize * @@ -414,6 +493,12 @@ dce_lookup_and_add_v4(ipaddr_t dst, ip_stack_t *ipst) hash = IRE_ADDR_HASH(dst, ipst->ips_dce_hashsize); dcb = &ipst->ips_dce_hash_v4[hash]; + /* + * Assuming that we get fairly even distribution across all of the + * buckets, once one bucket is overly full, prune the whole cache. + */ + if (dcb->dcb_cnt > ipst->ips_ip_dce_reclaim_threshold) + atomic_or_uint(&ipst->ips_dce_reclaim_needed, 1); rw_enter(&dcb->dcb_lock, RW_WRITER); for (dce = dcb->dcb_dce; dce != NULL; dce = dce->dce_next) { if (dce->dce_v4addr == dst) { @@ -447,6 +532,7 @@ dce_lookup_and_add_v4(ipaddr_t dst, ip_stack_t *ipst) dce->dce_ptpn = &dcb->dcb_dce; dcb->dcb_dce = dce; dce->dce_bucket = dcb; + atomic_add_32(&dcb->dcb_cnt, 1); dce_refhold(dce); /* For the caller */ rw_exit(&dcb->dcb_lock); @@ -476,6 +562,12 @@ dce_lookup_and_add_v6(const in6_addr_t *dst, uint_t ifindex, ip_stack_t *ipst) hash = IRE_ADDR_HASH_V6(*dst, ipst->ips_dce_hashsize); dcb = &ipst->ips_dce_hash_v6[hash]; + /* + * Assuming that we get fairly even distribution across all of the + * buckets, once one bucket is overly full, prune the whole cache. + */ + if (dcb->dcb_cnt > ipst->ips_ip_dce_reclaim_threshold) + atomic_or_uint(&ipst->ips_dce_reclaim_needed, 1); rw_enter(&dcb->dcb_lock, RW_WRITER); for (dce = dcb->dcb_dce; dce != NULL; dce = dce->dce_next) { if (IN6_ARE_ADDR_EQUAL(&dce->dce_v6addr, dst) && diff --git a/usr/src/uts/common/inet/ip/ip_tunables.c b/usr/src/uts/common/inet/ip/ip_tunables.c index 4ef001442c..18fc81e7b7 100644 --- a/usr/src/uts/common/inet/ip/ip_tunables.c +++ b/usr/src/uts/common/inet/ip/ip_tunables.c @@ -21,6 +21,7 @@ /* * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2012, Joyent, Inc. All rights reserved. */ /* Copyright (c) 1990 Mentat Inc. */ @@ -909,6 +910,11 @@ mod_prop_info_t ip_propinfo_tbl[] = { #else { "", 0, NULL, NULL, {0}, {0} }, #endif + + { "_dce_reclaim_threshold", MOD_PROTO_IP, + mod_set_uint32, mod_get_uint32, + {1, 100000, 32}, {32} }, + { "mtu", MOD_PROTO_IPV4, NULL, ip_get_mtu, {0}, {0} }, { "mtu", MOD_PROTO_IPV6, NULL, ip_get_mtu, {0}, {0} }, diff --git a/usr/src/uts/common/inet/ip_stack.h b/usr/src/uts/common/inet/ip_stack.h index 9ca6eaa593..e46a3b6a3c 100644 --- a/usr/src/uts/common/inet/ip_stack.h +++ b/usr/src/uts/common/inet/ip_stack.h @@ -264,6 +264,7 @@ struct ip_stack { uint_t ips_dce_hashsize; struct dcb_s *ips_dce_hash_v4; struct dcb_s *ips_dce_hash_v6; + uint_t ips_dce_reclaim_needed; /* pending binds */ mblk_t *ips_ip6_asp_pending_ops; diff --git a/usr/src/uts/common/inet/squeue.c b/usr/src/uts/common/inet/squeue.c index 6d0bf70b2a..4108931c36 100644 --- a/usr/src/uts/common/inet/squeue.c +++ b/usr/src/uts/common/inet/squeue.c @@ -546,6 +546,7 @@ squeue_enter(squeue_t *sqp, mblk_t *mp, mblk_t *tail, uint32_t cnt, ASSERT(MUTEX_HELD(&sqp->sq_lock)); ASSERT(sqp->sq_first != NULL); now = gethrtime(); + sqp->sq_run = curthread; sqp->sq_drain(sqp, SQS_ENTER, now + squeue_drain_ns); /* diff --git a/usr/src/uts/common/inet/tcp/tcp.c b/usr/src/uts/common/inet/tcp/tcp.c index 49ab70eed4..5ba1c603c0 100644 --- a/usr/src/uts/common/inet/tcp/tcp.c +++ b/usr/src/uts/common/inet/tcp/tcp.c @@ -3793,7 +3793,8 @@ tcp_stack_init(netstackid_t stackid, netstack_t *ns) ASSERT(error == 0); tcps->tcps_ixa_cleanup_mp = allocb_wait(0, BPRI_MED, STR_NOSIG, NULL); ASSERT(tcps->tcps_ixa_cleanup_mp != NULL); - cv_init(&tcps->tcps_ixa_cleanup_cv, NULL, CV_DEFAULT, NULL); + cv_init(&tcps->tcps_ixa_cleanup_ready_cv, NULL, CV_DEFAULT, NULL); + cv_init(&tcps->tcps_ixa_cleanup_done_cv, NULL, CV_DEFAULT, NULL); mutex_init(&tcps->tcps_ixa_cleanup_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&tcps->tcps_reclaim_lock, NULL, MUTEX_DEFAULT, NULL); @@ -3858,7 +3859,8 @@ tcp_stack_fini(netstackid_t stackid, void *arg) freeb(tcps->tcps_ixa_cleanup_mp); tcps->tcps_ixa_cleanup_mp = NULL; - cv_destroy(&tcps->tcps_ixa_cleanup_cv); + cv_destroy(&tcps->tcps_ixa_cleanup_ready_cv); + cv_destroy(&tcps->tcps_ixa_cleanup_done_cv); mutex_destroy(&tcps->tcps_ixa_cleanup_lock); /* diff --git a/usr/src/uts/common/inet/tcp_stack.h b/usr/src/uts/common/inet/tcp_stack.h index 2dccf6b78c..e46ebe08da 100644 --- a/usr/src/uts/common/inet/tcp_stack.h +++ b/usr/src/uts/common/inet/tcp_stack.h @@ -101,7 +101,8 @@ struct tcp_stack { /* Used to synchronize access when reclaiming memory */ mblk_t *tcps_ixa_cleanup_mp; kmutex_t tcps_ixa_cleanup_lock; - kcondvar_t tcps_ixa_cleanup_cv; + kcondvar_t tcps_ixa_cleanup_ready_cv; + kcondvar_t tcps_ixa_cleanup_done_cv; /* Variables for handling kmem reclaim call back. */ kmutex_t tcps_reclaim_lock; diff --git a/usr/src/uts/intel/ip/ip.global-objs.debug64 b/usr/src/uts/intel/ip/ip.global-objs.debug64 index 40d5bd498b..ed9165dc7f 100644 --- a/usr/src/uts/intel/ip/ip.global-objs.debug64 +++ b/usr/src/uts/intel/ip/ip.global-objs.debug64 @@ -21,6 +21,7 @@ # # Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. # Copyright 2011 Nexenta Systems, Inc. All rights reserved +# Copyright 2012 Joyent, Inc. All rights reserved # arp_m_tbl @@ -49,6 +50,10 @@ cl_sctp_listen cl_sctp_unlisten conn_drain_nthreads dce_cache +dce_reclaim_cv +dce_reclaim_lock +dce_reclaim_shutdown +dce_reclaim_thread default_ip6_asp_table do_tcp_fusion do_tcpzcopy @@ -107,6 +112,9 @@ ip6_ftable_hash_size ip6opt_ls ip_cgtp_filter_rev ip_conn_cache +ip_dce_hash_size +ip_dce_reclaim_interval +ip_dce_reclaim_threshold_hard ip_debug ip_g_all_ones ip_helper_stream_info diff --git a/usr/src/uts/intel/ip/ip.global-objs.obj64 b/usr/src/uts/intel/ip/ip.global-objs.obj64 index 5c56fe7efd..11a0ba651f 100644 --- a/usr/src/uts/intel/ip/ip.global-objs.obj64 +++ b/usr/src/uts/intel/ip/ip.global-objs.obj64 @@ -21,6 +21,7 @@ # # Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. # Copyright 2011 Nexenta Systems, Inc. All rights reserved +# Copyright 2012 Joyent, Inc. All rights reserved # arp_m_tbl @@ -49,6 +50,10 @@ cl_sctp_listen cl_sctp_unlisten conn_drain_nthreads dce_cache +dce_reclaim_cv +dce_reclaim_lock +dce_reclaim_shutdown +dce_reclaim_thread default_ip6_asp_table do_tcp_fusion do_tcpzcopy @@ -107,6 +112,9 @@ ip6_ftable_hash_size ip6opt_ls ip_cgtp_filter_rev ip_conn_cache +ip_dce_hash_size +ip_dce_reclaim_interval +ip_dce_reclaim_threshold_hard ip_debug ip_g_all_ones ip_helper_stream_info -- cgit v1.2.3 From a0c1127b147dc6a0372b141deb8c0c2b0195b8ea Mon Sep 17 00:00:00 2001 From: Steven Hartland Date: Tue, 6 Aug 2013 09:50:40 -0800 Subject: 3973 zfs_ioc_rename alters passed in zc->zc_name Reviewed by: Matthew Ahrens Reviewed by: George Wilson Approved by: Christopher Siden --- usr/src/uts/common/fs/zfs/zfs_ioctl.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/usr/src/uts/common/fs/zfs/zfs_ioctl.c b/usr/src/uts/common/fs/zfs/zfs_ioctl.c index ec44fe871b..e8fd90459e 100644 --- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c +++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c @@ -3552,18 +3552,25 @@ zfs_ioc_rename(zfs_cmd_t *zc) at = strchr(zc->zc_name, '@'); if (at != NULL) { /* snaps must be in same fs */ + int error; + if (strncmp(zc->zc_name, zc->zc_value, at - zc->zc_name + 1)) return (SET_ERROR(EXDEV)); *at = '\0'; if (zc->zc_objset_type == DMU_OST_ZFS) { - int error = dmu_objset_find(zc->zc_name, + error = dmu_objset_find(zc->zc_name, recursive_unmount, at + 1, recursive ? DS_FIND_CHILDREN : 0); - if (error != 0) + if (error != 0) { + *at = '@'; return (error); + } } - return (dsl_dataset_rename_snapshot(zc->zc_name, - at + 1, strchr(zc->zc_value, '@') + 1, recursive)); + error = dsl_dataset_rename_snapshot(zc->zc_name, + at + 1, strchr(zc->zc_value, '@') + 1, recursive); + *at = '@'; + + return (error); } else { if (zc->zc_objset_type == DMU_OST_ZVOL) (void) zvol_remove_minor(zc->zc_name); -- cgit v1.2.3 From 2f6d1bc0542b0c28d55d75303ddce8a7be1eb6d2 Mon Sep 17 00:00:00 2001 From: Richard Lowe Date: Tue, 6 Aug 2013 15:49:26 +0100 Subject: 3925 IP DCE does not scale (fix sparc) --- usr/src/uts/sparc/ip/ip.global-objs.debug64 | 8 ++++++++ usr/src/uts/sparc/ip/ip.global-objs.obj64 | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/usr/src/uts/sparc/ip/ip.global-objs.debug64 b/usr/src/uts/sparc/ip/ip.global-objs.debug64 index 40d5bd498b..ed9165dc7f 100644 --- a/usr/src/uts/sparc/ip/ip.global-objs.debug64 +++ b/usr/src/uts/sparc/ip/ip.global-objs.debug64 @@ -21,6 +21,7 @@ # # Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. # Copyright 2011 Nexenta Systems, Inc. All rights reserved +# Copyright 2012 Joyent, Inc. All rights reserved # arp_m_tbl @@ -49,6 +50,10 @@ cl_sctp_listen cl_sctp_unlisten conn_drain_nthreads dce_cache +dce_reclaim_cv +dce_reclaim_lock +dce_reclaim_shutdown +dce_reclaim_thread default_ip6_asp_table do_tcp_fusion do_tcpzcopy @@ -107,6 +112,9 @@ ip6_ftable_hash_size ip6opt_ls ip_cgtp_filter_rev ip_conn_cache +ip_dce_hash_size +ip_dce_reclaim_interval +ip_dce_reclaim_threshold_hard ip_debug ip_g_all_ones ip_helper_stream_info diff --git a/usr/src/uts/sparc/ip/ip.global-objs.obj64 b/usr/src/uts/sparc/ip/ip.global-objs.obj64 index 5c56fe7efd..11a0ba651f 100644 --- a/usr/src/uts/sparc/ip/ip.global-objs.obj64 +++ b/usr/src/uts/sparc/ip/ip.global-objs.obj64 @@ -21,6 +21,7 @@ # # Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. # Copyright 2011 Nexenta Systems, Inc. All rights reserved +# Copyright 2012 Joyent, Inc. All rights reserved # arp_m_tbl @@ -49,6 +50,10 @@ cl_sctp_listen cl_sctp_unlisten conn_drain_nthreads dce_cache +dce_reclaim_cv +dce_reclaim_lock +dce_reclaim_shutdown +dce_reclaim_thread default_ip6_asp_table do_tcp_fusion do_tcpzcopy @@ -107,6 +112,9 @@ ip6_ftable_hash_size ip6opt_ls ip_cgtp_filter_rev ip_conn_cache +ip_dce_hash_size +ip_dce_reclaim_interval +ip_dce_reclaim_threshold_hard ip_debug ip_g_all_ones ip_helper_stream_info -- cgit v1.2.3 From 33915f34e743093758eb5a5065f68a43384af1e8 Mon Sep 17 00:00:00 2001 From: Richard Lowe Date: Fri, 2 Aug 2013 15:33:09 -0400 Subject: 3966 zfs lz4 compression (etc) should have bumped grub capability VERSION Reviewed by: Christopher Siden Reviewed by: Matt Ahrens Approved by: Dan McDonald --- usr/src/grub/capability | 22 ++++++---------------- usr/src/grub/grub-0.97/stage2/fsys_zfs.c | 3 +++ 2 files changed, 9 insertions(+), 16 deletions(-) diff --git a/usr/src/grub/capability b/usr/src/grub/capability index 44aaae2cb8..822546819d 100644 --- a/usr/src/grub/capability +++ b/usr/src/grub/capability @@ -24,22 +24,12 @@ # This file defines the current capabilities of GRUB over and above that # supported by the standard distribution # -# The version field contains the version of the associated GRUB software. -# The version is incremented by .1 (minor version number) each time there -# is a bugfix or enhancement of GRUB. In addition, the major version number -# is bumped up by 1 every time a release boundary is crossed. Thus if in S11 -# the starting version is 3, in S12 the starting version will be 4. -# Note that the first major number in each sequence is a whole integer -# i.e. 2.0 is truncated to 2 and 3.0 is truncated to 3. -# -# NOTE: Live Upgrade is currently unable to handle decimal fractions (i.e. -# minor version numbers) so the version number is being bumped up in -# integer increments until Live Upgrade is fixed. -# -# This file and the associated version are Solaris specific and are -# not a part of the open source distribution of GRUB. -# -VERSION=21 +# The version field contains the version of the associated GRUB software. The +# version is incremented by 1 each time there is a bugfix or enhancement to +# GRUB necessitating that the boot blocks be reinstalled for that fix or +# enhancement to take effect. +# +VERSION=22 dboot xVM zfs diff --git a/usr/src/grub/grub-0.97/stage2/fsys_zfs.c b/usr/src/grub/grub-0.97/stage2/fsys_zfs.c index bc90f7719a..881ef3fead 100644 --- a/usr/src/grub/grub-0.97/stage2/fsys_zfs.c +++ b/usr/src/grub/grub-0.97/stage2/fsys_zfs.c @@ -960,6 +960,9 @@ get_default_bootfsobj(dnode_phys_t *mosmdn, uint64_t *obj, char *stack) * List of pool features that the grub implementation of ZFS supports for * read. Note that features that are only required for write do not need * to be listed here since grub opens pools in read-only mode. + * + * When this list is updated the version number in usr/src/grub/capability + * must be incremented to ensure the new grub gets installed. */ static const char *spa_feature_names[] = { "org.illumos:lz4_compress", -- cgit v1.2.3