diff options
Diffstat (limited to 'usr/src/uts/common/inet')
| -rw-r--r-- | usr/src/uts/common/inet/ilb/ilb_conn.c | 8 | ||||
| -rw-r--r-- | usr/src/uts/common/inet/ip/ip_attr.c | 5 | ||||
| -rw-r--r-- | usr/src/uts/common/inet/ip/ip_if.c | 169 | ||||
| -rw-r--r-- | usr/src/uts/common/inet/ip/ip_squeue.c | 2 | ||||
| -rw-r--r-- | usr/src/uts/common/inet/ip/ip_tunables.c | 1 | ||||
| -rw-r--r-- | usr/src/uts/common/inet/ip/ipsecesp.c | 3 | ||||
| -rw-r--r-- | usr/src/uts/common/inet/ipf/ip_fil_solaris.c | 124 | ||||
| -rw-r--r-- | usr/src/uts/common/inet/ipf/ipf.conf | 5 | ||||
| -rw-r--r-- | usr/src/uts/common/inet/ipf/netinet/ipf_stack.h | 10 | ||||
| -rw-r--r-- | usr/src/uts/common/inet/ipf/solaris.c | 1 | ||||
| -rw-r--r-- | usr/src/uts/common/inet/squeue.c | 100 | ||||
| -rw-r--r-- | usr/src/uts/common/inet/tcp_impl.h | 6 |
12 files changed, 318 insertions, 116 deletions
diff --git a/usr/src/uts/common/inet/ilb/ilb_conn.c b/usr/src/uts/common/inet/ilb/ilb_conn.c index 5029552f19..7f79d41dd6 100644 --- a/usr/src/uts/common/inet/ilb/ilb_conn.c +++ b/usr/src/uts/common/inet/ilb/ilb_conn.c @@ -22,6 +22,7 @@ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2014 Joyent, Inc. All rights reserved. */ #include <sys/sysmacros.h> @@ -365,6 +366,7 @@ ilb_conn_hash_fini(ilb_stack_t *ilbs) { uint32_t i; ilb_conn_t *connp; + ilb_conn_hash_t *hash; if (ilbs->ilbs_c2s_conn_hash == NULL) { ASSERT(ilbs->ilbs_s2c_conn_hash == NULL); @@ -388,10 +390,10 @@ ilb_conn_hash_fini(ilb_stack_t *ilbs) ilbs->ilbs_conn_taskq = NULL; /* Then remove all the conns. */ + hash = ilbs->ilbs_s2c_conn_hash; for (i = 0; i < ilbs->ilbs_conn_hash_size; i++) { - while ((connp = ilbs->ilbs_s2c_conn_hash->ilb_connp) != NULL) { - ilbs->ilbs_s2c_conn_hash->ilb_connp = - connp->conn_s2c_next; + while ((connp = hash[i].ilb_connp) != NULL) { + hash[i].ilb_connp = connp->conn_s2c_next; ILB_SERVER_REFRELE(connp->conn_server); if (connp->conn_rule_cache.topo == ILB_TOPO_IMPL_NAT) { ilb_nat_src_entry_t *ent; diff --git a/usr/src/uts/common/inet/ip/ip_attr.c b/usr/src/uts/common/inet/ip/ip_attr.c index 85ee142dfc..c350d67c2d 100644 --- a/usr/src/uts/common/inet/ip/ip_attr.c +++ b/usr/src/uts/common/inet/ip/ip_attr.c @@ -909,6 +909,11 @@ ixa_safe_copy(ip_xmit_attr_t *src, ip_xmit_attr_t *ixa) */ if (ixa->ixa_free_flags & IXA_FREE_CRED) crhold(ixa->ixa_cred); + + /* + * There is no cleanup in progress on this new copy. + */ + ixa->ixa_tcpcleanup = IXATC_IDLE; } /* diff --git a/usr/src/uts/common/inet/ip/ip_if.c b/usr/src/uts/common/inet/ip/ip_if.c index d15d86d248..e31f96ebb8 100644 --- a/usr/src/uts/common/inet/ip/ip_if.c +++ b/usr/src/uts/common/inet/ip/ip_if.c @@ -24,6 +24,9 @@ * Copyright (c) 2013 by Delphix. All rights reserved. * Copyright (c) 2014, OmniTI Computer Consulting, Inc. All rights reserved. 
*/ +/* + * Copyright (c) 2013, Joyent, Inc. All rights reserved. + */ /* * This file contains the interface control functions for IP. @@ -224,6 +227,8 @@ static void ipif_trace_cleanup(const ipif_t *); static void ill_dlpi_clear_deferred(ill_t *ill); +static void phyint_flags_init(phyint_t *, t_uscalar_t); + /* * if we go over the memory footprint limit more than once in this msec * interval, we'll start pruning aggressively. @@ -282,7 +287,6 @@ static ip_m_t ip_m_tbl[] = { ip_nodef_v6intfid } }; -static ill_t ill_null; /* Empty ILL for init. */ char ipif_loopback_name[] = "lo0"; /* These are used by all IP network modules. */ @@ -3331,50 +3335,42 @@ ipsq_init(ill_t *ill, boolean_t enter) } /* - * ill_init is called by ip_open when a device control stream is opened. - * It does a few initializations, and shoots a DL_INFO_REQ message down - * to the driver. The response is later picked up in ip_rput_dlpi and - * used to set up default mechanisms for talking to the driver. (Always - * called as writer.) - * - * If this function returns error, ip_open will call ip_close which in - * turn will call ill_delete to clean up any memory allocated here that - * is not yet freed. + * Here we perform initialisation of the ill_t common to both regular + * interface ILLs and the special loopback ILL created by ill_lookup_on_name. */ -int -ill_init(queue_t *q, ill_t *ill) +static int +ill_init_common(ill_t *ill, queue_t *q, boolean_t isv6, boolean_t is_loopback, + boolean_t ipsq_enter) { - int count; - dl_info_req_t *dlir; - mblk_t *info_mp; + int count; uchar_t *frag_ptr; - /* - * The ill is initialized to zero by mi_alloc*(). In addition - * some fields already contain valid values, initialized in - * ip_open(), before we reach here. - */ mutex_init(&ill->ill_lock, NULL, MUTEX_DEFAULT, 0); mutex_init(&ill->ill_saved_ire_lock, NULL, MUTEX_DEFAULT, NULL); ill->ill_saved_ire_cnt = 0; - ill->ill_rq = q; - ill->ill_wq = WR(q); + if (is_loopback) { + ill->ill_max_frag = isv6 ? 
ip_loopback_mtu_v6plus : + ip_loopback_mtuplus; + /* + * No resolver here. + */ + ill->ill_net_type = IRE_LOOPBACK; + } else { + ill->ill_rq = q; + ill->ill_wq = WR(q); + ill->ill_ppa = UINT_MAX; + } - info_mp = allocb(MAX(sizeof (dl_info_req_t), sizeof (dl_info_ack_t)), - BPRI_HI); - if (info_mp == NULL) - return (ENOMEM); + ill->ill_isv6 = isv6; /* * Allocate sufficient space to contain our fragment hash table and * the device name. */ frag_ptr = (uchar_t *)mi_zalloc(ILL_FRAG_HASH_TBL_SIZE + 2 * LIFNAMSIZ); - if (frag_ptr == NULL) { - freemsg(info_mp); + if (frag_ptr == NULL) return (ENOMEM); - } ill->ill_frag_ptr = frag_ptr; ill->ill_frag_free_num_pkts = 0; ill->ill_last_frag_clean_time = 0; @@ -3387,35 +3383,30 @@ ill_init(queue_t *q, ill_t *ill) ill->ill_phyint = (phyint_t *)mi_zalloc(sizeof (phyint_t)); if (ill->ill_phyint == NULL) { - freemsg(info_mp); mi_free(frag_ptr); return (ENOMEM); } mutex_init(&ill->ill_phyint->phyint_lock, NULL, MUTEX_DEFAULT, 0); - /* - * For now pretend this is a v4 ill. We need to set phyint_ill* - * at this point because of the following reason. If we can't - * enter the ipsq at some point and cv_wait, the writer that - * wakes us up tries to locate us using the list of all phyints - * in an ipsq and the ills from the phyint thru the phyint_ill*. - * If we don't set it now, we risk a missed wakeup. 
- */ - ill->ill_phyint->phyint_illv4 = ill; - ill->ill_ppa = UINT_MAX; + if (isv6) { + ill->ill_phyint->phyint_illv6 = ill; + } else { + ill->ill_phyint->phyint_illv4 = ill; + } + if (is_loopback) { + phyint_flags_init(ill->ill_phyint, DL_LOOP); + } + list_create(&ill->ill_nce, sizeof (nce_t), offsetof(nce_t, nce_node)); ill_set_inputfn(ill); - if (!ipsq_init(ill, B_TRUE)) { - freemsg(info_mp); + if (!ipsq_init(ill, ipsq_enter)) { mi_free(frag_ptr); mi_free(ill->ill_phyint); return (ENOMEM); } - ill->ill_state_flags |= ILL_LL_SUBNET_PENDING; - /* Frag queue limit stuff */ ill->ill_frag_count = 0; ill->ill_ipf_gen = 0; @@ -3440,6 +3431,53 @@ ill_init(queue_t *q, ill_t *ill) ill->ill_max_buf = ND_MAX_Q; ill->ill_refcnt = 0; + return (0); +} + +/* + * ill_init is called by ip_open when a device control stream is opened. + * It does a few initializations, and shoots a DL_INFO_REQ message down + * to the driver. The response is later picked up in ip_rput_dlpi and + * used to set up default mechanisms for talking to the driver. (Always + * called as writer.) + * + * If this function returns error, ip_open will call ip_close which in + * turn will call ill_delete to clean up any memory allocated here that + * is not yet freed. + * + * Note: ill_ipst and ill_zoneid must be set before calling ill_init. + */ +int +ill_init(queue_t *q, ill_t *ill) +{ + int ret; + dl_info_req_t *dlir; + mblk_t *info_mp; + + info_mp = allocb(MAX(sizeof (dl_info_req_t), sizeof (dl_info_ack_t)), + BPRI_HI); + if (info_mp == NULL) + return (ENOMEM); + + /* + * The ill is initialized to zero by mi_alloc*(). In addition + * some fields already contain valid values, initialized in + * ip_open(), before we reach here. + * + * For now pretend this is a v4 ill. We need to set phyint_ill* + * at this point because of the following reason. 
If we can't + * enter the ipsq at some point and cv_wait, the writer that + * wakes us up tries to locate us using the list of all phyints + * in an ipsq and the ills from the phyint thru the phyint_ill*. + * If we don't set it now, we risk a missed wakeup. + */ + if ((ret = ill_init_common(ill, q, B_FALSE, B_FALSE, B_TRUE)) != 0) { + freemsg(info_mp); + return (ret); + } + + ill->ill_state_flags |= ILL_LL_SUBNET_PENDING; + /* Send down the Info Request to the driver. */ info_mp->b_datap->db_type = M_PCPROTO; dlir = (dl_info_req_t *)info_mp->b_rptr; @@ -3687,10 +3725,8 @@ ill_lookup_on_name(char *name, boolean_t do_alloc, boolean_t isv6, if (ill == NULL) goto done; - *ill = ill_null; - mutex_init(&ill->ill_lock, NULL, MUTEX_DEFAULT, NULL); + bzero(ill, sizeof (*ill)); ill->ill_ipst = ipst; - list_create(&ill->ill_nce, sizeof (nce_t), offsetof(nce_t, nce_node)); netstack_hold(ipst->ips_netstack); /* * For exclusive stacks we set the zoneid to zero @@ -3698,25 +3734,12 @@ ill_lookup_on_name(char *name, boolean_t do_alloc, boolean_t isv6, */ ill->ill_zoneid = GLOBAL_ZONEID; - ill->ill_phyint = (phyint_t *)mi_zalloc(sizeof (phyint_t)); - if (ill->ill_phyint == NULL) + if (ill_init_common(ill, NULL, isv6, B_TRUE, B_FALSE) != 0) goto done; - if (isv6) - ill->ill_phyint->phyint_illv6 = ill; - else - ill->ill_phyint->phyint_illv4 = ill; - mutex_init(&ill->ill_phyint->phyint_lock, NULL, MUTEX_DEFAULT, 0); - phyint_flags_init(ill->ill_phyint, DL_LOOP); - - if (isv6) { - ill->ill_isv6 = B_TRUE; - ill->ill_max_frag = ip_loopback_mtu_v6plus; - } else { - ill->ill_max_frag = ip_loopback_mtuplus; - } if (!ill_allocate_mibs(ill)) goto done; + ill->ill_current_frag = ill->ill_max_frag; ill->ill_mtu = ill->ill_max_frag; /* Initial value */ ill->ill_mc_mtu = ill->ill_mtu; @@ -3732,21 +3755,6 @@ ill_lookup_on_name(char *name, boolean_t do_alloc, boolean_t isv6, /* Set ill_dlpi_pending for ipsq_current_finish() to work properly */ ill->ill_dlpi_pending = DL_PRIM_INVAL; - 
rw_init(&ill->ill_mcast_lock, NULL, RW_DEFAULT, NULL); - mutex_init(&ill->ill_mcast_serializer, NULL, MUTEX_DEFAULT, NULL); - ill->ill_global_timer = INFINITY; - ill->ill_mcast_v1_time = ill->ill_mcast_v2_time = 0; - ill->ill_mcast_v1_tset = ill->ill_mcast_v2_tset = 0; - ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS; - ill->ill_mcast_qi = MCAST_DEF_QUERY_INTERVAL; - - /* No resolver here. */ - ill->ill_net_type = IRE_LOOPBACK; - - /* Initialize the ipsq */ - if (!ipsq_init(ill, B_FALSE)) - goto done; - ipif = ipif_allocate(ill, 0L, IRE_LOOPBACK, B_TRUE, B_TRUE, NULL); if (ipif == NULL) goto done; @@ -3775,17 +3783,10 @@ ill_lookup_on_name(char *name, boolean_t do_alloc, boolean_t isv6, * Chain us in at the end of the ill list. hold the ill * before we make it globally visible. 1 for the lookup. */ - ill->ill_refcnt = 0; ill_refhold(ill); - ill->ill_frag_count = 0; - ill->ill_frag_free_num_pkts = 0; - ill->ill_last_frag_clean_time = 0; - ipsq = ill->ill_phyint->phyint_ipsq; - ill_set_inputfn(ill); - if (ill_glist_insert(ill, "lo", isv6) != 0) cmn_err(CE_PANIC, "cannot insert loopback interface"); diff --git a/usr/src/uts/common/inet/ip/ip_squeue.c b/usr/src/uts/common/inet/ip/ip_squeue.c index 33a2fa5935..dedb4dadcc 100644 --- a/usr/src/uts/common/inet/ip/ip_squeue.c +++ b/usr/src/uts/common/inet/ip/ip_squeue.c @@ -163,7 +163,7 @@ ip_squeue_create(pri_t pri) { squeue_t *sqp; - sqp = squeue_create(ip_squeue_worker_wait, pri); + sqp = squeue_create(ip_squeue_worker_wait, pri, B_TRUE); ASSERT(sqp != NULL); if (ip_squeue_create_callback != NULL) ip_squeue_create_callback(sqp); diff --git a/usr/src/uts/common/inet/ip/ip_tunables.c b/usr/src/uts/common/inet/ip/ip_tunables.c index 58e3b59ff3..dda05a316d 100644 --- a/usr/src/uts/common/inet/ip/ip_tunables.c +++ b/usr/src/uts/common/inet/ip/ip_tunables.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, Joyent, Inc. All rights reserved. 
* Copyright (c) 2013 by Delphix. All rights reserved. * Copyright (c) 2012, Joyent, Inc. All rights reserved. */ diff --git a/usr/src/uts/common/inet/ip/ipsecesp.c b/usr/src/uts/common/inet/ip/ipsecesp.c index c325e8dc26..2ca770ebe9 100644 --- a/usr/src/uts/common/inet/ip/ipsecesp.c +++ b/usr/src/uts/common/inet/ip/ipsecesp.c @@ -234,8 +234,7 @@ esp_kstat_init(ipsecesp_stack_t *espstack, netstackid_t stackid) { espstack->esp_ksp = kstat_create_netstack("ipsecesp", 0, "esp_stat", "net", KSTAT_TYPE_NAMED, - sizeof (esp_kstats_t) / sizeof (kstat_named_t), - KSTAT_FLAG_PERSISTENT, stackid); + sizeof (esp_kstats_t) / sizeof (kstat_named_t), 0, stackid); if (espstack->esp_ksp == NULL || espstack->esp_ksp->ks_data == NULL) return (B_FALSE); diff --git a/usr/src/uts/common/inet/ipf/ip_fil_solaris.c b/usr/src/uts/common/inet/ipf/ip_fil_solaris.c index 20dc18b588..0c3bb327ba 100644 --- a/usr/src/uts/common/inet/ipf/ip_fil_solaris.c +++ b/usr/src/uts/common/inet/ipf/ip_fil_solaris.c @@ -5,7 +5,7 @@ * * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. * - * Copyright (c) 2013, Joyent, Inc. All rights reserved. + * Copyright (c) 2014, Joyent, Inc. All rights reserved. 
*/ #if !defined(lint) @@ -83,6 +83,14 @@ static int ipf_hook6_loop_out __P((hook_event_token_t, hook_data_t, static int ipf_hook6_loop_in __P((hook_event_token_t, hook_data_t, void *)); static int ipf_hook6 __P((hook_data_t, int, int, void *)); +static int ipf_hookvndl3v4_in __P((hook_event_token_t, hook_data_t, + void *)); +static int ipf_hookvndl3v6_in __P((hook_event_token_t, hook_data_t, + void *)); +static int ipf_hookvndl3v4_out __P((hook_event_token_t, hook_data_t, + void *)); +static int ipf_hookvndl3v6_out __P((hook_event_token_t, hook_data_t, + void *)); extern int ipf_geniter __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *)); extern int ipf_frruleiter __P((void *, int, void *, ipf_stack_t *)); @@ -152,6 +160,16 @@ char *hook6_loop_in_gz = "ipfilter_hook6_loop_in_gz"; char *hook6_loop_out = "ipfilter_hook6_loop_out"; char *hook6_loop_out_gz = "ipfilter_hook6_loop_out_gz"; +/* vnd IPv4/v6 hook names */ +char *hook4_vnd_in = "ipfilter_hookvndl3v4_in"; +char *hook4_vnd_in_gz = "ipfilter_hookvndl3v4_in_gz"; +char *hook6_vnd_in = "ipfilter_hookvndl3v6_in"; +char *hook6_vnd_in_gz = "ipfilter_hookvndl3v6_in_gz"; +char *hook4_vnd_out = "ipfilter_hookvndl3v4_out"; +char *hook4_vnd_out_gz = "ipfilter_hookvndl3v4_out_gz"; +char *hook6_vnd_out = "ipfilter_hookvndl3v6_out"; +char *hook6_vnd_out_gz = "ipfilter_hookvndl3v6_out_gz"; + /* ------------------------------------------------------------------------ */ /* Function: ipldetach */ /* Returns: int - 0 == success, else error. 
*/ @@ -248,6 +266,31 @@ ipf_stack_t *ifs; ifs->ifs_ipf_ipv4 = NULL; } + /* + * Remove VND hooks + */ + if (ifs->ifs_ipf_vndl3v4 != NULL) { + UNDO_HOOK(ifs_ipf_vndl3v4, ifs_hookvndl3v4_physical_in, + NH_PHYSICAL_IN, ifs_ipfhookvndl3v4_in); + UNDO_HOOK(ifs_ipf_vndl3v4, ifs_hookvndl3v4_physical_out, + NH_PHYSICAL_OUT, ifs_ipfhookvndl3v4_out); + + if (net_protocol_release(ifs->ifs_ipf_vndl3v4) != 0) + goto detach_failed; + ifs->ifs_ipf_vndl3v4 = NULL; + } + + if (ifs->ifs_ipf_vndl3v6 != NULL) { + UNDO_HOOK(ifs_ipf_vndl3v6, ifs_hookvndl3v6_physical_in, + NH_PHYSICAL_IN, ifs_ipfhookvndl3v6_in); + UNDO_HOOK(ifs_ipf_vndl3v6, ifs_hookvndl3v6_physical_out, + NH_PHYSICAL_OUT, ifs_ipfhookvndl3v6_out); + + if (net_protocol_release(ifs->ifs_ipf_vndl3v6) != 0) + goto detach_failed; + ifs->ifs_ipf_vndl3v6 = NULL; + } + #undef UNDO_HOOK #ifdef IPFDEBUG @@ -445,6 +488,48 @@ ipf_stack_t *ifs; } /* + * Add VND INET hooks + */ + ifs->ifs_ipf_vndl3v4 = net_protocol_lookup(id, NHF_VND_INET); + if (ifs->ifs_ipf_vndl3v4 == NULL) + goto hookup_failed; + + HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhookvndl3v4_in, ipf_hookvndl3v4_in, + hook4_vnd_in, hook4_vnd_in_gz, ifs); + HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhookvndl3v4_out, ipf_hookvndl3v4_out, + hook4_vnd_out, hook4_vnd_out_gz, ifs); + ifs->ifs_hookvndl3v4_physical_in = (net_hook_register(ifs->ifs_ipf_vndl3v4, + NH_PHYSICAL_IN, ifs->ifs_ipfhookvndl3v4_in) == 0); + if (!ifs->ifs_hookvndl3v4_physical_in) + goto hookup_failed; + + ifs->ifs_hookvndl3v4_physical_out = (net_hook_register(ifs->ifs_ipf_vndl3v4, + NH_PHYSICAL_OUT, ifs->ifs_ipfhookvndl3v4_out) == 0); + if (!ifs->ifs_hookvndl3v4_physical_out) + goto hookup_failed; + + + /* + * VND INET6 hooks + */ + ifs->ifs_ipf_vndl3v6 = net_protocol_lookup(id, NHF_VND_INET6); + if (ifs->ifs_ipf_vndl3v6 == NULL) + goto hookup_failed; + + HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhookvndl3v6_in, ipf_hookvndl3v6_in, + hook6_vnd_in, hook6_vnd_in_gz, ifs); + HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhookvndl3v6_out, ipf_hookvndl3v6_out, + 
hook6_vnd_out, hook6_vnd_out_gz, ifs); + ifs->ifs_hookvndl3v6_physical_in = (net_hook_register(ifs->ifs_ipf_vndl3v6, + NH_PHYSICAL_IN, ifs->ifs_ipfhookvndl3v6_in) == 0); + if (!ifs->ifs_hookvndl3v6_physical_in) + goto hookup_failed; + + ifs->ifs_hookvndl3v6_physical_out = (net_hook_register(ifs->ifs_ipf_vndl3v6, + NH_PHYSICAL_OUT, ifs->ifs_ipfhookvndl3v6_out) == 0); + if (!ifs->ifs_hookvndl3v6_physical_out) + goto hookup_failed; + /* * Reacquire ipf_global, now it is safe. */ WRITE_ENTER(&ifs->ifs_ipf_global); @@ -1011,7 +1096,6 @@ cred_t *cp; return ENXIO; unit = isp->ipfs_minor; - /* * ipf_find_stack returns with a read lock on ifs_ipf_global */ @@ -2015,6 +2099,42 @@ int ipf_hook6_loop_out(hook_event_token_t token, hook_data_t info, void *arg) } /* ------------------------------------------------------------------------ */ +/* Function: ipf_hookvndl3_in */ +/* Returns: int - 0 == packet ok, else problem, free packet if not done */ +/* Parameters: event(I) - pointer to event */ +/* info(I) - pointer to hook information for firewalling */ +/* */ +/* The vnd hooks are private hooks to ON. They represent a layer 2 */ +/* datapath generally used to implement virtual machines. The driver sends */ +/* along L3 packets of either type IP or IPv6. The ethertype to distinguish */ +/* them is in the upper 16 bits while the remaining bits are the */ +/* traditional packet hook flags. */ +/* */ +/* They end up calling the appropriate traditional ip hooks. 
*/ +/* ------------------------------------------------------------------------ */ +/*ARGSUSED*/ +int ipf_hookvndl3v4_in(hook_event_token_t token, hook_data_t info, void *arg) +{ + return ipf_hook4_in(token, info, arg); +} + +int ipf_hookvndl3v6_in(hook_event_token_t token, hook_data_t info, void *arg) +{ + return ipf_hook6_in(token, info, arg); +} + +/*ARGSUSED*/ +int ipf_hookvndl3v4_out(hook_event_token_t token, hook_data_t info, void *arg) +{ + return ipf_hook4_out(token, info, arg); +} + +int ipf_hookvndl3v6_out(hook_event_token_t token, hook_data_t info, void *arg) +{ + return ipf_hook6_out(token, info, arg); +} + +/* ------------------------------------------------------------------------ */ /* Function: ipf_hook4_loop_in */ /* Returns: int - 0 == packet ok, else problem, free packet if not done */ /* Parameters: event(I) - pointer to event */ diff --git a/usr/src/uts/common/inet/ipf/ipf.conf b/usr/src/uts/common/inet/ipf/ipf.conf index 6b36f9fdbf..f49e024a72 100644 --- a/usr/src/uts/common/inet/ipf/ipf.conf +++ b/usr/src/uts/common/inet/ipf/ipf.conf @@ -1,3 +1,8 @@ # # name="ipf" parent="pseudo" instance=0; + +# Increase the state table limits. fr_statemax should be ~70% of fr_statesize, +# and both should be prime numbers +fr_statesize=151007; +fr_statemax=113279; diff --git a/usr/src/uts/common/inet/ipf/netinet/ipf_stack.h b/usr/src/uts/common/inet/ipf/netinet/ipf_stack.h index a239f1c1ca..9aa2478c6a 100644 --- a/usr/src/uts/common/inet/ipf/netinet/ipf_stack.h +++ b/usr/src/uts/common/inet/ipf/netinet/ipf_stack.h @@ -125,6 +125,10 @@ struct ipf_stack { hook_t *ifs_ipfhook6_loop_in; hook_t *ifs_ipfhook6_loop_out; hook_t *ifs_ipfhook6_nicevents; + hook_t *ifs_ipfhookvndl3v4_in; + hook_t *ifs_ipfhookvndl3v6_in; + hook_t *ifs_ipfhookvndl3v4_out; + hook_t *ifs_ipfhookvndl3v6_out; /* flags to indicate whether hooks are registered. 
*/ boolean_t ifs_hook4_physical_in; @@ -137,10 +141,16 @@ struct ipf_stack { boolean_t ifs_hook6_nic_events; boolean_t ifs_hook6_loopback_in; boolean_t ifs_hook6_loopback_out; + boolean_t ifs_hookvndl3v4_physical_in; + boolean_t ifs_hookvndl3v6_physical_in; + boolean_t ifs_hookvndl3v4_physical_out; + boolean_t ifs_hookvndl3v6_physical_out; int ifs_ipf_loopback; net_handle_t ifs_ipf_ipv4; net_handle_t ifs_ipf_ipv6; + net_handle_t ifs_ipf_vndl3v4; + net_handle_t ifs_ipf_vndl3v6; /* ip_auth.c */ int ifs_fr_authsize; diff --git a/usr/src/uts/common/inet/ipf/solaris.c b/usr/src/uts/common/inet/ipf/solaris.c index c541f4dddc..5d56debc31 100644 --- a/usr/src/uts/common/inet/ipf/solaris.c +++ b/usr/src/uts/common/inet/ipf/solaris.c @@ -625,7 +625,6 @@ ipf_stack_shutdown(const netid_t id, void *arg) /* * Destroy things for ipf for one stack. */ -/* ARGSUSED */ static void ipf_stack_destroy_one(const netid_t id, ipf_stack_t *ifs) { diff --git a/usr/src/uts/common/inet/squeue.c b/usr/src/uts/common/inet/squeue.c index 2e08dc359b..1009f0700f 100644 --- a/usr/src/uts/common/inet/squeue.c +++ b/usr/src/uts/common/inet/squeue.c @@ -23,7 +23,7 @@ */ /* - * Copyright 2012 Joyent, Inc. All rights reserved. + * Copyright (c) 2014 Joyent, Inc. All rights reserved. */ /* @@ -61,6 +61,10 @@ * connection are processed on that squeue. The connection ("conn") to * squeue mapping is stored in "conn_t" member "conn_sqp". * + * If the squeue is not related to TCP/IP, then the value of sqp->sq_isip is + * false and it will not have an associated conn_t, which means many aspects of + * the system, such as polling and switching squeues, will not be used. 
+ * * Since the processing of the connection cuts across multiple layers * but still allows packets for different connnection to be processed on * other CPU/squeues, squeues are also termed as "Vertical Perimeter" or @@ -244,7 +248,7 @@ squeue_init(void) /* ARGSUSED */ squeue_t * -squeue_create(clock_t wait, pri_t pri) +squeue_create(clock_t wait, pri_t pri, boolean_t isip) { squeue_t *sqp = kmem_cache_alloc(squeue_cache, KM_SLEEP); @@ -260,11 +264,36 @@ squeue_create(clock_t wait, pri_t pri) sqp->sq_enter = squeue_enter; sqp->sq_drain = squeue_drain; + sqp->sq_isip = isip; return (sqp); } /* + * We need to kill the threads and then clean up. We should VERIFY that + * polling is disabled so we don't have to worry about disassociating from + * MAC/IP/etc. + */ +void +squeue_destroy(squeue_t *sqp) +{ + kt_did_t worker, poll; + mutex_enter(&sqp->sq_lock); + VERIFY(!(sqp->sq_state & (SQS_POLL_THR_QUIESCED | + SQS_POLL_QUIESCE_DONE | SQS_PAUSE | SQS_EXIT))); + worker = sqp->sq_worker->t_did; + poll = sqp->sq_poll_thr->t_did; + sqp->sq_state |= SQS_EXIT; + cv_signal(&sqp->sq_poll_cv); + cv_signal(&sqp->sq_worker_cv); + mutex_exit(&sqp->sq_lock); + + thread_join(poll); + thread_join(worker); + kmem_cache_free(squeue_cache, sqp); +} + +/* * Bind squeue worker thread to the specified CPU, given by CPU id. * If the CPU id value is -1, bind the worker thread to the value * specified in sq_bind field. If a thread is already bound to a @@ -475,18 +504,21 @@ squeue_enter(squeue_t *sqp, mblk_t *mp, mblk_t *tail, uint32_t cnt, * Handle squeue switching. 
More details in the * block comment at the top of the file */ - if (connp->conn_sqp == sqp) { + if (sqp->sq_isip == B_FALSE || connp->conn_sqp == sqp) { SQUEUE_DBG_SET(sqp, mp, proc, connp, tag); - connp->conn_on_sqp = B_TRUE; + if (sqp->sq_isip == B_TRUE) + connp->conn_on_sqp = B_TRUE; DTRACE_PROBE3(squeue__proc__start, squeue_t *, sqp, mblk_t *, mp, conn_t *, connp); (*proc)(connp, mp, sqp, ira); DTRACE_PROBE2(squeue__proc__end, squeue_t *, sqp, conn_t *, connp); - connp->conn_on_sqp = B_FALSE; + if (sqp->sq_isip == B_TRUE) { + connp->conn_on_sqp = B_FALSE; + CONN_DEC_REF(connp); + } SQUEUE_DBG_CLEAR(sqp); - CONN_DEC_REF(connp); } else { SQUEUE_ENTER_ONE(connp->conn_sqp, mp, proc, connp, ira, SQ_FILL, SQTAG_SQUEUE_CHANGE); @@ -513,7 +545,7 @@ squeue_enter(squeue_t *sqp, mblk_t *mp, mblk_t *tail, uint32_t cnt, return; } } else { - if (ira != NULL) { + if (sqp->sq_isip == B_TRUE && ira != NULL) { mblk_t *attrmp; ASSERT(cnt == 1); @@ -587,7 +619,8 @@ squeue_enter(squeue_t *sqp, mblk_t *mp, mblk_t *tail, uint32_t cnt, if (!(sqp->sq_state & SQS_REENTER) && (process_flag != SQ_FILL) && (sqp->sq_first == NULL) && (sqp->sq_run == curthread) && (cnt == 1) && - (connp->conn_on_sqp == B_FALSE)) { + (sqp->sq_isip == B_FALSE || + connp->conn_on_sqp == B_FALSE)) { sqp->sq_state |= SQS_REENTER; mutex_exit(&sqp->sq_lock); @@ -602,15 +635,21 @@ squeue_enter(squeue_t *sqp, mblk_t *mp, mblk_t *tail, uint32_t cnt, * Handle squeue switching. 
More details in the * block comment at the top of the file */ - if (connp->conn_sqp == sqp) { - connp->conn_on_sqp = B_TRUE; + if (sqp->sq_isip == B_FALSE || connp->conn_sqp == sqp) { + SQUEUE_DBG_SET(sqp, mp, proc, connp, + tag); + if (sqp->sq_isip == B_TRUE) + connp->conn_on_sqp = B_TRUE; DTRACE_PROBE3(squeue__proc__start, squeue_t *, sqp, mblk_t *, mp, conn_t *, connp); (*proc)(connp, mp, sqp, ira); DTRACE_PROBE2(squeue__proc__end, squeue_t *, sqp, conn_t *, connp); - connp->conn_on_sqp = B_FALSE; - CONN_DEC_REF(connp); + if (sqp->sq_isip == B_TRUE) { + connp->conn_on_sqp = B_FALSE; + CONN_DEC_REF(connp); + } + SQUEUE_DBG_CLEAR(sqp); } else { SQUEUE_ENTER_ONE(connp->conn_sqp, mp, proc, connp, ira, SQ_FILL, SQTAG_SQUEUE_CHANGE); @@ -631,7 +670,7 @@ squeue_enter(squeue_t *sqp, mblk_t *mp, mblk_t *tail, uint32_t cnt, #ifdef DEBUG mp->b_tag = tag; #endif - if (ira != NULL) { + if (sqp->sq_isip && ira != NULL) { mblk_t *attrmp; ASSERT(cnt == 1); @@ -779,7 +818,7 @@ again: mp->b_prev = NULL; /* Is there an ip_recv_attr_t to handle? */ - if (ip_recv_attr_is_mblk(mp)) { + if (sqp->sq_isip == B_TRUE && ip_recv_attr_is_mblk(mp)) { mblk_t *attrmp = mp; ASSERT(attrmp->b_cont != NULL); @@ -804,20 +843,25 @@ again: /* - * Handle squeue switching. More details in the - * block comment at the top of the file + * Handle squeue switching. More details in the block comment at + * the top of the file. non-IP squeues cannot switch, as there + * is no conn_t. 
*/ - if (connp->conn_sqp == sqp) { + if (sqp->sq_isip == B_FALSE || connp->conn_sqp == sqp) { SQUEUE_DBG_SET(sqp, mp, proc, connp, mp->b_tag); - connp->conn_on_sqp = B_TRUE; + if (sqp->sq_isip == B_TRUE) + connp->conn_on_sqp = B_TRUE; DTRACE_PROBE3(squeue__proc__start, squeue_t *, sqp, mblk_t *, mp, conn_t *, connp); (*proc)(connp, mp, sqp, ira); DTRACE_PROBE2(squeue__proc__end, squeue_t *, sqp, conn_t *, connp); - connp->conn_on_sqp = B_FALSE; - CONN_DEC_REF(connp); + if (sqp->sq_isip == B_TRUE) { + connp->conn_on_sqp = B_FALSE; + CONN_DEC_REF(connp); + } + SQUEUE_DBG_CLEAR(sqp); } else { SQUEUE_ENTER_ONE(connp->conn_sqp, mp, proc, connp, ira, SQ_FILL, SQTAG_SQUEUE_CHANGE); @@ -1051,6 +1095,11 @@ squeue_polling_thread(squeue_t *sqp) cv_wait(async, lock); CALLB_CPR_SAFE_END(&cprinfo, lock); + if (sqp->sq_state & SQS_EXIT) { + mutex_exit(lock); + thread_exit(); + } + ctl_state = sqp->sq_state & (SQS_POLL_THR_CONTROL | SQS_POLL_THR_QUIESCED); if (ctl_state != 0) { @@ -1076,6 +1125,9 @@ squeue_polling_thread(squeue_t *sqp) (SQS_PROC|SQS_POLLING|SQS_GET_PKTS)) == (SQS_PROC|SQS_POLLING|SQS_GET_PKTS)); + /* Only IP related squeues should reach this point */ + VERIFY(sqp->sq_isip == B_TRUE); + poll_again: sq_rx_ring = sqp->sq_rx_ring; sq_get_pkts = sq_rx_ring->rr_rx; @@ -1205,6 +1257,7 @@ squeue_worker_thr_control(squeue_t *sqp) ill_rx_ring_t *rx_ring; ASSERT(MUTEX_HELD(&sqp->sq_lock)); + VERIFY(sqp->sq_isip == B_TRUE); if (sqp->sq_state & SQS_POLL_RESTART) { /* Restart implies a previous quiesce. */ @@ -1316,6 +1369,11 @@ squeue_worker(squeue_t *sqp) for (;;) { for (;;) { + if (sqp->sq_state & SQS_EXIT) { + mutex_exit(lock); + thread_exit(); + } + /* * If the poll thread has handed control to us * we need to break out of the wait. 
@@ -1412,6 +1470,7 @@ squeue_synch_enter(conn_t *connp, mblk_t *use_mp) again: sqp = connp->conn_sqp; + VERIFY(sqp->sq_isip == B_TRUE); mutex_enter(&sqp->sq_lock); if (sqp->sq_first == NULL && !(sqp->sq_state & SQS_PROC)) { @@ -1487,6 +1546,7 @@ void squeue_synch_exit(conn_t *connp) { squeue_t *sqp = connp->conn_sqp; + VERIFY(sqp->sq_isip == B_TRUE); mutex_enter(&sqp->sq_lock); if (sqp->sq_run == curthread) { diff --git a/usr/src/uts/common/inet/tcp_impl.h b/usr/src/uts/common/inet/tcp_impl.h index 1b20e40aca..3488630f2c 100644 --- a/usr/src/uts/common/inet/tcp_impl.h +++ b/usr/src/uts/common/inet/tcp_impl.h @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, Joyent Inc. All rights reserved. + * Copyright (c) 2013, Joyent Inc. All rights reserved. * Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved. * Copyright (c) 2013, 2014 by Delphix. All rights reserved. */ @@ -61,9 +61,9 @@ extern sock_downcalls_t sock_tcp_downcalls; * by setting it to 0. */ #define TCP_XMIT_LOWATER 4096 -#define TCP_XMIT_HIWATER 49152 +#define TCP_XMIT_HIWATER 128000 #define TCP_RECV_LOWATER 2048 -#define TCP_RECV_HIWATER 128000 +#define TCP_RECV_HIWATER 1048576 /* * Bind hash list size and has function. It has to be a power of 2 for |
