diff options
Diffstat (limited to 'usr/src')
-rw-r--r-- | usr/src/cmd/mdb/common/modules/sctp/sctp.c | 17 | ||||
-rw-r--r-- | usr/src/uts/common/inet/ip.h | 2 | ||||
-rw-r--r-- | usr/src/uts/common/inet/sctp/sctp.c | 10 | ||||
-rw-r--r-- | usr/src/uts/common/inet/sctp/sctp_asconf.c | 25 | ||||
-rw-r--r-- | usr/src/uts/common/inet/sctp/sctp_bind.c | 2 | ||||
-rw-r--r-- | usr/src/uts/common/inet/sctp/sctp_common.c | 317 | ||||
-rw-r--r-- | usr/src/uts/common/inet/sctp/sctp_conn.c | 9 | ||||
-rw-r--r-- | usr/src/uts/common/inet/sctp/sctp_cookie.c | 3 | ||||
-rw-r--r-- | usr/src/uts/common/inet/sctp/sctp_error.c | 7 | ||||
-rw-r--r-- | usr/src/uts/common/inet/sctp/sctp_heartbeat.c | 6 | ||||
-rw-r--r-- | usr/src/uts/common/inet/sctp/sctp_impl.h | 112 | ||||
-rw-r--r-- | usr/src/uts/common/inet/sctp/sctp_init.c | 12 | ||||
-rw-r--r-- | usr/src/uts/common/inet/sctp/sctp_input.c | 47 | ||||
-rw-r--r-- | usr/src/uts/common/inet/sctp/sctp_opt_data.c | 7 | ||||
-rw-r--r-- | usr/src/uts/common/inet/sctp/sctp_output.c | 461 | ||||
-rw-r--r-- | usr/src/uts/common/inet/sctp/sctp_shutdown.c | 38 | ||||
-rw-r--r-- | usr/src/uts/common/inet/sctp/sctp_snmp.c | 55 | ||||
-rw-r--r-- | usr/src/uts/common/inet/sctp/sctp_timer.c | 8 |
18 files changed, 814 insertions, 324 deletions
diff --git a/usr/src/cmd/mdb/common/modules/sctp/sctp.c b/usr/src/cmd/mdb/common/modules/sctp/sctp.c index 90d733bc04..83904321bb 100644 --- a/usr/src/cmd/mdb/common/modules/sctp/sctp.c +++ b/usr/src/cmd/mdb/common/modules/sctp/sctp.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -19,8 +18,9 @@ * * CDDL HEADER END */ + /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -569,8 +569,11 @@ show_sctp_flags(sctp_t *sctp) mdb_printf("\tndelay\t\t\t%d\n", sctp->sctp_ndelay); mdb_printf("\tcondemned\t\t%d\n", sctp->sctp_condemned); mdb_printf("\tchk_fast_rexmit\t\t%d\n", sctp->sctp_chk_fast_rexmit); + mdb_printf("\tprsctp_aware\t\t%d\n", sctp->sctp_prsctp_aware); mdb_printf("\tlinklocal\t\t%d\n", sctp->sctp_linklocal); + mdb_printf("\tmac_exempt\t\t%d\n", sctp->sctp_mac_exempt); + mdb_printf("\trexmitting\t\t%d\n", sctp->sctp_rexmitting); mdb_printf("\trecvsndrcvinfo\t\t%d\n", sctp->sctp_recvsndrcvinfo); mdb_printf("\trecvassocevnt\t\t%d\n", sctp->sctp_recvassocevnt); @@ -832,11 +835,13 @@ sctp(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) "max_init_rxt\t%?d\tpa_max_rxt\t%?d\n" "pp_max_rxt\t%?d\trto_max\t\t%?u\n" "rto_min\t\t%?u\trto_initial\t%?u\n" - "init_rto_max\t%?u\n", sctp.sctp_cookie_mp, + "init_rto_max\t%?u\n" + "rxt_nxttsn\t%?u\trxt_maxtsn\t%?u\n", sctp.sctp_cookie_mp, sctp.sctp_strikes, sctp.sctp_max_init_rxt, sctp.sctp_pa_max_rxt, sctp.sctp_pp_max_rxt, sctp.sctp_rto_max, 
sctp.sctp_rto_min, - sctp.sctp_rto_initial, sctp.sctp_init_rto_max); + sctp.sctp_rto_initial, sctp.sctp_init_rto_max, + sctp.sctp_rxt_nxttsn, sctp.sctp_rxt_maxtsn); } if (opts & MDB_SCTP_SHOW_CONN) { diff --git a/usr/src/uts/common/inet/ip.h b/usr/src/uts/common/inet/ip.h index 1618e95bdd..1f9eb56a10 100644 --- a/usr/src/uts/common/inet/ip.h +++ b/usr/src/uts/common/inet/ip.h @@ -83,6 +83,8 @@ extern "C" { #define TCP_DEV_NAME "/dev/tcp" #define TCP6_DEV_NAME "/dev/tcp6" +#define SCTP_MOD_NAME "sctp" + /* Minor numbers */ #define IPV4_MINOR 0 #define IPV6_MINOR 1 diff --git a/usr/src/uts/common/inet/sctp/sctp.c b/usr/src/uts/common/inet/sctp/sctp.c index 33820e4cb4..cc9c1345ad 100644 --- a/usr/src/uts/common/inet/sctp/sctp.c +++ b/usr/src/uts/common/inet/sctp/sctp.c @@ -324,7 +324,6 @@ int sctp_disconnect(sctp_t *sctp) { int error = 0; - sctp_faddr_t *fp; dprint(3, ("sctp_disconnect %p, state %d\n", (void *)sctp, sctp->sctp_state)); @@ -375,8 +374,7 @@ sctp_disconnect(sctp_t *sctp) sctp_send_shutdown(sctp, 0); /* Pass gathered wisdom to IP for keeping */ - for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->next) - sctp_faddr2ire(sctp, fp); + sctp_update_ire(sctp); /* * If lingering on close then wait until the shutdown @@ -1298,9 +1296,10 @@ sctp_create(void *sctp_ulpd, sctp_t *parent, int family, int flags, if (credp == NULL) return (NULL); - if ((sctp_connp = ipcl_conn_create(IPCL_SCTPCONN, sleep)) == NULL) + if ((sctp_connp = ipcl_conn_create(IPCL_SCTPCONN, sleep)) == NULL) { + SCTP_KSTAT(sctp_conn_create); return (NULL); - + } sctp_connp->conn_ulp_labeled = is_system_labeled(); psctp = (sctp_t *)parent; @@ -1698,6 +1697,7 @@ sctp_find_next_tq(sctp_t *sctp) sctp->sctp_recvq_tq = tq; return (B_TRUE); } + SCTP_KSTAT(sctp_find_next_tq); return (B_FALSE); } diff --git a/usr/src/uts/common/inet/sctp/sctp_asconf.c b/usr/src/uts/common/inet/sctp/sctp_asconf.c index 6dfa20b156..a242564f14 100644 --- a/usr/src/uts/common/inet/sctp/sctp_asconf.c +++ 
b/usr/src/uts/common/inet/sctp/sctp_asconf.c @@ -18,6 +18,7 @@ * * CDDL HEADER END */ + /* * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. @@ -416,6 +417,7 @@ sctp_input_asconf(sctp_t *sctp, sctp_chunk_hdr_t *ch, sctp_faddr_t *fp) hmp = sctp_make_mp(sctp, fp, sizeof (*ach) + sizeof (*idp)); if (hmp == NULL) { /* Let the peer retransmit */ + SCTP_KSTAT(sctp_send_asconf_ack_failed); return; } ach = (sctp_chunk_hdr_t *)hmp->b_wptr; @@ -478,6 +480,7 @@ sctp_input_asconf(sctp_t *sctp, sctp_chunk_hdr_t *ch, sctp_faddr_t *fp) alist = kmem_alloc(asize, KM_NOSLEEP); if (alist == NULL) { freeb(hmp); + SCTP_KSTAT(sctp_cl_assoc_change); return; } } @@ -488,6 +491,7 @@ sctp_input_asconf(sctp_t *sctp, sctp_chunk_hdr_t *ch, sctp_faddr_t *fp) if (acount > 0) kmem_free(alist, asize); freeb(hmp); + SCTP_KSTAT(sctp_cl_assoc_change); return; } } @@ -927,6 +931,7 @@ sctp_wput_asconf(sctp_t *sctp, sctp_faddr_t *fp) ipmp = sctp_make_mp(sctp, fp, 0); if (ipmp == NULL) { SCTP_FADDR_RC_TIMER_RESTART(sctp, fp, fp->rto); + SCTP_KSTAT(sctp_send_asconf_failed); return; } mp = sctp->sctp_cxmit_list; @@ -1175,7 +1180,7 @@ sctp_addip_req(sctp_t *sctp, sctp_parm_hdr_t *ph, uint32_t cid, } /* Add the new address */ mutex_enter(&sctp->sctp_conn_tfp->tf_lock); - err = sctp_add_faddr(sctp, &addr, KM_NOSLEEP); + err = sctp_add_faddr(sctp, &addr, KM_NOSLEEP, B_FALSE); mutex_exit(&sctp->sctp_conn_tfp->tf_lock); if (err == ENOMEM) { /* no memory */ @@ -1223,13 +1228,7 @@ sctp_addip_req(sctp_t *sctp, sctp_parm_hdr_t *ph, uint32_t cid, sctp->sctp_primary = fp; } if (sctp->sctp_current == nfp) { - sctp->sctp_current = fp; - sctp->sctp_mss = fp->sfa_pmss; - sctp_faddr2hdraddr(fp, sctp); - - if (!SCTP_IS_DETACHED(sctp)) { - sctp_set_ulp_prop(sctp); - } + sctp_set_faddr_current(sctp, fp); } if (sctp->sctp_lastdata == nfp) { sctp->sctp_lastdata = fp; @@ -1376,15 +1375,7 @@ sctp_setprim_req(sctp_t *sctp, sctp_parm_hdr_t *ph, uint32_t cid, if (nfp->state != 
SCTP_FADDRS_ALIVE || nfp == sctp->sctp_current) { return (NULL); } - sctp->sctp_current = nfp; - sctp->sctp_mss = nfp->sfa_pmss; - - /* Reset the addrs in the composite header */ - sctp_faddr2hdraddr(nfp, sctp); - if (!SCTP_IS_DETACHED(sctp)) { - sctp_set_ulp_prop(sctp); - } - + sctp_set_faddr_current(sctp, nfp); return (NULL); } diff --git a/usr/src/uts/common/inet/sctp/sctp_bind.c b/usr/src/uts/common/inet/sctp/sctp_bind.c index 9933bf4996..2cbb7c21ee 100644 --- a/usr/src/uts/common/inet/sctp/sctp_bind.c +++ b/usr/src/uts/common/inet/sctp/sctp_bind.c @@ -18,6 +18,7 @@ * * CDDL HEADER END */ + /* * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. @@ -313,6 +314,7 @@ sctp_bind_add(sctp_t *sctp, const void *addrs, uint32_t addrcnt, ASSERT(size == 0); if (!caller_hold_lock) WAKE_SCTP(sctp); + SCTP_KSTAT(sctp_cl_check_addrs); return (err); } ASSERT(addrlist != NULL); diff --git a/usr/src/uts/common/inet/sctp/sctp_common.c b/usr/src/uts/common/inet/sctp/sctp_common.c index 516df089eb..0caf1913ca 100644 --- a/usr/src/uts/common/inet/sctp/sctp_common.c +++ b/usr/src/uts/common/inet/sctp/sctp_common.c @@ -18,6 +18,7 @@ * * CDDL HEADER END */ + /* * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. @@ -55,9 +56,9 @@ #include "sctp_addr.h" static struct kmem_cache *sctp_kmem_faddr_cache; -static void sctp_init_faddr(sctp_t *, sctp_faddr_t *, in6_addr_t *); +static void sctp_init_faddr(sctp_t *, sctp_faddr_t *, in6_addr_t *, mblk_t *); -/* Set the source address. Refer to comments in sctp_ire2faddr(). */ +/* Set the source address. Refer to comments in sctp_get_ire(). */ static void set_saddr(sctp_t *sctp, sctp_faddr_t *fp) { @@ -82,15 +83,15 @@ set_saddr(sctp_t *sctp, sctp_faddr_t *fp) * Call this function to update the cached IRE of a peer addr fp. 
*/ void -sctp_ire2faddr(sctp_t *sctp, sctp_faddr_t *fp) +sctp_get_ire(sctp_t *sctp, sctp_faddr_t *fp) { - ire_t *ire; - ipaddr_t addr4; - in6_addr_t laddr; + ire_t *ire; + ipaddr_t addr4; + in6_addr_t laddr; sctp_saddr_ipif_t *sp; - uint_t ipif_seqid; - int hdrlen; - ts_label_t *tsl; + uint_t ipif_seqid; + int hdrlen; + ts_label_t *tsl; /* Remove the previous cache IRE */ if ((ire = fp->ire) != NULL) { @@ -141,7 +142,9 @@ sctp_ire2faddr(sctp_t *sctp, sctp_faddr_t *fp) * it won't be used to send data. */ set_saddr(sctp, fp); - goto set_current; + if (fp->state == SCTP_FADDRS_UNREACH) + return; + goto check_current; } ipif_seqid = ire->ire_ipif->ipif_seqid; @@ -154,7 +157,7 @@ sctp_ire2faddr(sctp_t *sctp, sctp_faddr_t *fp) SCTP_PRINTADDR(ire->ire_src_addr_v6))); } - /* make sure the laddr is part of this association */ + /* Make sure the laddr is part of this association */ if ((sp = sctp_ipif_lookup(sctp, ipif_seqid)) != NULL && !sp->saddr_ipif_dontsrc) { if (sp->saddr_ipif_unconfirmed == 1) @@ -162,16 +165,38 @@ sctp_ire2faddr(sctp_t *sctp, sctp_faddr_t *fp) fp->saddr = laddr; } else { dprint(2, ("ire2faddr: src addr is not part of assc\n")); - /* set the src to the first saddr and hope for the best */ + + /* + * Set the src to the first saddr and hope for the best. + * Note that we will still do the ire caching below. + * Otherwise, whenever we send a packet, we need to do + * the ire lookup again and still may not get the correct + * source address. Note that this case should very seldomly + * happen. One scenario this can happen is an app + * explicitly bind() to an address. But that address is + * not the preferred source address to send to the peer. + */ set_saddr(sctp, fp); + if (fp->state == SCTP_FADDRS_UNREACH) { + IRE_REFRELE(ire); + return; + } } - /* Cache the IRE */ + /* + * Note that ire_cache_lookup_*() returns an ire with the tracing + * bits enabled. This requires the thread holding the ire also + * do the IRE_REFRELE(). 
Thus we need to do IRE_REFHOLD_NOTR() + * and then IRE_REFRELE() the ire here to make the tracing bits + * work. + */ IRE_REFHOLD_NOTR(ire); + IRE_REFRELE(ire); + + /* Cache the IRE */ fp->ire = ire; if (fp->ire->ire_type == IRE_LOOPBACK && !sctp->sctp_loopback) sctp->sctp_loopback = 1; - IRE_REFRELE(ire); /* * Pull out RTO information for this faddr and use it if we don't @@ -209,74 +234,77 @@ sctp_ire2faddr(sctp_t *sctp, sctp_faddr_t *fp) } } -set_current: - if (fp == sctp->sctp_current) { - sctp_faddr2hdraddr(fp, sctp); - sctp->sctp_mss = fp->sfa_pmss; - if (!SCTP_IS_DETACHED(sctp)) { - sctp_set_ulp_prop(sctp); - } - } +check_current: + if (fp == sctp->sctp_current) + sctp_set_faddr_current(sctp, fp); } void -sctp_faddr2ire(sctp_t *sctp, sctp_faddr_t *fp) +sctp_update_ire(sctp_t *sctp) { - ire_t *ire; + ire_t *ire; + sctp_faddr_t *fp; - if ((ire = fp->ire) == NULL) { - return; - } - - mutex_enter(&ire->ire_lock); - - /* If the cached IRE is going away, there is no point to update it. */ - if (ire->ire_marks & IRE_MARK_CONDEMNED) { - mutex_exit(&ire->ire_lock); - IRE_REFRELE_NOTR(ire); - fp->ire = NULL; - return; - } + for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->next) { + if ((ire = fp->ire) == NULL) + continue; + mutex_enter(&ire->ire_lock); - /* - * Only record the PMTU for this faddr if we actually have - * done discovery. This prevents initialized default from - * clobbering any real info that IP may have. - */ - if (fp->pmtu_discovered) { - if (fp->isv4) { - ire->ire_max_frag = fp->sfa_pmss + sctp->sctp_hdr_len; - } else { - ire->ire_max_frag = fp->sfa_pmss + sctp->sctp_hdr6_len; + /* + * If the cached IRE is going away, there is no point to + * update it. + */ + if (ire->ire_marks & IRE_MARK_CONDEMNED) { + mutex_exit(&ire->ire_lock); + IRE_REFRELE_NOTR(ire); + fp->ire = NULL; + continue; } - } - if (sctp_rtt_updates != 0 && fp->rtt_updates >= sctp_rtt_updates) { /* - * If there is no old cached values, initialize them - * conservatively. 
Set them to be (1.5 * new value). - * This code copied from ip_ire_advise(). The cached - * value is in ms. + * Only record the PMTU for this faddr if we actually have + * done discovery. This prevents initialized default from + * clobbering any real info that IP may have. */ - if (ire->ire_uinfo.iulp_rtt != 0) { - ire->ire_uinfo.iulp_rtt = (ire->ire_uinfo.iulp_rtt + - TICK_TO_MSEC(fp->srtt)) >> 1; - } else { - ire->ire_uinfo.iulp_rtt = TICK_TO_MSEC(fp->srtt + - (fp->srtt >> 1)); + if (fp->pmtu_discovered) { + if (fp->isv4) { + ire->ire_max_frag = fp->sfa_pmss + + sctp->sctp_hdr_len; + } else { + ire->ire_max_frag = fp->sfa_pmss + + sctp->sctp_hdr6_len; + } } - if (ire->ire_uinfo.iulp_rtt_sd != 0) { - ire->ire_uinfo.iulp_rtt_sd = - (ire->ire_uinfo.iulp_rtt_sd + - TICK_TO_MSEC(fp->rttvar)) >> 1; - } else { - ire->ire_uinfo.iulp_rtt_sd = TICK_TO_MSEC(fp->rttvar + - (fp->rttvar >> 1)); + + if (sctp_rtt_updates != 0 && + fp->rtt_updates >= sctp_rtt_updates) { + /* + * If there is no old cached values, initialize them + * conservatively. Set them to be (1.5 * new value). + * This code copied from ip_ire_advise(). The cached + * value is in ms. + */ + if (ire->ire_uinfo.iulp_rtt != 0) { + ire->ire_uinfo.iulp_rtt = + (ire->ire_uinfo.iulp_rtt + + TICK_TO_MSEC(fp->srtt)) >> 1; + } else { + ire->ire_uinfo.iulp_rtt = + TICK_TO_MSEC(fp->srtt + (fp->srtt >> 1)); + } + if (ire->ire_uinfo.iulp_rtt_sd != 0) { + ire->ire_uinfo.iulp_rtt_sd = + (ire->ire_uinfo.iulp_rtt_sd + + TICK_TO_MSEC(fp->rttvar)) >> 1; + } else { + ire->ire_uinfo.iulp_rtt_sd = + TICK_TO_MSEC(fp->rttvar + + (fp->rttvar >> 1)); + } + fp->rtt_updates = 0; } - fp->rtt_updates = 0; + mutex_exit(&ire->ire_lock); } - - mutex_exit(&ire->ire_lock); } /* @@ -301,7 +329,7 @@ sctp_make_mp(sctp_t *sctp, sctp_faddr_t *sendto, int trailer) /* Try to look for another IRE again. */ if (fp->ire == NULL) - sctp_ire2faddr(sctp, fp); + sctp_get_ire(sctp, fp); /* There is no suitable source address to use, return. 
*/ if (fp->state == SCTP_FADDRS_UNREACH) @@ -462,13 +490,18 @@ sctp_compare_faddrsets(sctp_faddr_t *a1, sctp_faddr_t *a2) } /* - * Caller must hold conn fanout lock. + * Returns 0 on success, ENOMEM on memory allocation failure. If sleep + * is true, this function should never fail. The boolean parameter + * first decides whether the newly created faddr structure should be + * added at the beginning of the list or at the end. + * + * Note: caller must hold conn fanout lock. */ -static int -sctp_add_faddr_entry(sctp_t *sctp, in6_addr_t *addr, int sleep, - boolean_t first) +int +sctp_add_faddr(sctp_t *sctp, in6_addr_t *addr, int sleep, boolean_t first) { - sctp_faddr_t *faddr; + sctp_faddr_t *faddr; + mblk_t *timer_mp; if (is_system_labeled()) { ts_label_t *tsl; @@ -521,8 +554,14 @@ sctp_add_faddr_entry(sctp_t *sctp, in6_addr_t *addr, int sleep, if ((faddr = kmem_cache_alloc(sctp_kmem_faddr_cache, sleep)) == NULL) return (ENOMEM); + timer_mp = sctp_timer_alloc((sctp), sctp_rexmit_timer); + if (timer_mp == NULL) { + kmem_cache_free(sctp_kmem_faddr_cache, faddr); + return (ENOMEM); + } + ((sctpt_t *)(timer_mp->b_rptr))->sctpt_faddr = faddr; - sctp_init_faddr(sctp, faddr, addr); + sctp_init_faddr(sctp, faddr, addr, timer_mp); ASSERT(faddr->next == NULL); if (sctp->sctp_faddrs == NULL) { @@ -542,36 +581,6 @@ sctp_add_faddr_entry(sctp_t *sctp, in6_addr_t *addr, int sleep, return (0); } -/* - * Add new address to end of list. - * Returns 0 on success, or errno on failure: - * ENOMEM - allocation failure; only for sleep==KM_NOSLEEP - * EACCES - label is incompatible with caller or connection - * (labeled [trusted] solaris only) - * Caller must hold conn fanout lock. - */ -int -sctp_add_faddr(sctp_t *sctp, in6_addr_t *addr, int sleep) -{ - dprint(4, ("add_faddr: %x:%x:%x:%x %d\n", SCTP_PRINTADDR(*addr), - sleep)); - - return (sctp_add_faddr_entry(sctp, addr, sleep, B_FALSE)); -} - -/* - * Same as sctp_add_faddr above, but put new entry at front of list. 
- * Caller must hold conn fanout lock. - */ -int -sctp_add_faddr_first(sctp_t *sctp, in6_addr_t *addr, int sleep) -{ - dprint(4, ("add_faddr_first: %x:%x:%x:%x %d\n", SCTP_PRINTADDR(*addr), - sleep)); - - return (sctp_add_faddr_entry(sctp, addr, sleep, B_TRUE)); -} - sctp_faddr_t * sctp_lookup_faddr(sctp_t *sctp, in6_addr_t *addr) { @@ -597,21 +606,16 @@ sctp_lookup_faddr_nosctp(sctp_faddr_t *fp, in6_addr_t *addr) return (fp); } +/* + * To change the currently used peer address to the specified one. + */ void -sctp_faddr2hdraddr(sctp_faddr_t *fp, sctp_t *sctp) +sctp_set_faddr_current(sctp_t *sctp, sctp_faddr_t *fp) { + /* Now setup the composite header. */ if (fp->isv4) { IN6_V4MAPPED_TO_IPADDR(&fp->faddr, sctp->sctp_ipha->ipha_dst); - /* Must not allow unspec src addr if not bound to all */ - if (IN6_IS_ADDR_V4MAPPED_ANY(&fp->saddr) && - !sctp->sctp_bound_to_all) { - /* - * set the src to the first v4 saddr and hope - * for the best - */ - fp->saddr = sctp_get_valid_addr(sctp, B_FALSE); - } IN6_V4MAPPED_TO_IPADDR(&fp->saddr, sctp->sctp_ipha->ipha_src); /* update don't fragment bit */ if (fp->df) { @@ -622,17 +626,15 @@ sctp_faddr2hdraddr(sctp_faddr_t *fp, sctp_t *sctp) } } else { sctp->sctp_ip6h->ip6_dst = fp->faddr; - /* Must not allow unspec src addr if not bound to all */ - if (IN6_IS_ADDR_UNSPECIFIED(&fp->saddr) && - !sctp->sctp_bound_to_all) { - /* - * set the src to the first v6 saddr and hope - * for the best - */ - fp->saddr = sctp_get_valid_addr(sctp, B_TRUE); - } sctp->sctp_ip6h->ip6_src = fp->saddr; } + + sctp->sctp_current = fp; + sctp->sctp_mss = fp->sfa_pmss; + + /* Update the upper layer for the change. 
*/ + if (!SCTP_IS_DETACHED(sctp)) + sctp_set_ulp_prop(sctp); } void @@ -641,10 +643,8 @@ sctp_redo_faddr_srcs(sctp_t *sctp) sctp_faddr_t *fp; for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->next) { - sctp_ire2faddr(sctp, fp); + sctp_get_ire(sctp, fp); } - - sctp_faddr2hdraddr(sctp->sctp_current, sctp); } void @@ -661,20 +661,20 @@ sctp_faddr_alive(sctp_t *sctp, sctp_faddr_t *fp) fp->state = SCTP_FADDRS_ALIVE; sctp_intf_event(sctp, fp->faddr, SCTP_ADDR_AVAILABLE, 0); - /* If this is the primary, switch back to it now */ + /* + * If this is the primary, switch back to it now. And + * we probably want to reset the source addr used to reach + * it. + */ if (fp == sctp->sctp_primary) { - sctp->sctp_current = fp; - sctp->sctp_mss = fp->sfa_pmss; - /* Reset the addrs in the composite header */ - sctp_faddr2hdraddr(fp, sctp); - if (!SCTP_IS_DETACHED(sctp)) { - sctp_set_ulp_prop(sctp); - } + sctp_set_faddr_current(sctp, fp); + sctp_get_ire(sctp, fp); + return; } } if (fp->ire == NULL) { /* Should have a full IRE now */ - sctp_ire2faddr(sctp, fp); + sctp_get_ire(sctp, fp); } } @@ -719,7 +719,7 @@ sctp_faddr_dead(sctp_t *sctp, sctp_faddr_t *fp, int newstate) /* Find next alive faddr */ ofp = fp; - for (fp = fp->next; fp; fp = fp->next) { + for (fp = fp->next; fp != NULL; fp = fp->next) { if (fp->state == SCTP_FADDRS_ALIVE) { break; } @@ -734,19 +734,19 @@ sctp_faddr_dead(sctp_t *sctp, sctp_faddr_t *fp, int newstate) } } + /* + * Find a new fp, so if the current faddr is dead, use the new fp + * as the current one. + */ if (fp != ofp) { if (sctp->sctp_current == NULL) { dprint(1, ("sctp_faddr_dead: failover->%x:%x:%x:%x\n", SCTP_PRINTADDR(fp->faddr))); - sctp->sctp_current = fp; - sctp->sctp_mss = fp->sfa_pmss; - - /* Reset the addrs in the composite header */ - sctp_faddr2hdraddr(fp, sctp); - - if (!SCTP_IS_DETACHED(sctp)) { - sctp_set_ulp_prop(sctp); - } + /* + * Note that we don't need to reset the source addr + * of the new fp. 
+ */ + sctp_set_faddr_current(sctp, fp); } return (0); } @@ -1120,9 +1120,8 @@ sctp_v6_label(sctp_t *sctp) /* * XXX implement more sophisticated logic */ -/* ARGSUSED */ int -sctp_set_hdraddrs(sctp_t *sctp, cred_t *cr) +sctp_set_hdraddrs(sctp_t *sctp) { sctp_faddr_t *fp; int gotv4 = 0; @@ -1352,7 +1351,7 @@ sctp_get_addrparams(sctp_t *sctp, sctp_t *psctp, mblk_t *pkt, fp = sctp_lookup_faddr(sctp, hdrsaddr); if (fp == NULL) { /* not included; add it now */ - err = sctp_add_faddr_first(sctp, hdrsaddr, KM_NOSLEEP); + err = sctp_add_faddr(sctp, hdrsaddr, KM_NOSLEEP, B_TRUE); if (err != 0) return (err); @@ -1453,7 +1452,8 @@ sctp_get_addrparams(sctp_t *sctp, sctp_t *psctp, mblk_t *pkt, goto next; /* OK, add it to the faddr set */ - err = sctp_add_faddr(sctp, &addr, KM_NOSLEEP); + err = sctp_add_faddr(sctp, &addr, KM_NOSLEEP, + B_FALSE); if (err != 0) return (err); } @@ -1480,7 +1480,8 @@ sctp_get_addrparams(sctp_t *sctp, sctp_t *psctp, mblk_t *pkt, goto next; err = sctp_add_faddr(sctp, - (in6_addr_t *)(ph + 1), KM_NOSLEEP); + (in6_addr_t *)(ph + 1), KM_NOSLEEP, + B_FALSE); if (err != 0) return (err); } @@ -1510,8 +1511,10 @@ next: asize = sizeof (in6_addr_t) * sctp->sctp_nfaddrs; alist = kmem_alloc(asize, KM_NOSLEEP); - if (alist == NULL) + if (alist == NULL) { + SCTP_KSTAT(sctp_cl_assoc_change); return (ENOMEM); + } /* * Just include the address the INIT was sent to in the * delete list and send the entire faddr list. 
We could @@ -1524,6 +1527,7 @@ next: dlist = kmem_alloc(dsize, KM_NOSLEEP); if (dlist == NULL) { kmem_free(alist, asize); + SCTP_KSTAT(sctp_cl_assoc_change); return (ENOMEM); } bcopy(&curaddr, dlist, sizeof (curaddr)); @@ -1771,7 +1775,8 @@ sctp_congest_reset(sctp_t *sctp) } static void -sctp_init_faddr(sctp_t *sctp, sctp_faddr_t *fp, in6_addr_t *addr) +sctp_init_faddr(sctp_t *sctp, sctp_faddr_t *fp, in6_addr_t *addr, + mblk_t *timer_mp) { bcopy(addr, &fp->faddr, sizeof (*addr)); if (IN6_IS_ADDR_V4MAPPED(addr)) { @@ -1798,7 +1803,7 @@ sctp_init_faddr(sctp_t *sctp, sctp_faddr_t *fp, in6_addr_t *addr) fp->pba = 0; fp->acked = 0; fp->lastactive = lbolt64; - fp->timer_mp = NULL; + fp->timer_mp = timer_mp; fp->hb_pending = B_FALSE; fp->timer_running = 0; fp->df = 1; @@ -1812,7 +1817,7 @@ sctp_init_faddr(sctp_t *sctp, sctp_faddr_t *fp, in6_addr_t *addr) sizeof (fp->hb_secret)); fp->hb_expiry = lbolt64; - sctp_ire2faddr(sctp, fp); + sctp_get_ire(sctp, fp); } /*ARGSUSED*/ diff --git a/usr/src/uts/common/inet/sctp/sctp_conn.c b/usr/src/uts/common/inet/sctp/sctp_conn.c index 66db656acf..31d5220202 100644 --- a/usr/src/uts/common/inet/sctp/sctp_conn.c +++ b/usr/src/uts/common/inet/sctp/sctp_conn.c @@ -18,6 +18,7 @@ * * CDDL HEADER END */ + /* * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
@@ -93,7 +94,7 @@ sctp_accept_comm(sctp_t *listener, sctp_t *acceptor, mblk_t *cr_pkt, cr = lconnp->conn_cred; } - if ((err = sctp_set_hdraddrs(acceptor, cr)) != 0) + if ((err = sctp_set_hdraddrs(acceptor)) != 0) return (err); if ((sctp_options & SCTP_PRSCTP_OPTION) && @@ -275,6 +276,7 @@ sctp_conn_request(sctp_t *sctp, mblk_t *mp, uint_t ifindex, uint_t ip_hdr_len, kmem_free(flist, fsize); sctp_close_eager(eager); BUMP_MIB(&sctp_mib, sctpListenDrop); + SCTP_KSTAT(sctp_cl_connect); return (NULL); } /* The clustering module frees these list */ @@ -477,7 +479,8 @@ sctp_connect(sctp_t *sctp, const struct sockaddr *dst, uint32_t addrlen) * OK; set up the peer addr (this may grow after we get * the INIT ACK from the peer with additional addresses). */ - if ((err = sctp_add_faddr(sctp, &dstaddr, sleep)) != 0) { + if ((err = sctp_add_faddr(sctp, &dstaddr, sleep, + B_FALSE)) != 0) { mutex_exit(&tbf->tf_lock); WAKE_SCTP(sctp); return (err); @@ -496,7 +499,7 @@ sctp_connect(sctp_t *sctp, const struct sockaddr *dst, uint32_t addrlen) mutex_exit(&tbf->tf_lock); /* initialize composite headers */ - if ((err = sctp_set_hdraddrs(sctp, NULL)) != 0) { + if ((err = sctp_set_hdraddrs(sctp)) != 0) { sctp_conn_hash_remove(sctp); WAKE_SCTP(sctp); return (err); diff --git a/usr/src/uts/common/inet/sctp/sctp_cookie.c b/usr/src/uts/common/inet/sctp/sctp_cookie.c index 71fff0067a..e84e21eeec 100644 --- a/usr/src/uts/common/inet/sctp/sctp_cookie.c +++ b/usr/src/uts/common/inet/sctp/sctp_cookie.c @@ -18,6 +18,7 @@ * * CDDL HEADER END */ + /* * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
@@ -744,6 +745,7 @@ sctp_send_cookie_ack(sctp_t *sctp) camp = sctp_make_mp(sctp, NULL, sizeof (*cach)); if (camp == NULL) { /* XXX should abort, but don't have the inmp anymore */ + SCTP_KSTAT(sctp_send_cookie_ack_failed); return; } @@ -1024,6 +1026,7 @@ sendcookie: SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto); if (errmp != NULL) freeb(errmp); + SCTP_KSTAT(sctp_send_cookie_failed); return; } /* diff --git a/usr/src/uts/common/inet/sctp/sctp_error.c b/usr/src/uts/common/inet/sctp/sctp_error.c index 9b4cca78e3..e5ec99104d 100644 --- a/usr/src/uts/common/inet/sctp/sctp_error.c +++ b/usr/src/uts/common/inet/sctp/sctp_error.c @@ -18,6 +18,7 @@ * * CDDL HEADER END */ + /* * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. @@ -102,8 +103,11 @@ sctp_user_abort(sctp_t *sctp, mblk_t *data, boolean_t tbit) sctp_faddr_t *fp = sctp->sctp_current; mp = sctp_make_mp(sctp, fp, 0); - if (mp == NULL) + if (mp == NULL) { + SCTP_KSTAT(sctp_send_user_abort_failed); return; + } + /* * Create abort chunk. */ @@ -366,6 +370,7 @@ sctp_send_err(sctp_t *sctp, mblk_t *emp, sctp_faddr_t *dest) } else { sendmp = sctp_make_mp(sctp, dest, 0); if (sendmp == NULL) { + SCTP_KSTAT(sctp_send_err_failed); freemsg(emp); return; } diff --git a/usr/src/uts/common/inet/sctp/sctp_heartbeat.c b/usr/src/uts/common/inet/sctp/sctp_heartbeat.c index 8da1e7d9b8..652cef09a7 100644 --- a/usr/src/uts/common/inet/sctp/sctp_heartbeat.c +++ b/usr/src/uts/common/inet/sctp/sctp_heartbeat.c @@ -18,6 +18,7 @@ * * CDDL HEADER END */ + /* * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
@@ -81,6 +82,7 @@ sctp_return_heartbeat(sctp_t *sctp, sctp_chunk_hdr_t *hbcp, mblk_t *mp) /* Create an IP header, returning to the src addr from the heartbt */ smp = sctp_make_mp(sctp, fp, len); if (smp == NULL) { + SCTP_KSTAT(sctp_return_hb_failed); return; } @@ -126,8 +128,10 @@ sctp_send_heartbeat(sctp_t *sctp, sctp_faddr_t *fp) sizeof (fp->hb_secret) + sizeof (fp->faddr); hbmp = sctp_make_mp(sctp, fp, hblen); - if (hbmp == NULL) + if (hbmp == NULL) { + SCTP_KSTAT(sctp_send_hb_failed); return; + } cp = (sctp_chunk_hdr_t *)hbmp->b_wptr; cp->sch_id = CHUNK_HEARTBEAT; diff --git a/usr/src/uts/common/inet/sctp/sctp_impl.h b/usr/src/uts/common/inet/sctp/sctp_impl.h index 42fff07569..5251bd5d34 100644 --- a/usr/src/uts/common/inet/sctp/sctp_impl.h +++ b/usr/src/uts/common/inet/sctp/sctp_impl.h @@ -18,6 +18,7 @@ * * CDDL HEADER END */ + /* * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. @@ -62,36 +63,6 @@ typedef struct sctpt_s { } sctpt_t; /* - * faddr timer mblks are not allocated until first use. This macro - * will allocate the timer mblk if necessary, set the faddr, and then - * start the timer. - */ -#define SCTP_FADDR_TIMER_RESTART(sctp, fp, intvl) \ - if ((fp)->timer_mp == NULL) { \ - (fp)->timer_mp = sctp_timer_alloc((sctp), sctp_rexmit_timer); \ - } \ - if ((fp)->timer_mp != NULL) { \ - ((sctpt_t *)((fp)->timer_mp->b_rptr))->sctpt_faddr = fp; \ - dprint(3, ("faddr_timer_restart: fp=%p %x:%x:%x:%x %d\n", \ - (void *)(fp), SCTP_PRINTADDR((fp)->faddr), \ - (int)(intvl))); \ - sctp_timer((sctp), (fp)->timer_mp, (intvl)); \ - (fp)->timer_running = 1; \ - } - -#define SCTP_FADDR_TIMER_STOP(fp) \ - if ((fp)->timer_running && (fp)->timer_mp != NULL) { \ - sctp_timer_stop((fp)->timer_mp); \ - (fp)->timer_running = 0; \ - } - -#define SCTP_CALC_RXT(fp, max) \ -{ \ - if (((fp)->rto <<= 1) > (max)) \ - (fp)->rto = (max); \ -} - -/* * Maximum number of duplicate TSNs we can report. 
This is currently * static, and governs the size of the mblk used to hold the duplicate * reports. The use of duplcate TSN reports is currently experimental, @@ -184,6 +155,31 @@ extern sctpparam_t sctp_param_arr[]; extern sctpparam_t sctp_wroff_xtra_param; #define sctp_wroff_xtra sctp_wroff_xtra_param.sctp_param_val +/* + * Retransmission timer start and stop macro for a given faddr. + */ +#define SCTP_FADDR_TIMER_RESTART(sctp, fp, intvl) \ +{ \ + dprint(3, ("faddr_timer_restart: fp=%p %x:%x:%x:%x %d\n", \ + (void *)(fp), SCTP_PRINTADDR((fp)->faddr), (int)(intvl))); \ + sctp_timer((sctp), (fp)->timer_mp, (intvl)); \ + (fp)->timer_running = 1; \ +} + +#define SCTP_FADDR_TIMER_STOP(fp) \ + ASSERT((fp)->timer_mp != NULL); \ + if ((fp)->timer_running) { \ + sctp_timer_stop((fp)->timer_mp); \ + (fp)->timer_running = 0; \ + } + +#define SCTP_CALC_RXT(fp, max) \ +{ \ + if (((fp)->rto <<= 1) > (max)) \ + (fp)->rto = (max); \ +} + + #define SCTP_MAX_COMBINED_HEADER_LENGTH (60 + 12) /* Maxed out ip + sctp */ #define SCTP_MAX_IP_OPTIONS_LENGTH (60 - IP_SIMPLE_HDR_LENGTH) #define SCTP_MAX_HDR_LENGTH 60 @@ -385,6 +381,40 @@ extern sin6_t sctp_sin6_null; /* Zero address for quick clears */ extern mib2_sctp_t sctp_mib; /* SNMP fixed size info */ +/* SCTP kstat */ +typedef struct sctp_kstat_s { + kstat_named_t sctp_add_faddr; + kstat_named_t sctp_add_timer; + kstat_named_t sctp_conn_create; + kstat_named_t sctp_find_next_tq; + kstat_named_t sctp_fr_add_hdr; + kstat_named_t sctp_fr_not_found; + kstat_named_t sctp_output_failed; + kstat_named_t sctp_rexmit_failed; + kstat_named_t sctp_send_init_failed; + kstat_named_t sctp_send_cookie_failed; + kstat_named_t sctp_send_cookie_ack_failed; + kstat_named_t sctp_send_err_failed; + kstat_named_t sctp_send_sack_failed; + kstat_named_t sctp_send_shutdown_failed; + kstat_named_t sctp_send_shutdown_ack_failed; + kstat_named_t sctp_send_shutdown_comp_failed; + kstat_named_t sctp_send_user_abort_failed; + kstat_named_t sctp_send_asconf_failed; 
+ kstat_named_t sctp_send_asconf_ack_failed; + kstat_named_t sctp_send_ftsn_failed; + kstat_named_t sctp_send_hb_failed; + kstat_named_t sctp_return_hb_failed; + kstat_named_t sctp_ss_rexmit_failed; + kstat_named_t sctp_cl_connect; + kstat_named_t sctp_cl_assoc_change; + kstat_named_t sctp_cl_check_addrs; +} sctp_kstat_t; + +extern sctp_kstat_t sctp_statistics; + +#define SCTP_KSTAT(x) (sctp_statistics.x.value.ui64++) + /* * Object to represent database of options to search passed to * {sock,tpi}optcom_req() interface routine to take care of option @@ -744,7 +774,9 @@ typedef struct sctp_s { sctp_prsctp_aware : 1, /* is peer PR-SCTP aware? */ sctp_linklocal : 1, /* is linklocal assoc. */ sctp_mac_exempt : 1, /* SO_MAC_EXEMPT */ - sctp_dummy : 5; + sctp_rexmitting : 1, /* SCTP is retransmitting */ + + sctp_dummy : 4; } sctp_bits; struct { uint32_t @@ -787,6 +819,7 @@ typedef struct sctp_s { #define sctp_prsctp_aware sctp_bits.sctp_prsctp_aware #define sctp_linklocal sctp_bits.sctp_linklocal #define sctp_mac_exempt sctp_bits.sctp_mac_exempt +#define sctp_rexmitting sctp_bits.sctp_rexmitting #define sctp_recvsndrcvinfo sctp_events.sctp_recvsndrcvinfo #define sctp_recvassocevnt sctp_events.sctp_recvassocevnt @@ -903,6 +936,8 @@ typedef struct sctp_s { uint_t sctp_v4label_len; /* length of cached v4 label */ uint_t sctp_v6label_len; /* length of cached v6 label */ + uint32_t sctp_rxt_nxttsn; /* Next TSN to be rexmitted */ + uint32_t sctp_rxt_maxtsn; /* Max TSN sent at time out */ } sctp_t; extern list_t sctp_g_list; /* Head of SCTP instance data chain */ @@ -919,8 +954,7 @@ extern mblk_t *sctp_pad_mp; extern void sctp_ack_timer(sctp_t *); extern size_t sctp_adaption_code_param(sctp_t *, uchar_t *); extern void sctp_adaption_event(sctp_t *); -extern int sctp_add_faddr(sctp_t *, in6_addr_t *, int); -extern int sctp_add_faddr_first(sctp_t *, in6_addr_t *, int); +extern int sctp_add_faddr(sctp_t *, in6_addr_t *, int, boolean_t); extern boolean_t 
sctp_add_ftsn_set(sctp_ftsn_set_t **, sctp_faddr_t *, mblk_t *, uint_t *, uint32_t *); extern boolean_t sctp_add_recvq(sctp_t *, mblk_t *, boolean_t); @@ -974,8 +1008,6 @@ extern void sctp_faddr_alive(sctp_t *, sctp_faddr_t *); extern int sctp_faddr_dead(sctp_t *, sctp_faddr_t *, int); extern void sctp_faddr_fini(void); extern void sctp_faddr_init(void); -extern void sctp_faddr2hdraddr(sctp_faddr_t *, sctp_t *); -extern void sctp_faddr2ire(sctp_t *, sctp_faddr_t *); extern void sctp_fast_rexmit(sctp_t *); extern void sctp_fill_sack(sctp_t *, unsigned char *, int); extern void sctp_free_faddr_timers(sctp_t *); @@ -990,6 +1022,7 @@ extern int sctp_get_addrlist(sctp_t *, const void *, uint32_t *, uchar_t **, int *, size_t *); extern int sctp_get_addrparams(sctp_t *, sctp_t *, mblk_t *, sctp_chunk_hdr_t *, uint_t *); +extern void sctp_get_ire(sctp_t *, sctp_faddr_t *); extern void sctp_get_faddr_list(sctp_t *, uchar_t *, size_t); extern mblk_t *sctp_get_first_sent(sctp_t *); extern mblk_t *sctp_get_msg_to_send(sctp_t *, mblk_t **, mblk_t *, int *, @@ -1014,7 +1047,6 @@ extern uint32_t sctp_init2vtag(sctp_chunk_hdr_t *); extern void sctp_intf_event(sctp_t *, in6_addr_t, int, int); extern void sctp_input_data(sctp_t *, mblk_t *, mblk_t *); extern void sctp_instream_cleanup(sctp_t *, boolean_t); -extern void sctp_ire2faddr(sctp_t *, sctp_faddr_t *); extern int sctp_is_a_faddr_clean(sctp_t *); extern void sctp_kstat_init(void); @@ -1075,16 +1107,19 @@ extern void sctp_send_initack(sctp_t *, sctp_chunk_hdr_t *, mblk_t *); extern void sctp_send_shutdown(sctp_t *, int); extern void sctp_send_heartbeat(sctp_t *, sctp_faddr_t *); extern void sctp_sendfail_event(sctp_t *, mblk_t *, int, boolean_t); -extern int sctp_set_hdraddrs(sctp_t *, cred_t *); +extern void sctp_set_faddr_current(sctp_t *, sctp_faddr_t *); +extern int sctp_set_hdraddrs(sctp_t *); extern void sctp_sets_init(void); extern void sctp_sets_fini(void); extern void sctp_shutdown_event(sctp_t *); extern void 
sctp_stop_faddr_timers(sctp_t *); -extern int sctp_shutdown_received(sctp_t *, sctp_chunk_hdr_t *, int, int); +extern int sctp_shutdown_received(sctp_t *, sctp_chunk_hdr_t *, boolean_t, + boolean_t, sctp_faddr_t *); extern void sctp_shutdown_complete(sctp_t *); extern void sctp_set_if_mtu(sctp_t *); extern void sctp_set_iplen(sctp_t *, mblk_t *); extern void sctp_set_ulp_prop(sctp_t *); +extern void sctp_ss_rexmit(sctp_t *); extern size_t sctp_supaddr_param_len(sctp_t *); extern size_t sctp_supaddr_param(sctp_t *, uchar_t *); @@ -1095,6 +1130,7 @@ extern void sctp_timer_free(mblk_t *); extern void sctp_timer_stop(mblk_t *); extern void sctp_unlink_faddr(sctp_t *, sctp_faddr_t *); +extern void sctp_update_ire(sctp_t *sctp); extern in_port_t sctp_update_next_port(in_port_t, zone_t *zone); extern void sctp_update_rtt(sctp_t *, sctp_faddr_t *, clock_t); extern void sctp_user_abort(sctp_t *, mblk_t *, boolean_t); diff --git a/usr/src/uts/common/inet/sctp/sctp_init.c b/usr/src/uts/common/inet/sctp/sctp_init.c index f1d0c54b34..5585d42213 100644 --- a/usr/src/uts/common/inet/sctp/sctp_init.c +++ b/usr/src/uts/common/inet/sctp/sctp_init.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -19,8 +18,9 @@ * * CDDL HEADER END */ + /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -202,8 +202,10 @@ sctp_init_mp(sctp_t *sctp) sctp->sctp_sctph6->sh_verf = 0; mp = sctp_make_mp(sctp, NULL, initlen); - if (mp == NULL) + if (mp == NULL) { + SCTP_KSTAT(sctp_send_init_failed); return (NULL); + } /* Lay in a new INIT chunk, starting with the chunk header */ chp = (sctp_chunk_hdr_t *)mp->b_wptr; diff --git a/usr/src/uts/common/inet/sctp/sctp_input.c b/usr/src/uts/common/inet/sctp/sctp_input.c index e17b8ff3ec..8b2218d862 100644 --- a/usr/src/uts/common/inet/sctp/sctp_input.c +++ b/usr/src/uts/common/inet/sctp/sctp_input.c @@ -18,6 +18,7 @@ * * CDDL HEADER END */ + /* * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. @@ -1648,6 +1649,7 @@ checks_done: (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps); smp = sctp_make_mp(sctp, sendto, slen); if (smp == NULL) { + SCTP_KSTAT(sctp_send_sack_failed); return (NULL); } sch = (sctp_chunk_hdr_t *)smp->b_wptr; @@ -1742,6 +1744,7 @@ sctp_check_abandoned_msg(sctp_t *sctp, mblk_t *meta) if (head == NULL) { sctp->sctp_adv_pap = adv_pap; freemsg(nmp); + SCTP_KSTAT(sctp_send_ftsn_failed); return (ENOMEM); } SCTP_MSG_SET_ABANDONED(meta); @@ -1831,6 +1834,7 @@ sctp_cumack(sctp_t *sctp, uint32_t tsn, mblk_t **first_unacked) } if (SCTP_CHUNK_ISACKED(mp)) continue; + SCTP_CHUNK_SET_SACKCNT(mp, 0); SCTP_CHUNK_ACKED(mp); ASSERT(fp->suna >= chunklen); fp->suna -= chunklen; @@ -2780,7 +2784,7 @@ ret: fp->cwnd = fp->suna + sctp_maxburst * fp->sfa_pmss; } fp->acked = 0; - return (trysend); + goto check_ss_rxmit; } for (fp = sctp->sctp_faddrs; fp; fp = fp->next) { if (cumack_forward && fp->acked && !fast_recovery && @@ -2807,6 +2811,31 @@ ret: } fp->acked = 0; } +check_ss_rxmit: + /* + * If this is a SACK following a timeout, check if there are + * still unacked chunks (sent before the timeout) that we can + * send. 
+ */ + if (sctp->sctp_rexmitting) { + if (SEQ_LT(sctp->sctp_lastack_rxd, sctp->sctp_rxt_maxtsn)) { + /* + * As we are in retransmission phase, we may get a + * SACK which indicates some new chunks are received + * but cum_tsn does not advance. During this + * phase, the other side advances cum_tsn only because + * it receives our retransmitted chunks. Only + * this signals that some chunks are still + * missing. + */ + if (cumack_forward) + sctp_ss_rexmit(sctp); + } else { + sctp->sctp_rexmitting = B_FALSE; + sctp->sctp_rxt_nxttsn = sctp->sctp_ltsn; + sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn; + } + } return (trysend); } @@ -3551,7 +3580,7 @@ sctp_input_data(sctp_t *sctp, mblk_t *mp, mblk_t *ipsec_mp) case CHUNK_SHUTDOWN: sctp_shutdown_event(sctp); trysend = sctp_shutdown_received(sctp, ch, - 0, 0); + B_FALSE, B_FALSE, fp); BUMP_LOCAL(sctp->sctp_ibchunks); break; case CHUNK_SHUTDOWN_ACK: @@ -3906,10 +3935,7 @@ sctp_input_data(sctp_t *sctp, mblk_t *mp, mblk_t *ipsec_mp) switch (ch->sch_id) { case CHUNK_ABORT: /* Pass gathered wisdom to IP for keeping */ - for (fp = sctp->sctp_faddrs; fp != NULL; - fp = fp->next) { - sctp_faddr2ire(sctp, fp); - } + sctp_update_ire(sctp); sctp_process_abort(sctp, ch, 0); goto done; case CHUNK_SHUTDOWN_COMPLETE: @@ -3919,10 +3945,7 @@ sctp_input_data(sctp_t *sctp, mblk_t *mp, mblk_t *ipsec_mp) NULL); /* Pass gathered wisdom to IP for keeping */ - for (fp = sctp->sctp_faddrs; fp != NULL; - fp = fp->next) { - sctp_faddr2ire(sctp, fp); - } + sctp_update_ire(sctp); sctp_clean_death(sctp, 0); goto done; case CHUNK_SHUTDOWN_ACK: @@ -3935,7 +3958,7 @@ sctp_input_data(sctp_t *sctp, mblk_t *mp, mblk_t *ipsec_mp) goto done; case CHUNK_COOKIE: (void) sctp_shutdown_received(sctp, NULL, - 1, 0); + B_TRUE, B_FALSE, fp); BUMP_LOCAL(sctp->sctp_ibchunks); break; case CHUNK_HEARTBEAT: @@ -3953,7 +3976,7 @@ sctp_input_data(sctp_t *sctp, mblk_t *mp, mblk_t *ipsec_mp) switch (ch->sch_id) { case CHUNK_SHUTDOWN: trysend = sctp_shutdown_received(sctp, ch, - 0, 
0); + B_FALSE, B_FALSE, fp); break; case CHUNK_SACK: trysend = sctp_got_sack(sctp, ch); diff --git a/usr/src/uts/common/inet/sctp/sctp_opt_data.c b/usr/src/uts/common/inet/sctp/sctp_opt_data.c index cd263faeca..89ffd6ae60 100644 --- a/usr/src/uts/common/inet/sctp/sctp_opt_data.c +++ b/usr/src/uts/common/inet/sctp/sctp_opt_data.c @@ -18,6 +18,7 @@ * * CDDL HEADER END */ + /* * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. @@ -661,11 +662,7 @@ sctp_set_prim(sctp_t *sctp, const void *invalp, uint_t inlen) if (fp->state != SCTP_FADDRS_ALIVE || fp == sctp->sctp_current) { return (0); } - sctp->sctp_current = fp; - sctp->sctp_mss = fp->sfa_pmss; - /* Reset the addrs in the composite header */ - sctp_faddr2hdraddr(fp, sctp); - sctp_set_ulp_prop(sctp); + sctp_set_faddr_current(sctp, fp); return (0); } diff --git a/usr/src/uts/common/inet/sctp/sctp_output.c b/usr/src/uts/common/inet/sctp/sctp_output.c index 8ded7748c9..a71c5d2f19 100644 --- a/usr/src/uts/common/inet/sctp/sctp_output.c +++ b/usr/src/uts/common/inet/sctp/sctp_output.c @@ -18,6 +18,7 @@ * * CDDL HEADER END */ + /* * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. @@ -563,14 +564,14 @@ sctp_add_proto_hdr(sctp_t *sctp, sctp_faddr_t *fp, mblk_t *mp, int sacklen, * A null fp->ire could mean that the address is 'down'. Similarly, * it is possible that the address went down, we tried to send an * heartbeat and ended up setting fp->saddr as unspec because we - * didn't have any usable source address. In either case - * sctp_ire2faddr() will try find an IRE, if available, and set - * the source address, if needed. If we still don't have any + * didn't have any usable source address. In either case + * sctp_get_ire() will try find an IRE, if available, and set + * the source address, if needed. If we still don't have any * usable source address, fp->state will be SCTP_FADDRS_UNREACH and * we return EHOSTUNREACH. 
*/ if (fp->ire == NULL || SCTP_IS_ADDR_UNSPEC(fp->isv4, fp->saddr)) { - sctp_ire2faddr(sctp, fp); + sctp_get_ire(sctp, fp); if (fp->state == SCTP_FADDRS_UNREACH) { if (error != NULL) *error = EHOSTUNREACH; @@ -579,8 +580,10 @@ sctp_add_proto_hdr(sctp_t *sctp, sctp_faddr_t *fp, mblk_t *mp, int sacklen, } /* Copy in IP header. */ if ((mp->b_rptr - mp->b_datap->db_base) < - (sctp_wroff_xtra + hdrlen + sacklen) || DB_REF(mp) > 2) { + (sctp_wroff_xtra + hdrlen + sacklen) || DB_REF(mp) > 2 || + !IS_P2ALIGNED(DB_BASE(mp), sizeof (ire_t *))) { mblk_t *nmp; + /* * This can happen if IP headers are adjusted after * data was moved into chunks, or during retransmission, @@ -683,6 +686,8 @@ sctp_find_fast_rexmit_mblks(sctp_t *sctp, int *total, sctp_faddr_t **fp) int msglen; int extra; sctp_msg_hdr_t *msg_hdr; + sctp_faddr_t *old_fp = NULL; + sctp_faddr_t *chunk_fp; for (meta = sctp->sctp_xmit_head; meta != NULL; meta = meta->b_next) { msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr; @@ -694,13 +699,23 @@ sctp_find_fast_rexmit_mblks(sctp_t *sctp, int *total, sctp_faddr_t **fp) if (SCTP_CHUNK_WANT_REXMIT(mp)) { /* * Use the same peer address to do fast - * retransmission. + * retransmission. If the original peer + * address is dead, switch to the current + * one. Record the old one so that we + * will pick the chunks sent to the old + * one for fast retransmission. 
*/ + chunk_fp = SCTP_CHUNK_DEST(mp); if (*fp == NULL) { - *fp = SCTP_CHUNK_DEST(mp); - if ((*fp)->state != SCTP_FADDRS_ALIVE) + *fp = chunk_fp; + if ((*fp)->state != SCTP_FADDRS_ALIVE) { + old_fp = *fp; *fp = sctp->sctp_current; - } else if (*fp != SCTP_CHUNK_DEST(mp)) { + } + } else if (old_fp == NULL && *fp != chunk_fp) { + continue; + } else if (old_fp != NULL && + old_fp != chunk_fp) { continue; } @@ -730,6 +745,7 @@ sctp_find_fast_rexmit_mblks(sctp_t *sctp, int *total, sctp_faddr_t **fp) } } BUMP_MIB(&sctp_mib, sctpOutFastRetrans); + BUMP_LOCAL(sctp->sctp_rxtchunks); SCTP_CHUNK_CLEAR_REXMIT(mp); if (start_mp == NULL) { start_mp = nmp; @@ -927,10 +943,13 @@ sctp_fast_rexmit(sctp_t *sctp) ASSERT(sctp->sctp_xmit_head != NULL); mp = sctp_find_fast_rexmit_mblks(sctp, &pktlen, &fp); - if (mp == NULL) + if (mp == NULL) { + SCTP_KSTAT(sctp_fr_not_found); return; + } if ((head = sctp_add_proto_hdr(sctp, fp, mp, 0, NULL)) == NULL) { freemsg(mp); + SCTP_KSTAT(sctp_fr_add_hdr); return; } if ((pktlen > fp->sfa_pmss) && fp->isv4) { @@ -1069,6 +1088,7 @@ sctp_output(sctp_t *sctp) fp, chunklen, meta); } freemsg(nmp); + SCTP_KSTAT(sctp_output_failed); goto unsent_data; } seglen += sacklen; @@ -1086,7 +1106,6 @@ sctp_output(sctp_t *sctp) fp->cwnd = sctp_slow_start_after_idle * fp->sfa_pmss; } - fp->lastactive = now; pathmax = fp->cwnd - fp->suna; if (seglen + extra > pathmax) { @@ -1115,9 +1134,11 @@ sctp_output(sctp_t *sctp) fp, chunklen, meta); } freemsg(nmp); + SCTP_KSTAT(sctp_output_failed); goto unsent_data; } } + fp->lastactive = now; if (pathmax > fp->sfa_pmss) pathmax = fp->sfa_pmss; SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta); @@ -1127,6 +1148,7 @@ sctp_output(sctp_t *sctp) if (sctp->sctp_out_time == 0) { sctp->sctp_out_time = now; sctp->sctp_rtt_tsn = sctp->sctp_ltsn - 1; + ASSERT(sctp->sctp_rtt_tsn == ntohl(sdc->sdh_tsn)); } if (extra > 0) { fill = sctp_get_padding(extra); @@ -1460,6 +1482,7 @@ ftsn_done: if (head == NULL) { freemsg(*nmp); *nmp = NULL; + 
SCTP_KSTAT(sctp_send_ftsn_failed); return; } *seglen += sacklen; @@ -1572,6 +1595,24 @@ sctp_check_adv_ack_pt(sctp_t *sctp, mblk_t *meta, mblk_t *mp) sctp->sctp_adv_pap = tsn; } + +/* + * Determine if we should bundle a data chunk with the chunk being + * retransmitted. We bundle if + * + * - the chunk is sent to the same destination and unack'ed. + * + * OR + * + * - the chunk is unsent, i.e. new data. + */ +#define SCTP_CHUNK_RX_CANBUNDLE(mp, fp) \ + (!SCTP_CHUNK_ABANDONED((mp)) && \ + ((SCTP_CHUNK_ISSENT((mp)) && (SCTP_CHUNK_DEST(mp) == (fp) && \ + !SCTP_CHUNK_ISACKED(mp))) || \ + (((mp)->b_flag & (SCTP_CHUNK_FLAG_REXMIT|SCTP_CHUNK_FLAG_SENT)) != \ + SCTP_CHUNK_FLAG_SENT))) + /* * Retransmit first segment which hasn't been acked with cumtsn or send * a Forward TSN chunk, if appropriate. @@ -1590,10 +1631,12 @@ sctp_rexmit(sctp_t *sctp, sctp_faddr_t *oldfp) int extra; sctp_data_hdr_t *sdc; sctp_faddr_t *fp; - int error; uint32_t adv_pap = sctp->sctp_adv_pap; boolean_t do_ftsn = B_FALSE; boolean_t ftsn_check = B_TRUE; + uint32_t first_ua_tsn; + sctp_msg_hdr_t *mhdr; + uint32_t tot_wnd; while (meta != NULL) { for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) { @@ -1651,7 +1694,7 @@ sctp_rexmit(sctp_t *sctp, sctp_faddr_t *oldfp) } meta = meta->b_next; if (meta != NULL && sctp->sctp_prsctp_aware) { - sctp_msg_hdr_t *mhdr = (sctp_msg_hdr_t *)meta->b_rptr; + mhdr = (sctp_msg_hdr_t *)meta->b_rptr; while (meta != NULL && (SCTP_IS_MSG_ABANDONED(meta) || SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, sctp))) { @@ -1689,6 +1732,7 @@ out: ASSERT(fp != NULL); sdc = (sctp_data_hdr_t *)mp->b_rptr; + first_ua_tsn = ntohl(sdc->sdh_tsn); if (do_ftsn) { sctp_make_ftsns(sctp, meta, mp, &nmp, fp, &seglen); if (nmp == NULL) { @@ -1696,10 +1740,16 @@ out: goto restart_timer; } head = nmp; - mp = NULL; - meta = sctp->sctp_xmit_tail; + /* + * Move to the next unabandoned chunk. XXXCheck if meta will + * always be marked abandoned. 
+ */ + while (meta != NULL && SCTP_IS_MSG_ABANDONED(meta)) + meta = meta->b_next; if (meta != NULL) - mp = meta->b_cont; + mp = mp->b_cont; + else + mp = NULL; goto try_bundle; } seglen = ntohs(sdc->sdh_len); @@ -1707,17 +1757,7 @@ out: if ((extra = seglen & (SCTP_ALIGN - 1)) != 0) extra = SCTP_ALIGN - extra; - /* - * Cancel RTT measurement if the retransmitted TSN is before the - * TSN used for timimg. - */ - if (sctp->sctp_out_time != 0 && - SEQ_GEQ(sctp->sctp_rtt_tsn, sdc->sdh_tsn)) { - sctp->sctp_out_time = 0; - } - /* Clear the counter as the RTT calculation may be off. */ - fp->rtt_updates = 0; - + /* Find out if we need to piggyback SACK. */ if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) { sacklen = 0; } else { @@ -1728,10 +1768,39 @@ out: /* piggybacked SACK doesn't fit */ sacklen = 0; } else { - fp = sctp->sctp_lastdata; + /* + * OK, we have room to send SACK back. But we + * should send it back to the last fp where we + * receive data from, unless sctp_lastdata equals + * oldfp, then we should probably not send it + * back to that fp. Also we should check that + * the fp is alive. + */ + if (sctp->sctp_lastdata != oldfp && + sctp->sctp_lastdata->state == SCTP_FADDRS_ALIVE) { + fp = sctp->sctp_lastdata; + } } } + /* + * Cancel RTT measurement if the retransmitted TSN is before the + * TSN used for timimg. + */ + if (sctp->sctp_out_time != 0 && + SEQ_GEQ(sctp->sctp_rtt_tsn, sdc->sdh_tsn)) { + sctp->sctp_out_time = 0; + } + /* Clear the counter as the RTT calculation may be off. */ + fp->rtt_updates = 0; + oldfp->rtt_updates = 0; + + /* + * After a timeout, we should change the current faddr so that + * new chunks will be sent to the alternate address. 
+ */ + sctp_set_faddr_current(sctp, fp); + nmp = dupmsg(mp); if (nmp == NULL) goto restart_timer; @@ -1749,6 +1818,7 @@ out: head = sctp_add_proto_hdr(sctp, fp, nmp, sacklen, NULL); if (head == NULL) { freemsg(nmp); + SCTP_KSTAT(sctp_rexmit_failed); goto restart_timer; } seglen += sacklen; @@ -1756,55 +1826,80 @@ out: SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta); mp = mp->b_next; + + /* Check how much more we can send. */ + tot_wnd = MIN(fp->cwnd, sctp->sctp_frwnd); + /* + * If the number of outstanding bytes is more than what we are + * allowed to send, stop. + */ + if (tot_wnd <= chunklen || tot_wnd < fp->suna + chunklen) + goto done_bundle; + else + tot_wnd -= chunklen; + try_bundle: while (seglen < fp->sfa_pmss) { int32_t new_len; + /* Go through the list to find more chunks to be bundled. */ while (mp != NULL) { - if (SCTP_CHUNK_CANSEND(mp)) + /* Check if the chunk can be bundled. */ + if (SCTP_CHUNK_RX_CANBUNDLE(mp, oldfp)) break; mp = mp->b_next; } + /* Go to the next message. */ if (mp == NULL) { - meta = sctp_get_msg_to_send(sctp, &mp, meta->b_next, - &error, 0, 0, oldfp); - if (error != 0 || meta == NULL) - break; - ASSERT(mp != NULL); - sctp->sctp_xmit_tail = meta; + for (meta = meta->b_next; meta != NULL; + meta = meta->b_next) { + mhdr = (sctp_msg_hdr_t *)meta->b_rptr; + + if (SCTP_IS_MSG_ABANDONED(meta) || + SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, + sctp)) { + continue; + } + + mp = meta->b_cont; + goto try_bundle; + } + /* No more chunk to be bundled. 
*/ + break; } + sdc = (sctp_data_hdr_t *)mp->b_rptr; - chunklen = ntohs(sdc->sdh_len) - sizeof (*sdc); - new_len = seglen + ntohs(sdc->sdh_len); + new_len = ntohs(sdc->sdh_len); + chunklen = new_len - sizeof (*sdc); + if (chunklen > tot_wnd) + break; - if (seglen & (SCTP_ALIGN - 1)) { - extra = SCTP_ALIGN - (seglen & (SCTP_ALIGN - 1)); + if ((extra = new_len & (SCTP_ALIGN - 1)) != 0) + extra = SCTP_ALIGN - extra; + if ((new_len = seglen + new_len + extra) > fp->sfa_pmss) + break; + if ((nmp = dupmsg(mp)) == NULL) + break; - if (new_len + extra > fp->sfa_pmss) { - break; - } + if (extra > 0) { fill = sctp_get_padding(extra); if (fill != NULL) { - new_len += extra; - linkb(head, fill); + linkb(nmp, fill); } else { - break; - } - } else { - if (new_len > fp->sfa_pmss) { + freemsg(nmp); break; } } - if ((nmp = dupmsg(mp)) == NULL) { - break; - } - seglen = new_len; + linkb(head, nmp); SCTP_CHUNK_CLEAR_FLAGS(nmp); SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta); - linkb(head, nmp); + + seglen = new_len; + tot_wnd -= chunklen; mp = mp->b_next; } +done_bundle: if ((seglen > fp->sfa_pmss) && fp->isv4) { ipha_t *iph = (ipha_t *)head->b_rptr; @@ -1820,20 +1915,38 @@ try_bundle: seglen, ntohl(sdc->sdh_tsn), ntohs(sdc->sdh_ssn), (void *)fp, sctp->sctp_frwnd, sctp->sctp_lastack_rxd)); + sctp->sctp_rexmitting = B_TRUE; + sctp->sctp_rxt_nxttsn = first_ua_tsn; + sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn - 1; sctp_set_iplen(sctp, head); sctp_add_sendq(sctp, head); /* - * Restart timer with exponential backoff + * Restart the oldfp timer with exponential backoff and + * the new fp timer for the retransmitted chunks. */ restart_timer: oldfp->strikes++; sctp->sctp_strikes++; SCTP_CALC_RXT(oldfp, sctp->sctp_rto_max); - SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto); if (oldfp->suna != 0) SCTP_FADDR_TIMER_RESTART(sctp, oldfp, oldfp->rto); sctp->sctp_active = lbolt64; + + /* + * Should we restart the timer of the new fp? 
If there is + * outstanding data to the new fp, the timer should be + * running already. So restarting it means that the timer + * will fire later for those outstanding data. But if + * we don't restart it, the timer will fire too early for the + * just retransmitted chunks to the new fp. The reason is that we + * don't keep a timestamp on when a chunk is retransmitted. + * So when the timer fires, it will just search for the + * chunk with the earliest TSN sent to new fp. This probably + * is the chunk we just retransmitted. So for now, let's + * be conservative and restart the timer of the new fp. + */ + SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto); } /* @@ -1885,3 +1998,241 @@ sctp_wput(queue_t *q, mblk_t *mp) return; } } + +/* + * This function is called by sctp_ss_rexmit() to create a packet + * to be retransmitted to the given fp. The given meta and mp + * parameters are respectively the sctp_msg_hdr_t and the mblk of the + * first chunk to be retransmitted. + */ +static mblk_t * +sctp_rexmit_packet(sctp_t *sctp, mblk_t **meta, mblk_t **mp, sctp_faddr_t *fp, + uint_t *packet_len) +{ + uint32_t seglen = 0; + uint16_t chunklen; + int extra; + mblk_t *nmp; + mblk_t *head; + mblk_t *fill; + sctp_data_hdr_t *sdc; + sctp_msg_hdr_t *mhdr; + + sdc = (sctp_data_hdr_t *)(*mp)->b_rptr; + seglen = ntohs(sdc->sdh_len); + chunklen = seglen - sizeof (*sdc); + if ((extra = seglen & (SCTP_ALIGN - 1)) != 0) + extra = SCTP_ALIGN - extra; + + nmp = dupmsg(*mp); + if (nmp == NULL) + return (NULL); + if (extra > 0) { + fill = sctp_get_padding(extra); + if (fill != NULL) { + linkb(nmp, fill); + seglen += extra; + } else { + freemsg(nmp); + return (NULL); + } + } + SCTP_CHUNK_CLEAR_FLAGS(nmp); + head = sctp_add_proto_hdr(sctp, fp, nmp, 0, NULL); + if (head == NULL) { + freemsg(nmp); + return (NULL); + } + SCTP_CHUNK_SENT(sctp, *mp, sdc, fp, chunklen, *meta); + sctp->sctp_rxt_nxttsn = ntohl(sdc->sdh_tsn); + *mp = (*mp)->b_next; + +try_bundle: + while (seglen < fp->sfa_pmss) { + 
int32_t new_len; + + /* + * Go through the list to find more chunks to be bundled. + * We should only retransmit sent by unack'ed chunks. Since + * they were sent before, the peer's receive window should + * be able to receive them. + */ + while (*mp != NULL) { + /* Check if the chunk can be bundled. */ + if (SCTP_CHUNK_ISSENT(*mp) && !SCTP_CHUNK_ISACKED(*mp)) + break; + *mp = (*mp)->b_next; + } + /* Go to the next message. */ + if (*mp == NULL) { + for (*meta = (*meta)->b_next; *meta != NULL; + *meta = (*meta)->b_next) { + mhdr = (sctp_msg_hdr_t *)(*meta)->b_rptr; + + if (SCTP_IS_MSG_ABANDONED(*meta) || + SCTP_MSG_TO_BE_ABANDONED(*meta, mhdr, + sctp)) { + continue; + } + + *mp = (*meta)->b_cont; + goto try_bundle; + } + /* No more chunk to be bundled. */ + break; + } + + sdc = (sctp_data_hdr_t *)(*mp)->b_rptr; + /* Don't bundle chunks beyond sctp_rxt_maxtsn. */ + if (SEQ_GT(ntohl(sdc->sdh_tsn), sctp->sctp_rxt_maxtsn)) + break; + new_len = ntohs(sdc->sdh_len); + chunklen = new_len - sizeof (*sdc); + + if ((extra = new_len & (SCTP_ALIGN - 1)) != 0) + extra = SCTP_ALIGN - extra; + if ((new_len = seglen + new_len + extra) > fp->sfa_pmss) + break; + if ((nmp = dupmsg(*mp)) == NULL) + break; + + if (extra > 0) { + fill = sctp_get_padding(extra); + if (fill != NULL) { + linkb(nmp, fill); + } else { + freemsg(nmp); + break; + } + } + linkb(head, nmp); + + SCTP_CHUNK_CLEAR_FLAGS(nmp); + SCTP_CHUNK_SENT(sctp, *mp, sdc, fp, chunklen, *meta); + sctp->sctp_rxt_nxttsn = ntohl(sdc->sdh_tsn); + + seglen = new_len; + *mp = (*mp)->b_next; + } + *packet_len = seglen; + return (head); +} + +/* + * sctp_ss_rexmit() is called when we get a SACK after a timeout which + * advances the cum_tsn but the cum_tsn is still less than what we have sent + * (sctp_rxt_maxtsn) at the time of the timeout. This SACK is a "partial" + * SACK. We retransmit unacked chunks without having to wait for another + * timeout. 
The rationale is that the SACK should not be "partial" if all the + * lost chunks have been retransmitted. Since the SACK is "partial," + * the chunks between the cum_tsn and the sctp_rxt_maxtsn should still + * be missing. It is better for us to retransmit them now instead + * of waiting for a timeout. + */ +void +sctp_ss_rexmit(sctp_t *sctp) +{ + mblk_t *meta; + mblk_t *mp; + mblk_t *pkt; + sctp_faddr_t *fp; + uint_t pkt_len; + uint32_t tot_wnd; + sctp_data_hdr_t *sdc; + int burst; + + /* + * If the last cum ack is smaller than what we have just + * retransmitted, simply return. + */ + if (SEQ_GEQ(sctp->sctp_lastack_rxd, sctp->sctp_rxt_nxttsn)) + sctp->sctp_rxt_nxttsn = sctp->sctp_lastack_rxd + 1; + else + return; + + ASSERT(SEQ_LEQ(sctp->sctp_rxt_nxttsn, sctp->sctp_rxt_maxtsn)); + + /* + * After a timer fires, sctp_current should be set to the new + * fp where the retransmitted chunks are sent. + */ + fp = sctp->sctp_current; + + /* + * Since we are retransmitting, we can only use cwnd to determine + * how much we can send as we were allowed to send those chunks + * previously. + */ + tot_wnd = fp->cwnd; + /* So we have sent more than we can, just return. */ + if (tot_wnd < fp->suna || tot_wnd - fp->suna < fp->sfa_pmss) + return; + else + tot_wnd -= fp->suna; + + /* Find the first unack'ed chunk */ + for (meta = sctp->sctp_xmit_head; meta != NULL; meta = meta->b_next) { + sctp_msg_hdr_t *mhdr = (sctp_msg_hdr_t *)meta->b_rptr; + + if (SCTP_IS_MSG_ABANDONED(meta) || + SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, sctp)) { + continue; + } + + for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) { + /* Again, this may not be possible */ + if (!SCTP_CHUNK_ISSENT(mp)) + return; + sdc = (sctp_data_hdr_t *)mp->b_rptr; + if (ntohl(sdc->sdh_tsn) == sctp->sctp_rxt_nxttsn) + goto found_msg; + } + } + + /* Everything is abandoned... 
*/ + return; + +found_msg: + if (!fp->timer_running) + SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto); + pkt = sctp_rexmit_packet(sctp, &meta, &mp, fp, &pkt_len); + if (pkt == NULL) { + SCTP_KSTAT(sctp_ss_rexmit_failed); + return; + } + if ((pkt_len > fp->sfa_pmss) && fp->isv4) { + ipha_t *iph = (ipha_t *)pkt->b_rptr; + + /* + * Path MTU is different from path we thought it would + * be when we created chunks, or IP headers have grown. + * Need to clear the DF bit. + */ + iph->ipha_fragment_offset_and_flags = 0; + } + sctp_set_iplen(sctp, pkt); + sctp_add_sendq(sctp, pkt); + + /* Check and see if there is more chunk to be retransmitted. */ + if (tot_wnd <= pkt_len || tot_wnd - pkt_len < fp->sfa_pmss || + meta == NULL) + return; + if (mp == NULL) + meta = meta->b_next; + if (meta == NULL) + return; + + /* Retransmit another packet if the window allows. */ + for (tot_wnd -= pkt_len, burst = sctp_maxburst - 1; + meta != NULL && burst > 0; meta = meta->b_next, burst--) { + if (mp == NULL) + mp = meta->b_cont; + for (; mp != NULL; mp = mp->b_next) { + /* Again, this may not be possible */ + if (!SCTP_CHUNK_ISSENT(mp)) + return; + if (!SCTP_CHUNK_ISACKED(mp)) + goto found_msg; + } + } +} diff --git a/usr/src/uts/common/inet/sctp/sctp_shutdown.c b/usr/src/uts/common/inet/sctp/sctp_shutdown.c index 0fcf98b959..8b52360849 100644 --- a/usr/src/uts/common/inet/sctp/sctp_shutdown.c +++ b/usr/src/uts/common/inet/sctp/sctp_shutdown.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. 
@@ -19,8 +18,9 @@ * * CDDL HEADER END */ + /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -110,6 +110,7 @@ sctp_send_shutdown(sctp_t *sctp, int rexmit) sendmp = sctp_make_mp(sctp, fp, sizeof (*sch) + sizeof (*ctsn)); if (sendmp == NULL) { + SCTP_KSTAT(sctp_send_shutdown_failed); goto done; } sch = (sctp_chunk_hdr_t *)sendmp->b_wptr; @@ -142,14 +143,13 @@ done: } int -sctp_shutdown_received(sctp_t *sctp, sctp_chunk_hdr_t *sch, int crwsd, - int rexmit) +sctp_shutdown_received(sctp_t *sctp, sctp_chunk_hdr_t *sch, boolean_t crwsd, + boolean_t rexmit, sctp_faddr_t *fp) { mblk_t *samp; sctp_chunk_hdr_t *sach; uint32_t *tsn; int trysend = 0; - sctp_faddr_t *fp; if (sctp->sctp_state != SCTPS_SHUTDOWN_ACK_SENT) sctp->sctp_state = SCTPS_SHUTDOWN_RECEIVED; @@ -172,15 +172,20 @@ sctp_shutdown_received(sctp_t *sctp, sctp_chunk_hdr_t *sch, int crwsd, if (sctp->sctp_xmit_head != NULL || sctp->sctp_xmit_unsent != NULL) return (1); - /* rotate faddrs if we are retransmitting */ - if (!rexmit) - fp = sctp->sctp_current; - else - fp = sctp_rotate_faddr(sctp, sctp->sctp_shutdown_faddr); + if (fp == NULL) { + /* rotate faddrs if we are retransmitting */ + if (!rexmit) + fp = sctp->sctp_current; + else + fp = sctp_rotate_faddr(sctp, sctp->sctp_shutdown_faddr); + } + sctp->sctp_shutdown_faddr = fp; samp = sctp_make_mp(sctp, fp, sizeof (*sach)); - if (samp == NULL) + if (samp == NULL) { + SCTP_KSTAT(sctp_send_shutdown_ack_failed); goto dotimer; + } sach = (sctp_chunk_hdr_t *)samp->b_wptr; sach->sch_id = CHUNK_SHUTDOWN_ACK; @@ -226,6 +231,7 @@ sctp_shutdown_complete(sctp_t *sctp) scmp = sctp_make_mp(sctp, NULL, sizeof (*scch)); if (scmp == NULL) { /* XXX use timer approach */ + SCTP_KSTAT(sctp_send_shutdown_comp_failed); return; } @@ -271,11 +277,13 @@ sctp_ootb_shutdown_ack(sctp_t *gsctp, mblk_t *inmp, uint_t ip_hdr_len) /* * Check to see if we can reuse the incoming 
mblk. There should - * not be other reference. Since this packet comes from below, + * not be other reference and the db_base of the mblk should be + * properly aligned. Since this packet comes from below, * there should be enough header space to fill in what the lower * layers want to add. And we will not stash anything there. */ - if (DB_REF(inmp) != 1) { + if (!IS_P2ALIGNED(DB_BASE(inmp), sizeof (ire_t *)) || + DB_REF(inmp) != 1) { mp1 = allocb(MBLKL(inmp) + sctp_wroff_xtra, BPRI_MED); if (mp1 == NULL) { freeb(inmp); diff --git a/usr/src/uts/common/inet/sctp/sctp_snmp.c b/usr/src/uts/common/inet/sctp/sctp_snmp.c index f078fb4c20..d42cce5d81 100644 --- a/usr/src/uts/common/inet/sctp/sctp_snmp.c +++ b/usr/src/uts/common/inet/sctp/sctp_snmp.c @@ -18,6 +18,7 @@ * * CDDL HEADER END */ + /* * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. @@ -46,10 +47,47 @@ #include "sctp_addr.h" mib2_sctp_t sctp_mib; -kstat_t *sctp_mibkp; /* kstat exporting sctp_mib data */ +static kstat_t *sctp_mibkp; /* kstat exporting sctp_mib data */ +static kstat_t *sctp_kstat; /* kstat exporting general sctp stats */ static int sctp_snmp_state(sctp_t *sctp); +/* + * The following kstats are for debugging purposes. They keep + * track of problems which should not happen normally. But in + * those cases which they do happen, these kstats would be handy + * for engineers to diagnose the problems. They are not intended + * to be consumed by customers. 
+ */ +sctp_kstat_t sctp_statistics = { + { "sctp_add_faddr", KSTAT_DATA_UINT64 }, + { "sctp_add_timer", KSTAT_DATA_UINT64 }, + { "sctp_conn_create", KSTAT_DATA_UINT64 }, + { "sctp_find_next_tq", KSTAT_DATA_UINT64 }, + { "sctp_fr_add_hdr", KSTAT_DATA_UINT64 }, + { "sctp_fr_not_found", KSTAT_DATA_UINT64 }, + { "sctp_output_failed", KSTAT_DATA_UINT64 }, + { "sctp_rexmit_failed", KSTAT_DATA_UINT64 }, + { "sctp_send_init_failed", KSTAT_DATA_UINT64 }, + { "sctp_send_cookie_failed", KSTAT_DATA_UINT64 }, + { "sctp_send_cookie_ack_failed", KSTAT_DATA_UINT64 }, + { "sctp_send_err_failed", KSTAT_DATA_UINT64 }, + { "sctp_send_sack_failed", KSTAT_DATA_UINT64 }, + { "sctp_send_shutdown_failed", KSTAT_DATA_UINT64 }, + { "sctp_send_shutdown_ack_failed", KSTAT_DATA_UINT64 }, + { "sctp_send_shutdown_comp_failed", KSTAT_DATA_UINT64 }, + { "sctp_send_user_abort_failed", KSTAT_DATA_UINT64 }, + { "sctp_send_asconf_failed", KSTAT_DATA_UINT64 }, + { "sctp_send_asconf_ack_failed", KSTAT_DATA_UINT64 }, + { "sctp_send_ftsn_failed", KSTAT_DATA_UINT64 }, + { "sctp_send_hb_failed", KSTAT_DATA_UINT64 }, + { "sctp_return_hb_failed", KSTAT_DATA_UINT64 }, + { "sctp_ss_rexmit_failed", KSTAT_DATA_UINT64 }, + { "sctp_cl_connect", KSTAT_DATA_UINT64 }, + { "sctp_cl_assoc_change", KSTAT_DATA_UINT64 }, + { "sctp_cl_check_addrs", KSTAT_DATA_UINT64 }, +}; + static int sctp_kstat_update(kstat_t *kp, int rw) { @@ -261,8 +299,8 @@ sctp_kstat_init(void) { "sctpInClosed", KSTAT_DATA_INT32, 0 } }; - sctp_mibkp = kstat_create("sctp", 0, "sctp", "mib2", KSTAT_TYPE_NAMED, - NUM_OF_FIELDS(sctp_named_kstat_t), 0); + sctp_mibkp = kstat_create(SCTP_MOD_NAME, 0, "sctp", "mib2", + KSTAT_TYPE_NAMED, NUM_OF_FIELDS(sctp_named_kstat_t), 0); if (sctp_mibkp == NULL) return; @@ -276,6 +314,13 @@ sctp_kstat_init(void) sctp_mibkp->ks_update = sctp_kstat_update; kstat_install(sctp_mibkp); + + if ((sctp_kstat = kstat_create(SCTP_MOD_NAME, 0, "sctpstat", + "net", KSTAT_TYPE_NAMED, NUM_OF_FIELDS(sctp_statistics), + 
KSTAT_FLAG_VIRTUAL)) != NULL) { + sctp_kstat->ks_data = &sctp_statistics; + kstat_install(sctp_kstat); + } } void @@ -285,6 +330,10 @@ sctp_kstat_fini(void) kstat_delete(sctp_mibkp); sctp_mibkp = NULL; } + if (sctp_kstat != NULL) { + kstat_delete(sctp_kstat); + sctp_kstat = NULL; + } } /* diff --git a/usr/src/uts/common/inet/sctp/sctp_timer.c b/usr/src/uts/common/inet/sctp/sctp_timer.c index 0b2ae2a7fe..fb19313897 100644 --- a/usr/src/uts/common/inet/sctp/sctp_timer.c +++ b/usr/src/uts/common/inet/sctp/sctp_timer.c @@ -18,6 +18,7 @@ * * CDDL HEADER END */ + /* * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. @@ -166,6 +167,7 @@ sctp_timer_alloc(sctp_t *sctp, pfv_t func) sctpt->sctpt_pfv = func; return (mp); } + SCTP_KSTAT(sctp_add_timer); return (NULL); } @@ -578,7 +580,8 @@ sctp_rexmit_timer(sctp_t *sctp, sctp_faddr_t *fp) case SCTPS_SHUTDOWN_PENDING: case SCTPS_SHUTDOWN_RECEIVED: if (sctp->sctp_state == SCTPS_SHUTDOWN_RECEIVED) { - (void) sctp_shutdown_received(sctp, NULL, 0, 1); + (void) sctp_shutdown_received(sctp, NULL, B_FALSE, + B_TRUE, NULL); } if (sctp->sctp_xmit_head == NULL && @@ -645,7 +648,8 @@ rxmit_init: ASSERT(sctp->sctp_xmit_unsent == NULL); BUMP_LOCAL(sctp->sctp_T2expire); - (void) sctp_shutdown_received(sctp, NULL, 0, 1); + (void) sctp_shutdown_received(sctp, NULL, B_FALSE, B_TRUE, + NULL); BUMP_MIB(&sctp_mib, sctpTimRetrans); break; default: |