summary | refs | log | tree | commit | diff
path: root/usr/src
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src')
-rw-r--r-- usr/src/cmd/mdb/common/modules/sctp/sctp.c 1
-rw-r--r-- usr/src/uts/common/inet/sctp/sctp_common.c 7
-rw-r--r-- usr/src/uts/common/inet/sctp/sctp_cookie.c 21
-rw-r--r-- usr/src/uts/common/inet/sctp/sctp_impl.h 21
-rw-r--r-- usr/src/uts/common/inet/sctp/sctp_init.c 27
-rw-r--r-- usr/src/uts/common/inet/sctp/sctp_input.c 12
-rw-r--r-- usr/src/uts/common/inet/sctp/sctp_output.c 93
7 files changed, 113 insertions, 69 deletions
diff --git a/usr/src/cmd/mdb/common/modules/sctp/sctp.c b/usr/src/cmd/mdb/common/modules/sctp/sctp.c
index aa088b45e6..4d15fb4a48 100644
--- a/usr/src/cmd/mdb/common/modules/sctp/sctp.c
+++ b/usr/src/cmd/mdb/common/modules/sctp/sctp.c
@@ -163,6 +163,7 @@ sctp_faddr(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
mdb_printf("pba\t\t%?u\tacked\t\t%?u\n", fa->pba, fa->acked);
mdb_printf("lastactive\t%?ld\thb_secret\t%?#lx\n", fa->lastactive,
fa->hb_secret);
+ mdb_printf("rxt_unacked\t\t%?u\n", fa->rxt_unacked);
mdb_printf("timer_mp\t%?p\tire\t\t%?p\n", fa->timer_mp, fa->ire);
mdb_printf("hb_pending\t%?d\ttimer_running\t%?d\n"
"df\t\t%?d\tpmtu_discovered\t%?d\n"
diff --git a/usr/src/uts/common/inet/sctp/sctp_common.c b/usr/src/uts/common/inet/sctp/sctp_common.c
index 2930c15337..692f10773e 100644
--- a/usr/src/uts/common/inet/sctp/sctp_common.c
+++ b/usr/src/uts/common/inet/sctp/sctp_common.c
@@ -232,8 +232,8 @@ sctp_get_ire(sctp_t *sctp, sctp_faddr_t *fp)
/* Make sure that sfa_pmss is a multiple of SCTP_ALIGN. */
fp->sfa_pmss = (ire->ire_max_frag - hdrlen) & ~(SCTP_ALIGN - 1);
if (fp->cwnd < (fp->sfa_pmss * 2)) {
- fp->cwnd = fp->sfa_pmss *
- sctps->sctps_slow_start_initial;
+ SET_CWND(fp, fp->sfa_pmss,
+ sctps->sctps_slow_start_initial);
}
}
@@ -1788,7 +1788,7 @@ sctp_congest_reset(sctp_t *sctp)
for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->next) {
fp->ssthresh = sctps->sctps_initial_mtu;
- fp->cwnd = fp->sfa_pmss * sctps->sctps_slow_start_initial;
+ SET_CWND(fp, fp->sfa_pmss, sctps->sctps_slow_start_initial);
fp->suna = 0;
fp->pba = 0;
}
@@ -1871,6 +1871,7 @@ sctp_init_faddr(sctp_t *sctp, sctp_faddr_t *fp, in6_addr_t *addr,
(void) random_get_pseudo_bytes((uint8_t *)&fp->hb_secret,
sizeof (fp->hb_secret));
fp->hb_expiry = lbolt64;
+ fp->rxt_unacked = 0;
sctp_get_ire(sctp, fp);
}
diff --git a/usr/src/uts/common/inet/sctp/sctp_cookie.c b/usr/src/uts/common/inet/sctp/sctp_cookie.c
index 036f069257..320783c898 100644
--- a/usr/src/uts/common/inet/sctp/sctp_cookie.c
+++ b/usr/src/uts/common/inet/sctp/sctp_cookie.c
@@ -527,7 +527,7 @@ sctp_send_initack(sctp_t *sctp, sctp_hdr_t *initsh, sctp_chunk_hdr_t *ch,
if (initcollision)
iacklen += sctp_supaddr_param_len(sctp);
if (!linklocal)
- iacklen += sctp_addr_params_len(sctp, supp_af, B_FALSE);
+ iacklen += sctp_addr_params(sctp, supp_af, NULL, B_FALSE);
ipsctplen += sizeof (*iacksh) + iacklen;
iacklen += errlen;
if ((pad = ipsctplen % 4) != 0) {
@@ -627,7 +627,7 @@ sctp_send_initack(sctp_t *sctp, sctp_hdr_t *initsh, sctp_chunk_hdr_t *ch,
if (initcollision)
p += sctp_supaddr_param(sctp, (uchar_t *)p);
if (!linklocal)
- p += sctp_addr_params(sctp, supp_af, (uchar_t *)p);
+ p += sctp_addr_params(sctp, supp_af, (uchar_t *)p, B_FALSE);
if (((sctp_options & SCTP_PRSCTP_OPTION) || initcollision) &&
sctp->sctp_prsctp_aware && sctps->sctps_prsctp_enabled) {
p += sctp_options_param(sctp, p, SCTP_PRSCTP_OPTION);
@@ -1148,7 +1148,7 @@ sctp_process_cookie(sctp_t *sctp, sctp_chunk_hdr_t *ch, mblk_t *cmp,
/* Timestamp is int64_t, and we only guarantee 32-bit alignment */
bcopy(p, &ts, sizeof (ts));
- /* Cookie life time, int32_t */
+ /* Cookie life time, uint32_t */
lt = (uint32_t *)(p + sizeof (ts));
/*
@@ -1171,11 +1171,18 @@ sctp_process_cookie(sctp_t *sctp, sctp_chunk_hdr_t *ch, mblk_t *cmp,
*iackpp = iack;
*recv_adaption = 0;
- /* Check the timestamp */
- diff = lbolt64 - ts;
- if (diff > *lt && (init->sic_inittag != sctp->sctp_fvtag ||
+ /*
+ * Check the staleness of the Cookie, specified in 3.3.10.3 of
+ * RFC 2960.
+ *
+ * The measure of staleness is the difference, in microseconds,
+ * between the current time and the time the State Cookie expires.
+ * So it is lbolt64 - (ts + *lt). If it is positive, it means
+ * that the Cookie has expired.
+ */
+ diff = lbolt64 - (ts + *lt);
+ if (diff > 0 && (init->sic_inittag != sctp->sctp_fvtag ||
iack->sic_inittag != sctp->sctp_lvtag)) {
-
uint32_t staleness;
staleness = TICK_TO_USEC(diff);
diff --git a/usr/src/uts/common/inet/sctp/sctp_impl.h b/usr/src/uts/common/inet/sctp/sctp_impl.h
index 453f911f5f..a5e5a16111 100644
--- a/usr/src/uts/common/inet/sctp/sctp_impl.h
+++ b/usr/src/uts/common/inet/sctp/sctp_impl.h
@@ -512,6 +512,7 @@ typedef struct sctp_faddr_s {
uint32_t T3expire; /* # of times T3 timer expired */
uint64_t hb_secret; /* per addr "secret" in heartbeat */
+ uint32_t rxt_unacked; /* # unack'ed retransmitted bytes */
} sctp_faddr_t;
/* Flags to indicate supported address type in the PARM_SUP_ADDRS. */
@@ -534,6 +535,19 @@ typedef struct sctp_ipif_hash_s {
int ipif_count;
} sctp_ipif_hash_t;
+
+/*
+ * Initialize cwnd according to RFC 3390; mss must be non-zero (it is a
+ * divisor). def_max_init_cwnd is either sctp_slow_start_initial or
+ * sctp_slow_start_after_idle depending on the caller.
+ */
+#define SET_CWND(fp, mss, def_max_init_cwnd) \
+do { \
+ (fp)->cwnd = MIN((def_max_init_cwnd) * (mss), \
+ MIN(4 * (mss), MAX(2 * (mss), 4380 / (mss) * (mss)))); \
+} while (0)
+
+
struct sctp_s;
/*
@@ -811,7 +825,7 @@ typedef struct sctp_s {
int64_t sctp_last_secret_update;
uint8_t sctp_secret[SCTP_SECRET_LEN]; /* for cookie auth */
uint8_t sctp_old_secret[SCTP_SECRET_LEN];
- uint32_t sctp_cookie_lifetime; /* cookie lifetime in ms */
+ uint32_t sctp_cookie_lifetime; /* cookie lifetime in tick */
/*
* Address family that app wishes returned addrsses to be in.
@@ -919,8 +933,7 @@ extern boolean_t sctp_add_ftsn_set(sctp_ftsn_set_t **, sctp_faddr_t *, mblk_t *,
extern boolean_t sctp_add_recvq(sctp_t *, mblk_t *, boolean_t);
extern void sctp_add_sendq(sctp_t *, mblk_t *);
extern void sctp_add_unrec_parm(sctp_parm_hdr_t *, mblk_t **);
-extern size_t sctp_addr_params(sctp_t *, int, uchar_t *);
-extern size_t sctp_addr_params_len(sctp_t *, int, boolean_t);
+extern size_t sctp_addr_params(sctp_t *, int, uchar_t *, boolean_t);
extern mblk_t *sctp_add_proto_hdr(sctp_t *, sctp_faddr_t *, mblk_t *, int,
int *);
extern void sctp_addr_req(sctp_t *, mblk_t *);
@@ -1040,7 +1053,7 @@ extern sctp_parm_hdr_t *sctp_next_parm(sctp_parm_hdr_t *, ssize_t *);
extern void sctp_ootb_shutdown_ack(sctp_t *, mblk_t *, uint_t);
extern size_t sctp_options_param(const sctp_t *, void *, int);
extern size_t sctp_options_param_len(const sctp_t *, int);
-extern void sctp_output(sctp_t *sctp);
+extern void sctp_output(sctp_t *, uint_t);
extern boolean_t sctp_param_register(IDP *, sctpparam_t *, int, sctp_stack_t *);
extern void sctp_partial_delivery_event(sctp_t *);
diff --git a/usr/src/uts/common/inet/sctp/sctp_init.c b/usr/src/uts/common/inet/sctp/sctp_init.c
index 68df56e14f..0b6f263383 100644
--- a/usr/src/uts/common/inet/sctp/sctp_init.c
+++ b/usr/src/uts/common/inet/sctp/sctp_init.c
@@ -192,7 +192,7 @@ sctp_init_mp(sctp_t *sctp)
initlen += (sizeof (sctp_parm_hdr_t) + sizeof (uint32_t));
}
initlen += sctp_supaddr_param_len(sctp);
- initlen += sctp_addr_params_len(sctp, supp_af, B_TRUE);
+ initlen += sctp_addr_params(sctp, supp_af, NULL, B_TRUE);
if (sctp->sctp_prsctp_aware && sctps->sctps_prsctp_enabled)
initlen += sctp_options_param_len(sctp, SCTP_PRSCTP_OPTION);
@@ -234,7 +234,7 @@ sctp_init_mp(sctp_t *sctp)
p += sctp_supaddr_param(sctp, p);
/* Add address parameters */
- p += sctp_addr_params(sctp, supp_af, p);
+ p += sctp_addr_params(sctp, supp_af, p, B_FALSE);
/* Add Forward-TSN-Supported param */
if (sctp->sctp_prsctp_aware && sctps->sctps_prsctp_enabled)
@@ -261,32 +261,21 @@ sctp_init2vtag(sctp_chunk_hdr_t *initch)
}
size_t
-sctp_addr_params_len(sctp_t *sctp, int af, boolean_t modify)
+sctp_addr_params(sctp_t *sctp, int af, uchar_t *p, boolean_t modify)
{
+ size_t param_len;
+
ASSERT(sctp->sctp_nsaddrs > 0);
/*
* If we have only one local address or it is a loopback or linklocal
* association, we let the peer pull the address from the IP header.
*/
- if (sctp->sctp_nsaddrs == 1 || sctp->sctp_loopback ||
+ if ((!modify && sctp->sctp_nsaddrs == 1) || sctp->sctp_loopback ||
sctp->sctp_linklocal) {
return (0);
}
- return (sctp_saddr_info(sctp, af, NULL, modify));
-}
-
-size_t
-sctp_addr_params(sctp_t *sctp, int af, uchar_t *p)
-{
- /*
- * If we have only one local address or it is a loopback or linklocal
- * association, we let the peer pull the address from the IP header.
- */
- if (sctp->sctp_nsaddrs == 1 || sctp->sctp_loopback ||
- sctp->sctp_linklocal) {
- return (0);
- }
- return (sctp_saddr_info(sctp, af, p, B_FALSE));
+ param_len = sctp_saddr_info(sctp, af, p, modify);
+ return ((sctp->sctp_nsaddrs == 1) ? 0 : param_len);
}
diff --git a/usr/src/uts/common/inet/sctp/sctp_input.c b/usr/src/uts/common/inet/sctp/sctp_input.c
index 0b15712122..0d28c4c5bc 100644
--- a/usr/src/uts/common/inet/sctp/sctp_input.c
+++ b/usr/src/uts/common/inet/sctp/sctp_input.c
@@ -1248,8 +1248,9 @@ sctp_data_chunk(sctp_t *sctp, sctp_chunk_hdr_t *ch, mblk_t *mp, mblk_t **dups,
/* Drop and SACK, but don't advance the cumulative TSN. */
sctp->sctp_force_sack = 1;
dprint(0, ("sctp_data_chunk: exceed rwnd %d rxqueued %d "
- "ssn %d tsn %x\n", sctp->sctp_rwnd,
- sctp->sctp_rxqueued, dc->sdh_ssn, ntohl(dc->sdh_tsn)));
+ "dlen %d ssn %d tsn %x\n", sctp->sctp_rwnd,
+ sctp->sctp_rxqueued, dlen, ntohs(dc->sdh_ssn),
+ ntohl(dc->sdh_tsn)));
return;
}
@@ -2893,12 +2894,15 @@ check_ss_rxmit:
* this signals that some chunks are still
* missing.
*/
- if (cumack_forward)
+ if (cumack_forward) {
+ fp->rxt_unacked -= acked;
sctp_ss_rexmit(sctp);
+ }
} else {
sctp->sctp_rexmitting = B_FALSE;
sctp->sctp_rxt_nxttsn = sctp->sctp_ltsn;
sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn;
+ fp->rxt_unacked = 0;
}
}
return (trysend);
@@ -4143,7 +4147,7 @@ nomorechunks:
}
if (trysend) {
- sctp_output(sctp);
+ sctp_output(sctp, UINT_MAX);
if (sctp->sctp_cxmit_list != NULL)
sctp_wput_asconf(sctp, NULL);
}
diff --git a/usr/src/uts/common/inet/sctp/sctp_output.c b/usr/src/uts/common/inet/sctp/sctp_output.c
index f4dfc8f17a..6a0ce9aba2 100644
--- a/usr/src/uts/common/inet/sctp/sctp_output.c
+++ b/usr/src/uts/common/inet/sctp/sctp_output.c
@@ -297,7 +297,7 @@ sctp_sendmsg(sctp_t *sctp, mblk_t *mp, int flags)
sctp->sctp_unsent += msg_len;
BUMP_LOCAL(sctp->sctp_msgcount);
if (sctp->sctp_state == SCTPS_ESTABLISHED)
- sctp_output(sctp);
+ sctp_output(sctp, UINT_MAX);
process_sendq:
WAKE_SCTP(sctp);
sctp_process_sendq(sctp);
@@ -968,7 +968,7 @@ sctp_fast_rexmit(sctp_t *sctp)
}
void
-sctp_output(sctp_t *sctp)
+sctp_output(sctp_t *sctp, uint_t num_pkt)
{
mblk_t *mp = NULL;
mblk_t *nmp;
@@ -989,7 +989,7 @@ sctp_output(sctp_t *sctp)
sctp_data_hdr_t *sdc;
int error;
boolean_t notsent = B_TRUE;
- sctp_stack_t *sctps = sctp->sctp_sctps;
+ sctp_stack_t *sctps = sctp->sctp_sctps;
if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
sacklen = 0;
@@ -1017,7 +1017,7 @@ sctp_output(sctp_t *sctp)
}
if (meta != NULL)
mp = meta->b_cont;
- while (cansend > 0) {
+ while (cansend > 0 && num_pkt-- != 0) {
pad = 0;
/*
@@ -1108,8 +1108,8 @@ sctp_output(sctp_t *sctp)
* a while, do slow start again.
*/
if (now - fp->lastactive > fp->rto) {
- fp->cwnd = sctps->sctps_slow_start_after_idle *
- fp->sfa_pmss;
+ SET_CWND(fp, fp->sfa_pmss,
+ sctps->sctps_slow_start_after_idle);
}
pathmax = fp->cwnd - fp->suna;
@@ -1643,7 +1643,6 @@ sctp_rexmit(sctp_t *sctp, sctp_faddr_t *oldfp)
boolean_t ftsn_check = B_TRUE;
uint32_t first_ua_tsn;
sctp_msg_hdr_t *mhdr;
- uint32_t tot_wnd;
sctp_stack_t *sctps = sctp->sctp_sctps;
while (meta != NULL) {
@@ -1722,9 +1721,17 @@ window_probe:
*/
if (sctp->sctp_frwnd < (oldfp->sfa_pmss - sizeof (*sdc)))
sctp->sctp_frwnd = oldfp->sfa_pmss - sizeof (*sdc);
+
/* next TSN to send */
sctp->sctp_rxt_nxttsn = sctp->sctp_ltsn;
- sctp_output(sctp);
+
+ /*
+ * The above sctp_frwnd adjustment is coarse. The "changed"
+ * sctp_frwnd may allow us to send more than 1 packet. So
+ * tell sctp_output() to send only 1 packet.
+ */
+ sctp_output(sctp, 1);
+
/* Last sent TSN */
sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn - 1;
ASSERT(sctp->sctp_rxt_maxtsn >= sctp->sctp_rxt_nxttsn);
@@ -1734,7 +1741,13 @@ window_probe:
return;
out:
/*
- * If were are probing for zero window, don't adjust retransmission
+ * After a time out, assume that everything has left the network. So
+ * we can clear rxt_unacked for the original peer address.
+ */
+ oldfp->rxt_unacked = 0;
+
+ /*
+ * If we were probing for zero window, don't adjust retransmission
* variables, but the timer is still backed off.
*/
if (sctp->sctp_zero_win_probe) {
@@ -1756,8 +1769,14 @@ out:
} else {
SCTP_KSTAT(sctps, sctp_ss_rexmit_failed);
}
+
+ /*
+ * The strikes will be cleared by sctp_faddr_alive() when the
+ * other side sends us an ack.
+ */
oldfp->strikes++;
sctp->sctp_strikes++;
+
SCTP_CALC_RXT(oldfp, sctp->sctp_rto_max);
if (oldfp != fp && oldfp->suna != 0)
SCTP_FADDR_TIMER_RESTART(sctp, oldfp, fp->rto);
@@ -1873,18 +1892,8 @@ out:
mp = mp->b_next;
- /* Check how much more we can send. */
- tot_wnd = MIN(fp->cwnd, sctp->sctp_frwnd);
- /*
- * If the number of outstanding bytes is more than what we are
- * allowed to send, stop.
- */
- if (tot_wnd <= chunklen || tot_wnd < fp->suna + chunklen)
- goto done_bundle;
- else
- tot_wnd -= chunklen;
-
try_bundle:
+ /* At timeout we send exactly 1 packet: no more and no less. */
while (seglen < fp->sfa_pmss) {
int32_t new_len;
@@ -1917,8 +1926,6 @@ try_bundle:
sdc = (sctp_data_hdr_t *)mp->b_rptr;
new_len = ntohs(sdc->sdh_len);
chunklen = new_len - sizeof (*sdc);
- if (chunklen > tot_wnd)
- break;
if ((extra = new_len & (SCTP_ALIGN - 1)) != 0)
extra = SCTP_ALIGN - extra;
@@ -1942,7 +1949,6 @@ try_bundle:
SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
seglen = new_len;
- tot_wnd -= chunklen;
mp = mp->b_next;
}
done_bundle:
@@ -1956,6 +1962,8 @@ done_bundle:
*/
iph->ipha_fragment_offset_and_flags = 0;
}
+ fp->rxt_unacked += seglen;
+
dprint(2, ("sctp_rexmit: Sending packet %d bytes, tsn %x "
"ssn %d to %p (rwnd %d, lastack_rxd %x)\n",
seglen, ntohl(sdc->sdh_tsn), ntohs(sdc->sdh_ssn),
@@ -2049,7 +2057,7 @@ sctp_wput(queue_t *q, mblk_t *mp)
* This function is called by sctp_ss_rexmit() to create a packet
* to be retransmitted to the given fp. The given meta and mp
* parameters are respectively the sctp_msg_hdr_t and the mblk of the
- * first chunk to be retransmitted. This is also called when we want
+ * first chunk to be retransmitted. This is also called when we want
* to retransmit a zero window probe from sctp_rexmit() or when we
* want to retransmit the zero window probe after the window has
* opened from sctp_got_sack().
@@ -2173,6 +2181,7 @@ try_bundle:
*mp = (*mp)->b_next;
}
*packet_len = seglen;
+ fp->rxt_unacked += seglen;
return (head);
}
@@ -2219,16 +2228,36 @@ sctp_ss_rexmit(sctp_t *sctp)
fp = sctp->sctp_current;
/*
- * Since we are retransmitting, we can only use cwnd to determine
- * how much we can send as we were allowed to send those chunks
- * previously.
+ * Since we are retransmitting, we only need to use cwnd to determine
+ * how much we can send as we were allowed (by peer's receive window)
+ * to send those retransmitted chunks previously when they were first
+ * sent. If we record how much we have retransmitted but
+ * unacknowledged using rxt_unacked, then the amount we can now send
+ * is equal to cwnd minus rxt_unacked.
+ *
+ * The field rxt_unacked is incremented when we retransmit a packet
+ * and decremented when we get a SACK acknowledging something. And
+ * it is reset when the retransmission timer fires as we assume that
+ * all packets have left the network after a timeout. If this
+ * assumption is not true, it means that after a timeout, we can
+ * get a SACK acknowledging more than rxt_unacked (its value only
+ * contains what is retransmitted when the timer fires). So
+ * rxt_unacked will become very big (it is an unsigned int so going
+ * negative means that the value is huge). This is the reason we
+ * always send at least 1 MSS bytes.
+ *
+ * The reason why we do not have an accurate count is that we
+ * only know how many packets are outstanding (using the TSN numbers).
+ * But we do not know how many bytes those packets contain. To
+ * have an accurate count, we need to walk through the send list.
+ * As it is not really important to have an accurate count during
+ * retransmission, we skip this walk to save some time. This should
+ * not make the retransmission too aggressive to cause congestion.
*/
- tot_wnd = fp->cwnd;
- /* So we have sent more than we can, just return. */
- if (tot_wnd < fp->suna || tot_wnd - fp->suna < fp->sfa_pmss)
- return;
+ if (fp->cwnd <= fp->rxt_unacked)
+ tot_wnd = fp->sfa_pmss;
else
- tot_wnd -= fp->suna;
+ tot_wnd = fp->cwnd - fp->rxt_unacked;
/* Find the first unack'ed chunk */
for (meta = sctp->sctp_xmit_head; meta != NULL; meta = meta->b_next) {