summaryrefslogtreecommitdiff
path: root/usr/src/stand/lib/tcp/tcp_inet.h
blob: 82d441f8519291c605af96c50931839476c2f275 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
/*
 * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright (c) 1982, 1986 Regents of the University of California.
 * All rights reserved. The Berkeley software License Agreement
 * specifies the terms and conditions for redistribution.
 */

#ifndef _TCP_INET_H
#define	_TCP_INET_H

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#ifdef	__cplusplus
extern "C" {
#endif

#include "tcp_sack.h"

/* TCP states */
#define	TCPS_ALL_ACKED		-7	/* Internal state for retransmissions */
#define	TCPS_CLOSED		-6
#define	TCPS_IDLE		-5	/* idle (opened, but not bound) */
#define	TCPS_BOUND		-4	/* bound, ready to connect or accept */
#define	TCPS_LISTEN		-3	/* listening for connection */
#define	TCPS_SYN_SENT		-2	/* active, have sent syn */
#define	TCPS_SYN_RCVD		-1	/* have received syn (and sent ours) */
/* states < TCPS_ESTABLISHED are those where connections not established */
#define	TCPS_ESTABLISHED	0	/* established */
#define	TCPS_CLOSE_WAIT		1	/* rcvd fin, waiting for close */
/* states > TCPS_CLOSE_WAIT are those where user has closed */
#define	TCPS_FIN_WAIT_1		2	/* have closed and sent fin */
#define	TCPS_CLOSING		3	/* closed, xchd FIN, await FIN ACK */
#define	TCPS_LAST_ACK		4	/* had fin and close; await FIN ACK */
/* states > TCPS_CLOSE_WAIT && < TCPS_FIN_WAIT_2 await ACK of FIN */
#define	TCPS_FIN_WAIT_2		5	/* have closed, fin is acked */
#define	TCPS_TIME_WAIT		6	/* in 2*msl quiet wait after close */

/*
 * Internal flags used in conjunction with the packet header flags.
 * Used in tcp_rput_data to keep track of what needs to be done.
 */
#define	TH_LIMIT_XMIT		0x0400	/* Limited xmit is needed */
#define	TH_XMIT_NEEDED		0x0800	/* Window opened - send queued data */
#define	TH_REXMIT_NEEDED	0x1000	/* Time expired for unacked data */
#define	TH_ACK_NEEDED		0x2000	/* Send an ack now. */
#define	TH_NEED_SACK_REXMIT	0x4000	/* Use SACK info to retransmission */
#define	TH_TIMER_NEEDED 0x8000	/* Start the delayed ack/push bit timer */

/*
 * Special Magic number used by TCP to issue a callback to recvfrom() in the
 * form of a dummy inetgram.
 */
#define	TCP_CALLB_MAGIC_ID	0xFFFF

/*
 * TCP sequence numbers are 32 bit integers operated
 * on with modular arithmetic.  These macros can be
 * used to compare such integers.
 */
#define	SEQ_LT(a, b)	((int32_t)((a)-(b)) < 0)
#define	SEQ_LEQ(a, b)	((int32_t)((a)-(b)) <= 0)
#define	SEQ_GT(a, b)	((int32_t)((a)-(b)) > 0)
#define	SEQ_GEQ(a, b)	((int32_t)((a)-(b)) >= 0)

/* TCP Protocol header */
typedef	struct tcphdr_s {
	uint8_t		th_lport[2];	/* Source port */
	uint8_t		th_fport[2];	/* Destination port */
	uint8_t		th_seq[4];	/* Sequence number */
	uint8_t		th_ack[4];	/* Acknowledgement number */
	uint8_t		th_offset_and_rsrvd[1]; /* Offset to the packet data */
	uint8_t		th_flags[1];
	uint8_t		th_win[2];	/* Allocation number */
	uint8_t		th_sum[2];	/* TCP checksum */
	uint8_t		th_urp[2];	/* Urgent pointer */
} tcph_t;

#define	TCP_HDR_LENGTH(tcph) (((tcph)->th_offset_and_rsrvd[0] >>2) &(0xF << 2))
#define	TCP_MAX_COMBINED_HEADER_LENGTH	(60 + 60) /* Maxed out ip + tcp */
#define	TCP_MAX_IP_OPTIONS_LENGTH	(60 - IP_SIMPLE_HDR_LENGTH)
#define	TCP_MAX_HDR_LENGTH		60
#define	TCP_MAX_TCP_OPTIONS_LENGTH	(60 - sizeof (tcph_t))
#define	TCP_MIN_HEADER_LENGTH		20
#define	TCP_MAXWIN			65535
#define	TCP_PORT_LEN			sizeof (in_port_t)
#define	TCP_MAX_WINSHIFT		14
#define	TCP_MAX_LARGEWIN		(TCP_MAXWIN << TCP_MAX_WINSHIFT)

#define	TCPIP_HDR_LENGTH(mp, n)					\
	(n) = IPH_HDR_LENGTH((mp)->b_rptr),			\
	(n) += TCP_HDR_LENGTH((tcph_t *)&(mp)->b_rptr[(n)])

/* TCP Protocol header (used if the header is known to be 32-bit aligned) */
typedef	struct tcphdra_s {
	in_port_t	tha_lport;	/* Source port */
	in_port_t	tha_fport;	/* Destination port */
	uint32_t	tha_seq;	/* Sequence number */
	uint32_t	tha_ack;	/* Acknowledgement number */
	uint8_t tha_offset_and_reserved; /* Offset to the packet data */
	uint8_t		tha_flags;
	uint16_t	tha_win;	/* Allocation number */
	uint16_t	tha_sum;	/* TCP checksum */
	uint16_t	tha_urp;	/* Urgent pointer */
} tcpha_t;

typedef struct tcp_s {
	struct tcp_s *tcp_time_wait_next; /* Next TCP in TIME_WAIT list */
	struct tcp_s *tcp_time_wait_prev; /* Prev TCP in TIME_WAIT list */
	uint32_t tcp_max_swnd;		/* Maximum swnd we have seen */
	uint32_t tcp_suna;		/* Sender unacknowledged */
	uint32_t tcp_csuna;		/* Clear (no rexmits in window) suna */
	uint32_t tcp_snxt;		/* Senders next seq num */
	uint32_t tcp_swnd;		/* Senders window (relative to suna) */
	uint32_t tcp_mss;		/* Max segment size */
	uint32_t tcp_iss;		/* Initial send seq num */
	uint32_t tcp_rnxt;		/* Seq we expect to recv next */
	uint32_t tcp_rwnd;		/* Current receive window */
	uint32_t tcp_rwnd_max;		/* Max receive window */

	uint32_t tcp_irs;		/* Initial recv seq num */
	uint32_t tcp_fss;		/* Final/fin send seq num */

	uint32_t tcp_swl1;		/* These help us avoid using stale */
	uint32_t tcp_swl2;		/*  packets to update state */

	uint32_t tcp_cwnd;		/* Congestion window */
	int32_t tcp_cwnd_cnt;		/* cwnd cnt in congestion avoidance */
	uint32_t tcp_cwnd_ssthresh;	/* Congestion window */
	uint32_t tcp_cwnd_max;

	int32_t	tcp_snd_burst;		/* Send burst factor */

	int32_t	tcp_state;
	int32_t	tcp_rcv_ws;		/* My window scale power */
	int32_t	tcp_snd_ws;		/* Sender's window scale power */
	uint32_t tcp_ts_recent;	/* Timestamp of earliest unacked */
					/*  data segment */

	uint32_t	tcp_if_mtu;	/* Outgoing interface MTU. */

	uint32_t	tcp_rtt_sa;	/* Round trip smoothed average */
	uint32_t	tcp_rtt_sd;	/* Round trip smoothed deviation */
	uint32_t	tcp_rtt_update;		/* Round trip update(s) */
	uint32_t 	tcp_ms_we_have_waited;	/* Total retrans time */
	uint32_t	tcp_rto;	/* Round trip timeout */
	uint32_t	tcp_rto_timeout;	/* RTT timeout time */
	uint32_t	tcp_time_wait_expire;
				/* time in hz when t/w expires */
	uint32_t	tcp_last_rcv_lbolt;
				/* lbolt on last packet, used for PAWS */
	int	tcp_lingertime;		/* Close linger time (in seconds) */

	uint32_t	tcp_first_timer_threshold;  /* When to prod IP */
	uint32_t tcp_second_timer_threshold; /* When to give up completely */
	uint32_t tcp_first_ctimer_threshold; /* 1st threshold when connecting */
	uint32_t tcp_second_ctimer_threshold; /* 2nd ... while connecting */

	uint32_t tcp_rexmit_nxt;	/* Next rexmit seq num */
	uint32_t tcp_rexmit_max;	/* Max retran seq num */

	uint32_t tcp_naglim;		/* Tunable nagle limit */
	uint32_t	tcp_valid_bits;
#define	TCP_ISS_VALID	0x1	/* Is the tcp_iss seq num active? */
#define	TCP_FSS_VALID	0x2	/* Is the tcp_fss seq num active? */
#define	TCP_URG_VALID	0x4	/* Is the tcp_urg seq num active? */
#define	TCP_OFO_FIN_VALID 0x8	/* Has TCP received an out of order FIN? */

	int32_t	tcp_xmit_hiwater;	/* Send buffer high water mark. */
	int32_t	tcp_xmit_lowater;	/* Send buffer low water mark. */

	uchar_t	tcp_timer_backoff;	/* Backoff shift count. */
	uint32_t tcp_last_recv_time;	/* Last time we receive a segment. */

	/* Fields arranged in approximate access order along main paths */
	mblk_t	*tcp_xmit_head;		/* Head of rexmit list */
	mblk_t	*tcp_xmit_last;		/* last valid data seen by tcp_wput */
	uint32_t tcp_unsent;		/* # of bytes in hand that are unsent */
	mblk_t	*tcp_xmit_tail;		/* Last rexmit data sent */
	uint32_t tcp_xmit_tail_unsent;	/* # of unsent bytes in xmit_tail */
	mblk_t	*tcp_rcv_list;		/* Queued until push or exceed */
	mblk_t	*tcp_rcv_last_tail;	/* tcp_rcv_push_wait. */
	uint32_t tcp_rcv_cnt;		/* tcp_rcv_list is b_next chain. */

	uint32_t tcp_rack;		/* Seq # we have acked */
	uint32_t tcp_rack_cnt;		/* # of bytes we have deferred ack */

	mblk_t	*tcp_reass_head;	 /* Out of order reassembly list head */
	mblk_t	*tcp_reass_tail;	 /* Out of order reassembly list tail */

	tcp_sack_info_t	*tcp_sack_info;

#define	tcp_pipe	tcp_sack_info->tcp_pipe
#define	tcp_fack	tcp_sack_info->tcp_fack
#define	tcp_sack_snxt	tcp_sack_info->tcp_sack_snxt
#define	tcp_max_sack_blk	tcp_sack_info->tcp_max_sack_blk
#define	tcp_num_sack_blk	tcp_sack_info->tcp_num_sack_blk
#define	tcp_sack_list		tcp_sack_info->tcp_sack_list
#define	tcp_num_notsack_blk	tcp_sack_info->tcp_num_notsack_blk
#define	tcp_cnt_notsack_list	tcp_sack_info->tcp_cnt_notsack_list
#define	tcp_notsack_list		tcp_sack_info->tcp_notsack_list

	int tcp_conn_req_cnt_q0;	/* # of conn reqs in SYN_RCVD */
	int tcp_conn_req_cnt_q;	/* # of conn reqs in ESTABLISHED */
	int tcp_conn_req_max;	/* # of ESTABLISHED conn reqs allowed */

	int	tcp_iphc_len;		/* actual allocated buffer size */
	int32_t	tcp_hdr_len;		/* Byte len of combined TCP/IP hdr */
	struct ip	*tcp_ipha;	/* IPv4 header in the buffer */
	int	tcp_ip_hdr_len;		/* Byte len of our current IPvx hdr */
	tcph_t	*tcp_tcph;		/* tcp header within combined hdr */
	int32_t	tcp_tcp_hdr_len;	/* tcp header len within combined */

	uint16_t tcp_last_sent_len;	/* Record length for nagle */
	uint16_t tcp_dupack_cnt;	/* # of consequtive duplicate acks */

	/*
	 * Address family that app wishes returned addrsses to be in.
	 * Currently taken from address family used in T_BIND_REQ, but
	 * should really come from family used in original socket() call.
	 * Value can be AF_INET or AF_INET6.
	 */
	uint_t	tcp_family;

	uint32_t	tcp_ofo_fin_seq; /* Recv out of order FIN seq num */
	uint32_t	tcp_cwr_snd_max;

	uint32_t
		tcp_fin_acked : 1,	/* Has our FIN been acked? */
		tcp_fin_rcvd : 1,	/* Have we seen a FIN? */
		tcp_fin_sent : 1,	/* Have we sent our FIN yet? */
		tcp_useloopback : 1,	/* SO_USELOOPBACK "socket" option. */

		tcp_conn_def_q0: 1,	/* move from q0 to q deferred */
		tcp_zero_win_probe: 1,	/* Zero win probing is in progress */
		tcp_set_timer : 1,
		tcp_active_open: 1,	/* This is a active open */

		tcp_timer_running: 1,	/* Retransmission timer running */
		tcp_rexmit : 1,		/* TCP is retransmitting */
		tcp_snd_sack_ok : 1,	/* Can use SACK for this connection */
		tcp_ecn_ok : 1,		/* Can use ECN for this connection */

		tcp_ecn_echo_on : 1,	/* Need to do ECN echo */
		tcp_ecn_cwr_sent : 1,	/* ECN_CWR has been sent */
		tcp_cwr : 1,		/* Cwnd has reduced recently */
		tcp_snd_ts_ok  : 1,

		tcp_snd_ws_ok  : 1,
		tcp_linger : 1,
		tcp_dontdrop : 1,
		tcp_junk_fill_thru_bit_31 : 13;

	char	*tcp_iphc;		/* Buffer holding tcp/ip hdr template */

	in_addr_t	tcp_remote;	/* true remote address - needed for */
					/* source routing. */
	in_addr_t	tcp_bound_source;	/* IP address in bind_req */
	/*
	 * These fields contain the same information as tcp_tcph->th_*port.
	 * However, the lookup functions can not use the header fields
	 * since during IP option manipulation the tcp_tcph pointer
	 * changes.
	 */
	union {
		struct {
			in_port_t	tcpu_fport;	/* Remote port */
			in_port_t	tcpu_lport;	/* Local port */
		} tcpu_ports1;
		uint32_t		tcpu_ports2;	/* Rem port, */
							/* local port */
					/* Used for TCP_MATCH performance */
	} tcp_tcpu;
#define	tcp_fport	tcp_tcpu.tcpu_ports1.tcpu_fport
#define	tcp_lport	tcp_tcpu.tcpu_ports1.tcpu_lport
#define	tcp_ports	tcp_tcpu.tcpu_ports2
	/*
	 * IP format that packets transmitted from this struct should use.
	 * Value can be IPV4_VERSION or IPV6_VERSION.  Determines whether
	 * IP+TCP header template above stores an IPv4 or IPv6 header.
	 */
	ushort_t	tcp_ipversion;
	int		tcp_client_errno;
	struct tcp_s *tcp_eager_next_q; /* next eager in ESTABLISHED state */
	struct tcp_s *tcp_eager_last_q;	/* last eager in ESTABLISHED state */
	struct tcp_s *tcp_eager_next_q0; /* next eager in SYN_RCVD state */
	struct tcp_s *tcp_eager_prev_q0; /* prev eager in SYN_RCVD state */
	struct tcp_s *tcp_listener;	/* Our listener */
} tcp_t;

/* External TCP functions. */
extern void tcp_socket_init(struct inetboot_socket *);
extern int tcp_connect(int);
extern int tcp_listen(int, int);
extern int tcp_bind(int);
extern int tcp_send(int, tcp_t *, const void *, int);
extern int tcp_opt_set(tcp_t *, int, int, const void *, socklen_t);
extern int tcp_accept(int, struct sockaddr *, socklen_t *);
extern int tcp_shutdown(int);

/* Exported for recvfrom */
extern void tcp_rcv_drain_sock(int);

/* Exported for stand/lib/sock/sock_test.c */
extern void tcp_time_wait_report(void);

#ifdef	__cplusplus
}
#endif

#endif /* _TCP_INET_H */