diff options
Diffstat (limited to 'usr/src/uts/common/inet/tcp/tcp.c')
-rw-r--r-- | usr/src/uts/common/inet/tcp/tcp.c | 12129 |
1 files changed, 3194 insertions, 8935 deletions
diff --git a/usr/src/uts/common/inet/tcp/tcp.c b/usr/src/uts/common/inet/tcp/tcp.c index c9a941eab2..0e1ef43cfb 100644 --- a/usr/src/uts/common/inet/tcp/tcp.c +++ b/usr/src/uts/common/inet/tcp/tcp.c @@ -46,8 +46,6 @@ #include <sys/ethernet.h> #include <sys/cpuvar.h> #include <sys/dlpi.h> -#include <sys/multidata.h> -#include <sys/multidata_impl.h> #include <sys/pattr.h> #include <sys/policy.h> #include <sys/priv.h> @@ -87,7 +85,6 @@ #include <inet/tcp_impl.h> #include <inet/udp_impl.h> #include <net/pfkeyv2.h> -#include <inet/ipsec_info.h> #include <inet/ipdrop.h> #include <inet/ipclassifier.h> @@ -95,6 +92,7 @@ #include <inet/ip_ftable.h> #include <inet/ip_if.h> #include <inet/ipp_common.h> +#include <inet/ip_rts.h> #include <inet/ip_netinfo.h> #include <sys/squeue_impl.h> #include <sys/squeue.h> @@ -111,7 +109,7 @@ * * The entire tcp state is contained in tcp_t and conn_t structure * which are allocated in tandem using ipcl_conn_create() and passing - * IPCL_CONNTCP as a flag. We use 'conn_ref' and 'conn_lock' to protect + * IPCL_TCPCONN as a flag. We use 'conn_ref' and 'conn_lock' to protect * the references on the tcp_t. The tcp_t structure is never compressed * and packets always land on the correct TCP perimeter from the time * eager is created till the time tcp_t dies (as such the old mentat @@ -172,8 +170,8 @@ * * This is a more interesting case because of various races involved in * establishing a eager in its own perimeter. Read the meta comment on - * top of tcp_conn_request(). But briefly, the squeue is picked by - * ip_tcp_input()/ip_fanout_tcp_v6() based on the interrupted CPU. + * top of tcp_input_listener(). But briefly, the squeue is picked by + * ip_fanout based on the ring or the sender (if loopback). 
* * Closing a connection: * @@ -198,20 +196,13 @@ * * Special provisions and fast paths: * - * We make special provision for (AF_INET, SOCK_STREAM) sockets which - * can't have 'ipv6_recvpktinfo' set and for these type of sockets, IP - * will never send a M_CTL to TCP. As such, ip_tcp_input() which handles - * all TCP packets from the wire makes a IPCL_IS_TCP4_CONNECTED_NO_POLICY - * check to send packets directly to tcp_rput_data via squeue. Everyone - * else comes through tcp_input() on the read side. - * - * We also make special provisions for sockfs by marking tcp_issocket + * We make special provisions for sockfs by marking tcp_issocket * whenever we have only sockfs on top of TCP. This allows us to skip * putting the tcp in acceptor hash since a sockfs listener can never * become acceptor and also avoid allocating a tcp_t for acceptor STREAM * since eager has already been allocated and the accept now happens * on acceptor STREAM. There is a big blob of comment on top of - * tcp_conn_request explaining the new accept. When socket is POP'd, + * tcp_input_listener explaining the new accept. When socket is POP'd, * sockfs sends us an ioctl to mark the fact and we go back to old * behaviour. Once tcp_issocket is unset, its never set for the * life of that connection. @@ -224,13 +215,6 @@ * only exception is tcp_xmit_listeners_reset() which is called * directly from IP and needs to policy check to see if TH_RST * can be sent out. - * - * PFHooks notes : - * - * For mdt case, one meta buffer contains multiple packets. Mblks for every - * packet are assembled and passed to the hooks. When packets are blocked, - * or boundary of any packet is changed, the mdt processing is stopped, and - * packets of the meta buffer are send to the IP path one by one. */ /* @@ -244,7 +228,7 @@ int tcp_squeue_flag; /* * This controls how tiny a write must be before we try to copy it - * into the the mblk on the tail of the transmit queue. 
Not much + * into the mblk on the tail of the transmit queue. Not much * speedup is observed for values larger than sixteen. Zero will * disable the optimisation. */ @@ -333,16 +317,6 @@ static uint_t tcp_clean_death_stat[TCP_MAX_CLEAN_DEATH_TAG]; tcp_g_stat_t tcp_g_statistics; kstat_t *tcp_g_kstat; -/* - * Call either ip_output or ip_output_v6. This replaces putnext() calls on the - * tcp write side. - */ -#define CALL_IP_WPUT(connp, q, mp) { \ - ASSERT(((q)->q_flag & QREADR) == 0); \ - TCP_DBGSTAT(connp->conn_netstack->netstack_tcp, tcp_ip_output); \ - connp->conn_send(connp, (mp), (q), IP_WPUT); \ -} - /* Macros for timestamp comparisons */ #define TSTMP_GEQ(a, b) ((int32_t)((a)-(b)) >= 0) #define TSTMP_LT(a, b) ((int32_t)((a)-(b)) < 0) @@ -354,7 +328,7 @@ kstat_t *tcp_g_kstat; * nanoseconds (versus every 4 microseconds suggested by RFC 793, page 27); * a per-connection component which grows by 125000 for every new connection; * and an "extra" component that grows by a random amount centered - * approximately on 64000. This causes the the ISS generator to cycle every + * approximately on 64000. This causes the ISS generator to cycle every * 4.89 hours if no TCP connections are made, and faster if connections are * made. * @@ -381,8 +355,13 @@ static sin6_t sin6_null; /* Zero address for quick clears */ */ #define TCP_OLD_URP_INTERPRETATION 1 +/* + * Since tcp_listener is not cleared atomically with tcp_detached + * being cleared we need this extra bit to tell a detached connection + * apart from one that is in the process of being accepted. 
+ */ #define TCP_IS_DETACHED_NONEAGER(tcp) \ - (TCP_IS_DETACHED(tcp) && \ + (TCP_IS_DETACHED(tcp) && \ (!(tcp)->tcp_hard_binding)) /* @@ -495,7 +474,6 @@ typedef struct tcp_timer_s { static kmem_cache_t *tcp_timercache; kmem_cache_t *tcp_sack_info_cache; -kmem_cache_t *tcp_iphc_cache; /* * For scalability, we must not run a timer for every TCP connection @@ -592,17 +570,6 @@ typedef struct tcp_opt_s { } tcp_opt_t; /* - * TCP option struct passing information b/w lisenter and eager. - */ -struct tcp_options { - uint_t to_flags; - ssize_t to_boundif; /* IPV6_BOUND_IF */ -}; - -#define TCPOPT_BOUNDIF 0x00000001 /* set IPV6_BOUND_IF */ -#define TCPOPT_RECVPKTINFO 0x00000002 /* set IPV6_RECVPKTINFO */ - -/* * RFC1323-recommended phrasing of TSTAMP option, for easier parsing */ @@ -673,43 +640,53 @@ typedef struct tcpt_s { /* * Functions called directly via squeue having a prototype of edesc_t. */ -void tcp_conn_request(void *arg, mblk_t *mp, void *arg2); -static void tcp_wput_nondata(void *arg, mblk_t *mp, void *arg2); -void tcp_accept_finish(void *arg, mblk_t *mp, void *arg2); -static void tcp_wput_ioctl(void *arg, mblk_t *mp, void *arg2); -static void tcp_wput_proto(void *arg, mblk_t *mp, void *arg2); -void tcp_input(void *arg, mblk_t *mp, void *arg2); -void tcp_rput_data(void *arg, mblk_t *mp, void *arg2); -static void tcp_close_output(void *arg, mblk_t *mp, void *arg2); -void tcp_output(void *arg, mblk_t *mp, void *arg2); -void tcp_output_urgent(void *arg, mblk_t *mp, void *arg2); -static void tcp_rsrv_input(void *arg, mblk_t *mp, void *arg2); -static void tcp_timer_handler(void *arg, mblk_t *mp, void *arg2); -static void tcp_linger_interrupted(void *arg, mblk_t *mp, void *arg2); +void tcp_input_listener(void *arg, mblk_t *mp, void *arg2, + ip_recv_attr_t *ira); +static void tcp_wput_nondata(void *arg, mblk_t *mp, void *arg2, + ip_recv_attr_t *dummy); +void tcp_accept_finish(void *arg, mblk_t *mp, void *arg2, + ip_recv_attr_t *dummy); +static void 
tcp_wput_ioctl(void *arg, mblk_t *mp, void *arg2, + ip_recv_attr_t *dummy); +static void tcp_wput_proto(void *arg, mblk_t *mp, void *arg2, + ip_recv_attr_t *dummy); +void tcp_input_data(void *arg, mblk_t *mp, void *arg2, + ip_recv_attr_t *ira); +static void tcp_close_output(void *arg, mblk_t *mp, void *arg2, + ip_recv_attr_t *dummy); +void tcp_output(void *arg, mblk_t *mp, void *arg2, + ip_recv_attr_t *dummy); +void tcp_output_urgent(void *arg, mblk_t *mp, void *arg2, + ip_recv_attr_t *dummy); +static void tcp_rsrv_input(void *arg, mblk_t *mp, void *arg2, + ip_recv_attr_t *dummy); +static void tcp_timer_handler(void *arg, mblk_t *mp, void *arg2, + ip_recv_attr_t *dummy); +static void tcp_linger_interrupted(void *arg, mblk_t *mp, void *arg2, + ip_recv_attr_t *dummy); /* Prototype for TCP functions */ static void tcp_random_init(void); int tcp_random(void); static void tcp_tli_accept(tcp_t *tcp, mblk_t *mp); -static int tcp_accept_swap(tcp_t *listener, tcp_t *acceptor, +static void tcp_accept_swap(tcp_t *listener, tcp_t *acceptor, tcp_t *eager); -static int tcp_adapt_ire(tcp_t *tcp, mblk_t *ire_mp); +static int tcp_set_destination(tcp_t *tcp); static in_port_t tcp_bindi(tcp_t *tcp, in_port_t port, const in6_addr_t *laddr, int reuseaddr, boolean_t quick_connect, boolean_t bind_to_req_port_only, boolean_t user_specified); static void tcp_closei_local(tcp_t *tcp); static void tcp_close_detached(tcp_t *tcp); -static boolean_t tcp_conn_con(tcp_t *tcp, uchar_t *iphdr, tcph_t *tcph, - mblk_t *idmp, mblk_t **defermp); +static boolean_t tcp_conn_con(tcp_t *tcp, uchar_t *iphdr, + mblk_t *idmp, mblk_t **defermp, ip_recv_attr_t *ira); static void tcp_tpi_connect(tcp_t *tcp, mblk_t *mp); static int tcp_connect_ipv4(tcp_t *tcp, ipaddr_t *dstaddrp, - in_port_t dstport, uint_t srcid, cred_t *cr, pid_t pid); -static int tcp_connect_ipv6(tcp_t *tcp, in6_addr_t *dstaddrp, - in_port_t dstport, uint32_t flowinfo, uint_t srcid, - uint32_t scope_id, cred_t *cr, pid_t pid); + in_port_t 
dstport, uint_t srcid); +static int tcp_connect_ipv6(tcp_t *tcp, in6_addr_t *dstaddrp, + in_port_t dstport, uint32_t flowinfo, + uint_t srcid, uint32_t scope_id); static int tcp_clean_death(tcp_t *tcp, int err, uint8_t tag); -static void tcp_def_q_set(tcp_t *tcp, mblk_t *mp); static void tcp_disconnect(tcp_t *tcp, mblk_t *mp); static char *tcp_display(tcp_t *tcp, char *, char); static boolean_t tcp_eager_blowoff(tcp_t *listener, t_scalar_t seqnum); @@ -735,34 +712,16 @@ static void tcp_acceptor_hash_remove(tcp_t *tcp); static void tcp_capability_req(tcp_t *tcp, mblk_t *mp); static void tcp_info_req(tcp_t *tcp, mblk_t *mp); static void tcp_addr_req(tcp_t *tcp, mblk_t *mp); -static void tcp_addr_req_ipv6(tcp_t *tcp, mblk_t *mp); -void tcp_g_q_setup(tcp_stack_t *); -void tcp_g_q_create(tcp_stack_t *); -void tcp_g_q_destroy(tcp_stack_t *); -static int tcp_header_init_ipv4(tcp_t *tcp); -static int tcp_header_init_ipv6(tcp_t *tcp); -int tcp_init(tcp_t *tcp, queue_t *q); -static int tcp_init_values(tcp_t *tcp); -static mblk_t *tcp_ip_advise_mblk(void *addr, int addr_len, ipic_t **ipic); -static void tcp_ip_ire_mark_advice(tcp_t *tcp); +static void tcp_init_values(tcp_t *tcp); static void tcp_ip_notify(tcp_t *tcp); -static mblk_t *tcp_ire_mp(mblk_t **mpp); static void tcp_iss_init(tcp_t *tcp); static void tcp_keepalive_killer(void *arg); -static int tcp_parse_options(tcph_t *tcph, tcp_opt_t *tcpopt); -static void tcp_mss_set(tcp_t *tcp, uint32_t size, boolean_t do_ss); +static int tcp_parse_options(tcpha_t *tcpha, tcp_opt_t *tcpopt); +static void tcp_mss_set(tcp_t *tcp, uint32_t size); static int tcp_conprim_opt_process(tcp_t *tcp, mblk_t *mp, int *do_disconnectp, int *t_errorp, int *sys_errorp); static boolean_t tcp_allow_connopt_set(int level, int name); int tcp_opt_default(queue_t *q, int level, int name, uchar_t *ptr); -int tcp_tpi_opt_get(queue_t *q, int level, int name, uchar_t *ptr); -int tcp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, - int name, 
uint_t inlen, uchar_t *invalp, uint_t *outlenp, - uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, - mblk_t *mblk); -static void tcp_opt_reverse(tcp_t *tcp, ipha_t *ipha); -static int tcp_opt_set_header(tcp_t *tcp, boolean_t checkonly, - uchar_t *ptr, uint_t len); static int tcp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); static boolean_t tcp_param_register(IDP *ndp, tcpparam_t *tcppa, int cnt, tcp_stack_t *); @@ -785,9 +744,9 @@ static uint_t tcp_rcv_drain(tcp_t *tcp); static void tcp_sack_rxmit(tcp_t *tcp, uint_t *flags); static boolean_t tcp_send_rst_chk(tcp_stack_t *); static void tcp_ss_rexmit(tcp_t *tcp); -static mblk_t *tcp_rput_add_ancillary(tcp_t *tcp, mblk_t *mp, ip6_pkt_t *ipp); -static void tcp_process_options(tcp_t *, tcph_t *); -static void tcp_rput_common(tcp_t *tcp, mblk_t *mp); +static mblk_t *tcp_input_add_ancillary(tcp_t *tcp, mblk_t *mp, ip_pkt_t *ipp, + ip_recv_attr_t *); +static void tcp_process_options(tcp_t *, tcpha_t *); static void tcp_rsrv(queue_t *q); static int tcp_snmp_state(tcp_t *tcp); static void tcp_timer(void *arg); @@ -801,16 +760,10 @@ void tcp_tpi_accept(queue_t *q, mblk_t *mp); static void tcp_wput_data(tcp_t *tcp, mblk_t *mp, boolean_t urgent); static void tcp_wput_flush(tcp_t *tcp, mblk_t *mp); static void tcp_wput_iocdata(tcp_t *tcp, mblk_t *mp); -static int tcp_send(queue_t *q, tcp_t *tcp, const int mss, - const int tcp_hdr_len, const int tcp_tcp_hdr_len, +static int tcp_send(tcp_t *tcp, const int mss, + const int total_hdr_len, const int tcp_hdr_len, const int num_sack_blk, int *usable, uint_t *snxt, - int *tail_unsent, mblk_t **xmit_tail, mblk_t *local_time, - const int mdt_thres); -static int tcp_multisend(queue_t *q, tcp_t *tcp, const int mss, - const int tcp_hdr_len, const int tcp_tcp_hdr_len, - const int num_sack_blk, int *usable, uint_t *snxt, - int *tail_unsent, mblk_t **xmit_tail, mblk_t *local_time, - const int mdt_thres); + int *tail_unsent, mblk_t **xmit_tail, mblk_t *local_time); static void 
tcp_fill_header(tcp_t *tcp, uchar_t *rptr, clock_t now, int num_sack_blk); static void tcp_wsrv(queue_t *q); @@ -818,38 +771,36 @@ static int tcp_xmit_end(tcp_t *tcp); static void tcp_ack_timer(void *arg); static mblk_t *tcp_ack_mp(tcp_t *tcp); static void tcp_xmit_early_reset(char *str, mblk_t *mp, - uint32_t seq, uint32_t ack, int ctl, uint_t ip_hdr_len, - zoneid_t zoneid, tcp_stack_t *, conn_t *connp); + uint32_t seq, uint32_t ack, int ctl, ip_recv_attr_t *, + ip_stack_t *, conn_t *); static void tcp_xmit_ctl(char *str, tcp_t *tcp, uint32_t seq, uint32_t ack, int ctl); -static int setmaxps(queue_t *q, int maxpsz); static void tcp_set_rto(tcp_t *, time_t); -static boolean_t tcp_check_policy(tcp_t *, mblk_t *, ipha_t *, ip6_t *, - boolean_t, boolean_t); -static void tcp_icmp_error_ipv6(tcp_t *tcp, mblk_t *mp, - boolean_t ipsec_mctl); +static void tcp_icmp_input(void *, mblk_t *, void *, ip_recv_attr_t *); +static void tcp_icmp_error_ipv6(tcp_t *, mblk_t *, ip_recv_attr_t *); +static boolean_t tcp_verifyicmp(conn_t *, void *, icmph_t *, icmp6_t *, + ip_recv_attr_t *); static int tcp_build_hdrs(tcp_t *); static void tcp_time_wait_processing(tcp_t *tcp, mblk_t *mp, - uint32_t seg_seq, uint32_t seg_ack, int seg_len, - tcph_t *tcph); -boolean_t tcp_paws_check(tcp_t *tcp, tcph_t *tcph, tcp_opt_t *tcpoptp); -static mblk_t *tcp_mdt_info_mp(mblk_t *); -static void tcp_mdt_update(tcp_t *, ill_mdt_capab_t *, boolean_t); -static int tcp_mdt_add_attrs(multidata_t *, const mblk_t *, - const boolean_t, const uint32_t, const uint32_t, - const uint32_t, const uint32_t, tcp_stack_t *); -static void tcp_multisend_data(tcp_t *, ire_t *, const ill_t *, mblk_t *, - const uint_t, const uint_t, boolean_t *); -static mblk_t *tcp_lso_info_mp(mblk_t *); -static void tcp_lso_update(tcp_t *, ill_lso_capab_t *); -static void tcp_send_data(tcp_t *, queue_t *, mblk_t *); + uint32_t seg_seq, uint32_t seg_ack, int seg_len, tcpha_t *tcpha, + ip_recv_attr_t *ira); +boolean_t tcp_paws_check(tcp_t 
*tcp, tcpha_t *tcpha, tcp_opt_t *tcpoptp); +static boolean_t tcp_zcopy_check(tcp_t *); +static void tcp_zcopy_notify(tcp_t *); +static mblk_t *tcp_zcopy_backoff(tcp_t *, mblk_t *, boolean_t); +static void tcp_update_lso(tcp_t *tcp, ip_xmit_attr_t *ixa); +static void tcp_update_pmtu(tcp_t *tcp, boolean_t decrease_only); +static void tcp_update_zcopy(tcp_t *tcp); +static void tcp_notify(void *, ip_xmit_attr_t *, ixa_notify_type_t, + ixa_notify_arg_t); +static void tcp_rexmit_after_error(tcp_t *tcp); +static void tcp_send_data(tcp_t *, mblk_t *); extern mblk_t *tcp_timermp_alloc(int); extern void tcp_timermp_free(tcp_t *); static void tcp_timer_free(tcp_t *tcp, mblk_t *mp); static void tcp_stop_lingering(tcp_t *tcp); static void tcp_close_linger_timeout(void *arg); static void *tcp_stack_init(netstackid_t stackid, netstack_t *ns); -static void tcp_stack_shutdown(netstackid_t stackid, void *arg); static void tcp_stack_fini(netstackid_t stackid, void *arg); static void *tcp_g_kstat_init(tcp_g_stat_t *); static void tcp_g_kstat_fini(kstat_t *); @@ -858,11 +809,10 @@ static void tcp_kstat_fini(netstackid_t, kstat_t *); static void *tcp_kstat2_init(netstackid_t, tcp_stat_t *); static void tcp_kstat2_fini(netstackid_t, kstat_t *); static int tcp_kstat_update(kstat_t *kp, int rw); -void tcp_reinput(conn_t *connp, mblk_t *mp, squeue_t *sqp); -static int tcp_conn_create_v6(conn_t *lconnp, conn_t *connp, mblk_t *mp, - tcph_t *tcph, uint_t ipvers, mblk_t *idmp); -static int tcp_conn_create_v4(conn_t *lconnp, conn_t *connp, ipha_t *ipha, - tcph_t *tcph, mblk_t *idmp); +static mblk_t *tcp_conn_create_v6(conn_t *lconnp, conn_t *connp, mblk_t *mp, + ip_recv_attr_t *ira); +static mblk_t *tcp_conn_create_v4(conn_t *lconnp, conn_t *connp, mblk_t *mp, + ip_recv_attr_t *ira); static int tcp_squeue_switch(int); static int tcp_open(queue_t *, dev_t *, int, int, cred_t *, boolean_t); @@ -872,21 +822,17 @@ static int tcp_tpi_close(queue_t *, int); static int tcp_tpi_close_accept(queue_t *); 
static void tcp_squeue_add(squeue_t *); -static boolean_t tcp_zcopy_check(tcp_t *); -static void tcp_zcopy_notify(tcp_t *); -static mblk_t *tcp_zcopy_disable(tcp_t *, mblk_t *); -static mblk_t *tcp_zcopy_backoff(tcp_t *, mblk_t *, int); -static void tcp_ire_ill_check(tcp_t *, ire_t *, ill_t *, boolean_t); +static void tcp_setcred_data(mblk_t *, ip_recv_attr_t *); -extern void tcp_kssl_input(tcp_t *, mblk_t *); +extern void tcp_kssl_input(tcp_t *, mblk_t *, cred_t *); -void tcp_eager_kill(void *arg, mblk_t *mp, void *arg2); -void tcp_clean_death_wrapper(void *arg, mblk_t *mp, void *arg2); +void tcp_eager_kill(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *dummy); +void tcp_clean_death_wrapper(void *arg, mblk_t *mp, void *arg2, + ip_recv_attr_t *dummy); static int tcp_accept(sock_lower_handle_t, sock_lower_handle_t, sock_upper_handle_t, cred_t *); static int tcp_listen(sock_lower_handle_t, int, cred_t *); -static int tcp_post_ip_bind(tcp_t *, mblk_t *, int, cred_t *, pid_t); static int tcp_do_listen(conn_t *, struct sockaddr *, socklen_t, int, cred_t *, boolean_t); static int tcp_do_connect(conn_t *, const struct sockaddr *, socklen_t, @@ -922,7 +868,8 @@ static void tcp_ulp_newconn(conn_t *, conn_t *, mblk_t *); */ static mblk_t *tcp_ioctl_abort_build_msg(tcp_ioc_abort_conn_t *, tcp_t *); static void tcp_ioctl_abort_dump(tcp_ioc_abort_conn_t *); -static void tcp_ioctl_abort_handler(tcp_t *, mblk_t *); +static void tcp_ioctl_abort_handler(void *arg, mblk_t *mp, void *arg2, + ip_recv_attr_t *dummy); static int tcp_ioctl_abort(tcp_ioc_abort_conn_t *, tcp_stack_t *tcps); static void tcp_ioctl_abort_conn(queue_t *, mblk_t *); static int tcp_ioctl_abort_bucket(tcp_ioc_abort_conn_t *, int, int *, @@ -988,12 +935,6 @@ struct streamtab tcpinfov6 = { sock_downcalls_t sock_tcp_downcalls; -/* - * Have to ensure that tcp_g_q_close is not done by an - * interrupt thread. - */ -static taskq_t *tcp_taskq; - /* Setable only in /etc/system. Move to ndd? 
*/ boolean_t tcp_icmp_source_quench = B_FALSE; @@ -1042,8 +983,8 @@ static struct T_info_ack tcp_g_t_info_ack_v6 = { #define PARAM_MAX (~(uint32_t)0) /* Max size IP datagram is 64k - 1 */ -#define TCP_MSS_MAX_IPV4 (IP_MAXPACKET - (sizeof (ipha_t) + sizeof (tcph_t))) -#define TCP_MSS_MAX_IPV6 (IP_MAXPACKET - (sizeof (ip6_t) + sizeof (tcph_t))) +#define TCP_MSS_MAX_IPV4 (IP_MAXPACKET - (sizeof (ipha_t) + sizeof (tcpha_t))) +#define TCP_MSS_MAX_IPV6 (IP_MAXPACKET - (sizeof (ip6_t) + sizeof (tcpha_t))) /* Max of the above */ #define TCP_MSS_MAX TCP_MSS_MAX_IPV4 @@ -1128,29 +1069,10 @@ static tcpparam_t lcl_tcp_param_arr[] = { { 0, 100*MS, 50*MS, "tcp_push_timer_interval"}, { 0, 1, 0, "tcp_use_smss_as_mss_opt"}, { 0, PARAM_MAX, 8*MINUTES, "tcp_keepalive_abort_interval"}, + { 0, 1, 0, "tcp_dev_flow_ctl"}, }; /* END CSTYLED */ -/* - * tcp_mdt_hdr_{head,tail}_min are the leading and trailing spaces of - * each header fragment in the header buffer. Each parameter value has - * to be a multiple of 4 (32-bit aligned). - */ -static tcpparam_t lcl_tcp_mdt_head_param = - { 32, 256, 32, "tcp_mdt_hdr_head_min" }; -static tcpparam_t lcl_tcp_mdt_tail_param = - { 0, 256, 32, "tcp_mdt_hdr_tail_min" }; -#define tcps_mdt_hdr_head_min tcps_mdt_head_param->tcp_param_val -#define tcps_mdt_hdr_tail_min tcps_mdt_tail_param->tcp_param_val - -/* - * tcp_mdt_max_pbufs is the upper limit value that tcp uses to figure out - * the maximum number of payload buffers associated per Multidata. - */ -static tcpparam_t lcl_tcp_mdt_max_pbufs_param = - { 1, MULTIDATA_MAX_PBUFS, MULTIDATA_MAX_PBUFS, "tcp_mdt_max_pbufs" }; -#define tcps_mdt_max_pbufs tcps_mdt_max_pbufs_param->tcp_param_val - /* Round up the value to the nearest mss. */ #define MSS_ROUNDUP(value, mss) ((((value) - 1) / (mss) + 1) * (mss)) @@ -1162,7 +1084,7 @@ static tcpparam_t lcl_tcp_mdt_max_pbufs_param = * point ECT(0) for TCP as described in RFC 2481. 
*/ #define SET_ECT(tcp, iph) \ - if ((tcp)->tcp_ipversion == IPV4_VERSION) { \ + if ((tcp)->tcp_connp->conn_ipversion == IPV4_VERSION) { \ /* We need to clear the code point first. */ \ ((ipha_t *)(iph))->ipha_type_of_service &= 0xFC; \ ((ipha_t *)(iph))->ipha_type_of_service |= IPH_ECN_ECT0; \ @@ -1183,23 +1105,12 @@ static tcpparam_t lcl_tcp_mdt_max_pbufs_param = #define IS_VMLOANED_MBLK(mp) \ (((mp)->b_datap->db_struioflag & STRUIO_ZC) != 0) - -/* Enable or disable b_cont M_MULTIDATA chaining for MDT. */ -boolean_t tcp_mdt_chain = B_TRUE; - -/* - * MDT threshold in the form of effective send MSS multiplier; we take - * the MDT path if the amount of unsent data exceeds the threshold value - * (default threshold is 1*SMSS). - */ -uint_t tcp_mdt_smss_threshold = 1; - uint32_t do_tcpzcopy = 1; /* 0: disable, 1: enable, 2: force */ /* * Forces all connections to obey the value of the tcps_maxpsz_multiplier * tunable settable via NDD. Otherwise, the per-connection behavior is - * determined dynamically during tcp_adapt_ire(), which is the default. + * determined dynamically during tcp_set_destination(), which is the default. 
*/ boolean_t tcp_static_maxpsz = B_FALSE; @@ -1273,84 +1184,73 @@ int (*cl_inet_connect2)(netstackid_t stack_id, uint8_t protocol, uint8_t *laddrp, in_port_t lport, uint8_t *faddrp, in_port_t fport, void *args) = NULL; - void (*cl_inet_disconnect)(netstackid_t stack_id, uint8_t protocol, sa_family_t addr_family, uint8_t *laddrp, in_port_t lport, uint8_t *faddrp, in_port_t fport, void *args) = NULL; -/* - * The following are defined in ip.c - */ -extern int (*cl_inet_isclusterwide)(netstackid_t stack_id, uint8_t protocol, - sa_family_t addr_family, uint8_t *laddrp, - void *args); -extern uint32_t (*cl_inet_ipident)(netstackid_t stack_id, uint8_t protocol, - sa_family_t addr_family, uint8_t *laddrp, - uint8_t *faddrp, void *args); - /* * int CL_INET_CONNECT(conn_t *cp, tcp_t *tcp, boolean_t is_outgoing, int err) */ -#define CL_INET_CONNECT(connp, tcp, is_outgoing, err) { \ +#define CL_INET_CONNECT(connp, is_outgoing, err) { \ (err) = 0; \ if (cl_inet_connect2 != NULL) { \ /* \ * Running in cluster mode - register active connection \ * information \ */ \ - if ((tcp)->tcp_ipversion == IPV4_VERSION) { \ - if ((tcp)->tcp_ipha->ipha_src != 0) { \ + if ((connp)->conn_ipversion == IPV4_VERSION) { \ + if ((connp)->conn_laddr_v4 != 0) { \ (err) = (*cl_inet_connect2)( \ (connp)->conn_netstack->netstack_stackid,\ IPPROTO_TCP, is_outgoing, AF_INET, \ - (uint8_t *)(&((tcp)->tcp_ipha->ipha_src)),\ - (in_port_t)(tcp)->tcp_lport, \ - (uint8_t *)(&((tcp)->tcp_ipha->ipha_dst)),\ - (in_port_t)(tcp)->tcp_fport, NULL); \ + (uint8_t *)(&((connp)->conn_laddr_v4)),\ + (in_port_t)(connp)->conn_lport, \ + (uint8_t *)(&((connp)->conn_faddr_v4)),\ + (in_port_t)(connp)->conn_fport, NULL); \ } \ } else { \ if (!IN6_IS_ADDR_UNSPECIFIED( \ - &(tcp)->tcp_ip6h->ip6_src)) { \ + &(connp)->conn_laddr_v6)) { \ (err) = (*cl_inet_connect2)( \ (connp)->conn_netstack->netstack_stackid,\ IPPROTO_TCP, is_outgoing, AF_INET6, \ - (uint8_t *)(&((tcp)->tcp_ip6h->ip6_src)),\ - (in_port_t)(tcp)->tcp_lport, \ - 
(uint8_t *)(&((tcp)->tcp_ip6h->ip6_dst)),\ - (in_port_t)(tcp)->tcp_fport, NULL); \ + (uint8_t *)(&((connp)->conn_laddr_v6)),\ + (in_port_t)(connp)->conn_lport, \ + (uint8_t *)(&((connp)->conn_faddr_v6)), \ + (in_port_t)(connp)->conn_fport, NULL); \ } \ } \ } \ } -#define CL_INET_DISCONNECT(connp, tcp) { \ +#define CL_INET_DISCONNECT(connp) { \ if (cl_inet_disconnect != NULL) { \ /* \ * Running in cluster mode - deregister active \ * connection information \ */ \ - if ((tcp)->tcp_ipversion == IPV4_VERSION) { \ - if ((tcp)->tcp_ip_src != 0) { \ + if ((connp)->conn_ipversion == IPV4_VERSION) { \ + if ((connp)->conn_laddr_v4 != 0) { \ (*cl_inet_disconnect)( \ (connp)->conn_netstack->netstack_stackid,\ IPPROTO_TCP, AF_INET, \ - (uint8_t *)(&((tcp)->tcp_ip_src)), \ - (in_port_t)(tcp)->tcp_lport, \ - (uint8_t *)(&((tcp)->tcp_ipha->ipha_dst)),\ - (in_port_t)(tcp)->tcp_fport, NULL); \ + (uint8_t *)(&((connp)->conn_laddr_v4)),\ + (in_port_t)(connp)->conn_lport, \ + (uint8_t *)(&((connp)->conn_faddr_v4)),\ + (in_port_t)(connp)->conn_fport, NULL); \ } \ } else { \ if (!IN6_IS_ADDR_UNSPECIFIED( \ - &(tcp)->tcp_ip_src_v6)) { \ + &(connp)->conn_laddr_v6)) { \ (*cl_inet_disconnect)( \ (connp)->conn_netstack->netstack_stackid,\ IPPROTO_TCP, AF_INET6, \ - (uint8_t *)(&((tcp)->tcp_ip_src_v6)),\ - (in_port_t)(tcp)->tcp_lport, \ - (uint8_t *)(&((tcp)->tcp_ip6h->ip6_dst)),\ - (in_port_t)(tcp)->tcp_fport, NULL); \ + (uint8_t *)(&((connp)->conn_laddr_v6)),\ + (in_port_t)(connp)->conn_lport, \ + (uint8_t *)(&((connp)->conn_faddr_v6)), \ + (in_port_t)(connp)->conn_fport, NULL); \ } \ } \ } \ @@ -1367,11 +1267,6 @@ int cl_tcp_walk_list(netstackid_t stack_id, static int cl_tcp_walk_list_stack(int (*callback)(cl_tcp_info_t *, void *), void *arg, tcp_stack_t *tcps); -#define DTRACE_IP_FASTPATH(mp, iph, ill, ipha, ip6h) \ - DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL, void_ip_t *, \ - iph, __dtrace_ipsr_ill_t *, ill, ipha_t *, ipha, \ - ip6_t *, ip6h, int, 0); - static void 
tcp_set_recv_threshold(tcp_t *tcp, uint32_t new_rcvthresh) { @@ -1540,7 +1435,7 @@ tcp_time_wait_append(tcp_t *tcp) /* ARGSUSED */ void -tcp_timewait_output(void *arg, mblk_t *mp, void *arg2) +tcp_timewait_output(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *dummy) { conn_t *connp = (conn_t *)arg; tcp_t *tcp = connp->conn_tcp; @@ -1551,11 +1446,11 @@ tcp_timewait_output(void *arg, mblk_t *mp, void *arg2) return; } - ASSERT((tcp->tcp_family == AF_INET && - tcp->tcp_ipversion == IPV4_VERSION) || - (tcp->tcp_family == AF_INET6 && - (tcp->tcp_ipversion == IPV4_VERSION || - tcp->tcp_ipversion == IPV6_VERSION))); + ASSERT((connp->conn_family == AF_INET && + connp->conn_ipversion == IPV4_VERSION) || + (connp->conn_family == AF_INET6 && + (connp->conn_ipversion == IPV4_VERSION || + connp->conn_ipversion == IPV6_VERSION))); ASSERT(!tcp->tcp_listener); TCP_STAT(tcps, tcp_time_wait_reap); @@ -1579,10 +1474,17 @@ tcp_ipsec_cleanup(tcp_t *tcp) ASSERT(connp->conn_flags & IPCL_TCPCONN); if (connp->conn_latch != NULL) { - IPLATCH_REFRELE(connp->conn_latch, - connp->conn_netstack); + IPLATCH_REFRELE(connp->conn_latch); connp->conn_latch = NULL; } + if (connp->conn_latch_in_policy != NULL) { + IPPOL_REFRELE(connp->conn_latch_in_policy); + connp->conn_latch_in_policy = NULL; + } + if (connp->conn_latch_in_action != NULL) { + IPACT_REFRELE(connp->conn_latch_in_action); + connp->conn_latch_in_action = NULL; + } if (connp->conn_policy != NULL) { IPPH_REFRELE(connp->conn_policy, connp->conn_netstack); connp->conn_policy = NULL; @@ -1598,9 +1500,6 @@ void tcp_cleanup(tcp_t *tcp) { mblk_t *mp; - char *tcp_iphc; - int tcp_iphc_len; - int tcp_hdr_grown; tcp_sack_info_t *tcp_sack_info; conn_t *connp = tcp->tcp_connp; tcp_stack_t *tcps = tcp->tcp_tcps; @@ -1611,6 +1510,22 @@ tcp_cleanup(tcp_t *tcp) /* Cleanup that which needs the netstack first */ tcp_ipsec_cleanup(tcp); + ixa_cleanup(connp->conn_ixa); + + if (connp->conn_ht_iphc != NULL) { + kmem_free(connp->conn_ht_iphc, 
connp->conn_ht_iphc_allocated); + connp->conn_ht_iphc = NULL; + connp->conn_ht_iphc_allocated = 0; + connp->conn_ht_iphc_len = 0; + connp->conn_ht_ulp = NULL; + connp->conn_ht_ulp_len = 0; + tcp->tcp_ipha = NULL; + tcp->tcp_ip6h = NULL; + tcp->tcp_tcpha = NULL; + } + + /* We clear any IP_OPTIONS and extension headers */ + ip_pkt_free(&connp->conn_xmit_ipp); tcp_free(tcp); @@ -1626,8 +1541,6 @@ tcp_cleanup(tcp_t *tcp) } tcp->tcp_kssl_pending = B_FALSE; - conn_delete_ire(connp, NULL); - /* * Since we will bzero the entire structure, we need to * remove it and reinsert it in global hash list. We @@ -1639,46 +1552,36 @@ tcp_cleanup(tcp_t *tcp) */ ipcl_globalhash_remove(connp); - /* - * Now it is safe to decrement the reference counts. - * This might be the last reference on the netstack and TCPS - * in which case it will cause the tcp_g_q_close and - * the freeing of the IP Instance. - */ - connp->conn_netstack = NULL; - netstack_rele(ns); - ASSERT(tcps != NULL); - tcp->tcp_tcps = NULL; - TCPS_REFRELE(tcps); - /* Save some state */ mp = tcp->tcp_timercache; tcp_sack_info = tcp->tcp_sack_info; - tcp_iphc = tcp->tcp_iphc; - tcp_iphc_len = tcp->tcp_iphc_len; - tcp_hdr_grown = tcp->tcp_hdr_grown; tcp_rsrv_mp = tcp->tcp_rsrv_mp; if (connp->conn_cred != NULL) { crfree(connp->conn_cred); connp->conn_cred = NULL; } - if (connp->conn_effective_cred != NULL) { - crfree(connp->conn_effective_cred); - connp->conn_effective_cred = NULL; - } ipcl_conn_cleanup(connp); connp->conn_flags = IPCL_TCPCONN; + + /* + * Now it is safe to decrement the reference counts. + * This might be the last reference on the netstack + * in which case it will cause the freeing of the IP Instance. 
+ */ + connp->conn_netstack = NULL; + connp->conn_ixa->ixa_ipst = NULL; + netstack_rele(ns); + ASSERT(tcps != NULL); + tcp->tcp_tcps = NULL; + bzero(tcp, sizeof (tcp_t)); /* restore the state */ tcp->tcp_timercache = mp; tcp->tcp_sack_info = tcp_sack_info; - tcp->tcp_iphc = tcp_iphc; - tcp->tcp_iphc_len = tcp_iphc_len; - tcp->tcp_hdr_grown = tcp_hdr_grown; tcp->tcp_rsrv_mp = tcp_rsrv_mp; tcp->tcp_connp = connp; @@ -1686,7 +1589,7 @@ tcp_cleanup(tcp_t *tcp) ASSERT(connp->conn_tcp == tcp); ASSERT(connp->conn_flags & IPCL_TCPCONN); connp->conn_state_flags = CONN_INCIPIENT; - ASSERT(connp->conn_ulp == IPPROTO_TCP); + ASSERT(connp->conn_proto == IPPROTO_TCP); ASSERT(connp->conn_ref == 1); } @@ -1777,11 +1680,7 @@ tcp_time_wait_collector(void *arg) /* * Set the CONDEMNED flag now itself so that * the refcnt cannot increase due to any - * walker. But we have still not cleaned up - * conn_ire_cache. This is still ok since - * we are going to clean it up in tcp_cleanup - * immediately and any interface unplumb - * thread will wait till the ire is blown away + * walker. 
*/ connp->conn_state_flags |= CONN_CONDEMNED; mutex_exit(lock); @@ -1809,7 +1708,7 @@ tcp_time_wait_collector(void *arg) mutex_exit( &tcp_time_wait->tcp_time_wait_lock); tcp_bind_hash_remove(tcp); - conn_delete_ire(tcp->tcp_connp, NULL); + ixa_cleanup(tcp->tcp_connp->conn_ixa); tcp_ipsec_cleanup(tcp); CONN_DEC_REF(tcp->tcp_connp); } @@ -1839,7 +1738,7 @@ tcp_time_wait_collector(void *arg) TCP_DEBUG_GETPCSTACK(tcp->tcmp_stk, 15); mp = &tcp->tcp_closemp; SQUEUE_ENTER_ONE(connp->conn_sqp, mp, - tcp_timewait_output, connp, + tcp_timewait_output, connp, NULL, SQ_FILL, SQTAG_TCP_TIMEWAIT); } } else { @@ -1867,7 +1766,7 @@ tcp_time_wait_collector(void *arg) TCP_DEBUG_GETPCSTACK(tcp->tcmp_stk, 15); mp = &tcp->tcp_closemp; SQUEUE_ENTER_ONE(connp->conn_sqp, mp, - tcp_timewait_output, connp, + tcp_timewait_output, connp, NULL, SQ_FILL, SQTAG_TCP_TIMEWAIT); } mutex_enter(&tcp_time_wait->tcp_time_wait_lock); @@ -1886,24 +1785,23 @@ tcp_time_wait_collector(void *arg) /* * Reply to a clients T_CONN_RES TPI message. This function * is used only for TLI/XTI listener. Sockfs sends T_CONN_RES - * on the acceptor STREAM and processed in tcp_wput_accept(). - * Read the block comment on top of tcp_conn_request(). + * on the acceptor STREAM and processed in tcp_accept_common(). + * Read the block comment on top of tcp_input_listener(). */ static void tcp_tli_accept(tcp_t *listener, mblk_t *mp) { - tcp_t *acceptor; - tcp_t *eager; - tcp_t *tcp; + tcp_t *acceptor; + tcp_t *eager; + tcp_t *tcp; struct T_conn_res *tcr; t_uscalar_t acceptor_id; t_scalar_t seqnum; - mblk_t *opt_mp = NULL; /* T_OPTMGMT_REQ messages */ - struct tcp_options *tcpopt; - mblk_t *ok_mp; - mblk_t *mp1; + mblk_t *discon_mp = NULL; + mblk_t *ok_mp; + mblk_t *mp1; tcp_stack_t *tcps = listener->tcp_tcps; - int error; + conn_t *econnp; if ((mp->b_wptr - mp->b_rptr) < sizeof (*tcr)) { tcp_err_ack(listener, mp, TPROTO, 0); @@ -1922,8 +1820,8 @@ tcp_tli_accept(tcp_t *listener, mblk_t *mp) * fanout hash lock is held. 
* This prevents any thread from entering the acceptor queue from * below (since it has not been hard bound yet i.e. any inbound - * packets will arrive on the listener or default tcp queue and - * go through tcp_lookup). + * packets will arrive on the listener conn_t and + * go through the classifier). * The CONN_INC_REF will prevent the acceptor from closing. * * XXX It is still possible for a tli application to send down data @@ -1974,7 +1872,7 @@ tcp_tli_accept(tcp_t *listener, mblk_t *mp) } else { acceptor = tcp_acceptor_hash_lookup(acceptor_id, tcps); if (acceptor == NULL) { - if (listener->tcp_debug) { + if (listener->tcp_connp->conn_debug) { (void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, "tcp_accept: did not find acceptor 0x%x\n", @@ -2013,7 +1911,7 @@ tcp_tli_accept(tcp_t *listener, mblk_t *mp) * Rendezvous with an eager connection request packet hanging off * 'tcp' that has the 'seqnum' tag. We tagged the detached open * tcp structure when the connection packet arrived in - * tcp_conn_request(). + * tcp_input_listener(). */ seqnum = tcr->SEQ_number; eager = listener; @@ -2047,37 +1945,26 @@ tcp_tli_accept(tcp_t *listener, mblk_t *mp) */ ASSERT(eager->tcp_connp->conn_ref >= 1); - /* Pre allocate the stroptions mblk also */ - opt_mp = allocb(MAX(sizeof (struct tcp_options), - sizeof (struct T_conn_res)), BPRI_HI); - if (opt_mp == NULL) { + /* + * Pre allocate the discon_ind mblk also. tcp_accept_finish will + * use it if something failed. + */ + discon_mp = allocb(MAX(sizeof (struct T_discon_ind), + sizeof (struct stroptions)), BPRI_HI); + if (discon_mp == NULL) { CONN_DEC_REF(acceptor->tcp_connp); CONN_DEC_REF(eager->tcp_connp); tcp_err_ack(listener, mp, TSYSERR, ENOMEM); return; } - DB_TYPE(opt_mp) = M_SETOPTS; - opt_mp->b_wptr += sizeof (struct tcp_options); - tcpopt = (struct tcp_options *)opt_mp->b_rptr; - tcpopt->to_flags = 0; - /* - * Prepare for inheriting IPV6_BOUND_IF and IPV6_RECVPKTINFO - * from listener to acceptor. 
- */ - if (listener->tcp_bound_if != 0) { - tcpopt->to_flags |= TCPOPT_BOUNDIF; - tcpopt->to_boundif = listener->tcp_bound_if; - } - if (listener->tcp_ipv6_recvancillary & TCP_IPV6_RECVPKTINFO) { - tcpopt->to_flags |= TCPOPT_RECVPKTINFO; - } + econnp = eager->tcp_connp; - /* Re-use mp1 to hold a copy of mp, in case reallocb fails */ + /* Hold a copy of mp, in case reallocb fails */ if ((mp1 = copymsg(mp)) == NULL) { CONN_DEC_REF(acceptor->tcp_connp); CONN_DEC_REF(eager->tcp_connp); - freemsg(opt_mp); + freemsg(discon_mp); tcp_err_ack(listener, mp, TSYSERR, ENOMEM); return; } @@ -2093,7 +1980,7 @@ tcp_tli_accept(tcp_t *listener, mblk_t *mp) { int extra; - extra = (eager->tcp_family == AF_INET) ? + extra = (econnp->conn_family == AF_INET) ? sizeof (sin_t) : sizeof (sin6_t); /* @@ -2104,7 +1991,7 @@ tcp_tli_accept(tcp_t *listener, mblk_t *mp) if ((ok_mp = mi_tpi_ok_ack_alloc_extra(mp, extra)) == NULL) { CONN_DEC_REF(acceptor->tcp_connp); CONN_DEC_REF(eager->tcp_connp); - freemsg(opt_mp); + freemsg(discon_mp); /* Original mp has been freed by now, so use mp1 */ tcp_err_ack(listener, mp1, TSYSERR, ENOMEM); return; @@ -2114,38 +2001,32 @@ tcp_tli_accept(tcp_t *listener, mblk_t *mp) switch (extra) { case sizeof (sin_t): { - sin_t *sin = (sin_t *)ok_mp->b_wptr; + sin_t *sin = (sin_t *)ok_mp->b_wptr; - ok_mp->b_wptr += extra; - sin->sin_family = AF_INET; - sin->sin_port = eager->tcp_lport; - sin->sin_addr.s_addr = - eager->tcp_ipha->ipha_src; - break; - } + ok_mp->b_wptr += extra; + sin->sin_family = AF_INET; + sin->sin_port = econnp->conn_lport; + sin->sin_addr.s_addr = econnp->conn_laddr_v4; + break; + } case sizeof (sin6_t): { - sin6_t *sin6 = (sin6_t *)ok_mp->b_wptr; + sin6_t *sin6 = (sin6_t *)ok_mp->b_wptr; - ok_mp->b_wptr += extra; - sin6->sin6_family = AF_INET6; - sin6->sin6_port = eager->tcp_lport; - if (eager->tcp_ipversion == IPV4_VERSION) { - sin6->sin6_flowinfo = 0; - IN6_IPADDR_TO_V4MAPPED( - eager->tcp_ipha->ipha_src, - &sin6->sin6_addr); - } else { - 
ASSERT(eager->tcp_ip6h != NULL); - sin6->sin6_flowinfo = - eager->tcp_ip6h->ip6_vcf & - ~IPV6_VERS_AND_FLOW_MASK; - sin6->sin6_addr = - eager->tcp_ip6h->ip6_src; - } + ok_mp->b_wptr += extra; + sin6->sin6_family = AF_INET6; + sin6->sin6_port = econnp->conn_lport; + sin6->sin6_addr = econnp->conn_laddr_v6; + sin6->sin6_flowinfo = econnp->conn_flowinfo; + if (IN6_IS_ADDR_LINKSCOPE(&econnp->conn_laddr_v6) && + (econnp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET)) { + sin6->sin6_scope_id = + econnp->conn_ixa->ixa_scopeid; + } else { sin6->sin6_scope_id = 0; - sin6->__sin6_src_id = 0; - break; } + sin6->__sin6_src_id = 0; + break; + } default: break; } @@ -2158,15 +2039,7 @@ tcp_tli_accept(tcp_t *listener, mblk_t *mp) * the tcp_accept_swap is done since it would be dangerous to * let the application start using the new fd prior to the swap. */ - error = tcp_accept_swap(listener, acceptor, eager); - if (error != 0) { - CONN_DEC_REF(acceptor->tcp_connp); - CONN_DEC_REF(eager->tcp_connp); - freemsg(ok_mp); - /* Original mp has been freed by now, so use mp1 */ - tcp_err_ack(listener, mp1, TSYSERR, error); - return; - } + tcp_accept_swap(listener, acceptor, eager); /* * tcp_accept_swap unlinks eager from listener but does not drop @@ -2244,7 +2117,7 @@ tcp_tli_accept(tcp_t *listener, mblk_t *mp) /* We no longer need mp1, since all options processing has passed */ freemsg(mp1); - putnext(listener->tcp_rq, ok_mp); + putnext(listener->tcp_connp->conn_rq, ok_mp); mutex_enter(&listener->tcp_eager_lock); if (listener->tcp_eager_prev_q0->tcp_conn_def_q0) { @@ -2305,7 +2178,7 @@ tcp_tli_accept(tcp_t *listener, mblk_t *mp) listener->tcp_eager_last_q = tcp; tcp->tcp_eager_next_q = NULL; mutex_exit(&listener->tcp_eager_lock); - putnext(tcp->tcp_rq, conn_ind); + putnext(tcp->tcp_connp->conn_rq, conn_ind); } else { mutex_exit(&listener->tcp_eager_lock); } @@ -2318,26 +2191,20 @@ tcp_tli_accept(tcp_t *listener, mblk_t *mp) */ finish: ASSERT(acceptor->tcp_detached); - ASSERT(tcps->tcps_g_q != 
NULL);
+	acceptor->tcp_connp->conn_rq = NULL;
 	ASSERT(!IPCL_IS_NONSTR(acceptor->tcp_connp));
-	acceptor->tcp_rq = tcps->tcps_g_q;
-	acceptor->tcp_wq = WR(tcps->tcps_g_q);
+	acceptor->tcp_connp->conn_wq = NULL;
 	(void) tcp_clean_death(acceptor, 0, 2);
 	CONN_DEC_REF(acceptor->tcp_connp);
 
 	/*
-	 * In case we already received a FIN we have to make tcp_rput send
-	 * the ordrel_ind. This will also send up a window update if the window
-	 * has opened up.
-	 *
-	 * In the normal case of a successful connection acceptance
-	 * we give the O_T_BIND_REQ to the read side put procedure as an
-	 * indication that this was just accepted. This tells tcp_rput to
-	 * pass up any data queued in tcp_rcv_list.
+	 * We pass discon_mp to tcp_accept_finish to get on the right squeue.
 	 *
-	 * In the fringe case where options sent with T_CONN_RES failed and
-	 * we required, we would be indicating a T_DISCON_IND to blow
-	 * away this connection.
+	 * It will update the setting for sockfs/stream head and also take
+	 * care of any data that arrived before accept() was called.
+	 * In case we already received a FIN then tcp_accept_finish will send up
+	 * the ordrel. It will also send up a window update if the window
+	 * has opened up.
 	 */
 
 	/*
@@ -2346,7 +2213,7 @@ finish:
 	 * and is well know but nothing can be done short of major rewrite
 	 * to fix it. Now it is possible to take care of it by assigning TLI/XTI
 	 * eager same squeue as listener (we can distinguish non socket
-	 * listeners at the time of handling a SYN in tcp_conn_request)
+	 * listeners at the time of handling a SYN in tcp_input_listener)
 	 * and do most of the work that tcp_accept_finish does here itself
 	 * and then get behind the acceptor squeue to access the acceptor
 	 * queue.
@@ -2354,52 +2221,38 @@ finish: /* * We already have a ref on tcp so no need to do one before squeue_enter */ - SQUEUE_ENTER_ONE(eager->tcp_connp->conn_sqp, opt_mp, tcp_accept_finish, - eager->tcp_connp, SQ_FILL, SQTAG_TCP_ACCEPT_FINISH); + SQUEUE_ENTER_ONE(eager->tcp_connp->conn_sqp, discon_mp, + tcp_accept_finish, eager->tcp_connp, NULL, SQ_FILL, + SQTAG_TCP_ACCEPT_FINISH); } /* * Swap information between the eager and acceptor for a TLI/XTI client. * The sockfs accept is done on the acceptor stream and control goes - * through tcp_wput_accept() and tcp_accept()/tcp_accept_swap() is not + * through tcp_tli_accept() and tcp_accept()/tcp_accept_swap() is not * called. In either case, both the eager and listener are in their own * perimeter (squeue) and the code has to deal with potential race. * - * See the block comment on top of tcp_accept() and tcp_wput_accept(). + * See the block comment on top of tcp_accept() and tcp_tli_accept(). */ -static int +static void tcp_accept_swap(tcp_t *listener, tcp_t *acceptor, tcp_t *eager) { conn_t *econnp, *aconnp; - cred_t *effective_cred = NULL; - ASSERT(eager->tcp_rq == listener->tcp_rq); + ASSERT(eager->tcp_connp->conn_rq == listener->tcp_connp->conn_rq); ASSERT(eager->tcp_detached && !acceptor->tcp_detached); - ASSERT(!eager->tcp_hard_bound); ASSERT(!TCP_IS_SOCKET(acceptor)); ASSERT(!TCP_IS_SOCKET(eager)); ASSERT(!TCP_IS_SOCKET(listener)); - econnp = eager->tcp_connp; - aconnp = acceptor->tcp_connp; - /* * Trusted Extensions may need to use a security label that is * different from the acceptor's label on MLP and MAC-Exempt * sockets. If this is the case, the required security label - * already exists in econnp->conn_effective_cred. Use this label - * to generate a new effective cred for the acceptor. - * - * We allow for potential application level retry attempts by - * checking for transient errors before modifying eager. + * already exists in econnp->conn_ixa->ixa_tsl. 
Since we make the
+	 * acceptor stream refer to econnp we automatically get that label.
 	 */
-	if (is_system_labeled() &&
-	    aconnp->conn_cred != NULL && econnp->conn_effective_cred != NULL) {
-		effective_cred = copycred_from_tslabel(aconnp->conn_cred,
-		    crgetlabel(econnp->conn_effective_cred), KM_NOSLEEP);
-		if (effective_cred == NULL)
-			return (ENOMEM);
-	}
 
 	acceptor->tcp_detached = B_TRUE;
 	/*
@@ -2416,18 +2269,20 @@ tcp_accept_swap(tcp_t *listener, tcp_t *acceptor, tcp_t *eager)
 	ASSERT(eager->tcp_eager_next_q0 == NULL &&
 	    eager->tcp_eager_prev_q0 == NULL);
 	mutex_exit(&listener->tcp_eager_lock);
-	eager->tcp_rq = acceptor->tcp_rq;
-	eager->tcp_wq = acceptor->tcp_wq;
-	eager->tcp_rq->q_ptr = econnp;
-	eager->tcp_wq->q_ptr = econnp;
+	econnp = eager->tcp_connp;
+	aconnp = acceptor->tcp_connp;
+	econnp->conn_rq = aconnp->conn_rq;
+	econnp->conn_wq = aconnp->conn_wq;
+	econnp->conn_rq->q_ptr = econnp;
+	econnp->conn_wq->q_ptr = econnp;
 
 	/*
 	 * In the TLI/XTI loopback case, we are inside the listener's squeue,
 	 * which might be a different squeue from our peer TCP instance.
 	 * For TCP Fusion, the peer expects that whenever tcp_detached is
 	 * clear, our TCP queues point to the acceptor's queues.  Thus, use
-	 * membar_producer() to ensure that the assignments of tcp_rq/tcp_wq
+	 * membar_producer() to ensure that the assignments of conn_rq/conn_wq
 	 * above reach global visibility prior to the clearing of tcp_detached.
*/ membar_producer(); @@ -2439,419 +2294,187 @@ tcp_accept_swap(tcp_t *listener, tcp_t *acceptor, tcp_t *eager) econnp->conn_minor_arena = aconnp->conn_minor_arena; ASSERT(econnp->conn_minor_arena != NULL); - if (eager->tcp_cred != NULL) - crfree(eager->tcp_cred); - eager->tcp_cred = econnp->conn_cred = aconnp->conn_cred; - if (econnp->conn_effective_cred != NULL) - crfree(econnp->conn_effective_cred); - econnp->conn_effective_cred = effective_cred; + if (econnp->conn_cred != NULL) + crfree(econnp->conn_cred); + econnp->conn_cred = aconnp->conn_cred; aconnp->conn_cred = NULL; - ASSERT(aconnp->conn_effective_cred == NULL); - + econnp->conn_cpid = aconnp->conn_cpid; ASSERT(econnp->conn_netstack == aconnp->conn_netstack); ASSERT(eager->tcp_tcps == acceptor->tcp_tcps); econnp->conn_zoneid = aconnp->conn_zoneid; econnp->conn_allzones = aconnp->conn_allzones; + econnp->conn_ixa->ixa_zoneid = aconnp->conn_ixa->ixa_zoneid; + econnp->conn_mac_mode = aconnp->conn_mac_mode; + econnp->conn_zone_is_global = aconnp->conn_zone_is_global; aconnp->conn_mac_mode = CONN_MAC_DEFAULT; /* Do the IPC initialization */ CONN_INC_REF(econnp); - econnp->conn_multicast_loop = aconnp->conn_multicast_loop; - econnp->conn_af_isv6 = aconnp->conn_af_isv6; - econnp->conn_pkt_isv6 = aconnp->conn_pkt_isv6; + econnp->conn_family = aconnp->conn_family; + econnp->conn_ipversion = aconnp->conn_ipversion; /* Done with old IPC. Drop its ref on its connp */ CONN_DEC_REF(aconnp); - return (0); } /* * Adapt to the information, such as rtt and rtt_sd, provided from the - * ire cached in conn_cache_ire. If no ire cached, do a ire lookup. + * DCE and IRE maintained by IP. * * Checks for multicast and broadcast destination address. - * Returns zero on failure; non-zero if ok. + * Returns zero if ok; an errno on failure. * * Note that the MSS calculation here is based on the info given in - * the IRE. We do not do any calculation based on TCP options. 
They - * will be handled in tcp_rput_other() and tcp_rput_data() when TCP - * knows which options to use. + * the DCE and IRE. We do not do any calculation based on TCP options. They + * will be handled in tcp_input_data() when TCP knows which options to use. * * Note on how TCP gets its parameters for a connection. * * When a tcp_t structure is allocated, it gets all the default parameters. - * In tcp_adapt_ire(), it gets those metric parameters, like rtt, rtt_sd, + * In tcp_set_destination(), it gets those metric parameters, like rtt, rtt_sd, * spipe, rpipe, ... from the route metrics. Route metric overrides the * default. * - * An incoming SYN with a multicast or broadcast destination address, is dropped - * in 1 of 2 places. - * - * 1. If the packet was received over the wire it is dropped in - * ip_rput_process_broadcast() - * - * 2. If the packet was received through internal IP loopback, i.e. the packet - * was generated and received on the same machine, it is dropped in - * ip_wput_local() + * An incoming SYN with a multicast or broadcast destination address is dropped + * in ip_fanout_v4/v6. * * An incoming SYN with a multicast or broadcast source address is always - * dropped in tcp_adapt_ire. The same logic in tcp_adapt_ire also serves to + * dropped in tcp_set_destination, since IPDF_ALLOW_MCBC is not set in + * conn_connect. + * The same logic in tcp_set_destination also serves to * reject an attempt to connect to a broadcast or multicast (destination) * address. 
*/ static int -tcp_adapt_ire(tcp_t *tcp, mblk_t *ire_mp) +tcp_set_destination(tcp_t *tcp) { - ire_t *ire; - ire_t *sire = NULL; - iulp_t *ire_uinfo = NULL; uint32_t mss_max; uint32_t mss; boolean_t tcp_detached = TCP_IS_DETACHED(tcp); conn_t *connp = tcp->tcp_connp; - boolean_t ire_cacheable = B_FALSE; - zoneid_t zoneid = connp->conn_zoneid; - int match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | - MATCH_IRE_SECATTR; - ts_label_t *tsl = crgetlabel(CONN_CRED(connp)); - ill_t *ill = NULL; - boolean_t incoming = (ire_mp == NULL); tcp_stack_t *tcps = tcp->tcp_tcps; - ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip; - - ASSERT(connp->conn_ire_cache == NULL); - - if (tcp->tcp_ipversion == IPV4_VERSION) { + iulp_t uinfo; + int error; + uint32_t flags; - if (CLASSD(tcp->tcp_connp->conn_rem)) { - BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsInDiscards); - return (0); - } - /* - * If IP_NEXTHOP is set, then look for an IRE_CACHE - * for the destination with the nexthop as gateway. - * ire_ctable_lookup() is used because this particular - * ire, if it exists, will be marked private. - * If that is not available, use the interface ire - * for the nexthop. - * - * TSol: tcp_update_label will detect label mismatches based - * only on the destination's label, but that would not - * detect label mismatches based on the security attributes - * of routes or next hop gateway. Hence we need to pass the - * label to ire_ftable_lookup below in order to locate the - * right prefix (and/or) ire cache. Similarly we also need - * pass the label to the ire_cache_lookup below to locate - * the right ire that also matches on the label. 
- */ - if (tcp->tcp_connp->conn_nexthop_set) { - ire = ire_ctable_lookup(tcp->tcp_connp->conn_rem, - tcp->tcp_connp->conn_nexthop_v4, 0, NULL, zoneid, - tsl, MATCH_IRE_MARK_PRIVATE_ADDR | MATCH_IRE_GW, - ipst); - if (ire == NULL) { - ire = ire_ftable_lookup( - tcp->tcp_connp->conn_nexthop_v4, - 0, 0, IRE_INTERFACE, NULL, NULL, zoneid, 0, - tsl, match_flags, ipst); - if (ire == NULL) - return (0); - } else { - ire_uinfo = &ire->ire_uinfo; - } - } else { - ire = ire_cache_lookup(tcp->tcp_connp->conn_rem, - zoneid, tsl, ipst); - if (ire != NULL) { - ire_cacheable = B_TRUE; - ire_uinfo = (ire_mp != NULL) ? - &((ire_t *)ire_mp->b_rptr)->ire_uinfo: - &ire->ire_uinfo; + flags = IPDF_LSO | IPDF_ZCOPY; + /* + * Make sure we have a dce for the destination to avoid dce_ident + * contention for connected sockets. + */ + flags |= IPDF_UNIQUE_DCE; - } else { - if (ire_mp == NULL) { - ire = ire_ftable_lookup( - tcp->tcp_connp->conn_rem, - 0, 0, 0, NULL, &sire, zoneid, 0, - tsl, (MATCH_IRE_RECURSIVE | - MATCH_IRE_DEFAULT), ipst); - if (ire == NULL) - return (0); - ire_uinfo = (sire != NULL) ? - &sire->ire_uinfo : - &ire->ire_uinfo; - } else { - ire = (ire_t *)ire_mp->b_rptr; - ire_uinfo = - &((ire_t *) - ire_mp->b_rptr)->ire_uinfo; - } - } - } - ASSERT(ire != NULL); + if (!tcps->tcps_ignore_path_mtu) + connp->conn_ixa->ixa_flags |= IXAF_PMTU_DISCOVERY; - if ((ire->ire_src_addr == INADDR_ANY) || - (ire->ire_type & IRE_BROADCAST)) { - /* - * ire->ire_mp is non null when ire_mp passed in is used - * ire->ire_mp is set in ip_bind_insert_ire[_v6](). 
- */
-		if (ire->ire_mp == NULL)
-			ire_refrele(ire);
-		if (sire != NULL)
-			ire_refrele(sire);
-		return (0);
-	}
-
-	if (tcp->tcp_ipha->ipha_src == INADDR_ANY) {
-		ipaddr_t	src_addr;
+	/* Use conn_lock to satisfy ASSERT; tcp is already serialized */
+	mutex_enter(&connp->conn_lock);
+	error = conn_connect(connp, &uinfo, flags);
+	mutex_exit(&connp->conn_lock);
+	if (error != 0)
+		return (error);
 
-		/*
-		 * ip_bind_connected() has stored the correct source
-		 * address in conn_src.
-		 */
-		src_addr = tcp->tcp_connp->conn_src;
-		tcp->tcp_ipha->ipha_src = src_addr;
-		/*
-		 * Copy of the src addr. in tcp_t is needed
-		 * for the lookup funcs.
-		 */
-		IN6_IPADDR_TO_V4MAPPED(src_addr, &tcp->tcp_ip_src_v6);
-	}
-	/*
-	 * Set the fragment bit so that IP will tell us if the MTU
-	 * should change. IP tells us the latest setting of
-	 * ip_path_mtu_discovery through ire_frag_flag.
-	 */
-	if (ipst->ips_ip_path_mtu_discovery) {
-		tcp->tcp_ipha->ipha_fragment_offset_and_flags =
-		    htons(IPH_DF);
-	}
-	/*
-	 * If ire_uinfo is NULL, this is the IRE_INTERFACE case
-	 * for IP_NEXTHOP. No cache ire has been found for the
-	 * destination and we are working with the nexthop's
-	 * interface ire. Since we need to forward all packets
-	 * to the nexthop first, we "blindly" set tcp_localnet
-	 * to false, eventhough the destination may also be
-	 * onlink.
-	 */
-	if (ire_uinfo == NULL)
-		tcp->tcp_localnet = 0;
-	else
-		tcp->tcp_localnet = (ire->ire_gateway_addr == 0);
-	} else {
-		/*
-		 * For incoming connection ire_mp = NULL
-		 * For outgoing connection ire_mp != NULL
-		 * Technically we should check conn_incoming_ill
-		 * when ire_mp is NULL and conn_outgoing_ill when
-		 * ire_mp is non-NULL. But this is performance
-		 * critical path and for IPV*_BOUND_IF, outgoing
-		 * and incoming ill are always set to the same value.
- */ - ill_t *dst_ill = NULL; - ipif_t *dst_ipif = NULL; + error = tcp_build_hdrs(tcp); + if (error != 0) + return (error); - ASSERT(connp->conn_outgoing_ill == connp->conn_incoming_ill); + tcp->tcp_localnet = uinfo.iulp_localnet; - if (connp->conn_outgoing_ill != NULL) { - /* Outgoing or incoming path */ - int err; + if (uinfo.iulp_rtt != 0) { + clock_t rto; - dst_ill = conn_get_held_ill(connp, - &connp->conn_outgoing_ill, &err); - if (err == ILL_LOOKUP_FAILED || dst_ill == NULL) { - ip1dbg(("tcp_adapt_ire: ill_lookup failed\n")); - return (0); - } - match_flags |= MATCH_IRE_ILL; - dst_ipif = dst_ill->ill_ipif; - } - ire = ire_ctable_lookup_v6(&tcp->tcp_connp->conn_remv6, - 0, 0, dst_ipif, zoneid, tsl, match_flags, ipst); + tcp->tcp_rtt_sa = uinfo.iulp_rtt; + tcp->tcp_rtt_sd = uinfo.iulp_rtt_sd; + rto = (tcp->tcp_rtt_sa >> 3) + tcp->tcp_rtt_sd + + tcps->tcps_rexmit_interval_extra + + (tcp->tcp_rtt_sa >> 5); - if (ire != NULL) { - ire_cacheable = B_TRUE; - ire_uinfo = (ire_mp != NULL) ? - &((ire_t *)ire_mp->b_rptr)->ire_uinfo: - &ire->ire_uinfo; + if (rto > tcps->tcps_rexmit_interval_max) { + tcp->tcp_rto = tcps->tcps_rexmit_interval_max; + } else if (rto < tcps->tcps_rexmit_interval_min) { + tcp->tcp_rto = tcps->tcps_rexmit_interval_min; } else { - if (ire_mp == NULL) { - ire = ire_ftable_lookup_v6( - &tcp->tcp_connp->conn_remv6, - 0, 0, 0, dst_ipif, &sire, zoneid, - 0, tsl, match_flags, ipst); - if (ire == NULL) { - if (dst_ill != NULL) - ill_refrele(dst_ill); - return (0); - } - ire_uinfo = (sire != NULL) ? 
&sire->ire_uinfo : - &ire->ire_uinfo; - } else { - ire = (ire_t *)ire_mp->b_rptr; - ire_uinfo = - &((ire_t *)ire_mp->b_rptr)->ire_uinfo; - } - } - if (dst_ill != NULL) - ill_refrele(dst_ill); - - ASSERT(ire != NULL); - ASSERT(ire_uinfo != NULL); - - if (IN6_IS_ADDR_UNSPECIFIED(&ire->ire_src_addr_v6) || - IN6_IS_ADDR_MULTICAST(&ire->ire_addr_v6)) { - /* - * ire->ire_mp is non null when ire_mp passed in is used - * ire->ire_mp is set in ip_bind_insert_ire[_v6](). - */ - if (ire->ire_mp == NULL) - ire_refrele(ire); - if (sire != NULL) - ire_refrele(sire); - return (0); + tcp->tcp_rto = rto; } - - if (IN6_IS_ADDR_UNSPECIFIED(&tcp->tcp_ip6h->ip6_src)) { - in6_addr_t src_addr; - - /* - * ip_bind_connected_v6() has stored the correct source - * address per IPv6 addr. selection policy in - * conn_src_v6. - */ - src_addr = tcp->tcp_connp->conn_srcv6; - - tcp->tcp_ip6h->ip6_src = src_addr; - /* - * Copy of the src addr. in tcp_t is needed - * for the lookup funcs. - */ - tcp->tcp_ip_src_v6 = src_addr; - ASSERT(IN6_ARE_ADDR_EQUAL(&tcp->tcp_ip6h->ip6_src, - &connp->conn_srcv6)); + } + if (uinfo.iulp_ssthresh != 0) + tcp->tcp_cwnd_ssthresh = uinfo.iulp_ssthresh; + else + tcp->tcp_cwnd_ssthresh = TCP_MAX_LARGEWIN; + if (uinfo.iulp_spipe > 0) { + connp->conn_sndbuf = MIN(uinfo.iulp_spipe, + tcps->tcps_max_buf); + if (tcps->tcps_snd_lowat_fraction != 0) { + connp->conn_sndlowat = connp->conn_sndbuf / + tcps->tcps_snd_lowat_fraction; } - tcp->tcp_localnet = - IN6_IS_ADDR_UNSPECIFIED(&ire->ire_gateway_addr_v6); + (void) tcp_maxpsz_set(tcp, B_TRUE); } - /* - * This allows applications to fail quickly when connections are made - * to dead hosts. Hosts can be labeled dead by adding a reject route - * with both the RTF_REJECT and RTF_PRIVATE flags set. + * Note that up till now, acceptor always inherits receive + * window from the listener. But if there is a metrics + * associated with a host, we should use that instead of + * inheriting it from listener. 
Thus we need to pass this
+	 * info back to the caller.
 	 */
-	if ((ire->ire_flags & RTF_REJECT) &&
-	    (ire->ire_flags & RTF_PRIVATE))
-		goto error;
+	if (uinfo.iulp_rpipe > 0) {
+		tcp->tcp_rwnd = MIN(uinfo.iulp_rpipe,
+		    tcps->tcps_max_buf);
+	}
+
+	if (uinfo.iulp_rtomax > 0) {
+		tcp->tcp_second_timer_threshold =
+		    uinfo.iulp_rtomax;
+	}
 
 	/*
-	 * Make use of the cached rtt and rtt_sd values to calculate the
-	 * initial RTO. Note that they are already initialized in
-	 * tcp_init_values().
-	 * If ire_uinfo is NULL, i.e., we do not have a cache ire for
-	 * IP_NEXTHOP, but instead are using the interface ire for the
-	 * nexthop, then we do not use the ire_uinfo from that ire to
-	 * do any initializations.
+	 * Use the metric option settings, iulp_tstamp_ok and
+	 * iulp_wscale_ok, only for active open. What this means
+	 * is that if the other side uses timestamp or window
+	 * scale option, TCP will also use those options. That
+	 * is for passive open. If the application sets a
+	 * large window, window scale is enabled regardless of
+	 * the value in iulp_wscale_ok. This is the behavior
+	 * since 2.6. So we keep it.
+	 * The only case left in passive open processing is the
+	 * check for SACK.
+	 * For ECN, it should probably be like SACK. But the
+	 * current value is binary, so we treat it like the other
+	 * cases. The metric only controls active open. For passive
+	 * open, the ndd param, tcp_ecn_permitted, controls the
+	 * behavior.
*/ - if (ire_uinfo != NULL) { - if (ire_uinfo->iulp_rtt != 0) { - clock_t rto; - - tcp->tcp_rtt_sa = ire_uinfo->iulp_rtt; - tcp->tcp_rtt_sd = ire_uinfo->iulp_rtt_sd; - rto = (tcp->tcp_rtt_sa >> 3) + tcp->tcp_rtt_sd + - tcps->tcps_rexmit_interval_extra + - (tcp->tcp_rtt_sa >> 5); - - if (rto > tcps->tcps_rexmit_interval_max) { - tcp->tcp_rto = tcps->tcps_rexmit_interval_max; - } else if (rto < tcps->tcps_rexmit_interval_min) { - tcp->tcp_rto = tcps->tcps_rexmit_interval_min; - } else { - tcp->tcp_rto = rto; - } - } - if (ire_uinfo->iulp_ssthresh != 0) - tcp->tcp_cwnd_ssthresh = ire_uinfo->iulp_ssthresh; - else - tcp->tcp_cwnd_ssthresh = TCP_MAX_LARGEWIN; - if (ire_uinfo->iulp_spipe > 0) { - tcp->tcp_xmit_hiwater = MIN(ire_uinfo->iulp_spipe, - tcps->tcps_max_buf); - if (tcps->tcps_snd_lowat_fraction != 0) - tcp->tcp_xmit_lowater = tcp->tcp_xmit_hiwater / - tcps->tcps_snd_lowat_fraction; - (void) tcp_maxpsz_set(tcp, B_TRUE); - } + if (!tcp_detached) { /* - * Note that up till now, acceptor always inherits receive - * window from the listener. But if there is a metrics - * associated with a host, we should use that instead of - * inheriting it from listener. Thus we need to pass this - * info back to the caller. + * The if check means that the following can only + * be turned on by the metrics only IRE, but not off. */ - if (ire_uinfo->iulp_rpipe > 0) { - tcp->tcp_rwnd = MIN(ire_uinfo->iulp_rpipe, - tcps->tcps_max_buf); - } - - if (ire_uinfo->iulp_rtomax > 0) { - tcp->tcp_second_timer_threshold = - ire_uinfo->iulp_rtomax; - } - + if (uinfo.iulp_tstamp_ok) + tcp->tcp_snd_ts_ok = B_TRUE; + if (uinfo.iulp_wscale_ok) + tcp->tcp_snd_ws_ok = B_TRUE; + if (uinfo.iulp_sack == 2) + tcp->tcp_snd_sack_ok = B_TRUE; + if (uinfo.iulp_ecn_ok) + tcp->tcp_ecn_ok = B_TRUE; + } else { /* - * Use the metric option settings, iulp_tstamp_ok and - * iulp_wscale_ok, only for active open. 
What this means - * is that if the other side uses timestamp or window - * scale option, TCP will also use those options. That - * is for passive open. If the application sets a - * large window, window scale is enabled regardless of - * the value in iulp_wscale_ok. This is the behavior - * since 2.6. So we keep it. - * The only case left in passive open processing is the - * check for SACK. - * For ECN, it should probably be like SACK. But the - * current value is binary, so we treat it like the other - * cases. The metric only controls active open.For passive - * open, the ndd param, tcp_ecn_permitted, controls the - * behavior. + * Passive open. + * + * As above, the if check means that SACK can only be + * turned on by the metric only IRE. */ - if (!tcp_detached) { - /* - * The if check means that the following can only - * be turned on by the metrics only IRE, but not off. - */ - if (ire_uinfo->iulp_tstamp_ok) - tcp->tcp_snd_ts_ok = B_TRUE; - if (ire_uinfo->iulp_wscale_ok) - tcp->tcp_snd_ws_ok = B_TRUE; - if (ire_uinfo->iulp_sack == 2) - tcp->tcp_snd_sack_ok = B_TRUE; - if (ire_uinfo->iulp_ecn_ok) - tcp->tcp_ecn_ok = B_TRUE; - } else { - /* - * Passive open. - * - * As above, the if check means that SACK can only be - * turned on by the metric only IRE. - */ - if (ire_uinfo->iulp_sack > 0) { - tcp->tcp_snd_sack_ok = B_TRUE; - } + if (uinfo.iulp_sack > 0) { + tcp->tcp_snd_sack_ok = B_TRUE; } } - /* - * XXX: Note that currently, ire_max_frag can be as small as 68 + * XXX Note that currently, iulp_mtu can be as small as 68 * because of PMTUd. So tcp_mss may go to negative if combined * length of all those options exceeds 28 bytes. But because * of the tcp_mss_min check below, we may not have a problem if @@ -2864,31 +2487,15 @@ tcp_adapt_ire(tcp_t *tcp, mblk_t *ire_mp) * We do not deal with that now. All those problems related to * PMTUd will be fixed later. 
*/ - ASSERT(ire->ire_max_frag != 0); - mss = tcp->tcp_if_mtu = ire->ire_max_frag; - if (tcp->tcp_ipp_fields & IPPF_USE_MIN_MTU) { - if (tcp->tcp_ipp_use_min_mtu == IPV6_USE_MIN_MTU_NEVER) { - mss = MIN(mss, IPV6_MIN_MTU); - } - } + ASSERT(uinfo.iulp_mtu != 0); + mss = tcp->tcp_initial_pmtu = uinfo.iulp_mtu; /* Sanity check for MSS value. */ - if (tcp->tcp_ipversion == IPV4_VERSION) + if (connp->conn_ipversion == IPV4_VERSION) mss_max = tcps->tcps_mss_max_ipv4; else mss_max = tcps->tcps_mss_max_ipv6; - if (tcp->tcp_ipversion == IPV6_VERSION && - (ire->ire_frag_flag & IPH_FRAG_HDR)) { - /* - * After receiving an ICMPv6 "packet too big" message with a - * MTU < 1280, and for multirouted IPv6 packets, the IP layer - * will insert a 8-byte fragment header in every packet; we - * reduce the MSS by that amount here. - */ - mss -= sizeof (ip6_frag_t); - } - if (tcp->tcp_ipsec_overhead == 0) tcp->tcp_ipsec_overhead = conn_ipsec_length(connp); @@ -2903,71 +2510,28 @@ tcp_adapt_ire(tcp_t *tcp, mblk_t *ire_mp) tcp->tcp_mss = mss; /* + * Update the tcp connection with LSO capability. + */ + tcp_update_lso(tcp, connp->conn_ixa); + + /* * Initialize the ISS here now that we have the full connection ID. * The RFC 1948 method of initial sequence number generation requires * knowledge of the full connection ID before setting the ISS. */ - tcp_iss_init(tcp); - if (ire->ire_type & (IRE_LOOPBACK | IRE_LOCAL)) - tcp->tcp_loopback = B_TRUE; - - if (sire != NULL) - IRE_REFRELE(sire); - - /* - * If we got an IRE_CACHE and an ILL, go through their properties; - * otherwise, this is deferred until later when we have an IRE_CACHE. - */ - if (tcp->tcp_loopback || - (ire_cacheable && (ill = ire_to_ill(ire)) != NULL)) { - /* - * For incoming, see if this tcp may be MDT-capable. For - * outgoing, this process has been taken care of through - * tcp_rput_other. 
- */ - tcp_ire_ill_check(tcp, ire, ill, incoming); - tcp->tcp_ire_ill_check_done = B_TRUE; - } + tcp->tcp_loopback = (uinfo.iulp_loopback | uinfo.iulp_local); - mutex_enter(&connp->conn_lock); /* * Make sure that conn is not marked incipient * for incoming connections. A blind * removal of incipient flag is cheaper than * check and removal. */ + mutex_enter(&connp->conn_lock); connp->conn_state_flags &= ~CONN_INCIPIENT; - - /* - * Must not cache forwarding table routes - * or recache an IRE after the conn_t has - * had conn_ire_cache cleared and is flagged - * unusable, (see the CONN_CACHE_IRE() macro). - */ - if (ire_cacheable && CONN_CACHE_IRE(connp)) { - rw_enter(&ire->ire_bucket->irb_lock, RW_READER); - if (!(ire->ire_marks & IRE_MARK_CONDEMNED)) { - connp->conn_ire_cache = ire; - IRE_UNTRACE_REF(ire); - rw_exit(&ire->ire_bucket->irb_lock); - mutex_exit(&connp->conn_lock); - return (1); - } - rw_exit(&ire->ire_bucket->irb_lock); - } mutex_exit(&connp->conn_lock); - - if (ire->ire_mp == NULL) - ire_refrele(ire); - return (1); - -error: - if (ire->ire_mp == NULL) - ire_refrele(ire); - if (sire != NULL) - ire_refrele(sire); return (0); } @@ -3001,7 +2565,7 @@ tcp_tpi_bind(tcp_t *tcp, mblk_t *mp) ASSERT((uintptr_t)(mp->b_wptr - mp->b_rptr) <= (uintptr_t)INT_MAX); if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { - if (tcp->tcp_debug) { + if (connp->conn_debug) { (void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, "tcp_tpi_bind: bad req, len %u", (uint_t)(mp->b_wptr - mp->b_rptr)); @@ -3010,7 +2574,7 @@ tcp_tpi_bind(tcp_t *tcp, mblk_t *mp) return; } /* Make sure the largest address fits */ - mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1); + mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t), 1); if (mp1 == NULL) { tcp_err_ack(tcp, mp, TSYSERR, ENOMEM); return; @@ -3024,7 +2588,7 @@ tcp_tpi_bind(tcp_t *tcp, mblk_t *mp) switch (len) { case 0: /* request for a generic port */ tbr->ADDR_offset = sizeof (struct T_bind_req); - if 
(tcp->tcp_family == AF_INET) { + if (connp->conn_family == AF_INET) { tbr->ADDR_length = sizeof (sin_t); sin = (sin_t *)&tbr[1]; *sin = sin_null; @@ -3033,7 +2597,7 @@ tcp_tpi_bind(tcp_t *tcp, mblk_t *mp) len = sizeof (sin_t); mp->b_wptr = (uchar_t *)&sin[1]; } else { - ASSERT(tcp->tcp_family == AF_INET6); + ASSERT(connp->conn_family == AF_INET6); tbr->ADDR_length = sizeof (sin6_t); sin6 = (sin6_t *)&tbr[1]; *sin6 = sin6_null; @@ -3055,7 +2619,7 @@ tcp_tpi_bind(tcp_t *tcp, mblk_t *mp) break; default: - if (tcp->tcp_debug) { + if (connp->conn_debug) { (void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, "tcp_tpi_bind: bad address length, %d", tbr->ADDR_length); @@ -3080,16 +2644,16 @@ done: /* * Update port information as sockfs/tpi needs it for checking */ - if (tcp->tcp_family == AF_INET) { + if (connp->conn_family == AF_INET) { sin = (sin_t *)sa; - sin->sin_port = tcp->tcp_lport; + sin->sin_port = connp->conn_lport; } else { sin6 = (sin6_t *)sa; - sin6->sin6_port = tcp->tcp_lport; + sin6->sin6_port = connp->conn_lport; } mp->b_datap->db_type = M_PCPROTO; tbr->PRIM_type = T_BIND_ACK; - putnext(tcp->tcp_rq, mp); + putnext(connp->conn_rq, mp); } } @@ -3139,7 +2703,7 @@ tcp_bindi(tcp_t *tcp, in_port_t port, const in6_addr_t *laddr, * Set loopmax appropriately so that one does not look * forever in the case all of the anonymous ports are in use. */ - if (tcp->tcp_anon_priv_bind) { + if (connp->conn_anon_priv_bind) { /* * loopmax = * (IPPORT_RESERVED-1) - tcp_min_anonpriv_port + 1 @@ -3175,7 +2739,7 @@ tcp_bindi(tcp_t *tcp, in_port_t port, const in6_addr_t *laddr, mutex_enter(&tbf->tf_lock); for (ltcp = tbf->tf_tcp; ltcp != NULL; ltcp = ltcp->tcp_bind_hash) { - if (lport == ltcp->tcp_lport) + if (lport == ltcp->tcp_connp->conn_lport) break; } @@ -3191,7 +2755,7 @@ tcp_bindi(tcp_t *tcp, in_port_t port, const in6_addr_t *laddr, * privilege as being in all zones, as there's * otherwise no way to identify the right receiver. 
*/ - if (!IPCL_BIND_ZONE_MATCH(ltcp->tcp_connp, connp)) + if (!IPCL_BIND_ZONE_MATCH(lconnp, connp)) continue; /* @@ -3227,7 +2791,7 @@ tcp_bindi(tcp_t *tcp, in_port_t port, const in6_addr_t *laddr, * added. * * if (ltcp->tcp_state == TCPS_LISTEN || - * !reuseaddr || !ltcp->tcp_reuseaddr) { + * !reuseaddr || !lconnp->conn_reuseaddr) { * ... * } * @@ -3243,17 +2807,18 @@ tcp_bindi(tcp_t *tcp, in_port_t port, const in6_addr_t *laddr, */ not_socket = !(TCP_IS_SOCKET(ltcp) && TCP_IS_SOCKET(tcp)); - exclbind = ltcp->tcp_exclbind || tcp->tcp_exclbind; + exclbind = lconnp->conn_exclbind || + connp->conn_exclbind; if ((lconnp->conn_mac_mode != CONN_MAC_DEFAULT) || (connp->conn_mac_mode != CONN_MAC_DEFAULT) || (exclbind && (not_socket || ltcp->tcp_state <= TCPS_ESTABLISHED))) { if (V6_OR_V4_INADDR_ANY( - ltcp->tcp_bound_source_v6) || + lconnp->conn_bound_addr_v6) || V6_OR_V4_INADDR_ANY(*laddr) || IN6_ARE_ADDR_EQUAL(laddr, - <cp->tcp_bound_source_v6)) { + &lconnp->conn_bound_addr_v6)) { break; } continue; @@ -3266,7 +2831,7 @@ tcp_bindi(tcp_t *tcp, in_port_t port, const in6_addr_t *laddr, * specific port. We use the same autoassigned port * number space for IPv4 and IPv6 sockets. 
*/ - if (tcp->tcp_ipversion != ltcp->tcp_ipversion && + if (connp->conn_ipversion != lconnp->conn_ipversion && bind_to_req_port_only) continue; @@ -3281,9 +2846,9 @@ tcp_bindi(tcp_t *tcp, in_port_t port, const in6_addr_t *laddr, */ if (quick_connect && (ltcp->tcp_state > TCPS_LISTEN) && - ((tcp->tcp_fport != ltcp->tcp_fport) || - !IN6_ARE_ADDR_EQUAL(&tcp->tcp_remote_v6, - <cp->tcp_remote_v6))) + ((connp->conn_fport != lconnp->conn_fport) || + !IN6_ARE_ADDR_EQUAL(&connp->conn_faddr_v6, + &lconnp->conn_faddr_v6))) continue; if (!reuseaddr) { @@ -3299,9 +2864,9 @@ tcp_bindi(tcp_t *tcp, in_port_t port, const in6_addr_t *laddr, */ if (!V6_OR_V4_INADDR_ANY(*laddr) && !V6_OR_V4_INADDR_ANY( - ltcp->tcp_bound_source_v6) && + lconnp->conn_bound_addr_v6) && !IN6_ARE_ADDR_EQUAL(laddr, - <cp->tcp_bound_source_v6)) + &lconnp->conn_bound_addr_v6)) continue; if (ltcp->tcp_state >= TCPS_BOUND) { /* @@ -3327,7 +2892,7 @@ tcp_bindi(tcp_t *tcp, in_port_t port, const in6_addr_t *laddr, * SO_REUSEADDR setting, so we break. */ if (IN6_ARE_ADDR_EQUAL(laddr, - <cp->tcp_bound_source_v6) && + &lconnp->conn_bound_addr_v6) && (ltcp->tcp_state == TCPS_LISTEN || ltcp->tcp_state == TCPS_BOUND)) break; @@ -3343,11 +2908,10 @@ tcp_bindi(tcp_t *tcp, in_port_t port, const in6_addr_t *laddr, * number. */ tcp->tcp_state = TCPS_BOUND; - tcp->tcp_lport = htons(port); - *(uint16_t *)tcp->tcp_tcph->th_lport = tcp->tcp_lport; + connp->conn_lport = htons(port); ASSERT(&tcps->tcps_bind_fanout[TCP_BIND_HASH( - tcp->tcp_lport)] == tbf); + connp->conn_lport)] == tbf); tcp_bind_hash_insert(tbf, tcp, 1); mutex_exit(&tbf->tf_lock); @@ -3364,12 +2928,12 @@ tcp_bindi(tcp_t *tcp, in_port_t port, const in6_addr_t *laddr, * is updated. After the update, it may or may not * be in the valid range. 
*/ - if (!tcp->tcp_anon_priv_bind) + if (!connp->conn_anon_priv_bind) tcps->tcps_next_port_to_try = port + 1; return (port); } - if (tcp->tcp_anon_priv_bind) { + if (connp->conn_anon_priv_bind) { port = tcp_get_next_priv_port(tcp); } else { if (count == 0 && user_specified) { @@ -3402,12 +2966,13 @@ tcp_bindi(tcp_t *tcp, in_port_t port, const in6_addr_t *laddr, * tcp_clean_death / tcp_close_detached must not be called more than once * on a tcp. Thus every function that potentially calls tcp_clean_death * must check for the tcp state before calling tcp_clean_death. - * Eg. tcp_input, tcp_rput_data, tcp_eager_kill, tcp_clean_death_wrapper, + * Eg. tcp_input_data, tcp_eager_kill, tcp_clean_death_wrapper, * tcp_timer_handler, all check for the tcp state. */ /* ARGSUSED */ void -tcp_clean_death_wrapper(void *arg, mblk_t *mp, void *arg2) +tcp_clean_death_wrapper(void *arg, mblk_t *mp, void *arg2, + ip_recv_attr_t *dummy) { tcp_t *tcp = ((conn_t *)arg)->conn_tcp; @@ -3449,11 +3014,11 @@ tcp_clean_death(tcp_t *tcp, int err, uint8_t tag) } ASSERT(tcp != NULL); - ASSERT((tcp->tcp_family == AF_INET && - tcp->tcp_ipversion == IPV4_VERSION) || - (tcp->tcp_family == AF_INET6 && - (tcp->tcp_ipversion == IPV4_VERSION || - tcp->tcp_ipversion == IPV6_VERSION))); + ASSERT((connp->conn_family == AF_INET && + connp->conn_ipversion == IPV4_VERSION) || + (connp->conn_family == AF_INET6 && + (connp->conn_ipversion == IPV4_VERSION || + connp->conn_ipversion == IPV6_VERSION))); if (TCP_IS_DETACHED(tcp)) { if (tcp->tcp_hard_binding) { @@ -3483,7 +3048,7 @@ tcp_clean_death(tcp_t *tcp, int err, uint8_t tag) TCP_STAT(tcps, tcp_clean_death_nondetached); - q = tcp->tcp_rq; + q = connp->conn_rq; /* Trash all inbound data */ if (!IPCL_IS_NONSTR(connp)) { @@ -3506,7 +3071,7 @@ tcp_clean_death(tcp_t *tcp, int err, uint8_t tag) */ (void) putnextctl1(q, M_FLUSH, FLUSHR); } - if (tcp->tcp_debug) { + if (connp->conn_debug) { (void) strlog(TCP_MOD_ID, 0, 1, SL_TRACE|SL_ERROR, "tcp_clean_death: discon err 
%d", err); } @@ -3519,7 +3084,7 @@ tcp_clean_death(tcp_t *tcp, int err, uint8_t tag) if (mp != NULL) { putnext(q, mp); } else { - if (tcp->tcp_debug) { + if (connp->conn_debug) { (void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, "tcp_clean_death, sending M_ERROR"); @@ -3552,6 +3117,7 @@ tcp_stop_lingering(tcp_t *tcp) { clock_t delta = 0; tcp_stack_t *tcps = tcp->tcp_tcps; + conn_t *connp = tcp->tcp_connp; tcp->tcp_linger_tid = 0; if (tcp->tcp_state > TCPS_LISTEN) { @@ -3568,15 +3134,14 @@ tcp_stop_lingering(tcp_t *tcp) } /* * Need to cancel those timers which will not be used when - * TCP is detached. This has to be done before the tcp_wq - * is set to the global queue. + * TCP is detached. This has to be done before the conn_wq + * is cleared. */ tcp_timers_stop(tcp); tcp->tcp_detached = B_TRUE; - ASSERT(tcps->tcps_g_q != NULL); - tcp->tcp_rq = tcps->tcps_g_q; - tcp->tcp_wq = WR(tcps->tcps_g_q); + connp->conn_rq = NULL; + connp->conn_wq = NULL; if (tcp->tcp_state == TCPS_TIME_WAIT) { tcp_time_wait_append(tcp); @@ -3595,16 +3160,14 @@ tcp_stop_lingering(tcp_t *tcp) } } else { tcp_closei_local(tcp); - CONN_DEC_REF(tcp->tcp_connp); + CONN_DEC_REF(connp); } finish: /* Signal closing thread that it can complete close */ mutex_enter(&tcp->tcp_closelock); tcp->tcp_detached = B_TRUE; - ASSERT(tcps->tcps_g_q != NULL); - - tcp->tcp_rq = tcps->tcps_g_q; - tcp->tcp_wq = WR(tcps->tcps_g_q); + connp->conn_rq = NULL; + connp->conn_wq = NULL; tcp->tcp_closed = 1; cv_signal(&tcp->tcp_closecv); @@ -3636,9 +3199,9 @@ tcp_close_common(conn_t *connp, int flags) ASSERT(connp->conn_ref >= 2); /* - * Mark the conn as closing. ill_pending_mp_add will not + * Mark the conn as closing. ipsq_pending_mp_add will not * add any mp to the pending mp list, after this conn has - * started closing. Same for sq_pending_mp_add + * started closing. 
*/ mutex_enter(&connp->conn_lock); connp->conn_state_flags |= CONN_CLOSING; @@ -3664,7 +3227,7 @@ tcp_close_common(conn_t *connp, int flags) TCP_DEBUG_GETPCSTACK(tcp->tcmp_stk, 15); SQUEUE_ENTER_ONE(connp->conn_sqp, mp, tcp_close_output, connp, - tcp_squeue_flag, SQTAG_IP_TCP_CLOSE); + NULL, tcp_squeue_flag, SQTAG_IP_TCP_CLOSE); mutex_enter(&tcp->tcp_closelock); while (!tcp->tcp_closed) { @@ -3684,13 +3247,13 @@ tcp_close_common(conn_t *connp, int flags) * thread is higher priority than the squeue worker * thread and is bound to the same cpu. */ - if (tcp->tcp_linger && tcp->tcp_lingertime > 0) { + if (connp->conn_linger && connp->conn_lingertime > 0) { mutex_exit(&tcp->tcp_closelock); /* Entering squeue, bump ref count. */ CONN_INC_REF(connp); bp = allocb_wait(0, BPRI_HI, STR_NOSIG, NULL); SQUEUE_ENTER_ONE(connp->conn_sqp, bp, - tcp_linger_interrupted, connp, + tcp_linger_interrupted, connp, NULL, tcp_squeue_flag, SQTAG_IP_TCP_CLOSE); mutex_enter(&tcp->tcp_closelock); } @@ -3703,8 +3266,8 @@ tcp_close_common(conn_t *connp, int flags) /* * In the case of listener streams that have eagers in the q or q0 - * we wait for the eagers to drop their reference to us. tcp_rq and - * tcp_wq of the eagers point to our queues. By waiting for the + * we wait for the eagers to drop their reference to us. conn_rq and + * conn_wq of the eagers point to our queues. By waiting for the * refcnt to drop to 1, we are sure that the eagers have cleaned * up their queue pointers and also dropped their references to us. */ @@ -3716,13 +3279,12 @@ tcp_close_common(conn_t *connp, int flags) mutex_exit(&connp->conn_lock); } /* - * ioctl cleanup. The mp is queued in the - * ill_pending_mp or in the sq_pending_mp. + * ioctl cleanup. The mp is queued in the ipx_pending_mp. 
*/ if (conn_ioctl_cleanup_reqd) conn_ioctl_cleanup(connp); - tcp->tcp_cpid = -1; + connp->conn_cpid = NOPID; } static int @@ -3799,7 +3361,7 @@ tcp_tpi_close_accept(queue_t *q) /* ARGSUSED */ static void -tcp_linger_interrupted(void *arg, mblk_t *mp, void *arg2) +tcp_linger_interrupted(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *dummy) { conn_t *connp = (conn_t *)arg; tcp_t *tcp = connp->conn_tcp; @@ -3828,7 +3390,7 @@ tcp_linger_interrupted(void *arg, mblk_t *mp, void *arg2) /* ARGSUSED */ static void -tcp_close_output(void *arg, mblk_t *mp, void *arg2) +tcp_close_output(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *dummy) { char *msg; conn_t *connp = (conn_t *)arg; @@ -3847,10 +3409,6 @@ tcp_close_output(void *arg, mblk_t *mp, void *arg2) } mutex_exit(&tcp->tcp_eager_lock); - connp->conn_mdt_ok = B_FALSE; - tcp->tcp_mdt = B_FALSE; - - connp->conn_lso_ok = B_FALSE; tcp->tcp_lso = B_FALSE; msg = NULL; @@ -3879,12 +3437,11 @@ tcp_close_output(void *arg, mblk_t *mp, void *arg2) * If SO_LINGER has set a zero linger time, abort the * connection with a reset. */ - if (tcp->tcp_linger && tcp->tcp_lingertime == 0) { + if (connp->conn_linger && connp->conn_lingertime == 0) { msg = "tcp_close, zero lingertime"; break; } - ASSERT(tcp->tcp_hard_bound || tcp->tcp_hard_binding); /* * Abort connection if there is unread data queued. */ @@ -3893,9 +3450,6 @@ tcp_close_output(void *arg, mblk_t *mp, void *arg2) break; } /* - * tcp_hard_bound is now cleared thus all packets go through - * tcp_lookup. This fact is used by tcp_detach below. - * * We have done a qwait() above which could have possibly * drained more messages in turn causing transition to a * different state. Check whether we have to do the rest @@ -3915,7 +3469,7 @@ tcp_close_output(void *arg, mblk_t *mp, void *arg2) * If lingering on close then wait until the fin is acked, * the SO_LINGER time passes, or a reset is sent/received. 
*/ - if (tcp->tcp_linger && tcp->tcp_lingertime > 0 && + if (connp->conn_linger && connp->conn_lingertime > 0 && !(tcp->tcp_fin_acked) && tcp->tcp_state >= TCPS_ESTABLISHED) { if (tcp->tcp_closeflags & (FNDELAY|FNONBLOCK)) { @@ -3926,7 +3480,7 @@ tcp_close_output(void *arg, mblk_t *mp, void *arg2) tcp->tcp_linger_tid = TCP_TIMER(tcp, tcp_close_linger_timeout, - tcp->tcp_lingertime * hz); + connp->conn_lingertime * hz); /* tcp_close_linger_timeout will finish close */ if (tcp->tcp_linger_tid == 0) @@ -3944,8 +3498,8 @@ tcp_close_output(void *arg, mblk_t *mp, void *arg2) } /* - * Make sure that no other thread will access the tcp_rq of - * this instance (through lookups etc.) as tcp_rq will go + * Make sure that no other thread will access the conn_rq of + * this instance (through lookups etc.) as conn_rq will go * away shortly. */ tcp_acceptor_hash_remove(tcp); @@ -3962,8 +3516,8 @@ tcp_close_output(void *arg, mblk_t *mp, void *arg2) } /* * Need to cancel those timers which will not be used when - * TCP is detached. This has to be done before the tcp_wq - * is set to the global queue. + * TCP is detached. This has to be done before the conn_wq + * is set to NULL. */ tcp_timers_stop(tcp); @@ -4004,18 +3558,6 @@ tcp_close_output(void *arg, mblk_t *mp, void *arg2) ASSERT(connp->conn_ref >= 2); finish: - /* - * Although packets are always processed on the correct - * tcp's perimeter and access is serialized via squeue's, - * IP still needs a queue when sending packets in time_wait - * state so use WR(tcps_g_q) till ip_output() can be - * changed to deal with just connp. For read side, we - * could have set tcp_rq to NULL but there are some cases - * in tcp_rput_data() from early days of this code which - * do a putnext without checking if tcp is closed. Those - * need to be identified before both tcp_rq and tcp_wq - * can be set to NULL and tcps_g_q can disappear forever. 
- */ mutex_enter(&tcp->tcp_closelock); /* * Don't change the queues in the case of a listener that has @@ -4024,13 +3566,8 @@ finish: */ if (!tcp->tcp_wait_for_eagers) { tcp->tcp_detached = B_TRUE; - /* - * When default queue is closing we set tcps_g_q to NULL - * after the close is done. - */ - ASSERT(tcps->tcps_g_q != NULL); - tcp->tcp_rq = tcps->tcps_g_q; - tcp->tcp_wq = WR(tcps->tcps_g_q); + connp->conn_rq = NULL; + connp->conn_wq = NULL; } /* Signal tcp_close() to finish closing. */ @@ -4112,8 +3649,7 @@ tcp_timers_stop(tcp_t *tcp) static void tcp_closei_local(tcp_t *tcp) { - ire_t *ire; - conn_t *connp = tcp->tcp_connp; + conn_t *connp = tcp->tcp_connp; tcp_stack_t *tcps = tcp->tcp_tcps; if (!TCP_IS_SOCKET(tcp)) @@ -4138,7 +3674,7 @@ tcp_closei_local(tcp_t *tcp) * this point, eager will be closed but we * leave it in listeners eager list so that * if listener decides to close without doing - * accept, we can clean this up. In tcp_wput_accept + * accept, we can clean this up. In tcp_tli_accept * we take care of the case of accept on closed * eager. 
*/ @@ -4150,9 +3686,9 @@ tcp_closei_local(tcp_t *tcp) * listener queue, after we have released our * reference on the listener */ - ASSERT(tcps->tcps_g_q != NULL); - tcp->tcp_rq = tcps->tcps_g_q; - tcp->tcp_wq = WR(tcps->tcps_g_q); + ASSERT(tcp->tcp_detached); + connp->conn_rq = NULL; + connp->conn_wq = NULL; CONN_DEC_REF(listener->tcp_connp); } else { mutex_exit(&listener->tcp_eager_lock); @@ -4185,20 +3721,16 @@ tcp_closei_local(tcp_t *tcp) */ if (tcp->tcp_state == TCPS_TIME_WAIT) (void) tcp_time_wait_remove(tcp, NULL); - CL_INET_DISCONNECT(connp, tcp); + CL_INET_DISCONNECT(connp); ipcl_hash_remove(connp); + ixa_cleanup(connp->conn_ixa); /* - * Delete the cached ire in conn_ire_cache and also mark - * the conn as CONDEMNED + * Mark the conn as CONDEMNED */ mutex_enter(&connp->conn_lock); connp->conn_state_flags |= CONN_CONDEMNED; - ire = connp->conn_ire_cache; - connp->conn_ire_cache = NULL; mutex_exit(&connp->conn_lock); - if (ire != NULL) - IRE_REFRELE_NOTR(ire); /* Need to cleanup any pending ioctls */ ASSERT(tcp->tcp_time_wait_next == NULL); @@ -4227,14 +3759,14 @@ tcp_closei_local(tcp_t *tcp) void tcp_free(tcp_t *tcp) { - mblk_t *mp; - ip6_pkt_t *ipp; + mblk_t *mp; + conn_t *connp = tcp->tcp_connp; ASSERT(tcp != NULL); ASSERT(tcp->tcp_ptpahn == NULL && tcp->tcp_acceptor_hash == NULL); - tcp->tcp_rq = NULL; - tcp->tcp_wq = NULL; + connp->conn_rq = NULL; + connp->conn_wq = NULL; tcp_close_mpp(&tcp->tcp_xmit_head); tcp_close_mpp(&tcp->tcp_reass_head); @@ -4281,12 +3813,12 @@ tcp_free(tcp_t *tcp) tcp->tcp_dstoptslen = 0; } ASSERT(tcp->tcp_dstoptslen == 0); - if (tcp->tcp_rtdstopts != NULL) { - mi_free(tcp->tcp_rtdstopts); - tcp->tcp_rtdstopts = NULL; - tcp->tcp_rtdstoptslen = 0; + if (tcp->tcp_rthdrdstopts != NULL) { + mi_free(tcp->tcp_rthdrdstopts); + tcp->tcp_rthdrdstopts = NULL; + tcp->tcp_rthdrdstoptslen = 0; } - ASSERT(tcp->tcp_rtdstoptslen == 0); + ASSERT(tcp->tcp_rthdrdstoptslen == 0); if (tcp->tcp_rthdr != NULL) { mi_free(tcp->tcp_rthdr); tcp->tcp_rthdr 
= NULL; @@ -4294,18 +3826,6 @@ tcp_free(tcp_t *tcp) } ASSERT(tcp->tcp_rthdrlen == 0); - ipp = &tcp->tcp_sticky_ipp; - if (ipp->ipp_fields & (IPPF_HOPOPTS | IPPF_RTDSTOPTS | IPPF_DSTOPTS | - IPPF_RTHDR)) - ip6_pkt_free(ipp); - - /* - * Free memory associated with the tcp/ip header template. - */ - - if (tcp->tcp_iphc != NULL) - bzero(tcp->tcp_iphc, tcp->tcp_iphc_len); - /* * Following is really a blowing away a union. * It happens to have exactly two members of identical size @@ -4317,17 +3837,19 @@ tcp_free(tcp_t *tcp) /* * Put a connection confirmation message upstream built from the - * address information within 'iph' and 'tcph'. Report our success or failure. + * address/flowid information with the conn and iph. Report our success or + * failure. */ static boolean_t -tcp_conn_con(tcp_t *tcp, uchar_t *iphdr, tcph_t *tcph, mblk_t *idmp, - mblk_t **defermp) +tcp_conn_con(tcp_t *tcp, uchar_t *iphdr, mblk_t *idmp, + mblk_t **defermp, ip_recv_attr_t *ira) { sin_t sin; sin6_t sin6; mblk_t *mp; char *optp = NULL; int optlen = 0; + conn_t *connp = tcp->tcp_connp; if (defermp != NULL) *defermp = NULL; @@ -4352,20 +3874,19 @@ tcp_conn_con(tcp_t *tcp, uchar_t *iphdr, tcph_t *tcph, mblk_t *idmp, } if (IPH_HDR_VERSION(iphdr) == IPV4_VERSION) { - ipha_t *ipha = (ipha_t *)iphdr; /* packet is IPv4 */ - if (tcp->tcp_family == AF_INET) { + if (connp->conn_family == AF_INET) { sin = sin_null; - sin.sin_addr.s_addr = ipha->ipha_src; - sin.sin_port = *(uint16_t *)tcph->th_lport; + sin.sin_addr.s_addr = connp->conn_faddr_v4; + sin.sin_port = connp->conn_fport; sin.sin_family = AF_INET; mp = mi_tpi_conn_con(NULL, (char *)&sin, (int)sizeof (sin_t), optp, optlen); } else { sin6 = sin6_null; - IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &sin6.sin6_addr); - sin6.sin6_port = *(uint16_t *)tcph->th_lport; + sin6.sin6_addr = connp->conn_faddr_v6; + sin6.sin6_port = connp->conn_fport; sin6.sin6_family = AF_INET6; mp = mi_tpi_conn_con(NULL, (char *)&sin6, (int)sizeof (sin6_t), optp, optlen); @@ 
-4375,10 +3896,10 @@ tcp_conn_con(tcp_t *tcp, uchar_t *iphdr, tcph_t *tcph, mblk_t *idmp, ip6_t *ip6h = (ip6_t *)iphdr; ASSERT(IPH_HDR_VERSION(iphdr) == IPV6_VERSION); - ASSERT(tcp->tcp_family == AF_INET6); + ASSERT(connp->conn_family == AF_INET6); sin6 = sin6_null; - sin6.sin6_addr = ip6h->ip6_src; - sin6.sin6_port = *(uint16_t *)tcph->th_lport; + sin6.sin6_addr = connp->conn_faddr_v6; + sin6.sin6_port = connp->conn_fport; sin6.sin6_family = AF_INET6; sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; mp = mi_tpi_conn_con(NULL, (char *)&sin6, @@ -4393,16 +3914,16 @@ tcp_conn_con(tcp_t *tcp, uchar_t *iphdr, tcph_t *tcph, mblk_t *idmp, if (defermp == NULL) { conn_t *connp = tcp->tcp_connp; if (IPCL_IS_NONSTR(connp)) { - cred_t *cr; - pid_t cpid; - - cr = msg_getcred(mp, &cpid); (*connp->conn_upcalls->su_connected) - (connp->conn_upper_handle, tcp->tcp_connid, cr, - cpid); + (connp->conn_upper_handle, tcp->tcp_connid, + ira->ira_cred, ira->ira_cpid); freemsg(mp); } else { - putnext(tcp->tcp_rq, mp); + if (ira->ira_cred != NULL) { + /* So that getpeerucred works for TPI sockfs */ + mblk_setcred(mp, ira->ira_cred, ira->ira_cpid); + } + putnext(connp->conn_rq, mp); } } else { *defermp = mp; @@ -4456,7 +3977,7 @@ tcp_drop_q0(tcp_t *tcp) */ MAKE_UNDROPPABLE(eager); - if (tcp->tcp_debug) { + if (tcp->tcp_connp->conn_debug) { (void) strlog(TCP_MOD_ID, 0, 3, SL_TRACE, "tcp_drop_q0: listen half-open queue (max=%d) overflow" " (%d pending) on %s, drop one", tcps->tcps_conn_req_max_q0, @@ -4469,18 +3990,19 @@ tcp_drop_q0(tcp_t *tcp) /* Put a reference on the conn as we are enqueueing it in the sqeue */ CONN_INC_REF(eager->tcp_connp); - /* Mark the IRE created for this SYN request temporary */ - tcp_ip_ire_mark_advice(eager); SQUEUE_ENTER_ONE(eager->tcp_connp->conn_sqp, mp, - tcp_clean_death_wrapper, eager->tcp_connp, + tcp_clean_death_wrapper, eager->tcp_connp, NULL, SQ_FILL, SQTAG_TCP_DROP_Q0); return (B_TRUE); } -int +/* + * Handle a SYN on an AF_INET6 socket; can 
be either IPv4 or IPv6 + */ +static mblk_t * tcp_conn_create_v6(conn_t *lconnp, conn_t *connp, mblk_t *mp, - tcph_t *tcph, uint_t ipvers, mblk_t *idmp) + ip_recv_attr_t *ira) { tcp_t *ltcp = lconnp->conn_tcp; tcp_t *tcp = connp->conn_tcp; @@ -4488,36 +4010,30 @@ tcp_conn_create_v6(conn_t *lconnp, conn_t *connp, mblk_t *mp, ipha_t *ipha; ip6_t *ip6h; sin6_t sin6; - in6_addr_t v6dst; - int err; - int ifindex = 0; + uint_t ifindex = ira->ira_ruifindex; tcp_stack_t *tcps = tcp->tcp_tcps; - if (ipvers == IPV4_VERSION) { + if (ira->ira_flags & IRAF_IS_IPV4) { ipha = (ipha_t *)mp->b_rptr; - connp->conn_send = ip_output; - connp->conn_recv = tcp_input; - - IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, - &connp->conn_bound_source_v6); - IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &connp->conn_srcv6); - IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &connp->conn_remv6); + connp->conn_ipversion = IPV4_VERSION; + IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &connp->conn_laddr_v6); + IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &connp->conn_faddr_v6); + connp->conn_saddr_v6 = connp->conn_laddr_v6; sin6 = sin6_null; - IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &sin6.sin6_addr); - IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &v6dst); - sin6.sin6_port = *(uint16_t *)tcph->th_lport; + sin6.sin6_addr = connp->conn_faddr_v6; + sin6.sin6_port = connp->conn_fport; sin6.sin6_family = AF_INET6; - sin6.__sin6_src_id = ip_srcid_find_addr(&v6dst, - lconnp->conn_zoneid, tcps->tcps_netstack); - if (tcp->tcp_recvdstaddr) { + sin6.__sin6_src_id = ip_srcid_find_addr(&connp->conn_laddr_v6, + IPCL_ZONEID(lconnp), tcps->tcps_netstack); + + if (connp->conn_recv_ancillary.crb_recvdstaddr) { sin6_t sin6d; sin6d = sin6_null; - IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, - &sin6d.sin6_addr); - sin6d.sin6_port = *(uint16_t *)tcph->th_fport; + sin6d.sin6_addr = connp->conn_laddr_v6; + sin6d.sin6_port = connp->conn_lport; sin6d.sin6_family = AF_INET; tpi_mp = mi_tpi_extconn_ind(NULL, (char *)&sin6d, sizeof (sin6_t), @@ -4534,24 +4050,18 @@ 
tcp_conn_create_v6(conn_t *lconnp, conn_t *connp, mblk_t *mp, } else { ip6h = (ip6_t *)mp->b_rptr; - connp->conn_send = ip_output_v6; - connp->conn_recv = tcp_input; - - connp->conn_bound_source_v6 = ip6h->ip6_dst; - connp->conn_srcv6 = ip6h->ip6_dst; - connp->conn_remv6 = ip6h->ip6_src; - - /* db_cksumstuff is set at ip_fanout_tcp_v6 */ - ifindex = (int)DB_CKSUMSTUFF(mp); - DB_CKSUMSTUFF(mp) = 0; + connp->conn_ipversion = IPV6_VERSION; + connp->conn_laddr_v6 = ip6h->ip6_dst; + connp->conn_faddr_v6 = ip6h->ip6_src; + connp->conn_saddr_v6 = connp->conn_laddr_v6; sin6 = sin6_null; - sin6.sin6_addr = ip6h->ip6_src; - sin6.sin6_port = *(uint16_t *)tcph->th_lport; + sin6.sin6_addr = connp->conn_faddr_v6; + sin6.sin6_port = connp->conn_fport; sin6.sin6_family = AF_INET6; sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; - sin6.__sin6_src_id = ip_srcid_find_addr(&ip6h->ip6_dst, - lconnp->conn_zoneid, tcps->tcps_netstack); + sin6.__sin6_src_id = ip_srcid_find_addr(&connp->conn_laddr_v6, + IPCL_ZONEID(lconnp), tcps->tcps_netstack); if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) { /* Pass up the scope_id of remote addr */ @@ -4559,13 +4069,16 @@ tcp_conn_create_v6(conn_t *lconnp, conn_t *connp, mblk_t *mp, } else { sin6.sin6_scope_id = 0; } - if (tcp->tcp_recvdstaddr) { + if (connp->conn_recv_ancillary.crb_recvdstaddr) { sin6_t sin6d; sin6d = sin6_null; - sin6.sin6_addr = ip6h->ip6_dst; - sin6d.sin6_port = *(uint16_t *)tcph->th_fport; - sin6d.sin6_family = AF_INET; + sin6.sin6_addr = connp->conn_laddr_v6; + sin6d.sin6_port = connp->conn_lport; + sin6d.sin6_family = AF_INET6; + if (IN6_IS_ADDR_LINKSCOPE(&connp->conn_laddr_v6)) + sin6d.sin6_scope_id = ifindex; + tpi_mp = mi_tpi_extconn_ind(NULL, (char *)&sin6d, sizeof (sin6_t), (char *)&tcp, (t_scalar_t)sizeof (intptr_t), @@ -4579,194 +4092,40 @@ tcp_conn_create_v6(conn_t *lconnp, conn_t *connp, mblk_t *mp, } } - if (tpi_mp == NULL) - return (ENOMEM); - - connp->conn_fport = *(uint16_t *)tcph->th_lport; - 
connp->conn_lport = *(uint16_t *)tcph->th_fport; - connp->conn_flags |= (IPCL_TCP6|IPCL_EAGER); - connp->conn_fully_bound = B_FALSE; - - /* Inherit information from the "parent" */ - tcp->tcp_ipversion = ltcp->tcp_ipversion; - tcp->tcp_family = ltcp->tcp_family; - - tcp->tcp_wq = ltcp->tcp_wq; - tcp->tcp_rq = ltcp->tcp_rq; - tcp->tcp_mss = tcps->tcps_mss_def_ipv6; - tcp->tcp_detached = B_TRUE; - SOCK_CONNID_INIT(tcp->tcp_connid); - if ((err = tcp_init_values(tcp)) != 0) { - freemsg(tpi_mp); - return (err); - } - - if (ipvers == IPV4_VERSION) { - if ((err = tcp_header_init_ipv4(tcp)) != 0) { - freemsg(tpi_mp); - return (err); - } - ASSERT(tcp->tcp_ipha != NULL); - } else { - /* ifindex must be already set */ - ASSERT(ifindex != 0); - - if (ltcp->tcp_bound_if != 0) - tcp->tcp_bound_if = ltcp->tcp_bound_if; - else if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) - tcp->tcp_bound_if = ifindex; - - tcp->tcp_ipv6_recvancillary = ltcp->tcp_ipv6_recvancillary; - tcp->tcp_recvifindex = 0; - tcp->tcp_recvhops = 0xffffffffU; - ASSERT(tcp->tcp_ip6h != NULL); - } - - tcp->tcp_lport = ltcp->tcp_lport; - - if (ltcp->tcp_ipversion == tcp->tcp_ipversion) { - if (tcp->tcp_iphc_len != ltcp->tcp_iphc_len) { - /* - * Listener had options of some sort; eager inherits. - * Free up the eager template and allocate one - * of the right size. 
- */ - if (tcp->tcp_hdr_grown) { - kmem_free(tcp->tcp_iphc, tcp->tcp_iphc_len); - } else { - bzero(tcp->tcp_iphc, tcp->tcp_iphc_len); - kmem_cache_free(tcp_iphc_cache, tcp->tcp_iphc); - } - tcp->tcp_iphc = kmem_zalloc(ltcp->tcp_iphc_len, - KM_NOSLEEP); - if (tcp->tcp_iphc == NULL) { - tcp->tcp_iphc_len = 0; - freemsg(tpi_mp); - return (ENOMEM); - } - tcp->tcp_iphc_len = ltcp->tcp_iphc_len; - tcp->tcp_hdr_grown = B_TRUE; - } - tcp->tcp_hdr_len = ltcp->tcp_hdr_len; - tcp->tcp_ip_hdr_len = ltcp->tcp_ip_hdr_len; - tcp->tcp_tcp_hdr_len = ltcp->tcp_tcp_hdr_len; - tcp->tcp_ip6_hops = ltcp->tcp_ip6_hops; - tcp->tcp_ip6_vcf = ltcp->tcp_ip6_vcf; - - /* - * Copy the IP+TCP header template from listener to eager - */ - bcopy(ltcp->tcp_iphc, tcp->tcp_iphc, ltcp->tcp_hdr_len); - if (tcp->tcp_ipversion == IPV6_VERSION) { - if (((ip6i_t *)(tcp->tcp_iphc))->ip6i_nxt == - IPPROTO_RAW) { - tcp->tcp_ip6h = - (ip6_t *)(tcp->tcp_iphc + - sizeof (ip6i_t)); - } else { - tcp->tcp_ip6h = - (ip6_t *)(tcp->tcp_iphc); - } - tcp->tcp_ipha = NULL; - } else { - tcp->tcp_ipha = (ipha_t *)tcp->tcp_iphc; - tcp->tcp_ip6h = NULL; - } - tcp->tcp_tcph = (tcph_t *)(tcp->tcp_iphc + - tcp->tcp_ip_hdr_len); - } else { - /* - * only valid case when ipversion of listener and - * eager differ is when listener is IPv6 and - * eager is IPv4. - * Eager header template has been initialized to the - * maximum v4 header sizes, which includes space for - * TCP and IP options. 
- */ - ASSERT((ltcp->tcp_ipversion == IPV6_VERSION) && - (tcp->tcp_ipversion == IPV4_VERSION)); - ASSERT(tcp->tcp_iphc_len >= - TCP_MAX_COMBINED_HEADER_LENGTH); - tcp->tcp_tcp_hdr_len = ltcp->tcp_tcp_hdr_len; - /* copy IP header fields individually */ - tcp->tcp_ipha->ipha_ttl = - ltcp->tcp_ip6h->ip6_hops; - bcopy(ltcp->tcp_tcph->th_lport, - tcp->tcp_tcph->th_lport, sizeof (ushort_t)); - } - - bcopy(tcph->th_lport, tcp->tcp_tcph->th_fport, sizeof (in_port_t)); - bcopy(tcp->tcp_tcph->th_fport, &tcp->tcp_fport, - sizeof (in_port_t)); - - if (ltcp->tcp_lport == 0) { - tcp->tcp_lport = *(in_port_t *)tcph->th_fport; - bcopy(tcph->th_fport, tcp->tcp_tcph->th_lport, - sizeof (in_port_t)); - } - - if (tcp->tcp_ipversion == IPV4_VERSION) { - ASSERT(ipha != NULL); - tcp->tcp_ipha->ipha_dst = ipha->ipha_src; - tcp->tcp_ipha->ipha_src = ipha->ipha_dst; - - /* Source routing option copyover (reverse it) */ - if (tcps->tcps_rev_src_routes) - tcp_opt_reverse(tcp, ipha); - } else { - ASSERT(ip6h != NULL); - tcp->tcp_ip6h->ip6_dst = ip6h->ip6_src; - tcp->tcp_ip6h->ip6_src = ip6h->ip6_dst; - } - - ASSERT(tcp->tcp_conn.tcp_eager_conn_ind == NULL); - ASSERT(!tcp->tcp_tconnind_started); - /* - * If the SYN contains a credential, it's a loopback packet; attach - * the credential to the TPI message. 
- */ - mblk_copycred(tpi_mp, idmp); - - tcp->tcp_conn.tcp_eager_conn_ind = tpi_mp; - - /* Inherit the listener's SSL protection state */ - - if ((tcp->tcp_kssl_ent = ltcp->tcp_kssl_ent) != NULL) { - kssl_hold_ent(tcp->tcp_kssl_ent); - tcp->tcp_kssl_pending = B_TRUE; - } - - /* Inherit the listener's non-STREAMS flag */ - if (IPCL_IS_NONSTR(lconnp)) { - connp->conn_flags |= IPCL_NONSTR; - } - - return (0); + return (tpi_mp); } - -int -tcp_conn_create_v4(conn_t *lconnp, conn_t *connp, ipha_t *ipha, - tcph_t *tcph, mblk_t *idmp) +/* Handle a SYN on an AF_INET socket */ +mblk_t * +tcp_conn_create_v4(conn_t *lconnp, conn_t *connp, mblk_t *mp, + ip_recv_attr_t *ira) { tcp_t *ltcp = lconnp->conn_tcp; tcp_t *tcp = connp->conn_tcp; sin_t sin; mblk_t *tpi_mp = NULL; - int err; tcp_stack_t *tcps = tcp->tcp_tcps; + ipha_t *ipha; + + ASSERT(ira->ira_flags & IRAF_IS_IPV4); + ipha = (ipha_t *)mp->b_rptr; + + connp->conn_ipversion = IPV4_VERSION; + IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &connp->conn_laddr_v6); + IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &connp->conn_faddr_v6); + connp->conn_saddr_v6 = connp->conn_laddr_v6; sin = sin_null; - sin.sin_addr.s_addr = ipha->ipha_src; - sin.sin_port = *(uint16_t *)tcph->th_lport; + sin.sin_addr.s_addr = connp->conn_faddr_v4; + sin.sin_port = connp->conn_fport; sin.sin_family = AF_INET; - if (ltcp->tcp_recvdstaddr) { + if (lconnp->conn_recv_ancillary.crb_recvdstaddr) { sin_t sind; sind = sin_null; - sind.sin_addr.s_addr = ipha->ipha_dst; - sind.sin_port = *(uint16_t *)tcph->th_fport; + sind.sin_addr.s_addr = connp->conn_laddr_v4; + sind.sin_port = connp->conn_lport; sind.sin_family = AF_INET; tpi_mp = mi_tpi_extconn_ind(NULL, (char *)&sind, sizeof (sin_t), (char *)&tcp, @@ -4779,214 +4138,8 @@ tcp_conn_create_v4(conn_t *lconnp, conn_t *connp, ipha_t *ipha, (t_scalar_t)ltcp->tcp_conn_req_seqnum); } - if (tpi_mp == NULL) { - return (ENOMEM); - } - - connp->conn_flags |= (IPCL_TCP4|IPCL_EAGER); - connp->conn_send = ip_output; - connp->conn_recv 
= tcp_input; - connp->conn_fully_bound = B_FALSE; - - IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &connp->conn_bound_source_v6); - IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &connp->conn_srcv6); - IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &connp->conn_remv6); - connp->conn_fport = *(uint16_t *)tcph->th_lport; - connp->conn_lport = *(uint16_t *)tcph->th_fport; - - /* Inherit information from the "parent" */ - tcp->tcp_ipversion = ltcp->tcp_ipversion; - tcp->tcp_family = ltcp->tcp_family; - tcp->tcp_wq = ltcp->tcp_wq; - tcp->tcp_rq = ltcp->tcp_rq; tcp->tcp_mss = tcps->tcps_mss_def_ipv4; - tcp->tcp_detached = B_TRUE; - SOCK_CONNID_INIT(tcp->tcp_connid); - if ((err = tcp_init_values(tcp)) != 0) { - freemsg(tpi_mp); - return (err); - } - - /* - * Let's make sure that eager tcp template has enough space to - * copy IPv4 listener's tcp template. Since the conn_t structure is - * preserved and tcp_iphc_len is also preserved, an eager conn_t may - * have a tcp_template of total len TCP_MAX_COMBINED_HEADER_LENGTH or - * more (in case of re-allocation of conn_t with tcp-IPv6 template with - * extension headers or with ip6i_t struct). Note that bcopy() below - * copies listener tcp's hdr_len which cannot be greater than TCP_MAX_ - * COMBINED_HEADER_LENGTH as this listener must be a IPv4 listener. 
- */ - ASSERT(tcp->tcp_iphc_len >= TCP_MAX_COMBINED_HEADER_LENGTH); - ASSERT(ltcp->tcp_hdr_len <= TCP_MAX_COMBINED_HEADER_LENGTH); - - tcp->tcp_hdr_len = ltcp->tcp_hdr_len; - tcp->tcp_ip_hdr_len = ltcp->tcp_ip_hdr_len; - tcp->tcp_tcp_hdr_len = ltcp->tcp_tcp_hdr_len; - tcp->tcp_ttl = ltcp->tcp_ttl; - tcp->tcp_tos = ltcp->tcp_tos; - - /* Copy the IP+TCP header template from listener to eager */ - bcopy(ltcp->tcp_iphc, tcp->tcp_iphc, ltcp->tcp_hdr_len); - tcp->tcp_ipha = (ipha_t *)tcp->tcp_iphc; - tcp->tcp_ip6h = NULL; - tcp->tcp_tcph = (tcph_t *)(tcp->tcp_iphc + - tcp->tcp_ip_hdr_len); - - /* Initialize the IP addresses and Ports */ - tcp->tcp_ipha->ipha_dst = ipha->ipha_src; - tcp->tcp_ipha->ipha_src = ipha->ipha_dst; - bcopy(tcph->th_lport, tcp->tcp_tcph->th_fport, sizeof (in_port_t)); - bcopy(tcph->th_fport, tcp->tcp_tcph->th_lport, sizeof (in_port_t)); - - /* Source routing option copyover (reverse it) */ - if (tcps->tcps_rev_src_routes) - tcp_opt_reverse(tcp, ipha); - - ASSERT(tcp->tcp_conn.tcp_eager_conn_ind == NULL); - ASSERT(!tcp->tcp_tconnind_started); - - /* - * If the SYN contains a credential, it's a loopback packet; attach - * the credential to the TPI message. - */ - mblk_copycred(tpi_mp, idmp); - - tcp->tcp_conn.tcp_eager_conn_ind = tpi_mp; - - /* Inherit the listener's SSL protection state */ - if ((tcp->tcp_kssl_ent = ltcp->tcp_kssl_ent) != NULL) { - kssl_hold_ent(tcp->tcp_kssl_ent); - tcp->tcp_kssl_pending = B_TRUE; - } - - /* Inherit the listener's non-STREAMS flag */ - if (IPCL_IS_NONSTR(lconnp)) { - connp->conn_flags |= IPCL_NONSTR; - } - - return (0); -} - -/* - * sets up conn for ipsec. - * if the first mblk is M_CTL it is consumed and mpp is updated. - * in case of error mpp is freed. 
- */ -conn_t * -tcp_get_ipsec_conn(tcp_t *tcp, squeue_t *sqp, mblk_t **mpp) -{ - conn_t *connp = tcp->tcp_connp; - conn_t *econnp; - squeue_t *new_sqp; - mblk_t *first_mp = *mpp; - mblk_t *mp = *mpp; - boolean_t mctl_present = B_FALSE; - uint_t ipvers; - - econnp = tcp_get_conn(sqp, tcp->tcp_tcps); - if (econnp == NULL) { - freemsg(first_mp); - return (NULL); - } - if (DB_TYPE(mp) == M_CTL) { - if (mp->b_cont == NULL || - mp->b_cont->b_datap->db_type != M_DATA) { - freemsg(first_mp); - return (NULL); - } - mp = mp->b_cont; - if ((mp->b_datap->db_struioflag & STRUIO_EAGER) == 0) { - freemsg(first_mp); - return (NULL); - } - - mp->b_datap->db_struioflag &= ~STRUIO_EAGER; - first_mp->b_datap->db_struioflag &= ~STRUIO_POLICY; - mctl_present = B_TRUE; - } else { - ASSERT(mp->b_datap->db_struioflag & STRUIO_POLICY); - mp->b_datap->db_struioflag &= ~STRUIO_POLICY; - } - - new_sqp = (squeue_t *)DB_CKSUMSTART(mp); - DB_CKSUMSTART(mp) = 0; - - ASSERT(OK_32PTR(mp->b_rptr)); - ipvers = IPH_HDR_VERSION(mp->b_rptr); - if (ipvers == IPV4_VERSION) { - uint16_t *up; - uint32_t ports; - ipha_t *ipha; - - ipha = (ipha_t *)mp->b_rptr; - up = (uint16_t *)((uchar_t *)ipha + - IPH_HDR_LENGTH(ipha) + TCP_PORTS_OFFSET); - ports = *(uint32_t *)up; - IPCL_TCP_EAGER_INIT(econnp, IPPROTO_TCP, - ipha->ipha_dst, ipha->ipha_src, ports); - } else { - uint16_t *up; - uint32_t ports; - uint16_t ip_hdr_len; - uint8_t *nexthdrp; - ip6_t *ip6h; - tcph_t *tcph; - - ip6h = (ip6_t *)mp->b_rptr; - if (ip6h->ip6_nxt == IPPROTO_TCP) { - ip_hdr_len = IPV6_HDR_LEN; - } else if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &ip_hdr_len, - &nexthdrp) || *nexthdrp != IPPROTO_TCP) { - CONN_DEC_REF(econnp); - freemsg(first_mp); - return (NULL); - } - tcph = (tcph_t *)&mp->b_rptr[ip_hdr_len]; - up = (uint16_t *)tcph->th_lport; - ports = *(uint32_t *)up; - IPCL_TCP_EAGER_INIT_V6(econnp, IPPROTO_TCP, - ip6h->ip6_dst, ip6h->ip6_src, ports); - } - - /* - * The caller already ensured that there is a sqp present. 
- */ - econnp->conn_sqp = new_sqp; - econnp->conn_initial_sqp = new_sqp; - - if (connp->conn_policy != NULL) { - ipsec_in_t *ii; - ii = (ipsec_in_t *)(first_mp->b_rptr); - ASSERT(ii->ipsec_in_policy == NULL); - IPPH_REFHOLD(connp->conn_policy); - ii->ipsec_in_policy = connp->conn_policy; - - first_mp->b_datap->db_type = IPSEC_POLICY_SET; - if (!ip_bind_ipsec_policy_set(econnp, first_mp)) { - CONN_DEC_REF(econnp); - freemsg(first_mp); - return (NULL); - } - } - - if (ipsec_conn_cache_policy(econnp, ipvers == IPV4_VERSION) != 0) { - CONN_DEC_REF(econnp); - freemsg(first_mp); - return (NULL); - } - - /* - * If we know we have some policy, pass the "IPSEC" - * options size TCP uses this adjust the MSS. - */ - econnp->conn_tcp->tcp_ipsec_overhead = conn_ipsec_length(econnp); - if (mctl_present) { - freeb(first_mp); - *mpp = mp; - } - - return (econnp); + return (tpi_mp); } /* @@ -5002,10 +4155,8 @@ tcp_get_ipsec_conn(tcp_t *tcp, squeue_t *sqp, mblk_t **mpp) * connection sitting in the freelist. Obviously, this buys us * performance. * - * 2) Defence against DOS attack. Allocating a tcp/conn in tcp_conn_request - * has multiple disadvantages - tying up the squeue during alloc, and the - * fact that IPSec policy initialization has to happen here which - * requires us sending a M_CTL and checking for it i.e. real ugliness. + * 2) Defence against DOS attack. Allocating a tcp/conn in tcp_input_listener + * has multiple disadvantages - tying up the squeue during alloc. 
* But allocating the conn/tcp in IP land is also not the best since * we can't check the 'q' and 'q0' which are protected by squeue and * blindly allocate memory which might have to be freed here if we are @@ -5050,9 +4201,15 @@ tcp_get_conn(void *arg, tcp_stack_t *tcps) ns = tcps->tcps_netstack; netstack_hold(ns); connp->conn_netstack = ns; + connp->conn_ixa->ixa_ipst = ns->netstack_ip; tcp->tcp_tcps = tcps; - TCPS_REFHOLD(tcps); ipcl_globalhash_insert(connp); + + connp->conn_ixa->ixa_notify_cookie = tcp; + ASSERT(connp->conn_ixa->ixa_notify == tcp_notify); + connp->conn_recv = tcp_input_data; + ASSERT(connp->conn_recvicmp == tcp_icmp_input); + ASSERT(connp->conn_verifyicmp == tcp_verifyicmp); return ((void *)connp); } mutex_exit(&tcp_time_wait->tcp_time_wait_lock); @@ -5075,62 +4232,20 @@ tcp_get_conn(void *arg, tcp_stack_t *tcps) mutex_init(&tcp->tcp_rsrv_mp_lock, NULL, MUTEX_DEFAULT, NULL); tcp->tcp_tcps = tcps; - TCPS_REFHOLD(tcps); - return ((void *)connp); -} + connp->conn_recv = tcp_input_data; + connp->conn_recvicmp = tcp_icmp_input; + connp->conn_verifyicmp = tcp_verifyicmp; -/* - * Update the cached label for the given tcp_t. This should be called once per - * connection, and before any packets are sent or tcp_process_options is - * invoked. Returns B_FALSE if the correct label could not be constructed. 
- */ -static boolean_t -tcp_update_label(tcp_t *tcp, const cred_t *cr) -{ - conn_t *connp = tcp->tcp_connp; - - if (tcp->tcp_ipversion == IPV4_VERSION) { - uchar_t optbuf[IP_MAX_OPT_LENGTH]; - int added; - - if (tsol_compute_label(cr, tcp->tcp_remote, optbuf, - tcp->tcp_tcps->tcps_netstack->netstack_ip) != 0) - return (B_FALSE); - - added = tsol_remove_secopt(tcp->tcp_ipha, tcp->tcp_hdr_len); - if (added == -1) - return (B_FALSE); - tcp->tcp_hdr_len += added; - tcp->tcp_tcph = (tcph_t *)((uchar_t *)tcp->tcp_tcph + added); - tcp->tcp_ip_hdr_len += added; - if ((tcp->tcp_label_len = optbuf[IPOPT_OLEN]) != 0) { - tcp->tcp_label_len = (tcp->tcp_label_len + 3) & ~3; - added = tsol_prepend_option(optbuf, tcp->tcp_ipha, - tcp->tcp_hdr_len); - if (added == -1) - return (B_FALSE); - tcp->tcp_hdr_len += added; - tcp->tcp_tcph = (tcph_t *) - ((uchar_t *)tcp->tcp_tcph + added); - tcp->tcp_ip_hdr_len += added; - } - } else { - uchar_t optbuf[TSOL_MAX_IPV6_OPTION]; - - if (tsol_compute_label_v6(cr, &tcp->tcp_remote_v6, optbuf, - tcp->tcp_tcps->tcps_netstack->netstack_ip) != 0) - return (B_FALSE); - if (tsol_update_sticky(&tcp->tcp_sticky_ipp, - &tcp->tcp_label_len, optbuf) != 0) - return (B_FALSE); - if (tcp_build_hdrs(tcp) != 0) - return (B_FALSE); - } - - connp->conn_ulp_labeled = 1; + /* + * Register tcp_notify to listen to capability changes detected by IP. + * This upcall is made in the context of the call to conn_ip_output + * thus it is inside the squeue. + */ + connp->conn_ixa->ixa_notify = tcp_notify; + connp->conn_ixa->ixa_notify_cookie = tcp; - return (B_TRUE); + return ((void *)connp); } /* BEGIN CSTYLED */ @@ -5140,7 +4255,7 @@ tcp_update_label(tcp_t *tcp, const cred_t *cr) * ======================= * * The eager is now established in its own perimeter as soon as SYN is - * received in tcp_conn_request(). When sockfs receives conn_ind, it + * received in tcp_input_listener(). When sockfs receives conn_ind, it * completes the accept processing on the acceptor STREAM. 
The sending * of conn_ind part is common for both sockfs listener and a TLI/XTI * listener but a TLI/XTI listener completes the accept processing @@ -5149,29 +4264,28 @@ tcp_update_label(tcp_t *tcp, const cred_t *cr) * Common control flow for 3 way handshake: * ---------------------------------------- * - * incoming SYN (listener perimeter) -> tcp_rput_data() - * -> tcp_conn_request() + * incoming SYN (listener perimeter) -> tcp_input_listener() * - * incoming SYN-ACK-ACK (eager perim) -> tcp_rput_data() + * incoming SYN-ACK-ACK (eager perim) -> tcp_input_data() * send T_CONN_IND (listener perim) -> tcp_send_conn_ind() * * Sockfs ACCEPT Path: * ------------------- * - * open acceptor stream (tcp_open allocates tcp_wput_accept() + * open acceptor stream (tcp_open allocates tcp_tli_accept() * as STREAM entry point) * - * soaccept() sends T_CONN_RES on the acceptor STREAM to tcp_wput_accept() + * soaccept() sends T_CONN_RES on the acceptor STREAM to tcp_tli_accept() * - * tcp_wput_accept() extracts the eager and makes the q->q_ptr <-> eager + * tcp_tli_accept() extracts the eager and makes the q->q_ptr <-> eager * association (we are not behind eager's squeue but sockfs is protecting us * and no one knows about this stream yet. The STREAMS entry point q->q_info * is changed to point at tcp_wput(). * - * tcp_wput_accept() sends any deferred eagers via tcp_send_pending() to + * tcp_accept_common() sends any deferred eagers via tcp_send_pending() to * listener (done on listener's perimeter). * - * tcp_wput_accept() calls tcp_accept_finish() on eagers perimeter to finish + * tcp_tli_accept() calls tcp_accept_finish() on eagers perimeter to finish * accept. * * TLI/XTI client ACCEPT path: @@ -5179,8 +4293,8 @@ tcp_update_label(tcp_t *tcp, const cred_t *cr) * * soaccept() sends T_CONN_RES on the listener STREAM. * - * tcp_accept() -> tcp_accept_swap() complete the processing and send - * the bind_mp to eager perimeter to finish accept (tcp_rput_other()). 
+ * tcp_tli_accept() -> tcp_accept_swap() complete the processing and send + * a M_SETOPS mblk to eager perimeter to finish accept (tcp_accept_finish()). * * Locks: * ====== @@ -5191,7 +4305,7 @@ tcp_update_label(tcp_t *tcp, const cred_t *cr) * Referencing: * ============ * - * 1) We start out in tcp_conn_request by eager placing a ref on + * 1) We start out in tcp_input_listener by eager placing a ref on * listener and listener adding eager to listeners->tcp_eager_next_q0. * * 2) When a SYN-ACK-ACK arrives, we send the conn_ind to listener. Before @@ -5249,51 +4363,71 @@ tcp_update_label(tcp_t *tcp, const cred_t *cr) /* * THIS FUNCTION IS DIRECTLY CALLED BY IP VIA SQUEUE FOR SYN. - * tcp_rput_data will not see any SYN packets. + * tcp_input_data will not see any packets for listeners since the listener + * has conn_recv set to tcp_input_listener. */ /* ARGSUSED */ void -tcp_conn_request(void *arg, mblk_t *mp, void *arg2) +tcp_input_listener(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *ira) { - tcph_t *tcph; + tcpha_t *tcpha; uint32_t seg_seq; tcp_t *eager; - uint_t ipvers; - ipha_t *ipha; - ip6_t *ip6h; int err; conn_t *econnp = NULL; squeue_t *new_sqp; mblk_t *mp1; uint_t ip_hdr_len; - conn_t *connp = (conn_t *)arg; - tcp_t *tcp = connp->conn_tcp; - cred_t *credp; - tcp_stack_t *tcps = tcp->tcp_tcps; - ip_stack_t *ipst; + conn_t *lconnp = (conn_t *)arg; + tcp_t *listener = lconnp->conn_tcp; + tcp_stack_t *tcps = listener->tcp_tcps; + ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip; + uint_t flags; + mblk_t *tpi_mp; + uint_t ifindex = ira->ira_ruifindex; - if (tcp->tcp_state != TCPS_LISTEN) + ip_hdr_len = ira->ira_ip_hdr_length; + tcpha = (tcpha_t *)&mp->b_rptr[ip_hdr_len]; + flags = (unsigned int)tcpha->tha_flags & 0xFF; + + if (!(flags & TH_SYN)) { + if ((flags & TH_RST) || (flags & TH_URG)) { + freemsg(mp); + return; + } + if (flags & TH_ACK) { + /* Note this executes in listener's squeue */ + tcp_xmit_listeners_reset(mp, ira, ipst, lconnp); + return; 
+ } + + freemsg(mp); + return; + } + + if (listener->tcp_state != TCPS_LISTEN) goto error2; - ASSERT((tcp->tcp_connp->conn_flags & IPCL_BOUND) != 0); + ASSERT(IPCL_IS_BOUND(lconnp)); - mutex_enter(&tcp->tcp_eager_lock); - if (tcp->tcp_conn_req_cnt_q >= tcp->tcp_conn_req_max) { - mutex_exit(&tcp->tcp_eager_lock); + mutex_enter(&listener->tcp_eager_lock); + if (listener->tcp_conn_req_cnt_q >= listener->tcp_conn_req_max) { + mutex_exit(&listener->tcp_eager_lock); TCP_STAT(tcps, tcp_listendrop); BUMP_MIB(&tcps->tcps_mib, tcpListenDrop); - if (tcp->tcp_debug) { + if (lconnp->conn_debug) { (void) strlog(TCP_MOD_ID, 0, 1, SL_TRACE|SL_ERROR, - "tcp_conn_request: listen backlog (max=%d) " + "tcp_input_listener: listen backlog (max=%d) " "overflow (%d pending) on %s", - tcp->tcp_conn_req_max, tcp->tcp_conn_req_cnt_q, - tcp_display(tcp, NULL, DISP_PORT_ONLY)); + listener->tcp_conn_req_max, + listener->tcp_conn_req_cnt_q, + tcp_display(listener, NULL, DISP_PORT_ONLY)); } goto error2; } - if (tcp->tcp_conn_req_cnt_q0 >= - tcp->tcp_conn_req_max + tcps->tcps_conn_req_max_q0) { + if (listener->tcp_conn_req_cnt_q0 >= + listener->tcp_conn_req_max + tcps->tcps_conn_req_max_q0) { /* * Q0 is full. Drop a pending half-open req from the queue * to make room for the new SYN req. Also mark the time we @@ -5303,83 +4437,127 @@ tcp_conn_request(void *arg, mblk_t *mp, void *arg2) * be to set the "tcp_syn_defense" flag now. 
*/ TCP_STAT(tcps, tcp_listendropq0); - tcp->tcp_last_rcv_lbolt = lbolt64; - if (!tcp_drop_q0(tcp)) { - mutex_exit(&tcp->tcp_eager_lock); + listener->tcp_last_rcv_lbolt = lbolt64; + if (!tcp_drop_q0(listener)) { + mutex_exit(&listener->tcp_eager_lock); BUMP_MIB(&tcps->tcps_mib, tcpListenDropQ0); - if (tcp->tcp_debug) { + if (lconnp->conn_debug) { (void) strlog(TCP_MOD_ID, 0, 3, SL_TRACE, - "tcp_conn_request: listen half-open queue " - "(max=%d) full (%d pending) on %s", + "tcp_input_listener: listen half-open " + "queue (max=%d) full (%d pending) on %s", tcps->tcps_conn_req_max_q0, - tcp->tcp_conn_req_cnt_q0, - tcp_display(tcp, NULL, + listener->tcp_conn_req_cnt_q0, + tcp_display(listener, NULL, DISP_PORT_ONLY)); } goto error2; } } - mutex_exit(&tcp->tcp_eager_lock); + mutex_exit(&listener->tcp_eager_lock); /* - * IP adds STRUIO_EAGER and ensures that the received packet is - * M_DATA even if conn_ipv6_recvpktinfo is enabled or for ip6 - * link local address. If IPSec is enabled, db_struioflag has - * STRUIO_POLICY set (mutually exclusive from STRUIO_EAGER); - * otherwise an error case if neither of them is set. + * IP sets ira_sqp to either the senders conn_sqp (for loopback) + * or based on the ring (for packets from GLD). Otherwise it is + * set based on lbolt i.e., a somewhat random number. */ - if ((mp->b_datap->db_struioflag & STRUIO_EAGER) != 0) { - new_sqp = (squeue_t *)DB_CKSUMSTART(mp); - DB_CKSUMSTART(mp) = 0; - mp->b_datap->db_struioflag &= ~STRUIO_EAGER; - econnp = (conn_t *)tcp_get_conn(arg2, tcps); - if (econnp == NULL) - goto error2; - ASSERT(econnp->conn_netstack == connp->conn_netstack); - econnp->conn_sqp = new_sqp; - econnp->conn_initial_sqp = new_sqp; - } else if ((mp->b_datap->db_struioflag & STRUIO_POLICY) != 0) { - /* - * mp is updated in tcp_get_ipsec_conn(). - */ - econnp = tcp_get_ipsec_conn(tcp, arg2, &mp); - if (econnp == NULL) { - /* - * mp freed by tcp_get_ipsec_conn. 
- */ - return; - } - ASSERT(econnp->conn_netstack == connp->conn_netstack); - } else { + ASSERT(ira->ira_sqp != NULL); + new_sqp = ira->ira_sqp; + + econnp = (conn_t *)tcp_get_conn(arg2, tcps); + if (econnp == NULL) goto error2; - } - ASSERT(DB_TYPE(mp) == M_DATA); + ASSERT(econnp->conn_netstack == lconnp->conn_netstack); + econnp->conn_sqp = new_sqp; + econnp->conn_initial_sqp = new_sqp; + econnp->conn_ixa->ixa_sqp = new_sqp; + + econnp->conn_fport = tcpha->tha_lport; + econnp->conn_lport = tcpha->tha_fport; + + err = conn_inherit_parent(lconnp, econnp); + if (err != 0) + goto error3; - ipvers = IPH_HDR_VERSION(mp->b_rptr); - ASSERT(ipvers == IPV6_VERSION || ipvers == IPV4_VERSION); ASSERT(OK_32PTR(mp->b_rptr)); - if (ipvers == IPV4_VERSION) { - ipha = (ipha_t *)mp->b_rptr; - ip_hdr_len = IPH_HDR_LENGTH(ipha); - tcph = (tcph_t *)&mp->b_rptr[ip_hdr_len]; - } else { - ip6h = (ip6_t *)mp->b_rptr; - ip_hdr_len = ip_hdr_length_v6(mp, ip6h); - tcph = (tcph_t *)&mp->b_rptr[ip_hdr_len]; - } + ASSERT(IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION || + IPH_HDR_VERSION(mp->b_rptr) == IPV6_VERSION); - if (tcp->tcp_family == AF_INET) { - ASSERT(ipvers == IPV4_VERSION); - err = tcp_conn_create_v4(connp, econnp, ipha, tcph, mp); + if (lconnp->conn_family == AF_INET) { + ASSERT(IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION); + tpi_mp = tcp_conn_create_v4(lconnp, econnp, mp, ira); } else { - err = tcp_conn_create_v6(connp, econnp, mp, tcph, ipvers, mp); + tpi_mp = tcp_conn_create_v6(lconnp, econnp, mp, ira); } - if (err) + if (tpi_mp == NULL) goto error3; eager = econnp->conn_tcp; + eager->tcp_detached = B_TRUE; + SOCK_CONNID_INIT(eager->tcp_connid); + + tcp_init_values(eager); + + ASSERT((econnp->conn_ixa->ixa_flags & + (IXAF_SET_ULP_CKSUM | IXAF_VERIFY_SOURCE | + IXAF_VERIFY_PMTU | IXAF_VERIFY_LSO)) == + (IXAF_SET_ULP_CKSUM | IXAF_VERIFY_SOURCE | + IXAF_VERIFY_PMTU | IXAF_VERIFY_LSO)); + + if (!tcps->tcps_dev_flow_ctl) + econnp->conn_ixa->ixa_flags |= IXAF_NO_DEV_FLOW_CTL; + + /* 
Prepare for diffing against previous packets */ + eager->tcp_recvifindex = 0; + eager->tcp_recvhops = 0xffffffffU; + + if (!(ira->ira_flags & IRAF_IS_IPV4) && econnp->conn_bound_if == 0) { + if (IN6_IS_ADDR_LINKSCOPE(&econnp->conn_faddr_v6) || + IN6_IS_ADDR_LINKSCOPE(&econnp->conn_laddr_v6)) { + econnp->conn_incoming_ifindex = ifindex; + econnp->conn_ixa->ixa_flags |= IXAF_SCOPEID_SET; + econnp->conn_ixa->ixa_scopeid = ifindex; + } + } + + if ((ira->ira_flags & (IRAF_IS_IPV4|IRAF_IPV4_OPTIONS)) == + (IRAF_IS_IPV4|IRAF_IPV4_OPTIONS) && + tcps->tcps_rev_src_routes) { + ipha_t *ipha = (ipha_t *)mp->b_rptr; + ip_pkt_t *ipp = &econnp->conn_xmit_ipp; + + /* Source routing option copyover (reverse it) */ + err = ip_find_hdr_v4(ipha, ipp, B_TRUE); + if (err != 0) { + freemsg(tpi_mp); + goto error3; + } + ip_pkt_source_route_reverse_v4(ipp); + } + + ASSERT(eager->tcp_conn.tcp_eager_conn_ind == NULL); + ASSERT(!eager->tcp_tconnind_started); + /* + * If the SYN came with a credential, it's a loopback packet or a + * labeled packet; attach the credential to the TPI message. 
+ */ + if (ira->ira_cred != NULL) + mblk_setcred(tpi_mp, ira->ira_cred, ira->ira_cpid); + + eager->tcp_conn.tcp_eager_conn_ind = tpi_mp; + + /* Inherit the listener's SSL protection state */ + if ((eager->tcp_kssl_ent = listener->tcp_kssl_ent) != NULL) { + kssl_hold_ent(eager->tcp_kssl_ent); + eager->tcp_kssl_pending = B_TRUE; + } + + /* Inherit the listener's non-STREAMS flag */ + if (IPCL_IS_NONSTR(lconnp)) { + econnp->conn_flags |= IPCL_NONSTR; + } + ASSERT(eager->tcp_ordrel_mp == NULL); if (!IPCL_IS_NONSTR(econnp)) { @@ -5392,127 +4570,103 @@ tcp_conn_request(void *arg, mblk_t *mp, void *arg2) if ((eager->tcp_ordrel_mp = mi_tpi_ordrel_ind()) == NULL) goto error3; } - /* Inherit various TCP parameters from the listener */ - eager->tcp_naglim = tcp->tcp_naglim; - eager->tcp_first_timer_threshold = tcp->tcp_first_timer_threshold; - eager->tcp_second_timer_threshold = tcp->tcp_second_timer_threshold; - - eager->tcp_first_ctimer_threshold = tcp->tcp_first_ctimer_threshold; - eager->tcp_second_ctimer_threshold = tcp->tcp_second_ctimer_threshold; - /* - * tcp_adapt_ire() may change tcp_rwnd according to the ire metrics. - * If it does not, the eager's receive window will be set to the - * listener's receive window later in this function. + * Now that the IP addresses and ports are setup in econnp we + * can do the IPsec policy work. */ - eager->tcp_rwnd = 0; + if (ira->ira_flags & IRAF_IPSEC_SECURE) { + if (lconnp->conn_policy != NULL) { + /* + * Inherit the policy from the listener; use + * actions from ira + */ + if (!ip_ipsec_policy_inherit(econnp, lconnp, ira)) { + CONN_DEC_REF(econnp); + freemsg(mp); + goto error3; + } + } + } - /* - * Inherit listener's tcp_init_cwnd. Need to do this before - * calling tcp_process_options() where tcp_mss_set() is called - * to set the initial cwnd. 
- */ - eager->tcp_init_cwnd = tcp->tcp_init_cwnd; + /* Inherit various TCP parameters from the listener */ + eager->tcp_naglim = listener->tcp_naglim; + eager->tcp_first_timer_threshold = listener->tcp_first_timer_threshold; + eager->tcp_second_timer_threshold = + listener->tcp_second_timer_threshold; + eager->tcp_first_ctimer_threshold = + listener->tcp_first_ctimer_threshold; + eager->tcp_second_ctimer_threshold = + listener->tcp_second_ctimer_threshold; /* - * Zones: tcp_adapt_ire() and tcp_send_data() both need the - * zone id before the accept is completed in tcp_wput_accept(). + * tcp_set_destination() may set tcp_rwnd according to the route + * metrics. If it does not, the eager's receive window will be set + * to the listener's receive window later in this function. */ - econnp->conn_zoneid = connp->conn_zoneid; - econnp->conn_allzones = connp->conn_allzones; - - /* Copy nexthop information from listener to eager */ - if (connp->conn_nexthop_set) { - econnp->conn_nexthop_set = connp->conn_nexthop_set; - econnp->conn_nexthop_v4 = connp->conn_nexthop_v4; - } + eager->tcp_rwnd = 0; /* - * TSOL: tsol_input_proc() needs the eager's cred before the - * eager is accepted + * Inherit listener's tcp_init_cwnd. Need to do this before + * calling tcp_process_options() which set the initial cwnd. */ - econnp->conn_cred = eager->tcp_cred = credp = connp->conn_cred; - crhold(credp); + eager->tcp_init_cwnd = listener->tcp_init_cwnd; - ASSERT(econnp->conn_effective_cred == NULL); if (is_system_labeled()) { - cred_t *cr; - ts_label_t *tsl; - - /* - * If this is an MLP connection or a MAC-Exempt connection - * with an unlabeled node, packets are to be - * exchanged using the security label of the received - * SYN packet instead of the server application's label. 
- */ - if ((cr = msg_getcred(mp, NULL)) != NULL && - (tsl = crgetlabel(cr)) != NULL && - (connp->conn_mlp_type != mlptSingle || - (connp->conn_mac_mode != CONN_MAC_AWARE && - (tsl->tsl_flags & TSLF_UNLABELED)))) { - if ((econnp->conn_effective_cred = - copycred_from_tslabel(econnp->conn_cred, - tsl, KM_NOSLEEP)) != NULL) { - DTRACE_PROBE2( - syn_accept_peerlabel, - conn_t *, econnp, cred_t *, - econnp->conn_effective_cred); - } else { - DTRACE_PROBE3( - tx__ip__log__error__set__eagercred__tcp, - char *, - "SYN mp(1) label on eager connp(2) failed", - mblk_t *, mp, conn_t *, econnp); - goto error3; - } + ip_xmit_attr_t *ixa = econnp->conn_ixa; + + ASSERT(ira->ira_tsl != NULL); + /* Discard any old label */ + if (ixa->ixa_free_flags & IXA_FREE_TSL) { + ASSERT(ixa->ixa_tsl != NULL); + label_rele(ixa->ixa_tsl); + ixa->ixa_free_flags &= ~IXA_FREE_TSL; + ixa->ixa_tsl = NULL; + } + if ((lconnp->conn_mlp_type != mlptSingle || + lconnp->conn_mac_mode != CONN_MAC_DEFAULT) && + ira->ira_tsl != NULL) { + /* + * If this is an MLP connection or a MAC-Exempt + * connection with an unlabeled node, packets are to be + * exchanged using the security label of the received + * SYN packet instead of the server application's label. + * tsol_check_dest called from ip_set_destination + * might later update TSF_UNLABELED by replacing + * ixa_tsl with a new label. + */ + label_hold(ira->ira_tsl); + ip_xmit_attr_replace_tsl(ixa, ira->ira_tsl); + DTRACE_PROBE2(mlp_syn_accept, conn_t *, + econnp, ts_label_t *, ixa->ixa_tsl) } else { + ixa->ixa_tsl = crgetlabel(econnp->conn_cred); DTRACE_PROBE2(syn_accept, conn_t *, - econnp, cred_t *, econnp->conn_cred) + econnp, ts_label_t *, ixa->ixa_tsl) } - /* - * Verify the destination is allowed to receive packets - * at the security label of the SYN-ACK we are generating. - * tsol_check_dest() may create a new effective cred for - * this connection with a modified label or label flags. 
+ * conn_connect() called from tcp_set_destination will verify + * the destination is allowed to receive packets at the + * security label of the SYN-ACK we are generating. As part of + * that, tsol_check_dest() may create a new effective label for + * this connection. + * Finally conn_connect() will call conn_update_label. + * All that remains for TCP to do is to call + * conn_build_hdr_template which is done as part of + * tcp_set_destination. */ - if (IN6_IS_ADDR_V4MAPPED(&econnp->conn_remv6)) { - uint32_t dst; - IN6_V4MAPPED_TO_IPADDR(&econnp->conn_remv6, dst); - err = tsol_check_dest(CONN_CRED(econnp), &dst, - IPV4_VERSION, B_FALSE, &cr); - } else { - err = tsol_check_dest(CONN_CRED(econnp), - &econnp->conn_remv6, IPV6_VERSION, - B_FALSE, &cr); - } - if (err != 0) - goto error3; - if (cr != NULL) { - if (econnp->conn_effective_cred != NULL) - crfree(econnp->conn_effective_cred); - econnp->conn_effective_cred = cr; - } - - /* - * Generate the security label to be used in the text of - * this connection's outgoing packets. - */ - if (!tcp_update_label(eager, CONN_CRED(econnp))) { - DTRACE_PROBE3( - tx__ip__log__error__connrequest__tcp, - char *, "eager connp(1) label on SYN mp(2) failed", - conn_t *, econnp, mblk_t *, mp); - goto error3; - } } + /* + * Since we will clear tcp_listener before we clear tcp_detached + * in the accept code we need tcp_hard_binding aka tcp_accept_inprogress + * so we can tell a TCP_DETACHED_NONEAGER apart. + */ eager->tcp_hard_binding = B_TRUE; tcp_bind_hash_insert(&tcps->tcps_bind_fanout[ - TCP_BIND_HASH(eager->tcp_lport)], eager, 0); + TCP_BIND_HASH(econnp->conn_lport)], eager, 0); - CL_INET_CONNECT(connp, eager, B_FALSE, err); + CL_INET_CONNECT(econnp, B_FALSE, err); if (err != 0) { tcp_bind_hash_remove(eager); goto error3; @@ -5528,32 +4682,27 @@ tcp_conn_request(void *arg, mblk_t *mp, void *arg2) SOCK_CONNID_BUMP(eager->tcp_connid); /* - * There should be no ire in the mp as we are being called after - * receiving the SYN. 
- */ - ASSERT(tcp_ire_mp(&mp) == NULL); - - /* - * Adapt our mss, ttl, ... according to information provided in IRE. + * Adapt our mss, ttl, ... based on the remote address. */ - if (tcp_adapt_ire(eager, NULL) == 0) { + if (tcp_set_destination(eager) != 0) { + BUMP_MIB(&tcps->tcps_mib, tcpAttemptFails); /* Undo the bind_hash_insert */ tcp_bind_hash_remove(eager); goto error3; } /* Process all TCP options. */ - tcp_process_options(eager, tcph); + tcp_process_options(eager, tcpha); /* Is the other end ECN capable? */ if (tcps->tcps_ecn_permitted >= 1 && - (tcph->th_flags[0] & (TH_ECE|TH_CWR)) == (TH_ECE|TH_CWR)) { + (tcpha->tha_flags & (TH_ECE|TH_CWR)) == (TH_ECE|TH_CWR)) { eager->tcp_ecn_ok = B_TRUE; } /* - * listeners tcp_recv_hiwater should be the default window size or a + * The listener's conn_rcvbuf should be the default window size or a * window size changed via SO_RCVBUF option. First round up the * eager's tcp_rwnd to the nearest MSS. Then find out the window * scale option value if needed. Call tcp_rwnd_set() to finish the @@ -5563,7 +4712,7 @@ tcp_conn_request(void *arg, mblk_t *mp, void *arg2) * we should not inherit receive window size from listener. */ eager->tcp_rwnd = MSS_ROUNDUP( - (eager->tcp_rwnd == 0 ? tcp->tcp_recv_hiwater: + (eager->tcp_rwnd == 0 ? econnp->conn_rcvbuf : eager->tcp_rwnd), eager->tcp_mss); if (eager->tcp_snd_ws_ok) tcp_set_ws_value(eager); @@ -5575,77 +4724,46 @@ tcp_conn_request(void *arg, mblk_t *mp, void *arg2) */ (void) tcp_rwnd_set(eager, eager->tcp_rwnd); - /* - * We eliminate the need for sockfs to send down a T_SVR4_OPTMGMT_REQ - * via soaccept()->soinheritoptions() which essentially applies - * all the listener options to the new STREAM. The options that we - * need to take care of are: - * SO_DEBUG, SO_REUSEADDR, SO_KEEPALIVE, SO_DONTROUTE, SO_BROADCAST, - * SO_USELOOPBACK, SO_OOBINLINE, SO_DGRAM_ERRIND, SO_LINGER, - * SO_SNDBUF, SO_RCVBUF. - * - * SO_RCVBUF: tcp_rwnd_set() above takes care of it. 
- * SO_SNDBUF: Set the tcp_xmit_hiwater for the eager. When - * tcp_maxpsz_set() gets called later from - * tcp_accept_finish(), the option takes effect. - * - */ - /* Set the TCP options */ - eager->tcp_recv_lowater = tcp->tcp_recv_lowater; - eager->tcp_xmit_hiwater = tcp->tcp_xmit_hiwater; - eager->tcp_dgram_errind = tcp->tcp_dgram_errind; - eager->tcp_oobinline = tcp->tcp_oobinline; - eager->tcp_reuseaddr = tcp->tcp_reuseaddr; - eager->tcp_broadcast = tcp->tcp_broadcast; - eager->tcp_useloopback = tcp->tcp_useloopback; - eager->tcp_dontroute = tcp->tcp_dontroute; - eager->tcp_debug = tcp->tcp_debug; - eager->tcp_linger = tcp->tcp_linger; - eager->tcp_lingertime = tcp->tcp_lingertime; - if (tcp->tcp_ka_enabled) - eager->tcp_ka_enabled = 1; - - ASSERT(eager->tcp_recv_hiwater != 0 && - eager->tcp_recv_hiwater == eager->tcp_rwnd); - - /* Set the IP options */ - econnp->conn_broadcast = connp->conn_broadcast; - econnp->conn_loopback = connp->conn_loopback; - econnp->conn_dontroute = connp->conn_dontroute; - econnp->conn_reuseaddr = connp->conn_reuseaddr; + ASSERT(eager->tcp_connp->conn_rcvbuf != 0 && + eager->tcp_connp->conn_rcvbuf == eager->tcp_rwnd); + + ASSERT(econnp->conn_rcvbuf != 0 && + econnp->conn_rcvbuf == eager->tcp_rwnd); /* Put a ref on the listener for the eager. 
*/ - CONN_INC_REF(connp); - mutex_enter(&tcp->tcp_eager_lock); - tcp->tcp_eager_next_q0->tcp_eager_prev_q0 = eager; - eager->tcp_eager_next_q0 = tcp->tcp_eager_next_q0; - tcp->tcp_eager_next_q0 = eager; - eager->tcp_eager_prev_q0 = tcp; + CONN_INC_REF(lconnp); + mutex_enter(&listener->tcp_eager_lock); + listener->tcp_eager_next_q0->tcp_eager_prev_q0 = eager; + eager->tcp_eager_next_q0 = listener->tcp_eager_next_q0; + listener->tcp_eager_next_q0 = eager; + eager->tcp_eager_prev_q0 = listener; /* Set tcp_listener before adding it to tcp_conn_fanout */ - eager->tcp_listener = tcp; - eager->tcp_saved_listener = tcp; + eager->tcp_listener = listener; + eager->tcp_saved_listener = listener; /* * Tag this detached tcp vector for later retrieval * by our listener client in tcp_accept(). */ - eager->tcp_conn_req_seqnum = tcp->tcp_conn_req_seqnum; - tcp->tcp_conn_req_cnt_q0++; - if (++tcp->tcp_conn_req_seqnum == -1) { + eager->tcp_conn_req_seqnum = listener->tcp_conn_req_seqnum; + listener->tcp_conn_req_cnt_q0++; + if (++listener->tcp_conn_req_seqnum == -1) { /* * -1 is "special" and defined in TPI as something * that should never be used in T_CONN_IND */ - ++tcp->tcp_conn_req_seqnum; + ++listener->tcp_conn_req_seqnum; } - mutex_exit(&tcp->tcp_eager_lock); + mutex_exit(&listener->tcp_eager_lock); - if (tcp->tcp_syn_defense) { + if (listener->tcp_syn_defense) { /* Don't drop the SYN that comes from a good IP source */ - ipaddr_t *addr_cache = (ipaddr_t *)(tcp->tcp_ip_addr_cache); - if (addr_cache != NULL && eager->tcp_remote == - addr_cache[IP_ADDR_CACHE_HASH(eager->tcp_remote)]) { + ipaddr_t *addr_cache; + + addr_cache = (ipaddr_t *)(listener->tcp_ip_addr_cache); + if (addr_cache != NULL && econnp->conn_faddr_v4 == + addr_cache[IP_ADDR_CACHE_HASH(econnp->conn_faddr_v4)]) { eager->tcp_dontdrop = B_TRUE; } } @@ -5655,14 +4773,14 @@ tcp_conn_request(void *arg, mblk_t *mp, void *arg2) * as we do that, we expose the eager to the classifier and * should not touch any field outside 
the eager's perimeter. * So do all the work necessary before inserting the eager - * in its own perimeter. Be optimistic that ipcl_conn_insert() + * in its own perimeter. Be optimistic that conn_connect() * will succeed but undo everything if it fails. */ - seg_seq = ABE32_TO_U32(tcph->th_seq); + seg_seq = ntohl(tcpha->tha_seq); eager->tcp_irs = seg_seq; eager->tcp_rack = seg_seq; eager->tcp_rnxt = seg_seq + 1; - U32_TO_ABE32(eager->tcp_rnxt, eager->tcp_tcph->th_ack); + eager->tcp_tcpha->tha_ack = htonl(eager->tcp_rnxt); BUMP_MIB(&tcps->tcps_mib, tcpPassiveOpens); eager->tcp_state = TCPS_SYN_RCVD; mp1 = tcp_xmit_mp(eager, eager->tcp_xmit_head, eager->tcp_mss, @@ -5677,24 +4795,10 @@ tcp_conn_request(void *arg, mblk_t *mp, void *arg2) } /* - * Note that in theory this should use the current pid - * so that getpeerucred on the client returns the actual listener - * that does accept. But accept() hasn't been called yet. We could use - * the pid of the process that did bind/listen on the server. - * However, with common usage like inetd() the bind/listen can be done - * by a different process than the accept(). - * Hence we do the simple thing of using the open pid here. - * Note that db_credp is set later in tcp_send_data(). - */ - mblk_setcred(mp1, credp, tcp->tcp_cpid); - eager->tcp_cpid = tcp->tcp_cpid; - eager->tcp_open_time = lbolt64; - - /* * We need to start the rto timer. In normal case, we start * the timer after sending the packet on the wire (or at * least believing that packet was sent by waiting for - * CALL_IP_WPUT() to return). Since this is the first packet + * conn_ip_output() to return). Since this is the first packet * being sent on the wire for the eager, our initial tcp_rto * is at least tcp_rexmit_interval_min which is a fairly * large value to allow the algorithm to adjust slowly to large @@ -5716,7 +4820,7 @@ tcp_conn_request(void *arg, mblk_t *mp, void *arg2) * ensure against an eager close race. 
*/ - CONN_INC_REF(eager->tcp_connp); + CONN_INC_REF(econnp); TCP_TIMER_RESTART(eager, eager->tcp_rto); @@ -5724,22 +4828,16 @@ tcp_conn_request(void *arg, mblk_t *mp, void *arg2) * Insert the eager in its own perimeter now. We are ready to deal * with any packets on eager. */ - if (eager->tcp_ipversion == IPV4_VERSION) { - if (ipcl_conn_insert(econnp, IPPROTO_TCP, 0, 0, 0) != 0) { - goto error; - } - } else { - if (ipcl_conn_insert_v6(econnp, IPPROTO_TCP, 0, 0, 0, 0) != 0) { - goto error; - } - } - - /* mark conn as fully-bound */ - econnp->conn_fully_bound = B_TRUE; + if (ipcl_conn_insert(econnp) != 0) + goto error; - /* Send the SYN-ACK */ - tcp_send_data(eager, eager->tcp_wq, mp1); - CONN_DEC_REF(eager->tcp_connp); + /* + * Send the SYN-ACK. Can't use tcp_send_data since we can't update + * pmtu etc; we are not on the eager's squeue + */ + ASSERT(econnp->conn_ixa->ixa_notify_cookie == econnp->conn_tcp); + (void) conn_ip_output(mp1, econnp->conn_ixa); + CONN_DEC_REF(econnp); freemsg(mp); return; @@ -5749,7 +4847,7 @@ error: TCP_DEBUG_GETPCSTACK(eager->tcmp_stk, 15); mp1 = &eager->tcp_closemp; SQUEUE_ENTER_ONE(econnp->conn_sqp, mp1, tcp_eager_kill, - econnp, SQ_FILL, SQTAG_TCP_CONN_REQ_2); + econnp, NULL, SQ_FILL, SQTAG_TCP_CONN_REQ_2); /* * If a connection already exists, send the mp to that connections so @@ -5757,7 +4855,7 @@ error: */ ipst = tcps->tcps_netstack->netstack_ip; - if ((econnp = ipcl_classify(mp, connp->conn_zoneid, ipst)) != NULL) { + if ((econnp = ipcl_classify(mp, ira, ipst)) != NULL) { if (!IPCL_IS_CONNECTED(econnp)) { /* * Something bad happened. 
ipcl_conn_insert() @@ -5772,8 +4870,8 @@ error: CONN_DEC_REF(econnp); freemsg(mp); } else { - SQUEUE_ENTER_ONE(econnp->conn_sqp, mp, - tcp_input, econnp, SQ_FILL, SQTAG_TCP_CONN_REQ_1); + SQUEUE_ENTER_ONE(econnp->conn_sqp, mp, tcp_input_data, + econnp, ira, SQ_FILL, SQTAG_TCP_CONN_REQ_1); } } else { /* Nobody wants this packet */ @@ -5803,18 +4901,21 @@ error2: * very first time and there is no attempt to rebind them. */ void -tcp_conn_request_unbound(void *arg, mblk_t *mp, void *arg2) +tcp_input_listener_unbound(void *arg, mblk_t *mp, void *arg2, + ip_recv_attr_t *ira) { conn_t *connp = (conn_t *)arg; squeue_t *sqp = (squeue_t *)arg2; squeue_t *new_sqp; uint32_t conn_flags; - if ((mp->b_datap->db_struioflag & STRUIO_EAGER) != 0) { - new_sqp = (squeue_t *)DB_CKSUMSTART(mp); - } else { - goto done; - } + /* + * IP sets ira_sqp to either the senders conn_sqp (for loopback) + * or based on the ring (for packets from GLD). Otherwise it is + * set based on lbolt i.e., a somewhat random number. + */ + ASSERT(ira->ira_sqp != NULL); + new_sqp = ira->ira_sqp; if (connp->conn_fanout == NULL) goto done; @@ -5849,6 +4950,8 @@ tcp_conn_request_unbound(void *arg, mblk_t *mp, void *arg2) if (connp->conn_sqp != new_sqp) { while (connp->conn_sqp != new_sqp) (void) casptr(&connp->conn_sqp, sqp, new_sqp); + /* No special MT issues for outbound ixa_sqp hint */ + connp->conn_ixa->ixa_sqp = new_sqp; } do { @@ -5860,49 +4963,47 @@ tcp_conn_request_unbound(void *arg, mblk_t *mp, void *arg2) mutex_exit(&connp->conn_fanout->connf_lock); mutex_exit(&connp->conn_lock); + + /* + * Assume we have picked a good squeue for the listener. Make + * subsequent SYNs not try to change the squeue. 
+ */ + connp->conn_recv = tcp_input_listener; } done: if (connp->conn_sqp != sqp) { CONN_INC_REF(connp); SQUEUE_ENTER_ONE(connp->conn_sqp, mp, connp->conn_recv, connp, - SQ_FILL, SQTAG_TCP_CONN_REQ_UNBOUND); + ira, SQ_FILL, SQTAG_TCP_CONN_REQ_UNBOUND); } else { - tcp_conn_request(connp, mp, sqp); + tcp_input_listener(connp, mp, sqp, ira); } } /* * Successful connect request processing begins when our client passes - * a T_CONN_REQ message into tcp_wput() and ends when tcp_rput() passes - * our T_OK_ACK reply message upstream. The control flow looks like this: - * upstream -> tcp_wput() -> tcp_wput_proto() -> tcp_tpi_connect() -> IP - * upstream <- tcp_rput() <- IP + * a T_CONN_REQ message into tcp_wput(), which performs function calls into + * IP and the passes a T_OK_ACK (or T_ERROR_ACK upstream). + * * After various error checks are completed, tcp_tpi_connect() lays - * the target address and port into the composite header template, - * preallocates the T_OK_ACK reply message, construct a full 12 byte bind - * request followed by an IRE request, and passes the three mblk message - * down to IP looking like this: - * O_T_BIND_REQ for IP --> IRE req --> T_OK_ACK for our client - * Processing continues in tcp_rput() when we receive the following message: - * T_BIND_ACK from IP --> IRE ack --> T_OK_ACK for our client - * After consuming the first two mblks, tcp_rput() calls tcp_timer(), - * to fire off the connection request, and then passes the T_OK_ACK mblk - * upstream that we filled in below. There are, of course, numerous - * error conditions along the way which truncate the processing described - * above. + * the target address and port into the composite header template. + * Then we ask IP for information, including a source address if we didn't + * already have one. Finally we prepare to send the SYN packet, and then + * send up the T_OK_ACK reply message. 
*/ static void tcp_tpi_connect(tcp_t *tcp, mblk_t *mp) { sin_t *sin; - queue_t *q = tcp->tcp_wq; struct T_conn_req *tcr; struct sockaddr *sa; socklen_t len; int error; cred_t *cr; pid_t cpid; + conn_t *connp = tcp->tcp_connp; + queue_t *q = connp->conn_wq; /* * All Solaris components should pass a db_credp @@ -5944,7 +5045,7 @@ tcp_tpi_connect(tcp_t *tcp, mblk_t *mp) * Determine packet type based on type of address passed in * the request should contain an IPv4 or IPv6 address. * Make sure that address family matches the type of - * family of the the address passed down + * family of the address passed down. */ switch (tcr->DEST_length) { default: @@ -6022,7 +5123,7 @@ tcp_tpi_connect(tcp_t *tcp, mblk_t *mp) break; } - error = proto_verify_ip_addr(tcp->tcp_family, sa, len); + error = proto_verify_ip_addr(connp->conn_family, sa, len); if (error != 0) { tcp_err_ack(tcp, mp, TSYSERR, error); return; @@ -6111,7 +5212,7 @@ tcp_tpi_connect(tcp_t *tcp, mblk_t *mp) /* return error ack and blow away saved option results if any */ connect_failed: if (mp != NULL) - putnext(tcp->tcp_rq, mp); + putnext(connp->conn_rq, mp); else { tcp_err_ack_prim(tcp, NULL, T_CONN_REQ, TSYSERR, ENOMEM); @@ -6121,20 +5222,19 @@ connect_failed: /* * Handle connect to IPv4 destinations, including connections for AF_INET6 * sockets connecting to IPv4 mapped IPv6 destinations. + * Returns zero if OK, a positive errno, or a negative TLI error. 
*/ static int tcp_connect_ipv4(tcp_t *tcp, ipaddr_t *dstaddrp, in_port_t dstport, - uint_t srcid, cred_t *cr, pid_t pid) + uint_t srcid) { - tcph_t *tcph; - mblk_t *mp; - ipaddr_t dstaddr = *dstaddrp; - int32_t oldstate; - uint16_t lport; - int error = 0; + ipaddr_t dstaddr = *dstaddrp; + uint16_t lport; + conn_t *connp = tcp->tcp_connp; tcp_stack_t *tcps = tcp->tcp_tcps; + int error; - ASSERT(tcp->tcp_ipversion == IPV4_VERSION); + ASSERT(connp->conn_ipversion == IPV4_VERSION); /* Check for attempt to connect to INADDR_ANY */ if (dstaddr == INADDR_ANY) { @@ -6157,74 +5257,21 @@ tcp_connect_ipv4(tcp_t *tcp, ipaddr_t *dstaddrp, in_port_t dstport, } /* Handle __sin6_src_id if socket not bound to an IP address */ - if (srcid != 0 && tcp->tcp_ipha->ipha_src == INADDR_ANY) { - ip_srcid_find_id(srcid, &tcp->tcp_ip_src_v6, - tcp->tcp_connp->conn_zoneid, tcps->tcps_netstack); - IN6_V4MAPPED_TO_IPADDR(&tcp->tcp_ip_src_v6, - tcp->tcp_ipha->ipha_src); + if (srcid != 0 && connp->conn_laddr_v4 == INADDR_ANY) { + ip_srcid_find_id(srcid, &connp->conn_laddr_v6, + IPCL_ZONEID(connp), tcps->tcps_netstack); + connp->conn_saddr_v6 = connp->conn_laddr_v6; } - /* - * Don't let an endpoint connect to itself. Note that - * the test here does not catch the case where the - * source IP addr was left unspecified by the user. In - * this case, the source addr is set in tcp_adapt_ire() - * using the reply to the T_BIND message that we send - * down to IP here and the check is repeated in tcp_rput_other. - */ - if (dstaddr == tcp->tcp_ipha->ipha_src && - dstport == tcp->tcp_lport) { - error = -TBADADDR; - goto failed; - } + IN6_IPADDR_TO_V4MAPPED(dstaddr, &connp->conn_faddr_v6); + connp->conn_fport = dstport; /* - * Verify the destination is allowed to receive packets - * at the security label of the connection we are initiating. - * tsol_check_dest() may create a new effective cred for this - * connection with a modified label or label flags. 
- */ - if (is_system_labeled()) { - ASSERT(tcp->tcp_connp->conn_effective_cred == NULL); - if ((error = tsol_check_dest(CONN_CRED(tcp->tcp_connp), - &dstaddr, IPV4_VERSION, tcp->tcp_connp->conn_mac_mode, - &tcp->tcp_connp->conn_effective_cred)) != 0) { - if (error != EHOSTUNREACH) - error = -TSYSERR; - goto failed; - } - } - - tcp->tcp_ipha->ipha_dst = dstaddr; - IN6_IPADDR_TO_V4MAPPED(dstaddr, &tcp->tcp_remote_v6); - - /* - * Massage a source route if any putting the first hop - * in iph_dst. Compute a starting value for the checksum which - * takes into account that the original iph_dst should be - * included in the checksum but that ip will include the - * first hop in the source route in the tcp checksum. - */ - tcp->tcp_sum = ip_massage_options(tcp->tcp_ipha, tcps->tcps_netstack); - tcp->tcp_sum = (tcp->tcp_sum & 0xFFFF) + (tcp->tcp_sum >> 16); - tcp->tcp_sum -= ((tcp->tcp_ipha->ipha_dst >> 16) + - (tcp->tcp_ipha->ipha_dst & 0xffff)); - if ((int)tcp->tcp_sum < 0) - tcp->tcp_sum--; - tcp->tcp_sum = (tcp->tcp_sum & 0xFFFF) + (tcp->tcp_sum >> 16); - tcp->tcp_sum = ntohs((tcp->tcp_sum & 0xFFFF) + - (tcp->tcp_sum >> 16)); - tcph = tcp->tcp_tcph; - *(uint16_t *)tcph->th_fport = dstport; - tcp->tcp_fport = dstport; - - oldstate = tcp->tcp_state; - /* * At this point the remote destination address and remote port fields * in the tcp-four-tuple have been filled in the tcp structure. Now we - * have to see which state tcp was in so we can take apropriate action. + * have to see which state tcp was in so we can take appropriate action. 
*/ - if (oldstate == TCPS_IDLE) { + if (tcp->tcp_state == TCPS_IDLE) { /* * We support a quick connect capability here, allowing * clients to transition directly from IDLE to SYN_SENT @@ -6233,203 +5280,93 @@ tcp_connect_ipv4(tcp_t *tcp, ipaddr_t *dstaddrp, in_port_t dstport, */ lport = tcp_update_next_port(tcps->tcps_next_port_to_try, tcp, B_TRUE); - lport = tcp_bindi(tcp, lport, &tcp->tcp_ip_src_v6, 0, B_TRUE, + lport = tcp_bindi(tcp, lport, &connp->conn_laddr_v6, 0, B_TRUE, B_FALSE, B_FALSE); - if (lport == 0) { - error = -TNOADDR; - goto failed; - } - } - tcp->tcp_state = TCPS_SYN_SENT; - - mp = allocb(sizeof (ire_t), BPRI_HI); - if (mp == NULL) { - tcp->tcp_state = oldstate; - error = ENOMEM; - goto failed; + if (lport == 0) + return (-TNOADDR); } - mp->b_wptr += sizeof (ire_t); - mp->b_datap->db_type = IRE_DB_REQ_TYPE; - tcp->tcp_hard_binding = 1; - /* - * We need to make sure that the conn_recv is set to a non-null - * value before we insert the conn_t into the classifier table. - * This is to avoid a race with an incoming packet which does - * an ipcl_classify(). + * Lookup the route to determine a source address and the uinfo. + * If there was a source route we have tcp_ipha->ipha_dst as the first + * hop. + * Setup TCP parameters based on the metrics/DCE. 
*/ - tcp->tcp_connp->conn_recv = tcp_input; + error = tcp_set_destination(tcp); + if (error != 0) + return (error); - if (tcp->tcp_family == AF_INET) { - error = ip_proto_bind_connected_v4(tcp->tcp_connp, &mp, - IPPROTO_TCP, &tcp->tcp_ipha->ipha_src, tcp->tcp_lport, - tcp->tcp_remote, tcp->tcp_fport, B_TRUE, B_TRUE, cr); - } else { - in6_addr_t v6src; - if (tcp->tcp_ipversion == IPV4_VERSION) { - IN6_IPADDR_TO_V4MAPPED(tcp->tcp_ipha->ipha_src, &v6src); - } else { - v6src = tcp->tcp_ip6h->ip6_src; - } - error = ip_proto_bind_connected_v6(tcp->tcp_connp, &mp, - IPPROTO_TCP, &v6src, tcp->tcp_lport, &tcp->tcp_remote_v6, - &tcp->tcp_sticky_ipp, tcp->tcp_fport, B_TRUE, B_TRUE, cr); - } - BUMP_MIB(&tcps->tcps_mib, tcpActiveOpens); - tcp->tcp_active_open = 1; + /* + * Don't let an endpoint connect to itself. + */ + if (connp->conn_faddr_v4 == connp->conn_laddr_v4 && + connp->conn_fport == connp->conn_lport) + return (-TBADADDR); + tcp->tcp_state = TCPS_SYN_SENT; - return (tcp_post_ip_bind(tcp, mp, error, cr, pid)); -failed: - /* return error ack and blow away saved option results if any */ - if (tcp->tcp_conn.tcp_opts_conn_req != NULL) - tcp_close_mpp(&tcp->tcp_conn.tcp_opts_conn_req); - return (error); + return (ipcl_conn_insert_v4(connp)); } /* * Handle connect to IPv6 destinations. + * Returns zero if OK, a positive errno, or a negative TLI error. */ static int tcp_connect_ipv6(tcp_t *tcp, in6_addr_t *dstaddrp, in_port_t dstport, - uint32_t flowinfo, uint_t srcid, uint32_t scope_id, cred_t *cr, pid_t pid) + uint32_t flowinfo, uint_t srcid, uint32_t scope_id) { - tcph_t *tcph; - mblk_t *mp; - ip6_rthdr_t *rth; - int32_t oldstate; - uint16_t lport; + uint16_t lport; + conn_t *connp = tcp->tcp_connp; tcp_stack_t *tcps = tcp->tcp_tcps; - int error = 0; - conn_t *connp = tcp->tcp_connp; + int error; - ASSERT(tcp->tcp_family == AF_INET6); + ASSERT(connp->conn_family == AF_INET6); /* * If we're here, it means that the destination address is a native - * IPv6 address. 
Return an error if tcp_ipversion is not IPv6. A + * IPv6 address. Return an error if conn_ipversion is not IPv6. A * reason why it might not be IPv6 is if the socket was bound to an * IPv4-mapped IPv6 address. */ - if (tcp->tcp_ipversion != IPV6_VERSION) { + if (connp->conn_ipversion != IPV6_VERSION) return (-TBADADDR); - } /* * Interpret a zero destination to mean loopback. * Update the T_CONN_REQ (sin/sin6) since it is used to * generate the T_CONN_CON. */ - if (IN6_IS_ADDR_UNSPECIFIED(dstaddrp)) { + if (IN6_IS_ADDR_UNSPECIFIED(dstaddrp)) *dstaddrp = ipv6_loopback; - } /* Handle __sin6_src_id if socket not bound to an IP address */ - if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&tcp->tcp_ip6h->ip6_src)) { - ip_srcid_find_id(srcid, &tcp->tcp_ip6h->ip6_src, - connp->conn_zoneid, tcps->tcps_netstack); - tcp->tcp_ip_src_v6 = tcp->tcp_ip6h->ip6_src; - } - - /* - * Take care of the scope_id now and add ip6i_t - * if ip6i_t is not already allocated through TCP - * sticky options. At this point tcp_ip6h does not - * have dst info, thus use dstaddrp. - */ - if (scope_id != 0 && - IN6_IS_ADDR_LINKSCOPE(dstaddrp)) { - ip6_pkt_t *ipp = &tcp->tcp_sticky_ipp; - ip6i_t *ip6i; - - ipp->ipp_ifindex = scope_id; - ip6i = (ip6i_t *)tcp->tcp_iphc; - - if ((ipp->ipp_fields & IPPF_HAS_IP6I) && - ip6i != NULL && (ip6i->ip6i_nxt == IPPROTO_RAW)) { - /* Already allocated */ - ip6i->ip6i_flags |= IP6I_IFINDEX; - ip6i->ip6i_ifindex = ipp->ipp_ifindex; - ipp->ipp_fields |= IPPF_SCOPE_ID; - } else { - int reterr; - - ipp->ipp_fields |= IPPF_SCOPE_ID; - if (ipp->ipp_fields & IPPF_HAS_IP6I) - ip2dbg(("tcp_connect_v6: SCOPE_ID set\n")); - reterr = tcp_build_hdrs(tcp); - if (reterr != 0) - goto failed; - ip1dbg(("tcp_connect_ipv6: tcp_bld_hdrs returned\n")); - } - } - - /* - * Don't let an endpoint connect to itself. Note that - * the test here does not catch the case where the - * source IP addr was left unspecified by the user. 
In - * this case, the source addr is set in tcp_adapt_ire() - * using the reply to the T_BIND message that we send - * down to IP here and the check is repeated in tcp_rput_other. - */ - if (IN6_ARE_ADDR_EQUAL(dstaddrp, &tcp->tcp_ip6h->ip6_src) && - (dstport == tcp->tcp_lport)) { - error = -TBADADDR; - goto failed; + if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) { + ip_srcid_find_id(srcid, &connp->conn_laddr_v6, + IPCL_ZONEID(connp), tcps->tcps_netstack); + connp->conn_saddr_v6 = connp->conn_laddr_v6; } /* - * Verify the destination is allowed to receive packets - * at the security label of the connection we are initiating. - * check_dest may create a new effective cred for this - * connection with a modified label or label flags. + * Take care of the scope_id now. */ - if (is_system_labeled()) { - ASSERT(tcp->tcp_connp->conn_effective_cred == NULL); - if ((error = tsol_check_dest(CONN_CRED(tcp->tcp_connp), - dstaddrp, IPV6_VERSION, tcp->tcp_connp->conn_mac_mode, - &tcp->tcp_connp->conn_effective_cred)) != 0) { - if (error != EHOSTUNREACH) - error = -TSYSERR; - goto failed; - } - } - - tcp->tcp_ip6h->ip6_dst = *dstaddrp; - tcp->tcp_remote_v6 = *dstaddrp; - tcp->tcp_ip6h->ip6_vcf = - (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | - (flowinfo & ~IPV6_VERS_AND_FLOW_MASK); - - /* - * Massage a routing header (if present) putting the first hop - * in ip6_dst. Compute a starting value for the checksum which - * takes into account that the original ip6_dst should be - * included in the checksum but that ip will include the - * first hop in the source route in the tcp checksum. 
- */ - rth = ip_find_rthdr_v6(tcp->tcp_ip6h, (uint8_t *)tcp->tcp_tcph); - if (rth != NULL) { - tcp->tcp_sum = ip_massage_options_v6(tcp->tcp_ip6h, rth, - tcps->tcps_netstack); - tcp->tcp_sum = ntohs((tcp->tcp_sum & 0xFFFF) + - (tcp->tcp_sum >> 16)); + if (scope_id != 0 && IN6_IS_ADDR_LINKSCOPE(dstaddrp)) { + connp->conn_ixa->ixa_flags |= IXAF_SCOPEID_SET; + connp->conn_ixa->ixa_scopeid = scope_id; } else { - tcp->tcp_sum = 0; + connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET; } - tcph = tcp->tcp_tcph; - *(uint16_t *)tcph->th_fport = dstport; - tcp->tcp_fport = dstport; + connp->conn_flowinfo = flowinfo; + connp->conn_faddr_v6 = *dstaddrp; + connp->conn_fport = dstport; - oldstate = tcp->tcp_state; /* * At this point the remote destination address and remote port fields * in the tcp-four-tuple have been filled in the tcp structure. Now we - * have to see which state tcp was in so we can take apropriate action. + * have to see which state tcp was in so we can take appropriate action. */ - if (oldstate == TCPS_IDLE) { + if (tcp->tcp_state == TCPS_IDLE) { /* * We support a quick connect capability here, allowing * clients to transition directly from IDLE to SYN_SENT @@ -6438,128 +5375,55 @@ tcp_connect_ipv6(tcp_t *tcp, in6_addr_t *dstaddrp, in_port_t dstport, */ lport = tcp_update_next_port(tcps->tcps_next_port_to_try, tcp, B_TRUE); - lport = tcp_bindi(tcp, lport, &tcp->tcp_ip_src_v6, 0, B_TRUE, + lport = tcp_bindi(tcp, lport, &connp->conn_laddr_v6, 0, B_TRUE, B_FALSE, B_FALSE); - if (lport == 0) { - error = -TNOADDR; - goto failed; - } + if (lport == 0) + return (-TNOADDR); } - tcp->tcp_state = TCPS_SYN_SENT; - - mp = allocb(sizeof (ire_t), BPRI_HI); - if (mp != NULL) { - in6_addr_t v6src; - - mp->b_wptr += sizeof (ire_t); - mp->b_datap->db_type = IRE_DB_REQ_TYPE; - tcp->tcp_hard_binding = 1; - - /* - * We need to make sure that the conn_recv is set to a non-null - * value before we insert the conn_t into the classifier table. 
- * This is to avoid a race with an incoming packet which does - * an ipcl_classify(). - */ - tcp->tcp_connp->conn_recv = tcp_input; + /* + * Lookup the route to determine a source address and the uinfo. + * If there was a source route we have tcp_ip6h->ip6_dst as the first + * hop. + * Setup TCP parameters based on the metrics/DCE. + */ + error = tcp_set_destination(tcp); + if (error != 0) + return (error); - if (tcp->tcp_ipversion == IPV4_VERSION) { - IN6_IPADDR_TO_V4MAPPED(tcp->tcp_ipha->ipha_src, &v6src); - } else { - v6src = tcp->tcp_ip6h->ip6_src; - } - error = ip_proto_bind_connected_v6(connp, &mp, IPPROTO_TCP, - &v6src, tcp->tcp_lport, &tcp->tcp_remote_v6, - &tcp->tcp_sticky_ipp, tcp->tcp_fport, B_TRUE, B_TRUE, cr); - BUMP_MIB(&tcps->tcps_mib, tcpActiveOpens); - tcp->tcp_active_open = 1; + /* + * Don't let an endpoint connect to itself. + */ + if (IN6_ARE_ADDR_EQUAL(&connp->conn_faddr_v6, &connp->conn_laddr_v6) && + connp->conn_fport == connp->conn_lport) + return (-TBADADDR); - return (tcp_post_ip_bind(tcp, mp, error, cr, pid)); - } - /* Error case */ - tcp->tcp_state = oldstate; - error = ENOMEM; + tcp->tcp_state = TCPS_SYN_SENT; -failed: - /* return error ack and blow away saved option results if any */ - if (tcp->tcp_conn.tcp_opts_conn_req != NULL) - tcp_close_mpp(&tcp->tcp_conn.tcp_opts_conn_req); - return (error); + return (ipcl_conn_insert_v6(connp)); } /* - * We need a stream q for detached closing tcp connections - * to use. Our client hereby indicates that this q is the - * one to use. + * Disconnect + * Note that unlike other functions this returns a positive tli error + * when it fails; it never returns an errno. 
*/ -static void -tcp_def_q_set(tcp_t *tcp, mblk_t *mp) -{ - struct iocblk *iocp = (struct iocblk *)mp->b_rptr; - queue_t *q = tcp->tcp_wq; - tcp_stack_t *tcps = tcp->tcp_tcps; - -#ifdef NS_DEBUG - (void) printf("TCP_IOC_DEFAULT_Q for stack %d\n", - tcps->tcps_netstack->netstack_stackid); -#endif - mp->b_datap->db_type = M_IOCACK; - iocp->ioc_count = 0; - mutex_enter(&tcps->tcps_g_q_lock); - if (tcps->tcps_g_q != NULL) { - mutex_exit(&tcps->tcps_g_q_lock); - iocp->ioc_error = EALREADY; - } else { - int error = 0; - conn_t *connp = tcp->tcp_connp; - ip_stack_t *ipst = connp->conn_netstack->netstack_ip; - - tcps->tcps_g_q = tcp->tcp_rq; - mutex_exit(&tcps->tcps_g_q_lock); - iocp->ioc_error = 0; - iocp->ioc_rval = 0; - /* - * We are passing tcp_sticky_ipp as NULL - * as it is not useful for tcp_default queue - * - * Set conn_recv just in case. - */ - tcp->tcp_connp->conn_recv = tcp_conn_request; - - ASSERT(connp->conn_af_isv6); - connp->conn_ulp = IPPROTO_TCP; - - if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_TCP].connf_head != - NULL || (connp->conn_mac_mode != CONN_MAC_DEFAULT)) { - error = -TBADADDR; - } else { - connp->conn_srcv6 = ipv6_all_zeros; - ipcl_proto_insert_v6(connp, IPPROTO_TCP); - } - - (void) tcp_post_ip_bind(tcp, NULL, error, NULL, 0); - } - qreply(q, mp); -} - static int tcp_disconnect_common(tcp_t *tcp, t_scalar_t seqnum) { tcp_t *ltcp = NULL; - conn_t *connp; + conn_t *lconnp; tcp_stack_t *tcps = tcp->tcp_tcps; + conn_t *connp = tcp->tcp_connp; /* * Right now, upper modules pass down a T_DISCON_REQ to TCP, * when the stream is in BOUND state. Do not send a reset, * since the destination IP address is not valid, and it can * be the initialized value of all zeros (broadcast address). - * - * XXX There won't be any pending bind request to IP. 
*/ - if (tcp->tcp_state <= TCPS_BOUND) { - if (tcp->tcp_debug) { + if (tcp->tcp_state <= TCPS_BOUND || tcp->tcp_hard_binding) { + if (connp->conn_debug) { (void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, "tcp_disconnect: bad state, %d", tcp->tcp_state); } @@ -6595,19 +5459,23 @@ tcp_disconnect_common(tcp_t *tcp, t_scalar_t seqnum) * If it used to be a listener, check to make sure no one else * has taken the port before switching back to LISTEN state. */ - if (tcp->tcp_ipversion == IPV4_VERSION) { - connp = ipcl_lookup_listener_v4(tcp->tcp_lport, - tcp->tcp_ipha->ipha_src, - tcp->tcp_connp->conn_zoneid, ipst); - if (connp != NULL) - ltcp = connp->conn_tcp; + if (connp->conn_ipversion == IPV4_VERSION) { + lconnp = ipcl_lookup_listener_v4(connp->conn_lport, + connp->conn_laddr_v4, IPCL_ZONEID(connp), ipst); + if (lconnp != NULL) + ltcp = lconnp->conn_tcp; } else { - /* Allow tcp_bound_if listeners? */ - connp = ipcl_lookup_listener_v6(tcp->tcp_lport, - &tcp->tcp_ip6h->ip6_src, 0, - tcp->tcp_connp->conn_zoneid, ipst); - if (connp != NULL) - ltcp = connp->conn_tcp; + uint_t ifindex = 0; + + if (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET) + ifindex = connp->conn_ixa->ixa_scopeid; + + /* Allow conn_bound_if listeners? 
*/ + lconnp = ipcl_lookup_listener_v6(connp->conn_lport, + &connp->conn_laddr_v6, ifindex, IPCL_ZONEID(connp), + ipst); + if (lconnp != NULL) + ltcp = lconnp->conn_tcp; } if (tcp->tcp_conn_req_max && ltcp == NULL) { tcp->tcp_state = TCPS_LISTEN; @@ -6616,7 +5484,7 @@ tcp_disconnect_common(tcp_t *tcp, t_scalar_t seqnum) tcp->tcp_state = TCPS_BOUND; } if (ltcp != NULL) - CONN_DEC_REF(ltcp->tcp_connp); + CONN_DEC_REF(lconnp); if (old_state == TCPS_SYN_SENT || old_state == TCPS_SYN_RCVD) { BUMP_MIB(&tcps->tcps_mib, tcpAttemptFails); } else if (old_state == TCPS_ESTABLISHED || @@ -6648,7 +5516,7 @@ tcp_disconnect_common(tcp_t *tcp, t_scalar_t seqnum) /* * Our client hereby directs us to reject the connection request - * that tcp_conn_request() marked with 'seqnum'. Rejection consists + * that tcp_input_listener() marked with 'seqnum'. Rejection consists * of sending the appropriate RST, not an ICMP error. */ static void @@ -6656,6 +5524,7 @@ tcp_disconnect(tcp_t *tcp, mblk_t *mp) { t_scalar_t seqnum; int error; + conn_t *connp = tcp->tcp_connp; ASSERT((uintptr_t)(mp->b_wptr - mp->b_rptr) <= (uintptr_t)INT_MAX); if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_discon_req)) { @@ -6669,11 +5538,11 @@ tcp_disconnect(tcp_t *tcp, mblk_t *mp) else { if (tcp->tcp_state >= TCPS_ESTABLISHED) { /* Send M_FLUSH according to TPI */ - (void) putnextctl1(tcp->tcp_rq, M_FLUSH, FLUSHRW); + (void) putnextctl1(connp->conn_rq, M_FLUSH, FLUSHRW); } mp = mi_tpi_ok_ack_alloc(mp); - if (mp) - putnext(tcp->tcp_rq, mp); + if (mp != NULL) + putnext(connp->conn_rq, mp); } } @@ -6695,6 +5564,7 @@ tcp_display(tcp_t *tcp, char *sup_buf, char format) in6_addr_t local, remote; char local_addrbuf[INET6_ADDRSTRLEN]; char remote_addrbuf[INET6_ADDRSTRLEN]; + conn_t *connp; if (sup_buf != NULL) buf = sup_buf; @@ -6703,6 +5573,8 @@ tcp_display(tcp_t *tcp, char *sup_buf, char format) if (tcp == NULL) return ("NULL_TCP"); + + connp = tcp->tcp_connp; switch (tcp->tcp_state) { case TCPS_CLOSED: cp = 
"TCP_CLOSED"; @@ -6750,32 +5622,32 @@ tcp_display(tcp_t *tcp, char *sup_buf, char format) } switch (format) { case DISP_ADDR_AND_PORT: - if (tcp->tcp_ipversion == IPV4_VERSION) { + if (connp->conn_ipversion == IPV4_VERSION) { /* * Note that we use the remote address in the tcp_b * structure. This means that it will print out * the real destination address, not the next hop's * address if source routing is used. */ - IN6_IPADDR_TO_V4MAPPED(tcp->tcp_ip_src, &local); - IN6_IPADDR_TO_V4MAPPED(tcp->tcp_remote, &remote); + IN6_IPADDR_TO_V4MAPPED(connp->conn_laddr_v4, &local); + IN6_IPADDR_TO_V4MAPPED(connp->conn_faddr_v4, &remote); } else { - local = tcp->tcp_ip_src_v6; - remote = tcp->tcp_remote_v6; + local = connp->conn_laddr_v6; + remote = connp->conn_faddr_v6; } (void) inet_ntop(AF_INET6, &local, local_addrbuf, sizeof (local_addrbuf)); (void) inet_ntop(AF_INET6, &remote, remote_addrbuf, sizeof (remote_addrbuf)); (void) mi_sprintf(buf, "[%s.%u, %s.%u] %s", - local_addrbuf, ntohs(tcp->tcp_lport), remote_addrbuf, - ntohs(tcp->tcp_fport), cp); + local_addrbuf, ntohs(connp->conn_lport), remote_addrbuf, + ntohs(connp->conn_fport), cp); break; case DISP_PORT_ONLY: default: (void) mi_sprintf(buf, "[%u, %u] %s", - ntohs(tcp->tcp_lport), ntohs(tcp->tcp_fport), cp); + ntohs(connp->conn_lport), ntohs(connp->conn_fport), cp); break; } @@ -6788,26 +5660,24 @@ tcp_display(tcp_t *tcp, char *sup_buf, char format) * eager to disappear either by means of tcp_eager_blowoff() or * tcp_eager_cleanup() being called. tcp_eager_kill() can also be * called (via squeue) if the eager cannot be inserted in the - * fanout table in tcp_conn_request(). + * fanout table in tcp_input_listener(). 
*/ /* ARGSUSED */ void -tcp_eager_kill(void *arg, mblk_t *mp, void *arg2) +tcp_eager_kill(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *dummy) { conn_t *econnp = (conn_t *)arg; tcp_t *eager = econnp->conn_tcp; tcp_t *listener = eager->tcp_listener; - tcp_stack_t *tcps = eager->tcp_tcps; /* * We could be called because listener is closing. Since - * the eager is using listener's queue's, its not safe. - * Better use the default queue just to send the TH_RST - * out. + * the eager was using listener's queue's, we avoid + * using the listeners queues from now on. */ - ASSERT(tcps->tcps_g_q != NULL); - eager->tcp_rq = tcps->tcps_g_q; - eager->tcp_wq = WR(tcps->tcps_g_q); + ASSERT(eager->tcp_detached); + econnp->conn_rq = NULL; + econnp->conn_wq = NULL; /* * An eager's conn_fanout will be NULL if it's a duplicate @@ -6828,7 +5698,7 @@ tcp_eager_kill(void *arg, mblk_t *mp, void *arg2) * The eager has sent a conn_ind up to the * listener but listener decides to close * instead. We need to drop the extra ref - * placed on eager in tcp_rput_data() before + * placed on eager in tcp_input_data() before * sending the conn_ind to listener. 
*/ CONN_DEC_REF(econnp); @@ -6873,7 +5743,7 @@ tcp_eager_blowoff(tcp_t *listener, t_scalar_t seqnum) mutex_exit(&listener->tcp_eager_lock); mp = &eager->tcp_closemp; SQUEUE_ENTER_ONE(eager->tcp_connp->conn_sqp, mp, tcp_eager_kill, - eager->tcp_connp, SQ_FILL, SQTAG_TCP_EAGER_BLOWOFF); + eager->tcp_connp, NULL, SQ_FILL, SQTAG_TCP_EAGER_BLOWOFF); return (B_TRUE); } @@ -6901,7 +5771,7 @@ tcp_eager_cleanup(tcp_t *listener, boolean_t q0_only) CONN_INC_REF(eager->tcp_connp); mp = &eager->tcp_closemp; SQUEUE_ENTER_ONE(eager->tcp_connp->conn_sqp, mp, - tcp_eager_kill, eager->tcp_connp, + tcp_eager_kill, eager->tcp_connp, NULL, SQ_FILL, SQTAG_TCP_EAGER_CLEANUP); } eager = eager->tcp_eager_next_q; @@ -6917,7 +5787,7 @@ tcp_eager_cleanup(tcp_t *listener, boolean_t q0_only) CONN_INC_REF(eager->tcp_connp); mp = &eager->tcp_closemp; SQUEUE_ENTER_ONE(eager->tcp_connp->conn_sqp, mp, - tcp_eager_kill, eager->tcp_connp, SQ_FILL, + tcp_eager_kill, eager->tcp_connp, NULL, SQ_FILL, SQTAG_TCP_EAGER_CLEANUP_Q0); } eager = eager->tcp_eager_next_q0; @@ -7008,7 +5878,7 @@ static void tcp_err_ack(tcp_t *tcp, mblk_t *mp, int t_error, int sys_error) { if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) - putnext(tcp->tcp_rq, mp); + putnext(tcp->tcp_connp->conn_rq, mp); } /* Shorthand to generate and send TPI error acks to our client */ @@ -7024,7 +5894,7 @@ tcp_err_ack_prim(tcp_t *tcp, mblk_t *mp, int primitive, teackp->ERROR_prim = primitive; teackp->TLI_error = t_error; teackp->UNIX_error = sys_error; - putnext(tcp->tcp_rq, mp); + putnext(tcp->tcp_connp->conn_rq, mp); } } @@ -7194,8 +6064,9 @@ static void tcp_copy_info(struct T_info_ack *tia, tcp_t *tcp) { tcp_stack_t *tcps = tcp->tcp_tcps; + conn_t *connp = tcp->tcp_connp; - if (tcp->tcp_family == AF_INET6) + if (connp->conn_family == AF_INET6) *tia = tcp_g_t_info_ack_v6; else *tia = tcp_g_t_info_ack; @@ -7203,7 +6074,7 @@ tcp_copy_info(struct T_info_ack *tia, tcp_t *tcp) tia->OPT_size = tcp_max_optsize; if (tcp->tcp_mss == 0) 
{ /* Not yet set - tcp_open does not set mss */ - if (tcp->tcp_ipversion == IPV4_VERSION) + if (connp->conn_ipversion == IPV4_VERSION) tia->TIDU_size = tcps->tcps_mss_def_ipv4; else tia->TIDU_size = tcps->tcps_mss_def_ipv6; @@ -7258,7 +6129,7 @@ tcp_capability_req(tcp_t *tcp, mblk_t *mp) tcap = (struct T_capability_ack *)mp->b_rptr; tcp_do_capability_ack(tcp, tcap, cap_bits1); - putnext(tcp->tcp_rq, mp); + putnext(tcp->tcp_connp->conn_rq, mp); } /* @@ -7276,16 +6147,18 @@ tcp_info_req(tcp_t *tcp, mblk_t *mp) return; } tcp_copy_info((struct T_info_ack *)mp->b_rptr, tcp); - putnext(tcp->tcp_rq, mp); + putnext(tcp->tcp_connp->conn_rq, mp); } /* Respond to the TPI addr request */ static void tcp_addr_req(tcp_t *tcp, mblk_t *mp) { - sin_t *sin; + struct sockaddr *sa; mblk_t *ackmp; struct T_addr_ack *taa; + conn_t *connp = tcp->tcp_connp; + uint_t addrlen; /* Make it large enough for worst case */ ackmp = reallocb(mp, sizeof (struct T_addr_ack) + @@ -7295,10 +6168,6 @@ tcp_addr_req(tcp_t *tcp, mblk_t *mp) return; } - if (tcp->tcp_ipversion == IPV6_VERSION) { - tcp_addr_req_ipv6(tcp, ackmp); - return; - } taa = (struct T_addr_ack *)ackmp->b_rptr; bzero(taa, sizeof (struct T_addr_ack)); @@ -7307,110 +6176,38 @@ tcp_addr_req(tcp_t *tcp, mblk_t *mp) taa->PRIM_type = T_ADDR_ACK; ackmp->b_datap->db_type = M_PCPROTO; + if (connp->conn_family == AF_INET) + addrlen = sizeof (sin_t); + else + addrlen = sizeof (sin6_t); + /* * Note: Following code assumes 32 bit alignment of basic * data structures like sin_t and struct T_addr_ack. 
*/ if (tcp->tcp_state >= TCPS_BOUND) { /* - * Fill in local address + * Fill in local address first */ - taa->LOCADDR_length = sizeof (sin_t); taa->LOCADDR_offset = sizeof (*taa); - - sin = (sin_t *)&taa[1]; - - /* Fill zeroes and then intialize non-zero fields */ - *sin = sin_null; - - sin->sin_family = AF_INET; - - sin->sin_addr.s_addr = tcp->tcp_ipha->ipha_src; - sin->sin_port = *(uint16_t *)tcp->tcp_tcph->th_lport; - - ackmp->b_wptr = (uchar_t *)&sin[1]; - - if (tcp->tcp_state >= TCPS_SYN_RCVD) { - /* - * Fill in Remote address - */ - taa->REMADDR_length = sizeof (sin_t); - taa->REMADDR_offset = ROUNDUP32(taa->LOCADDR_offset + - taa->LOCADDR_length); - - sin = (sin_t *)(ackmp->b_rptr + taa->REMADDR_offset); - *sin = sin_null; - sin->sin_family = AF_INET; - sin->sin_addr.s_addr = tcp->tcp_remote; - sin->sin_port = tcp->tcp_fport; - - ackmp->b_wptr = (uchar_t *)&sin[1]; - } + taa->LOCADDR_length = addrlen; + sa = (struct sockaddr *)&taa[1]; + (void) conn_getsockname(connp, sa, &addrlen); + ackmp->b_wptr += addrlen; } - putnext(tcp->tcp_rq, ackmp); -} - -/* Assumes that tcp_addr_req gets enough space and alignment */ -static void -tcp_addr_req_ipv6(tcp_t *tcp, mblk_t *ackmp) -{ - sin6_t *sin6; - struct T_addr_ack *taa; - - ASSERT(tcp->tcp_ipversion == IPV6_VERSION); - ASSERT(OK_32PTR(ackmp->b_rptr)); - ASSERT(ackmp->b_wptr - ackmp->b_rptr >= sizeof (struct T_addr_ack) + - 2 * sizeof (sin6_t)); - - taa = (struct T_addr_ack *)ackmp->b_rptr; - - bzero(taa, sizeof (struct T_addr_ack)); - ackmp->b_wptr = (uchar_t *)&taa[1]; - - taa->PRIM_type = T_ADDR_ACK; - ackmp->b_datap->db_type = M_PCPROTO; - - /* - * Note: Following code assumes 32 bit alignment of basic - * data structures like sin6_t and struct T_addr_ack. 
- */ - if (tcp->tcp_state >= TCPS_BOUND) { + if (tcp->tcp_state >= TCPS_SYN_RCVD) { /* - * Fill in local address + * Fill in Remote address */ - taa->LOCADDR_length = sizeof (sin6_t); - taa->LOCADDR_offset = sizeof (*taa); - - sin6 = (sin6_t *)&taa[1]; - *sin6 = sin6_null; - - sin6->sin6_family = AF_INET6; - sin6->sin6_addr = tcp->tcp_ip6h->ip6_src; - sin6->sin6_port = tcp->tcp_lport; - - ackmp->b_wptr = (uchar_t *)&sin6[1]; - - if (tcp->tcp_state >= TCPS_SYN_RCVD) { - /* - * Fill in Remote address - */ - taa->REMADDR_length = sizeof (sin6_t); - taa->REMADDR_offset = ROUNDUP32(taa->LOCADDR_offset + - taa->LOCADDR_length); - - sin6 = (sin6_t *)(ackmp->b_rptr + taa->REMADDR_offset); - *sin6 = sin6_null; - sin6->sin6_family = AF_INET6; - sin6->sin6_flowinfo = - tcp->tcp_ip6h->ip6_vcf & - ~IPV6_VERS_AND_FLOW_MASK; - sin6->sin6_addr = tcp->tcp_remote_v6; - sin6->sin6_port = tcp->tcp_fport; - - ackmp->b_wptr = (uchar_t *)&sin6[1]; - } + taa->REMADDR_length = addrlen; + /* assumed 32-bit alignment */ + taa->REMADDR_offset = taa->LOCADDR_offset + taa->LOCADDR_length; + sa = (struct sockaddr *)(ackmp->b_rptr + taa->REMADDR_offset); + (void) conn_getpeername(connp, sa, &addrlen); + ackmp->b_wptr += addrlen; } - putnext(tcp->tcp_rq, ackmp); + ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); + putnext(tcp->tcp_connp->conn_rq, ackmp); } /* @@ -7420,19 +6217,19 @@ tcp_addr_req_ipv6(tcp_t *tcp, mblk_t *ackmp) static void tcp_reinit(tcp_t *tcp) { - mblk_t *mp; - int err; + mblk_t *mp; tcp_stack_t *tcps = tcp->tcp_tcps; + conn_t *connp = tcp->tcp_connp; TCP_STAT(tcps, tcp_reinit_calls); /* tcp_reinit should never be called for detached tcp_t's */ ASSERT(tcp->tcp_listener == NULL); - ASSERT((tcp->tcp_family == AF_INET && - tcp->tcp_ipversion == IPV4_VERSION) || - (tcp->tcp_family == AF_INET6 && - (tcp->tcp_ipversion == IPV4_VERSION || - tcp->tcp_ipversion == IPV6_VERSION))); + ASSERT((connp->conn_family == AF_INET && + connp->conn_ipversion == IPV4_VERSION) || + (connp->conn_family 
== AF_INET6 && + (connp->conn_ipversion == IPV4_VERSION || + connp->conn_ipversion == IPV6_VERSION))); /* Cancel outstanding timers */ tcp_timers_stop(tcp); @@ -7453,7 +6250,7 @@ tcp_reinit(tcp_t *tcp) tcp->tcp_unsent = tcp->tcp_xmit_tail_unsent = 0; mutex_enter(&tcp->tcp_non_sq_lock); if (tcp->tcp_flow_stopped && - TCP_UNSENT_BYTES(tcp) <= tcp->tcp_xmit_lowater) { + TCP_UNSENT_BYTES(tcp) <= connp->conn_sndlowat) { tcp_clrqfull(tcp); } mutex_exit(&tcp->tcp_non_sq_lock); @@ -7494,7 +6291,7 @@ tcp_reinit(tcp_t *tcp) */ tcp_close_mpp(&tcp->tcp_conn.tcp_eager_conn_ind); - CL_INET_DISCONNECT(tcp->tcp_connp, tcp); + CL_INET_DISCONNECT(connp); /* * The connection can't be on the tcp_time_wait_head list @@ -7522,14 +6319,12 @@ tcp_reinit(tcp_t *tcp) * Reset/preserve other values */ tcp_reinit_values(tcp); - ipcl_hash_remove(tcp->tcp_connp); - conn_delete_ire(tcp->tcp_connp, NULL); + ipcl_hash_remove(connp); + ixa_cleanup(connp->conn_ixa); tcp_ipsec_cleanup(tcp); - if (tcp->tcp_connp->conn_effective_cred != NULL) { - crfree(tcp->tcp_connp->conn_effective_cred); - tcp->tcp_connp->conn_effective_cred = NULL; - } + connp->conn_laddr_v6 = connp->conn_bound_addr_v6; + connp->conn_saddr_v6 = connp->conn_bound_addr_v6; if (tcp->tcp_conn_req_max != 0) { /* @@ -7553,44 +6348,31 @@ tcp_reinit(tcp_t *tcp) tcp->tcp_eager_next_q0 = tcp->tcp_eager_prev_q0 = tcp; tcp->tcp_eager_next_drop_q0 = tcp; tcp->tcp_eager_prev_drop_q0 = tcp; - tcp->tcp_connp->conn_recv = tcp_conn_request; - if (tcp->tcp_family == AF_INET6) { - ASSERT(tcp->tcp_connp->conn_af_isv6); - (void) ipcl_bind_insert_v6(tcp->tcp_connp, IPPROTO_TCP, - &tcp->tcp_ip6h->ip6_src, tcp->tcp_lport); - } else { - ASSERT(!tcp->tcp_connp->conn_af_isv6); - (void) ipcl_bind_insert(tcp->tcp_connp, IPPROTO_TCP, - tcp->tcp_ipha->ipha_src, tcp->tcp_lport); - } + /* + * Initially set conn_recv to tcp_input_listener_unbound to try + * to pick a good squeue for the listener when the first SYN + * arrives. 
tcp_input_listener_unbound sets it to + * tcp_input_listener on that first SYN. + */ + connp->conn_recv = tcp_input_listener_unbound; + + connp->conn_proto = IPPROTO_TCP; + connp->conn_faddr_v6 = ipv6_all_zeros; + connp->conn_fport = 0; + + (void) ipcl_bind_insert(connp); } else { tcp->tcp_state = TCPS_BOUND; } /* * Initialize to default values - * Can't fail since enough header template space already allocated - * at open(). - */ - err = tcp_init_values(tcp); - ASSERT(err == 0); - /* Restore state in tcp_tcph */ - bcopy(&tcp->tcp_lport, tcp->tcp_tcph->th_lport, TCP_PORT_LEN); - if (tcp->tcp_ipversion == IPV4_VERSION) - tcp->tcp_ipha->ipha_src = tcp->tcp_bound_source; - else - tcp->tcp_ip6h->ip6_src = tcp->tcp_bound_source_v6; - /* - * Copy of the src addr. in tcp_t is needed in tcp_t - * since the lookup funcs can only lookup on tcp_t */ - tcp->tcp_ip_src_v6 = tcp->tcp_bound_source_v6; + tcp_init_values(tcp); ASSERT(tcp->tcp_ptpbhn != NULL); - tcp->tcp_recv_hiwater = tcps->tcps_recv_hiwat; - tcp->tcp_recv_lowater = tcp_rinfo.mi_lowat; - tcp->tcp_rwnd = tcps->tcps_recv_hiwat; - tcp->tcp_mss = tcp->tcp_ipversion != IPV4_VERSION ? + tcp->tcp_rwnd = connp->conn_rcvbuf; + tcp->tcp_mss = connp->conn_ipversion != IPV4_VERSION ? 
tcps->tcps_mss_def_ipv6 : tcps->tcps_mss_def_ipv4; } @@ -7606,6 +6388,7 @@ tcp_reinit_values(tcp) tcp_t *tcp; { tcp_stack_t *tcps = tcp->tcp_tcps; + conn_t *connp = tcp->tcp_connp; #ifndef lint #define DONTCARE(x) @@ -7626,8 +6409,8 @@ tcp_reinit_values(tcp) ASSERT(tcp->tcp_time_wait_prev == NULL); ASSERT(tcp->tcp_time_wait_expire == 0); PRESERVE(tcp->tcp_state); - PRESERVE(tcp->tcp_rq); - PRESERVE(tcp->tcp_wq); + PRESERVE(connp->conn_rq); + PRESERVE(connp->conn_wq); ASSERT(tcp->tcp_xmit_head == NULL); ASSERT(tcp->tcp_xmit_last == NULL); @@ -7638,26 +6421,32 @@ tcp_reinit_values(tcp) tcp->tcp_snxt = 0; /* Displayed in mib */ tcp->tcp_suna = 0; /* Displayed in mib */ tcp->tcp_swnd = 0; - DONTCARE(tcp->tcp_cwnd); /* Init in tcp_mss_set */ + DONTCARE(tcp->tcp_cwnd); /* Init in tcp_process_options */ ASSERT(tcp->tcp_ibsegs == 0); ASSERT(tcp->tcp_obsegs == 0); - if (tcp->tcp_iphc != NULL) { - ASSERT(tcp->tcp_iphc_len >= TCP_MAX_COMBINED_HEADER_LENGTH); - bzero(tcp->tcp_iphc, tcp->tcp_iphc_len); + if (connp->conn_ht_iphc != NULL) { + kmem_free(connp->conn_ht_iphc, connp->conn_ht_iphc_allocated); + connp->conn_ht_iphc = NULL; + connp->conn_ht_iphc_allocated = 0; + connp->conn_ht_iphc_len = 0; + connp->conn_ht_ulp = NULL; + connp->conn_ht_ulp_len = 0; + tcp->tcp_ipha = NULL; + tcp->tcp_ip6h = NULL; + tcp->tcp_tcpha = NULL; } + /* We clear any IP_OPTIONS and extension headers */ + ip_pkt_free(&connp->conn_xmit_ipp); + DONTCARE(tcp->tcp_naglim); /* Init in tcp_init_values */ - DONTCARE(tcp->tcp_hdr_len); /* Init in tcp_init_values */ DONTCARE(tcp->tcp_ipha); DONTCARE(tcp->tcp_ip6h); - DONTCARE(tcp->tcp_ip_hdr_len); - DONTCARE(tcp->tcp_tcph); - DONTCARE(tcp->tcp_tcp_hdr_len); /* Init in tcp_init_values */ + DONTCARE(tcp->tcp_tcpha); tcp->tcp_valid_bits = 0; - DONTCARE(tcp->tcp_xmit_hiwater); /* Init in tcp_init_values */ DONTCARE(tcp->tcp_timer_backoff); /* Init in tcp_init_values */ DONTCARE(tcp->tcp_last_recv_time); /* Init in tcp_init_values */ tcp->tcp_last_rcv_lbolt = 0; 
@@ -7666,38 +6455,19 @@ tcp_reinit_values(tcp) tcp->tcp_urp_last_valid = 0; tcp->tcp_hard_binding = 0; - tcp->tcp_hard_bound = 0; - PRESERVE(tcp->tcp_cred); - PRESERVE(tcp->tcp_cpid); - PRESERVE(tcp->tcp_open_time); - PRESERVE(tcp->tcp_exclbind); tcp->tcp_fin_acked = 0; tcp->tcp_fin_rcvd = 0; tcp->tcp_fin_sent = 0; tcp->tcp_ordrel_done = 0; - tcp->tcp_debug = 0; - tcp->tcp_dontroute = 0; - tcp->tcp_broadcast = 0; - - tcp->tcp_useloopback = 0; - tcp->tcp_reuseaddr = 0; - tcp->tcp_oobinline = 0; - tcp->tcp_dgram_errind = 0; - tcp->tcp_detached = 0; - tcp->tcp_bind_pending = 0; - tcp->tcp_unbind_pending = 0; tcp->tcp_snd_ws_ok = B_FALSE; tcp->tcp_snd_ts_ok = B_FALSE; - tcp->tcp_linger = 0; - tcp->tcp_ka_enabled = 0; tcp->tcp_zero_win_probe = 0; tcp->tcp_loopback = 0; - tcp->tcp_refuse = 0; tcp->tcp_localnet = 0; tcp->tcp_syn_defense = 0; tcp->tcp_set_timer = 0; @@ -7707,19 +6477,12 @@ tcp_reinit_values(tcp) tcp->tcp_xmit_zc_clean = B_FALSE; tcp->tcp_snd_sack_ok = B_FALSE; - PRESERVE(tcp->tcp_recvdstaddr); tcp->tcp_hwcksum = B_FALSE; - tcp->tcp_ire_ill_check_done = B_FALSE; - DONTCARE(tcp->tcp_maxpsz); /* Init in tcp_init_values */ - - tcp->tcp_mdt = B_FALSE; - tcp->tcp_mdt_hdr_head = 0; - tcp->tcp_mdt_hdr_tail = 0; + DONTCARE(tcp->tcp_maxpsz_multiplier); /* Init in tcp_init_values */ tcp->tcp_conn_def_q0 = 0; tcp->tcp_ip_forward_progress = B_FALSE; - tcp->tcp_anon_priv_bind = 0; tcp->tcp_ecn_ok = B_FALSE; tcp->tcp_cwr = B_FALSE; @@ -7740,7 +6503,7 @@ tcp_reinit_values(tcp) tcp->tcp_ts_recent = 0; tcp->tcp_rnxt = 0; /* Displayed in mib */ DONTCARE(tcp->tcp_rwnd); /* Set in tcp_reinit() */ - tcp->tcp_if_mtu = 0; + tcp->tcp_initial_pmtu = 0; ASSERT(tcp->tcp_reass_head == NULL); ASSERT(tcp->tcp_reass_tail == NULL); @@ -7752,7 +6515,7 @@ tcp_reinit_values(tcp) ASSERT(tcp->tcp_rcv_last_tail == NULL); ASSERT(tcp->tcp_rcv_cnt == 0); - DONTCARE(tcp->tcp_cwnd_ssthresh); /* Init in tcp_adapt_ire */ + DONTCARE(tcp->tcp_cwnd_ssthresh); /* Init in tcp_set_destination */ 
DONTCARE(tcp->tcp_cwnd_max); /* Init in tcp_init_values */ tcp->tcp_csuna = 0; @@ -7773,8 +6536,6 @@ tcp_reinit_values(tcp) ASSERT(tcp->tcp_listener == NULL); - DONTCARE(tcp->tcp_xmit_lowater); /* Init in tcp_init_values */ - DONTCARE(tcp->tcp_irs); /* tcp_valid_bits cleared */ DONTCARE(tcp->tcp_iss); /* tcp_valid_bits cleared */ DONTCARE(tcp->tcp_fss); /* tcp_valid_bits cleared */ @@ -7785,14 +6546,11 @@ tcp_reinit_values(tcp) PRESERVE(tcp->tcp_conn_req_max); PRESERVE(tcp->tcp_conn_req_seqnum); - DONTCARE(tcp->tcp_ip_hdr_len); /* Init in tcp_init_values */ DONTCARE(tcp->tcp_first_timer_threshold); /* Init in tcp_init_values */ DONTCARE(tcp->tcp_second_timer_threshold); /* Init in tcp_init_values */ DONTCARE(tcp->tcp_first_ctimer_threshold); /* Init in tcp_init_values */ DONTCARE(tcp->tcp_second_ctimer_threshold); /* in tcp_init_values */ - tcp->tcp_lingertime = 0; - DONTCARE(tcp->tcp_urp_last); /* tcp_urp_last_valid is cleared */ ASSERT(tcp->tcp_urp_mp == NULL); ASSERT(tcp->tcp_urp_mark_mp == NULL); @@ -7811,16 +6569,16 @@ tcp_reinit_values(tcp) tcp->tcp_client_errno = 0; - DONTCARE(tcp->tcp_sum); /* Init in tcp_init_values */ + DONTCARE(connp->conn_sum); /* Init in tcp_init_values */ - tcp->tcp_remote_v6 = ipv6_all_zeros; /* Displayed in MIB */ + connp->conn_faddr_v6 = ipv6_all_zeros; /* Displayed in MIB */ - PRESERVE(tcp->tcp_bound_source_v6); + PRESERVE(connp->conn_bound_addr_v6); tcp->tcp_last_sent_len = 0; tcp->tcp_dupack_cnt = 0; - tcp->tcp_fport = 0; /* Displayed in MIB */ - PRESERVE(tcp->tcp_lport); + connp->conn_fport = 0; /* Displayed in MIB */ + PRESERVE(connp->conn_lport); PRESERVE(tcp->tcp_acceptor_lockp); @@ -7828,16 +6586,18 @@ tcp_reinit_values(tcp) PRESERVE(tcp->tcp_acceptor_id); DONTCARE(tcp->tcp_ipsec_overhead); - PRESERVE(tcp->tcp_family); - if (tcp->tcp_family == AF_INET6) { + PRESERVE(connp->conn_family); + /* Remove any remnants of mapped address binding */ + if (connp->conn_family == AF_INET6) { + connp->conn_ipversion = IPV6_VERSION; 
tcp->tcp_mss = tcps->tcps_mss_def_ipv6; } else { + connp->conn_ipversion = IPV4_VERSION; tcp->tcp_mss = tcps->tcps_mss_def_ipv4; } - PRESERVE(tcp->tcp_ipversion); /* Init in tcp_init_values */ - tcp->tcp_bound_if = 0; - tcp->tcp_ipv6_recvancillary = 0; + connp->conn_bound_if = 0; + connp->conn_recv_ancillary.crb_all = 0; tcp->tcp_recvifindex = 0; tcp->tcp_recvhops = 0; tcp->tcp_closed = 0; @@ -7854,19 +6614,18 @@ tcp_reinit_values(tcp) tcp->tcp_dstoptslen = 0; } ASSERT(tcp->tcp_dstoptslen == 0); - if (tcp->tcp_rtdstopts != NULL) { - mi_free(tcp->tcp_rtdstopts); - tcp->tcp_rtdstopts = NULL; - tcp->tcp_rtdstoptslen = 0; + if (tcp->tcp_rthdrdstopts != NULL) { + mi_free(tcp->tcp_rthdrdstopts); + tcp->tcp_rthdrdstopts = NULL; + tcp->tcp_rthdrdstoptslen = 0; } - ASSERT(tcp->tcp_rtdstoptslen == 0); + ASSERT(tcp->tcp_rthdrdstoptslen == 0); if (tcp->tcp_rthdr != NULL) { mi_free(tcp->tcp_rthdr); tcp->tcp_rthdr = NULL; tcp->tcp_rthdrlen = 0; } ASSERT(tcp->tcp_rthdrlen == 0); - PRESERVE(tcp->tcp_drop_opt_ack_cnt); /* Reset fusion-related fields */ tcp->tcp_fused = B_FALSE; @@ -7902,35 +6661,17 @@ tcp_reinit_values(tcp) #undef PRESERVE } -/* - * Allocate necessary resources and initialize state vector. - * Guaranteed not to fail so that when an error is returned, - * the caller doesn't need to do any additional cleanup. 
- */ -int -tcp_init(tcp_t *tcp, queue_t *q) -{ - int err; - - tcp->tcp_rq = q; - tcp->tcp_wq = WR(q); - tcp->tcp_state = TCPS_IDLE; - if ((err = tcp_init_values(tcp)) != 0) - tcp_timers_stop(tcp); - return (err); -} - -static int +static void tcp_init_values(tcp_t *tcp) { - int err; tcp_stack_t *tcps = tcp->tcp_tcps; + conn_t *connp = tcp->tcp_connp; - ASSERT((tcp->tcp_family == AF_INET && - tcp->tcp_ipversion == IPV4_VERSION) || - (tcp->tcp_family == AF_INET6 && - (tcp->tcp_ipversion == IPV4_VERSION || - tcp->tcp_ipversion == IPV6_VERSION))); + ASSERT((connp->conn_family == AF_INET && + connp->conn_ipversion == IPV4_VERSION) || + (connp->conn_family == AF_INET6 && + (connp->conn_ipversion == IPV4_VERSION || + connp->conn_ipversion == IPV6_VERSION))); /* * Initialize tcp_rtt_sa and tcp_rtt_sd so that the calculated RTO @@ -7953,7 +6694,7 @@ tcp_init_values(tcp_t *tcp) tcp->tcp_cwnd_ssthresh = TCP_MAX_LARGEWIN; tcp->tcp_snd_burst = TCP_CWND_INFINITE; - tcp->tcp_maxpsz = tcps->tcps_maxpsz_multiplier; + tcp->tcp_maxpsz_multiplier = tcps->tcps_maxpsz_multiplier; tcp->tcp_first_timer_threshold = tcps->tcps_ip_notify_interval; tcp->tcp_first_ctimer_threshold = tcps->tcps_ip_notify_cinterval; @@ -7966,10 +6707,7 @@ tcp_init_values(tcp_t *tcp) tcp->tcp_naglim = tcps->tcps_naglim_def; - /* NOTE: ISS is now set in tcp_adapt_ire(). */ - - tcp->tcp_mdt_hdr_head = 0; - tcp->tcp_mdt_hdr_tail = 0; + /* NOTE: ISS is now set in tcp_set_destination(). 
*/ /* Reset fusion-related fields */ tcp->tcp_fused = B_FALSE; @@ -7977,280 +6715,84 @@ tcp_init_values(tcp_t *tcp) tcp->tcp_fused_sigurg = B_FALSE; tcp->tcp_loopback_peer = NULL; - /* Initialize the header template */ - if (tcp->tcp_family == AF_INET) { - err = tcp_header_init_ipv4(tcp); - } else { - err = tcp_header_init_ipv6(tcp); - } - if (err) - return (err); + /* We rebuild the header template on the next connect/conn_request */ + + connp->conn_mlp_type = mlptSingle; /* * Init the window scale to the max so tcp_rwnd_set() won't pare - * down tcp_rwnd. tcp_adapt_ire() will set the right value later. + * down tcp_rwnd. tcp_set_destination() will set the right value later. */ tcp->tcp_rcv_ws = TCP_MAX_WINSHIFT; - tcp->tcp_xmit_lowater = tcps->tcps_xmit_lowat; - tcp->tcp_xmit_hiwater = tcps->tcps_xmit_hiwat; - tcp->tcp_recv_hiwater = tcps->tcps_recv_hiwat; - tcp->tcp_rwnd = tcps->tcps_recv_hiwat; - tcp->tcp_recv_lowater = tcp_rinfo.mi_lowat; + tcp->tcp_rwnd = connp->conn_rcvbuf; tcp->tcp_cork = B_FALSE; /* - * Init the tcp_debug option. This value determines whether TCP + * Init the tcp_debug option if it wasn't already set. This value + * determines whether TCP * calls strlog() to print out debug messages. Doing this * initialization here means that this value is not inherited thru * tcp_reinit(). */ - tcp->tcp_debug = tcps->tcps_dbg; + if (!connp->conn_debug) + connp->conn_debug = tcps->tcps_dbg; tcp->tcp_ka_interval = tcps->tcps_keepalive_interval; tcp->tcp_ka_abort_thres = tcps->tcps_keepalive_abort_interval; - - return (0); -} - -/* - * Initialize the IPv4 header. Loses any record of any IP options. - */ -static int -tcp_header_init_ipv4(tcp_t *tcp) -{ - tcph_t *tcph; - uint32_t sum; - conn_t *connp; - tcp_stack_t *tcps = tcp->tcp_tcps; - - /* - * This is a simple initialization. If there's - * already a template, it should never be too small, - * so reuse it. Otherwise, allocate space for the new one. 
- */ - if (tcp->tcp_iphc == NULL) { - ASSERT(tcp->tcp_iphc_len == 0); - tcp->tcp_iphc_len = TCP_MAX_COMBINED_HEADER_LENGTH; - tcp->tcp_iphc = kmem_cache_alloc(tcp_iphc_cache, KM_NOSLEEP); - if (tcp->tcp_iphc == NULL) { - tcp->tcp_iphc_len = 0; - return (ENOMEM); - } - } - - /* options are gone; may need a new label */ - connp = tcp->tcp_connp; - connp->conn_mlp_type = mlptSingle; - connp->conn_ulp_labeled = !is_system_labeled(); - ASSERT(tcp->tcp_iphc_len >= TCP_MAX_COMBINED_HEADER_LENGTH); - - /* - * tcp_do_get{sock,peer}name constructs the sockaddr from the - * ip header, and decides which header to use based on ip version. - * That operation happens outside the squeue, so we hold the lock - * here to ensure that the ip version and header remain consistent. - */ - mutex_enter(&connp->conn_lock); - tcp->tcp_ipversion = IPV4_VERSION; - tcp->tcp_ipha = (ipha_t *)tcp->tcp_iphc; - tcp->tcp_ip6h = NULL; - mutex_exit(&connp->conn_lock); - - tcp->tcp_hdr_len = sizeof (ipha_t) + sizeof (tcph_t); - tcp->tcp_tcp_hdr_len = sizeof (tcph_t); - tcp->tcp_ip_hdr_len = sizeof (ipha_t); - tcp->tcp_ipha->ipha_length = htons(sizeof (ipha_t) + sizeof (tcph_t)); - tcp->tcp_ipha->ipha_version_and_hdr_length - = (IP_VERSION << 4) | IP_SIMPLE_HDR_LENGTH_IN_WORDS; - tcp->tcp_ipha->ipha_ident = 0; - - tcp->tcp_ttl = (uchar_t)tcps->tcps_ipv4_ttl; - tcp->tcp_tos = 0; - tcp->tcp_ipha->ipha_fragment_offset_and_flags = 0; - tcp->tcp_ipha->ipha_ttl = (uchar_t)tcps->tcps_ipv4_ttl; - tcp->tcp_ipha->ipha_protocol = IPPROTO_TCP; - - tcph = (tcph_t *)(tcp->tcp_iphc + sizeof (ipha_t)); - tcp->tcp_tcph = tcph; - tcph->th_offset_and_rsrvd[0] = (5 << 4); - /* - * IP wants our header length in the checksum field to - * allow it to perform a single pseudo-header+checksum - * calculation on behalf of TCP. - * Include the adjustment for a source route once IP_OPTIONS is set. 
- */ - sum = sizeof (tcph_t) + tcp->tcp_sum; - sum = (sum >> 16) + (sum & 0xFFFF); - U16_TO_ABE16(sum, tcph->th_sum); - return (0); -} - -/* - * Initialize the IPv6 header. Loses any record of any IPv6 extension headers. - */ -static int -tcp_header_init_ipv6(tcp_t *tcp) -{ - tcph_t *tcph; - uint32_t sum; - conn_t *connp; - tcp_stack_t *tcps = tcp->tcp_tcps; - - /* - * This is a simple initialization. If there's - * already a template, it should never be too small, - * so reuse it. Otherwise, allocate space for the new one. - * Ensure that there is enough space to "downgrade" the tcp_t - * to an IPv4 tcp_t. This requires having space for a full load - * of IPv4 options, as well as a full load of TCP options - * (TCP_MAX_COMBINED_HEADER_LENGTH, 120 bytes); this is more space - * than a v6 header and a TCP header with a full load of TCP options - * (IPV6_HDR_LEN is 40 bytes; TCP_MAX_HDR_LENGTH is 60 bytes). - * We want to avoid reallocation in the "downgraded" case when - * processing outbound IPv4 options. - */ - if (tcp->tcp_iphc == NULL) { - ASSERT(tcp->tcp_iphc_len == 0); - tcp->tcp_iphc_len = TCP_MAX_COMBINED_HEADER_LENGTH; - tcp->tcp_iphc = kmem_cache_alloc(tcp_iphc_cache, KM_NOSLEEP); - if (tcp->tcp_iphc == NULL) { - tcp->tcp_iphc_len = 0; - return (ENOMEM); - } - } - - /* options are gone; may need a new label */ - connp = tcp->tcp_connp; - connp->conn_mlp_type = mlptSingle; - connp->conn_ulp_labeled = !is_system_labeled(); - - ASSERT(tcp->tcp_iphc_len >= TCP_MAX_COMBINED_HEADER_LENGTH); - tcp->tcp_hdr_len = IPV6_HDR_LEN + sizeof (tcph_t); - tcp->tcp_tcp_hdr_len = sizeof (tcph_t); - tcp->tcp_ip_hdr_len = IPV6_HDR_LEN; - - /* - * tcp_do_get{sock,peer}name constructs the sockaddr from the - * ip header, and decides which header to use based on ip version. - * That operation happens outside the squeue, so we hold the lock - * here to ensure that the ip version and header remain consistent. 
- */ - mutex_enter(&connp->conn_lock); - tcp->tcp_ipversion = IPV6_VERSION; - tcp->tcp_ip6h = (ip6_t *)tcp->tcp_iphc; - tcp->tcp_ipha = NULL; - mutex_exit(&connp->conn_lock); - - /* Initialize the header template */ - - tcp->tcp_ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; - tcp->tcp_ip6h->ip6_plen = ntohs(sizeof (tcph_t)); - tcp->tcp_ip6h->ip6_nxt = IPPROTO_TCP; - tcp->tcp_ip6h->ip6_hops = (uint8_t)tcps->tcps_ipv6_hoplimit; - - tcph = (tcph_t *)(tcp->tcp_iphc + IPV6_HDR_LEN); - tcp->tcp_tcph = tcph; - tcph->th_offset_and_rsrvd[0] = (5 << 4); - /* - * IP wants our header length in the checksum field to - * allow it to perform a single psuedo-header+checksum - * calculation on behalf of TCP. - * Include the adjustment for a source route when IPV6_RTHDR is set. - */ - sum = sizeof (tcph_t) + tcp->tcp_sum; - sum = (sum >> 16) + (sum & 0xFFFF); - U16_TO_ABE16(sum, tcph->th_sum); - return (0); } /* At minimum we need 8 bytes in the TCP header for the lookup */ #define ICMP_MIN_TCP_HDR 8 /* - * tcp_icmp_error is called by tcp_rput_other to process ICMP error messages + * tcp_icmp_input is called as conn_recvicmp to process ICMP error messages * passed up by IP. The message is always received on the correct tcp_t. * Assumes that IP has pulled up everything up to and including the ICMP header. 
*/ -void -tcp_icmp_error(tcp_t *tcp, mblk_t *mp) +/* ARGSUSED2 */ +static void +tcp_icmp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira) { - icmph_t *icmph; - ipha_t *ipha; - int iph_hdr_length; - tcph_t *tcph; - boolean_t ipsec_mctl = B_FALSE; - boolean_t secure; - mblk_t *first_mp = mp; - int32_t new_mss; - uint32_t ratio; - size_t mp_size = MBLKL(mp); - uint32_t seg_seq; - tcp_stack_t *tcps = tcp->tcp_tcps; - ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip; - - /* Assume IP provides aligned packets - otherwise toss */ - if (!OK_32PTR(mp->b_rptr)) { - freemsg(mp); - return; - } - - /* - * Since ICMP errors are normal data marked with M_CTL when sent - * to TCP or UDP, we have to look for a IPSEC_IN value to identify - * packets starting with an ipsec_info_t, see ipsec_info.h. - */ - if ((mp_size == sizeof (ipsec_info_t)) && - (((ipsec_info_t *)mp->b_rptr)->ipsec_info_type == IPSEC_IN)) { - ASSERT(mp->b_cont != NULL); - mp = mp->b_cont; - /* IP should have done this */ - ASSERT(OK_32PTR(mp->b_rptr)); - mp_size = MBLKL(mp); - ipsec_mctl = B_TRUE; - } + conn_t *connp = (conn_t *)arg1; + icmph_t *icmph; + ipha_t *ipha; + int iph_hdr_length; + tcpha_t *tcpha; + uint32_t seg_seq; + tcp_t *tcp = connp->conn_tcp; - /* - * Verify that we have a complete outer IP header. If not, drop it. - */ - if (mp_size < sizeof (ipha_t)) { -noticmpv4: - freemsg(first_mp); - return; - } + /* Assume IP provides aligned packets */ + ASSERT(OK_32PTR(mp->b_rptr)); + ASSERT((MBLKL(mp) >= sizeof (ipha_t))); - ipha = (ipha_t *)mp->b_rptr; /* * Verify IP version. Anything other than IPv4 or IPv6 packet is sent * upstream. ICMPv6 is handled in tcp_icmp_error_ipv6. 
*/ - switch (IPH_HDR_VERSION(ipha)) { - case IPV6_VERSION: - tcp_icmp_error_ipv6(tcp, first_mp, ipsec_mctl); + if (!(ira->ira_flags & IRAF_IS_IPV4)) { + tcp_icmp_error_ipv6(tcp, mp, ira); return; - case IPV4_VERSION: - break; - default: - goto noticmpv4; } /* Skip past the outer IP and ICMP headers */ - iph_hdr_length = IPH_HDR_LENGTH(ipha); + iph_hdr_length = ira->ira_ip_hdr_length; icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; /* - * If we don't have the correct outer IP header length or if the ULP - * is not IPPROTO_ICMP or if we don't have a complete inner IP header - * send it upstream. + * If we don't have the correct outer IP header length + * or if we don't have a complete inner IP header + * drop it. */ if (iph_hdr_length < sizeof (ipha_t) || - ipha->ipha_protocol != IPPROTO_ICMP || (ipha_t *)&icmph[1] + 1 > (ipha_t *)mp->b_wptr) { - goto noticmpv4; +noticmpv4: + freemsg(mp); + return; } ipha = (ipha_t *)&icmph[1]; /* Skip past the inner IP and find the ULP header */ iph_hdr_length = IPH_HDR_LENGTH(ipha); - tcph = (tcph_t *)((char *)ipha + iph_hdr_length); + tcpha = (tcpha_t *)((char *)ipha + iph_hdr_length); /* * If we don't have the correct inner IP header length or if the ULP * is not IPPROTO_TCP or if we don't have at least ICMP_MIN_TCP_HDR @@ -8258,166 +6800,20 @@ noticmpv4: */ if (iph_hdr_length < sizeof (ipha_t) || ipha->ipha_protocol != IPPROTO_TCP || - (uchar_t *)tcph + ICMP_MIN_TCP_HDR > mp->b_wptr) { - goto noticmpv4; - } - - if (TCP_IS_DETACHED_NONEAGER(tcp)) { - if (ipsec_mctl) { - secure = ipsec_in_is_secure(first_mp); - } else { - secure = B_FALSE; - } - if (secure) { - /* - * If we are willing to accept this in clear - * we don't have to verify policy. - */ - if (!ipsec_inbound_accept_clear(mp, ipha, NULL)) { - if (!tcp_check_policy(tcp, first_mp, - ipha, NULL, secure, ipsec_mctl)) { - /* - * tcp_check_policy called - * ip_drop_packet() on failure. 
- */ - return; - } - } - } - } else if (ipsec_mctl) { - /* - * This is a hard_bound connection. IP has already - * verified policy. We don't have to do it again. - */ - freeb(first_mp); - first_mp = mp; - ipsec_mctl = B_FALSE; - } - - seg_seq = ABE32_TO_U32(tcph->th_seq); - /* - * TCP SHOULD check that the TCP sequence number contained in - * payload of the ICMP error message is within the range - * SND.UNA <= SEG.SEQ < SND.NXT. - */ - if (SEQ_LT(seg_seq, tcp->tcp_suna) || SEQ_GEQ(seg_seq, tcp->tcp_snxt)) { - /* - * The ICMP message is bogus, just drop it. But if this is - * an ICMP too big message, IP has already changed - * the ire_max_frag to the bogus value. We need to change - * it back. - */ - if (icmph->icmph_type == ICMP_DEST_UNREACHABLE && - icmph->icmph_code == ICMP_FRAGMENTATION_NEEDED) { - conn_t *connp = tcp->tcp_connp; - ire_t *ire; - int flag; - - if (tcp->tcp_ipversion == IPV4_VERSION) { - flag = tcp->tcp_ipha-> - ipha_fragment_offset_and_flags; - } else { - flag = 0; - } - mutex_enter(&connp->conn_lock); - if ((ire = connp->conn_ire_cache) != NULL) { - mutex_enter(&ire->ire_lock); - mutex_exit(&connp->conn_lock); - ire->ire_max_frag = tcp->tcp_if_mtu; - ire->ire_frag_flag |= flag; - mutex_exit(&ire->ire_lock); - } else { - mutex_exit(&connp->conn_lock); - } - } + (uchar_t *)tcpha + ICMP_MIN_TCP_HDR > mp->b_wptr) { goto noticmpv4; } + seg_seq = ntohl(tcpha->tha_seq); switch (icmph->icmph_type) { case ICMP_DEST_UNREACHABLE: switch (icmph->icmph_code) { case ICMP_FRAGMENTATION_NEEDED: /* - * Reduce the MSS based on the new MTU. This will - * eliminate any fragmentation locally. - * N.B. There may well be some funny side-effects on - * the local send policy and the remote receive policy. - * Pending further research, we provide - * tcp_ignore_path_mtu just in case this proves - * disastrous somewhere. - * - * After updating the MSS, retransmit part of the - * dropped segment using the new mss by calling - * tcp_wput_data(). 
Need to adjust all those - * params to make sure tcp_wput_data() work properly. - */ - if (tcps->tcps_ignore_path_mtu || - tcp->tcp_ipha->ipha_fragment_offset_and_flags == 0) - break; - - /* - * Decrease the MSS by time stamp options - * IP options and IPSEC options. tcp_hdr_len - * includes time stamp option and IP option - * length. Note that new_mss may be negative - * if tcp_ipsec_overhead is large and the - * icmph_du_mtu is the minimum value, which is 68. - */ - new_mss = ntohs(icmph->icmph_du_mtu) - - tcp->tcp_hdr_len - tcp->tcp_ipsec_overhead; - - DTRACE_PROBE2(tcp__pmtu__change, tcp_t *, tcp, int, - new_mss); - - /* - * Only update the MSS if the new one is - * smaller than the previous one. This is - * to avoid problems when getting multiple - * ICMP errors for the same MTU. - */ - if (new_mss >= tcp->tcp_mss) - break; - - /* - * Note that we are using the template header's DF - * bit in the fast path sending. So we need to compare - * the new mss with both tcps_mss_min and ip_pmtu_min. - * And stop doing IPv4 PMTUd if new_mss is less than - * MAX(tcps_mss_min, ip_pmtu_min). - */ - if (new_mss < tcps->tcps_mss_min || - new_mss < ipst->ips_ip_pmtu_min) { - tcp->tcp_ipha->ipha_fragment_offset_and_flags = - 0; - } - - ratio = tcp->tcp_cwnd / tcp->tcp_mss; - ASSERT(ratio >= 1); - tcp_mss_set(tcp, new_mss, B_TRUE); - - /* - * Make sure we have something to - * send. + * Update Path MTU, then try to send something out. */ - if (SEQ_LT(tcp->tcp_suna, tcp->tcp_snxt) && - (tcp->tcp_xmit_head != NULL)) { - /* - * Shrink tcp_cwnd in - * proportion to the old MSS/new MSS. 
- */ - tcp->tcp_cwnd = ratio * tcp->tcp_mss; - if ((tcp->tcp_valid_bits & TCP_FSS_VALID) && - (tcp->tcp_unsent == 0)) { - tcp->tcp_rexmit_max = tcp->tcp_fss; - } else { - tcp->tcp_rexmit_max = tcp->tcp_snxt; - } - tcp->tcp_rexmit_nxt = tcp->tcp_suna; - tcp->tcp_rexmit = B_TRUE; - tcp->tcp_dupack_cnt = 0; - tcp->tcp_snd_burst = TCP_CWND_SS; - tcp_ss_rexmit(tcp); - } + tcp_update_pmtu(tcp, B_TRUE); + tcp_rexmit_after_error(tcp); break; case ICMP_PORT_UNREACHABLE: case ICMP_PROTOCOL_UNREACHABLE: @@ -8451,7 +6847,6 @@ noticmpv4: * Ditch the half-open connection if we * suspect a SYN attack is under way. */ - tcp_ip_ire_mark_advice(tcp); (void) tcp_clean_death(tcp, tcp->tcp_client_errno, 7); } @@ -8483,67 +6878,191 @@ noticmpv4: break; } } - freemsg(first_mp); + freemsg(mp); } /* - * tcp_icmp_error_ipv6 is called by tcp_rput_other to process ICMPv6 - * error messages passed up by IP. - * Assumes that IP has pulled up all the extension headers as well - * as the ICMPv6 header. + * CALLED OUTSIDE OF SQUEUE! It can not follow any pointers that tcp might + * change. But it can refer to fields like tcp_suna and tcp_snxt. + * + * Function tcp_verifyicmp is called as conn_verifyicmp to verify the ICMP + * error messages received by IP. The message is always received on the correct + * tcp_t. + */ +/* ARGSUSED */ +static boolean_t +tcp_verifyicmp(conn_t *connp, void *arg2, icmph_t *icmph, icmp6_t *icmp6, + ip_recv_attr_t *ira) +{ + tcpha_t *tcpha = (tcpha_t *)arg2; + uint32_t seq = ntohl(tcpha->tha_seq); + tcp_t *tcp = connp->conn_tcp; + + /* + * TCP sequence number contained in payload of the ICMP error message + * should be within the range SND.UNA <= SEG.SEQ < SND.NXT. Otherwise, + * the message is either a stale ICMP error, or an attack from the + * network. Fail the verification. 
+ */ + if (SEQ_LT(seq, tcp->tcp_suna) || SEQ_GEQ(seq, tcp->tcp_snxt)) + return (B_FALSE); + + /* For "too big" we also check the ignore flag */ + if (ira->ira_flags & IRAF_IS_IPV4) { + ASSERT(icmph != NULL); + if (icmph->icmph_type == ICMP_DEST_UNREACHABLE && + icmph->icmph_code == ICMP_FRAGMENTATION_NEEDED && + tcp->tcp_tcps->tcps_ignore_path_mtu) + return (B_FALSE); + } else { + ASSERT(icmp6 != NULL); + if (icmp6->icmp6_type == ICMP6_PACKET_TOO_BIG && + tcp->tcp_tcps->tcps_ignore_path_mtu) + return (B_FALSE); + } + return (B_TRUE); +} + +/* + * Update the TCP connection according to change of PMTU. + * + * Path MTU might have changed by either increase or decrease, so need to + * adjust the MSS based on the value of ixa_pmtu. No need to handle tiny + * or negative MSS, since tcp_mss_set() will do it. */ static void -tcp_icmp_error_ipv6(tcp_t *tcp, mblk_t *mp, boolean_t ipsec_mctl) +tcp_update_pmtu(tcp_t *tcp, boolean_t decrease_only) { - icmp6_t *icmp6; - ip6_t *ip6h; - uint16_t iph_hdr_length; - tcpha_t *tcpha; - uint8_t *nexthdrp; - uint32_t new_mss; - uint32_t ratio; - boolean_t secure; - mblk_t *first_mp = mp; - size_t mp_size; - uint32_t seg_seq; - tcp_stack_t *tcps = tcp->tcp_tcps; + uint32_t pmtu; + int32_t mss; + conn_t *connp = tcp->tcp_connp; + ip_xmit_attr_t *ixa = connp->conn_ixa; + iaflags_t ixaflags; + + if (tcp->tcp_tcps->tcps_ignore_path_mtu) + return; + + if (tcp->tcp_state < TCPS_ESTABLISHED) + return; /* - * The caller has determined if this is an IPSEC_IN packet and - * set ipsec_mctl appropriately (see tcp_icmp_error). + * Always call ip_get_pmtu() to make sure that IP has updated + * ixa_flags properly. */ - if (ipsec_mctl) - mp = mp->b_cont; + pmtu = ip_get_pmtu(ixa); + ixaflags = ixa->ixa_flags; - mp_size = MBLKL(mp); + /* + * Calculate the MSS by decreasing the PMTU by conn_ht_iphc_len and + * IPsec overhead if applied. Make sure to use the most recent + * IPsec information. 
+ */ + mss = pmtu - connp->conn_ht_iphc_len - conn_ipsec_length(connp); /* - * Verify that we have a complete IP header. If not, send it upstream. + * Nothing to change, so just return. */ - if (mp_size < sizeof (ip6_t)) { -noticmpv6: - freemsg(first_mp); + if (mss == tcp->tcp_mss) return; - } /* - * Verify this is an ICMPV6 packet, else send it upstream. + * Currently, for ICMP errors, only PMTU decrease is handled. */ - ip6h = (ip6_t *)mp->b_rptr; - if (ip6h->ip6_nxt == IPPROTO_ICMPV6) { - iph_hdr_length = IPV6_HDR_LEN; - } else if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, - &nexthdrp) || - *nexthdrp != IPPROTO_ICMPV6) { - goto noticmpv6; + if (mss > tcp->tcp_mss && decrease_only) + return; + + DTRACE_PROBE2(tcp_update_pmtu, int32_t, tcp->tcp_mss, uint32_t, mss); + + /* + * Update ixa_fragsize and ixa_pmtu. + */ + ixa->ixa_fragsize = ixa->ixa_pmtu = pmtu; + + /* + * Adjust MSS and all relevant variables. + */ + tcp_mss_set(tcp, mss); + + /* + * If the PMTU is below the min size maintained by IP, then ip_get_pmtu + * has set IXAF_PMTU_TOO_SMALL and cleared IXAF_PMTU_IPV4_DF. Since TCP + * has a (potentially different) min size we do the same. Make sure to + * clear IXAF_DONTFRAG, which is used by IP to decide whether to + * fragment the packet. + * + * LSO over IPv6 can not be fragmented. So need to disable LSO + * when IPv6 fragmentation is needed. + */ + if (mss < tcp->tcp_tcps->tcps_mss_min) + ixaflags |= IXAF_PMTU_TOO_SMALL; + + if (ixaflags & IXAF_PMTU_TOO_SMALL) + ixaflags &= ~(IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF); + + if ((connp->conn_ipversion == IPV4_VERSION) && + !(ixaflags & IXAF_PMTU_IPV4_DF)) { + tcp->tcp_ipha->ipha_fragment_offset_and_flags = 0; } + ixa->ixa_flags = ixaflags; +} + +/* + * Do slow start retransmission after ICMP errors of PMTU changes. + */ +static void +tcp_rexmit_after_error(tcp_t *tcp) +{ + /* + * All sent data has been acknowledged or no data left to send, just + * to return. 
+ */ + if (!SEQ_LT(tcp->tcp_suna, tcp->tcp_snxt) || + (tcp->tcp_xmit_head == NULL)) + return; + + if ((tcp->tcp_valid_bits & TCP_FSS_VALID) && (tcp->tcp_unsent == 0)) + tcp->tcp_rexmit_max = tcp->tcp_fss; + else + tcp->tcp_rexmit_max = tcp->tcp_snxt; + + tcp->tcp_rexmit_nxt = tcp->tcp_suna; + tcp->tcp_rexmit = B_TRUE; + tcp->tcp_dupack_cnt = 0; + tcp->tcp_snd_burst = TCP_CWND_SS; + tcp_ss_rexmit(tcp); +} + +/* + * tcp_icmp_error_ipv6 is called from tcp_icmp_input to process ICMPv6 + * error messages passed up by IP. + * Assumes that IP has pulled up all the extension headers as well + * as the ICMPv6 header. + */ +static void +tcp_icmp_error_ipv6(tcp_t *tcp, mblk_t *mp, ip_recv_attr_t *ira) +{ + icmp6_t *icmp6; + ip6_t *ip6h; + uint16_t iph_hdr_length = ira->ira_ip_hdr_length; + tcpha_t *tcpha; + uint8_t *nexthdrp; + uint32_t seg_seq; + + /* + * Verify that we have a complete IP header. + */ + ASSERT((MBLKL(mp) >= sizeof (ip6_t))); + icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; ip6h = (ip6_t *)&icmp6[1]; /* * Verify if we have a complete ICMP and inner IP header. */ - if ((uchar_t *)&ip6h[1] > mp->b_wptr) - goto noticmpv6; + if ((uchar_t *)&ip6h[1] > mp->b_wptr) { +noticmpv6: + freemsg(mp); + return; + } if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) goto noticmpv6; @@ -8558,130 +7077,15 @@ noticmpv6: goto noticmpv6; } - /* - * ICMP errors come on the right queue or come on - * listener/global queue for detached connections and - * get switched to the right queue. If it comes on the - * right queue, policy check has already been done by IP - * and thus free the first_mp without verifying the policy. - * If it has come for a non-hard bound connection, we need - * to verify policy as IP may not have done it. - */ - if (!tcp->tcp_hard_bound) { - if (ipsec_mctl) { - secure = ipsec_in_is_secure(first_mp); - } else { - secure = B_FALSE; - } - if (secure) { - /* - * If we are willing to accept this in clear - * we don't have to verify policy. 
- */ - if (!ipsec_inbound_accept_clear(mp, NULL, ip6h)) { - if (!tcp_check_policy(tcp, first_mp, - NULL, ip6h, secure, ipsec_mctl)) { - /* - * tcp_check_policy called - * ip_drop_packet() on failure. - */ - return; - } - } - } - } else if (ipsec_mctl) { - /* - * This is a hard_bound connection. IP has already - * verified policy. We don't have to do it again. - */ - freeb(first_mp); - first_mp = mp; - ipsec_mctl = B_FALSE; - } - seg_seq = ntohl(tcpha->tha_seq); - /* - * TCP SHOULD check that the TCP sequence number contained in - * payload of the ICMP error message is within the range - * SND.UNA <= SEG.SEQ < SND.NXT. - */ - if (SEQ_LT(seg_seq, tcp->tcp_suna) || SEQ_GEQ(seg_seq, tcp->tcp_snxt)) { - /* - * If the ICMP message is bogus, should we kill the - * connection, or should we just drop the bogus ICMP - * message? It would probably make more sense to just - * drop the message so that if this one managed to get - * in, the real connection should not suffer. - */ - goto noticmpv6; - } - switch (icmp6->icmp6_type) { case ICMP6_PACKET_TOO_BIG: /* - * Reduce the MSS based on the new MTU. This will - * eliminate any fragmentation locally. - * N.B. There may well be some funny side-effects on - * the local send policy and the remote receive policy. - * Pending further research, we provide - * tcp_ignore_path_mtu just in case this proves - * disastrous somewhere. - * - * After updating the MSS, retransmit part of the - * dropped segment using the new mss by calling - * tcp_wput_data(). Need to adjust all those - * params to make sure tcp_wput_data() work properly. - */ - if (tcps->tcps_ignore_path_mtu) - break; - - /* - * Decrease the MSS by time stamp options - * IP options and IPSEC options. tcp_hdr_len - * includes time stamp option and IP option - * length. - */ - new_mss = ntohs(icmp6->icmp6_mtu) - tcp->tcp_hdr_len - - tcp->tcp_ipsec_overhead; - - /* - * Only update the MSS if the new one is - * smaller than the previous one. 
This is - * to avoid problems when getting multiple - * ICMP errors for the same MTU. - */ - if (new_mss >= tcp->tcp_mss) - break; - - ratio = tcp->tcp_cwnd / tcp->tcp_mss; - ASSERT(ratio >= 1); - tcp_mss_set(tcp, new_mss, B_TRUE); - - /* - * Make sure we have something to - * send. + * Update Path MTU, then try to send something out. */ - if (SEQ_LT(tcp->tcp_suna, tcp->tcp_snxt) && - (tcp->tcp_xmit_head != NULL)) { - /* - * Shrink tcp_cwnd in - * proportion to the old MSS/new MSS. - */ - tcp->tcp_cwnd = ratio * tcp->tcp_mss; - if ((tcp->tcp_valid_bits & TCP_FSS_VALID) && - (tcp->tcp_unsent == 0)) { - tcp->tcp_rexmit_max = tcp->tcp_fss; - } else { - tcp->tcp_rexmit_max = tcp->tcp_snxt; - } - tcp->tcp_rexmit_nxt = tcp->tcp_suna; - tcp->tcp_rexmit = B_TRUE; - tcp->tcp_dupack_cnt = 0; - tcp->tcp_snd_burst = TCP_CWND_SS; - tcp_ss_rexmit(tcp); - } + tcp_update_pmtu(tcp, B_TRUE); + tcp_rexmit_after_error(tcp); break; - case ICMP6_DST_UNREACH: switch (icmp6->icmp6_code) { case ICMP6_DST_UNREACH_NOPORT: @@ -8692,7 +7096,6 @@ noticmpv6: ECONNREFUSED, 8); } break; - case ICMP6_DST_UNREACH_ADMIN: case ICMP6_DST_UNREACH_NOROUTE: case ICMP6_DST_UNREACH_BEYONDSCOPE: @@ -8708,7 +7111,6 @@ noticmpv6: * Ditch the half-open connection if we * suspect a SYN attack is under way. */ - tcp_ip_ire_mark_advice(tcp); (void) tcp_clean_death(tcp, tcp->tcp_client_errno, 9); } @@ -8720,7 +7122,6 @@ noticmpv6: break; } break; - case ICMP6_PARAM_PROB: /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && @@ -8739,83 +7140,42 @@ noticmpv6: default: break; } - freemsg(first_mp); + freemsg(mp); } /* * Notify IP that we are having trouble with this connection. IP should - * blow the IRE away and start over. + * make note so it can potentially use a different IRE. 
*/ static void tcp_ip_notify(tcp_t *tcp) { - struct iocblk *iocp; - ipid_t *ipid; - mblk_t *mp; - - /* IPv6 has NUD thus notification to delete the IRE is not needed */ - if (tcp->tcp_ipversion == IPV6_VERSION) - return; - - mp = mkiocb(IP_IOCTL); - if (mp == NULL) - return; - - iocp = (struct iocblk *)mp->b_rptr; - iocp->ioc_count = sizeof (ipid_t) + sizeof (tcp->tcp_ipha->ipha_dst); - - mp->b_cont = allocb(iocp->ioc_count, BPRI_HI); - if (!mp->b_cont) { - freeb(mp); - return; - } + conn_t *connp = tcp->tcp_connp; + ire_t *ire; - ipid = (ipid_t *)mp->b_cont->b_rptr; - mp->b_cont->b_wptr += iocp->ioc_count; - bzero(ipid, sizeof (*ipid)); - ipid->ipid_cmd = IP_IOC_IRE_DELETE_NO_REPLY; - ipid->ipid_ire_type = IRE_CACHE; - ipid->ipid_addr_offset = sizeof (ipid_t); - ipid->ipid_addr_length = sizeof (tcp->tcp_ipha->ipha_dst); /* * Note: in the case of source routing we want to blow away the * route to the first source route hop. */ - bcopy(&tcp->tcp_ipha->ipha_dst, &ipid[1], - sizeof (tcp->tcp_ipha->ipha_dst)); - - CALL_IP_WPUT(tcp->tcp_connp, tcp->tcp_wq, mp); -} - -/* Unlink and return any mblk that looks like it contains an ire */ -static mblk_t * -tcp_ire_mp(mblk_t **mpp) -{ - mblk_t *mp = *mpp; - mblk_t *prev_mp = NULL; - - for (;;) { - switch (DB_TYPE(mp)) { - case IRE_DB_TYPE: - case IRE_DB_REQ_TYPE: - if (mp == *mpp) { - *mpp = mp->b_cont; - } else { - prev_mp->b_cont = mp->b_cont; - } - mp->b_cont = NULL; - return (mp); - default: - break; + ire = connp->conn_ixa->ixa_ire; + if (ire != NULL && !(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) { + if (ire->ire_ipversion == IPV4_VERSION) { + /* + * As per RFC 1122, we send an RTM_LOSING to inform + * routing protocols. 
+ */ + ip_rts_change(RTM_LOSING, ire->ire_addr, + ire->ire_gateway_addr, ire->ire_mask, + connp->conn_laddr_v4, 0, 0, 0, + (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_IFA), + ire->ire_ipst); } - prev_mp = mp; - mp = mp->b_cont; - if (mp == NULL) - break; + (void) ire_no_good(ire); } - return (mp); } +#pragma inline(tcp_send_data) + /* * Timer callback routine for keepalive probe. We do a fake resend of * last ACKed byte. Then set a timer using RTO. When the timer expires, @@ -8890,7 +7250,7 @@ tcp_keepalive_killer(void *arg) * timer back. */ if (mp != NULL) { - tcp_send_data(tcp, tcp->tcp_wq, mp); + tcp_send_data(tcp, mp); BUMP_MIB(&tcps->tcps_mib, tcpTimKeepaliveProbe); if (tcp->tcp_ka_last_intrvl != 0) { @@ -8930,17 +7290,17 @@ tcp_keepalive_killer(void *arg) int tcp_maxpsz_set(tcp_t *tcp, boolean_t set_maxblk) { - queue_t *q = tcp->tcp_rq; + conn_t *connp = tcp->tcp_connp; + queue_t *q = connp->conn_rq; int32_t mss = tcp->tcp_mss; int maxpsz; - conn_t *connp = tcp->tcp_connp; if (TCP_IS_DETACHED(tcp)) return (mss); if (tcp->tcp_fused) { maxpsz = tcp_fuse_maxpsz(tcp); mss = INFPSZ; - } else if (tcp->tcp_mdt || tcp->tcp_lso || tcp->tcp_maxpsz == 0) { + } else if (tcp->tcp_maxpsz_multiplier == 0) { /* * Set the sd_qn_maxpsz according to the socket send buffer * size, and sd_maxblk to INFPSZ (-1). This will essentially @@ -8948,7 +7308,7 @@ tcp_maxpsz_set(tcp_t *tcp, boolean_t set_maxblk) * kernel-allocated buffers without breaking it up into smaller * chunks. We round up the buffer size to the nearest SMSS. */ - maxpsz = MSS_ROUNDUP(tcp->tcp_xmit_hiwater, mss); + maxpsz = MSS_ROUNDUP(connp->conn_sndbuf, mss); if (tcp->tcp_kssl_ctx == NULL) mss = INFPSZ; else @@ -8960,21 +7320,17 @@ tcp_maxpsz_set(tcp_t *tcp, boolean_t set_maxblk) * head to break down larger than SMSS writes into SMSS- * size mblks, up to tcp_maxpsz_multiplier mblks at a time. 
*/ - /* XXX tune this with ndd tcp_maxpsz_multiplier */ - maxpsz = tcp->tcp_maxpsz * mss; - if (maxpsz > tcp->tcp_xmit_hiwater/2) { - maxpsz = tcp->tcp_xmit_hiwater/2; + maxpsz = tcp->tcp_maxpsz_multiplier * mss; + if (maxpsz > connp->conn_sndbuf / 2) { + maxpsz = connp->conn_sndbuf / 2; /* Round up to nearest mss */ maxpsz = MSS_ROUNDUP(maxpsz, mss); } } (void) proto_set_maxpsz(q, connp, maxpsz); - if (!(IPCL_IS_NONSTR(connp))) { - /* XXX do it in set_maxpsz()? */ - tcp->tcp_wq->q_maxpsz = maxpsz; - } - + if (!(IPCL_IS_NONSTR(connp))) + connp->conn_wq->q_maxpsz = maxpsz; if (set_maxblk) (void) proto_set_tx_maxblk(q, connp, mss); return (mss); @@ -8985,18 +7341,18 @@ tcp_maxpsz_set(tcp_t *tcp, boolean_t set_maxblk) * tcpopt struct and return a bitmask saying which options were found. */ static int -tcp_parse_options(tcph_t *tcph, tcp_opt_t *tcpopt) +tcp_parse_options(tcpha_t *tcpha, tcp_opt_t *tcpopt) { uchar_t *endp; int len; uint32_t mss; - uchar_t *up = (uchar_t *)tcph; + uchar_t *up = (uchar_t *)tcpha; int found = 0; int32_t sack_len; tcp_seq sack_begin, sack_end; tcp_t *tcp; - endp = up + TCP_HDR_LENGTH(tcph); + endp = up + TCP_HDR_LENGTH(tcpha); up += TCP_MIN_HEADER_LENGTH; while (up < endp) { len = endp - up; @@ -9135,28 +7491,20 @@ tcp_parse_options(tcph_t *tcph, tcp_opt_t *tcpopt) } /* - * Set the mss associated with a particular tcp based on its current value, - * and a new one passed in. Observe minimums and maximums, and reset - * other state variables that we want to view as multiples of mss. - * - * This function is called mainly because values like tcp_mss, tcp_cwnd, - * highwater marks etc. need to be initialized or adjusted. - * 1) From tcp_process_options() when the other side's SYN/SYN-ACK - * packet arrives. - * 2) We need to set a new MSS when ICMP_FRAGMENTATION_NEEDED or - * ICMP6_PACKET_TOO_BIG arrives. - * 3) From tcp_paws_check() if the other side stops sending the timestamp, - * to increase the MSS to use the extra bytes available. 
+ * Set the MSS associated with a particular tcp based on its current value, + * and a new one passed in. Observe minimums and maximums, and reset other + * state variables that we want to view as multiples of MSS. * - * Callers except tcp_paws_check() ensure that they only reduce mss. + * The value of MSS could be either increased or descreased. */ static void -tcp_mss_set(tcp_t *tcp, uint32_t mss, boolean_t do_ss) +tcp_mss_set(tcp_t *tcp, uint32_t mss) { uint32_t mss_max; tcp_stack_t *tcps = tcp->tcp_tcps; + conn_t *connp = tcp->tcp_connp; - if (tcp->tcp_ipversion == IPV4_VERSION) + if (connp->conn_ipversion == IPV4_VERSION) mss_max = tcps->tcps_mss_max_ipv4; else mss_max = tcps->tcps_mss_max_ipv6; @@ -9176,34 +7524,22 @@ tcp_mss_set(tcp_t *tcp, uint32_t mss, boolean_t do_ss) * TCP should be able to buffer at least 4 MSS data for obvious * performance reason. */ - if ((mss << 2) > tcp->tcp_xmit_hiwater) - tcp->tcp_xmit_hiwater = mss << 2; + if ((mss << 2) > connp->conn_sndbuf) + connp->conn_sndbuf = mss << 2; /* - * Set the xmit_lowater to at least twice of MSS. + * Set the send lowater to at least twice of MSS. */ - if ((mss << 1) > tcp->tcp_xmit_lowater) - tcp->tcp_xmit_lowater = mss << 1; + if ((mss << 1) > connp->conn_sndlowat) + connp->conn_sndlowat = mss << 1; + + /* + * Update tcp_cwnd according to the new value of MSS. Keep the + * previous ratio to preserve the transmit rate. + */ + tcp->tcp_cwnd = (tcp->tcp_cwnd / tcp->tcp_mss) * mss; + tcp->tcp_cwnd_cnt = 0; - if (do_ss) { - /* - * Either the tcp_cwnd is as yet uninitialized, or mss is - * changing due to a reduction in MTU, presumably as a - * result of a new path component, reset cwnd to its - * "initial" value, as a multiple of the new mss. - */ - SET_TCP_INIT_CWND(tcp, mss, tcps->tcps_slow_start_initial); - } else { - /* - * Called by tcp_paws_check(), the mss increased - * marginally to allow use of space previously taken - * by the timestamp option. 
It would be inappropriate - * to apply slow start or tcp_init_cwnd values to - * tcp_cwnd, simply adjust to a multiple of the new mss. - */ - tcp->tcp_cwnd = (tcp->tcp_cwnd / tcp->tcp_mss) * mss; - tcp->tcp_cwnd_cnt = 0; - } tcp->tcp_mss = mss; (void) tcp_maxpsz_set(tcp, B_TRUE); } @@ -9223,12 +7559,11 @@ tcp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) } static conn_t * -tcp_create_common(queue_t *q, cred_t *credp, boolean_t isv6, - boolean_t issocket, int *errorp) +tcp_create_common(cred_t *credp, boolean_t isv6, boolean_t issocket, + int *errorp) { tcp_t *tcp = NULL; conn_t *connp; - int err; zoneid_t zoneid; tcp_stack_t *tcps; squeue_t *sqp; @@ -9265,15 +7600,6 @@ tcp_create_common(queue_t *q, cred_t *credp, boolean_t isv6, else zoneid = crgetzoneid(credp); } - /* - * For stackid zero this is done from strplumb.c, but - * non-zero stackids are handled here. - */ - if (tcps->tcps_g_q == NULL && - tcps->tcps_netstack->netstack_stackid != - GLOBAL_NETSTACKID) { - tcp_g_q_setup(tcps); - } sqp = IP_SQUEUE_GET((uint_t)gethrtime()); connp = (conn_t *)tcp_get_conn(sqp, tcps); @@ -9286,41 +7612,50 @@ tcp_create_common(queue_t *q, cred_t *credp, boolean_t isv6, *errorp = ENOSR; return (NULL); } + ASSERT(connp->conn_ixa->ixa_protocol == connp->conn_proto); + connp->conn_sqp = sqp; connp->conn_initial_sqp = connp->conn_sqp; + connp->conn_ixa->ixa_sqp = connp->conn_sqp; tcp = connp->conn_tcp; + /* + * Besides asking IP to set the checksum for us, have conn_ip_output + * to do the following checks when necessary: + * + * IXAF_VERIFY_SOURCE: drop packets when our outer source goes invalid + * IXAF_VERIFY_PMTU: verify PMTU changes + * IXAF_VERIFY_LSO: verify LSO capability changes + */ + connp->conn_ixa->ixa_flags |= IXAF_SET_ULP_CKSUM | IXAF_VERIFY_SOURCE | + IXAF_VERIFY_PMTU | IXAF_VERIFY_LSO; + + if (!tcps->tcps_dev_flow_ctl) + connp->conn_ixa->ixa_flags |= IXAF_NO_DEV_FLOW_CTL; + if (isv6) { - connp->conn_flags |= IPCL_TCP6; - connp->conn_send = 
ip_output_v6; - connp->conn_af_isv6 = B_TRUE; - connp->conn_pkt_isv6 = B_TRUE; - connp->conn_src_preferences = IPV6_PREFER_SRC_DEFAULT; - tcp->tcp_ipversion = IPV6_VERSION; - tcp->tcp_family = AF_INET6; + connp->conn_ixa->ixa_src_preferences = IPV6_PREFER_SRC_DEFAULT; + connp->conn_ipversion = IPV6_VERSION; + connp->conn_family = AF_INET6; tcp->tcp_mss = tcps->tcps_mss_def_ipv6; + connp->conn_default_ttl = tcps->tcps_ipv6_hoplimit; } else { - connp->conn_flags |= IPCL_TCP4; - connp->conn_send = ip_output; - connp->conn_af_isv6 = B_FALSE; - connp->conn_pkt_isv6 = B_FALSE; - tcp->tcp_ipversion = IPV4_VERSION; - tcp->tcp_family = AF_INET; + connp->conn_ipversion = IPV4_VERSION; + connp->conn_family = AF_INET; tcp->tcp_mss = tcps->tcps_mss_def_ipv4; + connp->conn_default_ttl = tcps->tcps_ipv4_ttl; } + connp->conn_xmit_ipp.ipp_unicast_hops = connp->conn_default_ttl; + + crhold(credp); + connp->conn_cred = credp; + connp->conn_cpid = curproc->p_pid; + connp->conn_open_time = lbolt64; - /* - * TCP keeps a copy of cred for cache locality reasons but - * we put a reference only once. If connp->conn_cred - * becomes invalid, tcp_cred should also be set to NULL. 
- */ - tcp->tcp_cred = connp->conn_cred = credp; - crhold(connp->conn_cred); - tcp->tcp_cpid = curproc->p_pid; - tcp->tcp_open_time = lbolt64; connp->conn_zoneid = zoneid; + /* conn_allzones can not be set this early, hence no IPCL_ZONEID */ + connp->conn_ixa->ixa_zoneid = zoneid; connp->conn_mlp_type = mlptSingle; - connp->conn_ulp_labeled = !is_system_labeled(); ASSERT(connp->conn_netstack == tcps->tcps_netstack); ASSERT(tcp->tcp_tcps == tcps); @@ -9331,38 +7666,22 @@ tcp_create_common(queue_t *q, cred_t *credp, boolean_t isv6, if (getpflags(NET_MAC_AWARE, credp) != 0) connp->conn_mac_mode = CONN_MAC_AWARE; - connp->conn_dev = NULL; + connp->conn_zone_is_global = (crgetzoneid(credp) == GLOBAL_ZONEID); + if (issocket) { - connp->conn_flags |= IPCL_SOCKET; tcp->tcp_issocket = 1; } - /* Non-zero default values */ - connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; - - if (q == NULL) { - /* - * Create a helper stream for non-STREAMS socket. - */ - err = ip_create_helper_stream(connp, tcps->tcps_ldi_ident); - if (err != 0) { - ip1dbg(("tcp_create_common: create of IP helper stream " - "failed\n")); - CONN_DEC_REF(connp); - *errorp = err; - return (NULL); - } - q = connp->conn_rq; - } + connp->conn_rcvbuf = tcps->tcps_recv_hiwat; + connp->conn_sndbuf = tcps->tcps_xmit_hiwat; + connp->conn_sndlowat = tcps->tcps_xmit_lowat; + connp->conn_so_type = SOCK_STREAM; + connp->conn_wroff = connp->conn_ht_iphc_allocated + + tcps->tcps_wroff_xtra; SOCK_CONNID_INIT(tcp->tcp_connid); - err = tcp_init(tcp, q); - if (err != 0) { - CONN_DEC_REF(connp); - *errorp = err; - return (NULL); - } - + tcp->tcp_state = TCPS_IDLE; + tcp_init_values(tcp); return (connp); } @@ -9415,7 +7734,7 @@ tcp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, q->q_qinfo = &tcp_acceptor_rinit; /* * the conn_dev and minor_arena will be subsequently used by - * tcp_wput_accept() and tcp_tpi_close_accept() to figure out + * tcp_tli_accept() and tcp_tpi_close_accept() to figure out * the 
minor device number for this connection from the q_ptr. */ RD(q)->q_ptr = (void *)conn_dev; @@ -9426,7 +7745,7 @@ tcp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, } issocket = flag & SO_SOCKSTR; - connp = tcp_create_common(q, credp, isv6, issocket, &err); + connp = tcp_create_common(credp, isv6, issocket, &err); if (connp == NULL) { inet_minor_free(minor_arena, conn_dev); @@ -9434,6 +7753,8 @@ tcp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, return (err); } + connp->conn_rq = q; + connp->conn_wq = WR(q); q->q_ptr = WR(q)->q_ptr = connp; connp->conn_dev = conn_dev; @@ -9500,7 +7821,7 @@ tcp_allow_connopt_set(int level, int name) } /* - * this routine gets default values of certain options whose default + * This routine gets default values of certain options whose default * values are maintained by protocol specific code */ /* ARGSUSED */ @@ -9553,321 +7874,102 @@ tcp_opt_default(queue_t *q, int level, int name, uchar_t *ptr) return (sizeof (int)); } +/* + * TCP routine to get the values of options. + */ static int tcp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr) { int *i1 = (int *)ptr; tcp_t *tcp = connp->conn_tcp; - ip6_pkt_t *ipp = &tcp->tcp_sticky_ipp; + conn_opt_arg_t coas; + int retval; + + coas.coa_connp = connp; + coas.coa_ixa = connp->conn_ixa; + coas.coa_ipp = &connp->conn_xmit_ipp; + coas.coa_ancillary = B_FALSE; + coas.coa_changed = 0; switch (level) { case SOL_SOCKET: switch (name) { - case SO_LINGER: { - struct linger *lgr = (struct linger *)ptr; - - lgr->l_onoff = tcp->tcp_linger ? SO_LINGER : 0; - lgr->l_linger = tcp->tcp_lingertime; - } - return (sizeof (struct linger)); - case SO_DEBUG: - *i1 = tcp->tcp_debug ? SO_DEBUG : 0; - break; - case SO_KEEPALIVE: - *i1 = tcp->tcp_ka_enabled ? SO_KEEPALIVE : 0; - break; - case SO_DONTROUTE: - *i1 = tcp->tcp_dontroute ? SO_DONTROUTE : 0; - break; - case SO_USELOOPBACK: - *i1 = tcp->tcp_useloopback ? 
SO_USELOOPBACK : 0; - break; - case SO_BROADCAST: - *i1 = tcp->tcp_broadcast ? SO_BROADCAST : 0; - break; - case SO_REUSEADDR: - *i1 = tcp->tcp_reuseaddr ? SO_REUSEADDR : 0; - break; - case SO_OOBINLINE: - *i1 = tcp->tcp_oobinline ? SO_OOBINLINE : 0; - break; - case SO_DGRAM_ERRIND: - *i1 = tcp->tcp_dgram_errind ? SO_DGRAM_ERRIND : 0; - break; - case SO_TYPE: - *i1 = SOCK_STREAM; - break; - case SO_SNDBUF: - *i1 = tcp->tcp_xmit_hiwater; - break; - case SO_RCVBUF: - *i1 = tcp->tcp_recv_hiwater; - break; case SO_SND_COPYAVOID: *i1 = tcp->tcp_snd_zcopy_on ? SO_SND_COPYAVOID : 0; - break; - case SO_ALLZONES: - *i1 = connp->conn_allzones ? 1 : 0; - break; - case SO_ANON_MLP: - *i1 = connp->conn_anon_mlp; - break; - case SO_MAC_EXEMPT: - *i1 = (connp->conn_mac_mode == CONN_MAC_AWARE); - break; - case SO_MAC_IMPLICIT: - *i1 = (connp->conn_mac_mode == CONN_MAC_IMPLICIT); - break; - case SO_EXCLBIND: - *i1 = tcp->tcp_exclbind ? SO_EXCLBIND : 0; - break; - case SO_PROTOTYPE: - *i1 = IPPROTO_TCP; - break; - case SO_DOMAIN: - *i1 = tcp->tcp_family; - break; + return (sizeof (int)); case SO_ACCEPTCONN: *i1 = (tcp->tcp_state == TCPS_LISTEN); - default: - return (-1); + return (sizeof (int)); } break; case IPPROTO_TCP: switch (name) { case TCP_NODELAY: *i1 = (tcp->tcp_naglim == 1) ? TCP_NODELAY : 0; - break; + return (sizeof (int)); case TCP_MAXSEG: *i1 = tcp->tcp_mss; - break; + return (sizeof (int)); case TCP_NOTIFY_THRESHOLD: *i1 = (int)tcp->tcp_first_timer_threshold; - break; + return (sizeof (int)); case TCP_ABORT_THRESHOLD: *i1 = tcp->tcp_second_timer_threshold; - break; + return (sizeof (int)); case TCP_CONN_NOTIFY_THRESHOLD: *i1 = tcp->tcp_first_ctimer_threshold; - break; + return (sizeof (int)); case TCP_CONN_ABORT_THRESHOLD: *i1 = tcp->tcp_second_ctimer_threshold; - break; - case TCP_RECVDSTADDR: - *i1 = tcp->tcp_recvdstaddr; - break; - case TCP_ANONPRIVBIND: - *i1 = tcp->tcp_anon_priv_bind; - break; - case TCP_EXCLBIND: - *i1 = tcp->tcp_exclbind ? 
TCP_EXCLBIND : 0; - break; + return (sizeof (int)); case TCP_INIT_CWND: *i1 = tcp->tcp_init_cwnd; - break; + return (sizeof (int)); case TCP_KEEPALIVE_THRESHOLD: *i1 = tcp->tcp_ka_interval; - break; + return (sizeof (int)); case TCP_KEEPALIVE_ABORT_THRESHOLD: *i1 = tcp->tcp_ka_abort_thres; - break; + return (sizeof (int)); case TCP_CORK: *i1 = tcp->tcp_cork; - break; - default: - return (-1); + return (sizeof (int)); } break; case IPPROTO_IP: - if (tcp->tcp_family != AF_INET) + if (connp->conn_family != AF_INET) return (-1); switch (name) { case IP_OPTIONS: - case T_IP_OPTIONS: { - /* - * This is compatible with BSD in that in only return - * the reverse source route with the final destination - * as the last entry. The first 4 bytes of the option - * will contain the final destination. - */ - int opt_len; - - opt_len = (char *)tcp->tcp_tcph - (char *)tcp->tcp_ipha; - opt_len -= tcp->tcp_label_len + IP_SIMPLE_HDR_LENGTH; - ASSERT(opt_len >= 0); + case T_IP_OPTIONS: /* Caller ensures enough space */ - if (opt_len > 0) { - /* - * TODO: Do we have to handle getsockopt on an - * initiator as well? - */ - return (ip_opt_get_user(tcp->tcp_ipha, ptr)); - } - return (0); - } - case IP_TOS: - case T_IP_TOS: - *i1 = (int)tcp->tcp_ipha->ipha_type_of_service; - break; - case IP_TTL: - *i1 = (int)tcp->tcp_ipha->ipha_ttl; - break; - case IP_NEXTHOP: - /* Handled at IP level */ - return (-EINVAL); + return (ip_opt_get_user(connp, ptr)); default: - return (-1); + break; } break; + case IPPROTO_IPV6: /* * IPPROTO_IPV6 options are only supported for sockets * that are using IPv6 on the wire. 
*/ - if (tcp->tcp_ipversion != IPV6_VERSION) { + if (connp->conn_ipversion != IPV6_VERSION) { return (-1); } switch (name) { - case IPV6_UNICAST_HOPS: - *i1 = (unsigned int) tcp->tcp_ip6h->ip6_hops; - break; /* goto sizeof (int) option return */ - case IPV6_BOUND_IF: - /* Zero if not set */ - *i1 = tcp->tcp_bound_if; - break; /* goto sizeof (int) option return */ - case IPV6_RECVPKTINFO: - if (tcp->tcp_ipv6_recvancillary & TCP_IPV6_RECVPKTINFO) - *i1 = 1; - else - *i1 = 0; - break; /* goto sizeof (int) option return */ - case IPV6_RECVTCLASS: - if (tcp->tcp_ipv6_recvancillary & TCP_IPV6_RECVTCLASS) - *i1 = 1; - else - *i1 = 0; - break; /* goto sizeof (int) option return */ - case IPV6_RECVHOPLIMIT: - if (tcp->tcp_ipv6_recvancillary & - TCP_IPV6_RECVHOPLIMIT) - *i1 = 1; - else - *i1 = 0; - break; /* goto sizeof (int) option return */ - case IPV6_RECVHOPOPTS: - if (tcp->tcp_ipv6_recvancillary & TCP_IPV6_RECVHOPOPTS) - *i1 = 1; - else - *i1 = 0; - break; /* goto sizeof (int) option return */ - case IPV6_RECVDSTOPTS: - if (tcp->tcp_ipv6_recvancillary & TCP_IPV6_RECVDSTOPTS) - *i1 = 1; - else - *i1 = 0; - break; /* goto sizeof (int) option return */ - case _OLD_IPV6_RECVDSTOPTS: - if (tcp->tcp_ipv6_recvancillary & - TCP_OLD_IPV6_RECVDSTOPTS) - *i1 = 1; - else - *i1 = 0; - break; /* goto sizeof (int) option return */ - case IPV6_RECVRTHDR: - if (tcp->tcp_ipv6_recvancillary & TCP_IPV6_RECVRTHDR) - *i1 = 1; - else - *i1 = 0; - break; /* goto sizeof (int) option return */ - case IPV6_RECVRTHDRDSTOPTS: - if (tcp->tcp_ipv6_recvancillary & - TCP_IPV6_RECVRTDSTOPTS) - *i1 = 1; - else - *i1 = 0; - break; /* goto sizeof (int) option return */ - case IPV6_PKTINFO: { - /* XXX assumes that caller has room for max size! 
*/ - struct in6_pktinfo *pkti; - - pkti = (struct in6_pktinfo *)ptr; - if (ipp->ipp_fields & IPPF_IFINDEX) - pkti->ipi6_ifindex = ipp->ipp_ifindex; - else - pkti->ipi6_ifindex = 0; - if (ipp->ipp_fields & IPPF_ADDR) - pkti->ipi6_addr = ipp->ipp_addr; - else - pkti->ipi6_addr = ipv6_all_zeros; - return (sizeof (struct in6_pktinfo)); - } - case IPV6_TCLASS: - if (ipp->ipp_fields & IPPF_TCLASS) - *i1 = ipp->ipp_tclass; - else - *i1 = IPV6_FLOW_TCLASS( - IPV6_DEFAULT_VERS_AND_FLOW); - break; /* goto sizeof (int) option return */ - case IPV6_NEXTHOP: { - sin6_t *sin6 = (sin6_t *)ptr; - - if (!(ipp->ipp_fields & IPPF_NEXTHOP)) - return (0); - *sin6 = sin6_null; - sin6->sin6_family = AF_INET6; - sin6->sin6_addr = ipp->ipp_nexthop; - return (sizeof (sin6_t)); - } - case IPV6_HOPOPTS: - if (!(ipp->ipp_fields & IPPF_HOPOPTS)) - return (0); - if (ipp->ipp_hopoptslen <= tcp->tcp_label_len) - return (0); - bcopy((char *)ipp->ipp_hopopts + tcp->tcp_label_len, - ptr, ipp->ipp_hopoptslen - tcp->tcp_label_len); - if (tcp->tcp_label_len > 0) { - ptr[0] = ((char *)ipp->ipp_hopopts)[0]; - ptr[1] = (ipp->ipp_hopoptslen - - tcp->tcp_label_len + 7) / 8 - 1; - } - return (ipp->ipp_hopoptslen - tcp->tcp_label_len); - case IPV6_RTHDRDSTOPTS: - if (!(ipp->ipp_fields & IPPF_RTDSTOPTS)) - return (0); - bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen); - return (ipp->ipp_rtdstoptslen); - case IPV6_RTHDR: - if (!(ipp->ipp_fields & IPPF_RTHDR)) - return (0); - bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen); - return (ipp->ipp_rthdrlen); - case IPV6_DSTOPTS: - if (!(ipp->ipp_fields & IPPF_DSTOPTS)) - return (0); - bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen); - return (ipp->ipp_dstoptslen); - case IPV6_SRC_PREFERENCES: - return (ip6_get_src_preferences(connp, - (uint32_t *)ptr)); - case IPV6_PATHMTU: { - struct ip6_mtuinfo *mtuinfo = (struct ip6_mtuinfo *)ptr; - + case IPV6_PATHMTU: if (tcp->tcp_state < TCPS_ESTABLISHED) return (-1); - - return (ip_fill_mtuinfo(&connp->conn_remv6, - 
connp->conn_fport, mtuinfo, - connp->conn_netstack)); - } - default: - return (-1); + break; } break; - default: - return (-1); } - return (sizeof (int)); + mutex_enter(&connp->conn_lock); + retval = conn_opt_get(&coas, level, name, ptr); + mutex_exit(&connp->conn_lock); + return (retval); } /* @@ -9896,7 +7998,6 @@ tcp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name, error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len, tcp_opt_obj.odb_opt_des_arr, tcp_opt_obj.odb_opt_arr_cnt, - tcp_opt_obj.odb_topmost_tpiprovider, B_FALSE, B_TRUE, cr); if (error != 0) { if (error < 0) { @@ -9909,30 +8010,28 @@ tcp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name, error = squeue_synch_enter(sqp, connp, NULL); if (error == ENOMEM) { + kmem_free(optvalp_buf, max_optbuf_len); return (ENOMEM); } len = tcp_opt_get(connp, level, option_name, optvalp_buf); squeue_synch_exit(sqp, connp); - if (len < 0) { - /* - * Pass on to IP - */ + if (len == -1) { kmem_free(optvalp_buf, max_optbuf_len); - return (ip_get_options(connp, level, option_name, - optvalp, optlen, cr)); - } else { - /* - * update optlen and copy option value - */ - t_uscalar_t size = MIN(len, *optlen); - bcopy(optvalp_buf, optvalp, size); - bcopy(&size, optlen, sizeof (size)); - - kmem_free(optvalp_buf, max_optbuf_len); - return (0); + return (EINVAL); } + + /* + * update optlen and copy option value + */ + t_uscalar_t size = MIN(len, *optlen); + + bcopy(optvalp_buf, optvalp, size); + bcopy(&size, optlen, sizeof (size)); + + kmem_free(optvalp_buf, max_optbuf_len); + return (0); } /* @@ -9943,7 +8042,7 @@ tcp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name, int tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, - void *thisdg_attrs, cred_t *cr, mblk_t *mblk) + void *thisdg_attrs, cred_t *cr) { tcp_t *tcp = connp->conn_tcp; int *i1 = (int *)invalp; @@ -9951,6 
+8050,13 @@ tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name, boolean_t checkonly; int reterr; tcp_stack_t *tcps = tcp->tcp_tcps; + conn_opt_arg_t coas; + + coas.coa_connp = connp; + coas.coa_ixa = connp->conn_ixa; + coas.coa_ipp = &connp->conn_xmit_ipp; + coas.coa_ancillary = B_FALSE; + coas.coa_changed = 0; switch (optset_context) { case SETFN_OPTCOM_CHECKONLY: @@ -10016,37 +8122,6 @@ tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name, switch (level) { case SOL_SOCKET: switch (name) { - case SO_LINGER: { - struct linger *lgr = (struct linger *)invalp; - - if (!checkonly) { - if (lgr->l_onoff) { - tcp->tcp_linger = 1; - tcp->tcp_lingertime = lgr->l_linger; - } else { - tcp->tcp_linger = 0; - tcp->tcp_lingertime = 0; - } - /* struct copy */ - *(struct linger *)outvalp = *lgr; - } else { - if (!lgr->l_onoff) { - ((struct linger *) - outvalp)->l_onoff = 0; - ((struct linger *) - outvalp)->l_linger = 0; - } else { - /* struct copy */ - *(struct linger *)outvalp = *lgr; - } - } - *outlenp = sizeof (struct linger); - return (0); - } - case SO_DEBUG: - if (!checkonly) - tcp->tcp_debug = onoff; - break; case SO_KEEPALIVE: if (checkonly) { /* check only case */ @@ -10054,65 +8129,25 @@ tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name, } if (!onoff) { - if (tcp->tcp_ka_enabled) { + if (connp->conn_keepalive) { if (tcp->tcp_ka_tid != 0) { (void) TCP_TIMER_CANCEL(tcp, tcp->tcp_ka_tid); tcp->tcp_ka_tid = 0; } - tcp->tcp_ka_enabled = 0; + connp->conn_keepalive = 0; } break; } - if (!tcp->tcp_ka_enabled) { + if (!connp->conn_keepalive) { /* Crank up the keepalive timer */ tcp->tcp_ka_last_intrvl = 0; tcp->tcp_ka_tid = TCP_TIMER(tcp, tcp_keepalive_killer, MSEC_TO_TICK(tcp->tcp_ka_interval)); - tcp->tcp_ka_enabled = 1; - } - break; - case SO_DONTROUTE: - /* - * SO_DONTROUTE, SO_USELOOPBACK, and SO_BROADCAST are - * only of interest to IP. We track them here only so - * that we can report their current value. 
- */ - if (!checkonly) { - tcp->tcp_dontroute = onoff; - tcp->tcp_connp->conn_dontroute = onoff; + connp->conn_keepalive = 1; } break; - case SO_USELOOPBACK: - if (!checkonly) { - tcp->tcp_useloopback = onoff; - tcp->tcp_connp->conn_loopback = onoff; - } - break; - case SO_BROADCAST: - if (!checkonly) { - tcp->tcp_broadcast = onoff; - tcp->tcp_connp->conn_broadcast = onoff; - } - break; - case SO_REUSEADDR: - if (!checkonly) { - tcp->tcp_reuseaddr = onoff; - tcp->tcp_connp->conn_reuseaddr = onoff; - } - break; - case SO_OOBINLINE: - if (!checkonly) { - tcp->tcp_oobinline = onoff; - if (IPCL_IS_NONSTR(tcp->tcp_connp)) - proto_set_rx_oob_opt(connp, onoff); - } - break; - case SO_DGRAM_ERRIND: - if (!checkonly) - tcp->tcp_dgram_errind = onoff; - break; case SO_SNDBUF: { if (*i1 > tcps->tcps_max_buf) { *outlenp = 0; @@ -10121,11 +8156,11 @@ tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name, if (checkonly) break; - tcp->tcp_xmit_hiwater = *i1; - if (tcps->tcps_snd_lowat_fraction != 0) - tcp->tcp_xmit_lowater = - tcp->tcp_xmit_hiwater / + connp->conn_sndbuf = *i1; + if (tcps->tcps_snd_lowat_fraction != 0) { + connp->conn_sndlowat = connp->conn_sndbuf / tcps->tcps_snd_lowat_fraction; + } (void) tcp_maxpsz_set(tcp, B_TRUE); /* * If we are flow-controlled, recheck the condition. @@ -10135,11 +8170,12 @@ tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name, */ mutex_enter(&tcp->tcp_non_sq_lock); if (tcp->tcp_flow_stopped && - TCP_UNSENT_BYTES(tcp) < tcp->tcp_xmit_hiwater) { + TCP_UNSENT_BYTES(tcp) < connp->conn_sndbuf) { tcp_clrqfull(tcp); } mutex_exit(&tcp->tcp_non_sq_lock); - break; + *outlenp = inlen; + return (0); } case SO_RCVBUF: if (*i1 > tcps->tcps_max_buf) { @@ -10155,43 +8191,20 @@ tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name, * XXX should we return the rwnd here * and tcp_opt_get ? 
*/ - break; + *outlenp = inlen; + return (0); case SO_SND_COPYAVOID: if (!checkonly) { - /* we only allow enable at most once for now */ if (tcp->tcp_loopback || (tcp->tcp_kssl_ctx != NULL) || - (!tcp->tcp_snd_zcopy_aware && - (onoff != 1 || !tcp_zcopy_check(tcp)))) { + (onoff != 1) || !tcp_zcopy_check(tcp)) { *outlenp = 0; return (EOPNOTSUPP); } tcp->tcp_snd_zcopy_aware = 1; } - break; - case SO_RCVTIMEO: - case SO_SNDTIMEO: - /* - * Pass these two options in order for third part - * protocol usage. Here just return directly. - */ + *outlenp = inlen; return (0); - case SO_ALLZONES: - /* Pass option along to IP level for handling */ - return (-EINVAL); - case SO_ANON_MLP: - /* Pass option along to IP level for handling */ - return (-EINVAL); - case SO_MAC_EXEMPT: - /* Pass option along to IP level for handling */ - return (-EINVAL); - case SO_EXCLBIND: - if (!checkonly) - tcp->tcp_exclbind = onoff; - break; - default: - *outlenp = 0; - return (EINVAL); } break; case IPPROTO_TCP: @@ -10217,25 +8230,12 @@ tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name, tcp->tcp_second_ctimer_threshold = *i1; break; case TCP_RECVDSTADDR: - if (tcp->tcp_state > TCPS_LISTEN) - return (EOPNOTSUPP); - if (!checkonly) - tcp->tcp_recvdstaddr = onoff; - break; - case TCP_ANONPRIVBIND: - if ((reterr = secpolicy_net_privaddr(cr, 0, - IPPROTO_TCP)) != 0) { + if (tcp->tcp_state > TCPS_LISTEN) { *outlenp = 0; - return (reterr); - } - if (!checkonly) { - tcp->tcp_anon_priv_bind = onoff; + return (EOPNOTSUPP); } + /* Setting done in conn_opt_set */ break; - case TCP_EXCLBIND: - if (!checkonly) - tcp->tcp_exclbind = onoff; - break; /* goto sizeof (int) option return */ case TCP_INIT_CWND: { uint32_t init_cwnd = *((uint32_t *)invalp); @@ -10278,7 +8278,7 @@ tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name, * keepalive timer. 
*/ if (tcp->tcp_ka_tid != 0) { - ASSERT(tcp->tcp_ka_enabled); + ASSERT(connp->conn_keepalive); (void) TCP_TIMER_CANCEL(tcp, tcp->tcp_ka_tid); tcp->tcp_ka_last_intrvl = 0; @@ -10318,49 +8318,15 @@ tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name, } break; default: - *outlenp = 0; - return (EINVAL); + break; } break; case IPPROTO_IP: - if (tcp->tcp_family != AF_INET) { + if (connp->conn_family != AF_INET) { *outlenp = 0; - return (ENOPROTOOPT); + return (EINVAL); } switch (name) { - case IP_OPTIONS: - case T_IP_OPTIONS: - reterr = tcp_opt_set_header(tcp, checkonly, - invalp, inlen); - if (reterr) { - *outlenp = 0; - return (reterr); - } - /* OK return - copy input buffer into output buffer */ - if (invalp != outvalp) { - /* don't trust bcopy for identical src/dst */ - bcopy(invalp, outvalp, inlen); - } - *outlenp = inlen; - return (0); - case IP_TOS: - case T_IP_TOS: - if (!checkonly) { - tcp->tcp_ipha->ipha_type_of_service = - (uchar_t)*i1; - tcp->tcp_tos = (uchar_t)*i1; - } - break; - case IP_TTL: - if (!checkonly) { - tcp->tcp_ipha->ipha_ttl = (uchar_t)*i1; - tcp->tcp_ttl = (uchar_t)*i1; - } - break; - case IP_BOUND_IF: - case IP_NEXTHOP: - /* Handled at the IP level */ - return (-EINVAL); case IP_SEC_OPT: /* * We should not allow policy setting after @@ -10368,166 +8334,42 @@ tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name, */ if (tcp->tcp_state == TCPS_LISTEN) { return (EINVAL); - } else { - /* Handled at the IP level */ - return (-EINVAL); } - default: - *outlenp = 0; - return (EINVAL); + break; } break; - case IPPROTO_IPV6: { - ip6_pkt_t *ipp; - + case IPPROTO_IPV6: /* * IPPROTO_IPV6 options are only supported for sockets * that are using IPv6 on the wire. 
*/ - if (tcp->tcp_ipversion != IPV6_VERSION) { + if (connp->conn_ipversion != IPV6_VERSION) { *outlenp = 0; - return (ENOPROTOOPT); + return (EINVAL); } - /* - * Only sticky options; no ancillary data - */ - ipp = &tcp->tcp_sticky_ipp; switch (name) { - case IPV6_UNICAST_HOPS: - /* -1 means use default */ - if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { - *outlenp = 0; - return (EINVAL); - } - if (!checkonly) { - if (*i1 == -1) { - tcp->tcp_ip6h->ip6_hops = - ipp->ipp_unicast_hops = - (uint8_t)tcps->tcps_ipv6_hoplimit; - ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; - /* Pass modified value to IP. */ - *i1 = tcp->tcp_ip6h->ip6_hops; - } else { - tcp->tcp_ip6h->ip6_hops = - ipp->ipp_unicast_hops = - (uint8_t)*i1; - ipp->ipp_fields |= IPPF_UNICAST_HOPS; - } - reterr = tcp_build_hdrs(tcp); - if (reterr != 0) - return (reterr); - } - break; - case IPV6_BOUND_IF: - if (!checkonly) { - tcp->tcp_bound_if = *i1; - PASS_OPT_TO_IP(connp); - } - break; - /* - * Set boolean switches for ancillary data delivery - */ case IPV6_RECVPKTINFO: if (!checkonly) { - if (onoff) - tcp->tcp_ipv6_recvancillary |= - TCP_IPV6_RECVPKTINFO; - else - tcp->tcp_ipv6_recvancillary &= - ~TCP_IPV6_RECVPKTINFO; /* Force it to be sent up with the next msg */ tcp->tcp_recvifindex = 0; - PASS_OPT_TO_IP(connp); } break; case IPV6_RECVTCLASS: if (!checkonly) { - if (onoff) - tcp->tcp_ipv6_recvancillary |= - TCP_IPV6_RECVTCLASS; - else - tcp->tcp_ipv6_recvancillary &= - ~TCP_IPV6_RECVTCLASS; - PASS_OPT_TO_IP(connp); + /* Force it to be sent up with the next msg */ + tcp->tcp_recvtclass = 0xffffffffU; } break; case IPV6_RECVHOPLIMIT: if (!checkonly) { - if (onoff) - tcp->tcp_ipv6_recvancillary |= - TCP_IPV6_RECVHOPLIMIT; - else - tcp->tcp_ipv6_recvancillary &= - ~TCP_IPV6_RECVHOPLIMIT; /* Force it to be sent up with the next msg */ tcp->tcp_recvhops = 0xffffffffU; - PASS_OPT_TO_IP(connp); - } - break; - case IPV6_RECVHOPOPTS: - if (!checkonly) { - if (onoff) - tcp->tcp_ipv6_recvancillary |= - TCP_IPV6_RECVHOPOPTS; - else 
- tcp->tcp_ipv6_recvancillary &= - ~TCP_IPV6_RECVHOPOPTS; - PASS_OPT_TO_IP(connp); - } - break; - case IPV6_RECVDSTOPTS: - if (!checkonly) { - if (onoff) - tcp->tcp_ipv6_recvancillary |= - TCP_IPV6_RECVDSTOPTS; - else - tcp->tcp_ipv6_recvancillary &= - ~TCP_IPV6_RECVDSTOPTS; - PASS_OPT_TO_IP(connp); - } - break; - case _OLD_IPV6_RECVDSTOPTS: - if (!checkonly) { - if (onoff) - tcp->tcp_ipv6_recvancillary |= - TCP_OLD_IPV6_RECVDSTOPTS; - else - tcp->tcp_ipv6_recvancillary &= - ~TCP_OLD_IPV6_RECVDSTOPTS; - } - break; - case IPV6_RECVRTHDR: - if (!checkonly) { - if (onoff) - tcp->tcp_ipv6_recvancillary |= - TCP_IPV6_RECVRTHDR; - else - tcp->tcp_ipv6_recvancillary &= - ~TCP_IPV6_RECVRTHDR; - PASS_OPT_TO_IP(connp); - } - break; - case IPV6_RECVRTHDRDSTOPTS: - if (!checkonly) { - if (onoff) - tcp->tcp_ipv6_recvancillary |= - TCP_IPV6_RECVRTDSTOPTS; - else - tcp->tcp_ipv6_recvancillary &= - ~TCP_IPV6_RECVRTDSTOPTS; - PASS_OPT_TO_IP(connp); } break; case IPV6_PKTINFO: - if (inlen != 0 && inlen != sizeof (struct in6_pktinfo)) - return (EINVAL); - if (checkonly) - break; - - if (inlen == 0) { - ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR); - } else { + /* This is an extra check for TCP */ + if (inlen == sizeof (struct in6_pktinfo)) { struct in6_pktinfo *pkti; pkti = (struct in6_pktinfo *)invalp; @@ -10539,219 +8381,8 @@ tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name, */ if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr)) return (EINVAL); - /* - * IP will validate the source address and - * interface index. 
- */ - if (IPCL_IS_NONSTR(tcp->tcp_connp)) { - reterr = ip_set_options(tcp->tcp_connp, - level, name, invalp, inlen, cr); - } else { - reterr = ip6_set_pktinfo(cr, - tcp->tcp_connp, pkti); - } - if (reterr != 0) - return (reterr); - ipp->ipp_ifindex = pkti->ipi6_ifindex; - ipp->ipp_addr = pkti->ipi6_addr; - if (ipp->ipp_ifindex != 0) - ipp->ipp_fields |= IPPF_IFINDEX; - else - ipp->ipp_fields &= ~IPPF_IFINDEX; - if (!IN6_IS_ADDR_UNSPECIFIED(&ipp->ipp_addr)) - ipp->ipp_fields |= IPPF_ADDR; - else - ipp->ipp_fields &= ~IPPF_ADDR; - } - reterr = tcp_build_hdrs(tcp); - if (reterr != 0) - return (reterr); - break; - case IPV6_TCLASS: - if (inlen != 0 && inlen != sizeof (int)) - return (EINVAL); - if (checkonly) - break; - - if (inlen == 0) { - ipp->ipp_fields &= ~IPPF_TCLASS; - } else { - if (*i1 > 255 || *i1 < -1) - return (EINVAL); - if (*i1 == -1) { - ipp->ipp_tclass = 0; - *i1 = 0; - } else { - ipp->ipp_tclass = *i1; - } - ipp->ipp_fields |= IPPF_TCLASS; - } - reterr = tcp_build_hdrs(tcp); - if (reterr != 0) - return (reterr); - break; - case IPV6_NEXTHOP: - /* - * IP will verify that the nexthop is reachable - * and fail for sticky options. - */ - if (inlen != 0 && inlen != sizeof (sin6_t)) - return (EINVAL); - if (checkonly) - break; - - if (inlen == 0) { - ipp->ipp_fields &= ~IPPF_NEXTHOP; - } else { - sin6_t *sin6 = (sin6_t *)invalp; - - if (sin6->sin6_family != AF_INET6) - return (EAFNOSUPPORT); - if (IN6_IS_ADDR_V4MAPPED( - &sin6->sin6_addr)) - return (EADDRNOTAVAIL); - ipp->ipp_nexthop = sin6->sin6_addr; - if (!IN6_IS_ADDR_UNSPECIFIED( - &ipp->ipp_nexthop)) - ipp->ipp_fields |= IPPF_NEXTHOP; - else - ipp->ipp_fields &= ~IPPF_NEXTHOP; - } - reterr = tcp_build_hdrs(tcp); - if (reterr != 0) - return (reterr); - PASS_OPT_TO_IP(connp); - break; - case IPV6_HOPOPTS: { - ip6_hbh_t *hopts = (ip6_hbh_t *)invalp; - - /* - * Sanity checks - minimum size, size a multiple of - * eight bytes, and matching size passed in. 
- */ - if (inlen != 0 && - inlen != (8 * (hopts->ip6h_len + 1))) - return (EINVAL); - - if (checkonly) - break; - - reterr = optcom_pkt_set(invalp, inlen, B_TRUE, - (uchar_t **)&ipp->ipp_hopopts, - &ipp->ipp_hopoptslen, tcp->tcp_label_len); - if (reterr != 0) - return (reterr); - if (ipp->ipp_hopoptslen == 0) - ipp->ipp_fields &= ~IPPF_HOPOPTS; - else - ipp->ipp_fields |= IPPF_HOPOPTS; - reterr = tcp_build_hdrs(tcp); - if (reterr != 0) - return (reterr); - break; - } - case IPV6_RTHDRDSTOPTS: { - ip6_dest_t *dopts = (ip6_dest_t *)invalp; - - /* - * Sanity checks - minimum size, size a multiple of - * eight bytes, and matching size passed in. - */ - if (inlen != 0 && - inlen != (8 * (dopts->ip6d_len + 1))) - return (EINVAL); - - if (checkonly) - break; - - reterr = optcom_pkt_set(invalp, inlen, B_TRUE, - (uchar_t **)&ipp->ipp_rtdstopts, - &ipp->ipp_rtdstoptslen, 0); - if (reterr != 0) - return (reterr); - if (ipp->ipp_rtdstoptslen == 0) - ipp->ipp_fields &= ~IPPF_RTDSTOPTS; - else - ipp->ipp_fields |= IPPF_RTDSTOPTS; - reterr = tcp_build_hdrs(tcp); - if (reterr != 0) - return (reterr); - break; - } - case IPV6_DSTOPTS: { - ip6_dest_t *dopts = (ip6_dest_t *)invalp; - - /* - * Sanity checks - minimum size, size a multiple of - * eight bytes, and matching size passed in. - */ - if (inlen != 0 && - inlen != (8 * (dopts->ip6d_len + 1))) - return (EINVAL); - - if (checkonly) - break; - - reterr = optcom_pkt_set(invalp, inlen, B_TRUE, - (uchar_t **)&ipp->ipp_dstopts, - &ipp->ipp_dstoptslen, 0); - if (reterr != 0) - return (reterr); - if (ipp->ipp_dstoptslen == 0) - ipp->ipp_fields &= ~IPPF_DSTOPTS; - else - ipp->ipp_fields |= IPPF_DSTOPTS; - reterr = tcp_build_hdrs(tcp); - if (reterr != 0) - return (reterr); - break; - } - case IPV6_RTHDR: { - ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp; - - /* - * Sanity checks - minimum size, size a multiple of - * eight bytes, and matching size passed in. 
- */ - if (inlen != 0 && - inlen != (8 * (rt->ip6r_len + 1))) - return (EINVAL); - - if (checkonly) - break; - - reterr = optcom_pkt_set(invalp, inlen, B_TRUE, - (uchar_t **)&ipp->ipp_rthdr, - &ipp->ipp_rthdrlen, 0); - if (reterr != 0) - return (reterr); - if (ipp->ipp_rthdrlen == 0) - ipp->ipp_fields &= ~IPPF_RTHDR; - else - ipp->ipp_fields |= IPPF_RTHDR; - reterr = tcp_build_hdrs(tcp); - if (reterr != 0) - return (reterr); - break; - } - case IPV6_V6ONLY: - if (!checkonly) { - tcp->tcp_connp->conn_ipv6_v6only = onoff; } break; - case IPV6_USE_MIN_MTU: - if (inlen != sizeof (int)) - return (EINVAL); - - if (*i1 < -1 || *i1 > 1) - return (EINVAL); - - if (checkonly) - break; - - ipp->ipp_fields |= IPPF_USE_MIN_MTU; - ipp->ipp_use_min_mtu = *i1; - break; case IPV6_SEC_OPT: /* * We should not allow policy setting after @@ -10759,30 +8390,18 @@ tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name, */ if (tcp->tcp_state == TCPS_LISTEN) { return (EINVAL); - } else { - /* Handled at the IP level */ - return (-EINVAL); - } - case IPV6_SRC_PREFERENCES: - if (inlen != sizeof (uint32_t)) - return (EINVAL); - reterr = ip6_set_src_preferences(tcp->tcp_connp, - *(uint32_t *)invalp); - if (reterr != 0) { - *outlenp = 0; - return (reterr); } break; - default: - *outlenp = 0; - return (EINVAL); } break; - } /* end IPPROTO_IPV6 */ - default: + } + reterr = conn_opt_set(&coas, level, name, inlen, invalp, + checkonly, cr); + if (reterr != 0) { *outlenp = 0; - return (EINVAL); + return (reterr); } + /* * Common case of OK return with outval same as inval */ @@ -10791,6 +8410,45 @@ tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name, (void) bcopy(invalp, outvalp, inlen); } *outlenp = inlen; + + if (coas.coa_changed & COA_HEADER_CHANGED) { + reterr = tcp_build_hdrs(tcp); + if (reterr != 0) + return (reterr); + } + if (coas.coa_changed & COA_ROUTE_CHANGED) { + in6_addr_t nexthop; + + /* + * If we are connected we re-cache the information. 
+ * We ignore errors to preserve BSD behavior. + * Note that we don't redo IPsec policy lookup here + * since the final destination (or source) didn't change. + */ + ip_attr_nexthop(&connp->conn_xmit_ipp, connp->conn_ixa, + &connp->conn_faddr_v6, &nexthop); + + if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) && + !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) { + (void) ip_attr_connect(connp, connp->conn_ixa, + &connp->conn_laddr_v6, &connp->conn_faddr_v6, + &nexthop, connp->conn_fport, NULL, NULL, + IPDF_VERIFY_DST); + } + } + if ((coas.coa_changed & COA_SNDBUF_CHANGED) && !IPCL_IS_NONSTR(connp)) { + connp->conn_wq->q_hiwat = connp->conn_sndbuf; + } + if (coas.coa_changed & COA_WROFF_CHANGED) { + connp->conn_wroff = connp->conn_ht_iphc_allocated + + tcps->tcps_wroff_xtra; + (void) proto_set_tx_wroff(connp->conn_rq, connp, + connp->conn_wroff); + } + if (coas.coa_changed & COA_OOBINLINE_CHANGED) { + if (IPCL_IS_NONSTR(connp)) + proto_set_rx_oob_opt(connp, onoff); + } return (0); } @@ -10798,12 +8456,12 @@ tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name, int tcp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, - void *thisdg_attrs, cred_t *cr, mblk_t *mblk) + void *thisdg_attrs, cred_t *cr) { conn_t *connp = Q_TO_CONN(q); return (tcp_opt_set(connp, optset_context, level, name, inlen, invalp, - outlenp, outvalp, thisdg_attrs, cr, mblk)); + outlenp, outvalp, thisdg_attrs, cr)); } int @@ -10843,7 +8501,6 @@ tcp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name, error = proto_opt_check(level, option_name, optlen, NULL, tcp_opt_obj.odb_opt_des_arr, tcp_opt_obj.odb_opt_arr_cnt, - tcp_opt_obj.odb_topmost_tpiprovider, B_TRUE, B_FALSE, cr); if (error != 0) { @@ -10856,292 +8513,75 @@ tcp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name, error = tcp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name, optlen, 
(uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp, - NULL, cr, NULL); + NULL, cr); squeue_synch_exit(sqp, connp); - if (error < 0) { - /* - * Pass on to ip - */ - error = ip_set_options(connp, level, option_name, optvalp, - optlen, cr); - } + ASSERT(error >= 0); + return (error); } /* - * Update tcp_sticky_hdrs based on tcp_sticky_ipp. - * The headers include ip6i_t (if needed), ip6_t, any sticky extension + * Build/update the tcp header template (in conn_ht_iphc) based on + * conn_xmit_ipp. The headers include ip6_t, any extension * headers, and the maximum size tcp header (to avoid reallocation * on the fly for additional tcp options). + * + * Assumes the caller has already set conn_{faddr,laddr,fport,lport,flowinfo}. * Returns failure if can't allocate memory. */ static int tcp_build_hdrs(tcp_t *tcp) { - char *hdrs; - uint_t hdrs_len; - ip6i_t *ip6i; - char buf[TCP_MAX_HDR_LENGTH]; - ip6_pkt_t *ipp = &tcp->tcp_sticky_ipp; - in6_addr_t src, dst; tcp_stack_t *tcps = tcp->tcp_tcps; - conn_t *connp = tcp->tcp_connp; + conn_t *connp = tcp->tcp_connp; + tcpha_t *tcpha; + uint32_t cksum; + int error; - /* - * save the existing tcp header and source/dest IP addresses - */ - bcopy(tcp->tcp_tcph, buf, tcp->tcp_tcp_hdr_len); - src = tcp->tcp_ip6h->ip6_src; - dst = tcp->tcp_ip6h->ip6_dst; - hdrs_len = ip_total_hdrs_len_v6(ipp) + TCP_MAX_HDR_LENGTH; - ASSERT(hdrs_len != 0); - if (hdrs_len > tcp->tcp_iphc_len) { - /* Need to reallocate */ - hdrs = kmem_zalloc(hdrs_len, KM_NOSLEEP); - if (hdrs == NULL) - return (ENOMEM); - if (tcp->tcp_iphc != NULL) { - if (tcp->tcp_hdr_grown) { - kmem_free(tcp->tcp_iphc, tcp->tcp_iphc_len); - } else { - bzero(tcp->tcp_iphc, tcp->tcp_iphc_len); - kmem_cache_free(tcp_iphc_cache, tcp->tcp_iphc); - } - tcp->tcp_iphc_len = 0; - } - ASSERT(tcp->tcp_iphc_len == 0); - tcp->tcp_iphc = hdrs; - tcp->tcp_iphc_len = hdrs_len; - tcp->tcp_hdr_grown = B_TRUE; - } - ip_build_hdrs_v6((uchar_t *)tcp->tcp_iphc, - hdrs_len - TCP_MAX_HDR_LENGTH, ipp, 
IPPROTO_TCP); + /* Grab lock to satisfy ASSERT; TCP is serialized using squeue */ + mutex_enter(&connp->conn_lock); + error = conn_build_hdr_template(connp, TCP_MIN_HEADER_LENGTH, + TCP_MAX_TCP_OPTIONS_LENGTH, &connp->conn_laddr_v6, + &connp->conn_faddr_v6, connp->conn_flowinfo); + mutex_exit(&connp->conn_lock); + if (error != 0) + return (error); - /* Set header fields not in ipp */ - if (ipp->ipp_fields & IPPF_HAS_IP6I) { - ip6i = (ip6i_t *)tcp->tcp_iphc; - tcp->tcp_ip6h = (ip6_t *)&ip6i[1]; - } else { - tcp->tcp_ip6h = (ip6_t *)tcp->tcp_iphc; - } /* - * tcp->tcp_ip_hdr_len will include ip6i_t if there is one. - * - * tcp->tcp_tcp_hdr_len doesn't change here. + * Any routing header/option has been massaged. The checksum difference + * is stored in conn_sum for later use. */ - tcp->tcp_ip_hdr_len = hdrs_len - TCP_MAX_HDR_LENGTH; - tcp->tcp_tcph = (tcph_t *)(tcp->tcp_iphc + tcp->tcp_ip_hdr_len); - tcp->tcp_hdr_len = tcp->tcp_ip_hdr_len + tcp->tcp_tcp_hdr_len; + tcpha = (tcpha_t *)connp->conn_ht_ulp; + tcp->tcp_tcpha = tcpha; - bcopy(buf, tcp->tcp_tcph, tcp->tcp_tcp_hdr_len); - - tcp->tcp_ip6h->ip6_src = src; - tcp->tcp_ip6h->ip6_dst = dst; + tcpha->tha_lport = connp->conn_lport; + tcpha->tha_fport = connp->conn_fport; + tcpha->tha_sum = 0; + tcpha->tha_offset_and_reserved = (5 << 4); /* - * If the hop limit was not set by ip_build_hdrs_v6(), set it to - * the default value for TCP. - */ - if (!(ipp->ipp_fields & IPPF_UNICAST_HOPS)) - tcp->tcp_ip6h->ip6_hops = tcps->tcps_ipv6_hoplimit; - - /* - * If we're setting extension headers after a connection - * has been established, and if we have a routing header - * among the extension headers, call ip_massage_options_v6 to - * manipulate the routing header/ip6_dst set the checksum - * difference in the tcp header template. - * (This happens in tcp_connect_ipv6 if the routing header - * is set prior to the connect.) - * Set the tcp_sum to zero first in case we've cleared a - * routing header or don't have one at all. 
+ * IP wants our header length in the checksum field to + * allow it to perform a single pseudo-header+checksum + * calculation on behalf of TCP. + * Include the adjustment for a source route once IP_OPTIONS is set. */ - tcp->tcp_sum = 0; - if ((tcp->tcp_state >= TCPS_SYN_SENT) && - (tcp->tcp_ipp_fields & IPPF_RTHDR)) { - ip6_rthdr_t *rth = ip_find_rthdr_v6(tcp->tcp_ip6h, - (uint8_t *)tcp->tcp_tcph); - if (rth != NULL) { - tcp->tcp_sum = ip_massage_options_v6(tcp->tcp_ip6h, - rth, tcps->tcps_netstack); - tcp->tcp_sum = ntohs((tcp->tcp_sum & 0xFFFF) + - (tcp->tcp_sum >> 16)); - } - } - - /* Try to get everything in a single mblk */ - (void) proto_set_tx_wroff(tcp->tcp_rq, connp, - hdrs_len + tcps->tcps_wroff_xtra); - return (0); -} - -/* - * Transfer any source route option from ipha to buf/dst in reversed form. - */ -static int -tcp_opt_rev_src_route(ipha_t *ipha, char *buf, uchar_t *dst) -{ - ipoptp_t opts; - uchar_t *opt; - uint8_t optval; - uint8_t optlen; - uint32_t len = 0; - - for (optval = ipoptp_first(&opts, ipha); - optval != IPOPT_EOL; - optval = ipoptp_next(&opts)) { - opt = opts.ipoptp_cur; - optlen = opts.ipoptp_len; - switch (optval) { - int off1, off2; - case IPOPT_SSRR: - case IPOPT_LSRR: - - /* Reverse source route */ - /* - * First entry should be the next to last one in the - * current source route (the last entry is our - * address.) - * The last entry should be the final destination. - */ - buf[IPOPT_OPTVAL] = (uint8_t)optval; - buf[IPOPT_OLEN] = (uint8_t)optlen; - off1 = IPOPT_MINOFF_SR - 1; - off2 = opt[IPOPT_OFFSET] - IP_ADDR_LEN - 1; - if (off2 < 0) { - /* No entries in source route */ - break; - } - bcopy(opt + off2, dst, IP_ADDR_LEN); - /* - * Note: use src since ipha has not had its src - * and dst reversed (it is in the state it was - * received. 
- */ - bcopy(&ipha->ipha_src, buf + off2, - IP_ADDR_LEN); - off2 -= IP_ADDR_LEN; - - while (off2 > 0) { - bcopy(opt + off2, buf + off1, - IP_ADDR_LEN); - off1 += IP_ADDR_LEN; - off2 -= IP_ADDR_LEN; - } - buf[IPOPT_OFFSET] = IPOPT_MINOFF_SR; - buf += optlen; - len += optlen; - break; - } - } -done: - /* Pad the resulting options */ - while (len & 0x3) { - *buf++ = IPOPT_EOL; - len++; - } - return (len); -} - - -/* - * Extract and revert a source route from ipha (if any) - * and then update the relevant fields in both tcp_t and the standard header. - */ -static void -tcp_opt_reverse(tcp_t *tcp, ipha_t *ipha) -{ - char buf[TCP_MAX_HDR_LENGTH]; - uint_t tcph_len; - int len; - - ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION); - len = IPH_HDR_LENGTH(ipha); - if (len == IP_SIMPLE_HDR_LENGTH) - /* Nothing to do */ - return; - if (len > IP_SIMPLE_HDR_LENGTH + TCP_MAX_IP_OPTIONS_LENGTH || - (len & 0x3)) - return; - - tcph_len = tcp->tcp_tcp_hdr_len; - bcopy(tcp->tcp_tcph, buf, tcph_len); - tcp->tcp_sum = (tcp->tcp_ipha->ipha_dst >> 16) + - (tcp->tcp_ipha->ipha_dst & 0xffff); - len = tcp_opt_rev_src_route(ipha, (char *)tcp->tcp_ipha + - IP_SIMPLE_HDR_LENGTH, (uchar_t *)&tcp->tcp_ipha->ipha_dst); - len += IP_SIMPLE_HDR_LENGTH; - tcp->tcp_sum -= ((tcp->tcp_ipha->ipha_dst >> 16) + - (tcp->tcp_ipha->ipha_dst & 0xffff)); - if ((int)tcp->tcp_sum < 0) - tcp->tcp_sum--; - tcp->tcp_sum = (tcp->tcp_sum & 0xFFFF) + (tcp->tcp_sum >> 16); - tcp->tcp_sum = ntohs((tcp->tcp_sum & 0xFFFF) + (tcp->tcp_sum >> 16)); - tcp->tcp_tcph = (tcph_t *)((char *)tcp->tcp_ipha + len); - bcopy(buf, tcp->tcp_tcph, tcph_len); - tcp->tcp_ip_hdr_len = len; - tcp->tcp_ipha->ipha_version_and_hdr_length = - (IP_VERSION << 4) | (len >> 2); - len += tcph_len; - tcp->tcp_hdr_len = len; -} - -/* - * Copy the standard header into its new location, - * lay in the new options and then update the relevant - * fields in both tcp_t and the standard header. 
- */ -static int -tcp_opt_set_header(tcp_t *tcp, boolean_t checkonly, uchar_t *ptr, uint_t len) -{ - uint_t tcph_len; - uint8_t *ip_optp; - tcph_t *new_tcph; - tcp_stack_t *tcps = tcp->tcp_tcps; - conn_t *connp = tcp->tcp_connp; - - if ((len > TCP_MAX_IP_OPTIONS_LENGTH) || (len & 0x3)) - return (EINVAL); - - if (len > IP_MAX_OPT_LENGTH - tcp->tcp_label_len) - return (EINVAL); - - if (checkonly) { - /* - * do not really set, just pretend to - T_CHECK - */ - return (0); - } + cksum = sizeof (tcpha_t) + connp->conn_sum; + cksum = (cksum >> 16) + (cksum & 0xFFFF); + ASSERT(cksum < 0x10000); + tcpha->tha_sum = htons(cksum); - ip_optp = (uint8_t *)tcp->tcp_ipha + IP_SIMPLE_HDR_LENGTH; - if (tcp->tcp_label_len > 0) { - int padlen; - uint8_t opt; + if (connp->conn_ipversion == IPV4_VERSION) + tcp->tcp_ipha = (ipha_t *)connp->conn_ht_iphc; + else + tcp->tcp_ip6h = (ip6_t *)connp->conn_ht_iphc; - /* convert list termination to no-ops */ - padlen = tcp->tcp_label_len - ip_optp[IPOPT_OLEN]; - ip_optp += ip_optp[IPOPT_OLEN]; - opt = len > 0 ? IPOPT_NOP : IPOPT_EOL; - while (--padlen >= 0) - *ip_optp++ = opt; - } - tcph_len = tcp->tcp_tcp_hdr_len; - new_tcph = (tcph_t *)(ip_optp + len); - ovbcopy(tcp->tcp_tcph, new_tcph, tcph_len); - tcp->tcp_tcph = new_tcph; - bcopy(ptr, ip_optp, len); - - len += IP_SIMPLE_HDR_LENGTH + tcp->tcp_label_len; - - tcp->tcp_ip_hdr_len = len; - tcp->tcp_ipha->ipha_version_and_hdr_length = - (IP_VERSION << 4) | (len >> 2); - tcp->tcp_hdr_len = len + tcph_len; - if (!TCP_IS_DETACHED(tcp)) { - /* Always allocate room for all options. 
*/ - (void) proto_set_tx_wroff(tcp->tcp_rq, connp, - TCP_MAX_COMBINED_HEADER_LENGTH + tcps->tcps_wroff_xtra); + if (connp->conn_ht_iphc_allocated + tcps->tcps_wroff_xtra > + connp->conn_wroff) { + connp->conn_wroff = connp->conn_ht_iphc_allocated + + tcps->tcps_wroff_xtra; + (void) proto_set_tx_wroff(connp->conn_rq, connp, + connp->conn_wroff); } return (0); } @@ -11184,36 +8624,6 @@ tcp_param_register(IDP *ndp, tcpparam_t *tcppa, int cnt, tcp_stack_t *tcps) nd_free(ndp); return (B_FALSE); } - tcps->tcps_mdt_head_param = kmem_zalloc(sizeof (tcpparam_t), - KM_SLEEP); - bcopy(&lcl_tcp_mdt_head_param, tcps->tcps_mdt_head_param, - sizeof (tcpparam_t)); - if (!nd_load(ndp, tcps->tcps_mdt_head_param->tcp_param_name, - tcp_param_get, tcp_param_set_aligned, - (caddr_t)tcps->tcps_mdt_head_param)) { - nd_free(ndp); - return (B_FALSE); - } - tcps->tcps_mdt_tail_param = kmem_zalloc(sizeof (tcpparam_t), - KM_SLEEP); - bcopy(&lcl_tcp_mdt_tail_param, tcps->tcps_mdt_tail_param, - sizeof (tcpparam_t)); - if (!nd_load(ndp, tcps->tcps_mdt_tail_param->tcp_param_name, - tcp_param_get, tcp_param_set_aligned, - (caddr_t)tcps->tcps_mdt_tail_param)) { - nd_free(ndp); - return (B_FALSE); - } - tcps->tcps_mdt_max_pbufs_param = kmem_zalloc(sizeof (tcpparam_t), - KM_SLEEP); - bcopy(&lcl_tcp_mdt_max_pbufs_param, tcps->tcps_mdt_max_pbufs_param, - sizeof (tcpparam_t)); - if (!nd_load(ndp, tcps->tcps_mdt_max_pbufs_param->tcp_param_name, - tcp_param_get, tcp_param_set_aligned, - (caddr_t)tcps->tcps_mdt_max_pbufs_param)) { - nd_free(ndp); - return (B_FALSE); - } if (!nd_load(ndp, "tcp_extra_priv_ports", tcp_extra_priv_ports_get, NULL, NULL)) { nd_free(ndp); @@ -11248,7 +8658,7 @@ tcp_param_register(IDP *ndp, tcpparam_t *tcppa, int cnt, tcp_stack_t *tcps) return (B_TRUE); } -/* ndd set routine for tcp_wroff_xtra, tcp_mdt_hdr_{head,tail}_min. */ +/* ndd set routine for tcp_wroff_xtra. 
*/ /* ARGSUSED */ static int tcp_param_set_aligned(queue_t *q, mblk_t *mp, char *value, caddr_t cp, @@ -11307,6 +8717,7 @@ tcp_reass(tcp_t *tcp, mblk_t *mp, uint32_t start) uint32_t u1; tcp_stack_t *tcps = tcp->tcp_tcps; + /* Walk through all the new pieces. */ do { ASSERT((uintptr_t)(mp->b_wptr - mp->b_rptr) <= @@ -11433,9 +8844,10 @@ tcp_rwnd_reopen(tcp_t *tcp) { uint_t ret = 0; uint_t thwin; + conn_t *connp = tcp->tcp_connp; /* Learn the latest rwnd information that we sent to the other side. */ - thwin = ((uint_t)BE16_TO_U16(tcp->tcp_tcph->th_win)) + thwin = ((uint_t)ntohs(tcp->tcp_tcpha->tha_win)) << tcp->tcp_rcv_ws; /* This is peer's calculated send window (our receive window). */ thwin -= tcp->tcp_rnxt - tcp->tcp_rack; @@ -11444,7 +8856,7 @@ tcp_rwnd_reopen(tcp_t *tcp) * SWS avoidance. This means that we need to check the increase of * of receive window is at least 1 MSS. */ - if (tcp->tcp_recv_hiwater - thwin >= tcp->tcp_mss) { + if (connp->conn_rcvbuf - thwin >= tcp->tcp_mss) { /* * If the window that the other side knows is less than max * deferred acks segments, send an update immediately. @@ -11453,7 +8865,7 @@ tcp_rwnd_reopen(tcp_t *tcp) BUMP_MIB(&tcp->tcp_tcps->tcps_mib, tcpOutWinUpdate); ret = TH_ACK_NEEDED; } - tcp->tcp_rwnd = tcp->tcp_recv_hiwater; + tcp->tcp_rwnd = connp->conn_rcvbuf; } return (ret); } @@ -11469,7 +8881,7 @@ tcp_rcv_drain(tcp_t *tcp) #ifdef DEBUG uint_t cnt = 0; #endif - queue_t *q = tcp->tcp_rq; + queue_t *q = tcp->tcp_connp->conn_rq; /* Can't drain on an eager connection */ if (tcp->tcp_listener != NULL) @@ -11511,7 +8923,7 @@ tcp_rcv_drain(tcp_t *tcp) if ((tcp->tcp_kssl_ctx != NULL) && (DB_TYPE(mp) == M_DATA)) { DTRACE_PROBE1(kssl_mblk__ksslinput_rcvdrain, mblk_t *, mp); - tcp_kssl_input(tcp, mp); + tcp_kssl_input(tcp, mp, NULL); continue; } putnext(q, mp); @@ -11538,11 +8950,22 @@ tcp_rcv_drain(tcp_t *tcp) * Other messages are added as new (b_next) elements. 
*/ void -tcp_rcv_enqueue(tcp_t *tcp, mblk_t *mp, uint_t seg_len) +tcp_rcv_enqueue(tcp_t *tcp, mblk_t *mp, uint_t seg_len, cred_t *cr) { ASSERT(seg_len == msgdsize(mp)); ASSERT(tcp->tcp_rcv_list == NULL || tcp->tcp_rcv_last_head != NULL); + if (is_system_labeled()) { + ASSERT(cr != NULL || msg_getcred(mp, NULL) != NULL); + /* + * Provide for protocols above TCP such as RPC. NOPID leaves + * db_cpid unchanged. + * The cred could have already been set. + */ + if (cr != NULL) + mblk_setcred(mp, cr, NOPID); + } + if (tcp->tcp_rcv_list == NULL) { ASSERT(tcp->tcp_rcv_last_head == NULL); tcp->tcp_rcv_list = mp; @@ -11562,176 +8985,6 @@ tcp_rcv_enqueue(tcp_t *tcp, mblk_t *mp, uint_t seg_len) tcp->tcp_rwnd -= seg_len; } -/* - * DEFAULT TCP ENTRY POINT via squeue on READ side. - * - * This is the default entry function into TCP on the read side. TCP is - * always entered via squeue i.e. using squeue's for mutual exclusion. - * When classifier does a lookup to find the tcp, it also puts a reference - * on the conn structure associated so the tcp is guaranteed to exist - * when we come here. We still need to check the state because it might - * as well has been closed. The squeue processing function i.e. squeue_enter, - * is responsible for doing the CONN_DEC_REF. - * - * Apart from the default entry point, IP also sends packets directly to - * tcp_rput_data for AF_INET fast path and tcp_conn_request for incoming - * connections. - */ -boolean_t tcp_outbound_squeue_switch = B_FALSE; -void -tcp_input(void *arg, mblk_t *mp, void *arg2) -{ - conn_t *connp = (conn_t *)arg; - tcp_t *tcp = (tcp_t *)connp->conn_tcp; - - /* arg2 is the sqp */ - ASSERT(arg2 != NULL); - ASSERT(mp != NULL); - - /* - * Don't accept any input on a closed tcp as this TCP logically does - * not exist on the system. Don't proceed further with this TCP. - * For eg. this packet could trigger another close of this tcp - * which would be disastrous for tcp_refcnt. 
tcp_close_detached / - * tcp_clean_death / tcp_closei_local must be called at most once - * on a TCP. In this case we need to refeed the packet into the - * classifier and figure out where the packet should go. Need to - * preserve the recv_ill somehow. Until we figure that out, for - * now just drop the packet if we can't classify the packet. - */ - if (tcp->tcp_state == TCPS_CLOSED || - tcp->tcp_state == TCPS_BOUND) { - conn_t *new_connp; - ip_stack_t *ipst = tcp->tcp_tcps->tcps_netstack->netstack_ip; - - new_connp = ipcl_classify(mp, connp->conn_zoneid, ipst); - if (new_connp != NULL) { - tcp_reinput(new_connp, mp, arg2); - return; - } - /* We failed to classify. For now just drop the packet */ - freemsg(mp); - return; - } - - if (DB_TYPE(mp) != M_DATA) { - tcp_rput_common(tcp, mp); - return; - } - - if (mp->b_datap->db_struioflag & STRUIO_CONNECT) { - squeue_t *final_sqp; - - mp->b_datap->db_struioflag &= ~STRUIO_CONNECT; - final_sqp = (squeue_t *)DB_CKSUMSTART(mp); - DB_CKSUMSTART(mp) = 0; - if (tcp->tcp_state == TCPS_SYN_SENT && - connp->conn_final_sqp == NULL && - tcp_outbound_squeue_switch) { - ASSERT(connp->conn_initial_sqp == connp->conn_sqp); - connp->conn_final_sqp = final_sqp; - if (connp->conn_final_sqp != connp->conn_sqp) { - CONN_INC_REF(connp); - SQUEUE_SWITCH(connp, connp->conn_final_sqp); - SQUEUE_ENTER_ONE(connp->conn_sqp, mp, - tcp_rput_data, connp, ip_squeue_flag, - SQTAG_CONNECT_FINISH); - return; - } - } - } - tcp_rput_data(connp, mp, arg2); -} - -/* - * The read side put procedure. - * The packets passed up by ip are assume to be aligned according to - * OK_32PTR and the IP+TCP headers fitting in the first mblk. - */ -static void -tcp_rput_common(tcp_t *tcp, mblk_t *mp) -{ - /* - * tcp_rput_data() does not expect M_CTL except for the case - * where tcp_ipv6_recvancillary is set and we get a IN_PKTINFO - * type. 
Need to make sure that any other M_CTLs don't make - * it to tcp_rput_data since it is not expecting any and doesn't - * check for it. - */ - if (DB_TYPE(mp) == M_CTL) { - switch (*(uint32_t *)(mp->b_rptr)) { - case TCP_IOC_ABORT_CONN: - /* - * Handle connection abort request. - */ - tcp_ioctl_abort_handler(tcp, mp); - return; - case IPSEC_IN: - /* - * Only secure icmp arrive in TCP and they - * don't go through data path. - */ - tcp_icmp_error(tcp, mp); - return; - case IN_PKTINFO: - /* - * Handle IPV6_RECVPKTINFO socket option on AF_INET6 - * sockets that are receiving IPv4 traffic. tcp - */ - ASSERT(tcp->tcp_family == AF_INET6); - ASSERT(tcp->tcp_ipv6_recvancillary & - TCP_IPV6_RECVPKTINFO); - tcp_rput_data(tcp->tcp_connp, mp, - tcp->tcp_connp->conn_sqp); - return; - case MDT_IOC_INFO_UPDATE: - /* - * Handle Multidata information update; the - * following routine will free the message. - */ - if (tcp->tcp_connp->conn_mdt_ok) { - tcp_mdt_update(tcp, - &((ip_mdt_info_t *)mp->b_rptr)->mdt_capab, - B_FALSE); - } - freemsg(mp); - return; - case LSO_IOC_INFO_UPDATE: - /* - * Handle LSO information update; the following - * routine will free the message. - */ - if (tcp->tcp_connp->conn_lso_ok) { - tcp_lso_update(tcp, - &((ip_lso_info_t *)mp->b_rptr)->lso_capab); - } - freemsg(mp); - return; - default: - /* - * tcp_icmp_err() will process the M_CTL packets. - * Non-ICMP packets, if any, will be discarded in - * tcp_icmp_err(). We will process the ICMP packet - * even if we are TCP_IS_DETACHED_NONEAGER as the - * incoming ICMP packet may result in changing - * the tcp_mss, which we would need if we have - * packets to retransmit. - */ - tcp_icmp_error(tcp, mp); - return; - } - } - - /* No point processing the message if tcp is already closed */ - if (TCP_IS_DETACHED_NONEAGER(tcp)) { - freemsg(mp); - return; - } - - tcp_rput_other(tcp, mp); -} - - /* The minimum of smoothed mean deviation in RTO calculation. 
*/ #define TCP_SD_MIN 400 @@ -11885,12 +9138,12 @@ tcp_get_seg_mp(tcp_t *tcp, uint32_t seq, int32_t *off) * segments. A segment is eligible if sack_cnt for that segment is greater * than or equal tcp_dupack_fast_retransmit. After it has retransmitted * all eligible segments, it checks to see if TCP can send some new segments - * (fast recovery). If it can, set the appropriate flag for tcp_rput_data(). + * (fast recovery). If it can, set the appropriate flag for tcp_input_data(). * * Parameters: * tcp_t *tcp: the tcp structure of the connection. * uint_t *flags: in return, appropriate value will be set for - * tcp_rput_data(). + * tcp_input_data(). */ static void tcp_sack_rxmit(tcp_t *tcp, uint_t *flags) @@ -11988,7 +9241,7 @@ tcp_sack_rxmit(tcp_t *tcp, uint_t *flags) tcp->tcp_pipe += seg_len; tcp->tcp_sack_snxt = begin + seg_len; - tcp_send_data(tcp, tcp->tcp_wq, xmit_mp); + tcp_send_data(tcp, xmit_mp); /* * Update the send timestamp to avoid false retransmission. @@ -12012,96 +9265,8 @@ tcp_sack_rxmit(tcp_t *tcp, uint_t *flags) } /* - * This function handles policy checking at TCP level for non-hard_bound/ - * detached connections. - */ -static boolean_t -tcp_check_policy(tcp_t *tcp, mblk_t *first_mp, ipha_t *ipha, ip6_t *ip6h, - boolean_t secure, boolean_t mctl_present) -{ - ipsec_latch_t *ipl = NULL; - ipsec_action_t *act = NULL; - mblk_t *data_mp; - ipsec_in_t *ii; - const char *reason; - kstat_named_t *counter; - tcp_stack_t *tcps = tcp->tcp_tcps; - ipsec_stack_t *ipss; - ip_stack_t *ipst; - - ASSERT(mctl_present || !secure); - - ASSERT((ipha == NULL && ip6h != NULL) || - (ip6h == NULL && ipha != NULL)); - - /* - * We don't necessarily have an ipsec_in_act action to verify - * policy because of assymetrical policy where we have only - * outbound policy and no inbound policy (possible with global - * policy). 
- */ - if (!secure) { - if (act == NULL || act->ipa_act.ipa_type == IPSEC_ACT_BYPASS || - act->ipa_act.ipa_type == IPSEC_ACT_CLEAR) - return (B_TRUE); - ipsec_log_policy_failure(IPSEC_POLICY_MISMATCH, - "tcp_check_policy", ipha, ip6h, secure, - tcps->tcps_netstack); - ipss = tcps->tcps_netstack->netstack_ipsec; - - ip_drop_packet(first_mp, B_TRUE, NULL, NULL, - DROPPER(ipss, ipds_tcp_clear), - &tcps->tcps_dropper); - return (B_FALSE); - } - - /* - * We have a secure packet. - */ - if (act == NULL) { - ipsec_log_policy_failure(IPSEC_POLICY_NOT_NEEDED, - "tcp_check_policy", ipha, ip6h, secure, - tcps->tcps_netstack); - ipss = tcps->tcps_netstack->netstack_ipsec; - - ip_drop_packet(first_mp, B_TRUE, NULL, NULL, - DROPPER(ipss, ipds_tcp_secure), - &tcps->tcps_dropper); - return (B_FALSE); - } - - /* - * XXX This whole routine is currently incorrect. ipl should - * be set to the latch pointer, but is currently not set, so - * we initialize it to NULL to avoid picking up random garbage. - */ - if (ipl == NULL) - return (B_TRUE); - - data_mp = first_mp->b_cont; - - ii = (ipsec_in_t *)first_mp->b_rptr; - - ipst = tcps->tcps_netstack->netstack_ip; - - if (ipsec_check_ipsecin_latch(ii, data_mp, ipl, ipha, ip6h, &reason, - &counter, tcp->tcp_connp)) { - BUMP_MIB(&ipst->ips_ip_mib, ipsecInSucceeded); - return (B_TRUE); - } - (void) strlog(TCP_MOD_ID, 0, 0, SL_ERROR|SL_WARN|SL_CONSOLE, - "tcp inbound policy mismatch: %s, packet dropped\n", - reason); - BUMP_MIB(&ipst->ips_ip_mib, ipsecInFailed); - - ip_drop_packet(first_mp, B_TRUE, NULL, NULL, counter, - &tcps->tcps_dropper); - return (B_FALSE); -} - -/* - * tcp_ss_rexmit() is called in tcp_rput_data() to do slow start - * retransmission after a timeout. + * tcp_ss_rexmit() is called to do slow start retransmission after a timeout + * or ICMP errors. * * To limit the number of duplicate segments, we limit the number of segment * to be sent in one time to tcp_snd_burst, the burst variable. 
@@ -12150,7 +9315,7 @@ tcp_ss_rexmit(tcp_t *tcp) if (xmit_mp == NULL) return; - tcp_send_data(tcp, tcp->tcp_wq, xmit_mp); + tcp_send_data(tcp, xmit_mp); snxt += cnt; win -= cnt; @@ -12184,7 +9349,7 @@ tcp_ss_rexmit(tcp_t *tcp) /* * Process all TCP option in SYN segment. Note that this function should - * be called after tcp_adapt_ire() is called so that the necessary info + * be called after tcp_set_destination() is called so that the necessary info * from IRE is already set in the tcp structure. * * This function sets up the correct tcp_mss value according to the @@ -12194,16 +9359,17 @@ tcp_ss_rexmit(tcp_t *tcp) * should do the appropriate change. */ void -tcp_process_options(tcp_t *tcp, tcph_t *tcph) +tcp_process_options(tcp_t *tcp, tcpha_t *tcpha) { int options; tcp_opt_t tcpopt; uint32_t mss_max; char *tmp_tcph; tcp_stack_t *tcps = tcp->tcp_tcps; + conn_t *connp = tcp->tcp_connp; tcpopt.tcp = NULL; - options = tcp_parse_options(tcph, &tcpopt); + options = tcp_parse_options(tcpha, &tcpopt); /* * Process MSS option. Note that MSS option value does not account @@ -12212,12 +9378,12 @@ tcp_process_options(tcp_t *tcp, tcph_t *tcph) * IPv6. */ if (!(options & TCP_OPT_MSS_PRESENT)) { - if (tcp->tcp_ipversion == IPV4_VERSION) + if (connp->conn_ipversion == IPV4_VERSION) tcpopt.tcp_opt_mss = tcps->tcps_mss_def_ipv4; else tcpopt.tcp_opt_mss = tcps->tcps_mss_def_ipv6; } else { - if (tcp->tcp_ipversion == IPV4_VERSION) + if (connp->conn_ipversion == IPV4_VERSION) mss_max = tcps->tcps_mss_max_ipv4; else mss_max = tcps->tcps_mss_max_ipv6; @@ -12240,23 +9406,23 @@ tcp_process_options(tcp_t *tcp, tcph_t *tcph) /* Process Timestamp option. 
*/ if ((options & TCP_OPT_TSTAMP_PRESENT) && (tcp->tcp_snd_ts_ok || TCP_IS_DETACHED(tcp))) { - tmp_tcph = (char *)tcp->tcp_tcph; + tmp_tcph = (char *)tcp->tcp_tcpha; tcp->tcp_snd_ts_ok = B_TRUE; tcp->tcp_ts_recent = tcpopt.tcp_opt_ts_val; tcp->tcp_last_rcv_lbolt = lbolt64; ASSERT(OK_32PTR(tmp_tcph)); - ASSERT(tcp->tcp_tcp_hdr_len == TCP_MIN_HEADER_LENGTH); + ASSERT(connp->conn_ht_ulp_len == TCP_MIN_HEADER_LENGTH); /* Fill in our template header with basic timestamp option. */ - tmp_tcph += tcp->tcp_tcp_hdr_len; + tmp_tcph += connp->conn_ht_ulp_len; tmp_tcph[0] = TCPOPT_NOP; tmp_tcph[1] = TCPOPT_NOP; tmp_tcph[2] = TCPOPT_TSTAMP; tmp_tcph[3] = TCPOPT_TSTAMP_LEN; - tcp->tcp_hdr_len += TCPOPT_REAL_TS_LEN; - tcp->tcp_tcp_hdr_len += TCPOPT_REAL_TS_LEN; - tcp->tcp_tcph->th_offset_and_rsrvd[0] += (3 << 4); + connp->conn_ht_iphc_len += TCPOPT_REAL_TS_LEN; + connp->conn_ht_ulp_len += TCPOPT_REAL_TS_LEN; + tcp->tcp_tcpha->tha_offset_and_reserved += (3 << 4); } else { tcp->tcp_snd_ts_ok = B_FALSE; } @@ -12266,12 +9432,11 @@ tcp_process_options(tcp_t *tcp, tcph_t *tcph) * then allocate the SACK info structure. Note the following ways * when tcp_snd_sack_ok is set to true. * - * For active connection: in tcp_adapt_ire() called in - * tcp_rput_other(), or in tcp_rput_other() when tcp_sack_permitted - * is checked. + * For active connection: in tcp_set_destination() called in + * tcp_connect(). * - * For passive connection: in tcp_adapt_ire() called in - * tcp_accept_comm(). + * For passive connection: in tcp_set_destination() called in + * tcp_input_listener(). * * That's the reason why the extra TCP_IS_DETACHED() check is there. * That check makes sure that if we did not send a SACK OK option, @@ -12320,7 +9485,8 @@ tcp_process_options(tcp_t *tcp, tcph_t *tcph) * Now we know the exact TCP/IP header length, subtract * that from tcp_mss to get our side's MSS. 
*/ - tcp->tcp_mss -= tcp->tcp_hdr_len; + tcp->tcp_mss -= connp->conn_ht_iphc_len; + /* * Here we assume that the other side's header size will be equal to * our header size. We calculate the real MSS accordingly. Need to @@ -12328,22 +9494,29 @@ tcp_process_options(tcp_t *tcp, tcph_t *tcph) * * Real MSS = Opt.MSS - (our TCP/IP header - min TCP/IP header) */ - tcpopt.tcp_opt_mss -= tcp->tcp_hdr_len + tcp->tcp_ipsec_overhead - - ((tcp->tcp_ipversion == IPV4_VERSION ? + tcpopt.tcp_opt_mss -= connp->conn_ht_iphc_len + + tcp->tcp_ipsec_overhead - + ((connp->conn_ipversion == IPV4_VERSION ? IP_SIMPLE_HDR_LENGTH : IPV6_HDR_LEN) + TCP_MIN_HEADER_LENGTH); /* * Set MSS to the smaller one of both ends of the connection. * We should not have called tcp_mss_set() before, but our * side of the MSS should have been set to a proper value - * by tcp_adapt_ire(). tcp_mss_set() will also set up the + * by tcp_set_destination(). tcp_mss_set() will also set up the * STREAM head parameters properly. * * If we have a larger-than-16-bit window but the other side * didn't want to do window scale, tcp_rwnd_set() will take * care of that. */ - tcp_mss_set(tcp, MIN(tcpopt.tcp_opt_mss, tcp->tcp_mss), B_TRUE); + tcp_mss_set(tcp, MIN(tcpopt.tcp_opt_mss, tcp->tcp_mss)); + + /* + * Initialize tcp_cwnd value. After tcp_mss_set(), tcp_mss has been + * updated properly. + */ + SET_TCP_INIT_CWND(tcp, tcp->tcp_mss, tcps->tcps_slow_start_initial); } /* @@ -12410,7 +9583,7 @@ tcp_send_conn_ind(void *arg, mblk_t *mp, void *arg2) tcp_t *tail; /* - * The eager already has an extra ref put in tcp_rput_data + * The eager already has an extra ref put in tcp_input_data * so that it stays till accept comes back even though it * might get into TCPS_CLOSED as a result of a TH_RST etc. */ @@ -12496,8 +9669,8 @@ tcp_send_conn_ind(void *arg, mblk_t *mp, void *arg2) * remote host. This proves the IP addr is good. * Cache it! 
*/ - addr_cache[IP_ADDR_CACHE_HASH( - tcp->tcp_remote)] = tcp->tcp_remote; + addr_cache[IP_ADDR_CACHE_HASH(tcp->tcp_connp->conn_faddr_v4)] = + tcp->tcp_connp->conn_faddr_v4; } mutex_exit(&listener->tcp_eager_lock); if (need_send_conn_ind) @@ -12513,17 +9686,16 @@ tcp_ulp_newconn(conn_t *lconnp, conn_t *econnp, mblk_t *mp) { if (IPCL_IS_NONSTR(lconnp)) { cred_t *cr; - pid_t cpid; - - cr = msg_getcred(mp, &cpid); + pid_t cpid = NOPID; ASSERT(econnp->conn_tcp->tcp_listener == lconnp->conn_tcp); ASSERT(econnp->conn_tcp->tcp_saved_listener == lconnp->conn_tcp); + cr = msg_getcred(mp, &cpid); + /* Keep the message around in case of a fallback to TPI */ econnp->conn_tcp->tcp_conn.tcp_eager_conn_ind = mp; - /* * Notify the ULP about the newconn. It is guaranteed that no * tcp_accept() call will be made for the eager if the @@ -12545,177 +9717,83 @@ tcp_ulp_newconn(conn_t *lconnp, conn_t *econnp, mblk_t *mp) econnp->conn_tcp->tcp_conn_req_seqnum); } } else { - putnext(lconnp->conn_tcp->tcp_rq, mp); + putnext(lconnp->conn_rq, mp); } } -mblk_t * -tcp_find_pktinfo(tcp_t *tcp, mblk_t *mp, uint_t *ipversp, uint_t *ip_hdr_lenp, - uint_t *ifindexp, ip6_pkt_t *ippp) +/* + * Handle a packet that has been reclassified by TCP. + * This function drops the ref on connp that the caller had. 
+ */ +static void +tcp_reinput(conn_t *connp, mblk_t *mp, ip_recv_attr_t *ira, ip_stack_t *ipst) { - ip_pktinfo_t *pinfo; - ip6_t *ip6h; - uchar_t *rptr; - mblk_t *first_mp = mp; - boolean_t mctl_present = B_FALSE; - uint_t ifindex = 0; - ip6_pkt_t ipp; - uint_t ipvers; - uint_t ip_hdr_len; - tcp_stack_t *tcps = tcp->tcp_tcps; + ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; - rptr = mp->b_rptr; - ASSERT(OK_32PTR(rptr)); - ASSERT(tcp != NULL); - ipp.ipp_fields = 0; + if (connp->conn_incoming_ifindex != 0 && + connp->conn_incoming_ifindex != ira->ira_ruifindex) { + freemsg(mp); + CONN_DEC_REF(connp); + return; + } - switch DB_TYPE(mp) { - case M_CTL: - mp = mp->b_cont; - if (mp == NULL) { - freemsg(first_mp); - return (NULL); + if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || + (ira->ira_flags & IRAF_IPSEC_SECURE)) { + ip6_t *ip6h; + ipha_t *ipha; + + if (ira->ira_flags & IRAF_IS_IPV4) { + ipha = (ipha_t *)mp->b_rptr; + ip6h = NULL; + } else { + ipha = NULL; + ip6h = (ip6_t *)mp->b_rptr; } - if (DB_TYPE(mp) != M_DATA) { - freemsg(first_mp); - return (NULL); + mp = ipsec_check_inbound_policy(mp, connp, ipha, ip6h, ira); + if (mp == NULL) { + BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsInDiscards); + /* Note that mp is NULL */ + ip_drop_input("ipIfStatsInDiscards", mp, NULL); + CONN_DEC_REF(connp); + return; } - mctl_present = B_TRUE; - break; - case M_DATA: - break; - default: - cmn_err(CE_NOTE, "tcp_find_pktinfo: unknown db_type"); - freemsg(mp); - return (NULL); } - ipvers = IPH_HDR_VERSION(rptr); - if (ipvers == IPV4_VERSION) { - if (tcp == NULL) { - ip_hdr_len = IPH_HDR_LENGTH(rptr); - goto done; - } - - ipp.ipp_fields |= IPPF_HOPLIMIT; - ipp.ipp_hoplimit = ((ipha_t *)rptr)->ipha_ttl; + if (IPCL_IS_TCP(connp)) { /* - * If we have IN_PKTINFO in an M_CTL and tcp_ipv6_recvancillary - * has TCP_IPV6_RECVPKTINFO set, pass I/F index along in ipp. 
+ * do not drain, certain use cases can blow + * the stack */ - if ((tcp->tcp_ipv6_recvancillary & TCP_IPV6_RECVPKTINFO) && - mctl_present) { - pinfo = (ip_pktinfo_t *)first_mp->b_rptr; - if ((MBLKL(first_mp) == sizeof (ip_pktinfo_t)) && - (pinfo->ip_pkt_ulp_type == IN_PKTINFO) && - (pinfo->ip_pkt_flags & IPF_RECVIF)) { - ipp.ipp_fields |= IPPF_IFINDEX; - ipp.ipp_ifindex = pinfo->ip_pkt_ifindex; - ifindex = pinfo->ip_pkt_ifindex; - } - freeb(first_mp); - mctl_present = B_FALSE; - } - ip_hdr_len = IPH_HDR_LENGTH(rptr); + SQUEUE_ENTER_ONE(connp->conn_sqp, mp, + connp->conn_recv, connp, ira, + SQ_NODRAIN, SQTAG_IP_TCP_INPUT); } else { - ip6h = (ip6_t *)rptr; - - ASSERT(ipvers == IPV6_VERSION); - ipp.ipp_fields = IPPF_HOPLIMIT | IPPF_TCLASS; - ipp.ipp_tclass = (ip6h->ip6_flow & 0x0FF00000) >> 20; - ipp.ipp_hoplimit = ip6h->ip6_hops; - - if (ip6h->ip6_nxt != IPPROTO_TCP) { - uint8_t nexthdrp; - ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip; - - /* Look for ifindex information */ - if (ip6h->ip6_nxt == IPPROTO_RAW) { - ip6i_t *ip6i = (ip6i_t *)ip6h; - if ((uchar_t *)&ip6i[1] > mp->b_wptr) { - BUMP_MIB(&ipst->ips_ip_mib, tcpInErrs); - freemsg(first_mp); - return (NULL); - } - - if (ip6i->ip6i_flags & IP6I_IFINDEX) { - ASSERT(ip6i->ip6i_ifindex != 0); - ipp.ipp_fields |= IPPF_IFINDEX; - ipp.ipp_ifindex = ip6i->ip6i_ifindex; - ifindex = ip6i->ip6i_ifindex; - } - rptr = (uchar_t *)&ip6i[1]; - mp->b_rptr = rptr; - if (rptr == mp->b_wptr) { - mblk_t *mp1; - mp1 = mp->b_cont; - freeb(mp); - mp = mp1; - rptr = mp->b_rptr; - } - if (MBLKL(mp) < IPV6_HDR_LEN + - sizeof (tcph_t)) { - BUMP_MIB(&ipst->ips_ip_mib, tcpInErrs); - freemsg(first_mp); - return (NULL); - } - ip6h = (ip6_t *)rptr; - } - - /* - * Find any potentially interesting extension headers - * as well as the length of the IPv6 + extension - * headers. 
- */ - ip_hdr_len = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdrp); - /* Verify if this is a TCP packet */ - if (nexthdrp != IPPROTO_TCP) { - BUMP_MIB(&ipst->ips_ip_mib, tcpInErrs); - freemsg(first_mp); - return (NULL); - } - } else { - ip_hdr_len = IPV6_HDR_LEN; - } + /* Not TCP; must be SOCK_RAW, IPPROTO_TCP */ + (connp->conn_recv)(connp, mp, NULL, + ira); + CONN_DEC_REF(connp); } -done: - if (ipversp != NULL) - *ipversp = ipvers; - if (ip_hdr_lenp != NULL) - *ip_hdr_lenp = ip_hdr_len; - if (ippp != NULL) - *ippp = ipp; - if (ifindexp != NULL) - *ifindexp = ifindex; - if (mctl_present) { - freeb(first_mp); - } - return (mp); } +boolean_t tcp_outbound_squeue_switch = B_FALSE; + /* * Handle M_DATA messages from IP. Its called directly from IP via - * squeue for AF_INET type sockets fast path. No M_CTL are expected - * in this path. - * - * For everything else (including AF_INET6 sockets with 'tcp_ipversion' - * v4 and v6), we are called through tcp_input() and a M_CTL can - * be present for options but tcp_find_pktinfo() deals with it. We - * only expect M_DATA packets after tcp_find_pktinfo() is done. + * squeue for received IP packets. * * The first argument is always the connp/tcp to which the mp belongs. * There are no exceptions to this rule. The caller has already put - * a reference on this connp/tcp and once tcp_rput_data() returns, + * a reference on this connp/tcp and once tcp_input_data() returns, * the squeue will do the refrele. * - * The TH_SYN for the listener directly go to tcp_conn_request via - * squeue. + * The TH_SYN for the listener directly go to tcp_input_listener via + * squeue. ICMP errors go directly to tcp_icmp_input(). 
* * sqp: NULL = recursive, sqp != NULL means called from squeue */ void -tcp_rput_data(void *arg, mblk_t *mp, void *arg2) +tcp_input_data(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *ira) { int32_t bytes_acked; int32_t gap; @@ -12729,11 +9807,10 @@ tcp_rput_data(void *arg, mblk_t *mp, void *arg2) int seg_len; uint_t ip_hdr_len; uint32_t seg_seq; - tcph_t *tcph; + tcpha_t *tcpha; int urp; tcp_opt_t tcpopt; - uint_t ipvers; - ip6_pkt_t ipp; + ip_pkt_t ipp; boolean_t ofo_seg = B_FALSE; /* Out of order segment */ uint32_t cwnd; uint32_t add; @@ -12756,33 +9833,43 @@ tcp_rput_data(void *arg, mblk_t *mp, void *arg2) rptr = mp->b_rptr; ASSERT(OK_32PTR(rptr)); - /* - * An AF_INET socket is not capable of receiving any pktinfo. Do inline - * processing here. For rest call tcp_find_pktinfo to fill up the - * necessary information. - */ - if (IPCL_IS_TCP4(connp)) { - ipvers = IPV4_VERSION; - ip_hdr_len = IPH_HDR_LENGTH(rptr); - } else { - mp = tcp_find_pktinfo(tcp, mp, &ipvers, &ip_hdr_len, - NULL, &ipp); - if (mp == NULL) { - TCP_STAT(tcps, tcp_rput_v6_error); - return; + ip_hdr_len = ira->ira_ip_hdr_length; + if (connp->conn_recv_ancillary.crb_all != 0) { + /* + * Record packet information in the ip_pkt_t + */ + ipp.ipp_fields = 0; + if (ira->ira_flags & IRAF_IS_IPV4) { + (void) ip_find_hdr_v4((ipha_t *)rptr, &ipp, + B_FALSE); + } else { + uint8_t nexthdrp; + + /* + * IPv6 packets can only be received by applications + * that are prepared to receive IPv6 addresses. + * The IP fanout must ensure this. + */ + ASSERT(connp->conn_family == AF_INET6); + + (void) ip_find_hdr_v6(mp, (ip6_t *)rptr, B_TRUE, &ipp, + &nexthdrp); + ASSERT(nexthdrp == IPPROTO_TCP); + + /* Could have caused a pullup? 
*/ + iphdr = mp->b_rptr; + rptr = mp->b_rptr; } - iphdr = mp->b_rptr; - rptr = mp->b_rptr; } ASSERT(DB_TYPE(mp) == M_DATA); ASSERT(mp->b_next == NULL); - tcph = (tcph_t *)&rptr[ip_hdr_len]; - seg_seq = ABE32_TO_U32(tcph->th_seq); - seg_ack = ABE32_TO_U32(tcph->th_ack); + tcpha = (tcpha_t *)&rptr[ip_hdr_len]; + seg_seq = ntohl(tcpha->tha_seq); + seg_ack = ntohl(tcpha->tha_ack); ASSERT((uintptr_t)(mp->b_wptr - rptr) <= (uintptr_t)INT_MAX); seg_len = (int)(mp->b_wptr - rptr) - - (ip_hdr_len + TCP_HDR_LENGTH(tcph)); + (ip_hdr_len + TCP_HDR_LENGTH(tcpha)); if ((mp1 = mp->b_cont) != NULL && mp1->b_datap->db_type == M_DATA) { do { ASSERT((uintptr_t)(mp1->b_wptr - mp1->b_rptr) <= @@ -12794,7 +9881,7 @@ tcp_rput_data(void *arg, mblk_t *mp, void *arg2) if (tcp->tcp_state == TCPS_TIME_WAIT) { tcp_time_wait_processing(tcp, mp, seg_seq, seg_ack, - seg_len, tcph); + seg_len, tcpha, ira); return; } @@ -12809,7 +9896,7 @@ tcp_rput_data(void *arg, mblk_t *mp, void *arg2) tcp->tcp_last_recv_time = lbolt; } - flags = (unsigned int)tcph->th_flags[0] & 0xFF; + flags = (unsigned int)tcpha->tha_flags & 0xFF; BUMP_LOCAL(tcp->tcp_ibsegs); DTRACE_PROBE2(tcp__trace__recv, mblk_t *, mp, tcp_t *, tcp); @@ -12840,7 +9927,7 @@ tcp_rput_data(void *arg, mblk_t *mp, void *arg2) } /* Update pointers into message */ iphdr = rptr = mp->b_rptr; - tcph = (tcph_t *)&rptr[ip_hdr_len]; + tcpha = (tcpha_t *)&rptr[ip_hdr_len]; if (SEQ_GT(seg_seq, tcp->tcp_rnxt)) { /* * Since we can't handle any data with this urgent @@ -12849,13 +9936,29 @@ tcp_rput_data(void *arg, mblk_t *mp, void *arg2) * the urgent mark and generate the M_PCSIG, * which we can do. 
*/ - mp->b_wptr = (uchar_t *)tcph + TCP_HDR_LENGTH(tcph); + mp->b_wptr = (uchar_t *)tcpha + TCP_HDR_LENGTH(tcpha); seg_len = 0; } } switch (tcp->tcp_state) { case TCPS_SYN_SENT: + if (connp->conn_final_sqp == NULL && + tcp_outbound_squeue_switch && sqp != NULL) { + ASSERT(connp->conn_initial_sqp == connp->conn_sqp); + connp->conn_final_sqp = sqp; + if (connp->conn_final_sqp != connp->conn_sqp) { + DTRACE_PROBE1(conn__final__sqp__switch, + conn_t *, connp); + CONN_INC_REF(connp); + SQUEUE_SWITCH(connp, connp->conn_final_sqp); + SQUEUE_ENTER_ONE(connp->conn_sqp, mp, + tcp_input_data, connp, ira, ip_squeue_flag, + SQTAG_CONNECT_FINISH); + return; + } + DTRACE_PROBE1(conn__final__sqp__same, conn_t *, connp); + } if (flags & TH_ACK) { /* * Note that our stack cannot send data before a @@ -12887,13 +9990,13 @@ tcp_rput_data(void *arg, mblk_t *mp, void *arg2) } /* Process all TCP options. */ - tcp_process_options(tcp, tcph); + tcp_process_options(tcp, tcpha); /* * The following changes our rwnd to be a multiple of the * MIN(peer MSS, our MSS) for performance reason. */ - (void) tcp_rwnd_set(tcp, - MSS_ROUNDUP(tcp->tcp_recv_hiwater, tcp->tcp_mss)); + (void) tcp_rwnd_set(tcp, MSS_ROUNDUP(connp->conn_rcvbuf, + tcp->tcp_mss)); /* Is the other end ECN capable? */ if (tcp->tcp_ecn_ok) { @@ -12910,21 +10013,17 @@ tcp_rput_data(void *arg, mblk_t *mp, void *arg2) tcp->tcp_irs = seg_seq; tcp->tcp_rack = seg_seq; tcp->tcp_rnxt = seg_seq + 1; - U32_TO_ABE32(tcp->tcp_rnxt, tcp->tcp_tcph->th_ack); + tcp->tcp_tcpha->tha_ack = htonl(tcp->tcp_rnxt); if (!TCP_IS_DETACHED(tcp)) { /* Allocate room for SACK options if needed. */ - if (tcp->tcp_snd_sack_ok) { - (void) proto_set_tx_wroff(tcp->tcp_rq, connp, - tcp->tcp_hdr_len + - TCPOPT_MAX_SACK_LEN + - (tcp->tcp_loopback ? 0 : - tcps->tcps_wroff_xtra)); - } else { - (void) proto_set_tx_wroff(tcp->tcp_rq, connp, - tcp->tcp_hdr_len + - (tcp->tcp_loopback ? 
0 : - tcps->tcps_wroff_xtra)); - } + connp->conn_wroff = connp->conn_ht_iphc_len; + if (tcp->tcp_snd_sack_ok) + connp->conn_wroff += TCPOPT_MAX_SACK_LEN; + if (!tcp->tcp_loopback) + connp->conn_wroff += tcps->tcps_wroff_xtra; + + (void) proto_set_tx_wroff(connp->conn_rq, connp, + connp->conn_wroff); } if (flags & TH_ACK) { /* @@ -12944,15 +10043,14 @@ tcp_rput_data(void *arg, mblk_t *mp, void *arg2) * sending up connection confirmation */ tcp->tcp_state = TCPS_ESTABLISHED; - if (!tcp_conn_con(tcp, iphdr, tcph, mp, - tcp->tcp_loopback ? &mp1 : NULL)) { + if (!tcp_conn_con(tcp, iphdr, mp, + tcp->tcp_loopback ? &mp1 : NULL, ira)) { tcp->tcp_state = TCPS_SYN_SENT; freemsg(mp); return; } /* SYN was acked - making progress */ - if (tcp->tcp_ipversion == IPV6_VERSION) - tcp->tcp_ip_forward_progress = B_TRUE; + tcp->tcp_ip_forward_progress = B_TRUE; /* One for the SYN */ tcp->tcp_suna = tcp->tcp_iss + 1; @@ -12983,7 +10081,7 @@ tcp_rput_data(void *arg, mblk_t *mp, void *arg2) tcp->tcp_swl1 = seg_seq; tcp->tcp_swl2 = seg_ack; - new_swnd = BE16_TO_U16(tcph->th_win); + new_swnd = ntohs(tcpha->tha_win); tcp->tcp_swnd = new_swnd; if (new_swnd > tcp->tcp_max_swnd) tcp->tcp_max_swnd = new_swnd; @@ -13022,22 +10120,25 @@ tcp_rput_data(void *arg, mblk_t *mp, void *arg2) tcp->tcp_ack_tid); tcp->tcp_ack_tid = 0; } - tcp_send_data(tcp, tcp->tcp_wq, ack_mp); + tcp_send_data(tcp, ack_mp); BUMP_LOCAL(tcp->tcp_obsegs); BUMP_MIB(&tcps->tcps_mib, tcpOutAck); if (!IPCL_IS_NONSTR(connp)) { /* Send up T_CONN_CON */ - putnext(tcp->tcp_rq, mp1); + if (ira->ira_cred != NULL) { + mblk_setcred(mp1, + ira->ira_cred, + ira->ira_cpid); + } + putnext(connp->conn_rq, mp1); } else { - cred_t *cr; - pid_t cpid; - - cr = msg_getcred(mp1, &cpid); (*connp->conn_upcalls-> su_connected) (connp->conn_upper_handle, - tcp->tcp_connid, cr, cpid); + tcp->tcp_connid, + ira->ira_cred, + ira->ira_cpid); freemsg(mp1); } @@ -13054,15 +10155,16 @@ tcp_rput_data(void *arg, mblk_t *mp, void *arg2) TCP_STAT(tcps, 
tcp_fusion_unfusable); tcp->tcp_unfusable = B_TRUE; if (!IPCL_IS_NONSTR(connp)) { - putnext(tcp->tcp_rq, mp1); + if (ira->ira_cred != NULL) { + mblk_setcred(mp1, ira->ira_cred, + ira->ira_cpid); + } + putnext(connp->conn_rq, mp1); } else { - cred_t *cr; - pid_t cpid; - - cr = msg_getcred(mp1, &cpid); (*connp->conn_upcalls->su_connected) (connp->conn_upper_handle, - tcp->tcp_connid, cr, cpid); + tcp->tcp_connid, ira->ira_cred, + ira->ira_cpid); freemsg(mp1); } } @@ -13089,13 +10191,8 @@ tcp_rput_data(void *arg, mblk_t *mp, void *arg2) tcp->tcp_state = TCPS_SYN_RCVD; mp1 = tcp_xmit_mp(tcp, tcp->tcp_xmit_head, tcp->tcp_mss, NULL, NULL, tcp->tcp_iss, B_FALSE, NULL, B_FALSE); - if (mp1) { - /* - * See comment in tcp_conn_request() for why we use - * the open() time pid here. - */ - DB_CPID(mp1) = tcp->tcp_cpid; - tcp_send_data(tcp, tcp->tcp_wq, mp1); + if (mp1 != NULL) { + tcp_send_data(tcp, mp1); TCP_TIMER_RESTART(tcp, tcp->tcp_rto); } freemsg(mp); @@ -13146,9 +10243,20 @@ tcp_rput_data(void *arg, mblk_t *mp, void *arg2) conn_t *new_connp; ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip; - new_connp = ipcl_classify(mp, connp->conn_zoneid, ipst); + /* + * Don't accept any input on a closed tcp as this TCP logically + * does not exist on the system. Don't proceed further with + * this TCP. For instance, this packet could trigger another + * close of this tcp which would be disastrous for tcp_refcnt. + * tcp_close_detached / tcp_clean_death / tcp_closei_local must + * be called at most once on a TCP. In this case we need to + * refeed the packet into the classifier and figure out where + * the packet should go. + */ + new_connp = ipcl_classify(mp, ira, ipst); if (new_connp != NULL) { - tcp_reinput(new_connp, mp, connp->conn_sqp); + /* Drops ref on new_connp */ + tcp_reinput(new_connp, mp, ira, ipst); return; } /* We failed to classify. 
For now just drop the packet */ @@ -13194,7 +10302,7 @@ tcp_rput_data(void *arg, mblk_t *mp, void *arg2) tcp->tcp_kssl_ctx = NULL; tcp->tcp_rnxt += seg_len; - U32_TO_ABE32(tcp->tcp_rnxt, tcp->tcp_tcph->th_ack); + tcp->tcp_tcpha->tha_ack = htonl(tcp->tcp_rnxt); flags |= TH_ACK_NEEDED; goto ack_check; } @@ -13205,13 +10313,13 @@ tcp_rput_data(void *arg, mblk_t *mp, void *arg2) return; } - mp->b_rptr = (uchar_t *)tcph + TCP_HDR_LENGTH(tcph); - urp = BE16_TO_U16(tcph->th_urp) - TCP_OLD_URP_INTERPRETATION; - new_swnd = BE16_TO_U16(tcph->th_win) << - ((tcph->th_flags[0] & TH_SYN) ? 0 : tcp->tcp_snd_ws); + mp->b_rptr = (uchar_t *)tcpha + TCP_HDR_LENGTH(tcpha); + urp = ntohs(tcpha->tha_urp) - TCP_OLD_URP_INTERPRETATION; + new_swnd = ntohs(tcpha->tha_win) << + ((tcpha->tha_flags & TH_SYN) ? 0 : tcp->tcp_snd_ws); if (tcp->tcp_snd_ts_ok) { - if (!tcp_paws_check(tcp, tcph, &tcpopt)) { + if (!tcp_paws_check(tcp, tcpha, &tcpopt)) { /* * This segment is not acceptable. * Drop it and send back an ACK. @@ -13227,7 +10335,7 @@ tcp_rput_data(void *arg, mblk_t *mp, void *arg2) * SACK info in already updated in tcp_parse_options. Ignore * all other TCP options... */ - (void) tcp_parse_options(tcph, &tcpopt); + (void) tcp_parse_options(tcpha, &tcpopt); } try_again:; mss = tcp->tcp_mss; @@ -13289,7 +10397,7 @@ try_again:; * Adjust seg_len to the original value for tracing. */ seg_len -= gap; - if (tcp->tcp_debug) { + if (connp->conn_debug) { (void) strlog(TCP_MOD_ID, 0, 1, SL_TRACE, "tcp_rput: unacceptable, gap %d, rgap %d, " "flags 0x%x, seg_seq %u, seg_ack %u, " @@ -13436,7 +10544,7 @@ try_again:; return; } if (!TCP_IS_DETACHED(tcp) && - !putnextctl1(tcp->tcp_rq, + !putnextctl1(connp->conn_rq, M_PCSIG, SIGURG)) { /* Try again on the rexmit. */ freemsg(mp1); @@ -13505,7 +10613,7 @@ ok:; * same segment. In this case, we once again turn * on ECN_ECHO. 
*/ - if (tcp->tcp_ipversion == IPV4_VERSION) { + if (connp->conn_ipversion == IPV4_VERSION) { uchar_t tos = ((ipha_t *)rptr)->ipha_type_of_service; if ((tos & IPH_ECN_CE) == IPH_ECN_CE) { @@ -13705,7 +10813,7 @@ ok:; return; } if (!TCP_IS_DETACHED(tcp) && - !putnextctl1(tcp->tcp_rq, M_PCSIG, + !putnextctl1(connp->conn_rq, M_PCSIG, SIGURG)) { /* Try again on the rexmit. */ freemsg(mp1); @@ -13739,7 +10847,7 @@ ok:; } else if (tcp->tcp_urp_mark_mp != NULL) { /* * An allocation failure prevented the previous - * tcp_rput_data from sending up the allocated + * tcp_input_data from sending up the allocated * MSG*MARKNEXT message - send it up this time * around. */ @@ -13775,14 +10883,14 @@ ok:; */ (void) adjmsg(mp, urp - seg_len); - tcp_rput_data(connp, - mp, NULL); + tcp_input_data(connp, + mp, NULL, ira); return; } (void) adjmsg(mp1, urp - seg_len); /* Feed this piece back in. */ tmp_rnxt = tcp->tcp_rnxt; - tcp_rput_data(connp, mp1, NULL); + tcp_input_data(connp, mp1, NULL, ira); /* * If the data passed back in was not * processed (ie: bad ACK) sending @@ -13811,13 +10919,13 @@ ok:; */ (void) adjmsg(mp, urp + 1 - seg_len); - tcp_rput_data(connp, - mp, NULL); + tcp_input_data(connp, + mp, NULL, ira); return; } (void) adjmsg(mp1, urp + 1 - seg_len); tmp_rnxt = tcp->tcp_rnxt; - tcp_rput_data(connp, mp1, NULL); + tcp_input_data(connp, mp1, NULL, ira); /* * If the data passed back in was not * processed (ie: bad ACK) sending @@ -13831,7 +10939,7 @@ ok:; return; } } - tcp_rput_data(connp, mp, NULL); + tcp_input_data(connp, mp, NULL, ira); return; } /* @@ -13960,7 +11068,7 @@ process_ack: } bytes_acked = (int)(seg_ack - tcp->tcp_suna); - if (tcp->tcp_ipversion == IPV6_VERSION && bytes_acked > 0) + if (bytes_acked > 0) tcp->tcp_ip_forward_progress = B_TRUE; if (tcp->tcp_state == TCPS_SYN_RCVD) { if ((tcp->tcp_conn.tcp_eager_conn_ind != NULL) && @@ -13983,7 +11091,7 @@ process_ack: /* * The listener also exists because of the refhold - * done in tcp_conn_request. 
Its possible that it + * done in tcp_input_listener. Its possible that it * might have closed. We will check that once we * get inside listeners context. */ @@ -14005,12 +11113,12 @@ process_ack: } else if (!tcp->tcp_loopback) { SQUEUE_ENTER_ONE(listener->tcp_connp->conn_sqp, mp, tcp_send_conn_ind, - listener->tcp_connp, SQ_FILL, + listener->tcp_connp, NULL, SQ_FILL, SQTAG_TCP_CONN_IND); } else { SQUEUE_ENTER_ONE(listener->tcp_connp->conn_sqp, mp, tcp_send_conn_ind, - listener->tcp_connp, SQ_PROCESS, + listener->tcp_connp, NULL, SQ_PROCESS, SQTAG_TCP_CONN_IND); } } @@ -14026,7 +11134,7 @@ process_ack: */ tcp->tcp_state = TCPS_ESTABLISHED; if (tcp->tcp_active_open) { - if (!tcp_conn_con(tcp, iphdr, tcph, mp, NULL)) { + if (!tcp_conn_con(tcp, iphdr, mp, NULL, ira)) { freemsg(mp); tcp->tcp_state = TCPS_SYN_RCVD; return; @@ -14044,8 +11152,7 @@ process_ack: tcp->tcp_suna = tcp->tcp_iss + 1; /* One for the SYN */ bytes_acked--; /* SYN was acked - making progress */ - if (tcp->tcp_ipversion == IPV6_VERSION) - tcp->tcp_ip_forward_progress = B_TRUE; + tcp->tcp_ip_forward_progress = B_TRUE; /* * If SYN was retransmitted, need to reset all @@ -14083,7 +11190,7 @@ process_ack: /* Fuse when both sides are in ESTABLISHED state */ if (tcp->tcp_loopback && do_tcp_fusion) - tcp_fuse(tcp, iphdr, tcph); + tcp_fuse(tcp, iphdr, tcpha); } /* This code follows 4.4BSD-Lite2 mostly. */ @@ -14388,7 +11495,7 @@ process_ack: if (mp != NULL) { BUMP_LOCAL(tcp->tcp_obsegs); BUMP_MIB(&tcps->tcps_mib, tcpOutAck); - tcp_send_data(tcp, tcp->tcp_wq, mp); + tcp_send_data(tcp, mp); } return; } @@ -14487,7 +11594,6 @@ process_ack: } } else { tcp->tcp_rexmit = B_FALSE; - tcp->tcp_xmit_zc_clean = B_FALSE; tcp->tcp_rexmit_nxt = tcp->tcp_snxt; tcp->tcp_snd_burst = tcp->tcp_localnet ? 
TCP_CWND_INFINITE : TCP_CWND_NORMAL; @@ -14662,8 +11768,7 @@ fin_acked: tcp->tcp_xmit_tail = NULL; if (tcp->tcp_fin_sent) { /* FIN was acked - making progress */ - if (tcp->tcp_ipversion == IPV6_VERSION && - !tcp->tcp_fin_acked) + if (!tcp->tcp_fin_acked) tcp->tcp_ip_forward_progress = B_TRUE; tcp->tcp_fin_acked = B_TRUE; if (tcp->tcp_linger_tid != 0 && @@ -14781,7 +11886,7 @@ est: * bit so this TIME-WAIT connection won't * interfere with new ones. */ - tcp->tcp_exclbind = 0; + connp->conn_exclbind = 0; if (!TCP_IS_DETACHED(tcp)) { TCP_TIMER_RESTART(tcp, tcps->tcps_time_wait_interval); @@ -14805,8 +11910,8 @@ est: if (!tcp->tcp_fin_rcvd) { tcp->tcp_fin_rcvd = B_TRUE; tcp->tcp_rnxt++; - tcph = tcp->tcp_tcph; - U32_TO_ABE32(tcp->tcp_rnxt, tcph->th_ack); + tcpha = tcp->tcp_tcpha; + tcpha->tha_ack = htonl(tcp->tcp_rnxt); /* * Generate the ordrel_ind at the end unless we @@ -14815,7 +11920,7 @@ est: * after tcp_accept is done. */ if (tcp->tcp_listener == NULL && - !TCP_IS_DETACHED(tcp) && (!tcp->tcp_hard_binding)) + !TCP_IS_DETACHED(tcp) && !tcp->tcp_hard_binding) flags |= TH_ORDREL_NEEDED; switch (tcp->tcp_state) { case TCPS_SYN_RCVD: @@ -14836,7 +11941,7 @@ est: * bit so this TIME-WAIT connection won't * interfere with new ones. */ - tcp->tcp_exclbind = 0; + connp->conn_exclbind = 0; if (!TCP_IS_DETACHED(tcp)) { TCP_TIMER_RESTART(tcp, tcps->tcps_time_wait_interval); @@ -14872,7 +11977,7 @@ est: freeb(mp1); } update_ack: - tcph = tcp->tcp_tcph; + tcpha = tcp->tcp_tcpha; tcp->tcp_rack_cnt++; { uint32_t cur_max; @@ -14915,7 +12020,7 @@ update_ack: } } tcp->tcp_rnxt += seg_len; - U32_TO_ABE32(tcp->tcp_rnxt, tcph->th_ack); + tcpha->tha_ack = htonl(tcp->tcp_rnxt); if (mp == NULL) goto xmit_check; @@ -14942,12 +12047,13 @@ update_ack: /* * Check for ancillary data changes compared to last segment. 
*/ - if (tcp->tcp_ipv6_recvancillary != 0) { - mp = tcp_rput_add_ancillary(tcp, mp, &ipp); - ASSERT(mp != NULL); + if (connp->conn_recv_ancillary.crb_all != 0) { + mp = tcp_input_add_ancillary(tcp, mp, &ipp, ira); + if (mp == NULL) + return; } - if (tcp->tcp_listener || tcp->tcp_hard_binding) { + if (tcp->tcp_listener != NULL || tcp->tcp_hard_binding) { /* * Side queue inbound data until the accept happens. * tcp_accept/tcp_rput drains this when the accept happens. @@ -14961,9 +12067,9 @@ update_ack: if (tcp->tcp_kssl_pending) { DTRACE_PROBE1(kssl_mblk__ksslinput_pending, mblk_t *, mp); - tcp_kssl_input(tcp, mp); + tcp_kssl_input(tcp, mp, ira->ira_cred); } else { - tcp_rcv_enqueue(tcp, mp, seg_len); + tcp_rcv_enqueue(tcp, mp, seg_len, ira->ira_cred); } } else if (IPCL_IS_NONSTR(connp)) { /* @@ -15015,19 +12121,22 @@ update_ack: (DB_TYPE(mp) == M_DATA)) { DTRACE_PROBE1(kssl_mblk__ksslinput_data1, mblk_t *, mp); - tcp_kssl_input(tcp, mp); + tcp_kssl_input(tcp, mp, ira->ira_cred); } else { - putnext(tcp->tcp_rq, mp); - if (!canputnext(tcp->tcp_rq)) + if (is_system_labeled()) + tcp_setcred_data(mp, ira); + + putnext(connp->conn_rq, mp); + if (!canputnext(connp->conn_rq)) tcp->tcp_rwnd -= seg_len; } } else if ((tcp->tcp_kssl_ctx != NULL) && (DB_TYPE(mp) == M_DATA)) { /* Does this need SSL processing first? */ DTRACE_PROBE1(kssl_mblk__ksslinput_data2, mblk_t *, mp); - tcp_kssl_input(tcp, mp); + tcp_kssl_input(tcp, mp, ira->ira_cred); } else if ((flags & (TH_PUSH|TH_FIN)) || - tcp->tcp_rcv_cnt + seg_len >= tcp->tcp_recv_hiwater >> 3) { + tcp->tcp_rcv_cnt + seg_len >= connp->conn_rcvbuf >> 3) { if (tcp->tcp_rcv_list != NULL) { /* * Enqueue the new segment first and then @@ -15042,11 +12151,15 @@ update_ack: * canputnext() as tcp_rcv_drain() needs to * call canputnext(). 
*/ - tcp_rcv_enqueue(tcp, mp, seg_len); + tcp_rcv_enqueue(tcp, mp, seg_len, + ira->ira_cred); flags |= tcp_rcv_drain(tcp); } else { - putnext(tcp->tcp_rq, mp); - if (!canputnext(tcp->tcp_rq)) + if (is_system_labeled()) + tcp_setcred_data(mp, ira); + + putnext(connp->conn_rq, mp); + if (!canputnext(connp->conn_rq)) tcp->tcp_rwnd -= seg_len; } } else { @@ -15054,7 +12167,7 @@ update_ack: * Enqueue all packets when processing an mblk * from the co queue and also enqueue normal packets. */ - tcp_rcv_enqueue(tcp, mp, seg_len); + tcp_rcv_enqueue(tcp, mp, seg_len, ira->ira_cred); } /* * Make sure the timer is running if we have data waiting @@ -15103,7 +12216,7 @@ xmit_check: BUMP_MIB(&tcps->tcps_mib, tcpRetransSegs); UPDATE_MIB(&tcps->tcps_mib, tcpRetransBytes, snd_size); - tcp_send_data(tcp, tcp->tcp_wq, mp1); + tcp_send_data(tcp, mp1); } } if (flags & TH_NEED_SACK_REXMIT) { @@ -15155,7 +12268,10 @@ ack_check: ASSERT(tcp->tcp_rcv_list == NULL || tcp->tcp_fused_sigurg); mp1 = tcp->tcp_urp_mark_mp; tcp->tcp_urp_mark_mp = NULL; - putnext(tcp->tcp_rq, mp1); + if (is_system_labeled()) + tcp_setcred_data(mp1, ira); + + putnext(connp->conn_rq, mp1); #ifdef DEBUG (void) strlog(TCP_MOD_ID, 0, 1, SL_TRACE, "tcp_rput: sending zero-length %s %s", @@ -15172,7 +12288,7 @@ ack_check: mp1 = tcp_ack_mp(tcp); if (mp1 != NULL) { - tcp_send_data(tcp, tcp->tcp_wq, mp1); + tcp_send_data(tcp, mp1); BUMP_LOCAL(tcp->tcp_obsegs); BUMP_MIB(&tcps->tcps_mib, tcpOutAck); } @@ -15200,6 +12316,7 @@ ack_check: * after tcp_accept is done. 
*/ ASSERT(tcp->tcp_listener == NULL); + ASSERT(!tcp->tcp_detached); if (IPCL_IS_NONSTR(connp)) { ASSERT(tcp->tcp_ordrel_mp == NULL); @@ -15220,7 +12337,7 @@ ack_check: mp1 = tcp->tcp_ordrel_mp; tcp->tcp_ordrel_mp = NULL; tcp->tcp_ordrel_done = B_TRUE; - putnext(tcp->tcp_rq, mp1); + putnext(connp->conn_rq, mp1); } done: ASSERT(!(flags & TH_MARKNEXT_NEEDED)); @@ -15251,21 +12368,22 @@ tcp_update_xmit_tail(tcp_t *tcp, uint32_t snxt) * segment passes the PAWS test, else returns B_FALSE. */ boolean_t -tcp_paws_check(tcp_t *tcp, tcph_t *tcph, tcp_opt_t *tcpoptp) +tcp_paws_check(tcp_t *tcp, tcpha_t *tcpha, tcp_opt_t *tcpoptp) { uint8_t flags; int options; uint8_t *up; + conn_t *connp = tcp->tcp_connp; - flags = (unsigned int)tcph->th_flags[0] & 0xFF; + flags = (unsigned int)tcpha->tha_flags & 0xFF; /* * If timestamp option is aligned nicely, get values inline, * otherwise call general routine to parse. Only do that * if timestamp is the only option. */ - if (TCP_HDR_LENGTH(tcph) == (uint32_t)TCP_MIN_HEADER_LENGTH + + if (TCP_HDR_LENGTH(tcpha) == (uint32_t)TCP_MIN_HEADER_LENGTH + TCPOPT_REAL_TS_LEN && - OK_32PTR((up = ((uint8_t *)tcph) + + OK_32PTR((up = ((uint8_t *)tcpha) + TCP_MIN_HEADER_LENGTH)) && *(uint32_t *)up == TCPOPT_NOP_NOP_TSTAMP) { tcpoptp->tcp_opt_ts_val = ABE32_TO_U32((up+4)); @@ -15278,7 +12396,7 @@ tcp_paws_check(tcp_t *tcp, tcph_t *tcph, tcp_opt_t *tcpoptp) } else { tcpoptp->tcp = NULL; } - options = tcp_parse_options(tcph, tcpoptp); + options = tcp_parse_options(tcpha, tcpoptp); } if (options & TCP_OPT_TSTAMP_PRESENT) { @@ -15311,16 +12429,15 @@ tcp_paws_check(tcp_t *tcp, tcph_t *tcph, tcp_opt_t *tcpoptp) */ tcp->tcp_snd_ts_ok = B_FALSE; - tcp->tcp_hdr_len -= TCPOPT_REAL_TS_LEN; - tcp->tcp_tcp_hdr_len -= TCPOPT_REAL_TS_LEN; - tcp->tcp_tcph->th_offset_and_rsrvd[0] -= (3 << 4); + connp->conn_ht_iphc_len -= TCPOPT_REAL_TS_LEN; + connp->conn_ht_ulp_len -= TCPOPT_REAL_TS_LEN; + tcp->tcp_tcpha->tha_offset_and_reserved -= (3 << 4); /* - * Adjust the tcp_mss 
accordingly. We also need to - * adjust tcp_cwnd here in accordance with the new mss. - * But we avoid doing a slow start here so as to not - * to lose on the transfer rate built up so far. + * Adjust the tcp_mss and tcp_cwnd accordingly. We avoid + * doing a slow start here so as to not to lose on the + * transfer rate built up so far. */ - tcp_mss_set(tcp, tcp->tcp_mss + TCPOPT_REAL_TS_LEN, B_FALSE); + tcp_mss_set(tcp, tcp->tcp_mss + TCPOPT_REAL_TS_LEN); if (tcp->tcp_snd_sack_ok) { ASSERT(tcp->tcp_sack_info != NULL); tcp->tcp_max_sack_blk = 4; @@ -15338,38 +12455,37 @@ tcp_paws_check(tcp_t *tcp, tcph_t *tcph, tcp_opt_t *tcpoptp) * when memory allocation fails we can just wait for the next data segment. */ static mblk_t * -tcp_rput_add_ancillary(tcp_t *tcp, mblk_t *mp, ip6_pkt_t *ipp) +tcp_input_add_ancillary(tcp_t *tcp, mblk_t *mp, ip_pkt_t *ipp, + ip_recv_attr_t *ira) { struct T_optdata_ind *todi; int optlen; uchar_t *optptr; struct T_opthdr *toh; - uint_t addflag; /* Which pieces to add */ + crb_t addflag; /* Which pieces to add */ mblk_t *mp1; + conn_t *connp = tcp->tcp_connp; optlen = 0; - addflag = 0; + addflag.crb_all = 0; /* If app asked for pktinfo and the index has changed ... */ - if ((ipp->ipp_fields & IPPF_IFINDEX) && - ipp->ipp_ifindex != tcp->tcp_recvifindex && - (tcp->tcp_ipv6_recvancillary & TCP_IPV6_RECVPKTINFO)) { + if (connp->conn_recv_ancillary.crb_ip_recvpktinfo && + ira->ira_ruifindex != tcp->tcp_recvifindex) { optlen += sizeof (struct T_opthdr) + sizeof (struct in6_pktinfo); - addflag |= TCP_IPV6_RECVPKTINFO; + addflag.crb_ip_recvpktinfo = 1; } /* If app asked for hoplimit and it has changed ... 
*/ - if ((ipp->ipp_fields & IPPF_HOPLIMIT) && - ipp->ipp_hoplimit != tcp->tcp_recvhops && - (tcp->tcp_ipv6_recvancillary & TCP_IPV6_RECVHOPLIMIT)) { + if (connp->conn_recv_ancillary.crb_ipv6_recvhoplimit && + ipp->ipp_hoplimit != tcp->tcp_recvhops) { optlen += sizeof (struct T_opthdr) + sizeof (uint_t); - addflag |= TCP_IPV6_RECVHOPLIMIT; + addflag.crb_ipv6_recvhoplimit = 1; } /* If app asked for tclass and it has changed ... */ - if ((ipp->ipp_fields & IPPF_TCLASS) && - ipp->ipp_tclass != tcp->tcp_recvtclass && - (tcp->tcp_ipv6_recvancillary & TCP_IPV6_RECVTCLASS)) { + if (connp->conn_recv_ancillary.crb_ipv6_recvtclass && + ipp->ipp_tclass != tcp->tcp_recvtclass) { optlen += sizeof (struct T_opthdr) + sizeof (uint_t); - addflag |= TCP_IPV6_RECVTCLASS; + addflag.crb_ipv6_recvtclass = 1; } /* * If app asked for hopbyhop headers and it has changed ... @@ -15377,51 +12493,51 @@ tcp_rput_add_ancillary(tcp_t *tcp, mblk_t *mp, ip6_pkt_t *ipp) * a connected socket at all, (2) we're connected to at most one peer, * (3) if anything changes, then it must be some other extra option. */ - if ((tcp->tcp_ipv6_recvancillary & TCP_IPV6_RECVHOPOPTS) && + if (connp->conn_recv_ancillary.crb_ipv6_recvhopopts && ip_cmpbuf(tcp->tcp_hopopts, tcp->tcp_hopoptslen, (ipp->ipp_fields & IPPF_HOPOPTS), ipp->ipp_hopopts, ipp->ipp_hopoptslen)) { - optlen += sizeof (struct T_opthdr) + ipp->ipp_hopoptslen - - tcp->tcp_label_len; - addflag |= TCP_IPV6_RECVHOPOPTS; + optlen += sizeof (struct T_opthdr) + ipp->ipp_hopoptslen; + addflag.crb_ipv6_recvhopopts = 1; if (!ip_allocbuf((void **)&tcp->tcp_hopopts, &tcp->tcp_hopoptslen, (ipp->ipp_fields & IPPF_HOPOPTS), ipp->ipp_hopopts, ipp->ipp_hopoptslen)) return (mp); } /* If app asked for dst headers before routing headers ... 
*/ - if ((tcp->tcp_ipv6_recvancillary & TCP_IPV6_RECVRTDSTOPTS) && - ip_cmpbuf(tcp->tcp_rtdstopts, tcp->tcp_rtdstoptslen, - (ipp->ipp_fields & IPPF_RTDSTOPTS), - ipp->ipp_rtdstopts, ipp->ipp_rtdstoptslen)) { + if (connp->conn_recv_ancillary.crb_ipv6_recvrthdrdstopts && + ip_cmpbuf(tcp->tcp_rthdrdstopts, tcp->tcp_rthdrdstoptslen, + (ipp->ipp_fields & IPPF_RTHDRDSTOPTS), + ipp->ipp_rthdrdstopts, ipp->ipp_rthdrdstoptslen)) { optlen += sizeof (struct T_opthdr) + - ipp->ipp_rtdstoptslen; - addflag |= TCP_IPV6_RECVRTDSTOPTS; - if (!ip_allocbuf((void **)&tcp->tcp_rtdstopts, - &tcp->tcp_rtdstoptslen, (ipp->ipp_fields & IPPF_RTDSTOPTS), - ipp->ipp_rtdstopts, ipp->ipp_rtdstoptslen)) + ipp->ipp_rthdrdstoptslen; + addflag.crb_ipv6_recvrthdrdstopts = 1; + if (!ip_allocbuf((void **)&tcp->tcp_rthdrdstopts, + &tcp->tcp_rthdrdstoptslen, + (ipp->ipp_fields & IPPF_RTHDRDSTOPTS), + ipp->ipp_rthdrdstopts, ipp->ipp_rthdrdstoptslen)) return (mp); } /* If app asked for routing headers and it has changed ... */ - if ((tcp->tcp_ipv6_recvancillary & TCP_IPV6_RECVRTHDR) && + if (connp->conn_recv_ancillary.crb_ipv6_recvrthdr && ip_cmpbuf(tcp->tcp_rthdr, tcp->tcp_rthdrlen, (ipp->ipp_fields & IPPF_RTHDR), ipp->ipp_rthdr, ipp->ipp_rthdrlen)) { optlen += sizeof (struct T_opthdr) + ipp->ipp_rthdrlen; - addflag |= TCP_IPV6_RECVRTHDR; + addflag.crb_ipv6_recvrthdr = 1; if (!ip_allocbuf((void **)&tcp->tcp_rthdr, &tcp->tcp_rthdrlen, (ipp->ipp_fields & IPPF_RTHDR), ipp->ipp_rthdr, ipp->ipp_rthdrlen)) return (mp); } /* If app asked for dest headers and it has changed ... 
*/ - if ((tcp->tcp_ipv6_recvancillary & - (TCP_IPV6_RECVDSTOPTS | TCP_OLD_IPV6_RECVDSTOPTS)) && + if ((connp->conn_recv_ancillary.crb_ipv6_recvdstopts || + connp->conn_recv_ancillary.crb_old_ipv6_recvdstopts) && ip_cmpbuf(tcp->tcp_dstopts, tcp->tcp_dstoptslen, (ipp->ipp_fields & IPPF_DSTOPTS), ipp->ipp_dstopts, ipp->ipp_dstoptslen)) { optlen += sizeof (struct T_opthdr) + ipp->ipp_dstoptslen; - addflag |= TCP_IPV6_RECVDSTOPTS; + addflag.crb_ipv6_recvdstopts = 1; if (!ip_allocbuf((void **)&tcp->tcp_dstopts, &tcp->tcp_dstoptslen, (ipp->ipp_fields & IPPF_DSTOPTS), ipp->ipp_dstopts, ipp->ipp_dstoptslen)) @@ -15454,9 +12570,11 @@ tcp_rput_add_ancillary(tcp_t *tcp, mblk_t *mp, ip6_pkt_t *ipp) * If app asked for pktinfo and the index has changed ... * Note that the local address never changes for the connection. */ - if (addflag & TCP_IPV6_RECVPKTINFO) { + if (addflag.crb_ip_recvpktinfo) { struct in6_pktinfo *pkti; + uint_t ifindex; + ifindex = ira->ira_ruifindex; toh = (struct T_opthdr *)optptr; toh->level = IPPROTO_IPV6; toh->name = IPV6_PKTINFO; @@ -15464,19 +12582,15 @@ tcp_rput_add_ancillary(tcp_t *tcp, mblk_t *mp, ip6_pkt_t *ipp) toh->status = 0; optptr += sizeof (*toh); pkti = (struct in6_pktinfo *)optptr; - if (tcp->tcp_ipversion == IPV6_VERSION) - pkti->ipi6_addr = tcp->tcp_ip6h->ip6_src; - else - IN6_IPADDR_TO_V4MAPPED(tcp->tcp_ipha->ipha_src, - &pkti->ipi6_addr); - pkti->ipi6_ifindex = ipp->ipp_ifindex; + pkti->ipi6_addr = connp->conn_laddr_v6; + pkti->ipi6_ifindex = ifindex; optptr += sizeof (*pkti); ASSERT(OK_32PTR(optptr)); /* Save as "last" value */ - tcp->tcp_recvifindex = ipp->ipp_ifindex; + tcp->tcp_recvifindex = ifindex; } /* If app asked for hoplimit and it has changed ... 
*/ - if (addflag & TCP_IPV6_RECVHOPLIMIT) { + if (addflag.crb_ipv6_recvhoplimit) { toh = (struct T_opthdr *)optptr; toh->level = IPPROTO_IPV6; toh->name = IPV6_HOPLIMIT; @@ -15490,7 +12604,7 @@ tcp_rput_add_ancillary(tcp_t *tcp, mblk_t *mp, ip6_pkt_t *ipp) tcp->tcp_recvhops = ipp->ipp_hoplimit; } /* If app asked for tclass and it has changed ... */ - if (addflag & TCP_IPV6_RECVTCLASS) { + if (addflag.crb_ipv6_recvtclass) { toh = (struct T_opthdr *)optptr; toh->level = IPPROTO_IPV6; toh->name = IPV6_TCLASS; @@ -15503,40 +12617,38 @@ tcp_rput_add_ancillary(tcp_t *tcp, mblk_t *mp, ip6_pkt_t *ipp) /* Save as "last" value */ tcp->tcp_recvtclass = ipp->ipp_tclass; } - if (addflag & TCP_IPV6_RECVHOPOPTS) { + if (addflag.crb_ipv6_recvhopopts) { toh = (struct T_opthdr *)optptr; toh->level = IPPROTO_IPV6; toh->name = IPV6_HOPOPTS; - toh->len = sizeof (*toh) + ipp->ipp_hopoptslen - - tcp->tcp_label_len; + toh->len = sizeof (*toh) + ipp->ipp_hopoptslen; toh->status = 0; optptr += sizeof (*toh); - bcopy((uchar_t *)ipp->ipp_hopopts + tcp->tcp_label_len, optptr, - ipp->ipp_hopoptslen - tcp->tcp_label_len); - optptr += ipp->ipp_hopoptslen - tcp->tcp_label_len; + bcopy((uchar_t *)ipp->ipp_hopopts, optptr, ipp->ipp_hopoptslen); + optptr += ipp->ipp_hopoptslen; ASSERT(OK_32PTR(optptr)); /* Save as last value */ ip_savebuf((void **)&tcp->tcp_hopopts, &tcp->tcp_hopoptslen, (ipp->ipp_fields & IPPF_HOPOPTS), ipp->ipp_hopopts, ipp->ipp_hopoptslen); } - if (addflag & TCP_IPV6_RECVRTDSTOPTS) { + if (addflag.crb_ipv6_recvrthdrdstopts) { toh = (struct T_opthdr *)optptr; toh->level = IPPROTO_IPV6; toh->name = IPV6_RTHDRDSTOPTS; - toh->len = sizeof (*toh) + ipp->ipp_rtdstoptslen; + toh->len = sizeof (*toh) + ipp->ipp_rthdrdstoptslen; toh->status = 0; optptr += sizeof (*toh); - bcopy(ipp->ipp_rtdstopts, optptr, ipp->ipp_rtdstoptslen); - optptr += ipp->ipp_rtdstoptslen; + bcopy(ipp->ipp_rthdrdstopts, optptr, ipp->ipp_rthdrdstoptslen); + optptr += ipp->ipp_rthdrdstoptslen; 
ASSERT(OK_32PTR(optptr)); /* Save as last value */ - ip_savebuf((void **)&tcp->tcp_rtdstopts, - &tcp->tcp_rtdstoptslen, - (ipp->ipp_fields & IPPF_RTDSTOPTS), - ipp->ipp_rtdstopts, ipp->ipp_rtdstoptslen); + ip_savebuf((void **)&tcp->tcp_rthdrdstopts, + &tcp->tcp_rthdrdstoptslen, + (ipp->ipp_fields & IPPF_RTHDRDSTOPTS), + ipp->ipp_rthdrdstopts, ipp->ipp_rthdrdstoptslen); } - if (addflag & TCP_IPV6_RECVRTHDR) { + if (addflag.crb_ipv6_recvrthdr) { toh = (struct T_opthdr *)optptr; toh->level = IPPROTO_IPV6; toh->name = IPV6_RTHDR; @@ -15551,7 +12663,7 @@ tcp_rput_add_ancillary(tcp_t *tcp, mblk_t *mp, ip6_pkt_t *ipp) (ipp->ipp_fields & IPPF_RTHDR), ipp->ipp_rthdr, ipp->ipp_rthdrlen); } - if (addflag & (TCP_IPV6_RECVDSTOPTS | TCP_OLD_IPV6_RECVDSTOPTS)) { + if (addflag.crb_ipv6_recvdstopts) { toh = (struct T_opthdr *)optptr; toh->level = IPPROTO_IPV6; toh->name = IPV6_DSTOPTS; @@ -15570,99 +12682,13 @@ tcp_rput_add_ancillary(tcp_t *tcp, mblk_t *mp, ip6_pkt_t *ipp) return (mp); } -/* - * tcp_rput_other is called by tcp_rput to handle everything other than M_DATA - * messages. 
- */ -void -tcp_rput_other(tcp_t *tcp, mblk_t *mp) -{ - uchar_t *rptr = mp->b_rptr; - queue_t *q = tcp->tcp_rq; - struct T_error_ack *tea; - - switch (mp->b_datap->db_type) { - case M_PROTO: - case M_PCPROTO: - ASSERT((uintptr_t)(mp->b_wptr - rptr) <= (uintptr_t)INT_MAX); - if ((mp->b_wptr - rptr) < sizeof (t_scalar_t)) - break; - tea = (struct T_error_ack *)rptr; - ASSERT(tea->PRIM_type != T_BIND_ACK); - ASSERT(tea->ERROR_prim != O_T_BIND_REQ && - tea->ERROR_prim != T_BIND_REQ); - switch (tea->PRIM_type) { - case T_ERROR_ACK: - if (tcp->tcp_debug) { - (void) strlog(TCP_MOD_ID, 0, 1, - SL_TRACE|SL_ERROR, - "tcp_rput_other: case T_ERROR_ACK, " - "ERROR_prim == %d", - tea->ERROR_prim); - } - switch (tea->ERROR_prim) { - case T_SVR4_OPTMGMT_REQ: - if (tcp->tcp_drop_opt_ack_cnt > 0) { - /* T_OPTMGMT_REQ generated by TCP */ - printf("T_SVR4_OPTMGMT_REQ failed " - "%d/%d - dropped (cnt %d)\n", - tea->TLI_error, tea->UNIX_error, - tcp->tcp_drop_opt_ack_cnt); - freemsg(mp); - tcp->tcp_drop_opt_ack_cnt--; - return; - } - break; - } - if (tea->ERROR_prim == T_SVR4_OPTMGMT_REQ && - tcp->tcp_drop_opt_ack_cnt > 0) { - printf("T_SVR4_OPTMGMT_REQ failed %d/%d " - "- dropped (cnt %d)\n", - tea->TLI_error, tea->UNIX_error, - tcp->tcp_drop_opt_ack_cnt); - freemsg(mp); - tcp->tcp_drop_opt_ack_cnt--; - return; - } - break; - case T_OPTMGMT_ACK: - if (tcp->tcp_drop_opt_ack_cnt > 0) { - /* T_OPTMGMT_REQ generated by TCP */ - freemsg(mp); - tcp->tcp_drop_opt_ack_cnt--; - return; - } - break; - default: - ASSERT(tea->ERROR_prim != T_UNBIND_REQ); - break; - } - break; - case M_FLUSH: - if (*rptr & FLUSHR) - flushq(q, FLUSHDATA); - break; - default: - /* M_CTL will be directly sent to tcp_icmp_error() */ - ASSERT(DB_TYPE(mp) != M_CTL); - break; - } - /* - * Make sure we set this bit before sending the ACK for - * bind. Otherwise accept could possibly run and free - * this tcp struct. 
- */ - ASSERT(q != NULL); - putnext(q, mp); -} - /* ARGSUSED */ static void -tcp_rsrv_input(void *arg, mblk_t *mp, void *arg2) +tcp_rsrv_input(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *dummy) { conn_t *connp = (conn_t *)arg; tcp_t *tcp = connp->conn_tcp; - queue_t *q = tcp->tcp_rq; + queue_t *q = connp->conn_rq; tcp_stack_t *tcps = tcp->tcp_tcps; ASSERT(!IPCL_IS_NONSTR(connp)); @@ -15683,7 +12709,7 @@ tcp_rsrv_input(void *arg, mblk_t *mp, void *arg2) if (canputnext(q)) { /* Not flow-controlled, open rwnd */ - tcp->tcp_rwnd = tcp->tcp_recv_hiwater; + tcp->tcp_rwnd = connp->conn_rcvbuf; /* * Send back a window update immediately if TCP is above @@ -15712,16 +12738,10 @@ tcp_rsrv(queue_t *q) conn_t *connp = Q_TO_CONN(q); tcp_t *tcp = connp->conn_tcp; mblk_t *mp; - tcp_stack_t *tcps = tcp->tcp_tcps; /* No code does a putq on the read side */ ASSERT(q->q_first == NULL); - /* Nothing to do for the default queue */ - if (q == tcps->tcps_g_q) { - return; - } - /* * If tcp->tcp_rsrv_mp == NULL, it means that tcp_rsrv() has already * been run. So just return. @@ -15736,7 +12756,7 @@ tcp_rsrv(queue_t *q) CONN_INC_REF(connp); SQUEUE_ENTER_ONE(connp->conn_sqp, mp, tcp_rsrv_input, connp, - SQ_PROCESS, SQTAG_TCP_RSRV); + NULL, SQ_PROCESS, SQTAG_TCP_RSRV); } /* @@ -15746,8 +12766,8 @@ tcp_rsrv(queue_t *q) * * This function is called in 2 cases: * - * 1) Before data transfer begins, in tcp_accept_comm() for accepting a - * connection (passive open) and in tcp_rput_data() for active connect. + * 1) Before data transfer begins, in tcp_input_listener() for accepting a + * connection (passive open) and in tcp_input_data() for active connect. * This is called after tcp_mss_set() when the desired MSS value is known. * This makes sure that our window size is a mutiple of the other side's * MSS. 
@@ -15766,6 +12786,7 @@ tcp_rwnd_set(tcp_t *tcp, uint32_t rwnd) uint32_t max_transmittable_rwnd; boolean_t tcp_detached = TCP_IS_DETACHED(tcp); tcp_stack_t *tcps = tcp->tcp_tcps; + conn_t *connp = tcp->tcp_connp; /* * Insist on a receive window that is at least @@ -15782,7 +12803,7 @@ tcp_rwnd_set(tcp_t *tcp, uint32_t rwnd) ASSERT(peer_tcp != NULL); sth_hiwat = tcp_fuse_set_rcv_hiwat(tcp, rwnd); if (!tcp_detached) { - (void) proto_set_rx_hiwat(tcp->tcp_rq, tcp->tcp_connp, + (void) proto_set_rx_hiwat(connp->conn_rq, connp, sth_hiwat); tcp_set_recv_threshold(tcp, sth_hiwat >> 3); } @@ -15797,11 +12818,10 @@ tcp_rwnd_set(tcp_t *tcp, uint32_t rwnd) return (sth_hiwat); } - if (tcp_detached) { + if (tcp_detached) old_max_rwnd = tcp->tcp_rwnd; - } else { - old_max_rwnd = tcp->tcp_recv_hiwater; - } + else + old_max_rwnd = connp->conn_rcvbuf; /* @@ -15854,9 +12874,14 @@ tcp_rwnd_set(tcp_t *tcp, uint32_t rwnd) * connection.) */ tcp->tcp_rwnd += rwnd - old_max_rwnd; - tcp->tcp_recv_hiwater = rwnd; + connp->conn_rcvbuf = rwnd; + + /* Are we already connected? 
*/ + if (tcp->tcp_tcpha != NULL) { + tcp->tcp_tcpha->tha_win = + htons(tcp->tcp_rwnd >> tcp->tcp_rcv_ws); + } - U32_TO_ABE16(tcp->tcp_rwnd >> tcp->tcp_rcv_ws, tcp->tcp_tcph->th_win); if ((tcp->tcp_rcv_ws > 0) && rwnd > tcp->tcp_cwnd_max) tcp->tcp_cwnd_max = rwnd; @@ -15865,7 +12890,7 @@ tcp_rwnd_set(tcp_t *tcp, uint32_t rwnd) tcp_set_recv_threshold(tcp, rwnd >> 3); - (void) proto_set_rx_hiwat(tcp->tcp_rq, tcp->tcp_connp, rwnd); + (void) proto_set_rx_hiwat(connp->conn_rq, connp, rwnd); return (rwnd); } @@ -15944,7 +12969,7 @@ tcp_snmp_get(queue_t *q, mblk_t *mpctl) connp = NULL; while ((connp = - ipcl_get_next_conn(connfp, connp, IPCL_TCP)) != NULL) { + ipcl_get_next_conn(connfp, connp, IPCL_TCPCONN)) != NULL) { tcp_t *tcp; boolean_t needattr; @@ -15992,11 +13017,10 @@ tcp_snmp_get(queue_t *q, mblk_t *mpctl) needattr = B_TRUE; break; } - if (connp->conn_fully_bound && - connp->conn_effective_cred != NULL) { + if (connp->conn_ixa->ixa_tsl != NULL) { ts_label_t *tsl; - tsl = crgetlabel(connp->conn_effective_cred); + tsl = connp->conn_ixa->ixa_tsl; mlp.tme_flags |= MIB2_TMEF_IS_LABELED; mlp.tme_doi = label2doi(tsl); mlp.tme_label = *label2bslabel(tsl); @@ -16004,12 +13028,17 @@ tcp_snmp_get(queue_t *q, mblk_t *mpctl) } /* Create a message to report on IPv6 entries */ - if (tcp->tcp_ipversion == IPV6_VERSION) { - tce6.tcp6ConnLocalAddress = tcp->tcp_ip_src_v6; - tce6.tcp6ConnRemAddress = tcp->tcp_remote_v6; - tce6.tcp6ConnLocalPort = ntohs(tcp->tcp_lport); - tce6.tcp6ConnRemPort = ntohs(tcp->tcp_fport); - tce6.tcp6ConnIfIndex = tcp->tcp_bound_if; + if (connp->conn_ipversion == IPV6_VERSION) { + tce6.tcp6ConnLocalAddress = connp->conn_laddr_v6; + tce6.tcp6ConnRemAddress = connp->conn_faddr_v6; + tce6.tcp6ConnLocalPort = ntohs(connp->conn_lport); + tce6.tcp6ConnRemPort = ntohs(connp->conn_fport); + if (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET) { + tce6.tcp6ConnIfIndex = + connp->conn_ixa->ixa_scopeid; + } else { + tce6.tcp6ConnIfIndex = connp->conn_bound_if; + } /* 
Don't want just anybody seeing these... */ if (ispriv) { tce6.tcp6ConnEntryInfo.ce_snxt = @@ -16041,9 +13070,9 @@ tcp_snmp_get(queue_t *q, mblk_t *mpctl) tce6.tcp6ConnEntryInfo.ce_state = tcp->tcp_state; tce6.tcp6ConnCreationProcess = - (tcp->tcp_cpid < 0) ? MIB2_UNKNOWN_PROCESS : - tcp->tcp_cpid; - tce6.tcp6ConnCreationTime = tcp->tcp_open_time; + (connp->conn_cpid < 0) ? MIB2_UNKNOWN_PROCESS : + connp->conn_cpid; + tce6.tcp6ConnCreationTime = connp->conn_open_time; (void) snmp_append_data2(mp6_conn_ctl->b_cont, &mp6_conn_tail, (char *)&tce6, sizeof (tce6)); @@ -16059,21 +13088,21 @@ tcp_snmp_get(queue_t *q, mblk_t *mpctl) * but don't have IPV6_V6ONLY set. * (i.e. anything an IPv4 peer could connect to) */ - if (tcp->tcp_ipversion == IPV4_VERSION || + if (connp->conn_ipversion == IPV4_VERSION || (tcp->tcp_state <= TCPS_LISTEN && - !tcp->tcp_connp->conn_ipv6_v6only && - IN6_IS_ADDR_UNSPECIFIED(&tcp->tcp_ip_src_v6))) { - if (tcp->tcp_ipversion == IPV6_VERSION) { + !connp->conn_ipv6_v6only && + IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6))) { + if (connp->conn_ipversion == IPV6_VERSION) { tce.tcpConnRemAddress = INADDR_ANY; tce.tcpConnLocalAddress = INADDR_ANY; } else { tce.tcpConnRemAddress = - tcp->tcp_remote; + connp->conn_faddr_v4; tce.tcpConnLocalAddress = - tcp->tcp_ip_src; + connp->conn_laddr_v4; } - tce.tcpConnLocalPort = ntohs(tcp->tcp_lport); - tce.tcpConnRemPort = ntohs(tcp->tcp_fport); + tce.tcpConnLocalPort = ntohs(connp->conn_lport); + tce.tcpConnRemPort = ntohs(connp->conn_fport); /* Don't want just anybody seeing these... */ if (ispriv) { tce.tcpConnEntryInfo.ce_snxt = @@ -16107,9 +13136,10 @@ tcp_snmp_get(queue_t *q, mblk_t *mpctl) tcp->tcp_state; tce.tcpConnCreationProcess = - (tcp->tcp_cpid < 0) ? MIB2_UNKNOWN_PROCESS : - tcp->tcp_cpid; - tce.tcpConnCreationTime = tcp->tcp_open_time; + (connp->conn_cpid < 0) ? 
+ MIB2_UNKNOWN_PROCESS : + connp->conn_cpid; + tce.tcpConnCreationTime = connp->conn_open_time; (void) snmp_append_data2(mp_conn_ctl->b_cont, &mp_conn_tail, (char *)&tce, sizeof (tce)); @@ -16273,7 +13303,6 @@ tcp_timer(void *arg) tcp_t *listener = tcp->tcp_listener; if (tcp->tcp_syn_rcvd_timeout == 0 && (listener != NULL)) { - ASSERT(tcp->tcp_rq == listener->tcp_rq); /* it's our first timeout */ tcp->tcp_syn_rcvd_timeout = 1; mutex_enter(&listener->tcp_eager_lock); @@ -16295,7 +13324,7 @@ tcp_timer(void *arg) cmn_err(CE_WARN, "High TCP connect timeout " "rate! System (port %d) may be under a " "SYN flood attack!", - BE16_TO_U16(listener->tcp_tcph->th_lport)); + ntohs(listener->tcp_connp->conn_lport)); listener->tcp_ip_addr_cache = kmem_zalloc( IP_ADDR_CACHE_SIZE * sizeof (ipaddr_t), @@ -16363,7 +13392,7 @@ tcp_timer(void *arg) * backoff. */ if (tcp->tcp_swnd == 0 || tcp->tcp_zero_win_probe) { - if (tcp->tcp_debug) { + if (connp->conn_debug) { (void) strlog(TCP_MOD_ID, 0, 1, SL_TRACE, "tcp_timer: zero win"); } @@ -16415,6 +13444,13 @@ tcp_timer(void *arg) * 3. But 1 and 3 are exclusive. */ if (tcp->tcp_unsent != 0) { + /* + * Should not hold the zero-copy messages for too long. + */ + if (tcp->tcp_snd_zcopy_aware && !tcp->tcp_xmit_zc_clean) + tcp->tcp_xmit_head = tcp_zcopy_backoff(tcp, + tcp->tcp_xmit_head, B_TRUE); + if (tcp->tcp_cwnd == 0) { /* * Set tcp_cwnd to 1 MSS so that a @@ -16477,7 +13513,7 @@ tcp_timer(void *arg) (void) tcp_clean_death(tcp, 0, 24); return; default: - if (tcp->tcp_debug) { + if (connp->conn_debug) { (void) strlog(TCP_MOD_ID, 0, 1, SL_TRACE|SL_ERROR, "tcp_timer: strange state (%d) %s", tcp->tcp_state, tcp_display(tcp, NULL, @@ -16485,8 +13521,16 @@ tcp_timer(void *arg) } return; } + if ((ms = tcp->tcp_ms_we_have_waited) > second_threshold) { /* + * Should not hold the zero-copy messages for too long. 
+ */ + if (tcp->tcp_snd_zcopy_aware && !tcp->tcp_xmit_zc_clean) + tcp->tcp_xmit_head = tcp_zcopy_backoff(tcp, + tcp->tcp_xmit_head, B_TRUE); + + /* * For zero window probe, we need to send indefinitely, * unless we have not heard from the other side for some * time... @@ -16529,11 +13573,13 @@ tcp_timer(void *arg) tcp->tcp_ms_we_have_waited = second_threshold; } } else if (ms > first_threshold) { - if (tcp->tcp_snd_zcopy_aware && (!tcp->tcp_xmit_zc_clean) && - tcp->tcp_xmit_head != NULL) { - tcp->tcp_xmit_head = - tcp_zcopy_backoff(tcp, tcp->tcp_xmit_head, 1); - } + /* + * Should not hold the zero-copy messages for too long. + */ + if (tcp->tcp_snd_zcopy_aware && !tcp->tcp_xmit_zc_clean) + tcp->tcp_xmit_head = tcp_zcopy_backoff(tcp, + tcp->tcp_xmit_head, B_TRUE); + /* * We have been retransmitting for too long... The RTT * we calculated is probably incorrect. Reinitialize it. @@ -16618,20 +13664,11 @@ tcp_timer(void *arg) if (mp == NULL) { return; } - /* - * Attach credentials to retransmitted initial SYNs. - * In theory we should use the credentials from the connect() - * call to ensure that getpeerucred() on the peer will be correct. - * But we assume that SYN's are not dropped for loopback connections. 
- */ - if (tcp->tcp_state == TCPS_SYN_SENT) { - mblk_setcred(mp, CONN_CRED(tcp->tcp_connp), tcp->tcp_cpid); - } tcp->tcp_csuna = tcp->tcp_snxt; BUMP_MIB(&tcps->tcps_mib, tcpRetransSegs); UPDATE_MIB(&tcps->tcps_mib, tcpRetransBytes, mss); - tcp_send_data(tcp, tcp->tcp_wq, mp); + tcp_send_data(tcp, mp); } @@ -16639,7 +13676,6 @@ static int tcp_do_unbind(conn_t *connp) { tcp_t *tcp = connp->conn_tcp; - int error = 0; switch (tcp->tcp_state) { case TCPS_BOUND: @@ -16659,41 +13695,36 @@ tcp_do_unbind(conn_t *connp) } mutex_exit(&tcp->tcp_eager_lock); - if (tcp->tcp_ipversion == IPV4_VERSION) { - tcp->tcp_ipha->ipha_src = 0; - } else { - V6_SET_ZERO(tcp->tcp_ip6h->ip6_src); - } - V6_SET_ZERO(tcp->tcp_ip_src_v6); - bzero(tcp->tcp_tcph->th_lport, sizeof (tcp->tcp_tcph->th_lport)); + connp->conn_laddr_v6 = ipv6_all_zeros; + connp->conn_saddr_v6 = ipv6_all_zeros; tcp_bind_hash_remove(tcp); tcp->tcp_state = TCPS_IDLE; - tcp->tcp_mdt = B_FALSE; - connp = tcp->tcp_connp; - connp->conn_mdt_ok = B_FALSE; - ipcl_hash_remove(connp); + ip_unbind(connp); bzero(&connp->conn_ports, sizeof (connp->conn_ports)); - return (error); + return (0); } /* tcp_unbind is called by tcp_wput_proto to handle T_UNBIND_REQ messages. 
*/ static void tcp_tpi_unbind(tcp_t *tcp, mblk_t *mp) { - int error = tcp_do_unbind(tcp->tcp_connp); + conn_t *connp = tcp->tcp_connp; + int error; + error = tcp_do_unbind(connp); if (error > 0) { tcp_err_ack(tcp, mp, TSYSERR, error); } else if (error < 0) { tcp_err_ack(tcp, mp, -error, 0); } else { /* Send M_FLUSH according to TPI */ - (void) putnextctl1(tcp->tcp_rq, M_FLUSH, FLUSHRW); + (void) putnextctl1(connp->conn_rq, M_FLUSH, FLUSHRW); mp = mi_tpi_ok_ack_alloc(mp); - putnext(tcp->tcp_rq, mp); + if (mp != NULL) + putnext(connp->conn_rq, mp); } } @@ -16764,7 +13795,7 @@ retry: } } if (is_system_labeled() && - (i = tsol_next_port(crgetzone(tcp->tcp_cred), port, + (i = tsol_next_port(crgetzone(tcp->tcp_connp->conn_cred), port, IPPROTO_TCP, B_TRUE)) != 0) { port = i; goto retry; @@ -16796,7 +13827,7 @@ retry: restart = B_TRUE; } if (is_system_labeled() && - (nextport = tsol_next_port(crgetzone(tcp->tcp_cred), + (nextport = tsol_next_port(crgetzone(tcp->tcp_connp->conn_cred), next_priv_port, IPPROTO_TCP, B_FALSE)) != 0) { next_priv_port = nextport; goto retry; @@ -16820,11 +13851,10 @@ struct { */ /* ARGSUSED */ static void -tcp_wput_nondata(void *arg, mblk_t *mp, void *arg2) +tcp_wput_nondata(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *dummy) { conn_t *connp = (conn_t *)arg; tcp_t *tcp = connp->conn_tcp; - queue_t *q = tcp->tcp_wq; ASSERT(DB_TYPE(mp) != M_IOCTL); /* @@ -16851,7 +13881,7 @@ tcp_wput_nondata(void *arg, mblk_t *mp, void *arg2) tcp_wput_flush(tcp, mp); break; default: - CALL_IP_WPUT(connp, q, mp); + ip_wput_nondata(connp->conn_wq, mp); break; } } @@ -16862,7 +13892,7 @@ tcp_wput_nondata(void *arg, mblk_t *mp, void *arg2) */ /* ARGSUSED */ void -tcp_output(void *arg, mblk_t *mp, void *arg2) +tcp_output(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *dummy) { int len; int hdrlen; @@ -16870,7 +13900,7 @@ tcp_output(void *arg, mblk_t *mp, void *arg2) mblk_t *mp1; uchar_t *rptr; uint32_t snxt; - tcph_t *tcph; + tcpha_t *tcpha; struct datab *db; 
uint32_t suna; uint32_t mss; @@ -16882,7 +13912,7 @@ tcp_output(void *arg, mblk_t *mp, void *arg2) tcp_t *tcp = connp->conn_tcp; uint32_t msize; tcp_stack_t *tcps = tcp->tcp_tcps; - ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip; + ip_xmit_attr_t *ixa; /* * Try and ASSERT the minimum possible references on the @@ -16903,25 +13933,18 @@ tcp_output(void *arg, mblk_t *mp, void *arg2) tcp->tcp_squeue_bytes -= msize; mutex_exit(&tcp->tcp_non_sq_lock); - /* Check to see if this connection wants to be re-fused. */ - if (tcp->tcp_refuse) { - if (tcp->tcp_ipversion == IPV4_VERSION && - !ipst->ips_ip4_observe.he_interested) { - tcp_fuse(tcp, (uchar_t *)&tcp->tcp_saved_ipha, - &tcp->tcp_saved_tcph); - } else if (tcp->tcp_ipversion == IPV6_VERSION && - !ipst->ips_ip6_observe.he_interested) { - tcp_fuse(tcp, (uchar_t *)&tcp->tcp_saved_ip6h, - &tcp->tcp_saved_tcph); - } - } /* Bypass tcp protocol for fused tcp loopback */ if (tcp->tcp_fused && tcp_fuse_output(tcp, mp, msize)) return; mss = tcp->tcp_mss; - if (tcp->tcp_xmit_zc_clean) - mp = tcp_zcopy_backoff(tcp, mp, 0); + /* + * If ZEROCOPY has turned off, try not to send any zero-copy message + * down. Do backoff, now. + */ + if (tcp->tcp_snd_zcopy_aware && !tcp->tcp_snd_zcopy_on) + mp = tcp_zcopy_backoff(tcp, mp, B_FALSE); + ASSERT((uintptr_t)(mp->b_wptr - mp->b_rptr) <= (uintptr_t)INT_MAX); len = (int)(mp->b_wptr - mp->b_rptr); @@ -16977,8 +14000,7 @@ tcp_output(void *arg, mblk_t *mp, void *arg2) * start again to get back the connection's "self-clock" as * described in VJ's paper. * - * Refer to the comment in tcp_mss_set() for the calculation - * of tcp_cwnd after idle. + * Reinitialize tcp_cwnd after idle. 
*/ if ((tcp->tcp_suna == snxt) && !tcp->tcp_localnet && (TICK_TO_MSEC(lbolt - tcp->tcp_last_recv_time) >= tcp->tcp_rto)) { @@ -16999,7 +14021,7 @@ tcp_output(void *arg, mblk_t *mp, void *arg2) mutex_enter(&tcp->tcp_non_sq_lock); if (tcp->tcp_flow_stopped && - TCP_UNSENT_BYTES(tcp) <= tcp->tcp_xmit_lowater) { + TCP_UNSENT_BYTES(tcp) <= connp->conn_sndlowat) { tcp_clrqfull(tcp); } mutex_exit(&tcp->tcp_non_sq_lock); @@ -17046,43 +14068,43 @@ tcp_output(void *arg, mblk_t *mp, void *arg2) mp->b_next = (mblk_t *)(uintptr_t)snxt; /* adjust tcp header information */ - tcph = tcp->tcp_tcph; - tcph->th_flags[0] = (TH_ACK|TH_PUSH); + tcpha = tcp->tcp_tcpha; + tcpha->tha_flags = (TH_ACK|TH_PUSH); - sum = len + tcp->tcp_tcp_hdr_len + tcp->tcp_sum; + sum = len + connp->conn_ht_ulp_len + connp->conn_sum; sum = (sum >> 16) + (sum & 0xFFFF); - U16_TO_ABE16(sum, tcph->th_sum); + tcpha->tha_sum = htons(sum); - U32_TO_ABE32(snxt, tcph->th_seq); + tcpha->tha_seq = htonl(snxt); BUMP_MIB(&tcps->tcps_mib, tcpOutDataSegs); UPDATE_MIB(&tcps->tcps_mib, tcpOutDataBytes, len); BUMP_LOCAL(tcp->tcp_obsegs); /* Update the latest receive window size in TCP header. 
*/ - U32_TO_ABE16(tcp->tcp_rwnd >> tcp->tcp_rcv_ws, - tcph->th_win); + tcpha->tha_win = htons(tcp->tcp_rwnd >> tcp->tcp_rcv_ws); tcp->tcp_last_sent_len = (ushort_t)len; - plen = len + tcp->tcp_hdr_len; + plen = len + connp->conn_ht_iphc_len; - if (tcp->tcp_ipversion == IPV4_VERSION) { + ixa = connp->conn_ixa; + ixa->ixa_pktlen = plen; + + if (ixa->ixa_flags & IXAF_IS_IPV4) { tcp->tcp_ipha->ipha_length = htons(plen); } else { - tcp->tcp_ip6h->ip6_plen = htons(plen - - ((char *)&tcp->tcp_ip6h[1] - tcp->tcp_iphc)); + tcp->tcp_ip6h->ip6_plen = htons(plen - IPV6_HDR_LEN); } /* see if we need to allocate a mblk for the headers */ - hdrlen = tcp->tcp_hdr_len; + hdrlen = connp->conn_ht_iphc_len; rptr = mp1->b_rptr - hdrlen; db = mp1->b_datap; if ((db->db_ref != 2) || rptr < db->db_base || (!OK_32PTR(rptr))) { /* NOTE: we assume allocb returns an OK_32PTR */ - mp = allocb(tcp->tcp_ip_hdr_len + TCP_MAX_HDR_LENGTH + - tcps->tcps_wroff_xtra, BPRI_MED); + mp = allocb(hdrlen + tcps->tcps_wroff_xtra, BPRI_MED); if (!mp) { freemsg(mp1); goto no_memory; @@ -17090,7 +14112,6 @@ tcp_output(void *arg, mblk_t *mp, void *arg2) mp->b_cont = mp1; mp1 = mp; /* Leave room for Link Level header */ - /* hdrlen = tcp->tcp_hdr_len; */ rptr = &mp1->b_rptr[tcps->tcps_wroff_xtra]; mp1->b_wptr = &rptr[hdrlen]; } @@ -17099,16 +14120,16 @@ tcp_output(void *arg, mblk_t *mp, void *arg2) /* Fill in the timestamp option. 
*/ if (tcp->tcp_snd_ts_ok) { U32_TO_BE32((uint32_t)lbolt, - (char *)tcph+TCP_MIN_HEADER_LENGTH+4); + (char *)tcpha + TCP_MIN_HEADER_LENGTH+4); U32_TO_BE32(tcp->tcp_ts_recent, - (char *)tcph+TCP_MIN_HEADER_LENGTH+8); + (char *)tcpha + TCP_MIN_HEADER_LENGTH+8); } else { - ASSERT(tcp->tcp_tcp_hdr_len == TCP_MIN_HEADER_LENGTH); + ASSERT(connp->conn_ht_ulp_len == TCP_MIN_HEADER_LENGTH); } /* copy header into outgoing packet */ dst = (ipaddr_t *)rptr; - src = (ipaddr_t *)tcp->tcp_iphc; + src = (ipaddr_t *)connp->conn_ht_iphc; dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; @@ -17135,21 +14156,22 @@ tcp_output(void *arg, mblk_t *mp, void *arg2) if (tcp->tcp_ecn_ok) { SET_ECT(tcp, rptr); - tcph = (tcph_t *)(rptr + tcp->tcp_ip_hdr_len); + tcpha = (tcpha_t *)(rptr + ixa->ixa_ip_hdr_length); if (tcp->tcp_ecn_echo_on) - tcph->th_flags[0] |= TH_ECE; + tcpha->tha_flags |= TH_ECE; if (tcp->tcp_cwr && !tcp->tcp_ecn_cwr_sent) { - tcph->th_flags[0] |= TH_CWR; + tcpha->tha_flags |= TH_CWR; tcp->tcp_ecn_cwr_sent = B_TRUE; } } if (tcp->tcp_ip_forward_progress) { - ASSERT(tcp->tcp_ipversion == IPV6_VERSION); - *(uint32_t *)mp1->b_rptr |= IP_FORWARD_PROG; tcp->tcp_ip_forward_progress = B_FALSE; + connp->conn_ixa->ixa_flags |= IXAF_REACH_CONF; + } else { + connp->conn_ixa->ixa_flags &= ~IXAF_REACH_CONF; } - tcp_send_data(tcp, tcp->tcp_wq, mp1); + tcp_send_data(tcp, mp1); return; /* @@ -17166,29 +14188,27 @@ slow: tcp_wput_data(tcp, NULL, B_FALSE); } +/* + * This runs at the tail end of accept processing on the squeue of the + * new connection. 
+ */ /* ARGSUSED */ void -tcp_accept_finish(void *arg, mblk_t *mp, void *arg2) +tcp_accept_finish(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *dummy) { conn_t *connp = (conn_t *)arg; tcp_t *tcp = connp->conn_tcp; - queue_t *q = tcp->tcp_rq; - struct tcp_options *tcpopt; + queue_t *q = connp->conn_rq; tcp_stack_t *tcps = tcp->tcp_tcps; - /* socket options */ - uint_t sopp_flags; - ssize_t sopp_rxhiwat; - ssize_t sopp_maxblk; - ushort_t sopp_wroff; - ushort_t sopp_tail; - ushort_t sopp_copyopt; + struct sock_proto_props sopp; - tcpopt = (struct tcp_options *)mp->b_rptr; + /* We should just receive a single mblk that fits a T_discon_ind */ + ASSERT(mp->b_cont == NULL); /* * Drop the eager's ref on the listener, that was placed when - * this eager began life in tcp_conn_request. + * this eager began life in tcp_input_listener. */ CONN_DEC_REF(tcp->tcp_saved_listener->tcp_connp); if (IPCL_IS_NONSTR(connp)) { @@ -17227,15 +14247,12 @@ tcp_accept_finish(void *arg, mblk_t *mp, void *arg2) * memory allocation failure problems. We know * that the size of the incoming mblk i.e. * stroptions is greater than sizeof - * T_discon_ind. So the reallocb below can't - * fail. + * T_discon_ind. 
*/ - freemsg(mp->b_cont); - mp->b_cont = NULL; ASSERT(DB_REF(mp) == 1); - mp = reallocb(mp, sizeof (struct T_discon_ind), - B_FALSE); - ASSERT(mp != NULL); + ASSERT(MBLKSIZE(mp) >= + sizeof (struct T_discon_ind)); + DB_TYPE(mp) = M_PROTO; ((union T_primitives *)mp->b_rptr)->type = T_DISCON_IND; @@ -17251,41 +14268,21 @@ tcp_accept_finish(void *arg, mblk_t *mp, void *arg2) mp->b_wptr = mp->b_rptr + sizeof (struct T_discon_ind); putnext(q, mp); - return; } } - if (tcp->tcp_hard_binding) { - tcp->tcp_hard_binding = B_FALSE; - tcp->tcp_hard_bound = B_TRUE; - } + tcp->tcp_hard_binding = B_FALSE; return; } - if (tcpopt->to_flags & TCPOPT_BOUNDIF) { - int boundif = tcpopt->to_boundif; - uint_t len = sizeof (int); - - (void) tcp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, IPPROTO_IPV6, - IPV6_BOUND_IF, len, (uchar_t *)&boundif, &len, - (uchar_t *)&boundif, NULL, tcp->tcp_cred, NULL); - } - if (tcpopt->to_flags & TCPOPT_RECVPKTINFO) { - uint_t on = 1; - uint_t len = sizeof (uint_t); - (void) tcp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, IPPROTO_IPV6, - IPV6_RECVPKTINFO, len, (uchar_t *)&on, &len, - (uchar_t *)&on, NULL, tcp->tcp_cred, NULL); - } - /* - * Set max window size (tcp_recv_hiwater) of the acceptor. + * Set max window size (conn_rcvbuf) of the acceptor. */ if (tcp->tcp_rcv_list == NULL) { /* * Recv queue is empty, tcp_rwnd should not have changed. * That means it should be equal to the listener's tcp_rwnd. */ - tcp->tcp_recv_hiwater = tcp->tcp_rwnd; + connp->conn_rcvbuf = tcp->tcp_rwnd; } else { #ifdef DEBUG mblk_t *tmp; @@ -17300,19 +14297,19 @@ tcp_accept_finish(void *arg, mblk_t *mp, void *arg2) ASSERT(cnt != 0 && tcp->tcp_rcv_cnt == cnt); #endif /* There is some data, add them back to get the max. */ - tcp->tcp_recv_hiwater = tcp->tcp_rwnd + tcp->tcp_rcv_cnt; + connp->conn_rcvbuf = tcp->tcp_rwnd + tcp->tcp_rcv_cnt; } /* * This is the first time we run on the correct * queue after tcp_accept. So fix all the q parameters * here. 
*/ - sopp_flags = SOCKOPT_RCVHIWAT | SOCKOPT_MAXBLK | SOCKOPT_WROFF; - sopp_maxblk = tcp_maxpsz_set(tcp, B_FALSE); + sopp.sopp_flags = SOCKOPT_RCVHIWAT | SOCKOPT_MAXBLK | SOCKOPT_WROFF; + sopp.sopp_maxblk = tcp_maxpsz_set(tcp, B_FALSE); - sopp_rxhiwat = tcp->tcp_fused ? - tcp_fuse_set_rcv_hiwat(tcp, tcp->tcp_recv_hiwater) : - tcp->tcp_recv_hiwater; + sopp.sopp_rxhiwat = tcp->tcp_fused ? + tcp_fuse_set_rcv_hiwat(tcp, connp->conn_rcvbuf) : + connp->conn_rcvbuf; /* * Determine what write offset value to use depending on SACK and @@ -17328,18 +14325,18 @@ tcp_accept_finish(void *arg, mblk_t *mp, void *arg2) * since it would reduce the amount of work done by kmem. * Non-fused tcp loopback case is handled separately below. */ - sopp_wroff = 0; + sopp.sopp_wroff = 0; /* * Update the peer's transmit parameters according to * our recently calculated high water mark value. */ (void) tcp_maxpsz_set(tcp->tcp_loopback_peer, B_TRUE); } else if (tcp->tcp_snd_sack_ok) { - sopp_wroff = tcp->tcp_hdr_len + TCPOPT_MAX_SACK_LEN + + sopp.sopp_wroff = connp->conn_ht_iphc_allocated + (tcp->tcp_loopback ? 0 : tcps->tcps_wroff_xtra); } else { - sopp_wroff = tcp->tcp_hdr_len + (tcp->tcp_loopback ? 0 : - tcps->tcps_wroff_xtra); + sopp.sopp_wroff = connp->conn_ht_iphc_len + + (tcp->tcp_loopback ? 0 : tcps->tcps_wroff_xtra); } /* @@ -17354,30 +14351,22 @@ tcp_accept_finish(void *arg, mblk_t *mp, void *arg2) * costs. 
*/ if (tcp->tcp_kssl_ctx != NULL) { - sopp_wroff += SSL3_WROFFSET; + sopp.sopp_wroff += SSL3_WROFFSET; - sopp_flags |= SOCKOPT_TAIL; - sopp_tail = SSL3_MAX_TAIL_LEN; + sopp.sopp_flags |= SOCKOPT_TAIL; + sopp.sopp_tail = SSL3_MAX_TAIL_LEN; - sopp_flags |= SOCKOPT_ZCOPY; - sopp_copyopt = ZCVMUNSAFE; + sopp.sopp_flags |= SOCKOPT_ZCOPY; + sopp.sopp_zcopyflag = ZCVMUNSAFE; - sopp_maxblk = SSL3_MAX_RECORD_LEN; + sopp.sopp_maxblk = SSL3_MAX_RECORD_LEN; } /* Send the options up */ if (IPCL_IS_NONSTR(connp)) { - struct sock_proto_props sopp; - - sopp.sopp_flags = sopp_flags; - sopp.sopp_wroff = sopp_wroff; - sopp.sopp_maxblk = sopp_maxblk; - sopp.sopp_rxhiwat = sopp_rxhiwat; - if (sopp_flags & SOCKOPT_TAIL) { + if (sopp.sopp_flags & SOCKOPT_TAIL) { ASSERT(tcp->tcp_kssl_ctx != NULL); - ASSERT(sopp_flags & SOCKOPT_ZCOPY); - sopp.sopp_tail = sopp_tail; - sopp.sopp_zcopyflag = sopp_copyopt; + ASSERT(sopp.sopp_flags & SOCKOPT_ZCOPY); } if (tcp->tcp_loopback) { sopp.sopp_flags |= SOCKOPT_LOOPBACK; @@ -17385,34 +14374,40 @@ tcp_accept_finish(void *arg, mblk_t *mp, void *arg2) } (*connp->conn_upcalls->su_set_proto_props) (connp->conn_upper_handle, &sopp); + freemsg(mp); } else { + /* + * Let us reuse the incoming mblk to avoid + * memory allocation failure problems. 
We know + * that the size of the incoming mblk is at least + * stroptions + */ struct stroptions *stropt; - mblk_t *stropt_mp = allocb(sizeof (struct stroptions), BPRI_HI); - if (stropt_mp == NULL) { - tcp_err_ack(tcp, mp, TSYSERR, ENOMEM); - return; - } - DB_TYPE(stropt_mp) = M_SETOPTS; - stropt = (struct stroptions *)stropt_mp->b_rptr; - stropt_mp->b_wptr += sizeof (struct stroptions); + + ASSERT(DB_REF(mp) == 1); + ASSERT(MBLKSIZE(mp) >= sizeof (struct stroptions)); + + DB_TYPE(mp) = M_SETOPTS; + stropt = (struct stroptions *)mp->b_rptr; + mp->b_wptr = mp->b_rptr + sizeof (struct stroptions); + stropt = (struct stroptions *)mp->b_rptr; stropt->so_flags = SO_HIWAT | SO_WROFF | SO_MAXBLK; - stropt->so_hiwat = sopp_rxhiwat; - stropt->so_wroff = sopp_wroff; - stropt->so_maxblk = sopp_maxblk; + stropt->so_hiwat = sopp.sopp_rxhiwat; + stropt->so_wroff = sopp.sopp_wroff; + stropt->so_maxblk = sopp.sopp_maxblk; - if (sopp_flags & SOCKOPT_TAIL) { + if (sopp.sopp_flags & SOCKOPT_TAIL) { ASSERT(tcp->tcp_kssl_ctx != NULL); stropt->so_flags |= SO_TAIL | SO_COPYOPT; - stropt->so_tail = sopp_tail; - stropt->so_copyopt = sopp_copyopt; + stropt->so_tail = sopp.sopp_tail; + stropt->so_copyopt = sopp.sopp_zcopyflag; } /* Send the options up */ - putnext(q, stropt_mp); + putnext(q, mp); } - freemsg(mp); /* * Pass up any data and/or a fin that has been received. 
* @@ -17432,7 +14427,7 @@ tcp_accept_finish(void *arg, mblk_t *mp, void *arg2) if (!tcp->tcp_fused && (*connp->conn_upcalls->su_recv) (connp->conn_upper_handle, NULL, 0, 0, &error, &push) >= 0) { - tcp->tcp_rwnd = tcp->tcp_recv_hiwater; + tcp->tcp_rwnd = connp->conn_rcvbuf; if (tcp->tcp_state >= TCPS_ESTABLISHED && tcp_rwnd_reopen(tcp) == TH_ACK_NEEDED) { tcp_xmit_ctl(NULL, @@ -17463,7 +14458,7 @@ tcp_accept_finish(void *arg, mblk_t *mp, void *arg2) /* We drain directly in case of fused tcp loopback */ if (!tcp->tcp_fused && canputnext(q)) { - tcp->tcp_rwnd = tcp->tcp_recv_hiwater; + tcp->tcp_rwnd = connp->conn_rcvbuf; if (tcp->tcp_state >= TCPS_ESTABLISHED && tcp_rwnd_reopen(tcp) == TH_ACK_NEEDED) { tcp_xmit_ctl(NULL, @@ -17508,12 +14503,9 @@ tcp_accept_finish(void *arg, mblk_t *mp, void *arg2) putnext(q, mp); } } - if (tcp->tcp_hard_binding) { - tcp->tcp_hard_binding = B_FALSE; - tcp->tcp_hard_bound = B_TRUE; - } + tcp->tcp_hard_binding = B_FALSE; - if (tcp->tcp_ka_enabled) { + if (connp->conn_keepalive) { tcp->tcp_ka_last_intrvl = 0; tcp->tcp_ka_tid = TCP_TIMER(tcp, tcp_keepalive_killer, MSEC_TO_TICK(tcp->tcp_ka_interval)); @@ -17535,14 +14527,14 @@ tcp_accept_finish(void *arg, mblk_t *mp, void *arg2) /* * The function called through squeue to get behind listener's perimeter to - * send a deffered conn_ind. + * send a deferred conn_ind. */ /* ARGSUSED */ void -tcp_send_pending(void *arg, mblk_t *mp, void *arg2) +tcp_send_pending(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *dummy) { - conn_t *connp = (conn_t *)arg; - tcp_t *listener = connp->conn_tcp; + conn_t *lconnp = (conn_t *)arg; + tcp_t *listener = lconnp->conn_tcp; struct T_conn_ind *conn_ind; tcp_t *tcp; @@ -17560,29 +14552,34 @@ tcp_send_pending(void *arg, mblk_t *mp, void *arg2) return; } - tcp_ulp_newconn(connp, tcp->tcp_connp, mp); + tcp_ulp_newconn(lconnp, tcp->tcp_connp, mp); } -/* ARGSUSED */ +/* + * Common to TPI and sockfs accept code. 
+ */ +/* ARGSUSED2 */ static int tcp_accept_common(conn_t *lconnp, conn_t *econnp, cred_t *cr) { tcp_t *listener, *eager; - mblk_t *opt_mp; - struct tcp_options *tcpopt; + mblk_t *discon_mp; listener = lconnp->conn_tcp; ASSERT(listener->tcp_state == TCPS_LISTEN); eager = econnp->conn_tcp; ASSERT(eager->tcp_listener != NULL); - ASSERT(eager->tcp_rq != NULL); + /* + * Pre allocate the discon_ind mblk also. tcp_accept_finish will + * use it if something failed. + */ + discon_mp = allocb(MAX(sizeof (struct T_discon_ind), + sizeof (struct stroptions)), BPRI_HI); - opt_mp = allocb(sizeof (struct tcp_options), BPRI_HI); - if (opt_mp == NULL) { + if (discon_mp == NULL) { return (-TPROTO); } - bzero((char *)opt_mp->b_rptr, sizeof (struct tcp_options)); eager->tcp_issocket = B_TRUE; econnp->conn_zoneid = listener->tcp_connp->conn_zoneid; @@ -17607,24 +14604,6 @@ tcp_accept_common(conn_t *lconnp, conn_t *econnp, cred_t *cr) */ ASSERT(econnp->conn_ref >= 3); - opt_mp->b_datap->db_type = M_SETOPTS; - opt_mp->b_wptr += sizeof (struct tcp_options); - - /* - * Prepare for inheriting IPV6_BOUND_IF and IPV6_RECVPKTINFO - * from listener to acceptor. 
- */ - tcpopt = (struct tcp_options *)opt_mp->b_rptr; - tcpopt->to_flags = 0; - - if (listener->tcp_bound_if != 0) { - tcpopt->to_flags |= TCPOPT_BOUNDIF; - tcpopt->to_boundif = listener->tcp_bound_if; - } - if (listener->tcp_ipv6_recvancillary & TCP_IPV6_RECVPKTINFO) { - tcpopt->to_flags |= TCPOPT_RECVPKTINFO; - } - mutex_enter(&listener->tcp_eager_lock); if (listener->tcp_eager_prev_q0->tcp_conn_def_q0) { @@ -17686,7 +14665,7 @@ tcp_accept_common(conn_t *lconnp, conn_t *econnp, cred_t *cr) /* Need to get inside the listener perimeter */ CONN_INC_REF(listener->tcp_connp); SQUEUE_ENTER_ONE(listener->tcp_connp->conn_sqp, mp1, - tcp_send_pending, listener->tcp_connp, SQ_FILL, + tcp_send_pending, listener->tcp_connp, NULL, SQ_FILL, SQTAG_TCP_SEND_PENDING); } no_more_eagers: @@ -17700,8 +14679,8 @@ no_more_eagers: * before sending the conn_ind in tcp_send_conn_ind. * The ref will be dropped in tcp_accept_finish(). */ - SQUEUE_ENTER_ONE(econnp->conn_sqp, opt_mp, tcp_accept_finish, - econnp, SQ_NODRAIN, SQTAG_TCP_ACCEPT_FINISH_Q0); + SQUEUE_ENTER_ONE(econnp->conn_sqp, discon_mp, tcp_accept_finish, + econnp, NULL, SQ_NODRAIN, SQTAG_TCP_ACCEPT_FINISH_Q0); return (0); } @@ -17712,7 +14691,6 @@ tcp_accept(sock_lower_handle_t lproto_handle, { conn_t *lconnp, *econnp; tcp_t *listener, *eager; - tcp_stack_t *tcps; lconnp = (conn_t *)lproto_handle; listener = lconnp->conn_tcp; @@ -17720,7 +14698,6 @@ tcp_accept(sock_lower_handle_t lproto_handle, econnp = (conn_t *)eproto_handle; eager = econnp->conn_tcp; ASSERT(eager->tcp_listener != NULL); - tcps = eager->tcp_tcps; /* * It is OK to manipulate these fields outside the eager's squeue @@ -17732,19 +14709,6 @@ tcp_accept(sock_lower_handle_t lproto_handle, econnp->conn_upper_handle = sock_handle; econnp->conn_upcalls = lconnp->conn_upcalls; ASSERT(IPCL_IS_NONSTR(econnp)); - /* - * Create helper stream if it is a non-TPI TCP connection. 
- */ - if (ip_create_helper_stream(econnp, tcps->tcps_ldi_ident)) { - ip1dbg(("tcp_accept: create of IP helper stream" - " failed\n")); - return (EPROTO); - } - eager->tcp_rq = econnp->conn_rq; - eager->tcp_wq = econnp->conn_wq; - - ASSERT(eager->tcp_rq != NULL); - return (tcp_accept_common(lconnp, econnp, cr)); } @@ -17752,7 +14716,7 @@ tcp_accept(sock_lower_handle_t lproto_handle, /* * This is the STREAMS entry point for T_CONN_RES coming down on * Acceptor STREAM when sockfs listener does accept processing. - * Read the block comment on top of tcp_conn_request(). + * Read the block comment on top of tcp_input_listener(). */ void tcp_tpi_accept(queue_t *q, mblk_t *mp) @@ -17815,8 +14779,8 @@ tcp_tpi_accept(queue_t *q, mblk_t *mp) econnp = eager->tcp_connp; econnp->conn_dev = (dev_t)RD(q)->q_ptr; econnp->conn_minor_arena = (vmem_t *)(WR(q)->q_ptr); - eager->tcp_rq = rq; - eager->tcp_wq = q; + econnp->conn_rq = rq; + econnp->conn_wq = q; rq->q_ptr = econnp; rq->q_qinfo = &tcp_rinitv4; /* No open - same as rinitv6 */ q->q_ptr = econnp; @@ -17836,7 +14800,7 @@ tcp_tpi_accept(queue_t *q, mblk_t *mp) * should already be enough space in the mp that came * down from soaccept(). 
*/ - if (eager->tcp_family == AF_INET) { + if (econnp->conn_family == AF_INET) { sin_t *sin; ASSERT((mp->b_datap->db_lim - mp->b_datap->db_base) >= @@ -17844,8 +14808,8 @@ tcp_tpi_accept(queue_t *q, mblk_t *mp) sin = (sin_t *)mp->b_wptr; mp->b_wptr += sizeof (sin_t); sin->sin_family = AF_INET; - sin->sin_port = eager->tcp_lport; - sin->sin_addr.s_addr = eager->tcp_ipha->ipha_src; + sin->sin_port = econnp->conn_lport; + sin->sin_addr.s_addr = econnp->conn_laddr_v4; } else { sin6_t *sin6; @@ -17854,20 +14818,23 @@ tcp_tpi_accept(queue_t *q, mblk_t *mp) sin6 = (sin6_t *)mp->b_wptr; mp->b_wptr += sizeof (sin6_t); sin6->sin6_family = AF_INET6; - sin6->sin6_port = eager->tcp_lport; - if (eager->tcp_ipversion == IPV4_VERSION) { + sin6->sin6_port = econnp->conn_lport; + sin6->sin6_addr = econnp->conn_laddr_v6; + if (econnp->conn_ipversion == IPV4_VERSION) { sin6->sin6_flowinfo = 0; - IN6_IPADDR_TO_V4MAPPED( - eager->tcp_ipha->ipha_src, - &sin6->sin6_addr); } else { ASSERT(eager->tcp_ip6h != NULL); sin6->sin6_flowinfo = eager->tcp_ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; - sin6->sin6_addr = eager->tcp_ip6h->ip6_src; } - sin6->sin6_scope_id = 0; + if (IN6_IS_ADDR_LINKSCOPE(&econnp->conn_laddr_v6) && + (econnp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET)) { + sin6->sin6_scope_id = + econnp->conn_ixa->ixa_scopeid; + } else { + sin6->sin6_scope_id = 0; + } sin6->__sin6_src_id = 0; } @@ -17881,97 +14848,6 @@ tcp_tpi_accept(queue_t *q, mblk_t *mp) } } -static int -tcp_do_getsockname(tcp_t *tcp, struct sockaddr *sa, uint_t *salenp) -{ - sin_t *sin = (sin_t *)sa; - sin6_t *sin6 = (sin6_t *)sa; - - switch (tcp->tcp_family) { - case AF_INET: - ASSERT(tcp->tcp_ipversion == IPV4_VERSION); - - if (*salenp < sizeof (sin_t)) - return (EINVAL); - - *sin = sin_null; - sin->sin_family = AF_INET; - if (tcp->tcp_state >= TCPS_BOUND) { - sin->sin_port = tcp->tcp_lport; - sin->sin_addr.s_addr = tcp->tcp_ipha->ipha_src; - } - *salenp = sizeof (sin_t); - break; - - case AF_INET6: - if (*salenp < 
sizeof (sin6_t)) - return (EINVAL); - - *sin6 = sin6_null; - sin6->sin6_family = AF_INET6; - if (tcp->tcp_state >= TCPS_BOUND) { - sin6->sin6_port = tcp->tcp_lport; - mutex_enter(&tcp->tcp_connp->conn_lock); - if (tcp->tcp_ipversion == IPV4_VERSION) { - IN6_IPADDR_TO_V4MAPPED(tcp->tcp_ipha->ipha_src, - &sin6->sin6_addr); - } else { - sin6->sin6_addr = tcp->tcp_ip6h->ip6_src; - } - mutex_exit(&tcp->tcp_connp->conn_lock); - } - *salenp = sizeof (sin6_t); - break; - } - - return (0); -} - -static int -tcp_do_getpeername(tcp_t *tcp, struct sockaddr *sa, uint_t *salenp) -{ - sin_t *sin = (sin_t *)sa; - sin6_t *sin6 = (sin6_t *)sa; - - if (tcp->tcp_state < TCPS_SYN_RCVD) - return (ENOTCONN); - - switch (tcp->tcp_family) { - case AF_INET: - ASSERT(tcp->tcp_ipversion == IPV4_VERSION); - - if (*salenp < sizeof (sin_t)) - return (EINVAL); - - *sin = sin_null; - sin->sin_family = AF_INET; - sin->sin_port = tcp->tcp_fport; - IN6_V4MAPPED_TO_IPADDR(&tcp->tcp_remote_v6, - sin->sin_addr.s_addr); - *salenp = sizeof (sin_t); - break; - - case AF_INET6: - if (*salenp < sizeof (sin6_t)) - return (EINVAL); - - *sin6 = sin6_null; - sin6->sin6_family = AF_INET6; - sin6->sin6_port = tcp->tcp_fport; - sin6->sin6_addr = tcp->tcp_remote_v6; - mutex_enter(&tcp->tcp_connp->conn_lock); - if (tcp->tcp_ipversion == IPV6_VERSION) { - sin6->sin6_flowinfo = tcp->tcp_ip6h->ip6_vcf & - ~IPV6_VERS_AND_FLOW_MASK; - } - mutex_exit(&tcp->tcp_connp->conn_lock); - *salenp = sizeof (sin6_t); - break; - } - - return (0); -} - /* * Handle special out-of-band ioctl requests (see PSARC/2008/265). 
*/ @@ -17980,7 +14856,8 @@ tcp_wput_cmdblk(queue_t *q, mblk_t *mp) { void *data; mblk_t *datamp = mp->b_cont; - tcp_t *tcp = Q_TO_TCP(q); + conn_t *connp = Q_TO_CONN(q); + tcp_t *tcp = connp->conn_tcp; cmdblk_t *cmdp = (cmdblk_t *)mp->b_rptr; if (datamp == NULL || MBLKL(datamp) < cmdp->cb_len) { @@ -17993,10 +14870,14 @@ tcp_wput_cmdblk(queue_t *q, mblk_t *mp) switch (cmdp->cb_cmd) { case TI_GETPEERNAME: - cmdp->cb_error = tcp_do_getpeername(tcp, data, &cmdp->cb_len); + if (tcp->tcp_state < TCPS_SYN_RCVD) + cmdp->cb_error = ENOTCONN; + else + cmdp->cb_error = conn_getpeername(connp, data, + &cmdp->cb_len); break; case TI_GETMYNAME: - cmdp->cb_error = tcp_do_getsockname(tcp, data, &cmdp->cb_len); + cmdp->cb_error = conn_getsockname(connp, data, &cmdp->cb_len); break; default: cmdp->cb_error = EINVAL; @@ -18029,14 +14910,14 @@ tcp_wput(queue_t *q, mblk_t *mp) mutex_enter(&tcp->tcp_non_sq_lock); tcp->tcp_squeue_bytes += size; - if (TCP_UNSENT_BYTES(tcp) > tcp->tcp_xmit_hiwater) { + if (TCP_UNSENT_BYTES(tcp) > connp->conn_sndbuf) { tcp_setqfull(tcp); } mutex_exit(&tcp->tcp_non_sq_lock); CONN_INC_REF(connp); SQUEUE_ENTER_ONE(connp->conn_sqp, mp, tcp_output, connp, - tcp_squeue_flag, SQTAG_TCP_OUTPUT); + NULL, tcp_squeue_flag, SQTAG_TCP_OUTPUT); return; case M_CMD: @@ -18053,7 +14934,7 @@ tcp_wput(queue_t *q, mblk_t *mp) if ((mp->b_wptr - rptr) >= sizeof (t_scalar_t)) { type = ((union T_primitives *)rptr)->type; } else { - if (tcp->tcp_debug) { + if (connp->conn_debug) { (void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, "tcp_wput_proto, dropping one..."); @@ -18093,7 +14974,7 @@ tcp_wput(queue_t *q, mblk_t *mp) /* * Most ioctls can be processed right away without going via * squeues - process them right here. Those that do require - * squeue (currently TCP_IOC_DEFAULT_Q and _SIOCSOCKFALLBACK) + * squeue (currently _SIOCSOCKFALLBACK) * are processed by tcp_wput_ioctl(). 
*/ iocp = (struct iocblk *)mp->b_rptr; @@ -18111,26 +14992,13 @@ tcp_wput(queue_t *q, mblk_t *mp) case ND_SET: /* nd_getset does the necessary checks */ case ND_GET: - if (!nd_getset(q, tcps->tcps_g_nd, mp)) { - CALL_IP_WPUT(connp, q, mp); - return; - } - qreply(q, mp); - return; - case TCP_IOC_DEFAULT_Q: - /* - * Wants to be the default wq. Check the credentials - * first, the rest is executed via squeue. - */ - if (secpolicy_ip_config(iocp->ioc_cr, B_FALSE) != 0) { - iocp->ioc_error = EPERM; - iocp->ioc_count = 0; - mp->b_datap->db_type = M_IOCACK; + if (nd_getset(q, tcps->tcps_g_nd, mp)) { qreply(q, mp); return; } - output_proc = tcp_wput_ioctl; - break; + ip_wput_nondata(q, mp); + return; + default: output_proc = tcp_wput_ioctl; break; @@ -18143,7 +15011,7 @@ tcp_wput(queue_t *q, mblk_t *mp) CONN_INC_REF(connp); SQUEUE_ENTER_ONE(connp->conn_sqp, mp, output_proc, connp, - tcp_squeue_flag, SQTAG_TCP_WPUT_OTHER); + NULL, tcp_squeue_flag, SQTAG_TCP_WPUT_OTHER); } /* @@ -18188,52 +15056,32 @@ tcp_wput_fallback(queue_t *wq, mblk_t *mp) freemsg(mp); } +/* + * Check the usability of ZEROCOPY. It's instead checking the flag set by IP. + */ static boolean_t tcp_zcopy_check(tcp_t *tcp) { - conn_t *connp = tcp->tcp_connp; - ire_t *ire; + conn_t *connp = tcp->tcp_connp; + ip_xmit_attr_t *ixa = connp->conn_ixa; boolean_t zc_enabled = B_FALSE; tcp_stack_t *tcps = tcp->tcp_tcps; if (do_tcpzcopy == 2) zc_enabled = B_TRUE; - else if (tcp->tcp_ipversion == IPV4_VERSION && - IPCL_IS_CONNECTED(connp) && - (connp->conn_flags & IPCL_CHECK_POLICY) == 0 && - connp->conn_dontroute == 0 && - !connp->conn_nexthop_set && - connp->conn_outgoing_ill == NULL && - do_tcpzcopy == 1) { - /* - * the checks above closely resemble the fast path checks - * in tcp_send_data(). 
- */ - mutex_enter(&connp->conn_lock); - ire = connp->conn_ire_cache; - ASSERT(!(connp->conn_state_flags & CONN_INCIPIENT)); - if (ire != NULL && !(ire->ire_marks & IRE_MARK_CONDEMNED)) { - IRE_REFHOLD(ire); - if (ire->ire_stq != NULL) { - ill_t *ill = (ill_t *)ire->ire_stq->q_ptr; - - zc_enabled = ill && (ill->ill_capabilities & - ILL_CAPAB_ZEROCOPY) && - (ill->ill_zerocopy_capab-> - ill_zerocopy_flags != 0); - } - IRE_REFRELE(ire); - } - mutex_exit(&connp->conn_lock); - } + else if ((do_tcpzcopy == 1) && (ixa->ixa_flags & IXAF_ZCOPY_CAPAB)) + zc_enabled = B_TRUE; + tcp->tcp_snd_zcopy_on = zc_enabled; if (!TCP_IS_DETACHED(tcp)) { if (zc_enabled) { - (void) proto_set_tx_copyopt(tcp->tcp_rq, connp, + ixa->ixa_flags |= IXAF_VERIFY_ZCOPY; + (void) proto_set_tx_copyopt(connp->conn_rq, connp, ZCVMSAFE); TCP_STAT(tcps, tcp_zcopy_on); } else { - (void) proto_set_tx_copyopt(tcp->tcp_rq, connp, + ixa->ixa_flags &= ~IXAF_VERIFY_ZCOPY; + (void) proto_set_tx_copyopt(connp->conn_rq, connp, ZCVMUNSAFE); TCP_STAT(tcps, tcp_zcopy_off); } @@ -18241,99 +15089,84 @@ tcp_zcopy_check(tcp_t *tcp) return (zc_enabled); } -static mblk_t * -tcp_zcopy_disable(tcp_t *tcp, mblk_t *bp) -{ - tcp_stack_t *tcps = tcp->tcp_tcps; - - if (do_tcpzcopy == 2) - return (bp); - else if (tcp->tcp_snd_zcopy_on) { - tcp->tcp_snd_zcopy_on = B_FALSE; - if (!TCP_IS_DETACHED(tcp)) { - (void) proto_set_tx_copyopt(tcp->tcp_rq, tcp->tcp_connp, - ZCVMUNSAFE); - TCP_STAT(tcps, tcp_zcopy_disable); - } - } - return (tcp_zcopy_backoff(tcp, bp, 0)); -} - /* - * Backoff from a zero-copy mblk by copying data to a new mblk and freeing - * the original desballoca'ed segmapped mblk. + * Backoff from a zero-copy message by copying data to a new allocated + * message and freeing the original desballoca'ed segmapped message. + * + * This function is called by following two callers: + * 1. tcp_timer: fix_xmitlist is set to B_TRUE, because it's safe to free + * the origial desballoca'ed message and notify sockfs. 
This is in re- + * transmit state. + * 2. tcp_output: fix_xmitlist is set to B_FALSE. Flag STRUIO_ZCNOTIFY need + * to be copied to new message. */ static mblk_t * -tcp_zcopy_backoff(tcp_t *tcp, mblk_t *bp, int fix_xmitlist) +tcp_zcopy_backoff(tcp_t *tcp, mblk_t *bp, boolean_t fix_xmitlist) { - mblk_t *head, *tail, *nbp; + mblk_t *nbp; + mblk_t *head = NULL; + mblk_t *tail = NULL; tcp_stack_t *tcps = tcp->tcp_tcps; - if (IS_VMLOANED_MBLK(bp)) { - TCP_STAT(tcps, tcp_zcopy_backoff); - if ((head = copyb(bp)) == NULL) { - /* fail to backoff; leave it for the next backoff */ - tcp->tcp_xmit_zc_clean = B_FALSE; - return (bp); - } - if (bp->b_datap->db_struioflag & STRUIO_ZCNOTIFY) { - if (fix_xmitlist) - tcp_zcopy_notify(tcp); - else - head->b_datap->db_struioflag |= STRUIO_ZCNOTIFY; - } - nbp = bp->b_cont; - if (fix_xmitlist) { - head->b_prev = bp->b_prev; - head->b_next = bp->b_next; - if (tcp->tcp_xmit_tail == bp) - tcp->tcp_xmit_tail = head; - } - bp->b_next = NULL; - bp->b_prev = NULL; - freeb(bp); - } else { - head = bp; - nbp = bp->b_cont; - } - tail = head; - while (nbp) { - if (IS_VMLOANED_MBLK(nbp)) { + ASSERT(bp != NULL); + while (bp != NULL) { + if (IS_VMLOANED_MBLK(bp)) { TCP_STAT(tcps, tcp_zcopy_backoff); - if ((tail->b_cont = copyb(nbp)) == NULL) { + if ((nbp = copyb(bp)) == NULL) { tcp->tcp_xmit_zc_clean = B_FALSE; - tail->b_cont = nbp; - return (head); + if (tail != NULL) + tail->b_cont = bp; + return ((head == NULL) ? bp : head); } - tail = tail->b_cont; - if (nbp->b_datap->db_struioflag & STRUIO_ZCNOTIFY) { + + if (bp->b_datap->db_struioflag & STRUIO_ZCNOTIFY) { if (fix_xmitlist) tcp_zcopy_notify(tcp); else - tail->b_datap->db_struioflag |= + nbp->b_datap->db_struioflag |= STRUIO_ZCNOTIFY; } - bp = nbp; - nbp = nbp->b_cont; + nbp->b_cont = bp->b_cont; + + /* + * Copy saved information and adjust tcp_xmit_tail + * if needed. 
+ */ if (fix_xmitlist) { - tail->b_prev = bp->b_prev; - tail->b_next = bp->b_next; + nbp->b_prev = bp->b_prev; + nbp->b_next = bp->b_next; + if (tcp->tcp_xmit_tail == bp) - tcp->tcp_xmit_tail = tail; + tcp->tcp_xmit_tail = nbp; } - bp->b_next = NULL; + + /* Free the original message. */ bp->b_prev = NULL; + bp->b_next = NULL; freeb(bp); + + bp = nbp; + } + + if (head == NULL) { + head = bp; + } + if (tail == NULL) { + tail = bp; } else { - tail->b_cont = nbp; - tail = nbp; - nbp = nbp->b_cont; + tail->b_cont = bp; + tail = bp; } + + /* Move forward. */ + bp = bp->b_cont; } + if (fix_xmitlist) { tcp->tcp_xmit_last = tail; tcp->tcp_xmit_zc_clean = B_TRUE; } + return (head); } @@ -18341,7 +15174,7 @@ static void tcp_zcopy_notify(tcp_t *tcp) { struct stdata *stp; - conn_t *connp; + conn_t *connp; if (tcp->tcp_detached) return; @@ -18351,323 +15184,149 @@ tcp_zcopy_notify(tcp_t *tcp) (connp->conn_upper_handle); return; } - stp = STREAM(tcp->tcp_rq); + stp = STREAM(connp->conn_rq); mutex_enter(&stp->sd_lock); stp->sd_flag |= STZCNOTIFY; cv_broadcast(&stp->sd_zcopy_wait); mutex_exit(&stp->sd_lock); } -static boolean_t -tcp_send_find_ire(tcp_t *tcp, ipaddr_t *dst, ire_t **irep) +/* + * Update the TCP connection according to change of LSO capability. 
+ */ +static void +tcp_update_lso(tcp_t *tcp, ip_xmit_attr_t *ixa) { - ire_t *ire; - conn_t *connp = tcp->tcp_connp; - tcp_stack_t *tcps = tcp->tcp_tcps; - ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip; - - mutex_enter(&connp->conn_lock); - ire = connp->conn_ire_cache; - ASSERT(!(connp->conn_state_flags & CONN_INCIPIENT)); - - if ((ire != NULL) && - (((dst != NULL) && (ire->ire_addr == *dst)) || ((dst == NULL) && - IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &tcp->tcp_ip6h->ip6_dst))) && - !(ire->ire_marks & IRE_MARK_CONDEMNED)) { - IRE_REFHOLD(ire); - mutex_exit(&connp->conn_lock); - } else { - boolean_t cached = B_FALSE; - ts_label_t *tsl; - - /* force a recheck later on */ - tcp->tcp_ire_ill_check_done = B_FALSE; - - TCP_DBGSTAT(tcps, tcp_ire_null1); - connp->conn_ire_cache = NULL; - mutex_exit(&connp->conn_lock); - - if (ire != NULL) - IRE_REFRELE_NOTR(ire); - - tsl = crgetlabel(CONN_CRED(connp)); - ire = (dst ? - ire_cache_lookup(*dst, connp->conn_zoneid, tsl, ipst) : - ire_cache_lookup_v6(&tcp->tcp_ip6h->ip6_dst, - connp->conn_zoneid, tsl, ipst)); + /* + * We check against IPv4 header length to preserve the old behavior + * of only enabling LSO when there are no IP options. + * But this restriction might not be necessary at all. Before removing + * it, need to verify how LSO is handled for source routing case, with + * which IP does software checksum. + * + * For IPv6, whenever any extension header is needed, LSO is supressed. + */ + if (ixa->ixa_ip_hdr_length != ((ixa->ixa_flags & IXAF_IS_IPV4) ? + IP_SIMPLE_HDR_LENGTH : IPV6_HDR_LEN)) + return; - if (ire == NULL) { - TCP_STAT(tcps, tcp_ire_null); - return (B_FALSE); - } + /* + * Either the LSO capability newly became usable, or it has changed. 
+ */ + if (ixa->ixa_flags & IXAF_LSO_CAPAB) { + ill_lso_capab_t *lsoc = &ixa->ixa_lso_capab; - IRE_REFHOLD_NOTR(ire); + ASSERT(lsoc->ill_lso_max > 0); + tcp->tcp_lso_max = MIN(TCP_MAX_LSO_LENGTH, lsoc->ill_lso_max); - mutex_enter(&connp->conn_lock); - if (CONN_CACHE_IRE(connp)) { - rw_enter(&ire->ire_bucket->irb_lock, RW_READER); - if (!(ire->ire_marks & IRE_MARK_CONDEMNED)) { - TCP_CHECK_IREINFO(tcp, ire); - connp->conn_ire_cache = ire; - cached = B_TRUE; - } - rw_exit(&ire->ire_bucket->irb_lock); - } - mutex_exit(&connp->conn_lock); + DTRACE_PROBE3(tcp_update_lso, boolean_t, tcp->tcp_lso, + boolean_t, B_TRUE, uint32_t, tcp->tcp_lso_max); /* - * We can continue to use the ire but since it was - * not cached, we should drop the extra reference. + * If LSO to be enabled, notify the STREAM header with larger + * data block. */ - if (!cached) - IRE_REFRELE_NOTR(ire); + if (!tcp->tcp_lso) + tcp->tcp_maxpsz_multiplier = 0; + + tcp->tcp_lso = B_TRUE; + TCP_STAT(tcp->tcp_tcps, tcp_lso_enabled); + } else { /* LSO capability is not usable any more. */ + DTRACE_PROBE3(tcp_update_lso, boolean_t, tcp->tcp_lso, + boolean_t, B_FALSE, uint32_t, tcp->tcp_lso_max); /* - * Rampart note: no need to select a new label here, since - * labels are not allowed to change during the life of a TCP - * connection. + * If LSO to be disabled, notify the STREAM header with smaller + * data block. And need to restore fragsize to PMTU. */ + if (tcp->tcp_lso) { + tcp->tcp_maxpsz_multiplier = + tcp->tcp_tcps->tcps_maxpsz_multiplier; + ixa->ixa_fragsize = ixa->ixa_pmtu; + tcp->tcp_lso = B_FALSE; + TCP_STAT(tcp->tcp_tcps, tcp_lso_disabled); + } } - *irep = ire; - - return (B_TRUE); + (void) tcp_maxpsz_set(tcp, B_TRUE); } /* - * Called from tcp_send() or tcp_send_data() to find workable IRE. - * - * 0 = success; - * 1 = failed to find ire and ill. + * Update the TCP connection according to change of ZEROCOPY capability. 
*/ -static boolean_t -tcp_send_find_ire_ill(tcp_t *tcp, mblk_t *mp, ire_t **irep, ill_t **illp) +static void +tcp_update_zcopy(tcp_t *tcp) { - ipha_t *ipha; - ipaddr_t dst; - ire_t *ire; - ill_t *ill; - mblk_t *ire_fp_mp; + conn_t *connp = tcp->tcp_connp; tcp_stack_t *tcps = tcp->tcp_tcps; - if (mp != NULL) - ipha = (ipha_t *)mp->b_rptr; - else - ipha = tcp->tcp_ipha; - dst = ipha->ipha_dst; - - if (!tcp_send_find_ire(tcp, &dst, &ire)) - return (B_FALSE); - - if ((ire->ire_flags & RTF_MULTIRT) || - (ire->ire_stq == NULL) || - (ire->ire_nce == NULL) || - ((ire_fp_mp = ire->ire_nce->nce_fp_mp) == NULL) || - ((mp != NULL) && (ire->ire_max_frag < ntohs(ipha->ipha_length) || - MBLKL(ire_fp_mp) > MBLKHEAD(mp)))) { - TCP_STAT(tcps, tcp_ip_ire_send); - IRE_REFRELE(ire); - return (B_FALSE); + if (tcp->tcp_snd_zcopy_on) { + tcp->tcp_snd_zcopy_on = B_FALSE; + if (!TCP_IS_DETACHED(tcp)) { + (void) proto_set_tx_copyopt(connp->conn_rq, connp, + ZCVMUNSAFE); + TCP_STAT(tcps, tcp_zcopy_off); + } + } else { + tcp->tcp_snd_zcopy_on = B_TRUE; + if (!TCP_IS_DETACHED(tcp)) { + (void) proto_set_tx_copyopt(connp->conn_rq, connp, + ZCVMSAFE); + TCP_STAT(tcps, tcp_zcopy_on); + } } +} - ill = ire_to_ill(ire); - ASSERT(ill != NULL); +/* + * Notify function registered with ip_xmit_attr_t. It's called in the squeue + * so it's safe to update the TCP connection. 
+ */ +/* ARGSUSED1 */ +static void +tcp_notify(void *arg, ip_xmit_attr_t *ixa, ixa_notify_type_t ntype, + ixa_notify_arg_t narg) +{ + tcp_t *tcp = (tcp_t *)arg; + conn_t *connp = tcp->tcp_connp; - if (!tcp->tcp_ire_ill_check_done) { - tcp_ire_ill_check(tcp, ire, ill, B_TRUE); - tcp->tcp_ire_ill_check_done = B_TRUE; + switch (ntype) { + case IXAN_LSO: + tcp_update_lso(tcp, connp->conn_ixa); + break; + case IXAN_PMTU: + tcp_update_pmtu(tcp, B_FALSE); + break; + case IXAN_ZCOPY: + tcp_update_zcopy(tcp); + break; + default: + break; } - - *irep = ire; - *illp = ill; - - return (B_TRUE); } static void -tcp_send_data(tcp_t *tcp, queue_t *q, mblk_t *mp) +tcp_send_data(tcp_t *tcp, mblk_t *mp) { - ipha_t *ipha; - ipaddr_t src; - ipaddr_t dst; - uint32_t cksum; - ire_t *ire; - uint16_t *up; - ill_t *ill; conn_t *connp = tcp->tcp_connp; - uint32_t hcksum_txflags = 0; - mblk_t *ire_fp_mp; - uint_t ire_fp_mp_len; - tcp_stack_t *tcps = tcp->tcp_tcps; - ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip; - cred_t *cr; - pid_t cpid; - - ASSERT(DB_TYPE(mp) == M_DATA); /* - * Here we need to handle the overloading of the cred_t for - * both getpeerucred and TX. - * If this is a SYN then the caller already set db_credp so - * that getpeerucred will work. But if TX is in use we might have - * a conn_effective_cred which is different, and we need to use that - * cred to make TX use the correct label and label dependent route. + * Check here to avoid sending zero-copy message down to IP when + * ZEROCOPY capability has turned off. We only need to deal with + * the race condition between sockfs and the notification here. + * Since we have tried to backoff the tcp_xmit_head when turning + * zero-copy off and new messages in tcp_output(), we simply drop + * the dup'ed packet here and let tcp retransmit, if tcp_xmit_zc_clean + * is not true. 
*/ - if (is_system_labeled()) { - cr = msg_getcred(mp, &cpid); - if (cr == NULL || connp->conn_effective_cred != NULL) - mblk_setcred(mp, CONN_CRED(connp), cpid); - } - - ipha = (ipha_t *)mp->b_rptr; - src = ipha->ipha_src; - dst = ipha->ipha_dst; - - ASSERT(q != NULL); - DTRACE_PROBE2(tcp__trace__send, mblk_t *, mp, tcp_t *, tcp); - - /* - * Drop off fast path for IPv6 and also if options are present or - * we need to resolve a TS label. - */ - if (tcp->tcp_ipversion != IPV4_VERSION || - !IPCL_IS_CONNECTED(connp) || - !CONN_IS_LSO_MD_FASTPATH(connp) || - (connp->conn_flags & IPCL_CHECK_POLICY) != 0 || - !connp->conn_ulp_labeled || - ipha->ipha_ident == IP_HDR_INCLUDED || - ipha->ipha_version_and_hdr_length != IP_SIMPLE_HDR_VERSION || - IPP_ENABLED(IPP_LOCAL_OUT, ipst)) { - if (tcp->tcp_snd_zcopy_aware) - mp = tcp_zcopy_disable(tcp, mp); - TCP_STAT(tcps, tcp_ip_send); - CALL_IP_WPUT(connp, q, mp); - return; - } - - if (!tcp_send_find_ire_ill(tcp, mp, &ire, &ill)) { - if (tcp->tcp_snd_zcopy_aware) - mp = tcp_zcopy_backoff(tcp, mp, 0); - CALL_IP_WPUT(connp, q, mp); + if (tcp->tcp_snd_zcopy_aware && !tcp->tcp_snd_zcopy_on && + !tcp->tcp_xmit_zc_clean) { + ip_drop_output("TCP ZC was disabled but not clean", mp, NULL); + freemsg(mp); return; } - ire_fp_mp = ire->ire_nce->nce_fp_mp; - ire_fp_mp_len = MBLKL(ire_fp_mp); - - ASSERT(ipha->ipha_ident == 0 || ipha->ipha_ident == IP_HDR_INCLUDED); - ipha->ipha_ident = (uint16_t)atomic_add_32_nv(&ire->ire_ident, 1); -#ifndef _BIG_ENDIAN - ipha->ipha_ident = (ipha->ipha_ident << 8) | (ipha->ipha_ident >> 8); -#endif - - /* - * Check to see if we need to re-enable LSO/MDT for this connection - * because it was previously disabled due to changes in the ill; - * note that by doing it here, this re-enabling only applies when - * the packet is not dispatched through CALL_IP_WPUT(). - * - * That means for IPv4, it is worth re-enabling LSO/MDT for the fastpath - * case, since that's how we ended up here. 
For IPv6, we do the - * re-enabling work in ip_xmit_v6(), albeit indirectly via squeue. - */ - if (connp->conn_lso_ok && !tcp->tcp_lso && ILL_LSO_TCP_USABLE(ill)) { - /* - * Restore LSO for this connection, so that next time around - * it is eligible to go through tcp_lsosend() path again. - */ - TCP_STAT(tcps, tcp_lso_enabled); - tcp->tcp_lso = B_TRUE; - ip1dbg(("tcp_send_data: reenabling LSO for connp %p on " - "interface %s\n", (void *)connp, ill->ill_name)); - } else if (connp->conn_mdt_ok && !tcp->tcp_mdt && ILL_MDT_USABLE(ill)) { - /* - * Restore MDT for this connection, so that next time around - * it is eligible to go through tcp_multisend() path again. - */ - TCP_STAT(tcps, tcp_mdt_conn_resumed1); - tcp->tcp_mdt = B_TRUE; - ip1dbg(("tcp_send_data: reenabling MDT for connp %p on " - "interface %s\n", (void *)connp, ill->ill_name)); - } - - if (tcp->tcp_snd_zcopy_aware) { - if ((ill->ill_capabilities & ILL_CAPAB_ZEROCOPY) == 0 || - (ill->ill_zerocopy_capab->ill_zerocopy_flags == 0)) - mp = tcp_zcopy_disable(tcp, mp); - /* - * we shouldn't need to reset ipha as the mp containing - * ipha should never be a zero-copy mp. - */ - } - - if (ILL_HCKSUM_CAPABLE(ill) && dohwcksum) { - ASSERT(ill->ill_hcksum_capab != NULL); - hcksum_txflags = ill->ill_hcksum_capab->ill_hcksum_txflags; - } - - /* pseudo-header checksum (do it in parts for IP header checksum) */ - cksum = (dst >> 16) + (dst & 0xFFFF) + (src >> 16) + (src & 0xFFFF); - - ASSERT(ipha->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION); - up = IPH_TCPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH); - - IP_CKSUM_XMIT_FAST(ire->ire_ipversion, hcksum_txflags, mp, ipha, up, - IPPROTO_TCP, IP_SIMPLE_HDR_LENGTH, ntohs(ipha->ipha_length), cksum); - - /* Software checksum? 
*/ - if (DB_CKSUMFLAGS(mp) == 0) { - TCP_STAT(tcps, tcp_out_sw_cksum); - TCP_STAT_UPDATE(tcps, tcp_out_sw_cksum_bytes, - ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH); - } - - /* Calculate IP header checksum if hardware isn't capable */ - if (!(DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM)) { - IP_HDR_CKSUM(ipha, cksum, ((uint32_t *)ipha)[0], - ((uint16_t *)ipha)[4]); - } - ASSERT(DB_TYPE(ire_fp_mp) == M_DATA); - mp->b_rptr = (uchar_t *)ipha - ire_fp_mp_len; - bcopy(ire_fp_mp->b_rptr, mp->b_rptr, ire_fp_mp_len); - - UPDATE_OB_PKT_COUNT(ire); - ire->ire_last_used_time = lbolt; - - BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests); - BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits); - UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets, - ntohs(ipha->ipha_length)); - - DTRACE_PROBE4(ip4__physical__out__start, - ill_t *, NULL, ill_t *, ill, ipha_t *, ipha, mblk_t *, mp); - FW_HOOKS(ipst->ips_ip4_physical_out_event, - ipst->ips_ipv4firewall_physical_out, - NULL, ill, ipha, mp, mp, 0, ipst); - DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, mp); - DTRACE_IP_FASTPATH(mp, ipha, ill, ipha, NULL); - - if (mp != NULL) { - if (ipst->ips_ip4_observe.he_interested) { - zoneid_t szone; - - /* - * Both of these functions expect b_rptr to be - * where the IP header starts, so advance past the - * link layer header if present. 
- */ - mp->b_rptr += ire_fp_mp_len; - szone = ip_get_zoneid_v4(ipha->ipha_src, mp, - ipst, ALL_ZONES); - ipobs_hook(mp, IPOBS_HOOK_OUTBOUND, szone, - ALL_ZONES, ill, ipst); - mp->b_rptr -= ire_fp_mp_len; - } - - ILL_SEND_TX(ill, ire, connp, mp, 0, NULL); - } - - IRE_REFRELE(ire); + ASSERT(connp->conn_ixa->ixa_notify_cookie == connp->conn_tcp); + (void) conn_ip_output(mp, connp->conn_ixa); } /* @@ -18731,15 +15390,13 @@ tcp_wput_data(tcp_t *tcp, mblk_t *mp, boolean_t urgent) int tcpstate; int usable = 0; mblk_t *xmit_tail; - queue_t *q = tcp->tcp_wq; int32_t mss; int32_t num_sack_blk = 0; + int32_t total_hdr_len; int32_t tcp_hdr_len; - int32_t tcp_tcp_hdr_len; - int mdt_thres; int rc; tcp_stack_t *tcps = tcp->tcp_tcps; - ip_stack_t *ipst; + conn_t *connp = tcp->tcp_connp; tcpstate = tcp->tcp_state; if (mp == NULL) { @@ -18771,7 +15428,7 @@ tcp_wput_data(tcp_t *tcp, mblk_t *mp, boolean_t urgent) tcp_display(tcp, NULL, DISP_ADDR_AND_PORT)); #else - if (tcp->tcp_debug) { + if (connp->conn_debug) { (void) strlog(TCP_MOD_ID, 0, 1, SL_TRACE|SL_ERROR, "tcp_wput_data: data after ordrel, %s\n", @@ -18781,12 +15438,12 @@ tcp_wput_data(tcp_t *tcp, mblk_t *mp, boolean_t urgent) #endif /* DEBUG */ } if (tcp->tcp_snd_zcopy_aware && - (mp->b_datap->db_struioflag & STRUIO_ZCNOTIFY) != 0) + (mp->b_datap->db_struioflag & STRUIO_ZCNOTIFY)) tcp_zcopy_notify(tcp); freemsg(mp); mutex_enter(&tcp->tcp_non_sq_lock); if (tcp->tcp_flow_stopped && - TCP_UNSENT_BYTES(tcp) <= tcp->tcp_xmit_lowater) { + TCP_UNSENT_BYTES(tcp) <= connp->conn_sndlowat) { tcp_clrqfull(tcp); } mutex_exit(&tcp->tcp_non_sq_lock); @@ -18886,12 +15543,12 @@ data_null: opt_len = num_sack_blk * sizeof (sack_blk_t) + TCPOPT_NOP_LEN * 2 + TCPOPT_HEADER_LEN; mss = tcp->tcp_mss - opt_len; - tcp_hdr_len = tcp->tcp_hdr_len + opt_len; - tcp_tcp_hdr_len = tcp->tcp_tcp_hdr_len + opt_len; + total_hdr_len = connp->conn_ht_iphc_len + opt_len; + tcp_hdr_len = connp->conn_ht_ulp_len + opt_len; } else { mss = tcp->tcp_mss; - tcp_hdr_len = 
tcp->tcp_hdr_len; - tcp_tcp_hdr_len = tcp->tcp_tcp_hdr_len; + total_hdr_len = connp->conn_ht_iphc_len; + tcp_hdr_len = connp->conn_ht_ulp_len; } if ((tcp->tcp_suna == snxt) && !tcp->tcp_localnet && @@ -18913,7 +15570,7 @@ data_null: * In the special case when cwnd is zero, which can only * happen if the connection is ECN capable, return now. * New segments is sent using tcp_timer(). The timer - * is set in tcp_rput_data(). + * is set in tcp_input_data(). */ if (tcp->tcp_cwnd == 0) { /* @@ -19023,66 +15680,12 @@ data_null: } /* Update the latest receive window size in TCP header. */ - U32_TO_ABE16(tcp->tcp_rwnd >> tcp->tcp_rcv_ws, - tcp->tcp_tcph->th_win); + tcp->tcp_tcpha->tha_win = htons(tcp->tcp_rwnd >> tcp->tcp_rcv_ws); - /* - * Determine if it's worthwhile to attempt LSO or MDT, based on: - * - * 1. Simple TCP/IP{v4,v6} (no options). - * 2. IPSEC/IPQoS processing is not needed for the TCP connection. - * 3. If the TCP connection is in ESTABLISHED state. - * 4. The TCP is not detached. - * - * If any of the above conditions have changed during the - * connection, stop using LSO/MDT and restore the stream head - * parameters accordingly. 
- */ - ipst = tcps->tcps_netstack->netstack_ip; - - if ((tcp->tcp_lso || tcp->tcp_mdt) && - ((tcp->tcp_ipversion == IPV4_VERSION && - tcp->tcp_ip_hdr_len != IP_SIMPLE_HDR_LENGTH) || - (tcp->tcp_ipversion == IPV6_VERSION && - tcp->tcp_ip_hdr_len != IPV6_HDR_LEN) || - tcp->tcp_state != TCPS_ESTABLISHED || - TCP_IS_DETACHED(tcp) || !CONN_IS_LSO_MD_FASTPATH(tcp->tcp_connp) || - CONN_IPSEC_OUT_ENCAPSULATED(tcp->tcp_connp) || - IPP_ENABLED(IPP_LOCAL_OUT, ipst))) { - if (tcp->tcp_lso) { - tcp->tcp_connp->conn_lso_ok = B_FALSE; - tcp->tcp_lso = B_FALSE; - } else { - tcp->tcp_connp->conn_mdt_ok = B_FALSE; - tcp->tcp_mdt = B_FALSE; - } - - /* Anything other than detached is considered pathological */ - if (!TCP_IS_DETACHED(tcp)) { - if (tcp->tcp_lso) - TCP_STAT(tcps, tcp_lso_disabled); - else - TCP_STAT(tcps, tcp_mdt_conn_halted1); - (void) tcp_maxpsz_set(tcp, B_TRUE); - } - } - - /* Use MDT if sendable amount is greater than the threshold */ - if (tcp->tcp_mdt && - (mdt_thres = mss << tcp_mdt_smss_threshold, usable > mdt_thres) && - (tail_unsent > mdt_thres || (xmit_tail->b_cont != NULL && - MBLKL(xmit_tail->b_cont) > mdt_thres)) && - (tcp->tcp_valid_bits == 0 || - tcp->tcp_valid_bits == TCP_FSS_VALID)) { - ASSERT(tcp->tcp_connp->conn_mdt_ok); - rc = tcp_multisend(q, tcp, mss, tcp_hdr_len, tcp_tcp_hdr_len, - num_sack_blk, &usable, &snxt, &tail_unsent, &xmit_tail, - local_time, mdt_thres); - } else { - rc = tcp_send(q, tcp, mss, tcp_hdr_len, tcp_tcp_hdr_len, - num_sack_blk, &usable, &snxt, &tail_unsent, &xmit_tail, - local_time, INT_MAX); - } + /* Send the packet. 
*/ + rc = tcp_send(tcp, mss, total_hdr_len, tcp_hdr_len, + num_sack_blk, &usable, &snxt, &tail_unsent, &xmit_tail, + local_time); /* Pretend that all we were trying to send really got sent */ if (rc < 0 && tail_unsent < 0) { @@ -19131,39 +15734,41 @@ done:; tcp->tcp_unsent += len; mutex_enter(&tcp->tcp_non_sq_lock); if (tcp->tcp_flow_stopped) { - if (TCP_UNSENT_BYTES(tcp) <= tcp->tcp_xmit_lowater) { + if (TCP_UNSENT_BYTES(tcp) <= connp->conn_sndlowat) { tcp_clrqfull(tcp); } - } else if (TCP_UNSENT_BYTES(tcp) >= tcp->tcp_xmit_hiwater) { - tcp_setqfull(tcp); + } else if (TCP_UNSENT_BYTES(tcp) >= connp->conn_sndbuf) { + if (!(tcp->tcp_detached)) + tcp_setqfull(tcp); } mutex_exit(&tcp->tcp_non_sq_lock); } /* - * tcp_fill_header is called by tcp_send() and tcp_multisend() to fill the - * outgoing TCP header with the template header, as well as other - * options such as time-stamp, ECN and/or SACK. + * tcp_fill_header is called by tcp_send() to fill the outgoing TCP header + * with the template header, as well as other options such as time-stamp, + * ECN and/or SACK. 
*/ static void tcp_fill_header(tcp_t *tcp, uchar_t *rptr, clock_t now, int num_sack_blk) { - tcph_t *tcp_tmpl, *tcp_h; + tcpha_t *tcp_tmpl, *tcpha; uint32_t *dst, *src; int hdrlen; + conn_t *connp = tcp->tcp_connp; ASSERT(OK_32PTR(rptr)); /* Template header */ - tcp_tmpl = tcp->tcp_tcph; + tcp_tmpl = tcp->tcp_tcpha; /* Header of outgoing packet */ - tcp_h = (tcph_t *)(rptr + tcp->tcp_ip_hdr_len); + tcpha = (tcpha_t *)(rptr + connp->conn_ixa->ixa_ip_hdr_length); /* dst and src are opaque 32-bit fields, used for copying */ dst = (uint32_t *)rptr; - src = (uint32_t *)tcp->tcp_iphc; - hdrlen = tcp->tcp_hdr_len; + src = (uint32_t *)connp->conn_ht_iphc; + hdrlen = connp->conn_ht_iphc_len; /* Fill time-stamp option if needed */ if (tcp->tcp_snd_ts_ok) { @@ -19172,7 +15777,7 @@ tcp_fill_header(tcp_t *tcp, uchar_t *rptr, clock_t now, int num_sack_blk) U32_TO_BE32(tcp->tcp_ts_recent, (char *)tcp_tmpl + TCP_MIN_HEADER_LENGTH + 8); } else { - ASSERT(tcp->tcp_tcp_hdr_len == TCP_MIN_HEADER_LENGTH); + ASSERT(connp->conn_ht_ulp_len == TCP_MIN_HEADER_LENGTH); } /* @@ -19208,16 +15813,16 @@ tcp_fill_header(tcp_t *tcp, uchar_t *rptr, clock_t now, int num_sack_blk) SET_ECT(tcp, rptr); if (tcp->tcp_ecn_echo_on) - tcp_h->th_flags[0] |= TH_ECE; + tcpha->tha_flags |= TH_ECE; if (tcp->tcp_cwr && !tcp->tcp_ecn_cwr_sent) { - tcp_h->th_flags[0] |= TH_CWR; + tcpha->tha_flags |= TH_CWR; tcp->tcp_ecn_cwr_sent = B_TRUE; } } /* Fill in SACK options */ if (num_sack_blk > 0) { - uchar_t *wptr = rptr + tcp->tcp_hdr_len; + uchar_t *wptr = rptr + connp->conn_ht_iphc_len; sack_blk_t *tmp; int32_t i; @@ -19235,1536 +15840,62 @@ tcp_fill_header(tcp_t *tcp, uchar_t *rptr, clock_t now, int num_sack_blk) U32_TO_BE32(tmp[i].end, wptr); wptr += sizeof (tcp_seq); } - tcp_h->th_offset_and_rsrvd[0] += + tcpha->tha_offset_and_reserved += ((num_sack_blk * 2 + 1) << 4); } } /* - * tcp_mdt_add_attrs() is called by tcp_multisend() in order to attach - * the destination address and SAP attribute, and if necessary, the 
- * hardware checksum offload attribute to a Multidata message. - */ -static int -tcp_mdt_add_attrs(multidata_t *mmd, const mblk_t *dlmp, const boolean_t hwcksum, - const uint32_t start, const uint32_t stuff, const uint32_t end, - const uint32_t flags, tcp_stack_t *tcps) -{ - /* Add global destination address & SAP attribute */ - if (dlmp == NULL || !ip_md_addr_attr(mmd, NULL, dlmp)) { - ip1dbg(("tcp_mdt_add_attrs: can't add global physical " - "destination address+SAP\n")); - - if (dlmp != NULL) - TCP_STAT(tcps, tcp_mdt_allocfail); - return (-1); - } - - /* Add global hwcksum attribute */ - if (hwcksum && - !ip_md_hcksum_attr(mmd, NULL, start, stuff, end, flags)) { - ip1dbg(("tcp_mdt_add_attrs: can't add global hardware " - "checksum attribute\n")); - - TCP_STAT(tcps, tcp_mdt_allocfail); - return (-1); - } - - return (0); -} - -/* - * Smaller and private version of pdescinfo_t used specifically for TCP, - * which allows for only two payload spans per packet. - */ -typedef struct tcp_pdescinfo_s PDESCINFO_STRUCT(2) tcp_pdescinfo_t; - -/* - * tcp_multisend() is called by tcp_wput_data() for Multidata Transmit - * scheme, and returns one the following: + * tcp_send() is called by tcp_wput_data() and returns one of the following: * * -1 = failed allocation. * 0 = success; burst count reached, or usable send window is too small, * and that we'd rather wait until later before sending again. 
*/ static int -tcp_multisend(queue_t *q, tcp_t *tcp, const int mss, const int tcp_hdr_len, - const int tcp_tcp_hdr_len, const int num_sack_blk, int *usable, - uint_t *snxt, int *tail_unsent, mblk_t **xmit_tail, mblk_t *local_time, - const int mdt_thres) -{ - mblk_t *md_mp_head, *md_mp, *md_pbuf, *md_pbuf_nxt, *md_hbuf; - multidata_t *mmd; - uint_t obsegs, obbytes, hdr_frag_sz; - uint_t cur_hdr_off, cur_pld_off, base_pld_off, first_snxt; - int num_burst_seg, max_pld; - pdesc_t *pkt; - tcp_pdescinfo_t tcp_pkt_info; - pdescinfo_t *pkt_info; - int pbuf_idx, pbuf_idx_nxt; - int seg_len, len, spill, af; - boolean_t add_buffer, zcopy, clusterwide; - boolean_t rconfirm = B_FALSE; - boolean_t done = B_FALSE; - uint32_t cksum; - uint32_t hwcksum_flags; - ire_t *ire = NULL; - ill_t *ill; - ipha_t *ipha; - ip6_t *ip6h; - ipaddr_t src, dst; - ill_zerocopy_capab_t *zc_cap = NULL; - uint16_t *up; - int err; - conn_t *connp; - tcp_stack_t *tcps = tcp->tcp_tcps; - ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip; - int usable_mmd, tail_unsent_mmd; - uint_t snxt_mmd, obsegs_mmd, obbytes_mmd; - mblk_t *xmit_tail_mmd; - netstackid_t stack_id; - -#ifdef _BIG_ENDIAN -#define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 28) & 0x7) -#else -#define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 4) & 0x7) -#endif - -#define PREP_NEW_MULTIDATA() { \ - mmd = NULL; \ - md_mp = md_hbuf = NULL; \ - cur_hdr_off = 0; \ - max_pld = tcp->tcp_mdt_max_pld; \ - pbuf_idx = pbuf_idx_nxt = -1; \ - add_buffer = B_TRUE; \ - zcopy = B_FALSE; \ -} - -#define PREP_NEW_PBUF() { \ - md_pbuf = md_pbuf_nxt = NULL; \ - pbuf_idx = pbuf_idx_nxt = -1; \ - cur_pld_off = 0; \ - first_snxt = *snxt; \ - ASSERT(*tail_unsent > 0); \ - base_pld_off = MBLKL(*xmit_tail) - *tail_unsent; \ -} - - ASSERT(mdt_thres >= mss); - ASSERT(*usable > 0 && *usable > mdt_thres); - ASSERT(tcp->tcp_state == TCPS_ESTABLISHED); - ASSERT(!TCP_IS_DETACHED(tcp)); - ASSERT(tcp->tcp_valid_bits == 0 || - tcp->tcp_valid_bits == TCP_FSS_VALID); - 
ASSERT((tcp->tcp_ipversion == IPV4_VERSION && - tcp->tcp_ip_hdr_len == IP_SIMPLE_HDR_LENGTH) || - (tcp->tcp_ipversion == IPV6_VERSION && - tcp->tcp_ip_hdr_len == IPV6_HDR_LEN)); - - connp = tcp->tcp_connp; - ASSERT(connp != NULL); - ASSERT(CONN_IS_LSO_MD_FASTPATH(connp)); - ASSERT(!CONN_IPSEC_OUT_ENCAPSULATED(connp)); - - stack_id = connp->conn_netstack->netstack_stackid; - - usable_mmd = tail_unsent_mmd = 0; - snxt_mmd = obsegs_mmd = obbytes_mmd = 0; - xmit_tail_mmd = NULL; - /* - * Note that tcp will only declare at most 2 payload spans per - * packet, which is much lower than the maximum allowable number - * of packet spans per Multidata. For this reason, we use the - * privately declared and smaller descriptor info structure, in - * order to save some stack space. - */ - pkt_info = (pdescinfo_t *)&tcp_pkt_info; - - af = (tcp->tcp_ipversion == IPV4_VERSION) ? AF_INET : AF_INET6; - if (af == AF_INET) { - dst = tcp->tcp_ipha->ipha_dst; - src = tcp->tcp_ipha->ipha_src; - ASSERT(!CLASSD(dst)); - } - ASSERT(af == AF_INET || - !IN6_IS_ADDR_MULTICAST(&tcp->tcp_ip6h->ip6_dst)); - - obsegs = obbytes = 0; - num_burst_seg = tcp->tcp_snd_burst; - md_mp_head = NULL; - PREP_NEW_MULTIDATA(); - - /* - * Before we go on further, make sure there is an IRE that we can - * use, and that the ILL supports MDT. Otherwise, there's no point - * in proceeding any further, and we should just hand everything - * off to the legacy path. - */ - if (!tcp_send_find_ire(tcp, (af == AF_INET) ? &dst : NULL, &ire)) - goto legacy_send_no_md; - - ASSERT(ire != NULL); - ASSERT(af != AF_INET || ire->ire_ipversion == IPV4_VERSION); - ASSERT(af == AF_INET || !IN6_IS_ADDR_V4MAPPED(&(ire->ire_addr_v6))); - ASSERT(af == AF_INET || ire->ire_nce != NULL); - ASSERT(!(ire->ire_type & IRE_BROADCAST)); - /* - * If we do support loopback for MDT (which requires modifications - * to the receiving paths), the following assertions should go away, - * and we would be sending the Multidata to loopback conn later on. 
- */ - ASSERT(!IRE_IS_LOCAL(ire)); - ASSERT(ire->ire_stq != NULL); - - ill = ire_to_ill(ire); - ASSERT(ill != NULL); - ASSERT(!ILL_MDT_CAPABLE(ill) || ill->ill_mdt_capab != NULL); - - if (!tcp->tcp_ire_ill_check_done) { - tcp_ire_ill_check(tcp, ire, ill, B_TRUE); - tcp->tcp_ire_ill_check_done = B_TRUE; - } - - /* - * If the underlying interface conditions have changed, or if the - * new interface does not support MDT, go back to legacy path. - */ - if (!ILL_MDT_USABLE(ill) || (ire->ire_flags & RTF_MULTIRT) != 0) { - /* don't go through this path anymore for this connection */ - TCP_STAT(tcps, tcp_mdt_conn_halted2); - tcp->tcp_mdt = B_FALSE; - ip1dbg(("tcp_multisend: disabling MDT for connp %p on " - "interface %s\n", (void *)connp, ill->ill_name)); - /* IRE will be released prior to returning */ - goto legacy_send_no_md; - } - - if (ill->ill_capabilities & ILL_CAPAB_ZEROCOPY) - zc_cap = ill->ill_zerocopy_capab; - - /* - * Check if we can take tcp fast-path. Note that "incomplete" - * ire's (where the link-layer for next hop is not resolved - * or where the fast-path header in nce_fp_mp is not available - * yet) are sent down the legacy (slow) path. - * NOTE: We should fix ip_xmit_v4 to handle M_MULTIDATA - */ - if (ire->ire_nce && ire->ire_nce->nce_state != ND_REACHABLE) { - /* IRE will be released prior to returning */ - goto legacy_send_no_md; - } - - /* go to legacy path if interface doesn't support zerocopy */ - if (tcp->tcp_snd_zcopy_aware && do_tcpzcopy != 2 && - (zc_cap == NULL || zc_cap->ill_zerocopy_flags == 0)) { - /* IRE will be released prior to returning */ - goto legacy_send_no_md; - } - - /* does the interface support hardware checksum offload? 
*/ - hwcksum_flags = 0; - if (ILL_HCKSUM_CAPABLE(ill) && - (ill->ill_hcksum_capab->ill_hcksum_txflags & - (HCKSUM_INET_FULL_V4 | HCKSUM_INET_FULL_V6 | HCKSUM_INET_PARTIAL | - HCKSUM_IPHDRCKSUM)) && dohwcksum) { - if (ill->ill_hcksum_capab->ill_hcksum_txflags & - HCKSUM_IPHDRCKSUM) - hwcksum_flags = HCK_IPV4_HDRCKSUM; - - if (ill->ill_hcksum_capab->ill_hcksum_txflags & - (HCKSUM_INET_FULL_V4 | HCKSUM_INET_FULL_V6)) - hwcksum_flags |= HCK_FULLCKSUM; - else if (ill->ill_hcksum_capab->ill_hcksum_txflags & - HCKSUM_INET_PARTIAL) - hwcksum_flags |= HCK_PARTIALCKSUM; - } - - /* - * Each header fragment consists of the leading extra space, - * followed by the TCP/IP header, and the trailing extra space. - * We make sure that each header fragment begins on a 32-bit - * aligned memory address (tcp_mdt_hdr_head is already 32-bit - * aligned in tcp_mdt_update). - */ - hdr_frag_sz = roundup((tcp->tcp_mdt_hdr_head + tcp_hdr_len + - tcp->tcp_mdt_hdr_tail), 4); - - /* are we starting from the beginning of data block? */ - if (*tail_unsent == 0) { - *xmit_tail = (*xmit_tail)->b_cont; - ASSERT((uintptr_t)MBLKL(*xmit_tail) <= (uintptr_t)INT_MAX); - *tail_unsent = (int)MBLKL(*xmit_tail); - } - - /* - * Here we create one or more Multidata messages, each made up of - * one header buffer and up to N payload buffers. This entire - * operation is done within two loops: - * - * The outer loop mostly deals with creating the Multidata message, - * as well as the header buffer that gets added to it. It also - * links the Multidata messages together such that all of them can - * be sent down to the lower layer in a single putnext call; this - * linking behavior depends on the tcp_mdt_chain tunable. - * - * The inner loop takes an existing Multidata message, and adds - * one or more (up to tcp_mdt_max_pld) payload buffers to it. 
It - * packetizes those buffers by filling up the corresponding header - * buffer fragments with the proper IP and TCP headers, and by - * describing the layout of each packet in the packet descriptors - * that get added to the Multidata. - */ - do { - /* - * If usable send window is too small, or data blocks in - * transmit list are smaller than our threshold (i.e. app - * performs large writes followed by small ones), we hand - * off the control over to the legacy path. Note that we'll - * get back the control once it encounters a large block. - */ - if (*usable < mss || (*tail_unsent <= mdt_thres && - (*xmit_tail)->b_cont != NULL && - MBLKL((*xmit_tail)->b_cont) <= mdt_thres)) { - /* send down what we've got so far */ - if (md_mp_head != NULL) { - tcp_multisend_data(tcp, ire, ill, md_mp_head, - obsegs, obbytes, &rconfirm); - } - /* - * Pass control over to tcp_send(), but tell it to - * return to us once a large-size transmission is - * possible. - */ - TCP_STAT(tcps, tcp_mdt_legacy_small); - if ((err = tcp_send(q, tcp, mss, tcp_hdr_len, - tcp_tcp_hdr_len, num_sack_blk, usable, snxt, - tail_unsent, xmit_tail, local_time, - mdt_thres)) <= 0) { - /* burst count reached, or alloc failed */ - IRE_REFRELE(ire); - return (err); - } - - /* tcp_send() may have sent everything, so check */ - if (*usable <= 0) { - IRE_REFRELE(ire); - return (0); - } - - TCP_STAT(tcps, tcp_mdt_legacy_ret); - /* - * We may have delivered the Multidata, so make sure - * to re-initialize before the next round. - */ - md_mp_head = NULL; - obsegs = obbytes = 0; - num_burst_seg = tcp->tcp_snd_burst; - PREP_NEW_MULTIDATA(); - - /* are we starting from the beginning of data block? */ - if (*tail_unsent == 0) { - *xmit_tail = (*xmit_tail)->b_cont; - ASSERT((uintptr_t)MBLKL(*xmit_tail) <= - (uintptr_t)INT_MAX); - *tail_unsent = (int)MBLKL(*xmit_tail); - } - } - /* - * Record current values for parameters we may need to pass - * to tcp_send() or tcp_multisend_data(). 
We checkpoint at - * each iteration of the outer loop (each multidata message - * creation). If we have a failure in the inner loop, we send - * any complete multidata messages we have before reverting - * to using the traditional non-md path. - */ - snxt_mmd = *snxt; - usable_mmd = *usable; - xmit_tail_mmd = *xmit_tail; - tail_unsent_mmd = *tail_unsent; - obsegs_mmd = obsegs; - obbytes_mmd = obbytes; - - /* - * max_pld limits the number of mblks in tcp's transmit - * queue that can be added to a Multidata message. Once - * this counter reaches zero, no more additional mblks - * can be added to it. What happens afterwards depends - * on whether or not we are set to chain the Multidata - * messages. If we are to link them together, reset - * max_pld to its original value (tcp_mdt_max_pld) and - * prepare to create a new Multidata message which will - * get linked to md_mp_head. Else, leave it alone and - * let the inner loop break on its own. - */ - if (tcp_mdt_chain && max_pld == 0) - PREP_NEW_MULTIDATA(); - - /* adding a payload buffer; re-initialize values */ - if (add_buffer) - PREP_NEW_PBUF(); - - /* - * If we don't have a Multidata, either because we just - * (re)entered this outer loop, or after we branched off - * to tcp_send above, setup the Multidata and header - * buffer to be used. - */ - if (md_mp == NULL) { - int md_hbuflen; - uint32_t start, stuff; - - /* - * Calculate Multidata header buffer size large enough - * to hold all of the headers that can possibly be - * sent at this moment. We'd rather over-estimate - * the size than running out of space; this is okay - * since this buffer is small anyway. - */ - md_hbuflen = (howmany(*usable, mss) + 1) * hdr_frag_sz; - - /* - * Start and stuff offset for partial hardware - * checksum offload; these are currently for IPv4. - * For full checksum offload, they are set to zero. 
- */ - if ((hwcksum_flags & HCK_PARTIALCKSUM)) { - if (af == AF_INET) { - start = IP_SIMPLE_HDR_LENGTH; - stuff = IP_SIMPLE_HDR_LENGTH + - TCP_CHECKSUM_OFFSET; - } else { - start = IPV6_HDR_LEN; - stuff = IPV6_HDR_LEN + - TCP_CHECKSUM_OFFSET; - } - } else { - start = stuff = 0; - } - - /* - * Create the header buffer, Multidata, as well as - * any necessary attributes (destination address, - * SAP and hardware checksum offload) that should - * be associated with the Multidata message. - */ - ASSERT(cur_hdr_off == 0); - if ((md_hbuf = allocb(md_hbuflen, BPRI_HI)) == NULL || - ((md_hbuf->b_wptr += md_hbuflen), - (mmd = mmd_alloc(md_hbuf, &md_mp, - KM_NOSLEEP)) == NULL) || (tcp_mdt_add_attrs(mmd, - /* fastpath mblk */ - ire->ire_nce->nce_res_mp, - /* hardware checksum enabled */ - (hwcksum_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)), - /* hardware checksum offsets */ - start, stuff, 0, - /* hardware checksum flag */ - hwcksum_flags, tcps) != 0)) { -legacy_send: - /* - * We arrive here from a failure within the - * inner (packetizer) loop or we fail one of - * the conditionals above. We restore the - * previously checkpointed values for: - * xmit_tail - * usable - * tail_unsent - * snxt - * obbytes - * obsegs - * We should then be able to dispatch any - * complete multidata before reverting to the - * traditional path with consistent parameters - * (the inner loop updates these as it - * iterates). - */ - *xmit_tail = xmit_tail_mmd; - *usable = usable_mmd; - *tail_unsent = tail_unsent_mmd; - *snxt = snxt_mmd; - obbytes = obbytes_mmd; - obsegs = obsegs_mmd; - if (md_mp != NULL) { - /* Unlink message from the chain */ - if (md_mp_head != NULL) { - err = (intptr_t)rmvb(md_mp_head, - md_mp); - /* - * We can't assert that rmvb - * did not return -1, since we - * may get here before linkb - * happens. We do, however, - * check if we just removed the - * only element in the list. 
- */ - if (err == 0) - md_mp_head = NULL; - } - /* md_hbuf gets freed automatically */ - TCP_STAT(tcps, tcp_mdt_discarded); - freeb(md_mp); - } else { - /* Either allocb or mmd_alloc failed */ - TCP_STAT(tcps, tcp_mdt_allocfail); - if (md_hbuf != NULL) - freeb(md_hbuf); - } - - /* send down what we've got so far */ - if (md_mp_head != NULL) { - tcp_multisend_data(tcp, ire, ill, - md_mp_head, obsegs, obbytes, - &rconfirm); - } -legacy_send_no_md: - if (ire != NULL) - IRE_REFRELE(ire); - /* - * Too bad; let the legacy path handle this. - * We specify INT_MAX for the threshold, since - * we gave up with the Multidata processings - * and let the old path have it all. - */ - TCP_STAT(tcps, tcp_mdt_legacy_all); - return (tcp_send(q, tcp, mss, tcp_hdr_len, - tcp_tcp_hdr_len, num_sack_blk, usable, - snxt, tail_unsent, xmit_tail, local_time, - INT_MAX)); - } - - /* link to any existing ones, if applicable */ - TCP_STAT(tcps, tcp_mdt_allocd); - if (md_mp_head == NULL) { - md_mp_head = md_mp; - } else if (tcp_mdt_chain) { - TCP_STAT(tcps, tcp_mdt_linked); - linkb(md_mp_head, md_mp); - } - } - - ASSERT(md_mp_head != NULL); - ASSERT(tcp_mdt_chain || md_mp_head->b_cont == NULL); - ASSERT(md_mp != NULL && mmd != NULL); - ASSERT(md_hbuf != NULL); - - /* - * Packetize the transmittable portion of the data block; - * each data block is essentially added to the Multidata - * as a payload buffer. We also deal with adding more - * than one payload buffers, which happens when the remaining - * packetized portion of the current payload buffer is less - * than MSS, while the next data block in transmit queue - * has enough data to make up for one. This "spillover" - * case essentially creates a split-packet, where portions - * of the packet's payload fragments may span across two - * virtually discontiguous address blocks. 
- */ - seg_len = mss; - do { - len = seg_len; - - /* one must remain NULL for DTRACE_IP_FASTPATH */ - ipha = NULL; - ip6h = NULL; - - ASSERT(len > 0); - ASSERT(max_pld >= 0); - ASSERT(!add_buffer || cur_pld_off == 0); - - /* - * First time around for this payload buffer; note - * in the case of a spillover, the following has - * been done prior to adding the split-packet - * descriptor to Multidata, and we don't want to - * repeat the process. - */ - if (add_buffer) { - ASSERT(mmd != NULL); - ASSERT(md_pbuf == NULL); - ASSERT(md_pbuf_nxt == NULL); - ASSERT(pbuf_idx == -1 && pbuf_idx_nxt == -1); - - /* - * Have we reached the limit? We'd get to - * this case when we're not chaining the - * Multidata messages together, and since - * we're done, terminate this loop. - */ - if (max_pld == 0) - break; /* done */ - - if ((md_pbuf = dupb(*xmit_tail)) == NULL) { - TCP_STAT(tcps, tcp_mdt_allocfail); - goto legacy_send; /* out_of_mem */ - } - - if (IS_VMLOANED_MBLK(md_pbuf) && !zcopy && - zc_cap != NULL) { - if (!ip_md_zcopy_attr(mmd, NULL, - zc_cap->ill_zerocopy_flags)) { - freeb(md_pbuf); - TCP_STAT(tcps, - tcp_mdt_allocfail); - /* out_of_mem */ - goto legacy_send; - } - zcopy = B_TRUE; - } - - md_pbuf->b_rptr += base_pld_off; - - /* - * Add a payload buffer to the Multidata; this - * operation must not fail, or otherwise our - * logic in this routine is broken. There - * is no memory allocation done by the - * routine, so any returned failure simply - * tells us that we've done something wrong. - * - * A failure tells us that either we're adding - * the same payload buffer more than once, or - * we're trying to add more buffers than - * allowed (max_pld calculation is wrong). - * None of the above cases should happen, and - * we panic because either there's horrible - * heap corruption, and/or programming mistake. 
- */ - pbuf_idx = mmd_addpldbuf(mmd, md_pbuf); - if (pbuf_idx < 0) { - cmn_err(CE_PANIC, "tcp_multisend: " - "payload buffer logic error " - "detected for tcp %p mmd %p " - "pbuf %p (%d)\n", - (void *)tcp, (void *)mmd, - (void *)md_pbuf, pbuf_idx); - } - - ASSERT(max_pld > 0); - --max_pld; - add_buffer = B_FALSE; - } - - ASSERT(md_mp_head != NULL); - ASSERT(md_pbuf != NULL); - ASSERT(md_pbuf_nxt == NULL); - ASSERT(pbuf_idx != -1); - ASSERT(pbuf_idx_nxt == -1); - ASSERT(*usable > 0); - - /* - * We spillover to the next payload buffer only - * if all of the following is true: - * - * 1. There is not enough data on the current - * payload buffer to make up `len', - * 2. We are allowed to send `len', - * 3. The next payload buffer length is large - * enough to accomodate `spill'. - */ - if ((spill = len - *tail_unsent) > 0 && - *usable >= len && - MBLKL((*xmit_tail)->b_cont) >= spill && - max_pld > 0) { - md_pbuf_nxt = dupb((*xmit_tail)->b_cont); - if (md_pbuf_nxt == NULL) { - TCP_STAT(tcps, tcp_mdt_allocfail); - goto legacy_send; /* out_of_mem */ - } - - if (IS_VMLOANED_MBLK(md_pbuf_nxt) && !zcopy && - zc_cap != NULL) { - if (!ip_md_zcopy_attr(mmd, NULL, - zc_cap->ill_zerocopy_flags)) { - freeb(md_pbuf_nxt); - TCP_STAT(tcps, - tcp_mdt_allocfail); - /* out_of_mem */ - goto legacy_send; - } - zcopy = B_TRUE; - } - - /* - * See comments above on the first call to - * mmd_addpldbuf for explanation on the panic. - */ - pbuf_idx_nxt = mmd_addpldbuf(mmd, md_pbuf_nxt); - if (pbuf_idx_nxt < 0) { - panic("tcp_multisend: " - "next payload buffer logic error " - "detected for tcp %p mmd %p " - "pbuf %p (%d)\n", - (void *)tcp, (void *)mmd, - (void *)md_pbuf_nxt, pbuf_idx_nxt); - } - - ASSERT(max_pld > 0); - --max_pld; - } else if (spill > 0) { - /* - * If there's a spillover, but the following - * xmit_tail couldn't give us enough octets - * to reach "len", then stop the current - * Multidata creation and let the legacy - * tcp_send() path take over. 
We don't want - * to send the tiny segment as part of this - * Multidata for performance reasons; instead, - * we let the legacy path deal with grouping - * it with the subsequent small mblks. - */ - if (*usable >= len && - MBLKL((*xmit_tail)->b_cont) < spill) { - max_pld = 0; - break; /* done */ - } - - /* - * We can't spillover, and we are near - * the end of the current payload buffer, - * so send what's left. - */ - ASSERT(*tail_unsent > 0); - len = *tail_unsent; - } - - /* tail_unsent is negated if there is a spillover */ - *tail_unsent -= len; - *usable -= len; - ASSERT(*usable >= 0); - - if (*usable < mss) - seg_len = *usable; - /* - * Sender SWS avoidance; see comments in tcp_send(); - * everything else is the same, except that we only - * do this here if there is no more data to be sent - * following the current xmit_tail. We don't check - * for 1-byte urgent data because we shouldn't get - * here if TCP_URG_VALID is set. - */ - if (*usable > 0 && *usable < mss && - ((md_pbuf_nxt == NULL && - (*xmit_tail)->b_cont == NULL) || - (md_pbuf_nxt != NULL && - (*xmit_tail)->b_cont->b_cont == NULL)) && - seg_len < (tcp->tcp_max_swnd >> 1) && - (tcp->tcp_unsent - - ((*snxt + len) - tcp->tcp_snxt)) > seg_len && - !tcp->tcp_zero_win_probe) { - if ((*snxt + len) == tcp->tcp_snxt && - (*snxt + len) == tcp->tcp_suna) { - TCP_TIMER_RESTART(tcp, tcp->tcp_rto); - } - done = B_TRUE; - } - - /* - * Prime pump for IP's checksumming on our behalf; - * include the adjustment for a source route if any. - * Do this only for software/partial hardware checksum - * offload, as this field gets zeroed out later for - * the full hardware checksum offload case. 
- */ - if (!(hwcksum_flags & HCK_FULLCKSUM)) { - cksum = len + tcp_tcp_hdr_len + tcp->tcp_sum; - cksum = (cksum >> 16) + (cksum & 0xFFFF); - U16_TO_ABE16(cksum, tcp->tcp_tcph->th_sum); - } - - U32_TO_ABE32(*snxt, tcp->tcp_tcph->th_seq); - *snxt += len; - - tcp->tcp_tcph->th_flags[0] = TH_ACK; - /* - * We set the PUSH bit only if TCP has no more buffered - * data to be transmitted (or if sender SWS avoidance - * takes place), as opposed to setting it for every - * last packet in the burst. - */ - if (done || - (tcp->tcp_unsent - (*snxt - tcp->tcp_snxt)) == 0) - tcp->tcp_tcph->th_flags[0] |= TH_PUSH; - - /* - * Set FIN bit if this is our last segment; snxt - * already includes its length, and it will not - * be adjusted after this point. - */ - if (tcp->tcp_valid_bits == TCP_FSS_VALID && - *snxt == tcp->tcp_fss) { - if (!tcp->tcp_fin_acked) { - tcp->tcp_tcph->th_flags[0] |= TH_FIN; - BUMP_MIB(&tcps->tcps_mib, - tcpOutControl); - } - if (!tcp->tcp_fin_sent) { - tcp->tcp_fin_sent = B_TRUE; - /* - * tcp state must be ESTABLISHED - * in order for us to get here in - * the first place. - */ - tcp->tcp_state = TCPS_FIN_WAIT_1; - - /* - * Upon returning from this routine, - * tcp_wput_data() will set tcp_snxt - * to be equal to snxt + tcp_fin_sent. - * This is essentially the same as - * setting it to tcp_fss + 1. 
- */ - } - } - - tcp->tcp_last_sent_len = (ushort_t)len; - - len += tcp_hdr_len; - if (tcp->tcp_ipversion == IPV4_VERSION) - tcp->tcp_ipha->ipha_length = htons(len); - else - tcp->tcp_ip6h->ip6_plen = htons(len - - ((char *)&tcp->tcp_ip6h[1] - - tcp->tcp_iphc)); - - pkt_info->flags = (PDESC_HBUF_REF | PDESC_PBUF_REF); - - /* setup header fragment */ - PDESC_HDR_ADD(pkt_info, - md_hbuf->b_rptr + cur_hdr_off, /* base */ - tcp->tcp_mdt_hdr_head, /* head room */ - tcp_hdr_len, /* len */ - tcp->tcp_mdt_hdr_tail); /* tail room */ - - ASSERT(pkt_info->hdr_lim - pkt_info->hdr_base == - hdr_frag_sz); - ASSERT(MBLKIN(md_hbuf, - (pkt_info->hdr_base - md_hbuf->b_rptr), - PDESC_HDRSIZE(pkt_info))); - - /* setup first payload fragment */ - PDESC_PLD_INIT(pkt_info); - PDESC_PLD_SPAN_ADD(pkt_info, - pbuf_idx, /* index */ - md_pbuf->b_rptr + cur_pld_off, /* start */ - tcp->tcp_last_sent_len); /* len */ - - /* create a split-packet in case of a spillover */ - if (md_pbuf_nxt != NULL) { - ASSERT(spill > 0); - ASSERT(pbuf_idx_nxt > pbuf_idx); - ASSERT(!add_buffer); - - md_pbuf = md_pbuf_nxt; - md_pbuf_nxt = NULL; - pbuf_idx = pbuf_idx_nxt; - pbuf_idx_nxt = -1; - cur_pld_off = spill; - - /* trim out first payload fragment */ - PDESC_PLD_SPAN_TRIM(pkt_info, 0, spill); - - /* setup second payload fragment */ - PDESC_PLD_SPAN_ADD(pkt_info, - pbuf_idx, /* index */ - md_pbuf->b_rptr, /* start */ - spill); /* len */ - - if ((*xmit_tail)->b_next == NULL) { - /* - * Store the lbolt used for RTT - * estimation. We can only record one - * timestamp per mblk so we do it when - * we reach the end of the payload - * buffer. Also we only take a new - * timestamp sample when the previous - * timed data from the same mblk has - * been ack'ed. 
- */ - (*xmit_tail)->b_prev = local_time; - (*xmit_tail)->b_next = - (mblk_t *)(uintptr_t)first_snxt; - } - - first_snxt = *snxt - spill; - - /* - * Advance xmit_tail; usable could be 0 by - * the time we got here, but we made sure - * above that we would only spillover to - * the next data block if usable includes - * the spilled-over amount prior to the - * subtraction. Therefore, we are sure - * that xmit_tail->b_cont can't be NULL. - */ - ASSERT((*xmit_tail)->b_cont != NULL); - *xmit_tail = (*xmit_tail)->b_cont; - ASSERT((uintptr_t)MBLKL(*xmit_tail) <= - (uintptr_t)INT_MAX); - *tail_unsent = (int)MBLKL(*xmit_tail) - spill; - } else { - cur_pld_off += tcp->tcp_last_sent_len; - } - - /* - * Fill in the header using the template header, and - * add options such as time-stamp, ECN and/or SACK, - * as needed. - */ - tcp_fill_header(tcp, pkt_info->hdr_rptr, - (clock_t)local_time, num_sack_blk); - - /* take care of some IP header businesses */ - if (af == AF_INET) { - ipha = (ipha_t *)pkt_info->hdr_rptr; - - ASSERT(OK_32PTR((uchar_t *)ipha)); - ASSERT(PDESC_HDRL(pkt_info) >= - IP_SIMPLE_HDR_LENGTH); - ASSERT(ipha->ipha_version_and_hdr_length == - IP_SIMPLE_HDR_VERSION); - - /* - * Assign ident value for current packet; see - * related comments in ip_wput_ire() about the - * contract private interface with clustering - * group. 
- */ - clusterwide = B_FALSE; - if (cl_inet_ipident != NULL) { - ASSERT(cl_inet_isclusterwide != NULL); - if ((*cl_inet_isclusterwide)(stack_id, - IPPROTO_IP, AF_INET, - (uint8_t *)(uintptr_t)src, NULL)) { - ipha->ipha_ident = - (*cl_inet_ipident)(stack_id, - IPPROTO_IP, AF_INET, - (uint8_t *)(uintptr_t)src, - (uint8_t *)(uintptr_t)dst, - NULL); - clusterwide = B_TRUE; - } - } - - if (!clusterwide) { - ipha->ipha_ident = (uint16_t) - atomic_add_32_nv( - &ire->ire_ident, 1); - } -#ifndef _BIG_ENDIAN - ipha->ipha_ident = (ipha->ipha_ident << 8) | - (ipha->ipha_ident >> 8); -#endif - } else { - ip6h = (ip6_t *)pkt_info->hdr_rptr; - - ASSERT(OK_32PTR((uchar_t *)ip6h)); - ASSERT(IPVER(ip6h) == IPV6_VERSION); - ASSERT(ip6h->ip6_nxt == IPPROTO_TCP); - ASSERT(PDESC_HDRL(pkt_info) >= - (IPV6_HDR_LEN + TCP_CHECKSUM_OFFSET + - TCP_CHECKSUM_SIZE)); - ASSERT(tcp->tcp_ipversion == IPV6_VERSION); - - if (tcp->tcp_ip_forward_progress) { - rconfirm = B_TRUE; - tcp->tcp_ip_forward_progress = B_FALSE; - } - } - - /* at least one payload span, and at most two */ - ASSERT(pkt_info->pld_cnt > 0 && pkt_info->pld_cnt < 3); - - /* add the packet descriptor to Multidata */ - if ((pkt = mmd_addpdesc(mmd, pkt_info, &err, - KM_NOSLEEP)) == NULL) { - /* - * Any failure other than ENOMEM indicates - * that we have passed in invalid pkt_info - * or parameters to mmd_addpdesc, which must - * not happen. - * - * EINVAL is a result of failure on boundary - * checks against the pkt_info contents. It - * should not happen, and we panic because - * either there's horrible heap corruption, - * and/or programming mistake. 
- */ - if (err != ENOMEM) { - cmn_err(CE_PANIC, "tcp_multisend: " - "pdesc logic error detected for " - "tcp %p mmd %p pinfo %p (%d)\n", - (void *)tcp, (void *)mmd, - (void *)pkt_info, err); - } - TCP_STAT(tcps, tcp_mdt_addpdescfail); - goto legacy_send; /* out_of_mem */ - } - ASSERT(pkt != NULL); - - /* calculate IP header and TCP checksums */ - if (af == AF_INET) { - /* calculate pseudo-header checksum */ - cksum = (dst >> 16) + (dst & 0xFFFF) + - (src >> 16) + (src & 0xFFFF); - - /* offset for TCP header checksum */ - up = IPH_TCPH_CHECKSUMP(ipha, - IP_SIMPLE_HDR_LENGTH); - } else { - up = (uint16_t *)&ip6h->ip6_src; - - /* calculate pseudo-header checksum */ - cksum = up[0] + up[1] + up[2] + up[3] + - up[4] + up[5] + up[6] + up[7] + - up[8] + up[9] + up[10] + up[11] + - up[12] + up[13] + up[14] + up[15]; - - /* Fold the initial sum */ - cksum = (cksum & 0xffff) + (cksum >> 16); - - up = (uint16_t *)(((uchar_t *)ip6h) + - IPV6_HDR_LEN + TCP_CHECKSUM_OFFSET); - } - - if (hwcksum_flags & HCK_FULLCKSUM) { - /* clear checksum field for hardware */ - *up = 0; - } else if (hwcksum_flags & HCK_PARTIALCKSUM) { - uint32_t sum; - - /* pseudo-header checksumming */ - sum = *up + cksum + IP_TCP_CSUM_COMP; - sum = (sum & 0xFFFF) + (sum >> 16); - *up = (sum & 0xFFFF) + (sum >> 16); - } else { - /* software checksumming */ - TCP_STAT(tcps, tcp_out_sw_cksum); - TCP_STAT_UPDATE(tcps, tcp_out_sw_cksum_bytes, - tcp->tcp_hdr_len + tcp->tcp_last_sent_len); - *up = IP_MD_CSUM(pkt, tcp->tcp_ip_hdr_len, - cksum + IP_TCP_CSUM_COMP); - if (*up == 0) - *up = 0xFFFF; - } - - /* IPv4 header checksum */ - if (af == AF_INET) { - if (hwcksum_flags & HCK_IPV4_HDRCKSUM) { - ipha->ipha_hdr_checksum = 0; - } else { - IP_HDR_CKSUM(ipha, cksum, - ((uint32_t *)ipha)[0], - ((uint16_t *)ipha)[4]); - } - } - - if (af == AF_INET && - HOOKS4_INTERESTED_PHYSICAL_OUT(ipst) || - af == AF_INET6 && - HOOKS6_INTERESTED_PHYSICAL_OUT(ipst)) { - mblk_t *mp, *mp1; - uchar_t *hdr_rptr, *hdr_wptr; - uchar_t 
*pld_rptr, *pld_wptr; - - /* - * We reconstruct a pseudo packet for the hooks - * framework using mmd_transform_link(). - * If it is a split packet we pullup the - * payload. FW_HOOKS expects a pkt comprising - * of two mblks: a header and the payload. - */ - if ((mp = mmd_transform_link(pkt)) == NULL) { - TCP_STAT(tcps, tcp_mdt_allocfail); - goto legacy_send; - } - - if (pkt_info->pld_cnt > 1) { - /* split payload, more than one pld */ - if ((mp1 = msgpullup(mp->b_cont, -1)) == - NULL) { - freemsg(mp); - TCP_STAT(tcps, - tcp_mdt_allocfail); - goto legacy_send; - } - freemsg(mp->b_cont); - mp->b_cont = mp1; - } else { - mp1 = mp->b_cont; - } - ASSERT(mp1 != NULL && mp1->b_cont == NULL); - - /* - * Remember the message offsets. This is so we - * can detect changes when we return from the - * FW_HOOKS callbacks. - */ - hdr_rptr = mp->b_rptr; - hdr_wptr = mp->b_wptr; - pld_rptr = mp->b_cont->b_rptr; - pld_wptr = mp->b_cont->b_wptr; - - if (af == AF_INET) { - DTRACE_PROBE4( - ip4__physical__out__start, - ill_t *, NULL, - ill_t *, ill, - ipha_t *, ipha, - mblk_t *, mp); - FW_HOOKS( - ipst->ips_ip4_physical_out_event, - ipst->ips_ipv4firewall_physical_out, - NULL, ill, ipha, mp, mp, 0, ipst); - DTRACE_PROBE1( - ip4__physical__out__end, - mblk_t *, mp); - } else { - DTRACE_PROBE4( - ip6__physical__out_start, - ill_t *, NULL, - ill_t *, ill, - ip6_t *, ip6h, - mblk_t *, mp); - FW_HOOKS6( - ipst->ips_ip6_physical_out_event, - ipst->ips_ipv6firewall_physical_out, - NULL, ill, ip6h, mp, mp, 0, ipst); - DTRACE_PROBE1( - ip6__physical__out__end, - mblk_t *, mp); - } - - if (mp == NULL || - (mp1 = mp->b_cont) == NULL || - mp->b_rptr != hdr_rptr || - mp->b_wptr != hdr_wptr || - mp1->b_rptr != pld_rptr || - mp1->b_wptr != pld_wptr || - mp1->b_cont != NULL) { - /* - * We abandon multidata processing and - * return to the normal path, either - * when a packet is blocked, or when - * the boundaries of header buffer or - * payload buffer have been changed by - * FW_HOOKS[6]. 
- */ - if (mp != NULL) - freemsg(mp); - goto legacy_send; - } - /* Finished with the pseudo packet */ - freemsg(mp); - } - DTRACE_IP_FASTPATH(md_hbuf, pkt_info->hdr_rptr, - ill, ipha, ip6h); - /* advance header offset */ - cur_hdr_off += hdr_frag_sz; - - obbytes += tcp->tcp_last_sent_len; - ++obsegs; - } while (!done && *usable > 0 && --num_burst_seg > 0 && - *tail_unsent > 0); - - if ((*xmit_tail)->b_next == NULL) { - /* - * Store the lbolt used for RTT estimation. We can only - * record one timestamp per mblk so we do it when we - * reach the end of the payload buffer. Also we only - * take a new timestamp sample when the previous timed - * data from the same mblk has been ack'ed. - */ - (*xmit_tail)->b_prev = local_time; - (*xmit_tail)->b_next = (mblk_t *)(uintptr_t)first_snxt; - } - - ASSERT(*tail_unsent >= 0); - if (*tail_unsent > 0) { - /* - * We got here because we broke out of the above - * loop due to of one of the following cases: - * - * 1. len < adjusted MSS (i.e. small), - * 2. Sender SWS avoidance, - * 3. max_pld is zero. - * - * We are done for this Multidata, so trim our - * last payload buffer (if any) accordingly. - */ - if (md_pbuf != NULL) - md_pbuf->b_wptr -= *tail_unsent; - } else if (*usable > 0) { - *xmit_tail = (*xmit_tail)->b_cont; - ASSERT((uintptr_t)MBLKL(*xmit_tail) <= - (uintptr_t)INT_MAX); - *tail_unsent = (int)MBLKL(*xmit_tail); - add_buffer = B_TRUE; - } - } while (!done && *usable > 0 && num_burst_seg > 0 && - (tcp_mdt_chain || max_pld > 0)); - - if (md_mp_head != NULL) { - /* send everything down */ - tcp_multisend_data(tcp, ire, ill, md_mp_head, obsegs, obbytes, - &rconfirm); - } - -#undef PREP_NEW_MULTIDATA -#undef PREP_NEW_PBUF -#undef IPVER - - IRE_REFRELE(ire); - return (0); -} - -/* - * A wrapper function for sending one or more Multidata messages down to - * the module below ip; this routine does not release the reference of the - * IRE (caller does that). This routine is analogous to tcp_send_data(). 
- */ -static void -tcp_multisend_data(tcp_t *tcp, ire_t *ire, const ill_t *ill, mblk_t *md_mp_head, - const uint_t obsegs, const uint_t obbytes, boolean_t *rconfirm) +tcp_send(tcp_t *tcp, const int mss, const int total_hdr_len, + const int tcp_hdr_len, const int num_sack_blk, int *usable, + uint_t *snxt, int *tail_unsent, mblk_t **xmit_tail, mblk_t *local_time) { - uint64_t delta; - nce_t *nce; - tcp_stack_t *tcps = tcp->tcp_tcps; - ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip; - - ASSERT(ire != NULL && ill != NULL); - ASSERT(ire->ire_stq != NULL); - ASSERT(md_mp_head != NULL); - ASSERT(rconfirm != NULL); - - /* adjust MIBs and IRE timestamp */ - DTRACE_PROBE2(tcp__trace__send, mblk_t *, md_mp_head, tcp_t *, tcp); - tcp->tcp_obsegs += obsegs; - UPDATE_MIB(&tcps->tcps_mib, tcpOutDataSegs, obsegs); - UPDATE_MIB(&tcps->tcps_mib, tcpOutDataBytes, obbytes); - TCP_STAT_UPDATE(tcps, tcp_mdt_pkt_out, obsegs); - - if (tcp->tcp_ipversion == IPV4_VERSION) { - TCP_STAT_UPDATE(tcps, tcp_mdt_pkt_out_v4, obsegs); - } else { - TCP_STAT_UPDATE(tcps, tcp_mdt_pkt_out_v6, obsegs); - } - UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests, obsegs); - UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits, obsegs); - UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets, obbytes); - - ire->ire_ob_pkt_count += obsegs; - if (ire->ire_ipif != NULL) - atomic_add_32(&ire->ire_ipif->ipif_ob_pkt_count, obsegs); - ire->ire_last_used_time = lbolt; - - if ((tcp->tcp_ipversion == IPV4_VERSION && - ipst->ips_ip4_observe.he_interested) || - (tcp->tcp_ipversion == IPV6_VERSION && - ipst->ips_ip6_observe.he_interested)) { - multidata_t *dlmdp = mmd_getmultidata(md_mp_head); - pdesc_t *dl_pkt; - pdescinfo_t pinfo; - mblk_t *nmp; - zoneid_t szone = tcp->tcp_connp->conn_zoneid; - - for (dl_pkt = mmd_getfirstpdesc(dlmdp, &pinfo); - (dl_pkt != NULL); - dl_pkt = mmd_getnextpdesc(dl_pkt, &pinfo)) { - if ((nmp = mmd_transform_link(dl_pkt)) == NULL) - continue; - ipobs_hook(nmp, IPOBS_HOOK_OUTBOUND, szone, - 
ALL_ZONES, ill, ipst); - freemsg(nmp); - } - } - - /* send it down */ - putnext(ire->ire_stq, md_mp_head); - - /* we're done for TCP/IPv4 */ - if (tcp->tcp_ipversion == IPV4_VERSION) - return; - - nce = ire->ire_nce; - - ASSERT(nce != NULL); - ASSERT(!(nce->nce_flags & (NCE_F_NONUD|NCE_F_PERMANENT))); - ASSERT(nce->nce_state != ND_INCOMPLETE); - - /* reachability confirmation? */ - if (*rconfirm) { - nce->nce_last = TICK_TO_MSEC(lbolt64); - if (nce->nce_state != ND_REACHABLE) { - mutex_enter(&nce->nce_lock); - nce->nce_state = ND_REACHABLE; - nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; - mutex_exit(&nce->nce_lock); - (void) untimeout(nce->nce_timeout_id); - if (ip_debug > 2) { - /* ip1dbg */ - pr_addr_dbg("tcp_multisend_data: state " - "for %s changed to REACHABLE\n", - AF_INET6, &ire->ire_addr_v6); - } - } - /* reset transport reachability confirmation */ - *rconfirm = B_FALSE; - } - - delta = TICK_TO_MSEC(lbolt64) - nce->nce_last; - ip1dbg(("tcp_multisend_data: delta = %" PRId64 - " ill_reachable_time = %d \n", delta, ill->ill_reachable_time)); - - if (delta > (uint64_t)ill->ill_reachable_time) { - mutex_enter(&nce->nce_lock); - switch (nce->nce_state) { - case ND_REACHABLE: - case ND_STALE: - /* - * ND_REACHABLE is identical to ND_STALE in this - * specific case. If reachable time has expired for - * this neighbor (delta is greater than reachable - * time), conceptually, the neighbor cache is no - * longer in REACHABLE state, but already in STALE - * state. So the correct transition here is to - * ND_DELAY. 
- */ - nce->nce_state = ND_DELAY; - mutex_exit(&nce->nce_lock); - NDP_RESTART_TIMER(nce, - ipst->ips_delay_first_probe_time); - if (ip_debug > 3) { - /* ip2dbg */ - pr_addr_dbg("tcp_multisend_data: state " - "for %s changed to DELAY\n", - AF_INET6, &ire->ire_addr_v6); - } - break; - case ND_DELAY: - case ND_PROBE: - mutex_exit(&nce->nce_lock); - /* Timers have already started */ - break; - case ND_UNREACHABLE: - /* - * ndp timer has detected that this nce is - * unreachable and initiated deleting this nce - * and all its associated IREs. This is a race - * where we found the ire before it was deleted - * and have just sent out a packet using this - * unreachable nce. - */ - mutex_exit(&nce->nce_lock); - break; - default: - ASSERT(0); - } - } -} - -/* - * Derived from tcp_send_data(). - */ -static void -tcp_lsosend_data(tcp_t *tcp, mblk_t *mp, ire_t *ire, ill_t *ill, const int mss, - int num_lso_seg) -{ - ipha_t *ipha; - mblk_t *ire_fp_mp; - uint_t ire_fp_mp_len; - uint32_t hcksum_txflags = 0; - ipaddr_t src; - ipaddr_t dst; - uint32_t cksum; - uint16_t *up; - tcp_stack_t *tcps = tcp->tcp_tcps; - ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip; - - ASSERT(DB_TYPE(mp) == M_DATA); - ASSERT(tcp->tcp_state == TCPS_ESTABLISHED); - ASSERT(tcp->tcp_ipversion == IPV4_VERSION); - ASSERT(tcp->tcp_connp != NULL); - ASSERT(CONN_IS_LSO_MD_FASTPATH(tcp->tcp_connp)); - - ipha = (ipha_t *)mp->b_rptr; - src = ipha->ipha_src; - dst = ipha->ipha_dst; - - DTRACE_PROBE2(tcp__trace__send, mblk_t *, mp, tcp_t *, tcp); - - ASSERT(ipha->ipha_ident == 0 || ipha->ipha_ident == IP_HDR_INCLUDED); - ipha->ipha_ident = (uint16_t)atomic_add_32_nv(&ire->ire_ident, - num_lso_seg); -#ifndef _BIG_ENDIAN - ipha->ipha_ident = (ipha->ipha_ident << 8) | (ipha->ipha_ident >> 8); -#endif - if (tcp->tcp_snd_zcopy_aware) { - if ((ill->ill_capabilities & ILL_CAPAB_ZEROCOPY) == 0 || - (ill->ill_zerocopy_capab->ill_zerocopy_flags == 0)) - mp = tcp_zcopy_disable(tcp, mp); - } - - if (ILL_HCKSUM_CAPABLE(ill) 
&& dohwcksum) { - ASSERT(ill->ill_hcksum_capab != NULL); - hcksum_txflags = ill->ill_hcksum_capab->ill_hcksum_txflags; - } - - /* - * Since the TCP checksum should be recalculated by h/w, we can just - * zero the checksum field for HCK_FULLCKSUM, or calculate partial - * pseudo-header checksum for HCK_PARTIALCKSUM. - * The partial pseudo-header excludes TCP length, that was calculated - * in tcp_send(), so to zero *up before further processing. - */ - cksum = (dst >> 16) + (dst & 0xFFFF) + (src >> 16) + (src & 0xFFFF); - - up = IPH_TCPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH); - *up = 0; - - IP_CKSUM_XMIT_FAST(ire->ire_ipversion, hcksum_txflags, mp, ipha, up, - IPPROTO_TCP, IP_SIMPLE_HDR_LENGTH, ntohs(ipha->ipha_length), cksum); - - /* - * Append LSO flags and mss to the mp. - */ - lso_info_set(mp, mss, HW_LSO); - - ipha->ipha_fragment_offset_and_flags |= - (uint32_t)htons(ire->ire_frag_flag); - - ire_fp_mp = ire->ire_nce->nce_fp_mp; - ire_fp_mp_len = MBLKL(ire_fp_mp); - ASSERT(DB_TYPE(ire_fp_mp) == M_DATA); - mp->b_rptr = (uchar_t *)ipha - ire_fp_mp_len; - bcopy(ire_fp_mp->b_rptr, mp->b_rptr, ire_fp_mp_len); - - UPDATE_OB_PKT_COUNT(ire); - ire->ire_last_used_time = lbolt; - BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests); - BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits); - UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets, - ntohs(ipha->ipha_length)); - - DTRACE_PROBE4(ip4__physical__out__start, - ill_t *, NULL, ill_t *, ill, ipha_t *, ipha, mblk_t *, mp); - FW_HOOKS(ipst->ips_ip4_physical_out_event, - ipst->ips_ipv4firewall_physical_out, NULL, - ill, ipha, mp, mp, 0, ipst); - DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, mp); - DTRACE_IP_FASTPATH(mp, ipha, ill, ipha, NULL); - - if (mp != NULL) { - if (ipst->ips_ip4_observe.he_interested) { - zoneid_t szone; - - if (ire_fp_mp_len != 0) - mp->b_rptr += ire_fp_mp_len; - szone = ip_get_zoneid_v4(ipha->ipha_src, mp, - ipst, ALL_ZONES); - ipobs_hook(mp, IPOBS_HOOK_OUTBOUND, szone, - ALL_ZONES, ill, ipst); - if 
(ire_fp_mp_len != 0) - mp->b_rptr -= ire_fp_mp_len; - } - - ILL_SEND_TX(ill, ire, tcp->tcp_connp, mp, 0, NULL); - } -} - -/* - * tcp_send() is called by tcp_wput_data() for non-Multidata transmission - * scheme, and returns one of the following: - * - * -1 = failed allocation. - * 0 = success; burst count reached, or usable send window is too small, - * and that we'd rather wait until later before sending again. - * 1 = success; we are called from tcp_multisend(), and both usable send - * window and tail_unsent are greater than the MDT threshold, and thus - * Multidata Transmit should be used instead. - */ -static int -tcp_send(queue_t *q, tcp_t *tcp, const int mss, const int tcp_hdr_len, - const int tcp_tcp_hdr_len, const int num_sack_blk, int *usable, - uint_t *snxt, int *tail_unsent, mblk_t **xmit_tail, mblk_t *local_time, - const int mdt_thres) -{ - int num_burst_seg = tcp->tcp_snd_burst; - ire_t *ire = NULL; - ill_t *ill = NULL; - mblk_t *ire_fp_mp = NULL; - uint_t ire_fp_mp_len = 0; + int num_burst_seg = tcp->tcp_snd_burst; int num_lso_seg = 1; uint_t lso_usable; boolean_t do_lso_send = B_FALSE; tcp_stack_t *tcps = tcp->tcp_tcps; + conn_t *connp = tcp->tcp_connp; + ip_xmit_attr_t *ixa = connp->conn_ixa; /* - * Check LSO capability before any further work. And the similar check - * need to be done in for(;;) loop. - * LSO will be deployed when therer is more than one mss of available - * data and a burst transmission is allowed. + * Check LSO possibility. The value of tcp->tcp_lso indicates whether + * the underlying connection is LSO capable. Will check whether having + * enough available data to initiate LSO transmission in the for(){} + * loops. */ - if (tcp->tcp_lso && - (tcp->tcp_valid_bits == 0 || - tcp->tcp_valid_bits == TCP_FSS_VALID) && - num_burst_seg >= 2 && (*usable - 1) / mss >= 1) { - /* - * Try to find usable IRE/ILL and do basic check to the ILL. 
- * Double check LSO usability before going further, since the - * underlying interface could have been changed. In case of any - * change of LSO capability, set tcp_ire_ill_check_done to - * B_FALSE to force to check the ILL with the next send. - */ - if (tcp_send_find_ire_ill(tcp, NULL, &ire, &ill) && - tcp->tcp_lso && ILL_LSO_TCP_USABLE(ill)) { - /* - * Enable LSO with this transmission. - * Since IRE has been hold in tcp_send_find_ire_ill(), - * IRE_REFRELE(ire) should be called before return. - */ + if (tcp->tcp_lso && (tcp->tcp_valid_bits & ~TCP_FSS_VALID) == 0) do_lso_send = B_TRUE; - ire_fp_mp = ire->ire_nce->nce_fp_mp; - ire_fp_mp_len = MBLKL(ire_fp_mp); - /* Round up to multiple of 4 */ - ire_fp_mp_len = ((ire_fp_mp_len + 3) / 4) * 4; - } else { - tcp->tcp_lso = B_FALSE; - tcp->tcp_ire_ill_check_done = B_FALSE; - do_lso_send = B_FALSE; - ill = NULL; - } - } for (;;) { struct datab *db; - tcph_t *tcph; + tcpha_t *tcpha; uint32_t sum; mblk_t *mp, *mp1; uchar_t *rptr; int len; /* - * If we're called by tcp_multisend(), and the amount of - * sendable data as well as the size of current xmit_tail - * is beyond the MDT threshold, return to the caller and - * let the large data transmit be done using MDT. + * Burst count reached, return successfully. */ - if (*usable > 0 && *usable > mdt_thres && - (*tail_unsent > mdt_thres || (*tail_unsent == 0 && - MBLKL((*xmit_tail)->b_cont) > mdt_thres))) { - ASSERT(tcp->tcp_mdt); - return (1); /* success; do large send */ - } - if (num_burst_seg == 0) - break; /* success; burst count reached */ + break; /* - * Calculate the maximum payload length we can send in *one* + * Calculate the maximum payload length we can send at one * time. */ if (do_lso_send) { /* - * Check whether need to do LSO any more. + * Check whether be able to to do LSO for the current + * available data. 
*/ if (num_burst_seg >= 2 && (*usable - 1) / mss >= 1) { lso_usable = MIN(tcp->tcp_lso_max, *usable); @@ -20787,7 +15918,10 @@ tcp_send(queue_t *q, tcp_t *tcp, const int mss, const int tcp_hdr_len, } ASSERT(num_lso_seg <= IP_MAXPACKET / mss + 1); - +#ifdef DEBUG + DTRACE_PROBE2(tcp_send_lso, int, num_lso_seg, boolean_t, + do_lso_send); +#endif /* * Adjust num_burst_seg here. */ @@ -20817,7 +15951,7 @@ tcp_send(queue_t *q, tcp_t *tcp, const int mss, const int tcp_hdr_len, /* * If the retransmit timer is not running * we start it so that we will retransmit - * in the case when the the receiver has + * in the case when the receiver has * decremented the window. */ if (*snxt == tcp->tcp_snxt && @@ -20838,7 +15972,7 @@ tcp_send(queue_t *q, tcp_t *tcp, const int mss, const int tcp_hdr_len, } } - tcph = tcp->tcp_tcph; + tcpha = tcp->tcp_tcpha; /* * The reason to adjust len here is that we need to set flags @@ -20849,19 +15983,25 @@ tcp_send(queue_t *q, tcp_t *tcp, const int mss, const int tcp_hdr_len, *usable -= len; /* Approximate - can be adjusted later */ if (*usable > 0) - tcph->th_flags[0] = TH_ACK; + tcpha->tha_flags = TH_ACK; else - tcph->th_flags[0] = (TH_ACK | TH_PUSH); + tcpha->tha_flags = (TH_ACK | TH_PUSH); /* - * Prime pump for IP's checksumming on our behalf + * Prime pump for IP's checksumming on our behalf. * Include the adjustment for a source route if any. + * In case of LSO, the partial pseudo-header checksum should + * exclusive TCP length, so zero tha_sum before IP calculate + * pseudo-header checksum for partial checksum offload. 
*/ - sum = len + tcp_tcp_hdr_len + tcp->tcp_sum; - sum = (sum >> 16) + (sum & 0xFFFF); - U16_TO_ABE16(sum, tcph->th_sum); - - U32_TO_ABE32(*snxt, tcph->th_seq); + if (do_lso_send) { + sum = 0; + } else { + sum = len + tcp_hdr_len + connp->conn_sum; + sum = (sum >> 16) + (sum & 0xFFFF); + } + tcpha->tha_sum = htons(sum); + tcpha->tha_seq = htonl(*snxt); /* * Branch off to tcp_xmit_mp() if any of the VALID bits is @@ -20907,8 +16047,6 @@ tcp_send(queue_t *q, tcp_t *tcp, const int mss, const int tcp_hdr_len, (*xmit_tail)->b_rptr = prev_rptr; if (mp == NULL) { - if (ire != NULL) - IRE_REFRELE(ire); return (-1); } mp1 = mp->b_cont; @@ -20927,7 +16065,7 @@ tcp_send(queue_t *q, tcp_t *tcp, const int mss, const int tcp_hdr_len, BUMP_LOCAL(tcp->tcp_obsegs); BUMP_MIB(&tcps->tcps_mib, tcpOutDataSegs); UPDATE_MIB(&tcps->tcps_mib, tcpOutDataBytes, len); - tcp_send_data(tcp, q, mp); + tcp_send_data(tcp, mp); continue; } @@ -20942,18 +16080,18 @@ tcp_send(queue_t *q, tcp_t *tcp, const int mss, const int tcp_hdr_len, *tail_unsent -= len; if (len <= mss) /* LSO is unusable */ tcp->tcp_last_sent_len = (ushort_t)len; - len += tcp_hdr_len; - if (tcp->tcp_ipversion == IPV4_VERSION) + len += total_hdr_len; + ixa->ixa_pktlen = len; + + if (ixa->ixa_flags & IXAF_IS_IPV4) { tcp->tcp_ipha->ipha_length = htons(len); - else + } else { tcp->tcp_ip6h->ip6_plen = - htons(len - - ((char *)&tcp->tcp_ip6h[1] - - tcp->tcp_iphc)); + htons(len - IPV6_HDR_LEN); + } + mp = dupb(*xmit_tail); if (mp == NULL) { - if (ire != NULL) - IRE_REFRELE(ire); return (-1); /* out_of_mem */ } mp->b_rptr = rptr; @@ -20983,21 +16121,21 @@ tcp_send(queue_t *q, tcp_t *tcp, const int mss, const int tcp_hdr_len, if (len <= mss) /* LSO is unusable (!do_lso_send) */ tcp->tcp_last_sent_len = (ushort_t)len; - len += tcp_hdr_len; - if (tcp->tcp_ipversion == IPV4_VERSION) + len += total_hdr_len; + ixa->ixa_pktlen = len; + + if (ixa->ixa_flags & IXAF_IS_IPV4) { tcp->tcp_ipha->ipha_length = htons(len); - else - 
tcp->tcp_ip6h->ip6_plen = htons(len - - ((char *)&tcp->tcp_ip6h[1] - tcp->tcp_iphc)); + } else { + tcp->tcp_ip6h->ip6_plen = htons(len - IPV6_HDR_LEN); + } mp = dupb(*xmit_tail); if (mp == NULL) { - if (ire != NULL) - IRE_REFRELE(ire); return (-1); /* out_of_mem */ } - len = tcp_hdr_len; + len = total_hdr_len; /* * There are four reasons to allocate a new hdr mblk: * 1) The bytes above us are in use by another packet @@ -21008,24 +16146,21 @@ tcp_send(queue_t *q, tcp_t *tcp, const int mss, const int tcp_hdr_len, rptr = mp->b_rptr - len; if (!OK_32PTR(rptr) || ((db = mp->b_datap), db->db_ref != 2) || - rptr < db->db_base + ire_fp_mp_len) { + rptr < db->db_base) { /* NOTE: we assume allocb returns an OK_32PTR */ must_alloc:; - mp1 = allocb(tcp->tcp_ip_hdr_len + TCP_MAX_HDR_LENGTH + - tcps->tcps_wroff_xtra + ire_fp_mp_len, BPRI_MED); + mp1 = allocb(connp->conn_ht_iphc_allocated + + tcps->tcps_wroff_xtra, BPRI_MED); if (mp1 == NULL) { freemsg(mp); - if (ire != NULL) - IRE_REFRELE(ire); return (-1); /* out_of_mem */ } mp1->b_cont = mp; mp = mp1; /* Leave room for Link Level header */ - len = tcp_hdr_len; - rptr = - &mp->b_rptr[tcps->tcps_wroff_xtra + ire_fp_mp_len]; + len = total_hdr_len; + rptr = &mp->b_rptr[tcps->tcps_wroff_xtra]; mp->b_wptr = &rptr[len]; } @@ -21057,18 +16192,17 @@ tcp_send(queue_t *q, tcp_t *tcp, const int mss, const int tcp_hdr_len, /* * Excess data in mblk; can we split it? - * If MDT is enabled for the connection, + * If LSO is enabled for the connection, * keep on splitting as this is a transient * send path. */ - if (!do_lso_send && !tcp->tcp_mdt && - (spill + nmpsz > 0)) { + if (!do_lso_send && (spill + nmpsz > 0)) { /* * Don't split if stream head was * told to break up larger writes * into smaller ones. 
*/ - if (tcp->tcp_maxpsz > 0) + if (tcp->tcp_maxpsz_multiplier > 0) break; /* @@ -21096,8 +16230,6 @@ tcp_send(queue_t *q, tcp_t *tcp, const int mss, const int tcp_hdr_len, if (mp1 == NULL) { *tail_unsent = spill; freemsg(mp); - if (ire != NULL) - IRE_REFRELE(ire); return (-1); /* out_of_mem */ } } @@ -21119,11 +16251,12 @@ tcp_send(queue_t *q, tcp_t *tcp, const int mss, const int tcp_hdr_len, /* * Adjust the checksum */ - tcph = (tcph_t *)(rptr + tcp->tcp_ip_hdr_len); + tcpha = (tcpha_t *)(rptr + + ixa->ixa_ip_hdr_length); sum += spill; sum = (sum >> 16) + (sum & 0xFFFF); - U16_TO_ABE16(sum, tcph->th_sum); - if (tcp->tcp_ipversion == IPV4_VERSION) { + tcpha->tha_sum = htons(sum); + if (connp->conn_ipversion == IPV4_VERSION) { sum = ntohs( ((ipha_t *)rptr)->ipha_length) + spill; @@ -21136,311 +16269,55 @@ tcp_send(queue_t *q, tcp_t *tcp, const int mss, const int tcp_hdr_len, ((ip6_t *)rptr)->ip6_plen = htons(sum); } + ixa->ixa_pktlen += spill; *tail_unsent = 0; } } if (tcp->tcp_ip_forward_progress) { - ASSERT(tcp->tcp_ipversion == IPV6_VERSION); - *(uint32_t *)mp->b_rptr |= IP_FORWARD_PROG; tcp->tcp_ip_forward_progress = B_FALSE; + ixa->ixa_flags |= IXAF_REACH_CONF; + } else { + ixa->ixa_flags &= ~IXAF_REACH_CONF; } + /* + * Append LSO information, both flags and mss, to the mp. + */ if (do_lso_send) { - tcp_lsosend_data(tcp, mp, ire, ill, mss, - num_lso_seg); - tcp->tcp_obsegs += num_lso_seg; + lso_info_set(mp, mss, HW_LSO); + ixa->ixa_fragsize = IP_MAXPACKET; + ixa->ixa_extra_ident = num_lso_seg - 1; + DTRACE_PROBE2(tcp_send_lso, int, num_lso_seg, + boolean_t, B_TRUE); + + tcp_send_data(tcp, mp); + + /* + * Restore values of ixa_fragsize and ixa_extra_ident. 
+ */ + ixa->ixa_fragsize = ixa->ixa_pmtu; + ixa->ixa_extra_ident = 0; + tcp->tcp_obsegs += num_lso_seg; TCP_STAT(tcps, tcp_lso_times); TCP_STAT_UPDATE(tcps, tcp_lso_pkt_out, num_lso_seg); } else { - tcp_send_data(tcp, q, mp); + tcp_send_data(tcp, mp); BUMP_LOCAL(tcp->tcp_obsegs); } } - if (ire != NULL) - IRE_REFRELE(ire); return (0); } -/* Unlink and return any mblk that looks like it contains a MDT info */ -static mblk_t * -tcp_mdt_info_mp(mblk_t *mp) -{ - mblk_t *prev_mp; - - for (;;) { - prev_mp = mp; - /* no more to process? */ - if ((mp = mp->b_cont) == NULL) - break; - - switch (DB_TYPE(mp)) { - case M_CTL: - if (*(uint32_t *)mp->b_rptr != MDT_IOC_INFO_UPDATE) - continue; - ASSERT(prev_mp != NULL); - prev_mp->b_cont = mp->b_cont; - mp->b_cont = NULL; - return (mp); - default: - break; - } - } - return (mp); -} - -/* MDT info update routine, called when IP notifies us about MDT */ -static void -tcp_mdt_update(tcp_t *tcp, ill_mdt_capab_t *mdt_capab, boolean_t first) -{ - boolean_t prev_state; - tcp_stack_t *tcps = tcp->tcp_tcps; - - /* - * IP is telling us to abort MDT on this connection? We know - * this because the capability is only turned off when IP - * encounters some pathological cases, e.g. link-layer change - * where the new driver doesn't support MDT, or in situation - * where MDT usage on the link-layer has been switched off. - * IP would not have sent us the initial MDT_IOC_INFO_UPDATE - * if the link-layer doesn't support MDT, and if it does, it - * will indicate that the feature is to be turned on. - */ - prev_state = tcp->tcp_mdt; - tcp->tcp_mdt = (mdt_capab->ill_mdt_on != 0); - if (!tcp->tcp_mdt && !first) { - TCP_STAT(tcps, tcp_mdt_conn_halted3); - ip1dbg(("tcp_mdt_update: disabling MDT for connp %p\n", - (void *)tcp->tcp_connp)); - } - - /* - * We currently only support MDT on simple TCP/{IPv4,IPv6}, - * so disable MDT otherwise. The checks are done here - * and in tcp_wput_data(). 
- */ - if (tcp->tcp_mdt && - (tcp->tcp_ipversion == IPV4_VERSION && - tcp->tcp_ip_hdr_len != IP_SIMPLE_HDR_LENGTH) || - (tcp->tcp_ipversion == IPV6_VERSION && - tcp->tcp_ip_hdr_len != IPV6_HDR_LEN)) - tcp->tcp_mdt = B_FALSE; - - if (tcp->tcp_mdt) { - if (mdt_capab->ill_mdt_version != MDT_VERSION_2) { - cmn_err(CE_NOTE, "tcp_mdt_update: unknown MDT " - "version (%d), expected version is %d", - mdt_capab->ill_mdt_version, MDT_VERSION_2); - tcp->tcp_mdt = B_FALSE; - return; - } - - /* - * We need the driver to be able to handle at least three - * spans per packet in order for tcp MDT to be utilized. - * The first is for the header portion, while the rest are - * needed to handle a packet that straddles across two - * virtually non-contiguous buffers; a typical tcp packet - * therefore consists of only two spans. Note that we take - * a zero as "don't care". - */ - if (mdt_capab->ill_mdt_span_limit > 0 && - mdt_capab->ill_mdt_span_limit < 3) { - tcp->tcp_mdt = B_FALSE; - return; - } - - /* a zero means driver wants default value */ - tcp->tcp_mdt_max_pld = MIN(mdt_capab->ill_mdt_max_pld, - tcps->tcps_mdt_max_pbufs); - if (tcp->tcp_mdt_max_pld == 0) - tcp->tcp_mdt_max_pld = tcps->tcps_mdt_max_pbufs; - - /* ensure 32-bit alignment */ - tcp->tcp_mdt_hdr_head = roundup(MAX(tcps->tcps_mdt_hdr_head_min, - mdt_capab->ill_mdt_hdr_head), 4); - tcp->tcp_mdt_hdr_tail = roundup(MAX(tcps->tcps_mdt_hdr_tail_min, - mdt_capab->ill_mdt_hdr_tail), 4); - - if (!first && !prev_state) { - TCP_STAT(tcps, tcp_mdt_conn_resumed2); - ip1dbg(("tcp_mdt_update: reenabling MDT for connp %p\n", - (void *)tcp->tcp_connp)); - } - } -} - -/* Unlink and return any mblk that looks like it contains a LSO info */ -static mblk_t * -tcp_lso_info_mp(mblk_t *mp) -{ - mblk_t *prev_mp; - - for (;;) { - prev_mp = mp; - /* no more to process? 
*/ - if ((mp = mp->b_cont) == NULL) - break; - - switch (DB_TYPE(mp)) { - case M_CTL: - if (*(uint32_t *)mp->b_rptr != LSO_IOC_INFO_UPDATE) - continue; - ASSERT(prev_mp != NULL); - prev_mp->b_cont = mp->b_cont; - mp->b_cont = NULL; - return (mp); - default: - break; - } - } - - return (mp); -} - -/* LSO info update routine, called when IP notifies us about LSO */ -static void -tcp_lso_update(tcp_t *tcp, ill_lso_capab_t *lso_capab) -{ - tcp_stack_t *tcps = tcp->tcp_tcps; - - /* - * IP is telling us to abort LSO on this connection? We know - * this because the capability is only turned off when IP - * encounters some pathological cases, e.g. link-layer change - * where the new NIC/driver doesn't support LSO, or in situation - * where LSO usage on the link-layer has been switched off. - * IP would not have sent us the initial LSO_IOC_INFO_UPDATE - * if the link-layer doesn't support LSO, and if it does, it - * will indicate that the feature is to be turned on. - */ - tcp->tcp_lso = (lso_capab->ill_lso_on != 0); - TCP_STAT(tcps, tcp_lso_enabled); - - /* - * We currently only support LSO on simple TCP/IPv4, - * so disable LSO otherwise. The checks are done here - * and in tcp_wput_data(). - */ - if (tcp->tcp_lso && - (tcp->tcp_ipversion == IPV4_VERSION && - tcp->tcp_ip_hdr_len != IP_SIMPLE_HDR_LENGTH) || - (tcp->tcp_ipversion == IPV6_VERSION)) { - tcp->tcp_lso = B_FALSE; - TCP_STAT(tcps, tcp_lso_disabled); - } else { - tcp->tcp_lso_max = MIN(TCP_MAX_LSO_LENGTH, - lso_capab->ill_lso_max); - } -} - -static void -tcp_ire_ill_check(tcp_t *tcp, ire_t *ire, ill_t *ill, boolean_t check_lso_mdt) -{ - conn_t *connp = tcp->tcp_connp; - tcp_stack_t *tcps = tcp->tcp_tcps; - ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip; - - ASSERT(ire != NULL); - - /* - * We may be in the fastpath here, and although we essentially do - * similar checks as in ip_bind_connected{_v6}/ip_xxinfo_return, - * we try to keep things as brief as possible. 
After all, these - * are only best-effort checks, and we do more thorough ones prior - * to calling tcp_send()/tcp_multisend(). - */ - if ((ipst->ips_ip_lso_outbound || ipst->ips_ip_multidata_outbound) && - check_lso_mdt && !(ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) && - ill != NULL && !CONN_IPSEC_OUT_ENCAPSULATED(connp) && - !(ire->ire_flags & RTF_MULTIRT) && - !IPP_ENABLED(IPP_LOCAL_OUT, ipst) && - CONN_IS_LSO_MD_FASTPATH(connp)) { - if (ipst->ips_ip_lso_outbound && ILL_LSO_CAPABLE(ill)) { - /* Cache the result */ - connp->conn_lso_ok = B_TRUE; - - ASSERT(ill->ill_lso_capab != NULL); - if (!ill->ill_lso_capab->ill_lso_on) { - ill->ill_lso_capab->ill_lso_on = 1; - ip1dbg(("tcp_ire_ill_check: connp %p enables " - "LSO for interface %s\n", (void *)connp, - ill->ill_name)); - } - tcp_lso_update(tcp, ill->ill_lso_capab); - } else if (ipst->ips_ip_multidata_outbound && - ILL_MDT_CAPABLE(ill)) { - /* Cache the result */ - connp->conn_mdt_ok = B_TRUE; - - ASSERT(ill->ill_mdt_capab != NULL); - if (!ill->ill_mdt_capab->ill_mdt_on) { - ill->ill_mdt_capab->ill_mdt_on = 1; - ip1dbg(("tcp_ire_ill_check: connp %p enables " - "MDT for interface %s\n", (void *)connp, - ill->ill_name)); - } - tcp_mdt_update(tcp, ill->ill_mdt_capab, B_TRUE); - } - } - - /* - * The goal is to reduce the number of generated tcp segments by - * setting the maxpsz multiplier to 0; this will have an affect on - * tcp_maxpsz_set(). With this behavior, tcp will pack more data - * into each packet, up to SMSS bytes. Doing this reduces the number - * of outbound segments and incoming ACKs, thus allowing for better - * network and system performance. In contrast the legacy behavior - * may result in sending less than SMSS size, because the last mblk - * for some packets may have more data than needed to make up SMSS, - * and the legacy code refused to "split" it. 
- * - * We apply the new behavior on following situations: - * - * 1) Loopback connections, - * 2) Connections in which the remote peer is not on local subnet, - * 3) Local subnet connections over the bge interface (see below). - * - * Ideally, we would like this behavior to apply for interfaces other - * than bge. However, doing so would negatively impact drivers which - * perform dynamic mapping and unmapping of DMA resources, which are - * increased by setting the maxpsz multiplier to 0 (more mblks per - * packet will be generated by tcp). The bge driver does not suffer - * from this, as it copies the mblks into pre-mapped buffers, and - * therefore does not require more I/O resources than before. - * - * Otherwise, this behavior is present on all network interfaces when - * the destination endpoint is non-local, since reducing the number - * of packets in general is good for the network. - * - * TODO We need to remove this hard-coded conditional for bge once - * a better "self-tuning" mechanism, or a way to comprehend - * the driver transmit strategy is devised. Until the solution - * is found and well understood, we live with this hack. - */ - if (!tcp_static_maxpsz && - (tcp->tcp_loopback || !tcp->tcp_localnet || - (ill->ill_name_length > 3 && bcmp(ill->ill_name, "bge", 3) == 0))) { - /* override the default value */ - tcp->tcp_maxpsz = 0; - - ip3dbg(("tcp_ire_ill_check: connp %p tcp_maxpsz %d on " - "interface %s\n", (void *)connp, tcp->tcp_maxpsz, - ill != NULL ? ill->ill_name : ipif_loopback_name)); - } - - /* set the stream head parameters accordingly */ - (void) tcp_maxpsz_set(tcp, B_TRUE); -} - /* tcp_wput_flush is called by tcp_wput_nondata to handle M_FLUSH messages. */ static void tcp_wput_flush(tcp_t *tcp, mblk_t *mp) { uchar_t fval = *mp->b_rptr; mblk_t *tail; - queue_t *q = tcp->tcp_wq; + conn_t *connp = tcp->tcp_connp; + queue_t *q = connp->conn_wq; /* TODO: How should flush interact with urgent data? 
*/ if ((fval & FLUSHW) && tcp->tcp_xmit_head && @@ -21473,7 +16350,7 @@ tcp_wput_flush(tcp_t *tcp, mblk_t *mp) } /* * We have no unsent data, so unsent must be less than - * tcp_xmit_lowater, so re-enable flow. + * conn_sndlowat, so re-enable flow. */ mutex_enter(&tcp->tcp_non_sq_lock); if (tcp->tcp_flow_stopped) { @@ -21501,12 +16378,12 @@ tcp_wput_flush(tcp_t *tcp, mblk_t *mp) static void tcp_wput_iocdata(tcp_t *tcp, mblk_t *mp) { - mblk_t *mp1; - struct iocblk *iocp = (struct iocblk *)mp->b_rptr; + mblk_t *mp1; + struct iocblk *iocp = (struct iocblk *)mp->b_rptr; STRUCT_HANDLE(strbuf, sb); - queue_t *q = tcp->tcp_wq; - int error; - uint_t addrlen; + uint_t addrlen; + conn_t *connp = tcp->tcp_connp; + queue_t *q = connp->conn_wq; /* Make sure it is one of ours. */ switch (iocp->ioc_cmd) { @@ -21514,7 +16391,7 @@ tcp_wput_iocdata(tcp_t *tcp, mblk_t *mp) case TI_GETPEERNAME: break; default: - CALL_IP_WPUT(tcp->tcp_connp, q, mp); + ip_wput_nondata(q, mp); return; } switch (mi_copy_state(q, mp, &mp1)) { @@ -21541,43 +16418,56 @@ tcp_wput_iocdata(tcp_t *tcp, mblk_t *mp) } STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr); - addrlen = tcp->tcp_family == AF_INET ? 
sizeof (sin_t) : sizeof (sin6_t); + + if (connp->conn_family == AF_INET) + addrlen = sizeof (sin_t); + else + addrlen = sizeof (sin6_t); + if (STRUCT_FGET(sb, maxlen) < addrlen) { mi_copy_done(q, mp, EINVAL); return; } - mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); - if (mp1 == NULL) - return; - switch (iocp->ioc_cmd) { case TI_GETMYNAME: - error = tcp_do_getsockname(tcp, (void *)mp1->b_rptr, &addrlen); break; case TI_GETPEERNAME: - error = tcp_do_getpeername(tcp, (void *)mp1->b_rptr, &addrlen); + if (tcp->tcp_state < TCPS_SYN_RCVD) { + mi_copy_done(q, mp, ENOTCONN); + return; + } break; } + mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); + if (!mp1) + return; - if (error != 0) { - mi_copy_done(q, mp, error); - } else { - mp1->b_wptr += addrlen; - STRUCT_FSET(sb, len, addrlen); - - /* Copy out the address */ - mi_copyout(q, mp); + STRUCT_FSET(sb, len, addrlen); + switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { + case TI_GETMYNAME: + (void) conn_getsockname(connp, (struct sockaddr *)mp1->b_wptr, + &addrlen); + break; + case TI_GETPEERNAME: + (void) conn_getpeername(connp, (struct sockaddr *)mp1->b_wptr, + &addrlen); + break; } + mp1->b_wptr += addrlen; + /* Copy out the address */ + mi_copyout(q, mp); } static void tcp_use_pure_tpi(tcp_t *tcp) { + conn_t *connp = tcp->tcp_connp; + #ifdef _ILP32 - tcp->tcp_acceptor_id = (t_uscalar_t)tcp->tcp_rq; + tcp->tcp_acceptor_id = (t_uscalar_t)connp->conn_rq; #else - tcp->tcp_acceptor_id = tcp->tcp_connp->conn_dev; + tcp->tcp_acceptor_id = connp->conn_dev; #endif /* * Insert this socket into the acceptor hash. 
@@ -21595,11 +16485,11 @@ tcp_use_pure_tpi(tcp_t *tcp) */ /* ARGSUSED */ static void -tcp_wput_ioctl(void *arg, mblk_t *mp, void *arg2) +tcp_wput_ioctl(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *dummy) { - conn_t *connp = (conn_t *)arg; - tcp_t *tcp = connp->conn_tcp; - queue_t *q = tcp->tcp_wq; + conn_t *connp = (conn_t *)arg; + tcp_t *tcp = connp->conn_tcp; + queue_t *q = connp->conn_wq; struct iocblk *iocp; ASSERT(DB_TYPE(mp) == M_IOCTL); @@ -21617,17 +16507,6 @@ tcp_wput_ioctl(void *arg, mblk_t *mp, void *arg2) iocp = (struct iocblk *)mp->b_rptr; switch (iocp->ioc_cmd) { - case TCP_IOC_DEFAULT_Q: - /* Wants to be the default wq. */ - if (secpolicy_ip_config(iocp->ioc_cr, B_FALSE) != 0) { - iocp->ioc_error = EPERM; - iocp->ioc_count = 0; - mp->b_datap->db_type = M_IOCACK; - qreply(q, mp); - return; - } - tcp_def_q_set(tcp, mp); - return; case _SIOCSOCKFALLBACK: /* * Either sockmod is about to be popped and the socket @@ -21650,7 +16529,7 @@ tcp_wput_ioctl(void *arg, mblk_t *mp, void *arg2) qreply(q, mp); return; } - CALL_IP_WPUT(connp, q, mp); + ip_wput_nondata(q, mp); } /* @@ -21658,14 +16537,14 @@ tcp_wput_ioctl(void *arg, mblk_t *mp, void *arg2) */ /* ARGSUSED */ static void -tcp_wput_proto(void *arg, mblk_t *mp, void *arg2) +tcp_wput_proto(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *dummy) { - conn_t *connp = (conn_t *)arg; - tcp_t *tcp = connp->conn_tcp; + conn_t *connp = (conn_t *)arg; + tcp_t *tcp = connp->conn_tcp; union T_primitives *tprim = (union T_primitives *)mp->b_rptr; - uchar_t *rptr; - t_scalar_t type; - cred_t *cr; + uchar_t *rptr; + t_scalar_t type; + cred_t *cr; /* * Try and ASSERT the minimum possible references on the @@ -21684,7 +16563,7 @@ tcp_wput_proto(void *arg, mblk_t *mp, void *arg2) if ((mp->b_wptr - rptr) >= sizeof (t_scalar_t)) { type = ((union T_primitives *)rptr)->type; if (type == T_EXDATA_REQ) { - tcp_output_urgent(connp, mp, arg2); + tcp_output_urgent(connp, mp, arg2, NULL); } else if (type != T_DATA_REQ) { 
goto non_urgent_data; } else { @@ -21695,7 +16574,7 @@ tcp_wput_proto(void *arg, mblk_t *mp, void *arg2) } return; } else { - if (tcp->tcp_debug) { + if (connp->conn_debug) { (void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, "tcp_wput_proto, dropping one..."); } @@ -21776,17 +16655,10 @@ non_urgent_data: * for subsequent processing by ip_restart_optmgmt(), which * will do the CONN_DEC_REF(). */ - CONN_INC_REF(connp); if ((int)tprim->type == T_SVR4_OPTMGMT_REQ) { - if (svr4_optcom_req(tcp->tcp_wq, mp, cr, &tcp_opt_obj, - B_TRUE) != EINPROGRESS) { - CONN_DEC_REF(connp); - } + svr4_optcom_req(connp->conn_wq, mp, cr, &tcp_opt_obj); } else { - if (tpi_optcom_req(tcp->tcp_wq, mp, cr, &tcp_opt_obj, - B_TRUE) != EINPROGRESS) { - CONN_DEC_REF(connp); - } + tpi_optcom_req(connp->conn_wq, mp, cr, &tcp_opt_obj); } break; @@ -21804,7 +16676,7 @@ non_urgent_data: * We were crossing FINs and got a reset from * the other side. Just ignore it. */ - if (tcp->tcp_debug) { + if (connp->conn_debug) { (void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, "tcp_wput_proto, T_ORDREL_REQ out of " @@ -21818,7 +16690,7 @@ non_urgent_data: tcp_addr_req(tcp, mp); break; default: - if (tcp->tcp_debug) { + if (connp->conn_debug) { (void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, "tcp_wput_proto, bogus TPI msg, type %d", tprim->type); @@ -21844,19 +16716,6 @@ tcp_wsrv(queue_t *q) TCP_STAT(tcps, tcp_wsrv_called); } -/* Non overlapping byte exchanger */ -static void -tcp_xchg(uchar_t *a, uchar_t *b, int len) -{ - uchar_t uch; - - while (len-- > 0) { - uch = a[len]; - a[len] = b[len]; - b[len] = uch; - } -} - /* * Send out a control packet on the tcp connection specified. This routine * is typically called where we need a simple ACK or RST generated. 
@@ -21865,50 +16724,51 @@ static void tcp_xmit_ctl(char *str, tcp_t *tcp, uint32_t seq, uint32_t ack, int ctl) { uchar_t *rptr; - tcph_t *tcph; + tcpha_t *tcpha; ipha_t *ipha = NULL; ip6_t *ip6h = NULL; uint32_t sum; - int tcp_hdr_len; - int tcp_ip_hdr_len; + int total_hdr_len; + int ip_hdr_len; mblk_t *mp; tcp_stack_t *tcps = tcp->tcp_tcps; + conn_t *connp = tcp->tcp_connp; + ip_xmit_attr_t *ixa = connp->conn_ixa; /* * Save sum for use in source route later. */ - ASSERT(tcp != NULL); - sum = tcp->tcp_tcp_hdr_len + tcp->tcp_sum; - tcp_hdr_len = tcp->tcp_hdr_len; - tcp_ip_hdr_len = tcp->tcp_ip_hdr_len; + sum = connp->conn_ht_ulp_len + connp->conn_sum; + total_hdr_len = connp->conn_ht_iphc_len; + ip_hdr_len = ixa->ixa_ip_hdr_length; /* If a text string is passed in with the request, pass it to strlog. */ - if (str != NULL && tcp->tcp_debug) { + if (str != NULL && connp->conn_debug) { (void) strlog(TCP_MOD_ID, 0, 1, SL_TRACE, "tcp_xmit_ctl: '%s', seq 0x%x, ack 0x%x, ctl 0x%x", str, seq, ack, ctl); } - mp = allocb(tcp_ip_hdr_len + TCP_MAX_HDR_LENGTH + tcps->tcps_wroff_xtra, + mp = allocb(connp->conn_ht_iphc_allocated + tcps->tcps_wroff_xtra, BPRI_MED); if (mp == NULL) { return; } rptr = &mp->b_rptr[tcps->tcps_wroff_xtra]; mp->b_rptr = rptr; - mp->b_wptr = &rptr[tcp_hdr_len]; - bcopy(tcp->tcp_iphc, rptr, tcp_hdr_len); + mp->b_wptr = &rptr[total_hdr_len]; + bcopy(connp->conn_ht_iphc, rptr, total_hdr_len); + + ixa->ixa_pktlen = total_hdr_len; - if (tcp->tcp_ipversion == IPV4_VERSION) { + if (ixa->ixa_flags & IXAF_IS_IPV4) { ipha = (ipha_t *)rptr; - ipha->ipha_length = htons(tcp_hdr_len); + ipha->ipha_length = htons(total_hdr_len); } else { ip6h = (ip6_t *)rptr; - ASSERT(tcp != NULL); - ip6h->ip6_plen = htons(tcp->tcp_hdr_len - - ((char *)&tcp->tcp_ip6h[1] - tcp->tcp_iphc)); + ip6h->ip6_plen = htons(total_hdr_len - IPV6_HDR_LEN); } - tcph = (tcph_t *)&rptr[tcp_ip_hdr_len]; - tcph->th_flags[0] = (uint8_t)ctl; + tcpha = (tcpha_t *)&rptr[ip_hdr_len]; + tcpha->tha_flags = 
(uint8_t)ctl; if (ctl & TH_RST) { BUMP_MIB(&tcps->tcps_mib, tcpOutRsts); BUMP_MIB(&tcps->tcps_mib, tcpOutControl); @@ -21917,43 +16777,45 @@ tcp_xmit_ctl(char *str, tcp_t *tcp, uint32_t seq, uint32_t ack, int ctl) */ if (tcp->tcp_snd_ts_ok && tcp->tcp_state > TCPS_SYN_SENT) { - mp->b_wptr = &rptr[tcp_hdr_len - TCPOPT_REAL_TS_LEN]; + mp->b_wptr = &rptr[total_hdr_len - TCPOPT_REAL_TS_LEN]; *(mp->b_wptr) = TCPOPT_EOL; - if (tcp->tcp_ipversion == IPV4_VERSION) { - ipha->ipha_length = htons(tcp_hdr_len - + + ixa->ixa_pktlen = total_hdr_len - TCPOPT_REAL_TS_LEN; + + if (connp->conn_ipversion == IPV4_VERSION) { + ipha->ipha_length = htons(total_hdr_len - TCPOPT_REAL_TS_LEN); } else { - ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) - - TCPOPT_REAL_TS_LEN); + ip6h->ip6_plen = htons(total_hdr_len - + IPV6_HDR_LEN - TCPOPT_REAL_TS_LEN); } - tcph->th_offset_and_rsrvd[0] -= (3 << 4); + tcpha->tha_offset_and_reserved -= (3 << 4); sum -= TCPOPT_REAL_TS_LEN; } } if (ctl & TH_ACK) { if (tcp->tcp_snd_ts_ok) { U32_TO_BE32(lbolt, - (char *)tcph+TCP_MIN_HEADER_LENGTH+4); + (char *)tcpha + TCP_MIN_HEADER_LENGTH+4); U32_TO_BE32(tcp->tcp_ts_recent, - (char *)tcph+TCP_MIN_HEADER_LENGTH+8); + (char *)tcpha + TCP_MIN_HEADER_LENGTH+8); } /* Update the latest receive window size in TCP header. */ - U32_TO_ABE16(tcp->tcp_rwnd >> tcp->tcp_rcv_ws, - tcph->th_win); + tcpha->tha_win = htons(tcp->tcp_rwnd >> tcp->tcp_rcv_ws); tcp->tcp_rack = ack; tcp->tcp_rack_cnt = 0; BUMP_MIB(&tcps->tcps_mib, tcpOutAck); } BUMP_LOCAL(tcp->tcp_obsegs); - U32_TO_BE32(seq, tcph->th_seq); - U32_TO_BE32(ack, tcph->th_ack); + tcpha->tha_seq = htonl(seq); + tcpha->tha_ack = htonl(ack); /* * Include the adjustment for a source route if any. 
*/ sum = (sum >> 16) + (sum & 0xFFFF); - U16_TO_BE16(sum, tcph->th_sum); - tcp_send_data(tcp, tcp->tcp_wq, mp); + tcpha->tha_sum = htons(sum); + tcp_send_data(tcp, mp); } /* @@ -21991,115 +16853,32 @@ tcp_send_rst_chk(tcp_stack_t *tcps) } /* - * Send down the advice IP ioctl to tell IP to mark an IRE temporary. - */ -static void -tcp_ip_ire_mark_advice(tcp_t *tcp) -{ - mblk_t *mp; - ipic_t *ipic; - - if (tcp->tcp_ipversion == IPV4_VERSION) { - mp = tcp_ip_advise_mblk(&tcp->tcp_ipha->ipha_dst, IP_ADDR_LEN, - &ipic); - } else { - mp = tcp_ip_advise_mblk(&tcp->tcp_ip6h->ip6_dst, IPV6_ADDR_LEN, - &ipic); - } - if (mp == NULL) - return; - ipic->ipic_ire_marks |= IRE_MARK_TEMPORARY; - CALL_IP_WPUT(tcp->tcp_connp, tcp->tcp_wq, mp); -} - -/* - * Return an IP advice ioctl mblk and set ipic to be the pointer - * to the advice structure. - */ -static mblk_t * -tcp_ip_advise_mblk(void *addr, int addr_len, ipic_t **ipic) -{ - struct iocblk *ioc; - mblk_t *mp, *mp1; - - mp = allocb(sizeof (ipic_t) + addr_len, BPRI_HI); - if (mp == NULL) - return (NULL); - bzero(mp->b_rptr, sizeof (ipic_t) + addr_len); - *ipic = (ipic_t *)mp->b_rptr; - (*ipic)->ipic_cmd = IP_IOC_IRE_ADVISE_NO_REPLY; - (*ipic)->ipic_addr_offset = sizeof (ipic_t); - - bcopy(addr, *ipic + 1, addr_len); - - (*ipic)->ipic_addr_length = addr_len; - mp->b_wptr = &mp->b_rptr[sizeof (ipic_t) + addr_len]; - - mp1 = mkiocb(IP_IOCTL); - if (mp1 == NULL) { - freemsg(mp); - return (NULL); - } - mp1->b_cont = mp; - ioc = (struct iocblk *)mp1->b_rptr; - ioc->ioc_count = sizeof (ipic_t) + addr_len; - - return (mp1); -} - -/* * Generate a reset based on an inbound packet, connp is set by caller * when RST is in response to an unexpected inbound packet for which * there is active tcp state in the system. * * IPSEC NOTE : Try to send the reply with the same protection as it came - * in. We still have the ipsec_mp that the packet was attached to. 
Thus - * the packet will go out at the same level of protection as it came in by - * converting the IPSEC_IN to IPSEC_OUT. + * in. We have the ip_recv_attr_t which is reversed to form the ip_xmit_attr_t. + * That way the packet will go out at the same level of protection as it + * came in with. */ static void -tcp_xmit_early_reset(char *str, mblk_t *mp, uint32_t seq, - uint32_t ack, int ctl, uint_t ip_hdr_len, zoneid_t zoneid, - tcp_stack_t *tcps, conn_t *connp) +tcp_xmit_early_reset(char *str, mblk_t *mp, uint32_t seq, uint32_t ack, int ctl, + ip_recv_attr_t *ira, ip_stack_t *ipst, conn_t *connp) { ipha_t *ipha = NULL; ip6_t *ip6h = NULL; ushort_t len; - tcph_t *tcph; + tcpha_t *tcpha; int i; - mblk_t *ipsec_mp; - boolean_t mctl_present; - ipic_t *ipic; ipaddr_t v4addr; in6_addr_t v6addr; - int addr_len; - void *addr; - queue_t *q = tcps->tcps_g_q; - tcp_t *tcp; - cred_t *cr; - pid_t pid; - mblk_t *nmp; - ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip; - - if (tcps->tcps_g_q == NULL) { - /* - * For non-zero stackids the default queue isn't created - * until the first open, thus there can be a need to send - * a reset before then. But we can't do that, hence we just - * drop the packet. Later during boot, when the default queue - * has been setup, a retransmitted packet from the peer - * will result in a reset. 
- */ - ASSERT(tcps->tcps_netstack->netstack_stackid != - GLOBAL_NETSTACKID); - freemsg(mp); - return; - } - - if (connp != NULL) - tcp = connp->conn_tcp; - else - tcp = Q_TO_TCP(q); + netstack_t *ns = ipst->ips_netstack; + tcp_stack_t *tcps = ns->netstack_tcp; + ip_xmit_attr_t ixas, *ixa; + uint_t ip_hdr_len = ira->ira_ip_hdr_length; + boolean_t need_refrele = B_FALSE; /* ixa_refrele(ixa) */ + ushort_t port; if (!tcp_send_rst_chk(tcps)) { tcps->tcps_rst_unsent++; @@ -22107,16 +16886,41 @@ tcp_xmit_early_reset(char *str, mblk_t *mp, uint32_t seq, return; } - if (mp->b_datap->db_type == M_CTL) { - ipsec_mp = mp; - mp = mp->b_cont; - mctl_present = B_TRUE; + /* + * If connp != NULL we use conn_ixa to keep IP_NEXTHOP and other + * options from the listener. In that case the caller must ensure that + * we are running on the listener = connp squeue. + * + * We get a safe copy of conn_ixa so we don't need to restore anything + * we or ip_output_simple might change in the ixa. + */ + if (connp != NULL) { + ASSERT(connp->conn_on_sqp); + + ixa = conn_get_ixa_exclusive(connp); + if (ixa == NULL) { + tcps->tcps_rst_unsent++; + freemsg(mp); + return; + } + need_refrele = B_TRUE; } else { - ipsec_mp = mp; - mctl_present = B_FALSE; + bzero(&ixas, sizeof (ixas)); + ixa = &ixas; + /* + * IXAF_VERIFY_SOURCE is overkill since we know the + * packet was for us. 
+ */ + ixa->ixa_flags |= IXAF_SET_ULP_CKSUM | IXAF_VERIFY_SOURCE; + ixa->ixa_protocol = IPPROTO_TCP; + ixa->ixa_zoneid = ira->ira_zoneid; + ixa->ixa_ifindex = 0; + ixa->ixa_ipst = ipst; + ixa->ixa_cred = kcred; + ixa->ixa_cpid = NOPID; } - if (str && q && tcps->tcps_dbg) { + if (str && tcps->tcps_dbg) { (void) strlog(TCP_MOD_ID, 0, 1, SL_TRACE, "tcp_xmit_early_reset: '%s', seq 0x%x, ack 0x%x, " "flags 0x%x", @@ -22126,20 +16930,12 @@ tcp_xmit_early_reset(char *str, mblk_t *mp, uint32_t seq, mblk_t *mp1 = copyb(mp); freemsg(mp); mp = mp1; - if (!mp) { - if (mctl_present) - freeb(ipsec_mp); - return; - } else { - if (mctl_present) { - ipsec_mp->b_cont = mp; - } else { - ipsec_mp = mp; - } - } + if (mp == NULL) + goto done; } else if (mp->b_cont) { freemsg(mp->b_cont); mp->b_cont = NULL; + DB_CKSUMFLAGS(mp) = 0; } /* * We skip reversing source route here. @@ -22159,18 +16955,20 @@ tcp_xmit_early_reset(char *str, mblk_t *mp, uint32_t seq, */ if (ipha->ipha_src == 0 || ipha->ipha_src == INADDR_BROADCAST || CLASSD(ipha->ipha_src)) { - freemsg(ipsec_mp); BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsInDiscards); - return; + ip_drop_input("ipIfStatsInDiscards", mp, NULL); + freemsg(mp); + goto done; } } else { ip6h = (ip6_t *)mp->b_rptr; if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src)) { - freemsg(ipsec_mp); BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInDiscards); - return; + ip_drop_input("ipIfStatsInDiscards", mp, NULL); + freemsg(mp); + goto done; } /* Remove any extension headers assuming partial overlay */ @@ -22185,13 +16983,13 @@ tcp_xmit_early_reset(char *str, mblk_t *mp, uint32_t seq, ip6h->ip6_nxt = IPPROTO_TCP; } } - tcph = (tcph_t *)&mp->b_rptr[ip_hdr_len]; - if (tcph->th_flags[0] & TH_RST) { - freemsg(ipsec_mp); - return; + tcpha = (tcpha_t *)&mp->b_rptr[ip_hdr_len]; + if (tcpha->tha_flags & TH_RST) { + freemsg(mp); + goto done; } - tcph->th_offset_and_rsrvd[0] = (5 << 4); - len = ip_hdr_len + sizeof (tcph_t); + 
tcpha->tha_offset_and_reserved = (5 << 4); + len = ip_hdr_len + sizeof (tcpha_t); mp->b_wptr = &mp->b_rptr[len]; if (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION) { ipha->ipha_length = htons(len); @@ -22201,108 +16999,79 @@ tcp_xmit_early_reset(char *str, mblk_t *mp, uint32_t seq, ipha->ipha_dst = v4addr; ipha->ipha_ident = 0; ipha->ipha_ttl = (uchar_t)tcps->tcps_ipv4_ttl; - addr_len = IP_ADDR_LEN; - addr = &v4addr; + ixa->ixa_flags |= IXAF_IS_IPV4; + ixa->ixa_ip_hdr_length = ip_hdr_len; } else { - /* No ip6i_t in this case */ ip6h->ip6_plen = htons(len - IPV6_HDR_LEN); /* Swap addresses */ v6addr = ip6h->ip6_src; ip6h->ip6_src = ip6h->ip6_dst; ip6h->ip6_dst = v6addr; ip6h->ip6_hops = (uchar_t)tcps->tcps_ipv6_hoplimit; - addr_len = IPV6_ADDR_LEN; - addr = &v6addr; - } - tcp_xchg(tcph->th_fport, tcph->th_lport, 2); - U32_TO_BE32(ack, tcph->th_ack); - U32_TO_BE32(seq, tcph->th_seq); - U16_TO_BE16(0, tcph->th_win); - U16_TO_BE16(sizeof (tcph_t), tcph->th_sum); - tcph->th_flags[0] = (uint8_t)ctl; + ixa->ixa_flags &= ~IXAF_IS_IPV4; + + if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_dst)) { + ixa->ixa_flags |= IXAF_SCOPEID_SET; + ixa->ixa_scopeid = ira->ira_ruifindex; + } + ixa->ixa_ip_hdr_length = IPV6_HDR_LEN; + } + ixa->ixa_pktlen = len; + + /* Swap the ports */ + port = tcpha->tha_fport; + tcpha->tha_fport = tcpha->tha_lport; + tcpha->tha_lport = port; + + tcpha->tha_ack = htonl(ack); + tcpha->tha_seq = htonl(seq); + tcpha->tha_win = 0; + tcpha->tha_sum = htons(sizeof (tcpha_t)); + tcpha->tha_flags = (uint8_t)ctl; if (ctl & TH_RST) { BUMP_MIB(&tcps->tcps_mib, tcpOutRsts); BUMP_MIB(&tcps->tcps_mib, tcpOutControl); } - /* IP trusts us to set up labels when required. 
*/ - if (is_system_labeled() && (cr = msg_getcred(mp, &pid)) != NULL && - crgetlabel(cr) != NULL) { - int err; - - if (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION) - err = tsol_check_label(cr, &mp, - tcp->tcp_connp->conn_mac_mode, - tcps->tcps_netstack->netstack_ip, pid); - else - err = tsol_check_label_v6(cr, &mp, - tcp->tcp_connp->conn_mac_mode, - tcps->tcps_netstack->netstack_ip, pid); - if (mctl_present) - ipsec_mp->b_cont = mp; - else - ipsec_mp = mp; - if (err != 0) { - freemsg(ipsec_mp); - return; - } - if (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION) { - ipha = (ipha_t *)mp->b_rptr; - } else { - ip6h = (ip6_t *)mp->b_rptr; - } + /* Discard any old label */ + if (ixa->ixa_free_flags & IXA_FREE_TSL) { + ASSERT(ixa->ixa_tsl != NULL); + label_rele(ixa->ixa_tsl); + ixa->ixa_free_flags &= ~IXA_FREE_TSL; } + ixa->ixa_tsl = ira->ira_tsl; /* Behave as a multi-level responder */ - if (mctl_present) { - ipsec_in_t *ii = (ipsec_in_t *)ipsec_mp->b_rptr; - - ASSERT(ii->ipsec_in_type == IPSEC_IN); - if (!ipsec_in_to_out(ipsec_mp, ipha, ip6h, zoneid)) { - return; + if (ira->ira_flags & IRAF_IPSEC_SECURE) { + /* + * Apply IPsec based on how IPsec was applied to + * the packet that caused the RST. + */ + if (!ipsec_in_to_out(ira, ixa, mp, ipha, ip6h)) { + BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); + /* Note: mp already consumed and ip_drop_packet done */ + goto done; } + } else { + /* + * This is in clear. The RST message we are building + * here should go out in clear, independent of our policy. + */ + ixa->ixa_flags |= IXAF_NO_IPSEC; } - if (zoneid == ALL_ZONES) - zoneid = GLOBAL_ZONEID; - - /* Add the zoneid so ip_output routes it properly */ - if ((nmp = ip_prepend_zoneid(ipsec_mp, zoneid, ipst)) == NULL) { - freemsg(ipsec_mp); - return; - } - ipsec_mp = nmp; /* * NOTE: one might consider tracing a TCP packet here, but * this function has no active TCP state and no tcp structure * that has a trace buffer. 
If we traced here, we would have * to keep a local trace buffer in tcp_record_trace(). - * - * TSol note: The mblk that contains the incoming packet was - * reused by tcp_xmit_listener_reset, so it already contains - * the right credentials and we don't need to call mblk_setcred. - * Also the conn's cred is not right since it is associated - * with tcps_g_q. */ - CALL_IP_WPUT(tcp->tcp_connp, tcp->tcp_wq, ipsec_mp); - /* - * Tell IP to mark the IRE used for this destination temporary. - * This way, we can limit our exposure to DoS attack because IP - * creates an IRE for each destination. If there are too many, - * the time to do any routing lookup will be extremely long. And - * the lookup can be in interrupt context. - * - * Note that in normal circumstances, this marking should not - * affect anything. It would be nice if only 1 message is - * needed to inform IP that the IRE created for this RST should - * not be added to the cache table. But there is currently - * not such communication mechanism between TCP and IP. So - * the best we can do now is to send the advice ioctl to IP - * to mark the IRE temporary. 
- */ - if ((mp = tcp_ip_advise_mblk(addr, addr_len, &ipic)) != NULL) { - ipic->ipic_ire_marks |= IRE_MARK_TEMPORARY; - CALL_IP_WPUT(tcp->tcp_connp, tcp->tcp_wq, mp); + (void) ip_output_simple(mp, ixa); +done: + ixa_cleanup(ixa); + if (need_refrele) { + ASSERT(ixa != &ixas); + ixa_refrele(ixa); } } @@ -22313,9 +17082,11 @@ tcp_xmit_early_reset(char *str, mblk_t *mp, uint32_t seq, static int tcp_xmit_end(tcp_t *tcp) { - ipic_t *ipic; - mblk_t *mp; + mblk_t *mp; tcp_stack_t *tcps = tcp->tcp_tcps; + iulp_t uinfo; + ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip; + conn_t *connp = tcp->tcp_connp; if (tcp->tcp_state < TCPS_SYN_RCVD || tcp->tcp_state > TCPS_CLOSE_WAIT) { @@ -22337,7 +17108,7 @@ tcp_xmit_end(tcp_t *tcp) tcp->tcp_fss, B_FALSE, NULL, B_FALSE); if (mp) { - tcp_send_data(tcp, tcp->tcp_wq, mp); + tcp_send_data(tcp, mp); } else { /* * Couldn't allocate msg. Pretend we got it out. @@ -22373,66 +17144,49 @@ tcp_xmit_end(tcp_t *tcp) return (0); /* - * NOTE: should not update if source routes i.e. if tcp_remote if - * different from the destination. + * We do not have a good algorithm to update ssthresh at this time. + * So don't do any update. + */ + bzero(&uinfo, sizeof (uinfo)); + uinfo.iulp_rtt = tcp->tcp_rtt_sa; + uinfo.iulp_rtt_sd = tcp->tcp_rtt_sd; + + /* + * Note that uinfo is kept for conn_faddr in the DCE. Could update even + * if source routed but we don't. 
*/ - if (tcp->tcp_ipversion == IPV4_VERSION) { - if (tcp->tcp_remote != tcp->tcp_ipha->ipha_dst) { + if (connp->conn_ipversion == IPV4_VERSION) { + if (connp->conn_faddr_v4 != tcp->tcp_ipha->ipha_dst) { return (0); } - mp = tcp_ip_advise_mblk(&tcp->tcp_ipha->ipha_dst, IP_ADDR_LEN, - &ipic); + (void) dce_update_uinfo_v4(connp->conn_faddr_v4, &uinfo, ipst); } else { - if (!(IN6_ARE_ADDR_EQUAL(&tcp->tcp_remote_v6, + uint_t ifindex; + + if (!(IN6_ARE_ADDR_EQUAL(&connp->conn_faddr_v6, &tcp->tcp_ip6h->ip6_dst))) { return (0); } - mp = tcp_ip_advise_mblk(&tcp->tcp_ip6h->ip6_dst, IPV6_ADDR_LEN, - &ipic); - } - - /* Record route attributes in the IRE for use by future connections. */ - if (mp == NULL) - return (0); - - /* - * We do not have a good algorithm to update ssthresh at this time. - * So don't do any update. - */ - ipic->ipic_rtt = tcp->tcp_rtt_sa; - ipic->ipic_rtt_sd = tcp->tcp_rtt_sd; - - CALL_IP_WPUT(tcp->tcp_connp, tcp->tcp_wq, mp); - - return (0); -} + ifindex = 0; + if (IN6_IS_ADDR_LINKSCOPE(&connp->conn_faddr_v6)) { + ip_xmit_attr_t *ixa = connp->conn_ixa; -/* ARGSUSED */ -void -tcp_xmit_reset(void *arg, mblk_t *mp, void *arg2) -{ - conn_t *connp = (conn_t *)arg; - mblk_t *mp1; - tcp_t *tcp = connp->conn_tcp; - tcp_xmit_reset_event_t *eventp; - - ASSERT(mp->b_datap->db_type == M_PROTO && - MBLKL(mp) == sizeof (tcp_xmit_reset_event_t)); + /* + * If we are going to create a DCE we'd better have + * an ifindex + */ + if (ixa->ixa_nce != NULL) { + ifindex = ixa->ixa_nce->nce_common->ncec_ill-> + ill_phyint->phyint_ifindex; + } else { + return (0); + } + } - if (tcp->tcp_state != TCPS_LISTEN) { - freemsg(mp); - return; + (void) dce_update_uinfo(&connp->conn_faddr_v6, ifindex, &uinfo, + ipst); } - - mp1 = mp->b_cont; - mp->b_cont = NULL; - eventp = (tcp_xmit_reset_event_t *)mp->b_rptr; - ASSERT(eventp->tcp_xre_tcps->tcps_netstack == - connp->conn_netstack); - - tcp_xmit_listeners_reset(mp1, eventp->tcp_xre_iphdrlen, - eventp->tcp_xre_zoneid, eventp->tcp_xre_tcps, 
connp); - freemsg(mp); + return (0); } /* @@ -22442,45 +17196,25 @@ tcp_xmit_reset(void *arg, mblk_t *mp, void *arg2) * Note that we are reusing the incoming mp to construct the outgoing RST. */ void -tcp_xmit_listeners_reset(mblk_t *mp, uint_t ip_hdr_len, zoneid_t zoneid, - tcp_stack_t *tcps, conn_t *connp) +tcp_xmit_listeners_reset(mblk_t *mp, ip_recv_attr_t *ira, ip_stack_t *ipst, + conn_t *connp) { uchar_t *rptr; uint32_t seg_len; - tcph_t *tcph; + tcpha_t *tcpha; uint32_t seg_seq; uint32_t seg_ack; uint_t flags; - mblk_t *ipsec_mp; ipha_t *ipha; ip6_t *ip6h; - boolean_t mctl_present = B_FALSE; - boolean_t check = B_TRUE; boolean_t policy_present; + netstack_t *ns = ipst->ips_netstack; + tcp_stack_t *tcps = ns->netstack_tcp; ipsec_stack_t *ipss = tcps->tcps_netstack->netstack_ipsec; + uint_t ip_hdr_len = ira->ira_ip_hdr_length; TCP_STAT(tcps, tcp_no_listener); - ipsec_mp = mp; - - if (mp->b_datap->db_type == M_CTL) { - ipsec_in_t *ii; - - mctl_present = B_TRUE; - mp = mp->b_cont; - - ii = (ipsec_in_t *)ipsec_mp->b_rptr; - ASSERT(ii->ipsec_in_type == IPSEC_IN); - if (ii->ipsec_in_dont_check) { - check = B_FALSE; - if (!ii->ipsec_in_secure) { - freeb(ipsec_mp); - mctl_present = B_FALSE; - ipsec_mp = mp; - } - } - } - if (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION) { policy_present = ipss->ipsec_inbound_v4_policy_present; ipha = (ipha_t *)mp->b_rptr; @@ -22491,41 +17225,39 @@ tcp_xmit_listeners_reset(mblk_t *mp, uint_t ip_hdr_len, zoneid_t zoneid, ip6h = (ip6_t *)mp->b_rptr; } - if (check && policy_present) { + if (policy_present) { /* * The conn_t parameter is NULL because we already know * nobody's home. 
*/ - ipsec_mp = ipsec_check_global_policy( - ipsec_mp, (conn_t *)NULL, ipha, ip6h, mctl_present, - tcps->tcps_netstack); - if (ipsec_mp == NULL) + mp = ipsec_check_global_policy(mp, (conn_t *)NULL, ipha, ip6h, + ira, ns); + if (mp == NULL) return; } - if (is_system_labeled() && !tsol_can_reply_error(mp)) { + if (is_system_labeled() && !tsol_can_reply_error(mp, ira)) { DTRACE_PROBE2( tx__ip__log__error__nolistener__tcp, char *, "Could not reply with RST to mp(1)", mblk_t *, mp); ip2dbg(("tcp_xmit_listeners_reset: not permitted to reply\n")); - freemsg(ipsec_mp); + freemsg(mp); return; } rptr = mp->b_rptr; - tcph = (tcph_t *)&rptr[ip_hdr_len]; - seg_seq = BE32_TO_U32(tcph->th_seq); - seg_ack = BE32_TO_U32(tcph->th_ack); - flags = tcph->th_flags[0]; + tcpha = (tcpha_t *)&rptr[ip_hdr_len]; + seg_seq = ntohl(tcpha->tha_seq); + seg_ack = ntohl(tcpha->tha_ack); + flags = tcpha->tha_flags; - seg_len = msgdsize(mp) - (TCP_HDR_LENGTH(tcph) + ip_hdr_len); + seg_len = msgdsize(mp) - (TCP_HDR_LENGTH(tcpha) + ip_hdr_len); if (flags & TH_RST) { - freemsg(ipsec_mp); + freemsg(mp); } else if (flags & TH_ACK) { - tcp_xmit_early_reset("no tcp, reset", - ipsec_mp, seg_ack, 0, TH_RST, ip_hdr_len, zoneid, tcps, - connp); + tcp_xmit_early_reset("no tcp, reset", mp, seg_ack, 0, TH_RST, + ira, ipst, connp); } else { if (flags & TH_SYN) { seg_len++; @@ -22537,14 +17269,13 @@ tcp_xmit_listeners_reset(mblk_t *mp, uint_t ip_hdr_len, zoneid_t zoneid, * segment is neither. Just drop it on the * floor. 
*/ - freemsg(ipsec_mp); + freemsg(mp); tcps->tcps_rst_unsent++; return; } - tcp_xmit_early_reset("no tcp, reset/ack", - ipsec_mp, 0, seg_seq + seg_len, - TH_RST | TH_ACK, ip_hdr_len, zoneid, tcps, connp); + tcp_xmit_early_reset("no tcp, reset/ack", mp, 0, + seg_seq + seg_len, TH_RST | TH_ACK, ira, ipst, connp); } } @@ -22573,14 +17304,16 @@ tcp_xmit_mp(tcp_t *tcp, mblk_t *mp, int32_t max_to_send, int32_t *offset, mblk_t *mp1; mblk_t *mp2; uchar_t *rptr; - tcph_t *tcph; + tcpha_t *tcpha; int32_t num_sack_blk = 0; int32_t sack_opt_len = 0; tcp_stack_t *tcps = tcp->tcp_tcps; + conn_t *connp = tcp->tcp_connp; + ip_xmit_attr_t *ixa = connp->conn_ixa; /* Allocate for our maximum TCP header + link-level */ - mp1 = allocb(tcp->tcp_ip_hdr_len + TCP_MAX_HDR_LENGTH + - tcps->tcps_wroff_xtra, BPRI_MED); + mp1 = allocb(connp->conn_ht_iphc_allocated + tcps->tcps_wroff_xtra, + BPRI_MED); if (!mp1) return (NULL); data_length = 0; @@ -22646,15 +17379,14 @@ tcp_xmit_mp(tcp_t *tcp, mblk_t *mp, int32_t max_to_send, int32_t *offset, } /* Update the latest receive window size in TCP header. */ - U32_TO_ABE16(tcp->tcp_rwnd >> tcp->tcp_rcv_ws, - tcp->tcp_tcph->th_win); + tcp->tcp_tcpha->tha_win = htons(tcp->tcp_rwnd >> tcp->tcp_rcv_ws); rptr = mp1->b_rptr + tcps->tcps_wroff_xtra; mp1->b_rptr = rptr; - mp1->b_wptr = rptr + tcp->tcp_hdr_len + sack_opt_len; - bcopy(tcp->tcp_iphc, rptr, tcp->tcp_hdr_len); - tcph = (tcph_t *)&rptr[tcp->tcp_ip_hdr_len]; - U32_TO_ABE32(seq, tcph->th_seq); + mp1->b_wptr = rptr + connp->conn_ht_iphc_len + sack_opt_len; + bcopy(connp->conn_ht_iphc, rptr, connp->conn_ht_iphc_len); + tcpha = (tcpha_t *)&rptr[ixa->ixa_ip_hdr_length]; + tcpha->tha_seq = htonl(seq); /* * Use tcp_unsent to determine if the PUSH bit should be used assumes @@ -22729,14 +17461,14 @@ tcp_xmit_mp(tcp_t *tcp, mblk_t *mp, int32_t max_to_send, int32_t *offset, wptr[0] = TCPOPT_MAXSEG; wptr[1] = TCPOPT_MAXSEG_LEN; wptr += 2; - u1 = tcp->tcp_if_mtu - - (tcp->tcp_ipversion == IPV4_VERSION ? 
+ u1 = tcp->tcp_initial_pmtu - + (connp->conn_ipversion == IPV4_VERSION ? IP_SIMPLE_HDR_LENGTH : IPV6_HDR_LEN) - TCP_MIN_HEADER_LENGTH; U16_TO_BE16(u1, wptr); mp1->b_wptr = wptr + 2; /* Update the offset to cover the additional word */ - tcph->th_offset_and_rsrvd[0] += (1 << 4); + tcpha->tha_offset_and_reserved += (1 << 4); /* * Note that the following way of filling in @@ -22763,7 +17495,7 @@ tcp_xmit_mp(tcp_t *tcp, mblk_t *mp, int32_t max_to_send, int32_t *offset, ASSERT(tcp->tcp_ts_recent == 0); U32_TO_BE32(0L, wptr); mp1->b_wptr += TCPOPT_REAL_TS_LEN; - tcph->th_offset_and_rsrvd[0] += + tcpha->tha_offset_and_reserved += (3 << 4); } @@ -22819,7 +17551,7 @@ tcp_xmit_mp(tcp_t *tcp, mblk_t *mp, int32_t max_to_send, int32_t *offset, wptr[2] = TCPOPT_WS_LEN; wptr[3] = (uchar_t)tcp->tcp_rcv_ws; mp1->b_wptr += TCPOPT_REAL_WS_LEN; - tcph->th_offset_and_rsrvd[0] += (1 << 4); + tcpha->tha_offset_and_reserved += (1 << 4); } if (tcp->tcp_snd_sack_ok) { @@ -22829,7 +17561,7 @@ tcp_xmit_mp(tcp_t *tcp, mblk_t *mp, int32_t max_to_send, int32_t *offset, wptr[2] = TCPOPT_SACK_PERMITTED; wptr[3] = TCPOPT_SACK_OK_LEN; mp1->b_wptr += TCPOPT_REAL_SACK_OK_LEN; - tcph->th_offset_and_rsrvd[0] += (1 << 4); + tcpha->tha_offset_and_reserved += (1 << 4); } /* allocb() of adequate mblk assures space */ @@ -22840,9 +17572,9 @@ tcp_xmit_mp(tcp_t *tcp, mblk_t *mp, int32_t max_to_send, int32_t *offset, * Get IP set to checksum on our behalf * Include the adjustment for a source route if any. 
*/ - u1 += tcp->tcp_sum; + u1 += connp->conn_sum; u1 = (u1 >> 16) + (u1 & 0xFFFF); - U16_TO_BE16(u1, tcph->th_sum); + tcpha->tha_sum = htons(u1); BUMP_MIB(&tcps->tcps_mib, tcpOutControl); } if ((tcp->tcp_valid_bits & TCP_FSS_VALID) && @@ -22878,10 +17610,10 @@ tcp_xmit_mp(tcp_t *tcp, mblk_t *mp, int32_t max_to_send, int32_t *offset, u1 < (uint32_t)(64 * 1024)) { flags |= TH_URG; BUMP_MIB(&tcps->tcps_mib, tcpOutUrg); - U32_TO_ABE16(u1, tcph->th_urp); + tcpha->tha_urp = htons(u1); } } - tcph->th_flags[0] = (uchar_t)flags; + tcpha->tha_flags = (uchar_t)flags; tcp->tcp_rack = tcp->tcp_rnxt; tcp->tcp_rack_cnt = 0; @@ -22890,14 +17622,14 @@ tcp_xmit_mp(tcp_t *tcp, mblk_t *mp, int32_t max_to_send, int32_t *offset, uint32_t llbolt = (uint32_t)lbolt; U32_TO_BE32(llbolt, - (char *)tcph+TCP_MIN_HEADER_LENGTH+4); + (char *)tcpha + TCP_MIN_HEADER_LENGTH+4); U32_TO_BE32(tcp->tcp_ts_recent, - (char *)tcph+TCP_MIN_HEADER_LENGTH+8); + (char *)tcpha + TCP_MIN_HEADER_LENGTH+8); } } if (num_sack_blk > 0) { - uchar_t *wptr = (uchar_t *)tcph + tcp->tcp_tcp_hdr_len; + uchar_t *wptr = (uchar_t *)tcpha + connp->conn_ht_ulp_len; sack_blk_t *tmp; int32_t i; @@ -22915,33 +17647,34 @@ tcp_xmit_mp(tcp_t *tcp, mblk_t *mp, int32_t max_to_send, int32_t *offset, U32_TO_BE32(tmp[i].end, wptr); wptr += sizeof (tcp_seq); } - tcph->th_offset_and_rsrvd[0] += ((num_sack_blk * 2 + 1) << 4); + tcpha->tha_offset_and_reserved += ((num_sack_blk * 2 + 1) << 4); } ASSERT((uintptr_t)(mp1->b_wptr - rptr) <= (uintptr_t)INT_MAX); data_length += (int)(mp1->b_wptr - rptr); - if (tcp->tcp_ipversion == IPV4_VERSION) { + + ixa->ixa_pktlen = data_length; + + if (ixa->ixa_flags & IXAF_IS_IPV4) { ((ipha_t *)rptr)->ipha_length = htons(data_length); } else { - ip6_t *ip6 = (ip6_t *)(rptr + - (((ip6_t *)rptr)->ip6_nxt == IPPROTO_RAW ? 
- sizeof (ip6i_t) : 0)); + ip6_t *ip6 = (ip6_t *)rptr; - ip6->ip6_plen = htons(data_length - - ((char *)&tcp->tcp_ip6h[1] - tcp->tcp_iphc)); + ip6->ip6_plen = htons(data_length - IPV6_HDR_LEN); } /* * Prime pump for IP * Include the adjustment for a source route if any. */ - data_length -= tcp->tcp_ip_hdr_len; - data_length += tcp->tcp_sum; + data_length -= ixa->ixa_ip_hdr_length; + data_length += connp->conn_sum; data_length = (data_length >> 16) + (data_length & 0xFFFF); - U16_TO_ABE16(data_length, tcph->th_sum); + tcpha->tha_sum = htons(data_length); if (tcp->tcp_ip_forward_progress) { - ASSERT(tcp->tcp_ipversion == IPV6_VERSION); - *(uint32_t *)mp1->b_rptr |= IP_FORWARD_PROG; tcp->tcp_ip_forward_progress = B_FALSE; + connp->conn_ixa->ixa_flags |= IXAF_REACH_CONF; + } else { + connp->conn_ixa->ixa_flags &= ~IXAF_REACH_CONF; } return (mp1); } @@ -23012,7 +17745,7 @@ tcp_ack_timer(void *arg) BUMP_LOCAL(tcp->tcp_obsegs); BUMP_MIB(&tcps->tcps_mib, tcpOutAck); BUMP_MIB(&tcps->tcps_mib, tcpOutAckDelayed); - tcp_send_data(tcp, tcp->tcp_wq, mp); + tcp_send_data(tcp, mp); } } @@ -23023,6 +17756,7 @@ tcp_ack_mp(tcp_t *tcp) { uint32_t seq_no; tcp_stack_t *tcps = tcp->tcp_tcps; + conn_t *connp = tcp->tcp_connp; /* * There are a few cases to be considered while setting the sequence no. 
@@ -23058,12 +17792,13 @@ tcp_ack_mp(tcp_t *tcp) /* Generate a simple ACK */ int data_length; uchar_t *rptr; - tcph_t *tcph; + tcpha_t *tcpha; mblk_t *mp1; + int32_t total_hdr_len; int32_t tcp_hdr_len; - int32_t tcp_tcp_hdr_len; int32_t num_sack_blk = 0; int32_t sack_opt_len; + ip_xmit_attr_t *ixa = connp->conn_ixa; /* * Allocate space for TCP + IP headers @@ -23074,34 +17809,34 @@ tcp_ack_mp(tcp_t *tcp) tcp->tcp_num_sack_blk); sack_opt_len = num_sack_blk * sizeof (sack_blk_t) + TCPOPT_NOP_LEN * 2 + TCPOPT_HEADER_LEN; - tcp_hdr_len = tcp->tcp_hdr_len + sack_opt_len; - tcp_tcp_hdr_len = tcp->tcp_tcp_hdr_len + sack_opt_len; + total_hdr_len = connp->conn_ht_iphc_len + sack_opt_len; + tcp_hdr_len = connp->conn_ht_ulp_len + sack_opt_len; } else { - tcp_hdr_len = tcp->tcp_hdr_len; - tcp_tcp_hdr_len = tcp->tcp_tcp_hdr_len; + total_hdr_len = connp->conn_ht_iphc_len; + tcp_hdr_len = connp->conn_ht_ulp_len; } - mp1 = allocb(tcp_hdr_len + tcps->tcps_wroff_xtra, BPRI_MED); + mp1 = allocb(total_hdr_len + tcps->tcps_wroff_xtra, BPRI_MED); if (!mp1) return (NULL); /* Update the latest receive window size in TCP header. */ - U32_TO_ABE16(tcp->tcp_rwnd >> tcp->tcp_rcv_ws, - tcp->tcp_tcph->th_win); + tcp->tcp_tcpha->tha_win = + htons(tcp->tcp_rwnd >> tcp->tcp_rcv_ws); /* copy in prototype TCP + IP header */ rptr = mp1->b_rptr + tcps->tcps_wroff_xtra; mp1->b_rptr = rptr; - mp1->b_wptr = rptr + tcp_hdr_len; - bcopy(tcp->tcp_iphc, rptr, tcp->tcp_hdr_len); + mp1->b_wptr = rptr + total_hdr_len; + bcopy(connp->conn_ht_iphc, rptr, connp->conn_ht_iphc_len); - tcph = (tcph_t *)&rptr[tcp->tcp_ip_hdr_len]; + tcpha = (tcpha_t *)&rptr[ixa->ixa_ip_hdr_length]; /* Set the TCP sequence number. */ - U32_TO_ABE32(seq_no, tcph->th_seq); + tcpha->tha_seq = htonl(seq_no); /* Set up the TCP flag field. 
*/ - tcph->th_flags[0] = (uchar_t)TH_ACK; + tcpha->tha_flags = (uchar_t)TH_ACK; if (tcp->tcp_ecn_echo_on) - tcph->th_flags[0] |= TH_ECE; + tcpha->tha_flags |= TH_ECE; tcp->tcp_rack = tcp->tcp_rnxt; tcp->tcp_rack_cnt = 0; @@ -23111,14 +17846,15 @@ tcp_ack_mp(tcp_t *tcp) uint32_t llbolt = (uint32_t)lbolt; U32_TO_BE32(llbolt, - (char *)tcph+TCP_MIN_HEADER_LENGTH+4); + (char *)tcpha + TCP_MIN_HEADER_LENGTH+4); U32_TO_BE32(tcp->tcp_ts_recent, - (char *)tcph+TCP_MIN_HEADER_LENGTH+8); + (char *)tcpha + TCP_MIN_HEADER_LENGTH+8); } /* Fill in SACK options */ if (num_sack_blk > 0) { - uchar_t *wptr = (uchar_t *)tcph + tcp->tcp_tcp_hdr_len; + uchar_t *wptr = (uchar_t *)tcpha + + connp->conn_ht_ulp_len; sack_blk_t *tmp; int32_t i; @@ -23136,34 +17872,33 @@ tcp_ack_mp(tcp_t *tcp) U32_TO_BE32(tmp[i].end, wptr); wptr += sizeof (tcp_seq); } - tcph->th_offset_and_rsrvd[0] += ((num_sack_blk * 2 + 1) - << 4); + tcpha->tha_offset_and_reserved += + ((num_sack_blk * 2 + 1) << 4); } - if (tcp->tcp_ipversion == IPV4_VERSION) { - ((ipha_t *)rptr)->ipha_length = htons(tcp_hdr_len); + ixa->ixa_pktlen = total_hdr_len; + + if (ixa->ixa_flags & IXAF_IS_IPV4) { + ((ipha_t *)rptr)->ipha_length = htons(total_hdr_len); } else { - /* Check for ip6i_t header in sticky hdrs */ - ip6_t *ip6 = (ip6_t *)(rptr + - (((ip6_t *)rptr)->ip6_nxt == IPPROTO_RAW ? - sizeof (ip6i_t) : 0)); + ip6_t *ip6 = (ip6_t *)rptr; - ip6->ip6_plen = htons(tcp_hdr_len - - ((char *)&tcp->tcp_ip6h[1] - tcp->tcp_iphc)); + ip6->ip6_plen = htons(total_hdr_len - IPV6_HDR_LEN); } /* * Prime pump for checksum calculation in IP. Include the * adjustment for a source route if any. 
*/ - data_length = tcp_tcp_hdr_len + tcp->tcp_sum; + data_length = tcp_hdr_len + connp->conn_sum; data_length = (data_length >> 16) + (data_length & 0xFFFF); - U16_TO_ABE16(data_length, tcph->th_sum); + tcpha->tha_sum = htons(data_length); if (tcp->tcp_ip_forward_progress) { - ASSERT(tcp->tcp_ipversion == IPV6_VERSION); - *(uint32_t *)mp1->b_rptr |= IP_FORWARD_PROG; tcp->tcp_ip_forward_progress = B_FALSE; + connp->conn_ixa->ixa_flags |= IXAF_REACH_CONF; + } else { + connp->conn_ixa->ixa_flags &= ~IXAF_REACH_CONF; } return (mp1); } @@ -23183,6 +17918,8 @@ tcp_bind_hash_insert(tf_t *tbf, tcp_t *tcp, int caller_holds_lock) tcp_t **tcpp; tcp_t *tcpnext; tcp_t *tcphash; + conn_t *connp = tcp->tcp_connp; + conn_t *connext; if (tcp->tcp_ptpbhn != NULL) { ASSERT(!caller_holds_lock); @@ -23199,7 +17936,7 @@ tcp_bind_hash_insert(tf_t *tbf, tcp_t *tcp, int caller_holds_lock) if (tcphash != NULL) { /* Look for an entry using the same port */ while ((tcphash = tcpp[0]) != NULL && - tcp->tcp_lport != tcphash->tcp_lport) + connp->conn_lport != tcphash->tcp_connp->conn_lport) tcpp = &(tcphash->tcp_bind_hash); /* The port was not found, just add to the end */ @@ -23219,14 +17956,19 @@ tcp_bind_hash_insert(tf_t *tbf, tcp_t *tcp, int caller_holds_lock) * INADDR_ANY. 
*/ tcpnext = tcphash; + connext = tcpnext->tcp_connp; tcphash = NULL; - if (V6_OR_V4_INADDR_ANY(tcp->tcp_bound_source_v6) && - !V6_OR_V4_INADDR_ANY(tcpnext->tcp_bound_source_v6)) { - while ((tcpnext = tcpp[0]) != NULL && - !V6_OR_V4_INADDR_ANY(tcpnext->tcp_bound_source_v6)) - tcpp = &(tcpnext->tcp_bind_hash_port); - - if (tcpnext) { + if (V6_OR_V4_INADDR_ANY(connp->conn_bound_addr_v6) && + !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) { + while ((tcpnext = tcpp[0]) != NULL) { + connext = tcpnext->tcp_connp; + if (!V6_OR_V4_INADDR_ANY( + connext->conn_bound_addr_v6)) + tcpp = &(tcpnext->tcp_bind_hash_port); + else + break; + } + if (tcpnext != NULL) { tcpnext->tcp_ptpbhn = &tcp->tcp_bind_hash_port; tcphash = tcpnext->tcp_bind_hash; if (tcphash != NULL) { @@ -23263,6 +18005,7 @@ tcp_bind_hash_remove(tcp_t *tcp) tcp_t *tcpnext; kmutex_t *lockp; tcp_stack_t *tcps = tcp->tcp_tcps; + conn_t *connp = tcp->tcp_connp; if (tcp->tcp_ptpbhn == NULL) return; @@ -23271,8 +18014,9 @@ tcp_bind_hash_remove(tcp_t *tcp) * Extract the lock pointer in case there are concurrent * hash_remove's for this instance. 
*/ - ASSERT(tcp->tcp_lport != 0); - lockp = &tcps->tcps_bind_fanout[TCP_BIND_HASH(tcp->tcp_lport)].tf_lock; + ASSERT(connp->conn_lport != 0); + lockp = &tcps->tcps_bind_fanout[TCP_BIND_HASH( + connp->conn_lport)].tf_lock; ASSERT(lockp != NULL); mutex_enter(lockp); @@ -23548,7 +18292,7 @@ tcp_conprim_opt_process(tcp_t *tcp, mblk_t *mp, int *do_disconnectp, *sys_errorp = 0; *do_disconnectp = 0; - error = tpi_optcom_buf(tcp->tcp_wq, mp, opt_lenp, + error = tpi_optcom_buf(tcp->tcp_connp->conn_wq, mp, opt_lenp, opt_offset, cr, &tcp_opt_obj, NULL, &is_absreq_failure); @@ -23663,238 +18407,6 @@ tcp_sack_info_constructor(void *buf, void *cdrarg, int kmflags) return (0); } -/* ARGSUSED */ -static int -tcp_iphc_constructor(void *buf, void *cdrarg, int kmflags) -{ - bzero(buf, TCP_MAX_COMBINED_HEADER_LENGTH); - return (0); -} - -/* - * Make sure we wait until the default queue is setup, yet allow - * tcp_g_q_create() to open a TCP stream. - * We need to allow tcp_g_q_create() do do an open - * of tcp, hence we compare curhread. - * All others have to wait until the tcps_g_q has been - * setup. 
- */ -void -tcp_g_q_setup(tcp_stack_t *tcps) -{ - mutex_enter(&tcps->tcps_g_q_lock); - if (tcps->tcps_g_q != NULL) { - mutex_exit(&tcps->tcps_g_q_lock); - return; - } - if (tcps->tcps_g_q_creator == NULL) { - /* This thread will set it up */ - tcps->tcps_g_q_creator = curthread; - mutex_exit(&tcps->tcps_g_q_lock); - tcp_g_q_create(tcps); - mutex_enter(&tcps->tcps_g_q_lock); - ASSERT(tcps->tcps_g_q_creator == curthread); - tcps->tcps_g_q_creator = NULL; - cv_signal(&tcps->tcps_g_q_cv); - ASSERT(tcps->tcps_g_q != NULL); - mutex_exit(&tcps->tcps_g_q_lock); - return; - } - /* Everybody but the creator has to wait */ - if (tcps->tcps_g_q_creator != curthread) { - while (tcps->tcps_g_q == NULL) - cv_wait(&tcps->tcps_g_q_cv, &tcps->tcps_g_q_lock); - } - mutex_exit(&tcps->tcps_g_q_lock); -} - -#define IP "ip" - -#define TCP6DEV "/devices/pseudo/tcp6@0:tcp6" - -/* - * Create a default tcp queue here instead of in strplumb - */ -void -tcp_g_q_create(tcp_stack_t *tcps) -{ - int error; - ldi_handle_t lh = NULL; - ldi_ident_t li = NULL; - int rval; - cred_t *cr; - major_t IP_MAJ; - -#ifdef NS_DEBUG - (void) printf("tcp_g_q_create()\n"); -#endif - - IP_MAJ = ddi_name_to_major(IP); - - ASSERT(tcps->tcps_g_q_creator == curthread); - - error = ldi_ident_from_major(IP_MAJ, &li); - if (error) { -#ifdef DEBUG - printf("tcp_g_q_create: lyr ident get failed error %d\n", - error); -#endif - return; - } - - cr = zone_get_kcred(netstackid_to_zoneid( - tcps->tcps_netstack->netstack_stackid)); - ASSERT(cr != NULL); - /* - * We set the tcp default queue to IPv6 because IPv4 falls - * back to IPv6 when it can't find a client, but - * IPv6 does not fall back to IPv4. 
- */ - error = ldi_open_by_name(TCP6DEV, FREAD|FWRITE, cr, &lh, li); - if (error) { -#ifdef DEBUG - printf("tcp_g_q_create: open of TCP6DEV failed error %d\n", - error); -#endif - goto out; - } - - /* - * This ioctl causes the tcp framework to cache a pointer to - * this stream, so we don't want to close the stream after - * this operation. - * Use the kernel credentials that are for the zone we're in. - */ - error = ldi_ioctl(lh, TCP_IOC_DEFAULT_Q, - (intptr_t)0, FKIOCTL, cr, &rval); - if (error) { -#ifdef DEBUG - printf("tcp_g_q_create: ioctl TCP_IOC_DEFAULT_Q failed " - "error %d\n", error); -#endif - goto out; - } - tcps->tcps_g_q_lh = lh; /* For tcp_g_q_close */ - lh = NULL; -out: - /* Close layered handles */ - if (li) - ldi_ident_release(li); - /* Keep cred around until _inactive needs it */ - tcps->tcps_g_q_cr = cr; -} - -/* - * We keep tcp_g_q set until all other tcp_t's in the zone - * has gone away, and then when tcp_g_q_inactive() is called - * we clear it. - */ -void -tcp_g_q_destroy(tcp_stack_t *tcps) -{ -#ifdef NS_DEBUG - (void) printf("tcp_g_q_destroy()for stack %d\n", - tcps->tcps_netstack->netstack_stackid); -#endif - - if (tcps->tcps_g_q == NULL) { - return; /* Nothing to cleanup */ - } - /* - * Drop reference corresponding to the default queue. - * This reference was added from tcp_open when the default queue - * was created, hence we compensate for this extra drop in - * tcp_g_q_close. If the refcnt drops to zero here it means - * the default queue was the last one to be open, in which - * case, then tcp_g_q_inactive will be - * called as a result of the refrele. - */ - TCPS_REFRELE(tcps); -} - -/* - * Called when last tcp_t drops reference count using TCPS_REFRELE. - * Run by tcp_q_q_inactive using a taskq. 
- */ -static void -tcp_g_q_close(void *arg) -{ - tcp_stack_t *tcps = arg; - int error; - ldi_handle_t lh = NULL; - ldi_ident_t li = NULL; - cred_t *cr; - major_t IP_MAJ; - - IP_MAJ = ddi_name_to_major(IP); - -#ifdef NS_DEBUG - (void) printf("tcp_g_q_inactive() for stack %d refcnt %d\n", - tcps->tcps_netstack->netstack_stackid, - tcps->tcps_netstack->netstack_refcnt); -#endif - lh = tcps->tcps_g_q_lh; - if (lh == NULL) - return; /* Nothing to cleanup */ - - ASSERT(tcps->tcps_refcnt == 1); - ASSERT(tcps->tcps_g_q != NULL); - - error = ldi_ident_from_major(IP_MAJ, &li); - if (error) { -#ifdef DEBUG - printf("tcp_g_q_inactive: lyr ident get failed error %d\n", - error); -#endif - return; - } - - cr = tcps->tcps_g_q_cr; - tcps->tcps_g_q_cr = NULL; - ASSERT(cr != NULL); - - /* - * Make sure we can break the recursion when tcp_close decrements - * the reference count causing g_q_inactive to be called again. - */ - tcps->tcps_g_q_lh = NULL; - - /* close the default queue */ - (void) ldi_close(lh, FREAD|FWRITE, cr); - /* - * At this point in time tcps and the rest of netstack_t might - * have been deleted. - */ - tcps = NULL; - - /* Close layered handles */ - ldi_ident_release(li); - crfree(cr); -} - -/* - * Called when last tcp_t drops reference count using TCPS_REFRELE. - * - * Have to ensure that the ldi routines are not used by an - * interrupt thread by using a taskq. 
- */ -void -tcp_g_q_inactive(tcp_stack_t *tcps) -{ - if (tcps->tcps_g_q_lh == NULL) - return; /* Nothing to cleanup */ - - ASSERT(tcps->tcps_refcnt == 0); - TCPS_REFHOLD(tcps); /* Compensate for what g_q_destroy did */ - - if (servicing_interrupt()) { - (void) taskq_dispatch(tcp_taskq, tcp_g_q_close, - (void *) tcps, TQ_SLEEP); - } else { - tcp_g_q_close(tcps); - } -} - /* * Called by IP when IP is loaded into the kernel */ @@ -23909,10 +18421,6 @@ tcp_ddi_g_init(void) sizeof (tcp_sack_info_t), 0, tcp_sack_info_constructor, NULL, NULL, NULL, NULL, 0); - tcp_iphc_cache = kmem_cache_create("tcp_iphc_cache", - TCP_MAX_COMBINED_HEADER_LENGTH, 0, - tcp_iphc_constructor, NULL, NULL, NULL, NULL, 0); - mutex_init(&tcp_random_lock, NULL, MUTEX_DEFAULT, NULL); /* Initialize the random number generator */ @@ -23923,9 +18431,6 @@ tcp_ddi_g_init(void) tcp_g_kstat = tcp_g_kstat_init(&tcp_g_statistics); - tcp_taskq = taskq_create("tcp_taskq", 1, minclsyspri, 1, 1, - TASKQ_PREPOPULATE); - tcp_squeue_flag = tcp_squeue_switch(tcp_squeue_wput); /* @@ -23933,8 +18438,7 @@ tcp_ddi_g_init(void) * destroyed in the kernel, so we can maintain the * set of tcp_stack_t's. 
*/ - netstack_register(NS_TCP, tcp_stack_init, tcp_stack_shutdown, - tcp_stack_fini); + netstack_register(NS_TCP, tcp_stack_init, NULL, tcp_stack_fini); } @@ -23956,8 +18460,6 @@ tcp_stack_init(netstackid_t stackid, netstack_t *ns) tcps->tcps_netstack = ns; /* Initialize locks */ - mutex_init(&tcps->tcps_g_q_lock, NULL, MUTEX_DEFAULT, NULL); - cv_init(&tcps->tcps_g_q_cv, NULL, CV_DEFAULT, NULL); mutex_init(&tcps->tcps_iss_key_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&tcps->tcps_epriv_port_lock, NULL, MUTEX_DEFAULT, NULL); @@ -24018,6 +18520,11 @@ tcp_stack_init(netstackid_t stackid, netstack_t *ns) major = mod_name_to_major(INET_NAME); error = ldi_ident_from_major(major, &tcps->tcps_ldi_ident); ASSERT(error == 0); + tcps->tcps_ixa_cleanup_mp = allocb_wait(0, BPRI_MED, STR_NOSIG, NULL); + ASSERT(tcps->tcps_ixa_cleanup_mp != NULL); + cv_init(&tcps->tcps_ixa_cleanup_cv, NULL, CV_DEFAULT, NULL); + mutex_init(&tcps->tcps_ixa_cleanup_lock, NULL, MUTEX_DEFAULT, NULL); + return (tcps); } @@ -24035,22 +18542,8 @@ tcp_ddi_g_destroy(void) kmem_cache_destroy(tcp_timercache); kmem_cache_destroy(tcp_sack_info_cache); - kmem_cache_destroy(tcp_iphc_cache); netstack_unregister(NS_TCP); - taskq_destroy(tcp_taskq); -} - -/* - * Shut down the TCP stack instance. 
- */ -/* ARGSUSED */ -static void -tcp_stack_shutdown(netstackid_t stackid, void *arg) -{ - tcp_stack_t *tcps = (tcp_stack_t *)arg; - - tcp_g_q_destroy(tcps); } /* @@ -24062,17 +18555,16 @@ tcp_stack_fini(netstackid_t stackid, void *arg) tcp_stack_t *tcps = (tcp_stack_t *)arg; int i; + freeb(tcps->tcps_ixa_cleanup_mp); + tcps->tcps_ixa_cleanup_mp = NULL; + cv_destroy(&tcps->tcps_ixa_cleanup_cv); + mutex_destroy(&tcps->tcps_ixa_cleanup_lock); + nd_free(&tcps->tcps_g_nd); kmem_free(tcps->tcps_params, sizeof (lcl_tcp_param_arr)); tcps->tcps_params = NULL; kmem_free(tcps->tcps_wroff_xtra_param, sizeof (tcpparam_t)); tcps->tcps_wroff_xtra_param = NULL; - kmem_free(tcps->tcps_mdt_head_param, sizeof (tcpparam_t)); - tcps->tcps_mdt_head_param = NULL; - kmem_free(tcps->tcps_mdt_tail_param, sizeof (tcpparam_t)); - tcps->tcps_mdt_tail_param = NULL; - kmem_free(tcps->tcps_mdt_max_pbufs_param, sizeof (tcpparam_t)); - tcps->tcps_mdt_max_pbufs_param = NULL; for (i = 0; i < TCP_BIND_FANOUT_SIZE; i++) { ASSERT(tcps->tcps_bind_fanout[i].tf_tcp == NULL); @@ -24091,8 +18583,6 @@ tcp_stack_fini(netstackid_t stackid, void *arg) tcps->tcps_acceptor_fanout = NULL; mutex_destroy(&tcps->tcps_iss_key_lock); - mutex_destroy(&tcps->tcps_g_q_lock); - cv_destroy(&tcps->tcps_g_q_cv); mutex_destroy(&tcps->tcps_epriv_port_lock); ip_drop_unregister(&tcps->tcps_dropper); @@ -24120,6 +18610,7 @@ tcp_iss_init(tcp_t *tcp) struct { uint32_t ports; in6_addr_t src; in6_addr_t dst; } arg; uint32_t answer[4]; tcp_stack_t *tcps = tcp->tcp_tcps; + conn_t *connp = tcp->tcp_connp; tcps->tcps_iss_incr_extra += (ISS_INCR >> 1); tcp->tcp_iss = tcps->tcps_iss_incr_extra; @@ -24128,16 +18619,9 @@ tcp_iss_init(tcp_t *tcp) mutex_enter(&tcps->tcps_iss_key_lock); context = tcps->tcps_iss_key; mutex_exit(&tcps->tcps_iss_key_lock); - arg.ports = tcp->tcp_ports; - if (tcp->tcp_ipversion == IPV4_VERSION) { - IN6_IPADDR_TO_V4MAPPED(tcp->tcp_ipha->ipha_src, - &arg.src); - IN6_IPADDR_TO_V4MAPPED(tcp->tcp_ipha->ipha_dst, - 
&arg.dst); - } else { - arg.src = tcp->tcp_ip6h->ip6_src; - arg.dst = tcp->tcp_ip6h->ip6_dst; - } + arg.ports = connp->conn_ports; + arg.src = connp->conn_laddr_v6; + arg.dst = connp->conn_faddr_v6; MD5Update(&context, (uchar_t *)&arg, sizeof (arg)); MD5Final((uchar_t *)answer, &context); tcp->tcp_iss += answer[0] ^ answer[1] ^ answer[2] ^ answer[3]; @@ -24220,27 +18704,16 @@ cl_tcp_walk_list_stack(int (*callback)(cl_tcp_info_t *, void *), void *arg, connp = NULL; while ((connp = - ipcl_get_next_conn(connfp, connp, IPCL_TCP)) != NULL) { + ipcl_get_next_conn(connfp, connp, IPCL_TCPCONN)) != NULL) { tcp = connp->conn_tcp; cl_tcpi.cl_tcpi_version = CL_TCPI_V1; - cl_tcpi.cl_tcpi_ipversion = tcp->tcp_ipversion; + cl_tcpi.cl_tcpi_ipversion = connp->conn_ipversion; cl_tcpi.cl_tcpi_state = tcp->tcp_state; - cl_tcpi.cl_tcpi_lport = tcp->tcp_lport; - cl_tcpi.cl_tcpi_fport = tcp->tcp_fport; - /* - * The macros tcp_laddr and tcp_faddr give the IPv4 - * addresses. They are copied implicitly below as - * mapped addresses. - */ - cl_tcpi.cl_tcpi_laddr_v6 = tcp->tcp_ip_src_v6; - if (tcp->tcp_ipversion == IPV4_VERSION) { - cl_tcpi.cl_tcpi_faddr = - tcp->tcp_ipha->ipha_dst; - } else { - cl_tcpi.cl_tcpi_faddr_v6 = - tcp->tcp_ip6h->ip6_dst; - } + cl_tcpi.cl_tcpi_lport = connp->conn_lport; + cl_tcpi.cl_tcpi_fport = connp->conn_fport; + cl_tcpi.cl_tcpi_laddr_v6 = connp->conn_laddr_v6; + cl_tcpi.cl_tcpi_faddr_v6 = connp->conn_faddr_v6; /* * If the callback returns non-zero @@ -24302,35 +18775,35 @@ cl_tcp_walk_list_stack(int (*callback)(cl_tcp_info_t *, void *), void *arg, /* * Check if a tcp structure matches the info in acp. */ -#define TCP_AC_ADDR_MATCH(acp, tcp) \ +#define TCP_AC_ADDR_MATCH(acp, connp, tcp) \ (((acp)->ac_local.ss_family == AF_INET) ? 
\ ((TCP_AC_V4LOCAL((acp)) == INADDR_ANY || \ - TCP_AC_V4LOCAL((acp)) == (tcp)->tcp_ip_src) && \ + TCP_AC_V4LOCAL((acp)) == (connp)->conn_laddr_v4) && \ (TCP_AC_V4REMOTE((acp)) == INADDR_ANY || \ - TCP_AC_V4REMOTE((acp)) == (tcp)->tcp_remote) && \ + TCP_AC_V4REMOTE((acp)) == (connp)->conn_faddr_v4) && \ (TCP_AC_V4LPORT((acp)) == 0 || \ - TCP_AC_V4LPORT((acp)) == (tcp)->tcp_lport) && \ + TCP_AC_V4LPORT((acp)) == (connp)->conn_lport) && \ (TCP_AC_V4RPORT((acp)) == 0 || \ - TCP_AC_V4RPORT((acp)) == (tcp)->tcp_fport) && \ - (acp)->ac_start <= (tcp)->tcp_state && \ - (acp)->ac_end >= (tcp)->tcp_state) : \ + TCP_AC_V4RPORT((acp)) == (connp)->conn_fport) && \ + (acp)->ac_start <= (tcp)->tcp_state && \ + (acp)->ac_end >= (tcp)->tcp_state) : \ ((IN6_IS_ADDR_UNSPECIFIED(&TCP_AC_V6LOCAL((acp))) || \ IN6_ARE_ADDR_EQUAL(&TCP_AC_V6LOCAL((acp)), \ - &(tcp)->tcp_ip_src_v6)) && \ + &(connp)->conn_laddr_v6)) && \ (IN6_IS_ADDR_UNSPECIFIED(&TCP_AC_V6REMOTE((acp))) || \ IN6_ARE_ADDR_EQUAL(&TCP_AC_V6REMOTE((acp)), \ - &(tcp)->tcp_remote_v6)) && \ + &(connp)->conn_faddr_v6)) && \ (TCP_AC_V6LPORT((acp)) == 0 || \ - TCP_AC_V6LPORT((acp)) == (tcp)->tcp_lport) && \ + TCP_AC_V6LPORT((acp)) == (connp)->conn_lport) && \ (TCP_AC_V6RPORT((acp)) == 0 || \ - TCP_AC_V6RPORT((acp)) == (tcp)->tcp_fport) && \ - (acp)->ac_start <= (tcp)->tcp_state && \ + TCP_AC_V6RPORT((acp)) == (connp)->conn_fport) && \ + (acp)->ac_start <= (tcp)->tcp_state && \ (acp)->ac_end >= (tcp)->tcp_state)) -#define TCP_AC_MATCH(acp, tcp) \ +#define TCP_AC_MATCH(acp, connp, tcp) \ (((acp)->ac_zoneid == ALL_ZONES || \ - (acp)->ac_zoneid == tcp->tcp_connp->conn_zoneid) ? \ - TCP_AC_ADDR_MATCH(acp, tcp) : 0) + (acp)->ac_zoneid == (connp)->conn_zoneid) ? 
\ + TCP_AC_ADDR_MATCH(acp, connp, tcp) : 0) /* * Build a message containing a tcp_ioc_abort_conn_t structure @@ -24346,8 +18819,6 @@ tcp_ioctl_abort_build_msg(tcp_ioc_abort_conn_t *acp, tcp_t *tp) if (mp == NULL) return (NULL); - mp->b_datap->db_type = M_CTL; - *((uint32_t *)mp->b_rptr) = TCP_IOC_ABORT_CONN; tacp = (tcp_ioc_abort_conn_t *)((uchar_t *)mp->b_rptr + sizeof (uint32_t)); @@ -24359,17 +18830,17 @@ tcp_ioctl_abort_build_msg(tcp_ioc_abort_conn_t *acp, tcp_t *tp) if (acp->ac_local.ss_family == AF_INET) { tacp->ac_local.ss_family = AF_INET; tacp->ac_remote.ss_family = AF_INET; - TCP_AC_V4LOCAL(tacp) = tp->tcp_ip_src; - TCP_AC_V4REMOTE(tacp) = tp->tcp_remote; - TCP_AC_V4LPORT(tacp) = tp->tcp_lport; - TCP_AC_V4RPORT(tacp) = tp->tcp_fport; + TCP_AC_V4LOCAL(tacp) = tp->tcp_connp->conn_laddr_v4; + TCP_AC_V4REMOTE(tacp) = tp->tcp_connp->conn_faddr_v4; + TCP_AC_V4LPORT(tacp) = tp->tcp_connp->conn_lport; + TCP_AC_V4RPORT(tacp) = tp->tcp_connp->conn_fport; } else { tacp->ac_local.ss_family = AF_INET6; tacp->ac_remote.ss_family = AF_INET6; - TCP_AC_V6LOCAL(tacp) = tp->tcp_ip_src_v6; - TCP_AC_V6REMOTE(tacp) = tp->tcp_remote_v6; - TCP_AC_V6LPORT(tacp) = tp->tcp_lport; - TCP_AC_V6RPORT(tacp) = tp->tcp_fport; + TCP_AC_V6LOCAL(tacp) = tp->tcp_connp->conn_laddr_v6; + TCP_AC_V6REMOTE(tacp) = tp->tcp_connp->conn_faddr_v6; + TCP_AC_V6LPORT(tacp) = tp->tcp_connp->conn_lport; + TCP_AC_V6RPORT(tacp) = tp->tcp_connp->conn_fport; } mp->b_wptr = (uchar_t *)mp->b_rptr + sizeof (uint32_t) + sizeof (*acp); return (mp); @@ -24419,14 +18890,32 @@ tcp_ioctl_abort_dump(tcp_ioc_abort_conn_t *acp) } /* - * Called inside tcp_rput when a message built using + * Called using SQ_FILL when a message built using * tcp_ioctl_abort_build_msg is put into a queue. * Note that when we get here there is no wildcard in acp any more. 
*/ +/* ARGSUSED2 */ static void -tcp_ioctl_abort_handler(tcp_t *tcp, mblk_t *mp) +tcp_ioctl_abort_handler(void *arg, mblk_t *mp, void *arg2, + ip_recv_attr_t *dummy) { - tcp_ioc_abort_conn_t *acp; + conn_t *connp = (conn_t *)arg; + tcp_t *tcp = connp->conn_tcp; + tcp_ioc_abort_conn_t *acp; + + /* + * Don't accept any input on a closed tcp as this TCP logically does + * not exist on the system. Don't proceed further with this TCP. + * For eg. this packet could trigger another close of this tcp + * which would be disastrous for tcp_refcnt. tcp_close_detached / + * tcp_clean_death / tcp_closei_local must be called at most once + * on a TCP. + */ + if (tcp->tcp_state == TCPS_CLOSED || + tcp->tcp_state == TCPS_BOUND) { + freemsg(mp); + return; + } acp = (tcp_ioc_abort_conn_t *)(mp->b_rptr + sizeof (uint32_t)); if (tcp->tcp_state <= acp->ac_end) { @@ -24468,12 +18957,17 @@ startover: for (tconnp = connfp->connf_head; tconnp != NULL; tconnp = tconnp->conn_next) { tcp = tconnp->conn_tcp; - if (TCP_AC_MATCH(acp, tcp)) { - CONN_INC_REF(tcp->tcp_connp); + /* + * We are missing a check on sin6_scope_id for linklocals here, + * but current usage is just for aborting based on zoneid + * for shared-IP zones. 
+ */ + if (TCP_AC_MATCH(acp, tconnp, tcp)) { + CONN_INC_REF(tconnp); mp = tcp_ioctl_abort_build_msg(acp, tcp); if (mp == NULL) { err = ENOMEM; - CONN_DEC_REF(tcp->tcp_connp); + CONN_DEC_REF(tconnp); break; } mp->b_prev = (mblk_t *)tcp; @@ -24501,8 +18995,9 @@ startover: listhead = listhead->b_next; tcp = (tcp_t *)mp->b_prev; mp->b_next = mp->b_prev = NULL; - SQUEUE_ENTER_ONE(tcp->tcp_connp->conn_sqp, mp, tcp_input, - tcp->tcp_connp, SQ_FILL, SQTAG_TCP_ABORT_BUCKET); + SQUEUE_ENTER_ONE(tcp->tcp_connp->conn_sqp, mp, + tcp_ioctl_abort_handler, tcp->tcp_connp, NULL, + SQ_FILL, SQTAG_TCP_ABORT_BUCKET); } *count += nmatch; @@ -24669,7 +19164,7 @@ out: */ void tcp_time_wait_processing(tcp_t *tcp, mblk_t *mp, uint32_t seg_seq, - uint32_t seg_ack, int seg_len, tcph_t *tcph) + uint32_t seg_ack, int seg_len, tcpha_t *tcpha, ip_recv_attr_t *ira) { int32_t bytes_acked; int32_t gap; @@ -24677,17 +19172,18 @@ tcp_time_wait_processing(tcp_t *tcp, mblk_t *mp, uint32_t seg_seq, tcp_opt_t tcpopt; uint_t flags; uint32_t new_swnd = 0; - conn_t *connp; + conn_t *nconnp; + conn_t *connp = tcp->tcp_connp; tcp_stack_t *tcps = tcp->tcp_tcps; BUMP_LOCAL(tcp->tcp_ibsegs); DTRACE_PROBE2(tcp__trace__recv, mblk_t *, mp, tcp_t *, tcp); - flags = (unsigned int)tcph->th_flags[0] & 0xFF; - new_swnd = BE16_TO_U16(tcph->th_win) << - ((tcph->th_flags[0] & TH_SYN) ? 0 : tcp->tcp_snd_ws); + flags = (unsigned int)tcpha->tha_flags & 0xFF; + new_swnd = ntohs(tcpha->tha_win) << + ((tcpha->tha_flags & TH_SYN) ? 
0 : tcp->tcp_snd_ws); if (tcp->tcp_snd_ts_ok) { - if (!tcp_paws_check(tcp, tcph, &tcpopt)) { + if (!tcp_paws_check(tcp, tcpha, &tcpopt)) { tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt, tcp->tcp_rnxt, TH_ACK); goto done; @@ -24770,17 +19266,10 @@ tcp_time_wait_processing(tcp_t *tcp, mblk_t *mp, uint32_t seg_seq, mutex_enter(&tcps->tcps_iss_key_lock); context = tcps->tcps_iss_key; mutex_exit(&tcps->tcps_iss_key_lock); - arg.ports = tcp->tcp_ports; + arg.ports = connp->conn_ports; /* We use MAPPED addresses in tcp_iss_init */ - arg.src = tcp->tcp_ip_src_v6; - if (tcp->tcp_ipversion == IPV4_VERSION) { - IN6_IPADDR_TO_V4MAPPED( - tcp->tcp_ipha->ipha_dst, - &arg.dst); - } else { - arg.dst = - tcp->tcp_ip6h->ip6_dst; - } + arg.src = connp->conn_laddr_v6; + arg.dst = connp->conn_faddr_v6; MD5Update(&context, (uchar_t *)&arg, sizeof (arg)); MD5Final((uchar_t *)answer, &context); @@ -24813,21 +19302,11 @@ tcp_time_wait_processing(tcp_t *tcp, mblk_t *mp, uint32_t seg_seq, */ if (tcp_clean_death(tcp, 0, 27) == -1) goto done; - /* - * We will come back to tcp_rput_data - * on the global queue. Packets destined - * for the global queue will be checked - * with global policy. But the policy for - * this packet has already been checked as - * this was destined for the detached - * connection. We need to bypass policy - * check this time by attaching a dummy - * ipsec_in with ipsec_in_dont_check set. 
- */ - connp = ipcl_classify(mp, tcp->tcp_connp->conn_zoneid, ipst); - if (connp != NULL) { + nconnp = ipcl_classify(mp, ira, ipst); + if (nconnp != NULL) { TCP_STAT(tcps, tcp_time_wait_syn_success); - tcp_reinput(connp, mp, tcp->tcp_connp->conn_sqp); + /* Drops ref on nconnp */ + tcp_reinput(nconnp, mp, ira, ipst); return; } goto done; @@ -24905,11 +19384,6 @@ process_ack: tcp->tcp_rnxt, TH_ACK); } done: - if ((mp->b_datap->db_struioflag & STRUIO_EAGER) != 0) { - DB_CKSUMSTART(mp) = 0; - mp->b_datap->db_struioflag &= ~STRUIO_EAGER; - TCP_STAT(tcps, tcp_time_wait_syn_fail); - } freemsg(mp); } @@ -24965,11 +19439,12 @@ tcp_timer_callback(void *arg) tcpt = (tcp_timer_t *)mp->b_rptr; connp = tcpt->connp; SQUEUE_ENTER_ONE(connp->conn_sqp, mp, tcp_timer_handler, connp, - SQ_FILL, SQTAG_TCP_TIMER); + NULL, SQ_FILL, SQTAG_TCP_TIMER); } +/* ARGSUSED */ static void -tcp_timer_handler(void *arg, mblk_t *mp, void *arg2) +tcp_timer_handler(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *dummy) { tcp_timer_t *tcpt; conn_t *connp = (conn_t *)arg; @@ -24983,7 +19458,7 @@ tcp_timer_handler(void *arg, mblk_t *mp, void *arg2) * If the TCP has reached the closed state, don't proceed any * further. This TCP logically does not exist on the system. * tcpt_proc could for example access queues, that have already - * been qprocoff'ed off. Also see comments at the start of tcp_input + * been qprocoff'ed off. 
*/ if (tcp->tcp_state != TCPS_CLOSED) { (*tcpt->tcpt_proc)(connp); @@ -25148,26 +19623,9 @@ tcp_setqfull(tcp_t *tcp) if (tcp->tcp_closed) return; - if (IPCL_IS_NONSTR(connp)) { - (*connp->conn_upcalls->su_txq_full) - (tcp->tcp_connp->conn_upper_handle, B_TRUE); - tcp->tcp_flow_stopped = B_TRUE; - } else { - queue_t *q = tcp->tcp_wq; - - if (!(q->q_flag & QFULL)) { - mutex_enter(QLOCK(q)); - if (!(q->q_flag & QFULL)) { - /* still need to set QFULL */ - q->q_flag |= QFULL; - tcp->tcp_flow_stopped = B_TRUE; - mutex_exit(QLOCK(q)); - TCP_STAT(tcps, tcp_flwctl_on); - } else { - mutex_exit(QLOCK(q)); - } - } - } + conn_setqfull(connp, &tcp->tcp_flow_stopped); + if (tcp->tcp_flow_stopped) + TCP_STAT(tcps, tcp_flwctl_on); } void @@ -25177,27 +19635,7 @@ tcp_clrqfull(tcp_t *tcp) if (tcp->tcp_closed) return; - - if (IPCL_IS_NONSTR(connp)) { - (*connp->conn_upcalls->su_txq_full) - (tcp->tcp_connp->conn_upper_handle, B_FALSE); - tcp->tcp_flow_stopped = B_FALSE; - } else { - queue_t *q = tcp->tcp_wq; - - if (q->q_flag & QFULL) { - mutex_enter(QLOCK(q)); - if (q->q_flag & QFULL) { - q->q_flag &= ~QFULL; - tcp->tcp_flow_stopped = B_FALSE; - mutex_exit(QLOCK(q)); - if (q->q_flag & QWANTW) - qbackenable(q, 0); - } else { - mutex_exit(QLOCK(q)); - } - } - } + conn_clrqfull(connp, &tcp->tcp_flow_stopped); } /* @@ -25246,10 +19684,7 @@ tcp_kstat2_init(netstackid_t stackid, tcp_stat_t *tcps_statisticsp) tcp_stat_t template = { { "tcp_time_wait", KSTAT_DATA_UINT64 }, { "tcp_time_wait_syn", KSTAT_DATA_UINT64 }, - { "tcp_time_wait_success", KSTAT_DATA_UINT64 }, - { "tcp_time_wait_fail", KSTAT_DATA_UINT64 }, - { "tcp_reinput_syn", KSTAT_DATA_UINT64 }, - { "tcp_ip_output", KSTAT_DATA_UINT64 }, + { "tcp_time_wait_syn_success", KSTAT_DATA_UINT64 }, { "tcp_detach_non_time_wait", KSTAT_DATA_UINT64 }, { "tcp_detach_time_wait", KSTAT_DATA_UINT64 }, { "tcp_time_wait_reap", KSTAT_DATA_UINT64 }, @@ -25287,37 +19722,14 @@ tcp_kstat2_init(netstackid_t stackid, tcp_stat_t *tcps_statisticsp) { 
"tcp_timermp_freed", KSTAT_DATA_UINT64 }, { "tcp_push_timer_cnt", KSTAT_DATA_UINT64 }, { "tcp_ack_timer_cnt", KSTAT_DATA_UINT64 }, - { "tcp_ire_null1", KSTAT_DATA_UINT64 }, - { "tcp_ire_null", KSTAT_DATA_UINT64 }, - { "tcp_ip_send", KSTAT_DATA_UINT64 }, - { "tcp_ip_ire_send", KSTAT_DATA_UINT64 }, { "tcp_wsrv_called", KSTAT_DATA_UINT64 }, { "tcp_flwctl_on", KSTAT_DATA_UINT64 }, { "tcp_timer_fire_early", KSTAT_DATA_UINT64 }, { "tcp_timer_fire_miss", KSTAT_DATA_UINT64 }, { "tcp_rput_v6_error", KSTAT_DATA_UINT64 }, - { "tcp_out_sw_cksum", KSTAT_DATA_UINT64 }, - { "tcp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, { "tcp_zcopy_on", KSTAT_DATA_UINT64 }, { "tcp_zcopy_off", KSTAT_DATA_UINT64 }, { "tcp_zcopy_backoff", KSTAT_DATA_UINT64 }, - { "tcp_zcopy_disable", KSTAT_DATA_UINT64 }, - { "tcp_mdt_pkt_out", KSTAT_DATA_UINT64 }, - { "tcp_mdt_pkt_out_v4", KSTAT_DATA_UINT64 }, - { "tcp_mdt_pkt_out_v6", KSTAT_DATA_UINT64 }, - { "tcp_mdt_discarded", KSTAT_DATA_UINT64 }, - { "tcp_mdt_conn_halted1", KSTAT_DATA_UINT64 }, - { "tcp_mdt_conn_halted2", KSTAT_DATA_UINT64 }, - { "tcp_mdt_conn_halted3", KSTAT_DATA_UINT64 }, - { "tcp_mdt_conn_resumed1", KSTAT_DATA_UINT64 }, - { "tcp_mdt_conn_resumed2", KSTAT_DATA_UINT64 }, - { "tcp_mdt_legacy_small", KSTAT_DATA_UINT64 }, - { "tcp_mdt_legacy_all", KSTAT_DATA_UINT64 }, - { "tcp_mdt_legacy_ret", KSTAT_DATA_UINT64 }, - { "tcp_mdt_allocfail", KSTAT_DATA_UINT64 }, - { "tcp_mdt_addpdescfail", KSTAT_DATA_UINT64 }, - { "tcp_mdt_allocd", KSTAT_DATA_UINT64 }, - { "tcp_mdt_linked", KSTAT_DATA_UINT64 }, { "tcp_fusion_flowctl", KSTAT_DATA_UINT64 }, { "tcp_fusion_backenabled", KSTAT_DATA_UINT64 }, { "tcp_fusion_urg", KSTAT_DATA_UINT64 }, @@ -25490,7 +19902,7 @@ tcp_kstat_update(kstat_t *kp, int rw) connfp = &ipst->ips_ipcl_globalhash_fanout[i]; connp = NULL; while ((connp = - ipcl_get_next_conn(connfp, connp, IPCL_TCP)) != NULL) { + ipcl_get_next_conn(connfp, connp, IPCL_TCPCONN)) != NULL) { tcp = connp->conn_tcp; switch (tcp_snmp_state(tcp)) { case 
MIB2_TCP_established: @@ -25565,48 +19977,6 @@ tcp_kstat_update(kstat_t *kp, int rw) return (0); } -void -tcp_reinput(conn_t *connp, mblk_t *mp, squeue_t *sqp) -{ - uint16_t hdr_len; - ipha_t *ipha; - uint8_t *nexthdrp; - tcph_t *tcph; - tcp_stack_t *tcps = connp->conn_tcp->tcp_tcps; - - /* Already has an eager */ - if ((mp->b_datap->db_struioflag & STRUIO_EAGER) != 0) { - TCP_STAT(tcps, tcp_reinput_syn); - SQUEUE_ENTER_ONE(connp->conn_sqp, mp, connp->conn_recv, connp, - SQ_PROCESS, SQTAG_TCP_REINPUT_EAGER); - return; - } - - switch (IPH_HDR_VERSION(mp->b_rptr)) { - case IPV4_VERSION: - ipha = (ipha_t *)mp->b_rptr; - hdr_len = IPH_HDR_LENGTH(ipha); - break; - case IPV6_VERSION: - if (!ip_hdr_length_nexthdr_v6(mp, (ip6_t *)mp->b_rptr, - &hdr_len, &nexthdrp)) { - CONN_DEC_REF(connp); - freemsg(mp); - return; - } - break; - } - - tcph = (tcph_t *)&mp->b_rptr[hdr_len]; - if ((tcph->th_flags[0] & (TH_SYN|TH_ACK|TH_RST|TH_URG)) == TH_SYN) { - mp->b_datap->db_struioflag |= STRUIO_EAGER; - DB_CKSUMSTART(mp) = (intptr_t)sqp; - } - - SQUEUE_ENTER_ONE(connp->conn_sqp, mp, connp->conn_recv, connp, - SQ_FILL, SQTAG_TCP_REINPUT); -} - static int tcp_squeue_switch(int val) { @@ -25653,278 +20023,20 @@ tcp_squeue_add(squeue_t *sqp) tcp_time_wait->tcp_free_list_cnt = 0; } -static int -tcp_post_ip_bind(tcp_t *tcp, mblk_t *mp, int error, cred_t *cr, pid_t pid) +/* + * On a labeled system we have some protocols above TCP, such as RPC, which + * appear to assume that every mblk in a chain has a db_credp. + */ +static void +tcp_setcred_data(mblk_t *mp, ip_recv_attr_t *ira) { - mblk_t *ire_mp = NULL; - mblk_t *syn_mp; - mblk_t *mdti; - mblk_t *lsoi; - int retval; - tcph_t *tcph; - cred_t *ecr; - ts_label_t *tsl; - uint32_t mss; - conn_t *connp = tcp->tcp_connp; - tcp_stack_t *tcps = tcp->tcp_tcps; - - if (error == 0) { - /* - * Adapt Multidata information, if any. The - * following tcp_mdt_update routine will free - * the message. 
- */ - if (mp != NULL && ((mdti = tcp_mdt_info_mp(mp)) != NULL)) { - tcp_mdt_update(tcp, &((ip_mdt_info_t *)mdti-> - b_rptr)->mdt_capab, B_TRUE); - freemsg(mdti); - } - - /* - * Check to update LSO information with tcp, and - * tcp_lso_update routine will free the message. - */ - if (mp != NULL && ((lsoi = tcp_lso_info_mp(mp)) != NULL)) { - tcp_lso_update(tcp, &((ip_lso_info_t *)lsoi-> - b_rptr)->lso_capab); - freemsg(lsoi); - } - - /* Get the IRE, if we had requested for it */ - if (mp != NULL) - ire_mp = tcp_ire_mp(&mp); - - if (tcp->tcp_hard_binding) { - tcp->tcp_hard_binding = B_FALSE; - tcp->tcp_hard_bound = B_TRUE; - CL_INET_CONNECT(tcp->tcp_connp, tcp, B_TRUE, retval); - if (retval != 0) { - error = EADDRINUSE; - goto bind_failed; - } - } else { - if (ire_mp != NULL) - freeb(ire_mp); - goto after_syn_sent; - } - - retval = tcp_adapt_ire(tcp, ire_mp); - if (ire_mp != NULL) - freeb(ire_mp); - if (retval == 0) { - error = (int)((tcp->tcp_state >= TCPS_SYN_SENT) ? - ENETUNREACH : EADDRNOTAVAIL); - goto ipcl_rm; - } - /* - * Don't let an endpoint connect to itself. - * Also checked in tcp_connect() but that - * check can't handle the case when the - * local IP address is INADDR_ANY. - */ - if (tcp->tcp_ipversion == IPV4_VERSION) { - if ((tcp->tcp_ipha->ipha_dst == - tcp->tcp_ipha->ipha_src) && - (BE16_EQL(tcp->tcp_tcph->th_lport, - tcp->tcp_tcph->th_fport))) { - error = EADDRNOTAVAIL; - goto ipcl_rm; - } - } else { - if (IN6_ARE_ADDR_EQUAL( - &tcp->tcp_ip6h->ip6_dst, - &tcp->tcp_ip6h->ip6_src) && - (BE16_EQL(tcp->tcp_tcph->th_lport, - tcp->tcp_tcph->th_fport))) { - error = EADDRNOTAVAIL; - goto ipcl_rm; - } - } - ASSERT(tcp->tcp_state == TCPS_SYN_SENT); - /* - * This should not be possible! Just for - * defensive coding... 
- */ - if (tcp->tcp_state != TCPS_SYN_SENT) - goto after_syn_sent; - - if (is_system_labeled() && - !tcp_update_label(tcp, CONN_CRED(tcp->tcp_connp))) { - error = EHOSTUNREACH; - goto ipcl_rm; - } - - /* - * tcp_adapt_ire() does not adjust - * for TCP/IP header length. - */ - mss = tcp->tcp_mss - tcp->tcp_hdr_len; - - /* - * Just make sure our rwnd is at - * least tcp_recv_hiwat_mss * MSS - * large, and round up to the nearest - * MSS. - * - * We do the round up here because - * we need to get the interface - * MTU first before we can do the - * round up. - */ - tcp->tcp_rwnd = MAX(MSS_ROUNDUP(tcp->tcp_rwnd, mss), - tcps->tcps_recv_hiwat_minmss * mss); - tcp->tcp_recv_hiwater = tcp->tcp_rwnd; - tcp_set_ws_value(tcp); - U32_TO_ABE16((tcp->tcp_rwnd >> tcp->tcp_rcv_ws), - tcp->tcp_tcph->th_win); - if (tcp->tcp_rcv_ws > 0 || tcps->tcps_wscale_always) - tcp->tcp_snd_ws_ok = B_TRUE; - - /* - * Set tcp_snd_ts_ok to true - * so that tcp_xmit_mp will - * include the timestamp - * option in the SYN segment. - */ - if (tcps->tcps_tstamp_always || - (tcp->tcp_rcv_ws && tcps->tcps_tstamp_if_wscale)) { - tcp->tcp_snd_ts_ok = B_TRUE; - } - - /* - * tcp_snd_sack_ok can be set in - * tcp_adapt_ire() if the sack metric - * is set. So check it here also. - */ - if (tcps->tcps_sack_permitted == 2 || - tcp->tcp_snd_sack_ok) { - if (tcp->tcp_sack_info == NULL) { - tcp->tcp_sack_info = - kmem_cache_alloc(tcp_sack_info_cache, - KM_SLEEP); - } - tcp->tcp_snd_sack_ok = B_TRUE; - } + ASSERT(is_system_labeled()); + ASSERT(ira->ira_cred != NULL); - /* - * Should we use ECN? Note that the current - * default value (SunOS 5.9) of tcp_ecn_permitted - * is 1. The reason for doing this is that there - * are equipments out there that will drop ECN - * enabled IP packets. Setting it to 1 avoids - * compatibility problems. 
- */ - if (tcps->tcps_ecn_permitted == 2) - tcp->tcp_ecn_ok = B_TRUE; - - TCP_TIMER_RESTART(tcp, tcp->tcp_rto); - syn_mp = tcp_xmit_mp(tcp, NULL, 0, NULL, NULL, - tcp->tcp_iss, B_FALSE, NULL, B_FALSE); - if (syn_mp) { - /* - * cr contains the cred from the thread calling - * connect(). - * - * If no thread cred is available, use the - * socket creator's cred instead. If still no - * cred, drop the request rather than risk a - * panic on production systems. - */ - if (cr == NULL) { - cr = CONN_CRED(connp); - pid = tcp->tcp_cpid; - ASSERT(cr != NULL); - if (cr != NULL) { - mblk_setcred(syn_mp, cr, pid); - } else { - error = ECONNABORTED; - goto ipcl_rm; - } - - /* - * If an effective security label exists for - * the connection, create a copy of the thread's - * cred but with the effective label attached. - */ - } else if (is_system_labeled() && - connp->conn_effective_cred != NULL && - (tsl = crgetlabel(connp-> - conn_effective_cred)) != NULL) { - if ((ecr = copycred_from_tslabel(cr, - tsl, KM_NOSLEEP)) == NULL) { - error = ENOMEM; - goto ipcl_rm; - } - mblk_setcred(syn_mp, ecr, pid); - crfree(ecr); - - /* - * Default to using the thread's cred unchanged. - */ - } else { - mblk_setcred(syn_mp, cr, pid); - } - - /* - * We must bump the generation before sending the syn - * to ensure that we use the right generation in case - * this thread issues a "connected" up call. - */ - SOCK_CONNID_BUMP(tcp->tcp_connid); - - tcp_send_data(tcp, tcp->tcp_wq, syn_mp); - } - after_syn_sent: - if (mp != NULL) { - ASSERT(mp->b_cont == NULL); - freeb(mp); - } - return (error); - } else { - /* error */ - if (tcp->tcp_debug) { - (void) strlog(TCP_MOD_ID, 0, 1, SL_TRACE|SL_ERROR, - "tcp_post_ip_bind: error == %d", error); - } - if (mp != NULL) { - freeb(mp); - } + while (mp != NULL) { + mblk_setcred(mp, ira->ira_cred, NOPID); + mp = mp->b_cont; } - -ipcl_rm: - /* - * Need to unbind with classifier since we were just - * told that our bind succeeded. a.k.a error == 0 at the entry. 
- */ - tcp->tcp_hard_bound = B_FALSE; - tcp->tcp_hard_binding = B_FALSE; - - ipcl_hash_remove(connp); - -bind_failed: - tcp->tcp_state = TCPS_IDLE; - if (tcp->tcp_ipversion == IPV4_VERSION) - tcp->tcp_ipha->ipha_src = 0; - else - V6_SET_ZERO(tcp->tcp_ip6h->ip6_src); - /* - * Copy of the src addr. in tcp_t is needed since - * the lookup funcs. can only look at tcp_t - */ - V6_SET_ZERO(tcp->tcp_ip_src_v6); - - tcph = tcp->tcp_tcph; - tcph->th_lport[0] = 0; - tcph->th_lport[1] = 0; - tcp_bind_hash_remove(tcp); - bzero(&connp->u_port, sizeof (connp->u_port)); - /* blow away saved option results if any */ - if (tcp->tcp_conn.tcp_opts_conn_req != NULL) - tcp_close_mpp(&tcp->tcp_conn.tcp_opts_conn_req); - - conn_delete_ire(tcp->tcp_connp, NULL); - - return (error); } static int @@ -25936,16 +20048,16 @@ tcp_bind_select_lport(tcp_t *tcp, in_port_t *requested_port_ptr, boolean_t user_specified; in_port_t allocated_port; in_port_t requested_port = *requested_port_ptr; - conn_t *connp; + conn_t *connp = tcp->tcp_connp; zone_t *zone; tcp_stack_t *tcps = tcp->tcp_tcps; - in6_addr_t v6addr = tcp->tcp_ip_src_v6; + in6_addr_t v6addr = connp->conn_laddr_v6; /* * XXX It's up to the caller to specify bind_to_req_port_only or not. */ - if (cr == NULL) - cr = tcp->tcp_cred; + ASSERT(cr != NULL); + /* * Get a valid port (within the anonymous range and should not * be a privileged one) to use if the user has not given a port. @@ -25961,7 +20073,7 @@ tcp_bind_select_lport(tcp_t *tcp, in_port_t *requested_port_ptr, mlptype = mlptSingle; mlp_port = requested_port; if (requested_port == 0) { - requested_port = tcp->tcp_anon_priv_bind ? + requested_port = connp->conn_anon_priv_bind ? tcp_get_next_priv_port(tcp) : tcp_update_next_port(tcps->tcps_next_port_to_try, tcp, B_TRUE); @@ -25975,7 +20087,6 @@ tcp_bind_select_lport(tcp_t *tcp, in_port_t *requested_port_ptr, * this socket and RPC is MLP in this zone, then give him an * anonymous MLP. 
*/ - connp = tcp->tcp_connp; if (connp->conn_anon_mlp && is_system_labeled()) { zone = crgetzone(cr); addrtype = tsol_mlp_addr_type( @@ -26016,7 +20127,7 @@ tcp_bind_select_lport(tcp_t *tcp, in_port_t *requested_port_ptr, if (priv) { if (secpolicy_net_privaddr(cr, requested_port, IPPROTO_TCP) != 0) { - if (tcp->tcp_debug) { + if (connp->conn_debug) { (void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, "tcp_bind: no priv for port %d", @@ -26044,7 +20155,7 @@ tcp_bind_select_lport(tcp_t *tcp, in_port_t *requested_port_ptr, if (mlptype != mlptSingle) { if (secpolicy_net_bindmlp(cr) != 0) { - if (tcp->tcp_debug) { + if (connp->conn_debug) { (void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, "tcp_bind: no priv for multilevel port %d", @@ -26068,7 +20179,7 @@ tcp_bind_select_lport(tcp_t *tcp, in_port_t *requested_port_ptr, mlpzone = tsol_mlp_findzone(IPPROTO_TCP, htons(mlp_port)); if (connp->conn_zoneid != mlpzone) { - if (tcp->tcp_debug) { + if (connp->conn_debug) { (void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, "tcp_bind: attempt to bind port " @@ -26083,10 +20194,10 @@ tcp_bind_select_lport(tcp_t *tcp, in_port_t *requested_port_ptr, if (!user_specified) { int err; - err = tsol_mlp_anon(zone, mlptype, connp->conn_ulp, + err = tsol_mlp_anon(zone, mlptype, connp->conn_proto, requested_port, B_TRUE); if (err != 0) { - if (tcp->tcp_debug) { + if (connp->conn_debug) { (void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, "tcp_bind: cannot establish anon " @@ -26101,17 +20212,18 @@ tcp_bind_select_lport(tcp_t *tcp, in_port_t *requested_port_ptr, } allocated_port = tcp_bindi(tcp, requested_port, &v6addr, - tcp->tcp_reuseaddr, B_FALSE, bind_to_req_port_only, user_specified); + connp->conn_reuseaddr, B_FALSE, bind_to_req_port_only, + user_specified); if (allocated_port == 0) { connp->conn_mlp_type = mlptSingle; if (connp->conn_anon_port) { connp->conn_anon_port = B_FALSE; - (void) tsol_mlp_anon(zone, mlptype, connp->conn_ulp, + (void) tsol_mlp_anon(zone, mlptype, 
connp->conn_proto, requested_port, B_FALSE); } if (bind_to_req_port_only) { - if (tcp->tcp_debug) { + if (connp->conn_debug) { (void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, "tcp_bind: requested addr busy"); @@ -26119,7 +20231,7 @@ tcp_bind_select_lport(tcp_t *tcp, in_port_t *requested_port_ptr, return (-TADDRBUSY); } else { /* If we are out of ports, fail the bind. */ - if (tcp->tcp_debug) { + if (connp->conn_debug) { (void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, "tcp_bind: out of ports?"); @@ -26133,6 +20245,9 @@ tcp_bind_select_lport(tcp_t *tcp, in_port_t *requested_port_ptr, return (0); } +/* + * Check the address and check/pick a local port number. + */ static int tcp_bind_check(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, boolean_t bind_to_req_port_only) @@ -26140,18 +20255,22 @@ tcp_bind_check(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, tcp_t *tcp = connp->conn_tcp; sin_t *sin; sin6_t *sin6; - in_port_t requested_port; + in_port_t requested_port; ipaddr_t v4addr; in6_addr_t v6addr; - uint_t ipversion; - int error = 0; + ip_laddr_t laddr_type = IPVL_UNICAST_UP; /* INADDR_ANY */ + zoneid_t zoneid = IPCL_ZONEID(connp); + ip_stack_t *ipst = connp->conn_netstack->netstack_ip; + uint_t scopeid = 0; + int error = 0; + ip_xmit_attr_t *ixa = connp->conn_ixa; ASSERT((uintptr_t)len <= (uintptr_t)INT_MAX); if (tcp->tcp_state == TCPS_BOUND) { return (0); } else if (tcp->tcp_state > TCPS_BOUND) { - if (tcp->tcp_debug) { + if (connp->conn_debug) { (void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, "tcp_bind: bad state, %d", tcp->tcp_state); } @@ -26161,7 +20280,7 @@ tcp_bind_check(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, ASSERT(sa != NULL && len != 0); if (!OK_32PTR((char *)sa)) { - if (tcp->tcp_debug) { + if (connp->conn_debug) { (void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, "tcp_bind: bad address parameter, " @@ -26171,38 +20290,48 @@ tcp_bind_check(conn_t *connp, struct sockaddr *sa, socklen_t len, 
cred_t *cr, return (-TPROTO); } + error = proto_verify_ip_addr(connp->conn_family, sa, len); + if (error != 0) { + return (error); + } + switch (len) { case sizeof (sin_t): /* Complete IPv4 address */ sin = (sin_t *)sa; - /* - * With sockets sockfs will accept bogus sin_family in - * bind() and replace it with the family used in the socket - * call. - */ - if (sin->sin_family != AF_INET || - tcp->tcp_family != AF_INET) { - return (EAFNOSUPPORT); - } requested_port = ntohs(sin->sin_port); - ipversion = IPV4_VERSION; v4addr = sin->sin_addr.s_addr; IN6_IPADDR_TO_V4MAPPED(v4addr, &v6addr); + if (v4addr != INADDR_ANY) { + laddr_type = ip_laddr_verify_v4(v4addr, zoneid, ipst, + B_FALSE); + } break; case sizeof (sin6_t): /* Complete IPv6 address */ sin6 = (sin6_t *)sa; - if (sin6->sin6_family != AF_INET6 || - tcp->tcp_family != AF_INET6) { - return (EAFNOSUPPORT); - } - requested_port = ntohs(sin6->sin6_port); - ipversion = IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr) ? - IPV4_VERSION : IPV6_VERSION; v6addr = sin6->sin6_addr; + requested_port = ntohs(sin6->sin6_port); + if (IN6_IS_ADDR_V4MAPPED(&v6addr)) { + if (connp->conn_ipv6_v6only) + return (EADDRNOTAVAIL); + + IN6_V4MAPPED_TO_IPADDR(&v6addr, v4addr); + if (v4addr != INADDR_ANY) { + laddr_type = ip_laddr_verify_v4(v4addr, + zoneid, ipst, B_FALSE); + } + } else { + if (!IN6_IS_ADDR_UNSPECIFIED(&v6addr)) { + if (IN6_IS_ADDR_LINKSCOPE(&v6addr)) + scopeid = sin6->sin6_scope_id; + laddr_type = ip_laddr_verify_v6(&v6addr, + zoneid, ipst, B_FALSE, scopeid); + } + } break; default: - if (tcp->tcp_debug) { + if (connp->conn_debug) { (void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, "tcp_bind: bad address length, %d", len); } @@ -26210,34 +20339,32 @@ tcp_bind_check(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, /* return (-TBADADDR); */ } - tcp->tcp_bound_source_v6 = v6addr; + /* Is the local address a valid unicast address? 
*/ + if (laddr_type == IPVL_BAD) + return (EADDRNOTAVAIL); - /* Check for change in ipversion */ - if (tcp->tcp_ipversion != ipversion) { - ASSERT(tcp->tcp_family == AF_INET6); - error = (ipversion == IPV6_VERSION) ? - tcp_header_init_ipv6(tcp) : tcp_header_init_ipv4(tcp); - if (error) { - return (ENOMEM); - } - } - - /* - * Initialize family specific fields. Copy of the src addr. - * in tcp_t is needed for the lookup funcs. - */ - if (tcp->tcp_ipversion == IPV6_VERSION) { - tcp->tcp_ip6h->ip6_src = v6addr; + connp->conn_bound_addr_v6 = v6addr; + if (scopeid != 0) { + ixa->ixa_flags |= IXAF_SCOPEID_SET; + ixa->ixa_scopeid = scopeid; + connp->conn_incoming_ifindex = scopeid; } else { - IN6_V4MAPPED_TO_IPADDR(&v6addr, tcp->tcp_ipha->ipha_src); + ixa->ixa_flags &= ~IXAF_SCOPEID_SET; + connp->conn_incoming_ifindex = connp->conn_bound_if; } - tcp->tcp_ip_src_v6 = v6addr; + + connp->conn_laddr_v6 = v6addr; + connp->conn_saddr_v6 = v6addr; bind_to_req_port_only = requested_port != 0 && bind_to_req_port_only; error = tcp_bind_select_lport(tcp, &requested_port, bind_to_req_port_only, cr); - + if (error != 0) { + connp->conn_laddr_v6 = ipv6_all_zeros; + connp->conn_saddr_v6 = ipv6_all_zeros; + connp->conn_bound_addr_v6 = ipv6_all_zeros; + } return (error); } @@ -26253,7 +20380,7 @@ tcp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, tcp_t *tcp = connp->conn_tcp; if (tcp->tcp_state >= TCPS_BOUND) { - if (tcp->tcp_debug) { + if (connp->conn_debug) { (void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, "tcp_bind: bad state, %d", tcp->tcp_state); } @@ -26265,19 +20392,8 @@ tcp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, return (error); ASSERT(tcp->tcp_state == TCPS_BOUND); - tcp->tcp_conn_req_max = 0; - - if (tcp->tcp_family == AF_INET6) { - ASSERT(tcp->tcp_connp->conn_af_isv6); - error = ip_proto_bind_laddr_v6(connp, NULL, IPPROTO_TCP, - &tcp->tcp_bound_source_v6, 0, B_FALSE); - } else { - ASSERT(!tcp->tcp_connp->conn_af_isv6); - 
error = ip_proto_bind_laddr_v4(connp, NULL, IPPROTO_TCP, - tcp->tcp_ipha->ipha_src, 0, B_FALSE); - } - return (tcp_post_ip_bind(tcp, NULL, error, NULL, 0)); + return (0); } int @@ -26337,7 +20453,14 @@ tcp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len, ipaddr_t *dstaddrp; in_port_t dstport; uint_t srcid; - int error = 0; + int error; + uint32_t mss; + mblk_t *syn_mp; + tcp_stack_t *tcps = tcp->tcp_tcps; + int32_t oldstate; + ip_xmit_attr_t *ixa = connp->conn_ixa; + + oldstate = tcp->tcp_state; switch (len) { default: @@ -26351,7 +20474,7 @@ tcp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len, if (sin->sin_port == 0) { return (-TBADADDR); } - if (tcp->tcp_connp && tcp->tcp_connp->conn_ipv6_v6only) { + if (connp->conn_ipv6_v6only) { return (EAFNOSUPPORT); } break; @@ -26365,23 +20488,18 @@ tcp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len, } /* * If we're connecting to an IPv4-mapped IPv6 address, we need to - * make sure that the template IP header in the tcp structure is an - * IPv4 header, and that the tcp_ipversion is IPV4_VERSION. We + * make sure that the conn_ipversion is IPV4_VERSION. We * need to this before we call tcp_bindi() so that the port lookup * code will look for ports in the correct port space (IPv4 and * IPv6 have separate port spaces). 
*/ - if (tcp->tcp_family == AF_INET6 && tcp->tcp_ipversion == IPV6_VERSION && + if (connp->conn_family == AF_INET6 && + connp->conn_ipversion == IPV6_VERSION && IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { - int err = 0; + if (connp->conn_ipv6_v6only) + return (EADDRNOTAVAIL); - err = tcp_header_init_ipv4(tcp); - if (err != 0) { - error = ENOMEM; - goto connect_failed; - } - if (tcp->tcp_lport != 0) - *(uint16_t *)tcp->tcp_tcph->th_lport = tcp->tcp_lport; + connp->conn_ipversion = IPV4_VERSION; } switch (tcp->tcp_state) { @@ -26399,43 +20517,147 @@ tcp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len, */ /* FALLTHRU */ case TCPS_BOUND: - if (tcp->tcp_family == AF_INET6) { - if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { - return (tcp_connect_ipv6(tcp, - &sin6->sin6_addr, - sin6->sin6_port, sin6->sin6_flowinfo, - sin6->__sin6_src_id, sin6->sin6_scope_id, - cr, pid)); - } + break; + default: + return (-TOUTSTATE); + } + + /* + * We update our cred/cpid based on the caller of connect + */ + if (connp->conn_cred != cr) { + crhold(cr); + crfree(connp->conn_cred); + connp->conn_cred = cr; + } + connp->conn_cpid = pid; + + /* Cache things in the ixa without any refhold */ + ixa->ixa_cred = cr; + ixa->ixa_cpid = pid; + if (is_system_labeled()) { + /* We need to restart with a label based on the cred */ + ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred); + } + + if (connp->conn_family == AF_INET6) { + if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { + error = tcp_connect_ipv6(tcp, &sin6->sin6_addr, + sin6->sin6_port, sin6->sin6_flowinfo, + sin6->__sin6_src_id, sin6->sin6_scope_id); + } else { /* * Destination adress is mapped IPv6 address. * Source bound address should be unspecified or * IPv6 mapped address as well. 
*/ if (!IN6_IS_ADDR_UNSPECIFIED( - &tcp->tcp_bound_source_v6) && - !IN6_IS_ADDR_V4MAPPED(&tcp->tcp_bound_source_v6)) { + &connp->conn_bound_addr_v6) && + !IN6_IS_ADDR_V4MAPPED(&connp->conn_bound_addr_v6)) { return (EADDRNOTAVAIL); } dstaddrp = &V4_PART_OF_V6((sin6->sin6_addr)); dstport = sin6->sin6_port; srcid = sin6->__sin6_src_id; - } else { - dstaddrp = &sin->sin_addr.s_addr; - dstport = sin->sin_port; - srcid = 0; + error = tcp_connect_ipv4(tcp, dstaddrp, dstport, + srcid); } + } else { + dstaddrp = &sin->sin_addr.s_addr; + dstport = sin->sin_port; + srcid = 0; + error = tcp_connect_ipv4(tcp, dstaddrp, dstport, srcid); + } - error = tcp_connect_ipv4(tcp, dstaddrp, dstport, srcid, cr, - pid); - break; - default: - return (-TOUTSTATE); + if (error != 0) + goto connect_failed; + + CL_INET_CONNECT(connp, B_TRUE, error); + if (error != 0) + goto connect_failed; + + /* connect succeeded */ + BUMP_MIB(&tcps->tcps_mib, tcpActiveOpens); + tcp->tcp_active_open = 1; + + /* + * tcp_set_destination() does not adjust for TCP/IP header length. + */ + mss = tcp->tcp_mss - connp->conn_ht_iphc_len; + + /* + * Just make sure our rwnd is at least rcvbuf * MSS large, and round up + * to the nearest MSS. + * + * We do the round up here because we need to get the interface MTU + * first before we can do the round up. + */ + tcp->tcp_rwnd = connp->conn_rcvbuf; + tcp->tcp_rwnd = MAX(MSS_ROUNDUP(tcp->tcp_rwnd, mss), + tcps->tcps_recv_hiwat_minmss * mss); + connp->conn_rcvbuf = tcp->tcp_rwnd; + tcp_set_ws_value(tcp); + tcp->tcp_tcpha->tha_win = htons(tcp->tcp_rwnd >> tcp->tcp_rcv_ws); + if (tcp->tcp_rcv_ws > 0 || tcps->tcps_wscale_always) + tcp->tcp_snd_ws_ok = B_TRUE; + + /* + * Set tcp_snd_ts_ok to true + * so that tcp_xmit_mp will + * include the timestamp + * option in the SYN segment. 
+ */ + if (tcps->tcps_tstamp_always || + (tcp->tcp_rcv_ws && tcps->tcps_tstamp_if_wscale)) { + tcp->tcp_snd_ts_ok = B_TRUE; } + /* - * Note: Code below is the "failure" case + * tcp_snd_sack_ok can be set in + * tcp_set_destination() if the sack metric + * is set. So check it here also. + */ + if (tcps->tcps_sack_permitted == 2 || + tcp->tcp_snd_sack_ok) { + if (tcp->tcp_sack_info == NULL) { + tcp->tcp_sack_info = kmem_cache_alloc( + tcp_sack_info_cache, KM_SLEEP); + } + tcp->tcp_snd_sack_ok = B_TRUE; + } + + /* + * Should we use ECN? Note that the current + * default value (SunOS 5.9) of tcp_ecn_permitted + * is 1. The reason for doing this is that there + * are equipments out there that will drop ECN + * enabled IP packets. Setting it to 1 avoids + * compatibility problems. */ + if (tcps->tcps_ecn_permitted == 2) + tcp->tcp_ecn_ok = B_TRUE; + + TCP_TIMER_RESTART(tcp, tcp->tcp_rto); + syn_mp = tcp_xmit_mp(tcp, NULL, 0, NULL, NULL, + tcp->tcp_iss, B_FALSE, NULL, B_FALSE); + if (syn_mp != NULL) { + /* + * We must bump the generation before sending the syn + * to ensure that we use the right generation in case + * this thread issues a "connected" up call. 
+ */ + SOCK_CONNID_BUMP(tcp->tcp_connid); + tcp_send_data(tcp, syn_mp); + } + + if (tcp->tcp_conn.tcp_opts_conn_req != NULL) + tcp_close_mpp(&tcp->tcp_conn.tcp_opts_conn_req); + return (0); + connect_failed: + connp->conn_faddr_v6 = ipv6_all_zeros; + connp->conn_fport = 0; + tcp->tcp_state = oldstate; if (tcp->tcp_conn.tcp_opts_conn_req != NULL) tcp_close_mpp(&tcp->tcp_conn.tcp_opts_conn_req); return (error); @@ -26446,7 +20668,6 @@ tcp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa, socklen_t len, sock_connid_t *id, cred_t *cr) { conn_t *connp = (conn_t *)proto_handle; - tcp_t *tcp = connp->conn_tcp; squeue_t *sqp = connp->conn_sqp; int error; @@ -26455,7 +20676,7 @@ tcp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa, /* All Solaris components should pass a cred for this operation. */ ASSERT(cr != NULL); - error = proto_verify_ip_addr(tcp->tcp_family, sa, len); + error = proto_verify_ip_addr(connp->conn_family, sa, len); if (error != 0) { return (error); } @@ -26493,7 +20714,7 @@ tcp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa, } } - if (tcp->tcp_loopback) { + if (connp->conn_tcp->tcp_loopback) { struct sock_proto_props sopp; sopp.sopp_flags = SOCKOPT_LOOPBACK; @@ -26521,7 +20742,7 @@ tcp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls, return (NULL); } - connp = tcp_create_common(NULL, credp, isv6, B_TRUE, errorp); + connp = tcp_create_common(credp, isv6, B_TRUE, errorp); if (connp == NULL) { return (NULL); } @@ -26578,8 +20799,8 @@ tcp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle, connp->conn_upcalls = sock_upcalls; connp->conn_upper_handle = sock_handle; - ASSERT(connp->conn_tcp->tcp_recv_hiwater != 0 && - connp->conn_tcp->tcp_recv_hiwater == connp->conn_tcp->tcp_rwnd); + ASSERT(connp->conn_rcvbuf != 0 && + connp->conn_rcvbuf == connp->conn_tcp->tcp_rwnd); (*sock_upcalls->su_set_proto_props)(sock_handle, &sopp); } @@ -26663,7 +20884,7 @@ 
tcp_sendmsg(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg, /* * Squeue Flow Control */ - if (TCP_UNSENT_BYTES(tcp) > tcp->tcp_xmit_hiwater) { + if (TCP_UNSENT_BYTES(tcp) > connp->conn_sndbuf) { tcp_setqfull(tcp); } mutex_exit(&tcp->tcp_non_sq_lock); @@ -26680,12 +20901,11 @@ tcp_sendmsg(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg, CONN_INC_REF(connp); if (msg->msg_flags & MSG_OOB) { - SQUEUE_ENTER_ONE(connp->conn_sqp, mp, - tcp_output_urgent, connp, tcp_squeue_flag, - SQTAG_TCP_OUTPUT); + SQUEUE_ENTER_ONE(connp->conn_sqp, mp, tcp_output_urgent, + connp, NULL, tcp_squeue_flag, SQTAG_TCP_OUTPUT); } else { SQUEUE_ENTER_ONE(connp->conn_sqp, mp, tcp_output, - connp, tcp_squeue_flag, SQTAG_TCP_OUTPUT); + connp, NULL, tcp_squeue_flag, SQTAG_TCP_OUTPUT); } return (0); @@ -26698,9 +20918,9 @@ tcp_sendmsg(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg, return (0); } -/* ARGSUSED */ +/* ARGSUSED2 */ void -tcp_output_urgent(void *arg, mblk_t *mp, void *arg2) +tcp_output_urgent(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *dummy) { int len; uint32_t msize; @@ -26739,7 +20959,7 @@ tcp_output_urgent(void *arg, mblk_t *mp, void *arg2) tcp_wput_data(tcp, mp, B_TRUE); } -/* ARGSUSED */ +/* ARGSUSED3 */ int tcp_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *addr, socklen_t *addrlenp, cred_t *cr) @@ -26752,24 +20972,24 @@ tcp_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *addr, ASSERT(cr != NULL); ASSERT(tcp != NULL); + if (tcp->tcp_state < TCPS_SYN_RCVD) + return (ENOTCONN); - return (tcp_do_getpeername(tcp, addr, addrlenp)); + return (conn_getpeername(connp, addr, addrlenp)); } -/* ARGSUSED */ +/* ARGSUSED3 */ int tcp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *addr, socklen_t *addrlenp, cred_t *cr) { conn_t *connp = (conn_t *)proto_handle; - tcp_t *tcp = connp->conn_tcp; /* All Solaris components should pass a cred for this operation. 
*/ ASSERT(cr != NULL); ASSERT(connp->conn_upper_handle != NULL); - - return (tcp_do_getsockname(tcp, addr, addrlenp)); + return (conn_getsockname(connp, addr, addrlenp)); } /* @@ -26809,8 +21029,8 @@ tcp_fallback_noneager(tcp_t *tcp, mblk_t *stropt_mp, queue_t *q, RD(q)->q_ptr = WR(q)->q_ptr = connp; - connp->conn_tcp->tcp_rq = connp->conn_rq = RD(q); - connp->conn_tcp->tcp_wq = connp->conn_wq = WR(q); + connp->conn_rq = RD(q); + connp->conn_wq = WR(q); WR(q)->q_qinfo = &tcp_sock_winit; @@ -26830,11 +21050,11 @@ tcp_fallback_noneager(tcp_t *tcp, mblk_t *stropt_mp, queue_t *q, stropt_mp->b_wptr += sizeof (struct stroptions); stropt->so_flags = SO_HIWAT | SO_WROFF | SO_MAXBLK; - stropt->so_wroff = tcp->tcp_hdr_len + (tcp->tcp_loopback ? 0 : + stropt->so_wroff = connp->conn_ht_iphc_len + (tcp->tcp_loopback ? 0 : tcp->tcp_tcps->tcps_wroff_xtra); if (tcp->tcp_snd_sack_ok) stropt->so_wroff += TCPOPT_MAX_SACK_LEN; - stropt->so_hiwat = tcp->tcp_recv_hiwater; + stropt->so_hiwat = connp->conn_rcvbuf; stropt->so_maxblk = tcp_maxpsz_set(tcp, B_FALSE); putnext(RD(q), stropt_mp); @@ -26845,15 +21065,17 @@ tcp_fallback_noneager(tcp_t *tcp, mblk_t *stropt_mp, queue_t *q, tcp_do_capability_ack(tcp, &tca, TC1_INFO|TC1_ACCEPTOR_ID); laddrlen = faddrlen = sizeof (sin6_t); - (void) tcp_do_getsockname(tcp, (struct sockaddr *)&laddr, &laddrlen); - error = tcp_do_getpeername(tcp, (struct sockaddr *)&faddr, &faddrlen); + (void) tcp_getsockname((sock_lower_handle_t)connp, + (struct sockaddr *)&laddr, &laddrlen, CRED()); + error = tcp_getpeername((sock_lower_handle_t)connp, + (struct sockaddr *)&faddr, &faddrlen, CRED()); if (error != 0) faddrlen = 0; opts = 0; - if (tcp->tcp_oobinline) + if (connp->conn_oobinline) opts |= SO_OOBINLINE; - if (tcp->tcp_dontroute) + if (connp->conn_ixa->ixa_flags & IXAF_DONTROUTE) opts |= SO_DONTROUTE; /* @@ -26868,6 +21090,7 @@ tcp_fallback_noneager(tcp_t *tcp, mblk_t *stropt_mp, queue_t *q, while ((mp = tcp->tcp_rcv_list) != NULL) { tcp->tcp_rcv_list = 
mp->b_next; mp->b_next = NULL; + /* We never do fallback for kernel RPC */ putnext(q, mp); } tcp->tcp_rcv_last_head = NULL; @@ -26908,7 +21131,7 @@ tcp_fallback_eager(tcp_t *eager, boolean_t direct_sockfs) * Sockfs guarantees that the listener will not be closed * during fallback. So we can safely use the listener's queue. */ - putnext(listener->tcp_rq, mp); + putnext(listener->tcp_connp->conn_rq, mp); } int @@ -26987,7 +21210,7 @@ tcp_fallback(sock_lower_handle_t proto_handle, queue_t *q, /* ARGSUSED */ static void -tcp_shutdown_output(void *arg, mblk_t *mp, void *arg2) +tcp_shutdown_output(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *dummy) { conn_t *connp = (conn_t *)arg; tcp_t *tcp = connp->conn_tcp; @@ -27002,7 +21225,7 @@ tcp_shutdown_output(void *arg, mblk_t *mp, void *arg2) * We were crossing FINs and got a reset from * the other side. Just ignore it. */ - if (tcp->tcp_debug) { + if (connp->conn_debug) { (void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, "tcp_shutdown_output() out of state %s", @@ -27036,7 +21259,7 @@ tcp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr) bp = allocb_wait(0, BPRI_HI, STR_NOSIG, NULL); CONN_INC_REF(connp); SQUEUE_ENTER_ONE(connp->conn_sqp, bp, tcp_shutdown_output, - connp, SQ_NODRAIN, SQTAG_TCP_SHUTDOWN_OUTPUT); + connp, NULL, SQ_NODRAIN, SQTAG_TCP_SHUTDOWN_OUTPUT); (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, SOCK_OPCTL_SHUT_SEND, 0); @@ -27109,7 +21332,7 @@ tcp_do_listen(conn_t *connp, struct sockaddr *sa, socklen_t len, */ goto do_listen; } - if (tcp->tcp_debug) { + if (connp->conn_debug) { (void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, "tcp_listen: bad state, %d", tcp->tcp_state); } @@ -27121,15 +21344,14 @@ tcp_do_listen(conn_t *connp, struct sockaddr *sa, socklen_t len, sin6_t *sin6; ASSERT(IPCL_IS_NONSTR(connp)); - /* Do an implicit bind: Request for a generic port. 
*/ - if (tcp->tcp_family == AF_INET) { + if (connp->conn_family == AF_INET) { len = sizeof (sin_t); sin = (sin_t *)&addr; *sin = sin_null; sin->sin_family = AF_INET; } else { - ASSERT(tcp->tcp_family == AF_INET6); + ASSERT(connp->conn_family == AF_INET6); len = sizeof (sin6_t); sin6 = (sin6_t *)&addr; *sin6 = sin6_null; @@ -27171,23 +21393,42 @@ do_listen: } /* - * We can call ip_bind directly, the processing continues - * in tcp_post_ip_bind(). - * * We need to make sure that the conn_recv is set to a non-null * value before we insert the conn into the classifier table. * This is to avoid a race with an incoming packet which does an * ipcl_classify(). + * We initially set it to tcp_input_listener_unbound to try to + * pick a good squeue for the listener when the first SYN arrives. + * tcp_input_listener_unbound sets it to tcp_input_listener on that + * first SYN. */ - connp->conn_recv = tcp_conn_request; - if (tcp->tcp_family == AF_INET) { - error = ip_proto_bind_laddr_v4(connp, NULL, - IPPROTO_TCP, tcp->tcp_bound_source, tcp->tcp_lport, B_TRUE); - } else { - error = ip_proto_bind_laddr_v6(connp, NULL, IPPROTO_TCP, - &tcp->tcp_bound_source_v6, tcp->tcp_lport, B_TRUE); + connp->conn_recv = tcp_input_listener_unbound; + + /* Insert the listener in the classifier table */ + error = ip_laddr_fanout_insert(connp); + if (error != 0) { + /* Undo the bind - release the port number */ + tcp->tcp_state = TCPS_IDLE; + connp->conn_bound_addr_v6 = ipv6_all_zeros; + + connp->conn_laddr_v6 = ipv6_all_zeros; + connp->conn_saddr_v6 = ipv6_all_zeros; + connp->conn_ports = 0; + + if (connp->conn_anon_port) { + zone_t *zone; + + zone = crgetzone(cr); + connp->conn_anon_port = B_FALSE; + (void) tsol_mlp_anon(zone, connp->conn_mlp_type, + connp->conn_proto, connp->conn_lport, B_FALSE); + } + connp->conn_mlp_type = mlptSingle; + + tcp_bind_hash_remove(tcp); + return (error); } - return (tcp_post_ip_bind(tcp, NULL, error, NULL, 0)); + return (error); } void @@ -27222,7 +21463,7 @@ 
tcp_clr_flowctrl(sock_lower_handle_t proto_handle) if (tcp->tcp_fused) { tcp_fuse_backenable(tcp); } else { - tcp->tcp_rwnd = tcp->tcp_recv_hiwater; + tcp->tcp_rwnd = connp->conn_rcvbuf; /* * Send back a window update immediately if TCP is above * ESTABLISHED state and the increase of the rcv window @@ -27253,10 +21494,28 @@ tcp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg, /* All Solaris components should pass a cred for this operation. */ ASSERT(cr != NULL); + /* + * If we don't have a helper stream then create one. + * ip_create_helper_stream takes care of locking the conn_t, + * so this check for NULL is just a performance optimization. + */ + if (connp->conn_helper_info == NULL) { + tcp_stack_t *tcps = connp->conn_tcp->tcp_tcps; + + /* + * Create a helper stream for non-STREAMS socket. + */ + error = ip_create_helper_stream(connp, tcps->tcps_ldi_ident); + if (error != 0) { + ip0dbg(("tcp_ioctl: create of IP helper stream " + "failed %d\n", error)); + return (error); + } + } + switch (cmd) { case ND_SET: case ND_GET: - case TCP_IOC_DEFAULT_Q: case _SIOCSOCKFALLBACK: case TCP_IOC_ABORT_CONN: case TI_GETPEERNAME: |