summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--usr/src/cmd/cmd-inet/etc/sock2path41
-rw-r--r--usr/src/cmd/cmd-inet/usr.bin/netstat/unix.c25
-rw-r--r--usr/src/cmd/cmd-inet/usr.sbin/soconfig.c48
-rw-r--r--usr/src/cmd/mdb/Makefile.common1
-rw-r--r--usr/src/cmd/mdb/common/modules/genunix/net.c48
-rw-r--r--usr/src/cmd/mdb/common/modules/genunix/vfs.c484
-rw-r--r--usr/src/cmd/mdb/common/modules/sockfs/sockfs.c154
-rw-r--r--usr/src/cmd/mdb/intel/amd64/sockfs/Makefile33
-rw-r--r--usr/src/cmd/mdb/intel/ia32/sockfs/Makefile32
-rw-r--r--usr/src/cmd/mdb/sparc/v9/sockfs/Makefile33
-rw-r--r--usr/src/pkgdefs/SUNWckr/prototype_com1
-rw-r--r--usr/src/pkgdefs/SUNWckr/prototype_i38613
-rw-r--r--usr/src/pkgdefs/SUNWckr/prototype_sparc7
-rw-r--r--usr/src/pkgdefs/SUNWhea/prototype_com2
-rw-r--r--usr/src/pkgdefs/SUNWibsdp/postinstall9
-rw-r--r--usr/src/pkgdefs/SUNWibsdp/preremove9
-rw-r--r--usr/src/pkgdefs/SUNWibsdp/prototype_i3868
-rw-r--r--usr/src/pkgdefs/SUNWibsdp/prototype_sparc7
-rw-r--r--usr/src/pkgdefs/SUNWmdb/prototype_i3862
-rw-r--r--usr/src/pkgdefs/SUNWmdb/prototype_sparc1
-rw-r--r--usr/src/pkgdefs/SUNWmdbr/prototype_i38610
-rw-r--r--usr/src/pkgdefs/SUNWmdbr/prototype_sparc12
-rw-r--r--usr/src/pkgdefs/common_files/i.sock2path29
-rw-r--r--usr/src/uts/Makefile.targ20
-rw-r--r--usr/src/uts/Makefile.uts13
-rw-r--r--usr/src/uts/common/Makefile.files18
-rw-r--r--usr/src/uts/common/Makefile.rules14
-rw-r--r--usr/src/uts/common/c2/audit_event.c274
-rw-r--r--usr/src/uts/common/fs/smbsrv/smb_negotiate.c24
-rw-r--r--usr/src/uts/common/fs/smbsrv/smb_net.c160
-rw-r--r--usr/src/uts/common/fs/smbsrv/smb_server.c31
-rw-r--r--usr/src/uts/common/fs/smbsrv/smb_session.c22
-rw-r--r--usr/src/uts/common/fs/sockfs/nl7c.c47
-rw-r--r--usr/src/uts/common/fs/sockfs/nl7c.h19
-rw-r--r--usr/src/uts/common/fs/sockfs/nl7chttp.c35
-rw-r--r--usr/src/uts/common/fs/sockfs/nl7curi.c74
-rw-r--r--usr/src/uts/common/fs/sockfs/sockcommon.c1092
-rw-r--r--usr/src/uts/common/fs/sockfs/sockcommon.h246
-rw-r--r--usr/src/uts/common/fs/sockfs/sockcommon_sops.c1696
-rw-r--r--usr/src/uts/common/fs/sockfs/sockcommon_subr.c1970
-rw-r--r--usr/src/uts/common/fs/sockfs/sockcommon_vnops.c482
-rw-r--r--usr/src/uts/common/fs/sockfs/socknotify.c379
-rw-r--r--usr/src/uts/common/fs/sockfs/sockparams.c723
-rw-r--r--usr/src/uts/common/fs/sockfs/socksctp.c2773
-rw-r--r--usr/src/uts/common/fs/sockfs/socksctpvnops.c875
-rwxr-xr-xusr/src/uts/common/fs/sockfs/socksdp.h85
-rwxr-xr-xusr/src/uts/common/fs/sockfs/socksdpsubr.c214
-rw-r--r--usr/src/uts/common/fs/sockfs/socksdpvnops.c535
-rw-r--r--usr/src/uts/common/fs/sockfs/sockssl.c9
-rw-r--r--usr/src/uts/common/fs/sockfs/sockstr.c744
-rw-r--r--usr/src/uts/common/fs/sockfs/socksubr.c693
-rw-r--r--usr/src/uts/common/fs/sockfs/socksyscalls.c642
-rw-r--r--usr/src/uts/common/fs/sockfs/socktpi.c2735
-rw-r--r--usr/src/uts/common/fs/sockfs/socktpi.h282
-rw-r--r--usr/src/uts/common/fs/sockfs/socktpi_impl.h99
-rw-r--r--usr/src/uts/common/fs/sockfs/sockvnops.c1438
-rw-r--r--usr/src/uts/common/inet/inetddi.c55
-rw-r--r--usr/src/uts/common/inet/ip.h24
-rw-r--r--usr/src/uts/common/inet/ip/icmp.c3232
-rw-r--r--usr/src/uts/common/inet/ip/icmp_opt_data.c16
-rw-r--r--usr/src/uts/common/inet/ip/icmpddi.c6
-rw-r--r--usr/src/uts/common/inet/ip/ip.c487
-rw-r--r--usr/src/uts/common/inet/ip/ip6.c497
-rw-r--r--usr/src/uts/common/inet/ip/ip6_if.c39
-rw-r--r--usr/src/uts/common/inet/ip/ip_helper_stream.c482
-rw-r--r--usr/src/uts/common/inet/ip/ip_if.c50
-rw-r--r--usr/src/uts/common/inet/ip/ip_opt_data.c12
-rw-r--r--usr/src/uts/common/inet/ip/ip_rts.c52
-rw-r--r--usr/src/uts/common/inet/ip/ipclassifier.c79
-rw-r--r--usr/src/uts/common/inet/ip/keysock.c7
-rw-r--r--usr/src/uts/common/inet/ip/rts.c851
-rw-r--r--usr/src/uts/common/inet/ip/rts_opt_data.c17
-rw-r--r--usr/src/uts/common/inet/ip/rtsddi.c12
-rw-r--r--usr/src/uts/common/inet/ip/spdsock.c7
-rw-r--r--usr/src/uts/common/inet/ip6.h11
-rw-r--r--usr/src/uts/common/inet/ip_if.h3
-rw-r--r--usr/src/uts/common/inet/ip_impl.h18
-rw-r--r--usr/src/uts/common/inet/ip_rts.h9
-rw-r--r--usr/src/uts/common/inet/ip_stack.h2
-rw-r--r--usr/src/uts/common/inet/ipclassifier.h38
-rw-r--r--usr/src/uts/common/inet/mi.c94
-rw-r--r--usr/src/uts/common/inet/mi.h17
-rw-r--r--usr/src/uts/common/inet/optcom.c194
-rw-r--r--usr/src/uts/common/inet/optcom.h13
-rw-r--r--usr/src/uts/common/inet/proto_set.c440
-rw-r--r--usr/src/uts/common/inet/proto_set.h58
-rw-r--r--usr/src/uts/common/inet/rawip_impl.h29
-rw-r--r--usr/src/uts/common/inet/rts_impl.h37
-rw-r--r--usr/src/uts/common/inet/sctp/sctp.c23
-rw-r--r--usr/src/uts/common/inet/sctp/sctp_bind.c12
-rw-r--r--usr/src/uts/common/inet/sctp/sctp_common.c11
-rw-r--r--usr/src/uts/common/inet/sctp/sctp_conn.c34
-rw-r--r--usr/src/uts/common/inet/sctp/sctp_cookie.c6
-rw-r--r--usr/src/uts/common/inet/sctp/sctp_impl.h31
-rw-r--r--usr/src/uts/common/inet/sctp/sctp_input.c47
-rw-r--r--usr/src/uts/common/inet/sctp/sctp_notify.c16
-rw-r--r--usr/src/uts/common/inet/sctp/sctp_opt_data.c7
-rw-r--r--usr/src/uts/common/inet/sctp/sctp_output.c25
-rw-r--r--usr/src/uts/common/inet/sctp/sctp_shutdown.c6
-rw-r--r--usr/src/uts/common/inet/sctp_itf.h31
-rw-r--r--usr/src/uts/common/inet/sockmods/sockmod_sctp.c221
-rw-r--r--usr/src/uts/common/inet/sockmods/sockmod_sdp.c154
-rw-r--r--usr/src/uts/common/inet/sockmods/socksctp.c2105
-rw-r--r--usr/src/uts/common/inet/sockmods/socksctp.h (renamed from usr/src/uts/common/fs/sockfs/socksctp.h)44
-rw-r--r--usr/src/uts/common/inet/sockmods/socksctpsubr.c (renamed from usr/src/uts/common/fs/sockfs/socksctpsubr.c)199
-rw-r--r--usr/src/uts/common/inet/sockmods/socksdp.c (renamed from usr/src/uts/common/fs/sockfs/socksdp.c)1024
-rw-r--r--usr/src/uts/common/inet/sockmods/socksdp.h44
-rw-r--r--usr/src/uts/common/inet/sockmods/socksdpsubr.c60
-rw-r--r--usr/src/uts/common/inet/spdsock.h5
-rw-r--r--usr/src/uts/common/inet/squeue.c139
-rw-r--r--usr/src/uts/common/inet/tcp.h17
-rw-r--r--usr/src/uts/common/inet/tcp/tcp.c5242
-rw-r--r--usr/src/uts/common/inet/tcp/tcp_fusion.c225
-rw-r--r--usr/src/uts/common/inet/tcp/tcp_opt_data.c16
-rw-r--r--usr/src/uts/common/inet/tcp/tcpddi.c6
-rw-r--r--usr/src/uts/common/inet/tcp_impl.h9
-rw-r--r--usr/src/uts/common/inet/tcp_stack.h3
-rw-r--r--usr/src/uts/common/inet/udp/udp.c4128
-rw-r--r--usr/src/uts/common/inet/udp/udp_opt_data.c14
-rw-r--r--usr/src/uts/common/inet/udp/udpddi.c5
-rw-r--r--usr/src/uts/common/inet/udp_impl.h41
-rw-r--r--usr/src/uts/common/io/comstar/port/iscsit/iscsit_isns.c107
-rw-r--r--usr/src/uts/common/io/comstar/port/iscsit/iscsit_isns.h2
-rw-r--r--usr/src/uts/common/io/comstar/port/iscsit/iscsit_radiuspacket.c81
-rw-r--r--usr/src/uts/common/io/ib/clients/rds/rds_opt.c8
-rw-r--r--usr/src/uts/common/io/ib/clients/rds/rdsddi.c13
-rw-r--r--usr/src/uts/common/io/ib/clients/sdp/sdpddi.c34
-rw-r--r--usr/src/uts/common/io/idm/idm_so.c271
-rw-r--r--usr/src/uts/common/io/ksocket/ksocket.c733
-rw-r--r--usr/src/uts/common/io/ksocket/ksocket_impl.h74
-rw-r--r--usr/src/uts/common/io/ksocket/ksocket_mod.c57
-rw-r--r--usr/src/uts/common/io/scsi/adapters/iscsi/iscsi.h4
-rw-r--r--usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_ioctl.c23
-rw-r--r--usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_login.c15
-rw-r--r--usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_net.c417
-rw-r--r--usr/src/uts/common/io/scsi/adapters/iscsi/isns_client.c46
-rw-r--r--usr/src/uts/common/io/sock_conf.c251
-rw-r--r--usr/src/uts/common/io/strplumb.c1
-rw-r--r--usr/src/uts/common/netinet/icmp6.h10
-rw-r--r--usr/src/uts/common/os/fio.c9
-rw-r--r--usr/src/uts/common/os/modconf.c67
-rw-r--r--usr/src/uts/common/os/move.c2
-rw-r--r--usr/src/uts/common/os/streamio.c46
-rw-r--r--usr/src/uts/common/os/strsubr.c16
-rw-r--r--usr/src/uts/common/smbsrv/smb_kproto.h15
-rw-r--r--usr/src/uts/common/smbsrv/smb_ktypes.h6
-rw-r--r--usr/src/uts/common/sys/Makefile2
-rw-r--r--usr/src/uts/common/sys/idm/idm_so.h20
-rw-r--r--usr/src/uts/common/sys/iscsit/radius_packet.h6
-rw-r--r--usr/src/uts/common/sys/ksocket.h127
-rw-r--r--usr/src/uts/common/sys/modctl.h10
-rw-r--r--usr/src/uts/common/sys/socket.h39
-rw-r--r--usr/src/uts/common/sys/socket_proto.h182
-rw-r--r--usr/src/uts/common/sys/socketvar.h732
-rw-r--r--usr/src/uts/common/sys/sockio.h8
-rw-r--r--usr/src/uts/common/sys/sodirect.h35
-rw-r--r--usr/src/uts/common/sys/squeue.h3
-rw-r--r--usr/src/uts/common/sys/squeue_impl.h2
-rw-r--r--usr/src/uts/common/sys/stream.h1
-rw-r--r--usr/src/uts/common/sys/strsubr.h3
-rw-r--r--usr/src/uts/common/syscall/sendfile.c237
-rw-r--r--usr/src/uts/intel/Makefile.intel.shared7
-rw-r--r--usr/src/uts/intel/ia32/ml/modstubs.s58
-rw-r--r--usr/src/uts/intel/icmp/Makefile12
-rw-r--r--usr/src/uts/intel/icmp/icmp.global-objs.debug646
-rw-r--r--usr/src/uts/intel/idm/Makefile2
-rw-r--r--usr/src/uts/intel/ip/ip.global-objs.debug6413
-rw-r--r--usr/src/uts/intel/ip/ip.global-objs.obj6413
-rw-r--r--usr/src/uts/intel/iscsi/Makefile2
-rw-r--r--usr/src/uts/intel/iscsit/Makefile2
-rw-r--r--usr/src/uts/intel/ksocket/Makefile84
-rw-r--r--usr/src/uts/intel/rts/Makefile11
-rw-r--r--usr/src/uts/intel/rts/rts.global-objs.debug645
-rw-r--r--usr/src/uts/intel/smbsrv/Makefile7
-rw-r--r--usr/src/uts/intel/socksctp/Makefile95
-rw-r--r--usr/src/uts/intel/socksdp/Makefile87
-rw-r--r--usr/src/uts/intel/tcp/Makefile16
-rw-r--r--usr/src/uts/intel/udp/Makefile17
-rw-r--r--usr/src/uts/sparc/Makefile.sparc.shared7
-rw-r--r--usr/src/uts/sparc/icmp/Makefile12
-rw-r--r--usr/src/uts/sparc/icmp/icmp.global-objs.debug646
-rw-r--r--usr/src/uts/sparc/idm/Makefile2
-rw-r--r--usr/src/uts/sparc/ip/ip.global-objs.debug6413
-rw-r--r--usr/src/uts/sparc/ip/ip.global-objs.obj6413
-rw-r--r--usr/src/uts/sparc/iscsi/Makefile2
-rw-r--r--usr/src/uts/sparc/iscsit/Makefile2
-rw-r--r--usr/src/uts/sparc/ksocket/Makefile84
-rw-r--r--usr/src/uts/sparc/ml/modstubs.s55
-rw-r--r--usr/src/uts/sparc/rts/Makefile11
-rw-r--r--usr/src/uts/sparc/rts/rts.global-objs.debug645
-rw-r--r--usr/src/uts/sparc/smbsrv/Makefile8
-rw-r--r--usr/src/uts/sparc/socksctp/Makefile96
-rw-r--r--usr/src/uts/sparc/socksdp/Makefile88
-rw-r--r--usr/src/uts/sparc/tcp/Makefile17
-rw-r--r--usr/src/uts/sparc/udp/Makefile17
195 files changed, 28628 insertions, 16272 deletions
diff --git a/usr/src/cmd/cmd-inet/etc/sock2path b/usr/src/cmd/cmd-inet/etc/sock2path
index 425d6c8006..aba55bb652 100644
--- a/usr/src/cmd/cmd-inet/etc/sock2path
+++ b/usr/src/cmd/cmd-inet/etc/sock2path
@@ -1,9 +1,8 @@
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
-# Common Development and Distribution License, Version 1.0 only
-# (the "License"). You may not use this file except in compliance
-# with the License.
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
@@ -18,39 +17,37 @@
#
# CDDL HEADER END
#
-# Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-# ident "%Z%%M% %I% %E% SMI"
-#
# socket configuration information
#
-# Family Type Protocol Path
- 2 2 0 /dev/tcp
- 2 2 6 /dev/tcp
+# Family Type Protocol Dev|Module
+ 2 2 0 tcp
+ 2 2 6 tcp
- 26 2 0 /dev/tcp6
- 26 2 6 /dev/tcp6
+ 26 2 0 tcp
+ 26 2 6 tcp
- 2 1 0 /dev/udp
- 2 1 17 /dev/udp
+ 2 1 0 udp
+ 2 1 17 udp
- 26 1 0 /dev/udp6
- 26 1 17 /dev/udp6
+ 26 1 0 udp
+ 26 1 17 udp
1 2 0 /dev/ticotsord
1 6 0 /dev/ticotsord
1 1 0 /dev/ticlts
- 2 4 0 /dev/rawip
- 26 4 0 /dev/rawip6
+ 2 4 0 icmp
+ 26 4 0 icmp
- 2 2 132 /dev/sctp
- 26 2 132 /dev/sctp6
- 2 6 132 /dev/sctp
- 26 6 132 /dev/sctp6
+ 2 2 132 socksctp
+ 26 2 132 socksctp
+ 2 6 132 socksctp
+ 26 6 132 socksctp
- 24 4 0 /dev/rts
+ 24 4 0 rts
27 4 2 /dev/keysock
28 2 0 /dev/nca
diff --git a/usr/src/cmd/cmd-inet/usr.bin/netstat/unix.c b/usr/src/cmd/cmd-inet/usr.bin/netstat/unix.c
index 5e7afa8e3d..175310a9a6 100644
--- a/usr/src/cmd/cmd-inet/usr.bin/netstat/unix.c
+++ b/usr/src/cmd/cmd-inet/usr.bin/netstat/unix.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2001 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -37,8 +36,6 @@
* contributors.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* code for netstat's -k option
*
@@ -130,8 +127,8 @@ print_kn(kstat_t *ksp)
(void) printf("\nActive UNIX domain sockets\n");
(void) printf("%-8.8s %-10.10s %8.8s %8.8s "
- "Local Addr Remote Addr\n",
- "Address", "Type", "Vnode", "Conn");
+ "Local Addr Remote Addr\n",
+ "Address", "Type", "Vnode", "Conn");
/* for each sockinfo structure, display what we need: */
for (i = 0; i < ksp->ks_ndata; i++) {
@@ -164,13 +161,13 @@ print_kn(kstat_t *ksp)
if ((psi->si_state & SS_ISBOUND) &&
strlen(psi->si_laddr_sun_path) != 0 &&
psi->si_laddr_soa_len != 0) {
- if (psi->si_state & SS_FADDR_NOXLATE) {
+ if (psi->si_faddr_noxlate) {
(void) printf(" (socketpair) ");
} else {
if (psi->si_laddr_soa_len >
- sizeof (psi->si_laddr_family))
+ sizeof (psi->si_laddr_family))
(void) printf("%s ",
- psi->si_laddr_sun_path);
+ psi->si_laddr_sun_path);
else
(void) printf(" ");
}
@@ -182,13 +179,13 @@ print_kn(kstat_t *ksp)
strlen(psi->si_faddr_sun_path) != 0 &&
psi->si_faddr_soa_len != 0) {
- if (psi->si_state & SS_FADDR_NOXLATE) {
+ if (psi->si_faddr_noxlate) {
(void) printf(" (socketpair) ");
} else {
if (psi->si_faddr_soa_len >
- sizeof (psi->si_faddr_family))
+ sizeof (psi->si_faddr_family))
(void) printf("%s ",
- psi->si_faddr_sun_path);
+ psi->si_faddr_sun_path);
else
(void) printf(" ");
}
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/soconfig.c b/usr/src/cmd/cmd-inet/usr.sbin/soconfig.c
index 5d3838623f..b5c45f7b6f 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/soconfig.c
+++ b/usr/src/cmd/cmd-inet/usr.sbin/soconfig.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright (c) 1991-1996,2001 by Sun Microsystems, Inc.
- * All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <stdio.h>
#include <sys/stat.h>
#include <stdlib.h>
@@ -40,12 +37,12 @@
* Usage:
* sonconfig -f <file>
* Reads input from file. The file is structured as
- * <fam> <type> <protocol> <path>
+ * <fam> <type> <protocol> <path|module>
* <fam> <type> <protocol>
* with the first line registering and the second line
* deregistering.
*
- * soconfig <fam> <type> <protocol> <path>
+ * soconfig <fam> <type> <protocol> <path|module>
* registers
*
* soconfig <fam> <type> <protocol>
@@ -99,9 +96,9 @@ static void
usage(void)
{
fprintf(stderr, gettext(
- "Usage: soconfig -f <file>\n"
- "\tsoconfig <fam> <type> <protocol> <path>\n"
- "\tsoconfig <fam> <type> <protocol>\n"));
+ "Usage: soconfig -f <file>\n"
+ "\tsoconfig <fam> <type> <protocol> <path|module>\n"
+ "\tsoconfig <fam> <type> <protocol>\n"));
}
/*
@@ -131,7 +128,7 @@ parse_file(char *filename)
linecount++;
strcpy(pline, line);
argcount = split_line(pline, argvec,
- sizeof (argvec) / sizeof (argvec[0]));
+ sizeof (argvec) / sizeof (argvec[0]));
#ifdef DEBUG
{
int i;
@@ -147,18 +144,18 @@ parse_file(char *filename)
break;
case 3:
numerror += parse_params(argvec[0], argvec[1],
- argvec[2], NULL, linecount);
+ argvec[2], NULL, linecount);
break;
case 4:
numerror += parse_params(argvec[0], argvec[1],
- argvec[2], argvec[3], linecount);
+ argvec[2], argvec[3], linecount);
break;
default:
numerror++;
fprintf(stderr,
- gettext("Malformed line: <%s>\n"), line);
+ gettext("Malformed line: <%s>\n"), line);
fprintf(stderr,
- gettext("\ton line %d\n"), linecount);
+ gettext("\ton line %d\n"), linecount);
break;
}
}
@@ -223,7 +220,7 @@ parse_params(char *famstr, char *typestr, char *protostr, char *path, int line)
fprintf(stderr, gettext("Bad family number: %s\n"), famstr);
if (line != -1)
fprintf(stderr,
- gettext("\ton line %d\n"), line);
+ gettext("\ton line %d\n"), line);
else {
fprintf(stderr, "\n");
usage();
@@ -234,10 +231,10 @@ parse_params(char *famstr, char *typestr, char *protostr, char *path, int line)
type = parse_int(typestr);
if (type == -1) {
fprintf(stderr,
- gettext("Bad socket type number: %s\n"), typestr);
+ gettext("Bad socket type number: %s\n"), typestr);
if (line != -1)
fprintf(stderr,
- gettext("\ton line %d\n"), line);
+ gettext("\ton line %d\n"), line);
else {
fprintf(stderr, "\n");
usage();
@@ -248,10 +245,10 @@ parse_params(char *famstr, char *typestr, char *protostr, char *path, int line)
protocol = parse_int(protostr);
if (protocol == -1) {
fprintf(stderr,
- gettext("Bad protocol number: %s\n"), protostr);
+ gettext("Bad protocol number: %s\n"), protostr);
if (line != -1)
fprintf(stderr,
- gettext("\ton line %d\n"), line);
+ gettext("\ton line %d\n"), line);
else {
fprintf(stderr, "\n");
usage();
@@ -263,11 +260,12 @@ parse_params(char *famstr, char *typestr, char *protostr, char *path, int line)
if (path != NULL) {
struct stat stats;
- if (stat(path, &stats) == -1) {
+ if (strncmp(path, "/dev", strlen("/dev")) == 0 &&
+ stat(path, &stats) == -1) {
perror(path);
if (line != -1)
fprintf(stderr,
- gettext("\ton line %d\n"), line);
+ gettext("\ton line %d\n"), line);
else {
fprintf(stderr, "\n");
usage();
@@ -278,7 +276,7 @@ parse_params(char *famstr, char *typestr, char *protostr, char *path, int line)
#ifdef DEBUG
printf("not calling sockconfig(%d, %d, %d, %s)\n",
- fam, type, protocol, path == NULL ? "(null)" : path);
+ fam, type, protocol, path == NULL ? "(null)" : path);
#else
if (_sockconfig(fam, type, protocol, path) == -1) {
perror("sockconfig");
diff --git a/usr/src/cmd/mdb/Makefile.common b/usr/src/cmd/mdb/Makefile.common
index ed27426b8d..bb341fdc8f 100644
--- a/usr/src/cmd/mdb/Makefile.common
+++ b/usr/src/cmd/mdb/Makefile.common
@@ -87,6 +87,7 @@ COMMON_MODULES_KVM = \
sdbc \
smbfs \
smbsrv \
+ sockfs \
specfs \
sppp \
stmf \
diff --git a/usr/src/cmd/mdb/common/modules/genunix/net.c b/usr/src/cmd/mdb/common/modules/genunix/net.c
index c8785ed796..987e3b52a0 100644
--- a/usr/src/cmd/mdb/common/modules/genunix/net.c
+++ b/usr/src/cmd/mdb/common/modules/genunix/net.c
@@ -19,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <mdb/mdb_modapi.h>
#include <mdb/mdb_ks.h>
#include <mdb/mdb_ctf.h>
@@ -50,6 +48,7 @@
#include <inet/arp_impl.h>
#include <inet/rawip_impl.h>
#include <inet/mi.h>
+#include <fs/sockfs/socktpi_impl.h>
#define ADDR_V6_WIDTH 23
#define ADDR_V4_WIDTH 15
@@ -248,7 +247,7 @@ sonode_walk_init(mdb_walk_state_t *wsp)
}
}
- wsp->walk_data = mdb_alloc(sizeof (struct sonode), UM_SLEEP);
+ wsp->walk_data = mdb_alloc(sizeof (struct sotpi_sonode), UM_SLEEP);
return (WALK_NEXT);
}
@@ -256,12 +255,12 @@ int
sonode_walk_step(mdb_walk_state_t *wsp)
{
int status;
- struct sonode *sonodep;
+ struct sotpi_sonode *stp;
if (wsp->walk_addr == NULL)
return (WALK_DONE);
- if (mdb_vread(wsp->walk_data, sizeof (struct sonode),
+ if (mdb_vread(wsp->walk_data, sizeof (struct sotpi_sonode),
wsp->walk_addr) == -1) {
mdb_warn("failed to read sonode at %p", wsp->walk_addr);
return (WALK_ERR);
@@ -270,16 +269,16 @@ sonode_walk_step(mdb_walk_state_t *wsp)
status = wsp->walk_callback(wsp->walk_addr, wsp->walk_data,
wsp->walk_cbdata);
- sonodep = wsp->walk_data;
+ stp = wsp->walk_data;
- wsp->walk_addr = (uintptr_t)sonodep->so_next;
+ wsp->walk_addr = (uintptr_t)stp->st_info.sti_next_so;
return (status);
}
void
sonode_walk_fini(mdb_walk_state_t *wsp)
{
- mdb_free(wsp->walk_data, sizeof (struct sonode));
+ mdb_free(wsp->walk_data, sizeof (struct sotpi_sonode));
}
struct mi_walk_data {
@@ -517,9 +516,9 @@ sonode(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
mdb_printf(" %4hi", so.so_type);
}
- mdb_printf(" %5hi %05x %04x %04hx %0?p\n",
+ mdb_printf(" %5hi %05x %04x %04hx\n",
so.so_protocol, so.so_state, so.so_mode,
- so.so_flag, so.so_accessvp);
+ so.so_flag);
return (DCMD_OK);
}
@@ -740,12 +739,13 @@ netstat_udpv6_cb(uintptr_t kaddr, const void *walk_data, void *cb_data)
* returns 0 on success, -1 otherwise
*/
static int
-netstat_unix_name_pr(const struct sonode *so, const struct soaddr *soa)
+netstat_unix_name_pr(const struct sotpi_sonode *st, const struct soaddr *soa)
{
+ const struct sonode *so = &st->st_sonode;
const char none[] = " (none)";
if ((so->so_state & SS_ISBOUND) && (soa->soa_len != 0)) {
- if (so->so_state & SS_FADDR_NOXLATE) {
+ if (st->st_info.sti_faddr_noxlate) {
mdb_printf("%-14s ", " (socketpair)");
} else {
if (soa->soa_len > sizeof (sa_family_t)) {
@@ -775,9 +775,11 @@ netstat_unix_name_pr(const struct sonode *so, const struct soaddr *soa)
static int
netstat_unix_cb(uintptr_t kaddr, const void *walk_data, void *cb_data)
{
- const struct sonode *so = walk_data;
+ const struct sotpi_sonode *st = walk_data;
+ const struct sonode *so = &st->st_sonode;
+ const struct sotpi_info *sti = &st->st_info;
- if (so->so_accessvp == NULL)
+ if (so->so_count == 0)
return (WALK_NEXT);
if (so->so_family != AF_UNIX) {
@@ -787,7 +789,7 @@ netstat_unix_cb(uintptr_t kaddr, const void *walk_data, void *cb_data)
mdb_printf("%-?p ", kaddr);
- switch (so->so_serv_type) {
+ switch (sti->sti_serv_type) {
case T_CLTS:
mdb_printf("%-10s ", "dgram");
break;
@@ -798,27 +800,27 @@ netstat_unix_cb(uintptr_t kaddr, const void *walk_data, void *cb_data)
mdb_printf("%-10s ", "stream-ord");
break;
default:
- mdb_printf("%-10i ", so->so_serv_type);
+ mdb_printf("%-10i ", sti->sti_serv_type);
}
if ((so->so_state & SS_ISBOUND) &&
- (so->so_ux_laddr.soua_magic == SOU_MAGIC_EXPLICIT)) {
- mdb_printf("%0?p ", so->so_ux_laddr.soua_vp);
+ (sti->sti_ux_laddr.soua_magic == SOU_MAGIC_EXPLICIT)) {
+ mdb_printf("%0?p ", sti->sti_ux_laddr.soua_vp);
} else {
mdb_printf("%0?p ", NULL);
}
if ((so->so_state & SS_ISCONNECTED) &&
- (so->so_ux_faddr.soua_magic == SOU_MAGIC_EXPLICIT)) {
- mdb_printf("%0?p ", so->so_ux_faddr.soua_vp);
+ (sti->sti_ux_faddr.soua_magic == SOU_MAGIC_EXPLICIT)) {
+ mdb_printf("%0?p ", sti->sti_ux_faddr.soua_vp);
} else {
mdb_printf("%0?p ", NULL);
}
- if (netstat_unix_name_pr(so, &so->so_laddr) == -1)
+ if (netstat_unix_name_pr(st, &sti->sti_laddr) == -1)
return (WALK_ERR);
- if (netstat_unix_name_pr(so, &so->so_faddr) == -1)
+ if (netstat_unix_name_pr(st, &sti->sti_faddr) == -1)
return (WALK_ERR);
mdb_printf("%4i\n", so->so_zoneid);
diff --git a/usr/src/cmd/mdb/common/modules/genunix/vfs.c b/usr/src/cmd/mdb/common/modules/genunix/vfs.c
index 5c5fc3361e..b12cdca0c9 100644
--- a/usr/src/cmd/mdb/common/modules/genunix/vfs.c
+++ b/usr/src/cmd/mdb/common/modules/genunix/vfs.c
@@ -19,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <mdb/mdb_modapi.h>
#include <mdb/mdb_ks.h>
@@ -47,6 +45,11 @@
#include <sys/socketvar.h>
#include <sys/strsubr.h>
#include <sys/un.h>
+#include <fs/sockfs/socktpi_impl.h>
+#include <inet/ipclassifier.h>
+#include <inet/ip_if.h>
+#include <inet/sctp/sctp_impl.h>
+#include <inet/sctp/sctp_addr.h>
int
vfs_walk_init(mdb_walk_state_t *wsp)
@@ -173,7 +176,7 @@ read_fsname(uintptr_t vfsp, char *fsname)
#define FSINFO_MNTLEN 56
#endif
-/*ARGSUSED*/
+/* ARGSUSED */
int
fsinfo(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
@@ -387,14 +390,14 @@ pfiles_print_addr(struct sockaddr *addr)
switch (addr->sa_family) {
case AF_INET:
- /*LINTED: alignment*/
+ /* LINTED: alignment */
s_in = (struct sockaddr_in *)addr;
mdb_nhconvert(&port, &s_in->sin_port, sizeof (port));
mdb_printf("AF_INET %I %d ", s_in->sin_addr.s_addr, port);
break;
case AF_INET6:
- /*LINTED: alignment*/
+ /* LINTED: alignment */
s_in6 = (struct sockaddr_in6 *)addr;
mdb_nhconvert(&port, &s_in6->sin6_port, sizeof (port));
mdb_printf("AF_INET6 %N %d ", &(s_in6->sin6_addr), port);
@@ -410,31 +413,39 @@ pfiles_print_addr(struct sockaddr *addr)
}
}
-
static int
-pfiles_get_sonode(uintptr_t vp, struct sonode *sonode)
+pfiles_get_sonode(vnode_t *v_sock, struct sonode *sonode)
{
- vnode_t v;
- struct stdata stream;
-
- if (mdb_vread(&v, sizeof (v), vp) == -1) {
- mdb_warn("failed to read socket vnode");
+ if (mdb_vread(sonode, sizeof (struct sonode),
+ (uintptr_t)v_sock->v_data) == -1) {
+ mdb_warn("failed to read sonode");
return (-1);
}
- if (mdb_vread(&stream, sizeof (stream), (uintptr_t)v.v_stream) == -1) {
+ return (0);
+}
+
+static int
+pfiles_get_tpi_sonode(vnode_t *v_sock, sotpi_sonode_t *sotpi_sonode)
+{
+
+ struct stdata stream;
+
+ if (mdb_vread(&stream, sizeof (stream),
+ (uintptr_t)v_sock->v_stream) == -1) {
mdb_warn("failed to read stream data");
return (-1);
}
- if (mdb_vread(&v, sizeof (v), (uintptr_t)stream.sd_vnode) == -1) {
+ if (mdb_vread(v_sock, sizeof (vnode_t),
+ (uintptr_t)stream.sd_vnode) == -1) {
mdb_warn("failed to read stream vnode");
return (-1);
}
- if (mdb_vread(sonode, sizeof (struct sonode),
- (uintptr_t)v.v_data) == -1) {
- mdb_warn("failed to read sonode");
+ if (mdb_vread(sotpi_sonode, sizeof (sotpi_sonode_t),
+ (uintptr_t)v_sock->v_data) == -1) {
+ mdb_warn("failed to read sotpi_sonode");
return (-1);
}
@@ -470,16 +481,20 @@ pfiles_dig_pathname(uintptr_t vp, char *path)
/*
* For sockets, we won't find a path unless we print the path
- * associated with the accessvp.
+ * associated with transport's STREAM device.
*/
if (v.v_type == VSOCK) {
struct sonode sonode;
- if (pfiles_get_sonode(vp, &sonode) == -1) {
+ if (pfiles_get_sonode(&v, &sonode) == -1) {
return (-1);
}
-
- vp = (uintptr_t)sonode.so_accessvp;
+ if (!SOCK_IS_NONSTR(&sonode)) {
+ struct sockparams *sp = sonode.so_sockparams;
+ vp = (uintptr_t)sp->sp_sdev_info.sd_vnode;
+ } else {
+ vp = NULL;
+ }
}
}
@@ -531,6 +546,364 @@ struct pfiles_cbdata {
int fd;
};
+/*
+ * Conversions between an object and its embedded list linkage, using the
+ * list_offset recorded in the list header (a):
+ *   list_d2l    - object pointer + list_offset  -> list_node_t pointer
+ *   list_object - list_node_t pointer - list_offset -> object pointer
+ * Both are pure pointer arithmetic; neither reads through the pointer it is
+ * given, so they can be applied to addresses that live in the target.
+ */
+#define list_d2l(a, obj) ((list_node_t *)(((char *)obj) + (a)->list_offset))
+#define list_object(a, node) ((void *)(((char *)node) - (a)->list_offset))
+
+/*
+ * SCTP interface for getting the first source address of a sctp_t.
+ *
+ * sctp points at a copy of the target's sctp_t (the caller fills it with
+ * mdb_vread()). addr receives the result: it is written as a sin_t for
+ * AF_INET and as a sin6_t for AF_INET6, so the caller's buffer must be
+ * large enough for the association's address family.
+ *
+ * Returns 0 once one address has been stored in addr, and -1 if no usable
+ * address was found or a read from the target failed.
+ */
+int
+sctp_getsockaddr(sctp_t *sctp, struct sockaddr *addr)
+{
+ int err = -1;
+ int i;
+ int l;
+ sctp_saddr_ipif_t *pobj;
+ sctp_saddr_ipif_t obj;
+ size_t added = 0;
+ sin6_t *sin6;
+ sin_t *sin4;
+ int scanned = 0;
+ boolean_t skip_lback = B_FALSE;
+
+ addr->sa_family = sctp->sctp_family;
+ if (sctp->sctp_nsaddrs == 0)
+ goto done;
+
+ /*
+ * Skip loopback addresses for non-loopback assoc.
+ */
+ if (sctp->sctp_state >= SCTPS_ESTABLISHED && !sctp->sctp_loopback) {
+ skip_lback = B_TRUE;
+ }
+
+ /* Walk every hash bucket of source addresses; empty buckets are skipped. */
+ for (i = 0; i < SCTP_IPIF_HASH; i++) {
+ if (sctp->sctp_saddrs[i].ipif_count == 0)
+ continue;
+
+ /*
+ * pobj is the target address of the first entry on this bucket's
+ * list; obj is the local copy read from it.
+ */
+ pobj = list_object(&sctp->sctp_saddrs[i].sctp_ipif_list,
+ sctp->sctp_saddrs[i].sctp_ipif_list.list_head.list_next);
+ if (mdb_vread(&obj, sizeof (sctp_saddr_ipif_t),
+ (uintptr_t)pobj) == -1) {
+ mdb_warn("failed to read sctp_saddr_ipif_t");
+ return (err);
+ }
+
+ for (l = 0; l < sctp->sctp_saddrs[i].ipif_count; l++) {
+ sctp_ipif_t ipif;
+ in6_addr_t laddr;
+ list_node_t *pnode;
+ list_node_t node;
+
+ if (mdb_vread(&ipif, sizeof (sctp_ipif_t),
+ (uintptr_t)obj.saddr_ipifp) == -1) {
+ mdb_warn("failed to read sctp_ipif_t");
+ return (err);
+ }
+ laddr = ipif.sctp_ipif_saddr;
+
+ scanned++;
+ /*
+ * && binds tighter than ||, so skip_lback qualifies only the
+ * loopback test; condemned and don't-source entries are always
+ * skipped.
+ *
+ * NOTE(review): ipif was copied from the target by mdb_vread()
+ * above, so sctp_ipif_ill holds a target address. It is
+ * dereferenced here without a further mdb_vread() - confirm
+ * whether the sctp_ill_t needs to be read into a local copy.
+ */
+ if ((ipif.sctp_ipif_state == SCTP_IPIFS_CONDEMNED) ||
+ SCTP_DONT_SRC(&obj) ||
+ (ipif.sctp_ipif_ill->sctp_ill_flags &
+ PHYI_LOOPBACK) && skip_lback) {
+ if (scanned >= sctp->sctp_nsaddrs)
+ goto done;
+
+ /* Advance pobj/obj to the next entry on the list. */
+ /* LINTED: alignment */
+ pnode = list_d2l(&sctp->sctp_saddrs[i].
+ sctp_ipif_list, pobj);
+ if (mdb_vread(&node, sizeof (list_node_t),
+ (uintptr_t)pnode) == -1) {
+ mdb_warn("failed to read list_node_t");
+ return (err);
+ }
+ pobj = list_object(&sctp->sctp_saddrs[i].
+ sctp_ipif_list, node.list_next);
+ if (mdb_vread(&obj, sizeof (sctp_saddr_ipif_t),
+ (uintptr_t)pobj) == -1) {
+ mdb_warn("failed to read "
+ "sctp_saddr_ipif_t");
+ return (err);
+ }
+ continue;
+ }
+
+ switch (sctp->sctp_family) {
+ case AF_INET:
+ /* LINTED: alignment */
+ sin4 = (sin_t *)addr;
+ if ((sctp->sctp_state <= SCTPS_LISTEN) &&
+ sctp->sctp_bound_to_all) {
+ sin4->sin_addr.s_addr = INADDR_ANY;
+ sin4->sin_port = sctp->sctp_lport;
+ } else {
+ /* added is still 0 here, so this leaves sin4 at addr. */
+ sin4 += added;
+ sin4->sin_family = AF_INET;
+ sin4->sin_port = sctp->sctp_lport;
+ IN6_V4MAPPED_TO_INADDR(&laddr,
+ &sin4->sin_addr);
+ }
+ break;
+
+ case AF_INET6:
+ /* LINTED: alignment */
+ sin6 = (sin6_t *)addr;
+ if ((sctp->sctp_state <= SCTPS_LISTEN) &&
+ sctp->sctp_bound_to_all) {
+ bzero(&sin6->sin6_addr,
+ sizeof (sin6->sin6_addr));
+ sin6->sin6_port = sctp->sctp_lport;
+ } else {
+ /* added is still 0 here, so this leaves sin6 at addr. */
+ sin6 += added;
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_port = sctp->sctp_lport;
+ sin6->sin6_addr = laddr;
+ }
+ /*
+ * NOTE(review): sctp_ip6h is a pointer field of the copied
+ * sctp_t and is dereferenced directly here; presumably it is
+ * a target address that needs mdb_vread() - confirm.
+ */
+ sin6->sin6_flowinfo = sctp->sctp_ip6h->ip6_vcf &
+ ~IPV6_VERS_AND_FLOW_MASK;
+ sin6->sin6_scope_id = 0;
+ sin6->__sin6_src_id = 0;
+ break;
+ }
+ /*
+ * added was 0 and has just been incremented, so the test below
+ * always succeeds: the function returns after the first usable
+ * address and the list-advance code that follows is not reached.
+ */
+ added++;
+ if (added >= 1) {
+ err = 0;
+ goto done;
+ }
+ if (scanned >= sctp->sctp_nsaddrs)
+ goto done;
+
+ /* LINTED: alignment */
+ pnode = list_d2l(&sctp->sctp_saddrs[i].sctp_ipif_list,
+ pobj);
+ if (mdb_vread(&node, sizeof (list_node_t),
+ (uintptr_t)pnode) == -1) {
+ mdb_warn("failed to read list_node_t");
+ return (err);
+ }
+ pobj = list_object(&sctp->sctp_saddrs[i].
+ sctp_ipif_list, node.list_next);
+ if (mdb_vread(&obj, sizeof (sctp_saddr_ipif_t),
+ (uintptr_t)pobj) == -1) {
+ mdb_warn("failed to read sctp_saddr_ipif_t");
+ return (err);
+ }
+ }
+ }
+done:
+ return (err);
+}
+
+/*
+ * SCTP interface for getting the primary peer address of a sctp_t.
+ *
+ * sctp points at a copy of the target's sctp_t. addr receives the result:
+ * it is written as a struct sockaddr_in for AF_INET and as a struct
+ * sockaddr_in6 for AF_INET6, so the caller's buffer must be large enough
+ * for the association's address family.
+ *
+ * Returns 0 on success, -1 if the association has no peer address list or
+ * the primary peer entry could not be read from the target.
+ */
+static int
+sctp_getpeeraddr(sctp_t *sctp, struct sockaddr *addr)
+{
+ struct sockaddr_in *sin4;
+ struct sockaddr_in6 *sin6;
+ sctp_faddr_t sctp_primary;
+ in6_addr_t faddr;
+
+ /* Only tested against NULL; the list itself is never walked here. */
+ if (sctp->sctp_faddrs == NULL)
+ return (-1);
+
+ addr->sa_family = sctp->sctp_family;
+ /* Copy the primary peer entry out of the target. */
+ if (mdb_vread(&sctp_primary, sizeof (sctp_faddr_t),
+ (uintptr_t)sctp->sctp_primary) == -1) {
+ mdb_warn("failed to read sctp primary faddr");
+ return (-1);
+ }
+ faddr = sctp_primary.faddr;
+
+ /* Families other than AF_INET/AF_INET6 leave addr with only sa_family set. */
+ switch (sctp->sctp_family) {
+ case AF_INET:
+ /* LINTED: alignment */
+ sin4 = (struct sockaddr_in *)addr;
+ IN6_V4MAPPED_TO_INADDR(&faddr, &sin4->sin_addr);
+ sin4->sin_port = sctp->sctp_fport;
+ sin4->sin_family = AF_INET;
+ break;
+
+ case AF_INET6:
+ /* LINTED: alignment */
+ sin6 = (struct sockaddr_in6 *)addr;
+ sin6->sin6_addr = faddr;
+ sin6->sin6_port = sctp->sctp_fport;
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_flowinfo = 0;
+ sin6->sin6_scope_id = 0;
+ sin6->__sin6_src_id = 0;
+ break;
+ }
+
+ return (0);
+}
+
+/*
+ * Print the cached local ("socket: ") and remote ("remote: ") addresses of
+ * a TPI socket.
+ *
+ * sotpi_sonode points at a copy of the target's sotpi_sonode_t. An address
+ * is printed only when its sti_laddr_valid / sti_faddr_valid flag is 1.
+ *
+ * Returns 0 on success, -1 if an address could not be read from the target.
+ */
+static int
+tpi_sock_print(sotpi_sonode_t *sotpi_sonode)
+{
+	if (sotpi_sonode->st_info.sti_laddr_valid == 1) {
+		/*
+		 * UM_GC hands the buffer to mdb's garbage collector, so it is
+		 * released when the command completes, including on the
+		 * error return below. Without it the buffer was never freed.
+		 */
+		struct sockaddr *laddr =
+		    mdb_alloc(sotpi_sonode->st_info.sti_laddr_len,
+		    UM_SLEEP | UM_GC);
+		if (mdb_vread(laddr, sotpi_sonode->st_info.sti_laddr_len,
+		    (uintptr_t)sotpi_sonode->st_info.sti_laddr_sa) == -1) {
+			mdb_warn("failed to read sotpi_sonode socket addr");
+			return (-1);
+		}
+
+		mdb_printf("socket: ");
+		pfiles_print_addr(laddr);
+	}
+
+	if (sotpi_sonode->st_info.sti_faddr_valid == 1) {
+		/* Garbage-collected for the same reason as laddr above. */
+		struct sockaddr *faddr =
+		    mdb_alloc(sotpi_sonode->st_info.sti_faddr_len,
+		    UM_SLEEP | UM_GC);
+		if (mdb_vread(faddr, sotpi_sonode->st_info.sti_faddr_len,
+		    (uintptr_t)sotpi_sonode->st_info.sti_faddr_sa) == -1) {
+			mdb_warn("failed to read sotpi_sonode remote addr");
+			return (-1);
+		}
+
+		mdb_printf("remote: ");
+		pfiles_print_addr(faddr);
+	}
+
+	return (0);
+}
+
+/*
+ * Print the local address, and in some states the remote address, of a
+ * socket whose so_proto_handle points at a conn_t.
+ *
+ * socknode points at a copy of the target's sonode. Only AF_INET and
+ * AF_INET6 are decoded; any other family prints "AF_?? (<family>)".
+ *
+ * Returns 0 on success, -1 if the conn_t could not be read from the target.
+ */
+static int
+tcpip_sock_print(struct sonode *socknode)
+{
+ switch (socknode->so_family) {
+ case AF_INET:
+ {
+ /*
+ * The local variable shares its name with the conn_t type, so
+ * within this scope sizeof (conn_t) measures the variable.
+ */
+ conn_t conn_t;
+ in_port_t port;
+
+ if (mdb_vread(&conn_t, sizeof (conn_t),
+ (uintptr_t)socknode->so_proto_handle) == -1) {
+ mdb_warn("failed to read conn_t V4");
+ return (-1);
+ }
+
+ mdb_printf("socket: ");
+ mdb_nhconvert(&port, &conn_t.conn_lport, sizeof (port));
+ mdb_printf("AF_INET %I %d ", conn_t.conn_src, port);
+
+ /*
+ * If this is a listening socket, we don't print
+ * the remote address.
+ *
+ * NOTE(review): for TCP the test below is IPCL_IS_BOUND() == 0;
+ * confirm that this is the intended "not listening" condition.
+ */
+ if (IPCL_IS_TCP(&conn_t) && IPCL_IS_BOUND(&conn_t) == 0 ||
+ IPCL_IS_UDP(&conn_t) && IPCL_IS_CONNECTED(&conn_t)) {
+ mdb_printf("remote: ");
+ mdb_nhconvert(&port, &conn_t.conn_fport, sizeof (port));
+ mdb_printf("AF_INET %I %d ", conn_t.conn_rem, port);
+ }
+
+ break;
+ }
+
+ case AF_INET6:
+ {
+ /* Same variable/type name sharing as in the AF_INET case. */
+ conn_t conn_t;
+ in_port_t port;
+
+ if (mdb_vread(&conn_t, sizeof (conn_t),
+ (uintptr_t)socknode->so_proto_handle) == -1) {
+ mdb_warn("failed to read conn_t V6");
+ return (-1);
+ }
+
+ mdb_printf("socket: ");
+ mdb_nhconvert(&port, &conn_t.conn_lport, sizeof (port));
+ mdb_printf("AF_INET6 %N %d ", &conn_t.conn_srcv6, port);
+
+ /*
+ * If this is a listening socket, we don't print
+ * the remote address.
+ *
+ * NOTE(review): same condition as the AF_INET case; confirm
+ * the TCP half matches the intent stated here.
+ */
+ if (IPCL_IS_TCP(&conn_t) && IPCL_IS_BOUND(&conn_t) == 0 ||
+ IPCL_IS_UDP(&conn_t) && IPCL_IS_CONNECTED(&conn_t)) {
+ mdb_printf("remote: ");
+ mdb_nhconvert(&port, &conn_t.conn_fport, sizeof (port));
+ mdb_printf("AF_INET6 %N %d ", &conn_t.conn_remv6, port);
+ }
+
+ break;
+ }
+
+ default:
+ mdb_printf("AF_?? (%d)", socknode->so_family);
+ break;
+ }
+
+ return (0);
+}
+
+/*
+ * Print the first local address and the primary peer address of an SCTP
+ * socket whose so_proto_handle points at a sctp_t.
+ *
+ * socknode points at a copy of the target's sonode.
+ *
+ * Returns 0 on success (an address that cannot be determined is silently
+ * omitted), -1 if the sctp_t could not be read from the target.
+ */
+static int
+sctp_sock_print(struct sonode *socknode)
+{
+	/*
+	 * The local variable shares its name with the sctp_t type, so
+	 * sizeof (sctp_t) below measures the variable.
+	 */
+	sctp_t sctp_t;
+	struct sockaddr *laddr;
+	struct sockaddr *faddr;
+
+	/*
+	 * sctp_getsockaddr() and sctp_getpeeraddr() store a sockaddr_in6
+	 * into these buffers for AF_INET6 associations, so they must be
+	 * sized for the largest form; a bare struct sockaddr is too small
+	 * and would be overrun. UM_GC lets mdb release the buffers when
+	 * the command completes, including on the error return below.
+	 */
+	laddr = mdb_alloc(sizeof (struct sockaddr_in6), UM_SLEEP | UM_GC);
+	faddr = mdb_alloc(sizeof (struct sockaddr_in6), UM_SLEEP | UM_GC);
+
+	if (mdb_vread(&sctp_t, sizeof (sctp_t),
+	    (uintptr_t)socknode->so_proto_handle) == -1) {
+		mdb_warn("failed to read sctp_t");
+		return (-1);
+	}
+
+	/* Labels carry a trailing space, matching the other socket printers. */
+	if (sctp_getsockaddr(&sctp_t, laddr) == 0) {
+		mdb_printf("socket: ");
+		pfiles_print_addr(laddr);
+	}
+	if (sctp_getpeeraddr(&sctp_t, faddr) == 0) {
+		mdb_printf("remote: ");
+		pfiles_print_addr(faddr);
+	}
+
+	return (0);
+}
+
+/*
+ * Address printer for SDP sockets. Currently a stub: it prints nothing and
+ * always returns 0, so SDP entries in the dispatch table match without
+ * producing output.
+ */
+/* ARGSUSED */
+static int
+sdp_sock_print(struct sonode *socknode)
+{
+ return (0);
+}
+
+/*
+ * Dispatch table used to pick an address printer for a non-STREAMS socket.
+ * Each row gives a numeric address family, socket type and protocol, plus
+ * the routine that prints that kind of socket; the trailing comment names
+ * the corresponding transport device.
+ *
+ * NOTE(review): the family 24 row dispatches to tcpip_sock_print(), which
+ * only decodes AF_INET and AF_INET6; presumably such sockets fall through
+ * to its "AF_??" default branch - confirm that is intended.
+ */
+struct sock_print {
+ int family;
+ int type;
+ int pro;
+ int (*print)(struct sonode *socknode);
+} sock_prints[] = {
+ { 2, 2, 0, tcpip_sock_print }, /* /dev/tcp */
+ { 2, 2, 6, tcpip_sock_print }, /* /dev/tcp */
+ { 26, 2, 0, tcpip_sock_print }, /* /dev/tcp6 */
+ { 26, 2, 6, tcpip_sock_print }, /* /dev/tcp6 */
+ { 2, 1, 0, tcpip_sock_print }, /* /dev/udp */
+ { 2, 1, 17, tcpip_sock_print }, /* /dev/udp */
+ { 26, 1, 0, tcpip_sock_print }, /* /dev/udp6 */
+ { 26, 1, 17, tcpip_sock_print }, /* /dev/udp6 */
+ { 2, 4, 0, tcpip_sock_print }, /* /dev/rawip */
+ { 26, 4, 0, tcpip_sock_print }, /* /dev/rawip6 */
+ { 2, 2, 132, sctp_sock_print }, /* /dev/sctp */
+ { 26, 2, 132, sctp_sock_print }, /* /dev/sctp6 */
+ { 2, 6, 132, sctp_sock_print }, /* /dev/sctp */
+ { 26, 6, 132, sctp_sock_print }, /* /dev/sctp6 */
+ { 24, 4, 0, tcpip_sock_print }, /* /dev/rts */
+ { 2, 2, 257, sdp_sock_print }, /* /dev/sdp */
+ { 26, 2, 257, sdp_sock_print }, /* /dev/sdp */
+};
+
+/* Number of rows in sock_prints[]; valid indices are 0 .. NUM_SOCK_PRINTS - 1. */
+#define NUM_SOCK_PRINTS \
+ (sizeof (sock_prints) / sizeof (struct sock_print))
+
static int
pfile_callback(uintptr_t addr, const struct file *f, struct pfiles_cbdata *cb)
{
@@ -624,40 +997,62 @@ pfile_callback(uintptr_t addr, const struct file *f, struct pfiles_cbdata *cb)
case VSOCK:
{
- struct sonode sonode;
+ vnode_t v_sock;
+ struct sonode so;
- if (pfiles_get_sonode(realvpp, &sonode) == -1)
+ if (mdb_vread(&v_sock, sizeof (v_sock), realvpp) == -1) {
+ mdb_warn("failed to read socket vnode");
return (DCMD_ERR);
+ }
/*
- * If the address is cached in the sonode, use it; otherwise,
- * we print nothing.
+ * Sockets can be non-stream or stream; they have to be dealt
+ * with differently.
*/
- if (sonode.so_state & SS_LADDR_VALID) {
- struct sockaddr *laddr =
- mdb_alloc(sonode.so_laddr_len, UM_SLEEP);
- if (mdb_vread(laddr, sonode.so_laddr_len,
- (uintptr_t)sonode.so_laddr_sa) == -1) {
- mdb_warn("failed to read sonode socket addr");
+ if (v_sock.v_stream == NULL) {
+ if (pfiles_get_sonode(&v_sock, &so) == -1)
return (DCMD_ERR);
- }
- mdb_printf("socket: ");
- pfiles_print_addr(laddr);
- }
+ /*
+ * Pick the proper methods: call the printer of every
+ * sock_prints[] entry whose family and type match this
+ * socket and whose protocol matches too (for SOCK_RAW
+ * sockets the protocol is not compared).  The bound is
+ * strict because index NUM_SOCK_PRINTS is one past the
+ * last element of the table.
+ */
+ for (i = 0; i < NUM_SOCK_PRINTS; i++) {
+ if ((sock_prints[i].family == so.so_family &&
+ sock_prints[i].type == so.so_type &&
+ sock_prints[i].pro == so.so_protocol) ||
+ (sock_prints[i].family == so.so_family &&
+ sock_prints[i].type == so.so_type &&
+ so.so_type == SOCK_RAW)) {
+ if ((*sock_prints[i].print)(&so) == -1)
+ return (DCMD_ERR);
+ }
+ }
+ } else {
+ sotpi_sonode_t sotpi_sonode;
- if (sonode.so_state & SS_FADDR_VALID) {
- struct sockaddr *faddr =
- mdb_alloc(sonode.so_faddr_len, UM_SLEEP);
- if (mdb_vread(faddr, sonode.so_faddr_len,
- (uintptr_t)sonode.so_faddr_sa) == -1) {
- mdb_warn("failed to read sonode remote addr");
+ if (pfiles_get_sonode(&v_sock, &so) == -1)
return (DCMD_ERR);
+
+ /*
+ * If the socket is a fallback socket, read its related
+ * information separately; otherwise, read it as a whole
+ * tpi socket.
+ */
+ if (so.so_state & SS_FALLBACK_COMP) {
+ sotpi_sonode.st_sonode = so;
+
+ if (mdb_vread(&(sotpi_sonode.st_info),
+ sizeof (sotpi_info_t),
+ (uintptr_t)so.so_priv) == -1)
+ return (DCMD_ERR);
+ } else {
+ if (pfiles_get_tpi_sonode(&v_sock,
+ &sotpi_sonode) == -1)
+ return (DCMD_ERR);
}
- mdb_printf("remote: ");
- pfiles_print_addr(faddr);
+ if (tpi_sock_print(&sotpi_sonode) == -1)
+ return (DCMD_ERR);
}
+
break;
}
@@ -691,7 +1086,6 @@ pfile_callback(uintptr_t addr, const struct file *f, struct pfiles_cbdata *cb)
break;
}
-
mdb_printf("\n");
return (WALK_NEXT);
diff --git a/usr/src/cmd/mdb/common/modules/sockfs/sockfs.c b/usr/src/cmd/mdb/common/modules/sockfs/sockfs.c
new file mode 100644
index 0000000000..33b8d20f8a
--- /dev/null
+++ b/usr/src/cmd/mdb/common/modules/sockfs/sockfs.c
@@ -0,0 +1,154 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/types.h>
+#include <sys/stropts.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+
+#include <mdb/mdb_modapi.h>
+#include <mdb/mdb_ks.h>
+
+/*
+ * Resolve the named sockparams list head symbol and run the
+ * sockfs`sockparams dcmd over every element of that list, passing the
+ * caller's arguments through.
+ *
+ * Returns B_TRUE when the walk completed, B_FALSE (after a warning) when
+ * the symbol could not be found or the walk failed.
+ */
+static boolean_t
+sockparams_walk_list(const char *symname, int argc, const mdb_arg_t *argv)
+{
+	GElf_Sym listsym;
+	uintptr_t head;
+
+	if (mdb_lookup_by_name(symname, &listsym) != 0) {
+		mdb_warn("can't find symbol %s", symname);
+		return (B_FALSE);
+	}
+
+	/* The symbol's value is the address the "list" walker starts at. */
+	head = listsym.st_value;
+	if (mdb_pwalk_dcmd("list", "sockfs`sockparams", argc, argv, head) == 0)
+		return (B_TRUE);
+
+	mdb_warn("can't walk %s", symname);
+	return (B_FALSE);
+}
+
+/*
+ * dcmd to print sockparams info.
+ *
+ * If no address is given then the default is to print all sockparams on the
+ * global list (i.e., installed with soconfig(1)). To also print the ephemeral
+ * entries the '-e' flag should be used. Only ephemeral entries can be printed
+ * by specifying the '-E' flag.
+ *
+ * With an address, one summary line is printed for the sockparams at that
+ * address (or just the address itself when the output is being piped).
+ *
+ * Returns DCMD_OK on success, DCMD_USAGE for unrecognized options and
+ * DCMD_ERR if a list cannot be walked or the sockparams cannot be read.
+ */
+static int
+sockparams_prt(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+	struct sockparams sp;
+	char devbuf[1024];
+	char modbuf[256];
+	const char *strdev = "-";
+	const char *sockmod = "-";
+	uintptr_t devaddr;
+	uintptr_t modaddr;
+
+	if ((flags & DCMD_ADDRSPEC) == 0) {
+		uint_t opt_e = 0;
+		uint_t opt_E = 0;
+
+		/*
+		 * Determine what lists should be printed.  The option list
+		 * handed to mdb_getopts() must end with a NULL argument.
+		 */
+		if (mdb_getopts(argc, argv,
+		    'e', MDB_OPT_SETBITS, 1, &opt_e,
+		    'E', MDB_OPT_SETBITS, 1, &opt_E, NULL) != argc)
+			return (DCMD_USAGE);
+
+		if (!opt_E) {
+			if (!sockparams_walk_list("sphead", argc, argv))
+				return (DCMD_ERR);
+		}
+
+		if (opt_e || opt_E) {
+			if (!sockparams_walk_list("sp_ephem_list", argc, argv))
+				return (DCMD_ERR);
+		}
+
+		return (DCMD_OK);
+	}
+
+	/*
+	 * If we are piping the output, then just print out the address,
+	 * otherwise summarize the sockparams info.
+	 */
+	if ((flags & DCMD_PIPE_OUT) != 0) {
+		mdb_printf("%#lr\n", addr);
+		return (DCMD_OK);
+	}
+
+	if (DCMD_HDRSPEC(flags)) {
+		mdb_printf("%-?s %3s %3s %3s %15s %15s %6s %6s\n",
+		    "ADDR", "FAM", "TYP", "PRO", "STRDEV", "SOCKMOD", "REFS",
+		    "FLGS");
+	}
+
+	if (mdb_vread(&sp, sizeof (sp), addr) == -1) {
+		mdb_warn("failed to read sockparams at %0?p", addr);
+		return (DCMD_ERR);
+	}
+
+	/*
+	 * sp is a copy of the target's structure, so the string pointers in
+	 * it are target addresses; the strings have to be read from the
+	 * target before they can be printed.  A string that is absent or
+	 * unreadable is shown as "-".
+	 * NOTE(review): assumes sd_devpath and sp_smod_name are char
+	 * pointers in struct sockparams -- confirm against sys/socketvar.h.
+	 */
+	devaddr = (uintptr_t)sp.sp_sdev_info.sd_devpath;
+	if (devaddr != 0 &&
+	    mdb_readstr(devbuf, sizeof (devbuf), devaddr) > 0)
+		strdev = devbuf;
+
+	modaddr = (uintptr_t)sp.sp_smod_name;
+	if (modaddr != 0 &&
+	    mdb_readstr(modbuf, sizeof (modbuf), modaddr) > 0)
+		sockmod = modbuf;
+
+	mdb_printf("%0?p %3u %3u %3u %15s %15s %6u %#6x\n",
+	    addr,
+	    sp.sp_family, sp.sp_type, sp.sp_protocol,
+	    strdev, sockmod, sp.sp_refcnt,
+	    sp.sp_flags);
+
+	return (DCMD_OK);
+}
+
+/*
+ * Help function for the sockparams dcmd: prints a short description of
+ * the dcmd and of its -e and -E options.
+ */
+void
+sockparams_help(void)
+{
+	mdb_printf("Print sockparams information for a given sockparams ptr.\n"
+	    "Without the address, list available sockparams. Default "
+	    "behavior is to list only entries that were installed by the "
+	    "admin (via soconfig(1M)).\n\n"
+	    "Options:\n"
+	    "	-e:\t\tlist ephemeral sockparams\n"
+	    "	-E:\t\tonly list ephemeral sockparams\n");
+}
+
+/*
+ * dcmds provided by this module: "sockparams", implemented by
+ * sockparams_prt() with sockparams_help() as its help routine.  The table
+ * is terminated by an entry whose name is NULL.
+ */
+static const mdb_dcmd_t dcmds[] = {
+ { "sockparams", "[-eE]", "print sockparams", sockparams_prt,
+ sockparams_help },
+ { NULL }
+};
+
+/*
+ * Module information: the API version, the dcmd table above, and a NULL
+ * third member (presumably meaning the module has no walkers -- confirm
+ * against the mdb_modinfo_t definition).
+ */
+static const mdb_modinfo_t modinfo = { MDB_API_VERSION, dcmds, NULL };
+
+/*
+ * Returns a pointer to this module's mdb_modinfo_t.
+ */
+const mdb_modinfo_t *
+_mdb_init(void)
+{
+ return (&modinfo);
+}
diff --git a/usr/src/cmd/mdb/intel/amd64/sockfs/Makefile b/usr/src/cmd/mdb/intel/amd64/sockfs/Makefile
new file mode 100644
index 0000000000..9808e469f6
--- /dev/null
+++ b/usr/src/cmd/mdb/intel/amd64/sockfs/Makefile
@@ -0,0 +1,33 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+
+# amd64 build of the sockfs debugger module: sockfs.so, built from sockfs.c.
+MODULE = sockfs.so
+MDBTGT = kvm
+
+MODSRCS = sockfs.c
+
+# Common, 64-bit and amd64 definitions, then the generic module makefile
+# (presumably where MODULE, MDBTGT and MODSRCS are consumed -- confirm).
+include ../../../../Makefile.cmd
+include ../../../../Makefile.cmd.64
+include ../../Makefile.amd64
+include ../../../Makefile.module
diff --git a/usr/src/cmd/mdb/intel/ia32/sockfs/Makefile b/usr/src/cmd/mdb/intel/ia32/sockfs/Makefile
new file mode 100644
index 0000000000..9b14d2fd04
--- /dev/null
+++ b/usr/src/cmd/mdb/intel/ia32/sockfs/Makefile
@@ -0,0 +1,32 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+
+# ia32 build of the sockfs debugger module: sockfs.so, built from sockfs.c.
+MODULE = sockfs.so
+MDBTGT = kvm
+
+MODSRCS = sockfs.c
+
+# Common and ia32 definitions, then the generic module makefile
+# (presumably where MODULE, MDBTGT and MODSRCS are consumed -- confirm).
+include ../../../../Makefile.cmd
+include ../../Makefile.ia32
+include ../../../Makefile.module
diff --git a/usr/src/cmd/mdb/sparc/v9/sockfs/Makefile b/usr/src/cmd/mdb/sparc/v9/sockfs/Makefile
new file mode 100644
index 0000000000..9e65a6282b
--- /dev/null
+++ b/usr/src/cmd/mdb/sparc/v9/sockfs/Makefile
@@ -0,0 +1,33 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+
+# sparcv9 build of the sockfs debugger module: sockfs.so, built from sockfs.c.
+MODULE = sockfs.so
+MDBTGT = kvm
+
+MODSRCS = sockfs.c
+
+# Common, 64-bit and sparcv9 definitions, then the generic module makefile
+# (presumably where MODULE, MDBTGT and MODSRCS are consumed -- confirm).
+include ../../../../Makefile.cmd
+include ../../../../Makefile.cmd.64
+include ../../Makefile.sparcv9
+include ../../../Makefile.module
diff --git a/usr/src/pkgdefs/SUNWckr/prototype_com b/usr/src/pkgdefs/SUNWckr/prototype_com
index 1988298dfe..ead3a7e5e8 100644
--- a/usr/src/pkgdefs/SUNWckr/prototype_com
+++ b/usr/src/pkgdefs/SUNWckr/prototype_com
@@ -134,6 +134,7 @@ d none kernel/misc 755 root sys
d none kernel/sched 755 root sys
d none kernel/strmod 755 root sys
d none kernel/sys 755 root sys
+d none kernel/socketmod 755 root sys
d none lib 755 root bin
d none lib/svc 0755 root bin
d none lib/svc/method 0755 root bin
diff --git a/usr/src/pkgdefs/SUNWckr/prototype_i386 b/usr/src/pkgdefs/SUNWckr/prototype_i386
index 57be328034..adc41583bb 100644
--- a/usr/src/pkgdefs/SUNWckr/prototype_i386
+++ b/usr/src/pkgdefs/SUNWckr/prototype_i386
@@ -212,6 +212,7 @@ f none kernel/misc/ipc 755 root sys
f none kernel/misc/kbtrans 755 root sys
f none kernel/misc/kcf 755 root sys
f none kernel/misc/kmdbmod 755 root sys
+f none kernel/misc/ksocket 755 root sys
f none kernel/misc/mac 755 root sys
l none kernel/misc/md5=../../kernel/crypto/md5
f none kernel/misc/net80211 755 root sys
@@ -427,6 +428,7 @@ f none kernel/misc/amd64/ipc 755 root sys
f none kernel/misc/amd64/kbtrans 755 root sys
f none kernel/misc/amd64/kcf 755 root sys
f none kernel/misc/amd64/kmdbmod 755 root sys
+f none kernel/misc/amd64/ksocket 755 root sys
f none kernel/misc/amd64/mac 755 root sys
l none kernel/misc/amd64/md5=../../../kernel/crypto/amd64/md5
f none kernel/misc/amd64/net80211 755 root sys
@@ -497,3 +499,14 @@ f none kernel/kiconv/amd64/kiconv_ja 755 root sys
f none kernel/kiconv/amd64/kiconv_ko 755 root sys
f none kernel/kiconv/amd64/kiconv_sc 755 root sys
f none kernel/kiconv/amd64/kiconv_tc 755 root sys
+l none kernel/socketmod/icmp=../../kernel/drv/icmp
+l none kernel/socketmod/rts=../../kernel/drv/rts
+l none kernel/socketmod/tcp=../../kernel/drv/tcp
+l none kernel/socketmod/udp=../../kernel/drv/udp
+f none kernel/socketmod/socksctp 755 root sys
+d none kernel/socketmod/amd64 755 root sys
+l none kernel/socketmod/amd64/icmp=../../../kernel/drv/amd64/icmp
+l none kernel/socketmod/amd64/rts=../../../kernel/drv/amd64/rts
+l none kernel/socketmod/amd64/tcp=../../../kernel/drv/amd64/tcp
+l none kernel/socketmod/amd64/udp=../../../kernel/drv/amd64/udp
+f none kernel/socketmod/amd64/socksctp 755 root sys
diff --git a/usr/src/pkgdefs/SUNWckr/prototype_sparc b/usr/src/pkgdefs/SUNWckr/prototype_sparc
index daccee4e10..e81a86168e 100644
--- a/usr/src/pkgdefs/SUNWckr/prototype_sparc
+++ b/usr/src/pkgdefs/SUNWckr/prototype_sparc
@@ -199,6 +199,7 @@ f none kernel/misc/sparcv9/idmap 755 root sys
f none kernel/misc/sparcv9/ipc 755 root sys
f none kernel/misc/sparcv9/kbtrans 755 root sys
f none kernel/misc/sparcv9/kcf 755 root sys
+f none kernel/misc/sparcv9/ksocket 755 root sys
f none kernel/misc/sparcv9/mac 755 root sys
l none kernel/misc/sparcv9/md5=../../../kernel/crypto/sparcv9/md5
f none kernel/misc/sparcv9/neti 755 root sys
@@ -267,3 +268,9 @@ f none kernel/kiconv/sparcv9/kiconv_ja 755 root sys
f none kernel/kiconv/sparcv9/kiconv_ko 755 root sys
f none kernel/kiconv/sparcv9/kiconv_sc 755 root sys
f none kernel/kiconv/sparcv9/kiconv_tc 755 root sys
+d none kernel/socketmod/sparcv9 755 root sys
+l none kernel/socketmod/sparcv9/icmp=../../../kernel/drv/sparcv9/icmp
+l none kernel/socketmod/sparcv9/rts=../../../kernel/drv/sparcv9/rts
+l none kernel/socketmod/sparcv9/tcp=../../../kernel/drv/sparcv9/tcp
+l none kernel/socketmod/sparcv9/udp=../../../kernel/drv/sparcv9/udp
+f none kernel/socketmod/sparcv9/socksctp 755 root sys
diff --git a/usr/src/pkgdefs/SUNWhea/prototype_com b/usr/src/pkgdefs/SUNWhea/prototype_com
index c5d0e03053..df95ddfabe 100644
--- a/usr/src/pkgdefs/SUNWhea/prototype_com
+++ b/usr/src/pkgdefs/SUNWhea/prototype_com
@@ -971,6 +971,7 @@ f none usr/include/sys/kmem.h 644 root bin
f none usr/include/sys/kmem_impl.h 644 root bin
f none usr/include/sys/kobj.h 644 root bin
f none usr/include/sys/kobj_impl.h 644 root bin
+f none usr/include/sys/ksocket.h 644 root bin
f none usr/include/sys/kstat.h 644 root bin
f none usr/include/sys/kstr.h 644 root bin
f none usr/include/sys/ksyms.h 644 root bin
@@ -1225,6 +1226,7 @@ f none usr/include/sys/socket.h 644 root bin
f none usr/include/sys/socket_impl.h 644 root bin
f none usr/include/sys/socketvar.h 644 root bin
f none usr/include/sys/sockio.h 644 root bin
+f none usr/include/sys/socket_proto.h 644 root bin
f none usr/include/sys/sodirect.h 644 root bin
f none usr/include/sys/sservice.h 644 root bin
f none usr/include/sys/squeue.h 644 root bin
diff --git a/usr/src/pkgdefs/SUNWibsdp/postinstall b/usr/src/pkgdefs/SUNWibsdp/postinstall
index e320b55507..01b5720227 100644
--- a/usr/src/pkgdefs/SUNWibsdp/postinstall
+++ b/usr/src/pkgdefs/SUNWibsdp/postinstall
@@ -19,18 +19,15 @@
#
# CDDL HEADER END
#
-#
-# ident "%Z%%M% %I% %E% SMI"
-#
-# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
PATH="/usr/bin:/usr/sbin:${PATH}"
export PATH
-SDP4_SOCK_ENTRY=" 2 2 257 /dev/sdp"
-SDP6_SOCK_ENTRY=" 26 2 257 /dev/sdp"
+SDP4_SOCK_ENTRY=" 2 2 257 socksdp"
+SDP6_SOCK_ENTRY=" 26 2 257 socksdp"
if [ "${BASEDIR:=/}" != "/" ]
then
diff --git a/usr/src/pkgdefs/SUNWibsdp/preremove b/usr/src/pkgdefs/SUNWibsdp/preremove
index d0f143d2cf..bf6b2d72ad 100644
--- a/usr/src/pkgdefs/SUNWibsdp/preremove
+++ b/usr/src/pkgdefs/SUNWibsdp/preremove
@@ -19,18 +19,15 @@
#
# CDDL HEADER END
#
-#
-# ident "%Z%%M% %I% %E% SMI"
-#
-# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
PATH="/usr/bin:/usr/sbin:${PATH}"
export PATH
-SDP4_SOCK_ENTRY=" 2 2 257 /dev/sdp"
-SDP6_SOCK_ENTRY=" 26 2 257 /dev/sdp"
+SDP4_SOCK_ENTRY=" 2 2 257 socksdp"
+SDP6_SOCK_ENTRY=" 26 2 257 socksdp"
EXIT=0
diff --git a/usr/src/pkgdefs/SUNWibsdp/prototype_i386 b/usr/src/pkgdefs/SUNWibsdp/prototype_i386
index 2c01d15098..f1a1db9a48 100644
--- a/usr/src/pkgdefs/SUNWibsdp/prototype_i386
+++ b/usr/src/pkgdefs/SUNWibsdp/prototype_i386
@@ -19,11 +19,9 @@
# CDDL HEADER END
#
#
-# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-# ident "%Z%%M% %I% %E% SMI"
-#
# This required package information file contains a list of package contents.
# The 'pkgmk' command uses this file to identify the contents of a package
# and their location on the development machine when building the package.
@@ -47,3 +45,7 @@
f none kernel/drv/sdp 0755 root sys
d none kernel/drv/amd64 0755 root sys
f none kernel/drv/amd64/sdp 0755 root sys
+d none kernel/socketmod 755 root sys
+f none kernel/socketmod/socksdp 755 root sys
+d none kernel/socketmod/amd64 755 root sys
+f none kernel/socketmod/amd64/socksdp 755 root sys
diff --git a/usr/src/pkgdefs/SUNWibsdp/prototype_sparc b/usr/src/pkgdefs/SUNWibsdp/prototype_sparc
index 891011aba8..37fa95f27d 100644
--- a/usr/src/pkgdefs/SUNWibsdp/prototype_sparc
+++ b/usr/src/pkgdefs/SUNWibsdp/prototype_sparc
@@ -19,11 +19,9 @@
# CDDL HEADER END
#
#
-# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-# ident "%Z%%M% %I% %E% SMI"
-#
# This required package information file contains a list of package contents.
# The 'pkgmk' command uses this file to identify the contents of a package
# and their location on the development machine when building the package.
@@ -49,3 +47,6 @@
#
d none kernel/drv/sparcv9 0755 root sys
f none kernel/drv/sparcv9/sdp 0755 root sys
+d none kernel/socketmod 755 root sys
+d none kernel/socketmod/sparcv9 755 root sys
+f none kernel/socketmod/sparcv9/socksdp 755 root sys
diff --git a/usr/src/pkgdefs/SUNWmdb/prototype_i386 b/usr/src/pkgdefs/SUNWmdb/prototype_i386
index 05c255e659..fb1a898f13 100644
--- a/usr/src/pkgdefs/SUNWmdb/prototype_i386
+++ b/usr/src/pkgdefs/SUNWmdb/prototype_i386
@@ -89,6 +89,7 @@ f none usr/lib/mdb/kvm/amd64/sppp.so 555 root sys
f none usr/lib/mdb/kvm/amd64/ufs.so 555 root sys
f none usr/lib/mdb/kvm/amd64/uhci.so 555 root sys
f none usr/lib/mdb/kvm/amd64/usba.so 555 root sys
+f none usr/lib/mdb/kvm/amd64/sockfs.so 555 root sys
f none usr/lib/mdb/kvm/arp.so 555 root sys
f none usr/lib/mdb/kvm/audiosup.so 555 root sys
f none usr/lib/mdb/kvm/cpc.so 555 root sys
@@ -117,6 +118,7 @@ f none usr/lib/mdb/kvm/s1394.so 555 root sys
f none usr/lib/mdb/kvm/scsi_vhci.so 555 root sys
f none usr/lib/mdb/kvm/sctp.so 555 root sys
f none usr/lib/mdb/kvm/sd.so 555 root sys
+f none usr/lib/mdb/kvm/sockfs.so 555 root sys
f none usr/lib/mdb/kvm/specfs.so 555 root sys
f none usr/lib/mdb/kvm/sppp.so 555 root sys
f none usr/lib/mdb/kvm/ufs.so 555 root sys
diff --git a/usr/src/pkgdefs/SUNWmdb/prototype_sparc b/usr/src/pkgdefs/SUNWmdb/prototype_sparc
index 51f5c49182..eae343b703 100644
--- a/usr/src/pkgdefs/SUNWmdb/prototype_sparc
+++ b/usr/src/pkgdefs/SUNWmdb/prototype_sparc
@@ -64,6 +64,7 @@ f none usr/lib/mdb/kvm/sparcv9/ptm.so 555 root sys
s none usr/lib/mdb/kvm/sparcv9/px.so=intr.so
f none usr/lib/mdb/kvm/sparcv9/random.so 555 root sys
f none usr/lib/mdb/kvm/sparcv9/sctp.so 555 root sys
+f none usr/lib/mdb/kvm/sparcv9/sockfs.so 555 root sys
f none usr/lib/mdb/kvm/sparcv9/s1394.so 555 root sys
f none usr/lib/mdb/kvm/sparcv9/scsi_vhci.so 555 root sys
f none usr/lib/mdb/kvm/sparcv9/specfs.so 555 root sys
diff --git a/usr/src/pkgdefs/SUNWmdbr/prototype_i386 b/usr/src/pkgdefs/SUNWmdbr/prototype_i386
index 237c1da83b..662f4cb1e3 100644
--- a/usr/src/pkgdefs/SUNWmdbr/prototype_i386
+++ b/usr/src/pkgdefs/SUNWmdbr/prototype_i386
@@ -33,9 +33,8 @@ f none kernel/kmdb/amd64/cpu.generic 555 root sys
f none kernel/kmdb/amd64/cpu_ms.AuthenticAMD.15 555 root sys
f none kernel/kmdb/amd64/crypto 555 root sys
f none kernel/kmdb/amd64/genunix 555 root sys
-f none kernel/kmdb/amd64/ip 555 root sys
f none kernel/kmdb/amd64/hook 555 root sys
-f none kernel/kmdb/amd64/neti 555 root sys
+f none kernel/kmdb/amd64/ip 555 root sys
f none kernel/kmdb/amd64/ipc 555 root sys
f none kernel/kmdb/amd64/ipp 555 root sys
f none kernel/kmdb/amd64/krtld 555 root sys
@@ -46,6 +45,7 @@ f none kernel/kmdb/amd64/md 555 root sys
f none kernel/kmdb/amd64/mdb_ds 555 root sys
f none kernel/kmdb/amd64/mpt 555 root sys
f none kernel/kmdb/amd64/nca 555 root sys
+f none kernel/kmdb/amd64/neti 555 root sys
f none kernel/kmdb/amd64/nfs 555 root sys
f none kernel/kmdb/amd64/ptm 555 root sys
f none kernel/kmdb/amd64/random 555 root sys
@@ -53,6 +53,7 @@ f none kernel/kmdb/amd64/s1394 555 root sys
f none kernel/kmdb/amd64/scsi_vhci 555 root sys
f none kernel/kmdb/amd64/sctp 555 root sys
f none kernel/kmdb/amd64/sd 555 root sys
+f none kernel/kmdb/amd64/sockfs 555 root sys
f none kernel/kmdb/amd64/specfs 555 root sys
f none kernel/kmdb/amd64/sppp 555 root sys
f none kernel/kmdb/amd64/ufs 555 root sys
@@ -65,9 +66,8 @@ f none kernel/kmdb/cpu.generic 555 root sys
f none kernel/kmdb/cpu_ms.AuthenticAMD.15 555 root sys
f none kernel/kmdb/crypto 555 root sys
f none kernel/kmdb/genunix 555 root sys
-f none kernel/kmdb/ip 555 root sys
f none kernel/kmdb/hook 555 root sys
-f none kernel/kmdb/neti 555 root sys
+f none kernel/kmdb/ip 555 root sys
f none kernel/kmdb/ipc 555 root sys
f none kernel/kmdb/ipp 555 root sys
f none kernel/kmdb/krtld 555 root sys
@@ -78,6 +78,7 @@ f none kernel/kmdb/md 555 root sys
f none kernel/kmdb/mdb_ds 555 root sys
f none kernel/kmdb/mpt 555 root sys
f none kernel/kmdb/nca 555 root sys
+f none kernel/kmdb/neti 555 root sys
f none kernel/kmdb/nfs 555 root sys
f none kernel/kmdb/ptm 555 root sys
f none kernel/kmdb/random 555 root sys
@@ -85,6 +86,7 @@ f none kernel/kmdb/s1394 555 root sys
f none kernel/kmdb/scsi_vhci 555 root sys
f none kernel/kmdb/sctp 555 root sys
f none kernel/kmdb/sd 555 root sys
+f none kernel/kmdb/sockfs 555 root sys
f none kernel/kmdb/specfs 555 root sys
f none kernel/kmdb/sppp 555 root sys
f none kernel/kmdb/ufs 555 root sys
diff --git a/usr/src/pkgdefs/SUNWmdbr/prototype_sparc b/usr/src/pkgdefs/SUNWmdbr/prototype_sparc
index b4057c2328..0e3e805552 100644
--- a/usr/src/pkgdefs/SUNWmdbr/prototype_sparc
+++ b/usr/src/pkgdefs/SUNWmdbr/prototype_sparc
@@ -22,7 +22,6 @@
# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-#
!include prototype_com
#
@@ -32,10 +31,9 @@ f none kernel/kmdb/sparcv9/audiosup 555 root sys
f none kernel/kmdb/sparcv9/cpc 555 root sys
f none kernel/kmdb/sparcv9/crypto 555 root sys
f none kernel/kmdb/sparcv9/genunix 555 root sys
+f none kernel/kmdb/sparcv9/hook 555 root sys
f none kernel/kmdb/sparcv9/intr 555 root sys
f none kernel/kmdb/sparcv9/ip 555 root sys
-f none kernel/kmdb/sparcv9/hook 555 root sys
-f none kernel/kmdb/sparcv9/neti 555 root sys
f none kernel/kmdb/sparcv9/ipc 555 root sys
f none kernel/kmdb/sparcv9/ipp 555 root sys
f none kernel/kmdb/sparcv9/isp 555 root sys
@@ -47,16 +45,18 @@ f none kernel/kmdb/sparcv9/md 555 root sys
f none kernel/kmdb/sparcv9/mdb_ds 555 root sys
f none kernel/kmdb/sparcv9/mpt 555 root sys
f none kernel/kmdb/sparcv9/nca 555 root sys
+f none kernel/kmdb/sparcv9/neti 555 root sys
f none kernel/kmdb/sparcv9/nfs 555 root sys
-s none kernel/kmdb/sparcv9/pcisch=intr
s none kernel/kmdb/sparcv9/pcipsy=intr
+s none kernel/kmdb/sparcv9/pcisch=intr
f none kernel/kmdb/sparcv9/ptm 555 root sys
s none kernel/kmdb/sparcv9/px=intr
f none kernel/kmdb/sparcv9/random 555 root sys
-f none kernel/kmdb/sparcv9/sctp 555 root sys
f none kernel/kmdb/sparcv9/s1394 555 root sys
f none kernel/kmdb/sparcv9/scsi_vhci 555 root sys
+f none kernel/kmdb/sparcv9/sctp 555 root sys
f none kernel/kmdb/sparcv9/sd 555 root sys
+f none kernel/kmdb/sparcv9/sockfs 555 root sys
f none kernel/kmdb/sparcv9/specfs 555 root sys
f none kernel/kmdb/sparcv9/sppp 555 root sys
f none kernel/kmdb/sparcv9/ssd 555 root sys
@@ -68,10 +68,10 @@ d none platform/sun4u 755 root sys
d none platform/sun4u/kernel 755 root sys
d none platform/sun4u/kernel/kmdb 755 root sys
d none platform/sun4u/kernel/kmdb/sparcv9 755 root sys
+f none platform/sun4u/kernel/kmdb/sparcv9/oplhwd 555 root sys
f none platform/sun4u/kernel/kmdb/sparcv9/sgenv 555 root sys
f none platform/sun4u/kernel/kmdb/sparcv9/sgsbbc 555 root sys
f none platform/sun4u/kernel/kmdb/sparcv9/unix 555 root sys
-f none platform/sun4u/kernel/kmdb/sparcv9/oplhwd 555 root sys
#
d none platform/sun4v 755 root sys
d none platform/sun4v/kernel 755 root sys
diff --git a/usr/src/pkgdefs/common_files/i.sock2path b/usr/src/pkgdefs/common_files/i.sock2path
index 9b1bdedc36..31fcde8e06 100644
--- a/usr/src/pkgdefs/common_files/i.sock2path
+++ b/usr/src/pkgdefs/common_files/i.sock2path
@@ -3,9 +3,8 @@
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
-# Common Development and Distribution License, Version 1.0 only
-# (the "License"). You may not use this file except in compliance
-# with the License.
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
@@ -20,10 +19,7 @@
#
# CDDL HEADER END
#
-#
-#ident "%Z%%M% %I% %E% SMI"
-#
-# Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
@@ -80,6 +76,25 @@ do
echo >> $dest
grep '/dev/spdsock' $src >> $dest
fi
+ grep "^#" $dest | awk '{
+ if ($5=="Path") {print $0 "|Module"}
+ else {print $0}}' > /tmp/i.$$
+ grep -v "^#" $dest | awk '{
+ if ($4=="/dev/tcp" || $4=="/dev/tcp6") {
+ print "\t" $1 "\t" $2 "\t" $3 "\ttcp"
+ } else if ($4=="/dev/udp" || $4=="/dev/udp6") {
+ print "\t" $1 "\t" $2 "\t" $3 "\tudp"
+ } else if ($4=="/dev/rawip" || $4=="/dev/rawip6") {
+ print "\t" $1 "\t" $2 "\t" $3 "\ticmp"
+ } else if ($4=="/dev/sctp" || $4=="/dev/sctp6") {
+ print "\t" $1 "\t" $2 "\t" $3 "\tsocksctp"
+ } else if ($4=="/dev/rts") {
+ print "\t" $1 "\t" $2 "\t" $3 "\trts"
+ } else if ($4=="/dev/sdp" || $4=="/dev/sdp6") {
+ print "\t" $1 "\t" $2 "\t" $3 "\tsocksdp"
+ } else {print $0}}' >> /tmp/i.$$
+ cp /tmp/i.$$ $dest
+ rm -f /tmp/i.$$
fi
done
diff --git a/usr/src/uts/Makefile.targ b/usr/src/uts/Makefile.targ
index 86adc21eb2..d9fc918b94 100644
--- a/usr/src/uts/Makefile.targ
+++ b/usr/src/uts/Makefile.targ
@@ -22,7 +22,6 @@
# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-#
# This Makefiles contains the common targets and definitions for
# all kernels. It is to be included in the Makefiles for specific
# implementation architectures and processor architecture dependent
@@ -163,6 +162,9 @@ $(ROOT_FS_DIR)/%: $(OBJS_DIR)/% $(ROOT_FS_DIR) FRC
$(ROOT_SCHED_DIR)/%: $(OBJS_DIR)/% $(ROOT_SCHED_DIR) FRC
$(INS.file)
+$(ROOT_SOCK_DIR)/%: $(OBJS_DIR)/% $(ROOT_SOCK_DIR) FRC
+ $(INS.file)
+
$(ROOT_STRMOD_DIR)/%: $(OBJS_DIR)/% $(ROOT_STRMOD_DIR) FRC
$(INS.file)
@@ -388,12 +390,10 @@ $(MODLIST_DEPS): FRC
@case $@ in \
*32) \
class=32; \
- relmodule=`dirname $(RELMODULE)`; \
- rellink=`dirname $(RELLINK)`;; \
+ relmodule=`dirname $(RELMODULE)`;; \
*64) \
class=64; \
- relmodule=`dirname $(RELMODULE)`/$(SUBDIR64); \
- rellink=`dirname $(RELLINK)`/$(SUBDIR64);; \
+ relmodule=`dirname $(RELMODULE)`/$(SUBDIR64);; \
esac; \
if [ -z "$(THISIMPL)" ]; then \
impl=all; \
@@ -426,8 +426,16 @@ $(MODLIST_DEPS): FRC
done \
fi; \
if [ -n "$(ROOTLINK)" ]; then \
+ rellinks="$(RELLINK)"; \
+ for r in $$rellinks; do \
+ if [ $$class = 32 ]; then \
+ linkdir=`dirname $$r`; \
+ else \
+ linkdir=`dirname $$r`/$(SUBDIR64); \
+ fi; \
echo LINK $$relmodule $$module \
- $$rellink `basename $(RELLINK)` $$impl; \
+ $$linkdir `basename $$r` $$impl; \
+ done \
fi; \
if [ -n "$(UNIX32_LINK)" ]; then \
echo SYMLINK $(SUBDIR64)/$(UNIX) \
diff --git a/usr/src/uts/Makefile.uts b/usr/src/uts/Makefile.uts
index 86b39fc084..0f4718e3da 100644
--- a/usr/src/uts/Makefile.uts
+++ b/usr/src/uts/Makefile.uts
@@ -419,6 +419,7 @@ ROOT_DTRACE_DIR_32 = $(ROOT_MOD_DIR)/dtrace
ROOT_EXEC_DIR_32 = $(ROOT_MOD_DIR)/exec
ROOT_FS_DIR_32 = $(ROOT_MOD_DIR)/fs
ROOT_SCHED_DIR_32 = $(ROOT_MOD_DIR)/sched
+ROOT_SOCK_DIR_32 = $(ROOT_MOD_DIR)/socketmod
ROOT_STRMOD_DIR_32 = $(ROOT_MOD_DIR)/strmod
ROOT_IPP_DIR_32 = $(ROOT_MOD_DIR)/ipp
ROOT_SYS_DIR_32 = $(ROOT_MOD_DIR)/sys
@@ -444,6 +445,7 @@ ROOT_DTRACE_DIR_64 = $(ROOT_MOD_DIR)/dtrace/$(SUBDIR64)
ROOT_EXEC_DIR_64 = $(ROOT_MOD_DIR)/exec/$(SUBDIR64)
ROOT_FS_DIR_64 = $(ROOT_MOD_DIR)/fs/$(SUBDIR64)
ROOT_SCHED_DIR_64 = $(ROOT_MOD_DIR)/sched/$(SUBDIR64)
+ROOT_SOCK_DIR_64 = $(ROOT_MOD_DIR)/socketmod/$(SUBDIR64)
ROOT_STRMOD_DIR_64 = $(ROOT_MOD_DIR)/strmod/$(SUBDIR64)
ROOT_IPP_DIR_64 = $(ROOT_MOD_DIR)/ipp/$(SUBDIR64)
ROOT_SYS_DIR_64 = $(ROOT_MOD_DIR)/sys/$(SUBDIR64)
@@ -469,6 +471,7 @@ ROOT_DTRACE_DIR = $(ROOT_DTRACE_DIR_$(CLASS))
ROOT_EXEC_DIR = $(ROOT_EXEC_DIR_$(CLASS))
ROOT_FS_DIR = $(ROOT_FS_DIR_$(CLASS))
ROOT_SCHED_DIR = $(ROOT_SCHED_DIR_$(CLASS))
+ROOT_SOCK_DIR = $(ROOT_SOCK_DIR_$(CLASS))
ROOT_STRMOD_DIR = $(ROOT_STRMOD_DIR_$(CLASS))
ROOT_IPP_DIR = $(ROOT_IPP_DIR_$(CLASS))
ROOT_SYS_DIR = $(ROOT_SYS_DIR_$(CLASS))
@@ -492,7 +495,7 @@ ROOT_MOD_DIRS_32 = $(ROOT_BRAND_DIR_32) $(ROOT_DRV_DIR_32)
ROOT_MOD_DIRS_32 += $(ROOT_EXEC_DIR_32) $(ROOT_DTRACE_DIR_32)
ROOT_MOD_DIRS_32 += $(ROOT_FS_DIR_32) $(ROOT_SCHED_DIR_32)
ROOT_MOD_DIRS_32 += $(ROOT_STRMOD_DIR_32) $(ROOT_SYS_DIR_32)
-ROOT_MOD_DIRS_32 += $(ROOT_IPP_DIR_32)
+ROOT_MOD_DIRS_32 += $(ROOT_IPP_DIR_32) $(ROOT_SOCK_DIR_32)
ROOT_MOD_DIRS_32 += $(ROOT_MISC_DIR_32) $(ROOT_MACH_DIR_32)
ROOT_MOD_DIRS_32 += $(ROOT_KGSS_DIR_32)
ROOT_MOD_DIRS_32 += $(ROOT_SCSI_VHCI_DIR_32)
@@ -508,6 +511,7 @@ USR_DRV_DIR_32 = $(USR_MOD_DIR)/drv
USR_EXEC_DIR_32 = $(USR_MOD_DIR)/exec
USR_FS_DIR_32 = $(USR_MOD_DIR)/fs
USR_SCHED_DIR_32 = $(USR_MOD_DIR)/sched
+USR_SOCK_DIR_32 = $(USR_MOD_DIR)/socketmod
USR_STRMOD_DIR_32 = $(USR_MOD_DIR)/strmod
USR_SYS_DIR_32 = $(USR_MOD_DIR)/sys
USR_MISC_DIR_32 = $(USR_MOD_DIR)/misc
@@ -521,6 +525,7 @@ USR_DRV_DIR_64 = $(USR_MOD_DIR)/drv/$(SUBDIR64)
USR_EXEC_DIR_64 = $(USR_MOD_DIR)/exec/$(SUBDIR64)
USR_FS_DIR_64 = $(USR_MOD_DIR)/fs/$(SUBDIR64)
USR_SCHED_DIR_64 = $(USR_MOD_DIR)/sched/$(SUBDIR64)
+USR_SOCK_DIR_64 = $(USR_MOD_DIR)/socketmod/$(SUBDIR64)
USR_STRMOD_DIR_64 = $(USR_MOD_DIR)/strmod/$(SUBDIR64)
USR_SYS_DIR_64 = $(USR_MOD_DIR)/sys/$(SUBDIR64)
USR_MISC_DIR_64 = $(USR_MOD_DIR)/misc/$(SUBDIR64)
@@ -534,6 +539,7 @@ USR_DRV_DIR = $(USR_DRV_DIR_$(CLASS))
USR_EXEC_DIR = $(USR_EXEC_DIR_$(CLASS))
USR_FS_DIR = $(USR_FS_DIR_$(CLASS))
USR_SCHED_DIR = $(USR_SCHED_DIR_$(CLASS))
+USR_SOCK_DIR = $(USR_SOCK_DIR_$(CLASS))
USR_STRMOD_DIR = $(USR_STRMOD_DIR_$(CLASS))
USR_SYS_DIR = $(USR_SYS_DIR_$(CLASS))
USR_MISC_DIR = $(USR_MISC_DIR_$(CLASS))
@@ -599,7 +605,8 @@ PARALLEL_KMODS = $(DRV_KMODS) $(EXEC_KMODS) $(FS_KMODS) $(SCHED_KMODS) \
$(MMU_KMODS) $(DACF_KMODS) $(EXPORT_KMODS) $(IPP_KMODS) \
$(CRYPTO_KMODS) $(PCBE_KMODS) \
$(DRV_KMODS_$(CLASS)) $(MISC_KMODS_$(CLASS)) $(MAC_KMODS) \
- $(DEVNAME_KMODS) $(BRAND_KMODS) $(KICONV_KMODS)
+ $(DEVNAME_KMODS) $(BRAND_KMODS) $(KICONV_KMODS) \
+ $(SOCKET_KMODS)
KMODS = $(GENUNIX_KMODS) $(PARALLEL_KMODS)
@@ -614,7 +621,7 @@ LINT_KMODS = $(DRV_KMODS) $(EXEC_KMODS) $(FS_KMODS) $(SCHED_KMODS) \
$(MACH_KMODS) $(GSS_KMODS) $(DACF_KMODS) $(IPP_KMODS) \
$(CRYPTO_KMODS) $(PCBE_KMODS) $(DEVNAME_KMODS) \
$(DRV_KMODS_$(CLASS)) $(MISC_KMODS_$(CLASS)) $(MAC_KMODS) \
- $(BRAND_KMODS) $(KICONV_KMODS)
+ $(BRAND_KMODS) $(KICONV_KMODS) $(SOCKET_KMODS)
$(CLOSED_BUILD)CLOSED_LINT_KMODS = $(CLOSED_DRV_KMODS) $(CLOSED_TOD_KMODS) \
$(CLOSED_MISC_KMODS) $(CLOSED_DRV_KMODS_$(CLASS))
diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files
index 564b2cf72e..f0951c280b 100644
--- a/usr/src/uts/common/Makefile.files
+++ b/usr/src/uts/common/Makefile.files
@@ -289,6 +289,7 @@ GENUNIX_OBJS += \
sigsuspend.o \
sigtimedwait.o \
sleepq.o \
+ sock_conf.o \
space.o \
sscanf.o \
ssig.o \
@@ -489,7 +490,8 @@ IP_OBJS += igmp.o ip.o ip6.o ip6_asp.o ip6_if.o ip6_ire.o ip6_rts.o \
ip_multi.o ip_ndp.o ip_opt_data.o ip_rts.o ip_srcid.o \
ipddi.o ipdrop.o mi.o nd.o optcom.o snmpcom.o ipsec_loader.o \
spd.o ipclassifier.o inet_common.o ip_squeue.o squeue.o \
- ip_sadb.o ip_ftable.o radix.o ip_dummy.o \
+ ip_sadb.o ip_ftable.o proto_set.o radix.o ip_dummy.o \
+ ip_helper_stream.o \
$(IP_ICMP_OBJS) \
$(IP_RTS_OBJS) \
$(IP_TCP_OBJS) \
@@ -531,6 +533,10 @@ SCTP6_OBJS += sctp6ddi.o
NCA_OBJS += ncaddi.o
+SDP_SOCK_MOD_OBJS += sockmod_sdp.o socksdp.o socksdpsubr.o
+
+SCTP_SOCK_MOD_OBJS += sockmod_sctp.o socksctp.o socksctpsubr.o
+
TUN_OBJS += tun.o
ATUN_OBJS += atun.o
@@ -1138,10 +1144,10 @@ SHAREFS_OBJS += sharetab.o sharefs_vfsops.o sharefs_vnops.o
SPEC_OBJS += specsubr.o specvfsops.o specvnops.o
-SOCK_OBJS += socksubr.o sockvfsops.o sockvnops.o \
- socksyscalls.o socktpi.o sockstr.o \
- socksctp.o socksctpsubr.o socksctpvnops.o sockssl.o \
- socksdp.o socksdpsubr.o socksdpvnops.o \
+SOCK_OBJS += socksubr.o sockvfsops.o sockparams.o \
+ socksyscalls.o socktpi.o sockstr.o sockssl.o \
+ sockcommon_vnops.o sockcommon_subr.o \
+ sockcommon_sops.o sockcommon.o socknotify.o \
nl7c.o nl7curi.o nl7chttp.o nl7clogd.o \
nl7cnca.o
@@ -1456,6 +1462,8 @@ KGSSD_DERIVED_OBJS = gssd_xdr.o
KGSS_DUMMY_OBJS += dmech.o
+KSOCKET_OBJS += ksocket.o ksocket_mod.o
+
CRYPTO= cksumtypes.o decrypt.o encrypt.o encrypt_length.o etypes.o \
nfold.o verify_checksum.o prng.o block_size.o make_checksum.o\
checksum_length.o hmac.o default_state.o mandatory_sumtype.o
diff --git a/usr/src/uts/common/Makefile.rules b/usr/src/uts/common/Makefile.rules
index 0035b502b9..35fe0895f1 100644
--- a/usr/src/uts/common/Makefile.rules
+++ b/usr/src/uts/common/Makefile.rules
@@ -481,6 +481,10 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/common/inet/nca/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/inet/sockmods/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
$(OBJS_DIR)/%.o: $(UTSBASE)/common/inet/vni/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
@@ -681,6 +685,10 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/common/io/kbtrans/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/ksocket/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/aggr/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
@@ -1548,6 +1556,9 @@ $(LINTS_DIR)/%.ln: $(UTSBASE)/common/idmap/%.c
$(LINTS_DIR)/%.ln: $(UTSBASE)/common/inet/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/inet/sockmods/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
$(LINTS_DIR)/%.ln: $(UTSBASE)/common/inet/arp/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
@@ -1732,6 +1743,9 @@ $(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/kb8042/%.c
$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/kbtrans/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/ksocket/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/aggr/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
diff --git a/usr/src/uts/common/c2/audit_event.c b/usr/src/uts/common/c2/audit_event.c
index 723212aa52..92559a3575 100644
--- a/usr/src/uts/common/c2/audit_event.c
+++ b/usr/src/uts/common/c2/audit_event.c
@@ -72,6 +72,8 @@
#include <sys/tihdr.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
+#include <sys/vfs_opreg.h>
+#include <fs/sockfs/sockcommon.h>
#include <netinet/in.h>
#include <sys/ddi.h>
#include <sys/port_impl.h>
@@ -3328,7 +3330,6 @@ auf_accept(
char so_laddr[sizeof (struct sockaddr_in6)];
char so_faddr[sizeof (struct sockaddr_in6)];
int err;
- int len;
short so_family, so_type;
int add_sock_token = 0;
@@ -3374,28 +3375,17 @@ auf_accept(
* XXX - what about other socket types for AF_INET (e.g. DGRAM)
*/
if (so->so_type == SOCK_STREAM) {
+ socklen_t len;
bzero((void *)so_laddr, sizeof (so_laddr));
bzero((void *)so_faddr, sizeof (so_faddr));
- /*
- * no local address then need to get it from lower
- * levels. only put out record on first read ala
- * AUE_WRITE.
- */
- if (so->so_state & SS_ISBOUND) {
- /* only done once on a connection */
- (void) SOP_GETSOCKNAME(so);
- (void) SOP_GETPEERNAME(so);
-
- /* get local and foreign addresses */
- mutex_enter(&so->so_lock);
- len = min(so->so_laddr_len, sizeof (so_laddr));
- bcopy(so->so_laddr_sa, so_laddr, len);
- len = min(so->so_faddr_len, sizeof (so_faddr));
- bcopy(so->so_faddr_sa, so_faddr, len);
- mutex_exit(&so->so_lock);
- }
+ len = sizeof (so_laddr);
+ (void) socket_getsockname(so,
+ (struct sockaddr *)so_laddr, &len, CRED());
+ len = sizeof (so_faddr);
+ (void) socket_getpeername(so,
+ (struct sockaddr *)so_faddr, &len, B_FALSE, CRED());
add_sock_token = 1;
}
@@ -3434,7 +3424,7 @@ auf_bind(struct t_audit_data *tad, int error, rval_t *rvp)
char so_laddr[sizeof (struct sockaddr_in6)];
char so_faddr[sizeof (struct sockaddr_in6)];
int err, fd;
- int len;
+ socklen_t len;
short so_family, so_type;
int add_sock_token = 0;
@@ -3466,17 +3456,10 @@ auf_bind(struct t_audit_data *tad, int error, rval_t *rvp)
case AF_INET6:
bzero(so_faddr, sizeof (so_faddr));
+ /*
+ * bind() establishes only the local address, so fetch that one
+ * into so_laddr; so_faddr stays zeroed.
+ */
+ bzero(so_laddr, sizeof (so_laddr));
+ len = sizeof (so_laddr);
- if (so->so_state & SS_ISBOUND) {
- /* only done once on a connection */
- (void) SOP_GETSOCKNAME(so);
- }
-
- mutex_enter(&so->so_lock);
- len = min(so->so_laddr_len, sizeof (so_laddr));
- bcopy(so->so_laddr_sa, so_laddr, len);
- mutex_exit(&so->so_lock);
-
+ (void) socket_getsockname(so,
+ (struct sockaddr *)so_laddr, &len, CRED());
add_sock_token = 1;
break;
@@ -3517,7 +3500,7 @@ auf_connect(struct t_audit_data *tad, int error, rval_t *rval)
char so_laddr[sizeof (struct sockaddr_in6)];
char so_faddr[sizeof (struct sockaddr_in6)];
int err, fd;
- int len;
+ socklen_t len;
short so_family, so_type;
int add_sock_token = 0;
@@ -3539,24 +3522,14 @@ auf_connect(struct t_audit_data *tad, int error, rval_t *rval)
switch (so_family) {
case AF_INET:
case AF_INET6:
- /*
- * no local address then need to get it from lower
- * levels.
- */
- if (so->so_state & SS_ISBOUND) {
- /* only done once on a connection */
- (void) SOP_GETSOCKNAME(so);
- (void) SOP_GETPEERNAME(so);
- }
bzero(so_laddr, sizeof (so_laddr));
bzero(so_faddr, sizeof (so_faddr));
- mutex_enter(&so->so_lock);
- len = min(so->so_laddr_len, sizeof (so_laddr));
- bcopy(so->so_laddr_sa, so_laddr, len);
+ len = sizeof (so_laddr);
+ (void) socket_getsockname(so, (struct sockaddr *)so_laddr,
+ &len, CRED());
if (error) {
- mutex_exit(&so->so_lock);
if (uap->addr == NULL)
break;
if (uap->len <= 0)
@@ -3569,9 +3542,9 @@ auf_connect(struct t_audit_data *tad, int error, rval_t *rval)
#endif
} else {
/* sanity check on length */
- len = min(so->so_faddr_len, sizeof (so_faddr));
- bcopy(so->so_faddr_sa, so_faddr, len);
- mutex_exit(&so->so_lock);
+ len = sizeof (so_faddr);
+ (void) socket_getpeername(so,
+ (struct sockaddr *)so_faddr, &len, B_FALSE, CRED());
}
add_sock_token = 1;
@@ -3614,7 +3587,7 @@ aus_shutdown(struct t_audit_data *tad)
char so_laddr[sizeof (struct sockaddr_in6)];
char so_faddr[sizeof (struct sockaddr_in6)];
int err, fd;
- int len;
+ socklen_t len;
short so_family, so_type;
int add_sock_token = 0;
file_t *fp; /* unix domain sockets */
@@ -3641,23 +3614,12 @@ aus_shutdown(struct t_audit_data *tad)
bzero(so_laddr, sizeof (so_laddr));
bzero(so_faddr, sizeof (so_faddr));
- if (so->so_state & SS_ISBOUND) {
- /*
- * no local address then need to get it from lower
- * levels.
- */
- if (so->so_laddr_len == 0)
- (void) SOP_GETSOCKNAME(so);
- if (so->so_faddr_len == 0)
- (void) SOP_GETPEERNAME(so);
- }
-
- mutex_enter(&so->so_lock);
- len = min(so->so_laddr_len, sizeof (so_laddr));
- bcopy(so->so_laddr_sa, so_laddr, len);
- len = min(so->so_faddr_len, sizeof (so_faddr));
- bcopy(so->so_faddr_sa, so_faddr, len);
- mutex_exit(&so->so_lock);
+ len = sizeof (so_laddr);
+ (void) socket_getsockname(so,
+ (struct sockaddr *)so_laddr, &len, CRED());
+ len = sizeof (so_faddr);
+ (void) socket_getpeername(so,
+ (struct sockaddr *)so_faddr, &len, B_FALSE, CRED());
add_sock_token = 1;
@@ -3721,7 +3683,7 @@ auf_setsockopt(struct t_audit_data *tad, int error, rval_t *rval)
char so_faddr[sizeof (struct sockaddr_in6)];
char val[AU_BUFSIZE];
int err, fd;
- int len;
+ socklen_t len;
short so_family, so_type;
int add_sock_token = 0;
file_t *fp; /* unix domain sockets */
@@ -3751,24 +3713,16 @@ auf_setsockopt(struct t_audit_data *tad, int error, rval_t *rval)
switch (so_family) {
case AF_INET:
case AF_INET6:
-
bzero((void *)so_laddr, sizeof (so_laddr));
bzero((void *)so_faddr, sizeof (so_faddr));
- if (so->so_state & SS_ISBOUND) {
- if (so->so_laddr_len == 0)
- (void) SOP_GETSOCKNAME(so);
- if (so->so_faddr_len == 0)
- (void) SOP_GETPEERNAME(so);
- }
-
/* get local and foreign addresses */
- mutex_enter(&so->so_lock);
- len = min(so->so_laddr_len, sizeof (so_laddr));
- bcopy(so->so_laddr_sa, so_laddr, len);
- len = min(so->so_faddr_len, sizeof (so_faddr));
- bcopy(so->so_faddr_sa, so_faddr, len);
- mutex_exit(&so->so_lock);
+ len = sizeof (so_laddr);
+ (void) socket_getsockname(so, (struct sockaddr *)so_laddr,
+ &len, CRED());
+ len = sizeof (so_faddr);
+ (void) socket_getpeername(so, (struct sockaddr *)so_faddr,
+ &len, B_FALSE, CRED());
add_sock_token = 1;
@@ -3892,7 +3846,7 @@ auf_recvmsg(
int err;
char so_laddr[sizeof (struct sockaddr_in6)];
char so_faddr[sizeof (struct sockaddr_in6)];
- int len;
+ socklen_t len;
file_t *fp; /* unix domain sockets */
struct f_audit_data *fad; /* unix domain sockets */
short so_family, so_type;
@@ -3942,10 +3896,9 @@ auf_recvmsg(
bzero((void *)so_faddr, sizeof (so_faddr));
/* get local address */
- mutex_enter(&so->so_lock);
- len = min(so->so_laddr_len, sizeof (so_laddr));
- bcopy(so->so_laddr_sa, so_laddr, len);
- mutex_exit(&so->so_lock);
+ len = sizeof (so_laddr);
+ (void) socket_getsockname(so,
+ (struct sockaddr *)so_laddr, &len, CRED());
/* get peer address */
STRUCT_INIT(msg, get_udatamodel());
@@ -3995,21 +3948,13 @@ auf_recvmsg(
bzero((void *)so_laddr, sizeof (so_laddr));
bzero((void *)so_faddr, sizeof (so_faddr));
- if (so->so_state & SS_ISBOUND) {
-
- if (so->so_laddr_len == 0)
- (void) SOP_GETSOCKNAME(so);
- if (so->so_faddr_len == 0)
- (void) SOP_GETPEERNAME(so);
-
- /* get local and foreign addresses */
- mutex_enter(&so->so_lock);
- len = min(so->so_laddr_len, sizeof (so_laddr));
- bcopy(so->so_laddr_sa, so_laddr, len);
- len = min(so->so_faddr_len, sizeof (so_faddr));
- bcopy(so->so_faddr_sa, so_faddr, len);
- mutex_exit(&so->so_lock);
- }
+ /* get local and foreign addresses */
+ len = sizeof (so_laddr);
+ (void) socket_getsockname(so,
+ (struct sockaddr *)so_laddr, &len, CRED());
+ len = sizeof (so_faddr);
+ (void) socket_getpeername(so,
+ (struct sockaddr *)so_faddr, &len, B_FALSE, CRED());
add_sock_token = 1;
}
@@ -4103,7 +4048,7 @@ auf_recvfrom(
int fd;
short so_family, so_type;
int add_sock_token = 0;
- int len;
+ socklen_t len;
int err;
struct file *fp;
struct f_audit_data *fad; /* unix domain sockets */
@@ -4149,10 +4094,9 @@ auf_recvfrom(
add_sock_token = 1;
/* get local address */
- mutex_enter(&so->so_lock);
- len = min(so->so_laddr_len, sizeof (so_laddr));
- bcopy(so->so_laddr_sa, so_laddr, len);
- mutex_exit(&so->so_lock);
+ len = sizeof (so_laddr);
+ (void) socket_getsockname(so,
+ (struct sockaddr *)so_laddr, &len, CRED());
/* get peer address */
bzero((void *)so_faddr, sizeof (so_faddr));
@@ -4206,21 +4150,13 @@ auf_recvfrom(
bzero((void *)so_laddr, sizeof (so_laddr));
bzero((void *)so_faddr, sizeof (so_faddr));
- if (so->so_state & SS_ISBOUND) {
-
- if (so->so_laddr_len == 0)
- (void) SOP_GETSOCKNAME(so);
- if (so->so_faddr_len == 0)
- (void) SOP_GETPEERNAME(so);
-
- /* get local and foreign addresses */
- mutex_enter(&so->so_lock);
- len = min(so->so_laddr_len, sizeof (so_laddr));
- bcopy(so->so_laddr_sa, so_laddr, len);
- len = min(so->so_faddr_len, sizeof (so_faddr));
- bcopy(so->so_faddr_sa, so_faddr, len);
- mutex_exit(&so->so_lock);
- }
+ /* get local and foreign addresses */
+ len = sizeof (so_laddr);
+ (void) socket_getsockname(so,
+ (struct sockaddr *)so_laddr, &len, CRED());
+ len = sizeof (so_faddr);
+ (void) socket_getpeername(so,
+ (struct sockaddr *)so_faddr, &len, B_FALSE, CRED());
add_sock_token = 1;
}
@@ -4306,7 +4242,7 @@ auf_sendmsg(struct t_audit_data *tad, int error, rval_t *rval)
int fd;
short so_family, so_type;
int add_sock_token = 0;
- int len;
+ socklen_t len;
struct file *fp;
struct f_audit_data *fad;
caddr_t msg_name;
@@ -4351,10 +4287,9 @@ auf_sendmsg(struct t_audit_data *tad, int error, rval_t *rval)
bzero((void *)so_faddr, sizeof (so_faddr));
/* get local address */
- mutex_enter(&so->so_lock);
- len = min(so->so_laddr_len, sizeof (so_laddr));
- bcopy(so->so_laddr_sa, so_laddr, len);
- mutex_exit(&so->so_lock);
+ len = sizeof (so_laddr);
+ (void) socket_getsockname(so,
+ (struct sockaddr *)so_laddr, &len, CRED());
/* get peer address */
STRUCT_INIT(msg, get_udatamodel());
@@ -4405,21 +4340,13 @@ auf_sendmsg(struct t_audit_data *tad, int error, rval_t *rval)
bzero((void *)so_laddr, sizeof (so_laddr));
bzero((void *)so_faddr, sizeof (so_faddr));
- if (so->so_state & SS_ISBOUND) {
-
- if (so->so_laddr_len == 0)
- (void) SOP_GETSOCKNAME(so);
- if (so->so_faddr_len == 0)
- (void) SOP_GETPEERNAME(so);
-
- /* get local and foreign addresses */
- mutex_enter(&so->so_lock);
- len = min(so->so_laddr_len, sizeof (so_laddr));
- bcopy(so->so_laddr_sa, so_laddr, len);
- len = min(so->so_faddr_len, sizeof (so_faddr));
- bcopy(so->so_faddr_sa, so_faddr, len);
- mutex_exit(&so->so_lock);
- }
+ /* get local and foreign addresses */
+ len = sizeof (so_laddr);
+ (void) socket_getsockname(so,
+ (struct sockaddr *)so_laddr, &len, CRED());
+ len = sizeof (so_faddr);
+ (void) socket_getpeername(so,
+ (struct sockaddr *)so_faddr, &len, B_FALSE, CRED());
add_sock_token = 1;
}
@@ -4506,7 +4433,7 @@ auf_sendto(struct t_audit_data *tad, int error, rval_t *rval)
socklen_t tolen;
int err;
int fd;
- int len;
+ socklen_t len;
short so_family, so_type;
int add_sock_token = 0;
struct file *fp;
@@ -4556,10 +4483,9 @@ auf_sendto(struct t_audit_data *tad, int error, rval_t *rval)
bzero((void *)so_faddr, sizeof (so_faddr));
/* get local address */
- mutex_enter(&so->so_lock);
- len = min(so->so_laddr_len, sizeof (so_laddr));
- bcopy(so->so_laddr_sa, so_laddr, len);
- mutex_exit(&so->so_lock);
+ len = sizeof (so_laddr);
+ (void) socket_getsockname(so,
+ (struct sockaddr *)so_laddr, &len, CRED());
/* get peer address */
@@ -4610,21 +4536,13 @@ auf_sendto(struct t_audit_data *tad, int error, rval_t *rval)
bzero((void *)so_laddr, sizeof (so_laddr));
bzero((void *)so_faddr, sizeof (so_faddr));
- if (so->so_state & SS_ISBOUND) {
-
- if (so->so_laddr_len == 0)
- (void) SOP_GETSOCKNAME(so);
- if (so->so_faddr_len == 0)
- (void) SOP_GETPEERNAME(so);
-
- /* get local and foreign addresses */
- mutex_enter(&so->so_lock);
- len = min(so->so_laddr_len, sizeof (so_laddr));
- bcopy(so->so_laddr_sa, so_laddr, len);
- len = min(so->so_faddr_len, sizeof (so_faddr));
- bcopy(so->so_faddr_sa, so_faddr, len);
- mutex_exit(&so->so_lock);
- }
+ /* get local and foreign addresses */
+ len = sizeof (so_laddr);
+ (void) socket_getsockname(so,
+ (struct sockaddr *)so_laddr, &len, CRED());
+ len = sizeof (so_faddr);
+ (void) socket_getpeername(so,
+ (struct sockaddr *)so_faddr, &len, B_FALSE, CRED());
add_sock_token = 1;
}
@@ -5394,7 +5312,7 @@ auf_recv(tad, error, rval)
struct f_audit_data *fad;
int fd;
int err;
- int len;
+ socklen_t len;
short so_family, so_type;
register struct a {
long fd;
@@ -5457,17 +5375,13 @@ auf_recv(tad, error, rval)
bzero((void *)so_laddr, sizeof (so_laddr));
bzero((void *)so_faddr, sizeof (so_faddr));
- /* only done once on a connection */
- (void) SOP_GETSOCKNAME(so);
- (void) SOP_GETPEERNAME(so);
-
/* get local and foreign addresses */
- mutex_enter(&so->so_lock);
- len = min(so->so_laddr_len, sizeof (so_laddr));
- bcopy(so->so_laddr_sa, so_laddr, len);
- len = min(so->so_faddr_len, sizeof (so_faddr));
- bcopy(so->so_faddr_sa, so_faddr, len);
- mutex_exit(&so->so_lock);
+ len = sizeof (so_laddr);
+ (void) socket_getsockname(so,
+ (struct sockaddr *)so_laddr, &len, CRED());
+ len = sizeof (so_faddr);
+ (void) socket_getpeername(so,
+ (struct sockaddr *)so_faddr, &len, B_FALSE, CRED());
/*
* only way to drop out of switch. Note that we
@@ -5532,7 +5446,7 @@ auf_send(tad, error, rval)
struct f_audit_data *fad;
int fd;
int err;
- int len;
+ socklen_t len;
short so_family, so_type;
register struct a {
long fd;
@@ -5597,17 +5511,13 @@ auf_send(tad, error, rval)
bzero((void *)so_laddr, sizeof (so_laddr));
bzero((void *)so_faddr, sizeof (so_faddr));
- /* only done once on a connection */
- (void) SOP_GETSOCKNAME(so);
- (void) SOP_GETPEERNAME(so);
-
/* get local and foreign addresses */
- mutex_enter(&so->so_lock);
- len = min(so->so_laddr_len, sizeof (so_laddr));
- bcopy(so->so_laddr_sa, so_laddr, len);
- len = min(so->so_faddr_len, sizeof (so_faddr));
- bcopy(so->so_faddr_sa, so_faddr, len);
- mutex_exit(&so->so_lock);
+ len = sizeof (so_laddr);
+ (void) socket_getsockname(so,
+ (struct sockaddr *)so_laddr, &len, CRED());
+ len = sizeof (so_faddr);
+ (void) socket_getpeername(so,
+ (struct sockaddr *)so_faddr, &len, B_FALSE, CRED());
/*
* only way to drop out of switch. Note that we
diff --git a/usr/src/uts/common/fs/smbsrv/smb_negotiate.c b/usr/src/uts/common/fs/smbsrv/smb_negotiate.c
index fb3498f545..48f6e53458 100644
--- a/usr/src/uts/common/fs/smbsrv/smb_negotiate.c
+++ b/usr/src/uts/common/fs/smbsrv/smb_negotiate.c
@@ -293,9 +293,9 @@ smb_com_negotiate(smb_request_t *sr)
switch (dialect) {
case PC_NETWORK_PROGRAM_1_0: /* core */
- (void) sosetsockopt(sr->session->sock, SOL_SOCKET, SO_RCVBUF,
- (const void *)&smb_dos_tcp_rcvbuf,
- sizeof (smb_dos_tcp_rcvbuf));
+ (void) ksocket_setsockopt(sr->session->sock, SOL_SOCKET,
+ SO_RCVBUF, (const void *)&smb_dos_tcp_rcvbuf,
+ sizeof (smb_dos_tcp_rcvbuf), CRED());
rc = smbsr_encode_result(sr, 1, 0, "bww", 1, sel_pos, 0);
break;
@@ -306,9 +306,9 @@ smb_com_negotiate(smb_request_t *sr)
case LANMAN1_0:
case LM1_2X002:
case DOS_LM1_2X002:
- (void) sosetsockopt(sr->session->sock, SOL_SOCKET, SO_RCVBUF,
- (const void *)&smb_dos_tcp_rcvbuf,
- sizeof (smb_dos_tcp_rcvbuf));
+ (void) ksocket_setsockopt(sr->session->sock, SOL_SOCKET,
+ SO_RCVBUF, (const void *)&smb_dos_tcp_rcvbuf,
+ sizeof (smb_dos_tcp_rcvbuf), CRED());
sr->smb_flg |= SMB_FLAGS_LOCK_AND_READ_OK;
rc = smbsr_encode_result(sr, 13, VAR_BCC,
"bwwwwwwlYww2.w#c",
@@ -331,9 +331,9 @@ smb_com_negotiate(smb_request_t *sr)
case DOS_LANMAN2_1:
case LANMAN2_1:
- (void) sosetsockopt(sr->session->sock, SOL_SOCKET, SO_RCVBUF,
- (const void *)&smb_dos_tcp_rcvbuf,
- sizeof (smb_dos_tcp_rcvbuf));
+ (void) ksocket_setsockopt(sr->session->sock, SOL_SOCKET,
+ SO_RCVBUF, (const void *)&smb_dos_tcp_rcvbuf,
+ sizeof (smb_dos_tcp_rcvbuf), CRED());
sr->smb_flg |= SMB_FLAGS_LOCK_AND_READ_OK;
rc = smbsr_encode_result(sr, 13, VAR_BCC,
"bwwwwwwlYww2.w#cs",
@@ -356,9 +356,9 @@ smb_com_negotiate(smb_request_t *sr)
break;
case NT_LM_0_12:
- (void) sosetsockopt(sr->session->sock, SOL_SOCKET, SO_RCVBUF,
- (const void *)&smb_nt_tcp_rcvbuf,
- sizeof (smb_nt_tcp_rcvbuf));
+ (void) ksocket_setsockopt(sr->session->sock, SOL_SOCKET,
+ SO_RCVBUF, (const void *)&smb_nt_tcp_rcvbuf,
+ sizeof (smb_nt_tcp_rcvbuf), CRED());
capabilities = CAP_LARGE_FILES
| CAP_NT_SMBS
| CAP_STATUS32
diff --git a/usr/src/uts/common/fs/smbsrv/smb_net.c b/usr/src/uts/common/fs/smbsrv/smb_net.c
index 4593cfec6b..ef41d911db 100644
--- a/usr/src/uts/common/fs/smbsrv/smb_net.c
+++ b/usr/src/uts/common/fs/smbsrv/smb_net.c
@@ -35,6 +35,7 @@
#include <sys/fs/snode.h>
#include <sys/fs/dv_node.h>
#include <sys/vnode.h>
+#include <sys/ksocket.h>
#undef mem_free /* XXX Remove this after we convert everything to kmem_alloc */
#include <smbsrv/smb_vops.h>
@@ -103,58 +104,19 @@ smb_net_fini(void)
* smb_iov_sorecv: Receive data into an iovec from a socket
*/
-struct sonode *
+ksocket_t
smb_socreate(int domain, int type, int protocol)
{
- vnode_t *dvp = NULL;
- vnode_t *vp = NULL;
- struct snode *csp = NULL;
- int err = 0;
- major_t maj;
-
- if ((vp = solookup(domain, type, protocol, NULL, &err)) == NULL) {
-
- /*
- * solookup calls sogetvp if the vp is not found in the cache.
- * Since the call to sogetvp is hardwired to use USERSPACE
- * and declared static we'll do the work here instead.
- */
- err = lookupname(type == SOCK_STREAM ? "/dev/tcp" : "/dev/udp",
- UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);
- if (err)
- return (NULL);
-
- /* Check that it is the correct vnode */
- if (vp->v_type != VCHR) {
- VN_RELE(vp);
- return (NULL);
- }
+ ksocket_t sock;
+ int err = 0;
- csp = VTOS(VTOS(vp)->s_commonvp);
- if (!(csp->s_flag & SDIPSET)) {
- char *pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
- err = ddi_dev_pathname(vp->v_rdev, S_IFCHR,
- pathname);
- if (err == 0) {
- err = devfs_lookupname(pathname, NULLVPP,
- &dvp);
- }
- VN_RELE(vp);
- kmem_free(pathname, MAXPATHLEN);
- if (err != 0) {
- return (NULL);
- }
- vp = dvp;
- }
-
- maj = getmajor(vp->v_rdev);
- if (!STREAMSTAB(maj)) {
- VN_RELE(vp);
- return (NULL);
- }
- }
+ err = ksocket_socket(&sock, domain, type, protocol, KSOCKET_SLEEP,
+ CRED());
- return (socreate(vp, domain, type, protocol, SOV_DEFAULT, NULL, &err));
+ if (err != 0)
+ return (NULL);
+ else
+ return (sock);
}
/*
@@ -165,9 +127,9 @@ smb_socreate(int domain, int type, int protocol)
* regain control of a thread stuck in smb_sorecv.
*/
void
-smb_soshutdown(struct sonode *so)
+smb_soshutdown(ksocket_t so)
{
- (void) soshutdown(so, SHUT_RDWR);
+ (void) ksocket_shutdown(so, SHUT_RDWR, CRED());
}
/*
@@ -177,82 +139,27 @@ smb_soshutdown(struct sonode *so)
* behavior will result.
*/
void
-smb_sodestroy(struct sonode *so)
+smb_sodestroy(ksocket_t so)
{
- vnode_t *vp = SOTOV(so);
-
- (void) VOP_CLOSE(vp, 0, 1, 0, kcred, NULL);
- VN_RELE(vp);
+ (void) ksocket_close(so, CRED());
}
int
-smb_sorecv(struct sonode *so, void *msg, size_t len)
+smb_sorecv(ksocket_t so, void *msg, size_t len)
{
- iovec_t iov;
+ size_t recvd;
int err;
ASSERT(so != NULL);
ASSERT(len != 0);
- /*
- * Fill in iovec and receive data
- */
- iov.iov_base = msg;
- iov.iov_len = len;
-
- if ((err = smb_iov_sorecv(so, &iov, 1, len)) != 0) {
+ if ((err = ksocket_recv(so, msg, len, MSG_WAITALL, &recvd,
+ CRED())) != 0) {
return (err);
}
/* Successful receive */
- return (0);
-}
-
-/*
- * smb_iov_sorecv - Receives an iovec from a connection
- *
- * This function gets the data asked for from the socket. It will return
- * only when all the requested data has been retrieved or if an error
- * occurs.
- *
- * Returns 0 for success, the socket errno value if sorecvmsg fails, and
- * -1 if sorecvmsg returns success but uio_resid != 0
- */
-int
-smb_iov_sorecv(struct sonode *so, iovec_t *iop, int iovlen, size_t total_len)
-{
- struct msghdr msg;
- struct uio uio;
- int error;
-
- ASSERT(iop != NULL);
-
- /* Initialization of the message header. */
- bzero(&msg, sizeof (msg));
- msg.msg_iov = iop;
- msg.msg_flags = MSG_WAITALL;
- msg.msg_iovlen = iovlen;
-
- /* Initialization of the uio structure. */
- bzero(&uio, sizeof (uio));
- uio.uio_iov = iop;
- uio.uio_iovcnt = iovlen;
- uio.uio_segflg = UIO_SYSSPACE;
- uio.uio_resid = total_len;
-
- if ((error = sorecvmsg(so, &msg, &uio)) == 0) {
- /* Received data */
- if (uio.uio_resid == 0) {
- /* All requested data received. Success */
- return (0);
- } else {
- /* Not all data was sent. Failure */
- return (-1);
- }
- }
-
- /* Receive failed */
- return (error);
+ return ((recvd == len) ? 0 : -1);
}
/*
@@ -327,13 +234,12 @@ smb_net_txr_free(smb_txreq_t *txr)
* queued and the routine returns immediately.
*/
int
-smb_net_txr_send(struct sonode *so, smb_txlst_t *txl, smb_txreq_t *txr)
+smb_net_txr_send(ksocket_t so, smb_txlst_t *txl, smb_txreq_t *txr)
{
list_t local;
int rc = 0;
- iovec_t iov;
- struct msghdr msg;
- struct uio uio;
+ size_t sent = 0;
+ size_t len;
ASSERT(txl->tl_magic == SMB_TXLST_MAGIC);
@@ -355,25 +261,11 @@ smb_net_txr_send(struct sonode *so, smb_txlst_t *txl, smb_txreq_t *txr)
ASSERT(txr->tr_magic == SMB_TXREQ_MAGIC);
list_remove(&local, txr);
- iov.iov_base = (void *)txr->tr_buf;
- iov.iov_len = txr->tr_len;
-
- bzero(&msg, sizeof (msg));
- msg.msg_iov = &iov;
- msg.msg_flags = MSG_WAITALL;
- msg.msg_iovlen = 1;
-
- bzero(&uio, sizeof (uio));
- uio.uio_iov = &iov;
- uio.uio_iovcnt = 1;
- uio.uio_segflg = UIO_SYSSPACE;
- uio.uio_resid = txr->tr_len;
-
- rc = sosendmsg(so, &msg, &uio);
-
+ len = txr->tr_len;
+ rc = ksocket_send(so, txr->tr_buf, txr->tr_len,
+ MSG_WAITALL, &sent, CRED());
smb_net_txr_free(txr);
-
- if ((rc == 0) && (uio.uio_resid == 0))
+ if ((rc == 0) && (sent == len))
continue;
if (rc == 0)
diff --git a/usr/src/uts/common/fs/smbsrv/smb_server.c b/usr/src/uts/common/fs/smbsrv/smb_server.c
index eb3f1d82a3..9296f123be 100644
--- a/usr/src/uts/common/fs/smbsrv/smb_server.c
+++ b/usr/src/uts/common/fs/smbsrv/smb_server.c
@@ -1242,7 +1242,7 @@ smb_server_listen(
int pthread_create_error)
{
int rc;
- struct sonode *s_so;
+ ksocket_t s_so;
uint32_t on = 1;
smb_session_t *session;
@@ -1263,14 +1263,16 @@ smb_server_listen(
if (ld->ld_so) {
- (void) sosetsockopt(ld->ld_so, SOL_SOCKET,
- SO_REUSEADDR, (const void *)&on, sizeof (on));
+ (void) ksocket_setsockopt(ld->ld_so, SOL_SOCKET,
+ SO_REUSEADDR, (const void *)&on, sizeof (on),
+ CRED());
- rc = sobind(ld->ld_so, (struct sockaddr *)&ld->ld_sin,
- sizeof (ld->ld_sin), 0, 0);
+ rc = ksocket_bind(ld->ld_so,
+ (struct sockaddr *)&ld->ld_sin,
+ sizeof (ld->ld_sin), CRED());
if (rc == 0) {
- rc = solisten(ld->ld_so, 20);
+ rc = ksocket_listen(ld->ld_so, 20, CRED());
if (rc < 0) {
cmn_err(CE_WARN,
"Port %d: listen failed", port);
@@ -1297,19 +1299,22 @@ smb_server_listen(
DTRACE_PROBE1(so__wait__accept, struct sonode *, ld->ld_so);
for (;;) {
- rc = soaccept(ld->ld_so, 0, &s_so);
+ rc = ksocket_accept(ld->ld_so, NULL, NULL, &s_so, CRED());
if (rc == 0) {
uint32_t txbuf_size = 128*1024;
uint32_t on = 1;
DTRACE_PROBE1(so__accept, struct sonode *, s_so);
- (void) sosetsockopt(s_so, IPPROTO_TCP, TCP_NODELAY,
- (const void *)&on, sizeof (on));
- (void) sosetsockopt(s_so, SOL_SOCKET, SO_KEEPALIVE,
- (const void *)&on, sizeof (on));
- (void) sosetsockopt(s_so, SOL_SOCKET, SO_SNDBUF,
- (const void *)&txbuf_size, sizeof (txbuf_size));
+ (void) ksocket_setsockopt(s_so, IPPROTO_TCP,
+ TCP_NODELAY, (const void *)&on, sizeof (on),
+ CRED());
+ (void) ksocket_setsockopt(s_so, SOL_SOCKET,
+ SO_KEEPALIVE, (const void *)&on, sizeof (on),
+ CRED());
+ (void) ksocket_setsockopt(s_so, SOL_SOCKET, SO_SNDBUF,
+ (const void *)&txbuf_size, sizeof (txbuf_size),
+ CRED());
/*
* Create a session for this connection.
*/
diff --git a/usr/src/uts/common/fs/smbsrv/smb_session.c b/usr/src/uts/common/fs/smbsrv/smb_session.c
index f76c6d77d1..571dee63c3 100644
--- a/usr/src/uts/common/fs/smbsrv/smb_session.c
+++ b/usr/src/uts/common/fs/smbsrv/smb_session.c
@@ -634,11 +634,10 @@ smb_session_message(smb_session_t *session)
* Port will be SSN_SRVC_TCP_PORT or SMB_SRVC_TCP_PORT.
*/
smb_session_t *
-smb_session_create(struct sonode *new_so, uint16_t port, smb_server_t *sv)
+smb_session_create(ksocket_t new_so, uint16_t port, smb_server_t *sv)
{
- uint32_t ipaddr;
- uint32_t local_ipaddr;
struct sockaddr_in sin;
+ socklen_t slen;
smb_session_t *session;
session = kmem_cache_alloc(sv->si_cache_session, KM_SLEEP);
@@ -670,13 +669,18 @@ smb_session_create(struct sonode *new_so, uint16_t port, smb_server_t *sv)
smb_rwx_init(&session->s_lock);
if (new_so) {
- bcopy(new_so->so_faddr_sa, &sin, new_so->so_faddr_len);
- ipaddr = sin.sin_addr.s_addr;
- bcopy(new_so->so_laddr_sa, &sin, new_so->so_faddr_len);
- local_ipaddr = sin.sin_addr.s_addr;
+ slen = sizeof (sin);
+
+ (void) ksocket_getsockname(new_so, (struct sockaddr *)&sin,
+ &slen, CRED());
+ session->local_ipaddr = sin.sin_addr.s_addr;
+
+ slen = sizeof (sin);
+ (void) ksocket_getpeername(new_so, (struct sockaddr *)&sin,
+ &slen, CRED());
+ session->ipaddr = sin.sin_addr.s_addr;
+
session->s_local_port = port;
- session->ipaddr = ipaddr;
- session->local_ipaddr = local_ipaddr;
session->sock = new_so;
}
diff --git a/usr/src/uts/common/fs/sockfs/nl7c.c b/usr/src/uts/common/fs/sockfs/nl7c.c
index 002d111c3a..fe3619ab6c 100644
--- a/usr/src/uts/common/fs/sockfs/nl7c.c
+++ b/usr/src/uts/common/fs/sockfs/nl7c.c
@@ -19,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* NL7C (Network Layer 7 Cache) as part of SOCKFS provides an in-kernel
* gateway cache for the request/response message based L7 protocol HTTP
@@ -57,6 +55,7 @@
#include <netinet/in.h>
#include <fs/sockfs/nl7c.h>
#include <fs/sockfs/nl7curi.h>
+#include <fs/sockfs/socktpi.h>
#include <inet/nca/ncadoorhdr.h>
#include <inet/nca/ncalogd.h>
@@ -90,7 +89,7 @@ extern void nl7c_nca_init(void);
*
* This list is searched at bind(3SOCKET) time when an application doesn't
* explicitly set AF_NCA but instead uses AF_INET, if a match is found then
- * the underlying socket is marked so_nl7c_flags NL7C_ENABLED.
+ * the underlying socket is marked sti_nl7c_flags NL7C_ENABLED.
*/
typedef struct nl7c_addr_s {
@@ -121,7 +120,7 @@ nl7c_listener_addr(void *arg, struct sonode *so)
if (p->listener == NULL)
p->listener = so;
- so->so_nl7c_addr = arg;
+ SOTOTPI(so)->sti_nl7c_addr = arg;
}
struct sonode *
@@ -256,7 +255,7 @@ nl7c_mi_report_addr(mblk_t *mp)
int a4 = ip & 0xFF;
(void) mi_sprintf(addr, "%d.%d.%d.%d",
- a1, a2, a3, a4);
+ a1, a2, a3, a4);
}
so = p->listener;
(void) mi_mpprintf(mp, "%p %s:%d %d",
@@ -398,7 +397,7 @@ ncaportconf_read(void)
if (ret != 0) {
/* Error of some sort, tell'm about it */
cmn_err(CE_WARN, "%s: read error %d",
- portconf, ret);
+ portconf, ret);
break;
}
if (resid == sizeof (buf)) {
@@ -564,7 +563,7 @@ ncakmodconf_read(void)
if (ret != 0) {
/* Error of some sort, tell'm about it */
cmn_err(CE_WARN, "%s: read error %d",
- status, ret);
+ status, ret);
break;
}
if (resid == sizeof (buf)) {
@@ -687,7 +686,7 @@ ncalogdconf_read(void)
if (ret != 0) {
/* Error of some sort, tell'm about it */
cmn_err(CE_WARN, "%s: read error %d",
- ncalogd, ret);
+ ncalogd, ret);
break;
}
if (resid == sizeof (buf)) {
@@ -933,7 +932,8 @@ boolean_t
nl7c_process(struct sonode *so, boolean_t nonblocking)
{
vnode_t *vp = SOTOV(so);
- mblk_t *rmp = so->so_nl7c_rcv_mp;
+ sotpi_info_t *sti = SOTOTPI(so);
+ mblk_t *rmp = sti->sti_nl7c_rcv_mp;
clock_t timout;
rval_t rval;
uchar_t pri;
@@ -942,7 +942,7 @@ nl7c_process(struct sonode *so, boolean_t nonblocking)
boolean_t more;
boolean_t ret = B_FALSE;
boolean_t first = B_TRUE;
- boolean_t pollin = (so->so_nl7c_flags & NL7C_POLLIN);
+ boolean_t pollin = (sti->sti_nl7c_flags & NL7C_POLLIN);
nl7c_proc_cnt++;
@@ -950,7 +950,7 @@ nl7c_process(struct sonode *so, boolean_t nonblocking)
error = so_lock_read_intr(so, nonblocking ? FNDELAY|FNONBLOCK : 0);
if (error) {
/* Couldn't read lock, pass on this socket */
- so->so_nl7c_flags = 0;
+ sti->sti_nl7c_flags = 0;
nl7c_proc_noLRI++;
return (B_FALSE);
}
@@ -958,7 +958,7 @@ nl7c_process(struct sonode *so, boolean_t nonblocking)
mutex_exit(&so->so_lock);
if (pollin)
- so->so_nl7c_flags &= ~NL7C_POLLIN;
+ sti->sti_nl7c_flags &= ~NL7C_POLLIN;
/* Initialize some kstrgetmsg() constants */
pflag = MSG_ANY | MSG_DELAYERROR;
@@ -966,7 +966,7 @@ nl7c_process(struct sonode *so, boolean_t nonblocking)
if (nonblocking) {
/* Non blocking so don't block */
timout = 0;
- } else if (so->so_nl7c_flags & NL7C_SOPERSIST) {
+ } else if (sti->sti_nl7c_flags & NL7C_SOPERSIST) {
/* 2nd or more time(s) here so use keep-alive value */
timout = nca_http_keep_alive_timeout;
} else {
@@ -996,18 +996,18 @@ nl7c_process(struct sonode *so, boolean_t nonblocking)
/* Error of some sort */
nl7c_proc_error++;
rval.r_v.r_v2 = error;
- so->so_nl7c_flags = 0;
+ sti->sti_nl7c_flags = 0;
break;
}
error = 0;
}
if (rmp != NULL) {
- mblk_t *mp = so->so_nl7c_rcv_mp;
+ mblk_t *mp = sti->sti_nl7c_rcv_mp;
if (mp == NULL) {
/* Just new data, common case */
- so->so_nl7c_rcv_mp = rmp;
+ sti->sti_nl7c_rcv_mp = rmp;
} else {
/* Add new data to tail */
while (mp->b_cont != NULL)
@@ -1015,13 +1015,14 @@ nl7c_process(struct sonode *so, boolean_t nonblocking)
mp->b_cont = rmp;
}
}
- if (so->so_nl7c_rcv_mp == NULL) {
+ if (sti->sti_nl7c_rcv_mp == NULL) {
/* No data */
nl7c_proc_nodata++;
if (timout > 0 || (first && pollin)) {
/* Expected data so EOF */
ret = B_TRUE;
- } else if (so->so_nl7c_flags & NL7C_SOPERSIST) {
+ } else if (sti->sti_nl7c_flags &
+ NL7C_SOPERSIST) {
/* Persistent so just checking */
ret = B_FALSE;
}
@@ -1035,7 +1036,7 @@ nl7c_process(struct sonode *so, boolean_t nonblocking)
more = nl7c_parse(so, nonblocking, &ret);
- if (ret == B_TRUE && (so->so_nl7c_flags & NL7C_SOPERSIST)) {
+ if (ret == B_TRUE && (sti->sti_nl7c_flags & NL7C_SOPERSIST)) {
/*
* Parse complete, cache hit, response on its way,
* socket is persistent so try to process the next
@@ -1045,7 +1046,7 @@ nl7c_process(struct sonode *so, boolean_t nonblocking)
ret = B_FALSE;
break;
}
- if (so->so_nl7c_rcv_mp) {
+ if (sti->sti_nl7c_rcv_mp) {
/* More recv-side data, pipelined */
nl7c_proc_again++;
goto again;
@@ -1061,10 +1062,10 @@ nl7c_process(struct sonode *so, boolean_t nonblocking)
} while (more);
- if (so->so_nl7c_rcv_mp) {
+ if (sti->sti_nl7c_rcv_mp) {
nl7c_proc_rcv++;
}
- so->so_nl7c_rcv_rval = rval.r_vals;
+ sti->sti_nl7c_rcv_rval = rval.r_vals;
/* Renter so_lock, caller called with it enter()ed */
mutex_enter(&so->so_lock);
so_unlock_read(so);
diff --git a/usr/src/uts/common/fs/sockfs/nl7c.h b/usr/src/uts/common/fs/sockfs/nl7c.h
index 68914a3a58..6cd27c5efd 100644
--- a/usr/src/uts/common/fs/sockfs/nl7c.h
+++ b/usr/src/uts/common/fs/sockfs/nl7c.h
@@ -19,15 +19,13 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_SOCKFS_NL7C_H
#define _SYS_SOCKFS_NL7C_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -39,8 +37,17 @@ extern "C" {
#include <sys/socket.h>
#include <sys/socketvar.h>
+
/*
- * NL7C (uint64_t)(struct sonode).so_nl7c_flags:
+ * NCA_DEV NCA device
+ *
+ * NCA_INET_DEV TPI device for the INET based transport that NCA will use.
+ */
+#define NCA_DEV "/dev/nca"
+#define NCA_INET_DEV "/dev/tcp"
+
+/*
+ * NL7C (uint64_t)(sotpi_info_t).sti_nl7c_flags:
*/
#define NL7C_ENABLED 0x00000001 /* NL7C enabled socket */
@@ -71,6 +78,10 @@ void nl7c_urifree(struct sonode *);
void nl7c_close(struct sonode *);
boolean_t nl7c_parse(struct sonode *, boolean_t, boolean_t *);
+extern void *nl7c_lookup_addr(void *, t_uscalar_t);
+extern void *nl7c_add_addr(void *, t_uscalar_t);
+extern void nl7c_listener_addr(void *, struct sonode *);
+
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/uts/common/fs/sockfs/nl7chttp.c b/usr/src/uts/common/fs/sockfs/nl7chttp.c
index 20f726a4c2..81dd8a99a5 100644
--- a/usr/src/uts/common/fs/sockfs/nl7chttp.c
+++ b/usr/src/uts/common/fs/sockfs/nl7chttp.c
@@ -19,16 +19,15 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/sysmacros.h>
#include <sys/strsubr.h>
#include <fs/sockfs/nl7c.h>
#include <fs/sockfs/nl7curi.h>
+#include <fs/sockfs/socktpi.h>
#include <inet/nca/ncadoorhdr.h>
#include <inet/nca/ncalogd.h>
@@ -578,7 +577,7 @@ http_date2time_t(char *cp, char *ep)
leap--;
leap = leap / 4 - leap / 100 + leap / 400 - zeroleap;
secs = ((((year - 1970) * 365 + dom[month] + day - 1 + leap) * 24
- + hour) * 60 + min) * 60 + sec;
+ + hour) * 60 + min) * 60 + sec;
return (secs);
}
@@ -1167,7 +1166,7 @@ nl7c_http_cond(uri_desc_t *req, uri_desc_t *res)
mblk_t *
nl7c_http_persist(struct sonode *so)
{
- uint64_t flags = so->so_nl7c_flags & NL7C_SCHEMEPRIV;
+ uint64_t flags = SOTOTPI(so)->sti_nl7c_flags & NL7C_SCHEMEPRIV;
mblk_t *mp;
if (flags & HTTP_CONN_CL)
@@ -1187,6 +1186,7 @@ nl7c_http_persist(struct sonode *so)
boolean_t
nl7c_http_request(char **cpp, char *ep, uri_desc_t *uri, struct sonode *so)
{
+ sotpi_info_t *sti = SOTOTPI(so);
http_t *http = kmem_cache_alloc(http_kmc, KM_SLEEP);
char *cp = *cpp;
char *hp;
@@ -1429,20 +1429,20 @@ done:
*
*/
if (persist)
- so->so_nl7c_flags |= NL7C_SOPERSIST;
+ sti->sti_nl7c_flags |= NL7C_SOPERSIST;
else
- so->so_nl7c_flags &= ~NL7C_SOPERSIST;
+ sti->sti_nl7c_flags &= ~NL7C_SOPERSIST;
if (http->major == 1) {
- so->so_nl7c_flags &= ~NL7C_SCHEMEPRIV;
+ sti->sti_nl7c_flags &= ~NL7C_SCHEMEPRIV;
if (http->minor >= 1) {
if (! persist)
- so->so_nl7c_flags |= HTTP_CONN_CL;
+ sti->sti_nl7c_flags |= HTTP_CONN_CL;
} else {
if (persist)
- so->so_nl7c_flags |= HTTP_CONN_KA;
+ sti->sti_nl7c_flags |= HTTP_CONN_KA;
else
- so->so_nl7c_flags |= HTTP_CONN_CL;
+ sti->sti_nl7c_flags |= HTTP_CONN_CL;
}
}
/*
@@ -1464,6 +1464,7 @@ more:
boolean_t
nl7c_http_response(char **cpp, char *ep, uri_desc_t *uri, struct sonode *so)
{
+ sotpi_info_t *sti = SOTOTPI(so);
http_t *http = uri->scheme;
char *cp = *cpp;
char *hp;
@@ -1753,20 +1754,20 @@ done:
/* Set socket persist state */
if (persist)
- so->so_nl7c_flags |= NL7C_SOPERSIST;
+ sti->sti_nl7c_flags |= NL7C_SOPERSIST;
else
- so->so_nl7c_flags &= ~NL7C_SOPERSIST;
+ sti->sti_nl7c_flags &= ~NL7C_SOPERSIST;
if (http->major == 1) {
- so->so_nl7c_flags &= ~NL7C_SCHEMEPRIV;
+ sti->sti_nl7c_flags &= ~NL7C_SCHEMEPRIV;
if (http->minor >= 1) {
if (! persist)
- so->so_nl7c_flags |= HTTP_CONN_CL;
+ sti->sti_nl7c_flags |= HTTP_CONN_CL;
} else {
if (persist)
- so->so_nl7c_flags |= HTTP_CONN_KA;
+ sti->sti_nl7c_flags |= HTTP_CONN_KA;
else
- so->so_nl7c_flags |= HTTP_CONN_CL;
+ sti->sti_nl7c_flags |= HTTP_CONN_CL;
}
}
diff --git a/usr/src/uts/common/fs/sockfs/nl7curi.c b/usr/src/uts/common/fs/sockfs/nl7curi.c
index fb1bf2f000..61f72258fc 100644
--- a/usr/src/uts/common/fs/sockfs/nl7curi.c
+++ b/usr/src/uts/common/fs/sockfs/nl7curi.c
@@ -33,6 +33,7 @@
#include <sys/sendfile.h>
#include <fs/sockfs/nl7c.h>
#include <fs/sockfs/nl7curi.h>
+#include <fs/sockfs/socktpi_impl.h>
#include <inet/common.h>
#include <inet/ip.h>
@@ -1017,9 +1018,10 @@ next:
void
nl7c_urifree(struct sonode *so)
{
- uri_desc_t *uri = (uri_desc_t *)so->so_nl7c_uri;
+ sotpi_info_t *sti = SOTOTPI(so);
+ uri_desc_t *uri = (uri_desc_t *)sti->sti_nl7c_uri;
- so->so_nl7c_uri = NULL;
+ sti->sti_nl7c_uri = NULL;
if (uri->hash != URI_TEMP) {
uri_delete(uri);
mutex_enter(&uri->proclock);
@@ -1109,7 +1111,8 @@ pass:
int
nl7c_data(struct sonode *so, uio_t *uio)
{
- uri_desc_t *uri = (uri_desc_t *)so->so_nl7c_uri;
+ sotpi_info_t *sti = SOTOTPI(so);
+ uri_desc_t *uri = (uri_desc_t *)sti->sti_nl7c_uri;
iovec_t *iov;
int cnt;
int sz = uio->uio_resid;
@@ -1123,13 +1126,13 @@ nl7c_data(struct sonode *so, uio_t *uio)
if (uri == NULL) {
/* Socket & NL7C out of sync, disable NL7C */
- so->so_nl7c_flags = 0;
+ sti->sti_nl7c_flags = 0;
nl7c_uri_NULL1++;
return (-1);
}
- if (so->so_nl7c_flags & NL7C_WAITWRITE) {
- so->so_nl7c_flags &= ~NL7C_WAITWRITE;
+ if (sti->sti_nl7c_flags & NL7C_WAITWRITE) {
+ sti->sti_nl7c_flags &= ~NL7C_WAITWRITE;
first = B_TRUE;
} else {
first = B_FALSE;
@@ -1191,9 +1194,9 @@ nl7c_data(struct sonode *so, uio_t *uio)
* so close the URI processing for this so.
*/
nl7c_close(so);
- if (! (so->so_nl7c_flags & NL7C_SOPERSIST)) {
+ if (! (sti->sti_nl7c_flags & NL7C_SOPERSIST)) {
/* Not a persistent connection */
- so->so_nl7c_flags = 0;
+ sti->sti_nl7c_flags = 0;
}
}
@@ -1203,7 +1206,7 @@ fail:
if (alloc != NULL) {
kmem_free(alloc, sz);
}
- so->so_nl7c_flags = 0;
+ sti->sti_nl7c_flags = 0;
nl7c_urifree(so);
return (error);
@@ -1275,7 +1278,8 @@ int
nl7c_sendfilev(struct sonode *so, u_offset_t *fileoff, sendfilevec_t *sfvp,
int sfvc, ssize_t *xfer)
{
- uri_desc_t *uri = (uri_desc_t *)so->so_nl7c_uri;
+ sotpi_info_t *sti = SOTOTPI(so);
+ uri_desc_t *uri = (uri_desc_t *)sti->sti_nl7c_uri;
file_t *fp = NULL;
vnode_t *vp = NULL;
char *data = NULL;
@@ -1294,13 +1298,13 @@ nl7c_sendfilev(struct sonode *so, u_offset_t *fileoff, sendfilevec_t *sfvp,
if (uri == NULL) {
/* Socket & NL7C out of sync, disable NL7C */
- so->so_nl7c_flags = 0;
+ sti->sti_nl7c_flags = 0;
nl7c_uri_NULL2++;
return (0);
}
- if (so->so_nl7c_flags & NL7C_WAITWRITE)
- so->so_nl7c_flags &= ~NL7C_WAITWRITE;
+ if (sti->sti_nl7c_flags & NL7C_WAITWRITE)
+ sti->sti_nl7c_flags &= ~NL7C_WAITWRITE;
while (sfvc-- > 0) {
/*
@@ -1435,15 +1439,18 @@ nl7c_sendfilev(struct sonode *so, u_offset_t *fileoff, sendfilevec_t *sfvp,
* so close the URI processing for this so.
*/
nl7c_close(so);
- if (! (so->so_nl7c_flags & NL7C_SOPERSIST)) {
+ if (! (sti->sti_nl7c_flags & NL7C_SOPERSIST)) {
/* Not a persistent connection */
- so->so_nl7c_flags = 0;
+ sti->sti_nl7c_flags = 0;
}
}
return (0);
fail:
+ if (error == EPIPE)
+ tsignal(curthread, SIGPIPE);
+
if (alloc != NULL)
kmem_free(data, len);
@@ -1457,7 +1464,7 @@ fail:
atomic_add_64(&nl7c_uri_bytes, total_count);
}
- so->so_nl7c_flags = 0;
+ sti->sti_nl7c_flags = 0;
nl7c_urifree(so);
return (error);
@@ -1472,7 +1479,8 @@ fail:
void
nl7c_close(struct sonode *so)
{
- uri_desc_t *uri = (uri_desc_t *)so->so_nl7c_uri;
+ sotpi_info_t *sti = SOTOTPI(so);
+ uri_desc_t *uri = (uri_desc_t *)sti->sti_nl7c_uri;
if (uri == NULL) {
/*
@@ -1484,7 +1492,7 @@ nl7c_close(struct sonode *so)
}
return;
}
- so->so_nl7c_uri = NULL;
+ sti->sti_nl7c_uri = NULL;
if (uri->hash != URI_TEMP) {
mutex_enter(&uri->proclock);
uri->proc = NULL;
@@ -1679,7 +1687,6 @@ kstrwritempnoqwait(struct vnode *vp, mblk_t *mp)
if (error != 0) {
if (!(stp->sd_flag & STPLEX) &&
(stp->sd_wput_opt & SW_SIGPIPE)) {
- tsignal(curthread, SIGPIPE);
error = EPIPE;
}
return (error);
@@ -1700,7 +1707,7 @@ uri_rd_response(struct sonode *so,
boolean_t first)
{
vnode_t *vp = SOTOV(so);
- int max_mblk = (int)((tcp_t *)so->so_priv)->tcp_mss;
+ int max_mblk = (int)vp->v_stream->sd_maxblk;
int wsz;
mblk_t *mp, *wmp, *persist;
int write_bytes;
@@ -1934,8 +1941,9 @@ static char pchars[] = {
boolean_t
nl7c_parse(struct sonode *so, boolean_t nonblocking, boolean_t *ret)
{
- char *cp = (char *)so->so_nl7c_rcv_mp->b_rptr;
- char *ep = (char *)so->so_nl7c_rcv_mp->b_wptr;
+ sotpi_info_t *sti = SOTOTPI(so);
+ char *cp = (char *)sti->sti_nl7c_rcv_mp->b_rptr;
+ char *ep = (char *)sti->sti_nl7c_rcv_mp->b_wptr;
char *get = "GET ";
char *post = "POST ";
char c;
@@ -1945,7 +1953,7 @@ nl7c_parse(struct sonode *so, boolean_t nonblocking, boolean_t *ret)
mblk_t *reqmp;
uint32_t hv = 0;
- if ((reqmp = dupb(so->so_nl7c_rcv_mp)) == NULL) {
+ if ((reqmp = dupb(sti->sti_nl7c_rcv_mp)) == NULL) {
nl7c_uri_pass_dupbfail++;
goto pass;
}
@@ -1965,7 +1973,7 @@ nl7c_parse(struct sonode *so, boolean_t nonblocking, boolean_t *ret)
/*
* Set request time to current time.
*/
- so->so_nl7c_rtime = gethrestime_sec();
+ sti->sti_nl7c_rtime = gethrestime_sec();
/*
* Parse the Request-Line for the URI.
@@ -2043,7 +2051,7 @@ nl7c_parse(struct sonode *so, boolean_t nonblocking, boolean_t *ret)
}
if (uri->hash == URI_TEMP) {
- if (so->so_nl7c_flags & NL7C_SOPERSIST) {
+ if (sti->sti_nl7c_flags & NL7C_SOPERSIST) {
/* Temporary URI so skip hash processing */
nl7c_uri_request++;
nl7c_uri_temp++;
@@ -2073,10 +2081,10 @@ nl7c_parse(struct sonode *so, boolean_t nonblocking, boolean_t *ret)
* We have the response cached, update recv mblk rptr
* to reflect the data consumed in parse.
*/
- mblk_t *mp = so->so_nl7c_rcv_mp;
+ mblk_t *mp = sti->sti_nl7c_rcv_mp;
if (cp == (char *)mp->b_wptr) {
- so->so_nl7c_rcv_mp = mp->b_cont;
+ sti->sti_nl7c_rcv_mp = mp->b_cont;
mp->b_cont = NULL;
freeb(mp);
} else {
@@ -2094,12 +2102,12 @@ nl7c_parse(struct sonode *so, boolean_t nonblocking, boolean_t *ret)
if (so->so_family == AF_INET) {
/* Only support IPv4 addrs */
faddr = ((struct sockaddr_in *)
- so->so_faddr_sa) ->sin_addr.s_addr;
+ sti->sti_faddr_sa) ->sin_addr.s_addr;
} else {
faddr = 0;
}
/* XXX need to pass response type, e.g. 200, 304 */
- nl7c_logd_log(ruri, uri, so->so_nl7c_rtime, faddr);
+ nl7c_logd_log(ruri, uri, sti->sti_nl7c_rtime, faddr);
}
/*
* Release reference on request URI, send the response out
@@ -2125,11 +2133,11 @@ temp:
* read-side processing is suspended (so the next read() gets
* the request data) until a write() is processed by NL7C.
*
- * Note, so->so_nl7c_uri now owns the REF_INIT() ref.
+ * Note, sti->sti_nl7c_uri now owns the REF_INIT() ref.
*/
uri->proc = so;
- so->so_nl7c_uri = uri;
- so->so_nl7c_flags |= NL7C_WAITWRITE;
+ sti->sti_nl7c_uri = uri;
+ sti->sti_nl7c_flags |= NL7C_WAITWRITE;
*ret = B_FALSE;
return (B_FALSE);
@@ -2147,7 +2155,7 @@ pass:
if (uri) {
REF_RELE(uri);
}
- so->so_nl7c_flags = 0;
+ sti->sti_nl7c_flags = 0;
*ret = B_FALSE;
return (B_FALSE);
}
diff --git a/usr/src/uts/common/fs/sockfs/sockcommon.c b/usr/src/uts/common/fs/sockfs/sockcommon.c
new file mode 100644
index 0000000000..02c3c16df5
--- /dev/null
+++ b/usr/src/uts/common/fs/sockfs/sockcommon.c
@@ -0,0 +1,1092 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/sysmacros.h>
+#include <sys/debug.h>
+#include <sys/cmn_err.h>
+#include <sys/vfs.h>
+#include <sys/policy.h>
+#include <sys/modctl.h>
+
+#include <sys/sunddi.h>
+
+#include <sys/strsun.h>
+#include <sys/stropts.h>
+#include <sys/strsubr.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sodirect.h>
+#include <sys/uio.h>
+
+#include <inet/ipclassifier.h>
+#include <fs/sockfs/sockcommon.h>
+#include <fs/sockfs/nl7c.h>
+#include <inet/ip.h>
+
+extern int xnet_skip_checks, xnet_check_print, xnet_truncate_print;
+
+static struct kmem_cache *sock_sod_cache;
+
+/*
+ * Common socket access functions.
+ *
+ * Instead of accessing the sonode switch directly (i.e., SOP_xxx()),
+ * the socket_xxx() function should be used.
+ */
+
+/*
+ * Try to create a new sonode of the requested <family, type, protocol>.
+ *
+ * A sockparams entry is first looked up with solookup(). If none is found,
+ * an ephemeral entry is created from `devpath' or, when that is NULL, from
+ * `mod'. If neither is supplied NULL is returned and *errorp holds the
+ * solookup() result.
+ *
+ * `flags' must be SOCKET_SLEEP or SOCKET_NOSLEEP (asserted below).
+ *
+ * Returns the new sonode after a successful SOP_INIT() and VOP_OPEN(), or
+ * NULL on failure; errorp is passed to (or assigned from) every step that
+ * can fail.
+ */
+/* ARGSUSED */
+struct sonode *
+socket_create(int family, int type, int protocol, char *devpath, char *mod,
+ int flags, int version, struct cred *cr, int *errorp)
+{
+ struct sonode *so;
+ struct sockparams *sp = NULL;
+
+ /*
+ * Look for a sockparams entry that matches the given criteria.
+ * solookup() returns with the entry held.
+ */
+ *errorp = solookup(family, type, protocol, &sp);
+ if (sp == NULL) {
+ int kmflags = (flags == SOCKET_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
+ /*
+ * There is no matching sockparams entry. An ephemeral entry is
+ * created if the caller specifies a device or a socket module.
+ */
+ if (devpath != NULL) {
+ sp = sockparams_hold_ephemeral_bydev(family, type,
+ protocol, devpath, kmflags, errorp);
+ } else if (mod != NULL) {
+ sp = sockparams_hold_ephemeral_bymod(family, type,
+ protocol, mod, kmflags, errorp);
+ } else {
+ return (NULL);
+ }
+
+ if (sp == NULL)
+ return (NULL);
+ }
+
+ ASSERT(sp->sp_smod_info != NULL);
+ ASSERT(flags == SOCKET_SLEEP || flags == SOCKET_NOSLEEP);
+ so = sp->sp_smod_info->smod_sock_create_func(sp, family, type,
+ protocol, version, flags, errorp, cr);
+ if (so == NULL) {
+ /* No sonode was created, so give back the sockparams hold */
+ SOCKPARAMS_DEC_REF(sp);
+ } else {
+ if ((*errorp = SOP_INIT(so, NULL, cr, flags)) == 0) {
+ /* Cannot fail, only bumps so_count */
+ (void) VOP_OPEN(&SOTOV(so), FREAD|FWRITE, cr, NULL);
+ } else {
+ socket_destroy(so);
+ so = NULL;
+ }
+ }
+ return (so);
+}
+
+/*
+ * Create a new sonode for a connection on `parent'.
+ *
+ * The new sonode uses the parent's sockparams, family, type, protocol and
+ * version. `lh' and `dc' are stored as the new sonode's protocol handle and
+ * downcalls before SOP_INIT() is called with `parent'.
+ *
+ * Returns the new sonode, or NULL on failure with the error in *errorp.
+ */
+struct sonode *
+socket_newconn(struct sonode *parent, sock_lower_handle_t lh,
+ sock_downcalls_t *dc, int flags, int *errorp)
+{
+ struct sonode *so;
+ struct sockparams *sp;
+ struct cred *cr;
+
+ /* Fall back to kcred when there is no current credential */
+ if ((cr = CRED()) == NULL)
+ cr = kcred;
+
+ sp = parent->so_sockparams;
+ ASSERT(sp != NULL);
+
+ so = sp->sp_smod_info->smod_sock_create_func(sp, parent->so_family,
+ parent->so_type, parent->so_protocol, parent->so_version, flags,
+ errorp, cr);
+ if (so != NULL) {
+ /* The new sonode takes its own reference on the sockparams */
+ SOCKPARAMS_INC_REF(sp);
+
+ so->so_proto_handle = lh;
+ so->so_downcalls = dc;
+ /*
+ * This function may be called in interrupt context, and CRED()
+ * will be NULL. In this case, pass in kcred.
+ */
+ if ((*errorp = SOP_INIT(so, parent, cr, flags)) == 0) {
+ /* Cannot fail, only bumps so_count */
+ (void) VOP_OPEN(&SOTOV(so), FREAD|FWRITE, cr, NULL);
+ } else {
+ socket_destroy(so);
+ so = NULL;
+ }
+ }
+
+ return (so);
+}
+
+/*
+ * Bind local endpoint.
+ *
+ * Passes all arguments unchanged to SOP_BIND() and returns its result.
+ */
+int
+socket_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen,
+ int flags, cred_t *cr)
+{
+ return (SOP_BIND(so, name, namelen, flags, cr));
+}
+
+/*
+ * Turn socket into a listen socket.
+ *
+ * The requested backlog is clamped to be non-negative and then scaled to
+ * 1.5 * backlog + 1 (saturating at INT_MAX) before being handed to
+ * SOP_LISTEN(), whose result is returned.
+ */
+int
+socket_listen(struct sonode *so, int backlog, cred_t *cr)
+{
+ if (backlog < 0) {
+ backlog = 0;
+ }
+
+ /*
+ * Use the same qlimit as in BSD. BSD checks the qlimit
+ * before queuing the next connection implying that a
+ * listen(sock, 0) allows one connection to be queued.
+ * BSD also uses 1.5 times the requested backlog.
+ *
+ * XNS Issue 4 required a strict interpretation of the backlog.
+ * This has been waived subsequently for Issue 4 and the change
+ * incorporated in XNS Issue 5. So we aren't required to do
+ * anything special for XPG apps.
+ */
+ /* Saturate: backlog * 3 would overflow an int at or above this bound */
+ if (backlog >= (INT_MAX - 1) / 3)
+ backlog = INT_MAX;
+ else
+ backlog = backlog * 3 / 2 + 1;
+
+ return (SOP_LISTEN(so, backlog, cr));
+}
+
+/*
+ * Accept incoming connection.
+ *
+ * Passes all arguments unchanged to SOP_ACCEPT() on the listening sonode
+ * `lso' and returns its result; `nsop' is the out-parameter for the new
+ * sonode.
+ */
+int
+socket_accept(struct sonode *lso, int fflag, cred_t *cr, struct sonode **nsop)
+{
+ return (SOP_ACCEPT(lso, fflag, cr, nsop));
+}
+
+/*
+ * Active open.
+ *
+ * An AF_UNSPEC address is converted to a NULL/zero-length name before
+ * SOP_CONNECT() is called. Returns the SOP_CONNECT() error, except that
+ * EHOSTUNREACH is mapped to ENETUNREACH when _SOCONNECT_XPG4_2 is set in
+ * `flags'.
+ */
+int
+socket_connect(struct sonode *so, const struct sockaddr *name,
+ socklen_t namelen, int fflag, int flags, cred_t *cr)
+{
+ int error;
+
+ /*
+ * Handle a connect to a name parameter of type AF_UNSPEC like a
+ * connect to a null address. This is the portable method to
+ * unconnect a socket.
+ */
+ /* namelen is checked first so sa_family is only read when present */
+ if ((namelen >= sizeof (sa_family_t)) &&
+ (name->sa_family == AF_UNSPEC)) {
+ name = NULL;
+ namelen = 0;
+ }
+
+ error = SOP_CONNECT(so, name, namelen, fflag, flags, cr);
+
+ if (error == EHOSTUNREACH && flags & _SOCONNECT_XPG4_2) {
+ /*
+ * X/Open specification contains a requirement that
+ * ENETUNREACH be returned but does not require
+ * EHOSTUNREACH. In order to keep the test suite
+ * happy we mess with the errno here.
+ */
+ error = ENETUNREACH;
+ }
+
+ return (error);
+}
+
+/*
+ * Get address of remote node.
+ *
+ * The caller must supply a non-zero *addrlen (asserted). All arguments are
+ * passed unchanged to SOP_GETPEERNAME(), whose result is returned.
+ */
+int
+socket_getpeername(struct sonode *so, struct sockaddr *addr,
+ socklen_t *addrlen, boolean_t accept, cred_t *cr)
+{
+ ASSERT(*addrlen > 0);
+ return (SOP_GETPEERNAME(so, addr, addrlen, accept, cr));
+
+}
+
+/*
+ * Get local address.
+ *
+ * Passes all arguments unchanged to SOP_GETSOCKNAME() and returns its
+ * result.
+ */
+int
+socket_getsockname(struct sonode *so, struct sockaddr *addr,
+ socklen_t *addrlen, cred_t *cr)
+{
+ return (SOP_GETSOCKNAME(so, addr, addrlen, cr));
+
+}
+
+/*
+ * Called from shutdown().
+ *
+ * Passes `how' and `cr' unchanged to SOP_SHUTDOWN() and returns its result.
+ */
+int
+socket_shutdown(struct sonode *so, int how, cred_t *cr)
+{
+ return (SOP_SHUTDOWN(so, how, cr));
+}
+
+/*
+ * Get socket options.
+ *
+ * Passes all arguments unchanged to SOP_GETSOCKOPT() and returns its
+ * result.
+ */
+/*ARGSUSED*/
+int
+socket_getsockopt(struct sonode *so, int level, int option_name,
+ void *optval, socklen_t *optlenp, int flags, cred_t *cr)
+{
+ return (SOP_GETSOCKOPT(so, level, option_name, optval,
+ optlenp, flags, cr));
+}
+
+/*
+ * Set socket options.
+ *
+ * `optval' must be aligned to sizeof (t_scalar_t), and must be NULL exactly
+ * when `optlen' is 0 (both asserted). A zero-length option is rejected with
+ * EINVAL; otherwise the SOP_SETSOCKOPT() result is returned.
+ */
+int
+socket_setsockopt(struct sonode *so, int level, int option_name,
+ const void *optval, t_uscalar_t optlen, cred_t *cr)
+{
+ /* Caller allocates aligned optval, or passes null */
+ ASSERT(((uintptr_t)optval & (sizeof (t_scalar_t) - 1)) == 0);
+ /* If optval is null optlen is 0, and vice-versa */
+ ASSERT(optval != NULL || optlen == 0);
+ ASSERT(optlen != 0 || optval == NULL);
+
+ /* No options should be zero-length */
+ if (optlen == 0)
+ return (EINVAL);
+
+ return (SOP_SETSOCKOPT(so, level, option_name, optval, optlen, cr));
+}
+
+/*
+ * Send a message described by `msg' and `uiop' via SOP_SENDMSG().
+ *
+ * UIO_COPY_CACHED is set in uio_extflg for AF_UNIX sockets and cleared for
+ * all others before the send.
+ *
+ * Returns the SOP_SENDMSG() error, except that EINTR, ETIME and EWOULDBLOCK
+ * are turned into success when uio_resid changed (a partial send). On EPIPE
+ * SIGPIPE is posted to the current thread unless SM_KERNEL is set in
+ * so_mode.
+ */
+int
+socket_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
+ cred_t *cr)
+{
+ int error = 0;
+ /* Remembered so a partial transfer can be detected afterwards */
+ ssize_t orig_resid = uiop->uio_resid;
+
+ /*
+ * Do not bypass the cache if we are doing a local (AF_UNIX) write.
+ */
+ if (so->so_family == AF_UNIX)
+ uiop->uio_extflg |= UIO_COPY_CACHED;
+ else
+ uiop->uio_extflg &= ~UIO_COPY_CACHED;
+
+ error = SOP_SENDMSG(so, msg, uiop, cr);
+ switch (error) {
+ default:
+ break;
+ case EINTR:
+ case ETIME:
+ case EWOULDBLOCK:
+ /* We did a partial send */
+ if (uiop->uio_resid != orig_resid)
+ error = 0;
+ break;
+ case EPIPE:
+ /* Only sockets without SM_KERNEL set get the signal */
+ if ((so->so_mode & SM_KERNEL) == 0)
+ tsignal(curthread, SIGPIPE);
+ break;
+ }
+
+ return (error);
+}
+
+/*
+ * Send the mblk chain referenced by `mpp' via SOP_SENDMBLK().
+ *
+ * Returns the SOP_SENDMBLK() error. On EPIPE SIGPIPE is posted to the
+ * current thread.
+ *
+ * NOTE(review): unlike socket_sendmsg(), the signal is posted without
+ * checking SM_KERNEL in so_mode -- confirm whether that is intended.
+ */
+int
+socket_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag,
+ struct cred *cr, mblk_t **mpp)
+{
+ int error = 0;
+
+ error = SOP_SENDMBLK(so, msg, fflag, cr, mpp);
+ if (error == EPIPE) {
+ tsignal(curthread, SIGPIPE);
+ }
+ return (error);
+}
+
+/*
+ * Receive a message into `msg' and `uiop' via SOP_RECVMSG().
+ *
+ * UIO_COPY_CACHED is always set in uio_extflg before the receive.
+ *
+ * Returns the SOP_RECVMSG() error, except that EINTR, ETIME and EWOULDBLOCK
+ * are turned into success when uio_resid changed (a partial read).
+ */
+int
+socket_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
+ cred_t *cr)
+{
+ int error;
+ /* Remembered so a partial transfer can be detected afterwards */
+ ssize_t orig_resid = uiop->uio_resid;
+
+ /*
+ * Do not bypass the cache when reading data, as the application
+ * is likely to access the data shortly.
+ */
+ uiop->uio_extflg |= UIO_COPY_CACHED;
+
+ error = SOP_RECVMSG(so, msg, uiop, cr);
+
+ switch (error) {
+ case EINTR:
+ case ETIME:
+ case EWOULDBLOCK:
+ /* We did a partial read */
+ if (uiop->uio_resid != orig_resid)
+ error = 0;
+ break;
+ default:
+ break;
+ }
+ return (error);
+}
+
+/*
+ * Issue an ioctl on the socket.
+ *
+ * Passes all arguments unchanged to SOP_IOCTL() and returns its result.
+ */
+int
+socket_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode,
+ struct cred *cr, int32_t *rvalp)
+{
+ return (SOP_IOCTL(so, cmd, arg, mode, cr, rvalp));
+}
+
+/*
+ * Poll the socket.
+ *
+ * Passes all arguments unchanged to SOP_POLL() and returns its result.
+ */
+int
+socket_poll(struct sonode *so, short events, int anyyet, short *reventsp,
+ struct pollhead **phpp)
+{
+ return (SOP_POLL(so, events, anyyet, reventsp, phpp));
+}
+
+/*
+ * Close the socket through its vnode.
+ *
+ * Calls VOP_CLOSE() on the sonode's vnode with a count of 1 and an offset
+ * of 0, and returns its result.
+ */
+int
+socket_close(struct sonode *so, int flag, struct cred *cr)
+{
+ return (VOP_CLOSE(SOTOV(so), flag, 1, 0, cr, NULL));
+}
+
+/*
+ * Close the socket via its sonode ops.
+ *
+ * so_count must already be zero (asserted). Returns the SOP_CLOSE() result.
+ */
+int
+socket_close_internal(struct sonode *so, int flag, cred_t *cr)
+{
+ ASSERT(so->so_count == 0);
+
+ return (SOP_CLOSE(so, flag, cr));
+}
+
+/*
+ * Destroy a socket by invalidating its vnode and releasing a hold on it.
+ */
+void
+socket_destroy(struct sonode *so)
+{
+ vn_invalid(SOTOV(so));
+ VN_RELE(SOTOV(so));
+}
+
+/*
+ * Destroy the sonode through its socket module and drop the sonode's
+ * reference on its sockparams entry.
+ *
+ * so_count must be zero and so_sockparams must be set (asserted). `cr' is
+ * unused.
+ */
+/* ARGSUSED */
+void
+socket_destroy_internal(struct sonode *so, cred_t *cr)
+{
+ /* Saved up front; `so' is not touched again after the destroy call */
+ struct sockparams *sp = so->so_sockparams;
+ ASSERT(so->so_count == 0 && sp != NULL);
+
+ sp->sp_smod_info->smod_sock_destroy_func(so);
+
+ SOCKPARAMS_DEC_REF(sp);
+}
+
+/*
+ * Constructor for a sonode: allocates and attaches the vnode, resets the
+ * receive, accept-queue and send fields, and initializes the sonode's
+ * mutexes, rwlock and condition variables.
+ *
+ * Returns 0 on success, or -1 if vn_alloc() fails. `cdrarg' is unused.
+ *
+ * TODO Once the common vnode ops is available, then the vnops argument
+ * should be removed.
+ * NOTE(review): this function has no vnops argument; the TODO above looks
+ * stale -- confirm.
+ */
+/*ARGSUSED*/
+int
+sonode_constructor(void *buf, void *cdrarg, int kmflags)
+{
+ struct sonode *so = buf;
+ struct vnode *vp;
+
+ vp = so->so_vnode = vn_alloc(kmflags);
+ if (vp == NULL) {
+ return (-1);
+ }
+ /* Link the vnode back to its sonode and install the socket vnode ops */
+ vp->v_data = so;
+ vn_setops(vp, socket_vnodeops);
+
+ so->so_priv = NULL;
+ so->so_oobmsg = NULL;
+
+ so->so_proto_handle = NULL;
+
+ so->so_peercred = NULL;
+
+ so->so_rcv_queued = 0;
+ so->so_rcv_q_head = NULL;
+ so->so_rcv_q_last_head = NULL;
+ so->so_rcv_head = NULL;
+ so->so_rcv_last_head = NULL;
+ so->so_rcv_wanted = 0;
+ so->so_rcv_timer_interval = SOCKET_NO_RCVTIMER;
+ so->so_rcv_timer_tid = 0;
+ so->so_rcv_thresh = 0;
+
+ /* Empty accept queue: the tail pointer refers to the head pointer */
+ so->so_acceptq_head = NULL;
+ so->so_acceptq_tail = &so->so_acceptq_head;
+ so->so_acceptq_next = NULL;
+ so->so_acceptq_len = 0;
+ so->so_backlog = 0;
+
+ so->so_snd_qfull = B_FALSE;
+
+ mutex_init(&so->so_lock, NULL, MUTEX_DEFAULT, NULL);
+ mutex_init(&so->so_acceptq_lock, NULL, MUTEX_DEFAULT, NULL);
+ rw_init(&so->so_fallback_rwlock, NULL, RW_DEFAULT, NULL);
+ cv_init(&so->so_state_cv, NULL, CV_DEFAULT, NULL);
+ cv_init(&so->so_want_cv, NULL, CV_DEFAULT, NULL);
+
+ cv_init(&so->so_acceptq_cv, NULL, CV_DEFAULT, NULL);
+ cv_init(&so->so_snd_cv, NULL, CV_DEFAULT, NULL);
+ cv_init(&so->so_rcv_cv, NULL, CV_DEFAULT, NULL);
+ cv_init(&so->so_copy_cv, NULL, CV_DEFAULT, NULL);
+ cv_init(&so->so_closing_cv, NULL, CV_DEFAULT, NULL);
+
+ return (0);
+}
+
+/*
+ * Destructor for a sonode: the inverse of sonode_constructor().
+ *
+ * Asserts that the sonode has been returned to its constructed state (no
+ * private data, peer credential, OOB message, queued receive data or
+ * accept-queue entries, and the vnode still linked with the socket vnode
+ * ops), then frees the vnode and destroys every mutex, rwlock and condition
+ * variable that sonode_constructor() initialized. `cdrarg' is unused.
+ */
+/*ARGSUSED*/
+void
+sonode_destructor(void *buf, void *cdrarg)
+{
+ struct sonode *so = buf;
+ struct vnode *vp = SOTOV(so);
+
+ ASSERT(so->so_priv == NULL);
+ ASSERT(so->so_peercred == NULL);
+
+ ASSERT(so->so_oobmsg == NULL);
+
+ ASSERT(so->so_rcv_q_head == NULL);
+
+ ASSERT(so->so_acceptq_head == NULL);
+ ASSERT(so->so_acceptq_tail == &so->so_acceptq_head);
+ ASSERT(so->so_acceptq_next == NULL);
+
+ ASSERT(vp->v_data == so);
+ ASSERT(vn_matchops(vp, socket_vnodeops));
+
+ vn_free(vp);
+
+ mutex_destroy(&so->so_lock);
+ mutex_destroy(&so->so_acceptq_lock);
+ rw_destroy(&so->so_fallback_rwlock);
+
+ /* One cv_destroy() per cv_init() in sonode_constructor() */
+ cv_destroy(&so->so_state_cv);
+ cv_destroy(&so->so_want_cv);
+ cv_destroy(&so->so_acceptq_cv);
+ cv_destroy(&so->so_snd_cv);
+ cv_destroy(&so->so_rcv_cv);
+ cv_destroy(&so->so_copy_cv);
+ cv_destroy(&so->so_closing_cv);
+}
+
+/*
+ * Initialize the per-use state of a constructed sonode.
+ *
+ * Records `family', `type', `protocol', the sockparams entry `sp' and the
+ * sonode ops `sops'; resets state, options, timeouts, receive/send queue
+ * bookkeeping, poll state, protocol properties and ksocket callbacks; and
+ * re-initializes the vnode as a VSOCK vnode on rootvfs.
+ *
+ * The OOB message, peer credential and accept queue are expected to be in
+ * their constructed (empty) state already (asserted).
+ */
+void
+sonode_init(struct sonode *so, struct sockparams *sp, int family,
+ int type, int protocol, sonodeops_t *sops)
+{
+ vnode_t *vp;
+
+ vp = SOTOV(so);
+
+ so->so_flag = 0;
+
+ so->so_state = 0;
+ so->so_mode = 0;
+
+ so->so_count = 0;
+
+ so->so_family = family;
+ so->so_type = type;
+ so->so_protocol = protocol;
+
+ SOCK_CONNID_INIT(so->so_proto_connid);
+
+ so->so_options = 0;
+ so->so_linger.l_onoff = 0;
+ so->so_linger.l_linger = 0;
+ so->so_sndbuf = 0;
+ so->so_error = 0;
+ so->so_rcvtimeo = 0;
+ so->so_sndtimeo = 0;
+
+ ASSERT(so->so_oobmsg == NULL);
+ so->so_oobmark = 0;
+ so->so_pgrp = 0;
+
+ ASSERT(so->so_peercred == NULL);
+
+ so->so_zoneid = getzoneid();
+
+ so->so_sockparams = sp;
+
+ so->so_ops = sops;
+
+ so->so_proto_handle = NULL;
+
+ so->so_downcalls = NULL;
+
+ so->so_copyflag = 0;
+
+ ASSERT(so->so_acceptq_head == NULL);
+ ASSERT(so->so_acceptq_tail == &so->so_acceptq_head);
+ ASSERT(so->so_acceptq_next == NULL);
+
+ /* Reset the vnode and set it up as a socket vnode */
+ vn_reinit(vp);
+ vp->v_vfsp = rootvfs;
+ vp->v_type = VSOCK;
+ vp->v_rdev = sockdev;
+
+ so->so_rcv_queued = 0;
+ so->so_rcv_q_head = NULL;
+ so->so_rcv_q_last_head = NULL;
+ so->so_rcv_head = NULL;
+ so->so_rcv_last_head = NULL;
+
+ so->so_snd_qfull = B_FALSE;
+ so->so_minpsz = 0;
+
+ so->so_rcv_wakeup = B_FALSE;
+ so->so_snd_wakeup = B_FALSE;
+ so->so_flowctrld = B_FALSE;
+
+ so->so_pollev = 0;
+ bzero(&so->so_poll_list, sizeof (so->so_poll_list));
+ bzero(&so->so_proto_props, sizeof (struct sock_proto_props));
+
+ bzero(&(so->so_ksock_callbacks), sizeof (ksocket_callbacks_t));
+ so->so_ksock_cb_arg = NULL;
+
+ so->so_max_addr_len = sizeof (struct sockaddr_storage);
+
+ so->so_direct = NULL;
+
+ vn_exists(vp);
+}
+
+/*
+ * Tear down the per-use state of a sonode whose so_count has reached zero
+ * (asserted).
+ *
+ * Cancels a pending receive timer, flushes the accept queue, frees any OOB
+ * message and clears the OOB state bits, wakes and cleans the poll list,
+ * frees the sodirect structure, invalidates the vnode and releases the peer
+ * credential.
+ */
+void
+sonode_fini(struct sonode *so)
+{
+ mblk_t *mp;
+ vnode_t *vp;
+
+ ASSERT(so->so_count == 0);
+
+ if (so->so_rcv_timer_tid) {
+ /* so_lock must not be held across the untimeout() call */
+ ASSERT(MUTEX_NOT_HELD(&so->so_lock));
+ (void) untimeout(so->so_rcv_timer_tid);
+ so->so_rcv_timer_tid = 0;
+ }
+
+ so_acceptq_flush(so);
+
+#ifdef DEBUG
+ mutex_enter(&so->so_lock);
+ ASSERT(so_verify_oobstate(so));
+ mutex_exit(&so->so_lock);
+#endif /* DEBUG */
+ if ((mp = so->so_oobmsg) != NULL) {
+ freemsg(mp);
+ so->so_oobmsg = NULL;
+ so->so_state &= ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_HADOOBDATA|
+ SS_RCVATMARK);
+ }
+
+ if (so->so_poll_list.ph_list != NULL) {
+ pollwakeup(&so->so_poll_list, POLLERR);
+ pollhead_clean(&so->so_poll_list);
+ }
+
+ if (so->so_direct != NULL) {
+ sodirect_t *sodp = so->so_direct;
+
+ /* No uioa-pending mblks may remain on the free list */
+ ASSERT(sodp->sod_uioafh == NULL);
+
+ so->so_direct = NULL;
+ kmem_cache_free(sock_sod_cache, sodp);
+ }
+
+ vp = SOTOV(so);
+ vn_invalid(vp);
+
+ if (so->so_peercred != NULL) {
+ crfree(so->so_peercred);
+ so->so_peercred = NULL;
+ }
+}
+
+/*
+ * This function is called at the beginning of recvmsg().
+ *
+ * If I/OAT is enabled on this sonode, initialize the uioa state machine
+ * with state UIOA_ALLOC.
+ *
+ * Returns NULL when the sonode has no sodirect structure or when uioa is
+ * not set up for this receive. Otherwise returns the caller's original uio
+ * and redirects *uiopp to the sodirect uioa_t, which is then used for all
+ * uio work.
+ *
+ * Whenever sodirect is enabled, sod_want is set to the number of bytes the
+ * caller asked for. The sodirect lock is taken and dropped internally.
+ */
+uio_t *
+sod_rcv_init(struct sonode *so, int flags, struct uio **uiopp)
+{
+ struct uio *suiop;
+ struct uio *uiop;
+ sodirect_t *sodp = so->so_direct;
+
+ /* Not an sodirect socket; sodp is known non-NULL past this point */
+ if (sodp == NULL)
+ return (NULL);
+
+ suiop = NULL;
+ uiop = *uiopp;
+
+ mutex_enter(sodp->sod_lockp);
+ if (uiop->uio_resid >= uioasync.mincnt &&
+ (sodp->sod_state & SOD_ENABLED) &&
+ uioasync.enabled && !(flags & MSG_PEEK) &&
+ !(so->so_state & SS_CANTRCVMORE)) {
+ /*
+ * Big enough I/O for uioa min setup and an sodirect socket
+ * and sodirect enabled and uioa enabled and I/O will be done
+ * and not EOF so initialize the sodirect_t uioa_t with "uiop".
+ */
+ if (!uioainit(uiop, &sodp->sod_uioa)) {
+ /*
+ * Successful uioainit() so the uio_t part of the
+ * uioa_t will be used for all uio_t work to follow,
+ * we return the original "uiop" in "suiop".
+ */
+ suiop = uiop;
+ *uiopp = (uio_t *)&sodp->sod_uioa;
+ /*
+ * Before returning to the caller the passed in uio_t
+ * "uiop" will be updated via a call to uioafini()
+ * (see sod_rcv_done()).
+ *
+ * Note, the uioa.uioa_state isn't set to UIOA_ENABLED
+ * here as first we have to uioamove() any currently
+ * queued M_DATA mblk_t(s) so it will be done later.
+ */
+ }
+ /*
+ * In either uioainit() success or not case note the number
+ * of uio bytes the caller wants for sod framework and/or
+ * transport (e.g. TCP) strategy.
+ */
+ sodp->sod_want = uiop->uio_resid;
+ } else if (sodp->sod_state & SOD_ENABLED) {
+ /*
+ * No uioa but still using sodirect so note the number of
+ * uio bytes the caller wants for sodirect framework and/or
+ * transport (e.g. TCP) strategy.
+ */
+ sodp->sod_want = uiop->uio_resid;
+ }
+ mutex_exit(sodp->sod_lockp);
+
+ return (suiop);
+}
+
+/*
+ * This function is called at the end of recvmsg(), it finalizes all the I/OAT
+ * operations, and resets the uioa state to UIOA_ALLOC.
+ *
+ * `suiop' is the value returned by sod_rcv_init() (NULL when uioa was not
+ * set up) and `uiop' is the uio that was used for the receive. The caller
+ * must hold the sodirect lock (asserted).
+ *
+ * Returns 0 when the sonode has no sodirect structure or no uioa was in
+ * use, otherwise the result of uioafini().
+ */
+int
+sod_rcv_done(struct sonode *so, struct uio *suiop, struct uio *uiop)
+{
+ int error = 0;
+ sodirect_t *sodp = so->so_direct;
+ mblk_t *mp;
+
+ if (sodp == NULL) {
+ return (0);
+ }
+
+ ASSERT(MUTEX_HELD(sodp->sod_lockp));
+ /* Finish any sodirect and uioa processing */
+ if (suiop != NULL) {
+ /* Finish any uioa_t processing */
+
+ ASSERT(uiop == (uio_t *)&sodp->sod_uioa);
+ error = uioafini(suiop, (uioa_t *)uiop);
+ /* Free the mblks parked on the uioa pending free list */
+ if ((mp = sodp->sod_uioafh) != NULL) {
+ sodp->sod_uioafh = NULL;
+ sodp->sod_uioaft = NULL;
+ freemsg(mp);
+ }
+ }
+ ASSERT(sodp->sod_uioafh == NULL);
+ if (!(sodp->sod_state & SOD_WAKE_NOT)) {
+ /* Awoke */
+ sodp->sod_state &= SOD_WAKE_CLR;
+ sodp->sod_state |= SOD_WAKE_NOT;
+ }
+ /* Last, clear sod_want value */
+ sodp->sod_want = 0;
+
+ return (error);
+}
+
+/*
+ * Schedule a uioamove() on a mblk. This is usually called from
+ * protocols (e.g. TCP) on a I/OAT enabled sonode.
+ *
+ * `mp' must be an M_DATA chain whose total size is `msg_size', and the
+ * caller must hold the sodirect lock (all asserted).
+ *
+ * Returns:
+ *   - `mp' unchanged when uioa is not enabled, or when the chain does not
+ *     fit in the remaining uio space (uioa is then switched to UIOA_FINI);
+ *   - NULL when every mblk in the chain was scheduled;
+ *   - the first mblk that could not be scheduled when uioamove() failed
+ *     part way through. If at least one mblk was scheduled, the chain is
+ *     split there and the remainder is also linked from mp->b_next.
+ */
+mblk_t *
+sod_uioa_mblk_init(struct sodirect_s *sodp, mblk_t *mp, size_t msg_size)
+{
+ uioa_t *uioap = &sodp->sod_uioa;
+ mblk_t *mp1 = mp;
+ /* Last mblk successfully scheduled, if any */
+ mblk_t *lmp = NULL;
+
+ ASSERT(DB_TYPE(mp) == M_DATA);
+ ASSERT(msg_size == msgdsize(mp));
+
+ /* Caller must have lock held */
+ ASSERT(MUTEX_HELD(sodp->sod_lockp));
+
+ if (uioap->uioa_state & UIOA_ENABLED) {
+ /* Uioa is enabled */
+
+ if (msg_size > uioap->uio_resid) {
+ /*
+ * There isn't enough uio space for the mblk_t chain
+ * so disable uioa such that this and any additional
+ * mblk_t data is handled by the socket and schedule
+ * the socket for wakeup to finish this uioa.
+ */
+ uioap->uioa_state &= UIOA_CLR;
+ uioap->uioa_state |= UIOA_FINI;
+ if (sodp->sod_state & SOD_WAKE_NOT) {
+ sodp->sod_state &= SOD_WAKE_CLR;
+ sodp->sod_state |= SOD_WAKE_NEED;
+ }
+ return (mp);
+ }
+ do {
+ uint32_t len = MBLKL(mp1);
+
+ if (!uioamove(mp1->b_rptr, len, UIO_READ, uioap)) {
+ /* Scheduled, mark dblk_t as such */
+ DB_FLAGS(mp1) |= DBLK_UIOA;
+ } else {
+ /* Error, turn off async processing */
+ uioap->uioa_state &= UIOA_CLR;
+ uioap->uioa_state |= UIOA_FINI;
+ break;
+ }
+ lmp = mp1;
+ } while ((mp1 = mp1->b_cont) != NULL);
+
+ if (mp1 != NULL || uioap->uio_resid == 0) {
+ /*
+ * Not all mblk_t(s) uioamoved (error) or all uio
+ * space has been consumed so schedule the socket
+ * for wakeup to finish this uio.
+ */
+ sodp->sod_state &= SOD_WAKE_CLR;
+ sodp->sod_state |= SOD_WAKE_NEED;
+
+ /* Break the mblk chain if necessary. */
+ if (mp1 != NULL && lmp != NULL) {
+ mp->b_next = mp1;
+ lmp->b_cont = NULL;
+ }
+ }
+ }
+ return (mp1);
+}
+
+/*
+ * This function is called on a mblk that has been successfully uioamoved().
+ *
+ * If "bp" heads a DBLK_UIOA flagged b_cont chain, the chain is appended to
+ * the sodirect uioa pending free list (sod_uioafh/sod_uioaft), every mblk_t
+ * in it is marked as fully consumed and, if uioa is still enabled, uioa is
+ * moved to the UIOA_FINI state.  A NULL or unflagged "bp" is ignored.
+ */
+void
+sod_uioa_mblk_done(sodirect_t *sodp, mblk_t *bp)
+{
+	if (bp != NULL && (bp->b_datap->db_flags & DBLK_UIOA)) {
+		/*
+		 * A uioa flagged mblk_t chain, already uio processed,
+		 * add it to the sodirect uioa pending free list.
+		 *
+		 * Note, a b_cont chain headed by a DBLK_UIOA enable
+		 * mblk_t must have all mblk_t(s) DBLK_UIOA enabled.
+		 */
+		mblk_t *bpt;
+
+		/* Check sodp before it is dereferenced, not after */
+		ASSERT(sodp != NULL);
+		bpt = sodp->sod_uioaft;
+
+		/*
+		 * Add first mblk_t of "bp" chain to current sodirect uioa
+		 * free list tail mblk_t, if any, else empty list so new head.
+		 */
+		if (bpt == NULL)
+			sodp->sod_uioafh = bp;
+		else
+			bpt->b_cont = bp;
+
+		/*
+		 * Walk mblk_t "bp" chain to find tail and adjust rptr of
+		 * each to reflect that uioamove() has consumed all data.
+		 */
+		bpt = bp;
+		for (;;) {
+			ASSERT(bpt->b_datap->db_flags & DBLK_UIOA);
+
+			bpt->b_rptr = bpt->b_wptr;
+			if (bpt->b_cont == NULL)
+				break;
+			bpt = bpt->b_cont;
+		}
+		/* New sodirect uioa free list tail */
+		sodp->sod_uioaft = bpt;
+
+		/* Only dequeue once with data returned per uioa_t */
+		if (sodp->sod_uioa.uioa_state & UIOA_ENABLED) {
+			sodp->sod_uioa.uioa_state &= UIOA_CLR;
+			sodp->sod_uioa.uioa_state |= UIOA_FINI;
+		}
+	}
+}
+
+/*
+ * When transit from UIOA_INIT state to UIOA_ENABLE state in recvmsg(), call
+ * this function on a non-STREAMS socket to schedule uioamove() on the data
+ * that has already queued in this socket.
+ *
+ * The first b_cont chain of so_rcv_q_head is processed first; if that
+ * queue is empty or holds only that one message, the first chain of
+ * so_rcv_head is processed as well.  On any mblk_t that cannot be
+ * scheduled, uioa is moved to UIOA_FINI and a partially scheduled chain
+ * is split in two.  The caller must hold sod_lockp and "uiop" must be
+ * the socket's own sod_uioa.
+ */
+void
+sod_uioa_so_init(struct sonode *so, struct sodirect_s *sodp, struct uio *uiop)
+{
+ uioa_t *uioap = (uioa_t *)uiop;
+ mblk_t *lbp;
+ mblk_t *wbp;
+ mblk_t *bp;
+ int len;
+ int error;
+ boolean_t in_rcv_q = B_TRUE;
+
+ ASSERT(MUTEX_HELD(sodp->sod_lockp));
+ ASSERT(&sodp->sod_uioa == uioap);
+
+ /*
+ * Walk first b_cont chain in sod_q
+ * and schedule any M_DATA mblk_t's for uio asynchronous move.
+ */
+ bp = so->so_rcv_q_head;
+
+again:
+ /* Walk the chain */
+ lbp = NULL;
+ wbp = bp;
+
+ do {
+ /* Empty queue: nothing to walk (also guards the wbp dereference) */
+ if (bp == NULL)
+ break;
+
+ if (wbp->b_datap->db_type != M_DATA) {
+ /* Not M_DATA, no more uioa */
+ goto nouioa;
+ }
+ if ((len = wbp->b_wptr - wbp->b_rptr) > 0) {
+ /* Have a M_DATA mblk_t with data */
+ if (len > uioap->uio_resid || (so->so_oobmark > 0 &&
+ len + uioap->uioa_mbytes >= so->so_oobmark)) {
+ /* Not enough uio space, or beyond oobmark */
+ goto nouioa;
+ }
+ ASSERT(!(wbp->b_datap->db_flags & DBLK_UIOA));
+ error = uioamove(wbp->b_rptr, len,
+ UIO_READ, uioap);
+ if (!error) {
+ /* Scheduled, mark dblk_t as such */
+ wbp->b_datap->db_flags |= DBLK_UIOA;
+ } else {
+ /* Break the mblk chain */
+ goto nouioa;
+ }
+ }
+ /* Save last wbp processed */
+ lbp = wbp;
+ } while ((wbp = wbp->b_cont) != NULL);
+
+ if (in_rcv_q && (bp == NULL || bp->b_next == NULL)) {
+ /*
+ * We get here only once to process the sonode dump area
+ * if so_rcv_q_head is NULL or all the mblks have been
+ * successfully uioamoved()ed.
+ */
+ in_rcv_q = B_FALSE;
+
+ /* move to dump area */
+ bp = so->so_rcv_head;
+ goto again;
+ }
+
+ return;
+
+nouioa:
+ /* No more uioa */
+ uioap->uioa_state &= UIOA_CLR;
+ uioap->uioa_state |= UIOA_FINI;
+
+ /*
+ * If we processed 1 or more mblk_t(s) then we need to split the
+ * current mblk_t chain in 2 so that all the uioamove()ed mblk_t(s)
+ * are in the current chain and the rest are in the following new
+ * chain.
+ */
+ if (lbp != NULL) {
+ /* New end of current chain */
+ lbp->b_cont = NULL;
+
+ /* Insert new chain wbp after bp */
+ if ((wbp->b_next = bp->b_next) == NULL) {
+ /*
+ * No need to grab so_lock, since sod_lockp
+ * points to so_lock.
+ */
+ if (in_rcv_q)
+ so->so_rcv_q_last_head = wbp;
+ else
+ so->so_rcv_last_head = wbp;
+ }
+ bp->b_next = wbp;
+ /* The new chain takes over bp's b_prev; bp's b_prev becomes lbp */
+ bp->b_next->b_prev = bp->b_prev;
+ bp->b_prev = lbp;
+ }
+}
+
+/*
+ * Allocate and initialize the sodirect state of a socket.
+ *
+ * The socket must not have sodirect state yet.  "stp" may be NULL; when
+ * it is not, its read queue is recorded in sod_q and the stream head is
+ * pointed at the new sodirect_t as well.
+ */
+void
+sod_sock_init(struct sonode *so, struct stdata *stp, sod_enq_func enq_func,
+    sod_wakeup_func wake_func, kmutex_t *lockp)
+{
+	sodirect_t *sodp;
+
+	ASSERT(so->so_direct == NULL);
+
+	so->so_state |= SS_SODIRECT;
+
+	sodp = kmem_cache_alloc(sock_sod_cache, KM_SLEEP);
+
+	/* State and bookkeeping */
+	sodp->sod_state = SOD_ENABLED | SOD_WAKE_NOT;
+	sodp->sod_want = 0;
+	if (stp != NULL)
+		sodp->sod_q = RD(stp->sd_wrq);
+	else
+		sodp->sod_q = NULL;
+
+	/* Callbacks and lock supplied by the caller */
+	sodp->sod_enqueue = enq_func;
+	sodp->sod_wakeup = wake_func;
+	sodp->sod_lockp = lockp;
+
+	/* Empty uioa pending free list */
+	sodp->sod_uioafh = NULL;
+	sodp->sod_uioaft = NULL;
+
+	/*
+	 * Only the uioa state is set here; the remaining sod_uioa
+	 * members stay uninitialized until uioainit() fills them in,
+	 * which happens before uioa is enabled.
+	 */
+	sodp->sod_uioa.uioa_state = UIOA_ALLOC;
+
+	/* Publish the new sodirect_t */
+	so->so_direct = sodp;
+	if (stp != NULL)
+		stp->sd_sodirect = sodp;
+}
+
+/*
+ * Init the sodirect kmem cache while sockfs is loading.
+ *
+ * Creates "sock_sod_cache", from which sodirect_t structures are
+ * allocated.  No constructor, destructor or reclaim callback is
+ * registered.
+ */
+void
+sod_init(void)
+{
+	/* Allocate sodirect_t kmem_cache */
+	sock_sod_cache = kmem_cache_create("sock_sod_cache",
+	    sizeof (sodirect_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
+}
+
+/*
+ * Hand uioamove()ed data over to the sodirect pending free list.
+ *
+ * If "mp" is NULL the first message of so_rcv_q_head (if any) is
+ * dequeued and used instead.  Afterwards, if so_rcv_q_head is empty and
+ * the first message of so_rcv_head is DBLK_UIOA flagged M_DATA, that
+ * message is dequeued and handed over too.  Returns uioa_mbytes of the
+ * socket's uioa.  The caller must hold sod_lockp.
+ */
+ssize_t
+sod_uioa_mblk(struct sonode *so, mblk_t *mp)
+{
+	sodirect_t *sodp = so->so_direct;
+	mblk_t *head;
+
+	ASSERT(sodp != NULL);
+	ASSERT(MUTEX_HELD(sodp->sod_lockp));
+
+	ASSERT(sodp->sod_state & SOD_ENABLED);
+	ASSERT(sodp->sod_uioa.uioa_state != (UIOA_ALLOC|UIOA_INIT));
+
+	ASSERT(sodp->sod_uioa.uioa_state & (UIOA_ENABLED|UIOA_FINI));
+
+	/* No message supplied: take the head of the receive queue */
+	if (mp == NULL && (head = so->so_rcv_q_head) != NULL) {
+		ASSERT(head->b_prev != NULL);
+		head->b_prev = NULL;
+		so->so_rcv_q_head = head->b_next;
+		if (so->so_rcv_q_head == NULL)
+			so->so_rcv_q_last_head = NULL;
+		head->b_next = NULL;
+		mp = head;
+	}
+
+	sod_uioa_mblk_done(sodp, mp);
+
+	head = so->so_rcv_head;
+	if (so->so_rcv_q_head == NULL && head != NULL &&
+	    DB_TYPE(head) == M_DATA && (DB_FLAGS(head) & DBLK_UIOA)) {
+		/* more arrived */
+		ASSERT(so->so_rcv_q_head == NULL);
+		so->so_rcv_head = head->b_next;
+		if (so->so_rcv_head == NULL)
+			so->so_rcv_last_head = NULL;
+		head->b_prev = head->b_next = NULL;
+		sod_uioa_mblk_done(sodp, head);
+	}
+
+#ifdef DEBUG
+	{
+		mblk_t *m;
+
+		/* No I/OAT flagged message may remain at the head of a queue */
+		for (m = so->so_rcv_q_head; m != NULL; m = m->b_next) {
+			if (DB_FLAGS(m) & DBLK_UIOA) {
+				cmn_err(CE_PANIC, "Unexpected I/OAT mblk %p"
+				    " in so_rcv_q_head.\n", (void *)m);
+			}
+		}
+		for (m = so->so_rcv_head; m != NULL; m = m->b_next) {
+			if (DB_FLAGS(m) & DBLK_UIOA) {
+				cmn_err(CE_PANIC, "Unexpected I/OAT mblk %p"
+				    " in so_rcv_head.\n", (void *)m);
+			}
+		}
+	}
+#endif
+	return (sodp->sod_uioa.uioa_mbytes);
+}
diff --git a/usr/src/uts/common/fs/sockfs/sockcommon.h b/usr/src/uts/common/fs/sockfs/sockcommon.h
new file mode 100644
index 0000000000..fb4512c874
--- /dev/null
+++ b/usr/src/uts/common/fs/sockfs/sockcommon.h
@@ -0,0 +1,246 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SOCKCOMMON_H_
+#define _SOCKCOMMON_H_
+
+#pragma ident "@(#)sockcommon.h 1.1 07/06/14 SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/filio.h>
+#include <sys/socket_proto.h>
+
+struct sonode;
+
+extern kmem_cache_t *socket_cache;
+
+/*
+ * Socket access functions
+ *
+ * The following functions should only be used by sockfs, and are common
+ * functions that can be used both by kernel sockets (i.e., no file
+ * descriptors should ever be expected, or created), and to implement
+ * the socket system calls.
+ */
+extern struct sonode *socket_create(int, int, int, char *, char *, int, int,
+ struct cred *, int *);
+extern struct sonode *socket_newconn(struct sonode *, sock_lower_handle_t,
+ sock_downcalls_t *, int, int *);
+extern int socket_bind(struct sonode *, struct sockaddr *, socklen_t, int,
+ struct cred *);
+extern int socket_accept(struct sonode *, int, struct cred *, struct sonode **);
+extern int socket_listen(struct sonode *, int, struct cred *);
+extern int socket_connect(struct sonode *, const struct sockaddr *,
+ socklen_t, int, int, struct cred *);
+extern int socket_getpeername(struct sonode *, struct sockaddr *, socklen_t *,
+ boolean_t, struct cred *);
+extern int socket_getsockname(struct sonode *, struct sockaddr *, socklen_t *,
+ struct cred *);
+extern int socket_shutdown(struct sonode *, int, struct cred *);
+extern int socket_getsockopt(struct sonode *, int, int, void *, socklen_t *,
+ int, struct cred *);
+extern int socket_setsockopt(struct sonode *, int, int, const void *,
+ socklen_t, struct cred *);
+extern int socket_recvmsg(struct sonode *, struct nmsghdr *, struct uio *,
+ struct cred *);
+extern int socket_sendmsg(struct sonode *, struct nmsghdr *, struct uio *,
+ struct cred *);
+extern int socket_sendmblk(struct sonode *, struct nmsghdr *, int,
+ struct cred *, mblk_t **);
+extern int socket_ioctl(struct sonode *, int, intptr_t, int, struct cred *,
+ int32_t *);
+extern int socket_poll(struct sonode *, short, int, short *,
+ struct pollhead **);
+extern int socket_close(struct sonode *, int, struct cred *);
+extern void socket_destroy(struct sonode *);
+
+/*
+ * Cancel the socket push timer.
+ *
+ * so_lock must be held on entry. If a timer is pending (so_rcv_timer_tid
+ * is non-zero) its id is cleared, so_lock is dropped around the
+ * untimeout() call and then re-acquired; callers should therefore not
+ * assume that state protected by so_lock is unchanged afterwards.
+ */
+#define SOCKET_TIMER_CANCEL(so) { \
+ timeout_id_t tid; \
+ \
+ ASSERT(MUTEX_HELD(&(so)->so_lock)); \
+ if ((so)->so_rcv_timer_tid != 0) { \
+ tid = (so)->so_rcv_timer_tid; \
+ (so)->so_rcv_timer_tid = 0; \
+ mutex_exit(&(so)->so_lock); \
+ \
+ (void) untimeout(tid); \
+ \
+ mutex_enter(&(so)->so_lock); \
+ } \
+}
+
+/*
+ * Start the socket receive timer.
+ *
+ * so_lock must be held. Unless so_rcv_timer_interval is
+ * SOCKET_NO_RCVTIMER, so_timer_callback() is scheduled to run on the
+ * socket after so_rcv_timer_interval milliseconds and the timeout id is
+ * saved in so_rcv_timer_tid.
+ */
+#define SOCKET_TIMER_START(so) { \
+ ASSERT(MUTEX_HELD(&(so)->so_lock)); \
+ if ((so)->so_rcv_timer_interval != SOCKET_NO_RCVTIMER) { \
+ (so)->so_rcv_timer_tid = timeout(so_timer_callback, \
+ (so), MSEC_TO_TICK((so)->so_rcv_timer_interval)); \
+ } \
+}
+
+/* Common sonode ops for operations that are not supported */
+extern int so_listen_notsupp(struct sonode *, int, struct cred *);
+extern int so_accept_notsupp(struct sonode *, int, struct cred *,
+ struct sonode **);
+extern int so_getpeername_notsupp(struct sonode *, struct sockaddr *,
+ socklen_t *, boolean_t, struct cred *);
+extern int so_shutdown_notsupp(struct sonode *, int, struct cred *);
+extern int so_sendmblk_notsupp(struct sonode *, struct nmsghdr *,
+ int, struct cred *, mblk_t **);
+
+/* Common sonode ops */
+extern int so_init(struct sonode *, struct sonode *, struct cred *, int);
+extern int so_accept(struct sonode *, int, struct cred *, struct sonode **);
+extern int so_bind(struct sonode *, struct sockaddr *, socklen_t, int,
+ struct cred *);
+extern int so_listen(struct sonode *, int, struct cred *);
+extern int so_connect(struct sonode *, const struct sockaddr *,
+ socklen_t, int, int, struct cred *);
+extern int so_getsockopt(struct sonode *, int, int, void *,
+ socklen_t *, int, struct cred *);
+extern int so_setsockopt(struct sonode *, int, int, const void *,
+ socklen_t, struct cred *);
+extern int so_getpeername(struct sonode *, struct sockaddr *,
+ socklen_t *, boolean_t, struct cred *);
+extern int so_getsockname(struct sonode *, struct sockaddr *,
+ socklen_t *, struct cred *);
+extern int so_ioctl(struct sonode *, int, intptr_t, int, struct cred *,
+ int32_t *);
+extern int so_poll(struct sonode *, short, int, short *,
+ struct pollhead **);
+extern int so_sendmsg(struct sonode *, struct nmsghdr *, struct uio *,
+ struct cred *);
+extern int so_sendmblk(struct sonode *, struct nmsghdr *, int,
+ struct cred *, mblk_t **);
+extern int so_recvmsg(struct sonode *, struct nmsghdr *, struct uio *,
+ struct cred *);
+extern int so_shutdown(struct sonode *, int, struct cred *);
+extern int so_close(struct sonode *, int, struct cred *);
+
+extern int so_tpi_fallback(struct sonode *, struct cred *);
+
+/* Common upcalls */
+extern sock_upper_handle_t so_newconn(sock_upper_handle_t,
+ sock_lower_handle_t, sock_downcalls_t *, struct cred *, pid_t,
+ sock_upcalls_t **);
+extern void so_set_prop(sock_upper_handle_t,
+ struct sock_proto_props *);
+extern ssize_t so_queue_msg(sock_upper_handle_t, mblk_t *, size_t, int,
+ int *, boolean_t *);
+extern void so_signal_oob(sock_upper_handle_t, ssize_t);
+
+extern void so_connected(sock_upper_handle_t, sock_connid_t, struct cred *,
+ pid_t);
+extern int so_disconnected(sock_upper_handle_t, sock_connid_t, int);
+extern void so_txq_full(sock_upper_handle_t, boolean_t);
+extern void so_opctl(sock_upper_handle_t, sock_opctl_action_t, uintptr_t);
+/* Common misc. functions */
+
+ /* accept queue */
+extern int so_acceptq_enqueue(struct sonode *, struct sonode *);
+extern int so_acceptq_enqueue_locked(struct sonode *, struct sonode *);
+extern int so_acceptq_dequeue(struct sonode *, boolean_t,
+ struct sonode **);
+extern void so_acceptq_flush(struct sonode *);
+
+ /* connect */
+extern int so_wait_connected(struct sonode *, boolean_t, sock_connid_t);
+
+ /* send */
+extern int so_snd_wait_qnotfull(struct sonode *, boolean_t);
+extern void so_snd_qfull(struct sonode *so);
+extern void so_snd_qnotfull(struct sonode *so);
+
+extern int socket_chgpgrp(struct sonode *, pid_t);
+extern void socket_sendsig(struct sonode *, int);
+extern int so_dequeue_msg(struct sonode *, mblk_t **, struct uio *,
+ rval_t *, int);
+extern void so_enqueue_msg(struct sonode *, mblk_t *, size_t);
+
+extern mblk_t *socopyinuio(uio_t *, ssize_t, size_t, ssize_t, size_t, int *);
+extern mblk_t *socopyoutuio(mblk_t *, struct uio *, ssize_t, int *);
+
+extern boolean_t somsghasdata(mblk_t *);
+extern void so_rcv_flush(struct sonode *);
+extern int sorecvoob(struct sonode *, struct nmsghdr *, struct uio *,
+ int, boolean_t);
+
+extern void so_timer_callback(void *);
+
+extern struct sonode *socket_sonode_create(struct sockparams *, int, int, int,
+ int, int, int *, struct cred *);
+
+extern void socket_sonode_destroy(struct sonode *);
+extern int socket_init_common(struct sonode *, struct sonode *, int flags,
+ struct cred *);
+extern int socket_getopt_common(struct sonode *, int, int, void *, socklen_t *);
+extern int socket_ioctl_common(struct sonode *, int, intptr_t, int,
+ struct cred *, int32_t *);
+extern int socket_strioc_common(struct sonode *, int, intptr_t, int,
+ struct cred *, int32_t *);
+
+extern int so_zcopy_wait(struct sonode *);
+extern int so_get_mod_version(struct sockparams *);
+
+/* Notification functions */
+extern void so_notify_connected(struct sonode *);
+extern void so_notify_disconnecting(struct sonode *);
+extern void so_notify_disconnected(struct sonode *, int);
+extern void so_notify_writable(struct sonode *);
+extern void so_notify_data(struct sonode *, size_t);
+extern void so_notify_oobsig(struct sonode *);
+extern void so_notify_oobdata(struct sonode *, boolean_t);
+extern void so_notify_eof(struct sonode *);
+extern void so_notify_newconn(struct sonode *);
+extern void so_notify_shutdown(struct sonode *);
+extern void so_notify_error(struct sonode *);
+
+/* Common sonode functions */
+extern int sonode_constructor(void *, void *, int);
+extern void sonode_destructor(void *, void *);
+extern void sonode_init(struct sonode *, struct sockparams *,
+ int, int, int, sonodeops_t *);
+extern void sonode_fini(struct sonode *);
+
+/*
+ * Event flags to socket_sendsig().
+ */
+#define SOCKETSIG_WRITE 0x1
+#define SOCKETSIG_READ 0x2
+#define SOCKETSIG_URG 0x4
+
+extern sonodeops_t so_sonodeops;
+extern sock_upcalls_t so_upcalls;
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* _SOCKCOMMON_H_ */
diff --git a/usr/src/uts/common/fs/sockfs/sockcommon_sops.c b/usr/src/uts/common/fs/sockfs/sockcommon_sops.c
new file mode 100644
index 0000000000..e8fc18552d
--- /dev/null
+++ b/usr/src/uts/common/fs/sockfs/sockcommon_sops.c
@@ -0,0 +1,1696 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)sockcommon_sops.c 1.1 07/06/14 SMI"
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/sysmacros.h>
+#include <sys/debug.h>
+#include <sys/cmn_err.h>
+
+#include <sys/stropts.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+
+#define _SUN_TPI_VERSION 2
+#include <sys/tihdr.h>
+#include <sys/sockio.h>
+#include <sys/sodirect.h>
+#include <sys/kmem_impl.h>
+
+#include <sys/strsubr.h>
+#include <sys/strsun.h>
+#include <sys/ddi.h>
+#include <netinet/in.h>
+#include <inet/ip.h>
+
+#include <fs/sockfs/sockcommon.h>
+
+#include <sys/socket_proto.h>
+
+#include <fs/sockfs/socktpi_impl.h>
+#include <sys/tihdr.h>
+#include <fs/sockfs/nl7c.h>
+#include <inet/kssl/ksslapi.h>
+
+
+extern int xnet_skip_checks;
+extern int xnet_check_print;
+
+static void so_queue_oob(sock_upper_handle_t, mblk_t *, size_t);
+
+
+/*
+ * Stub accept operation: ignores its arguments and always returns
+ * EOPNOTSUPP.
+ */
+/*ARGSUSED*/
+int
+so_accept_notsupp(struct sonode *lso, int fflag,
+ struct cred *cr, struct sonode **nsop)
+{
+ return (EOPNOTSUPP);
+}
+
+/*
+ * Stub listen operation: ignores its arguments and always returns
+ * EOPNOTSUPP.
+ */
+/*ARGSUSED*/
+int
+so_listen_notsupp(struct sonode *so, int backlog, struct cred *cr)
+{
+ return (EOPNOTSUPP);
+}
+
+/*
+ * Stub getsockname operation: ignores its arguments and always returns
+ * EOPNOTSUPP.
+ */
+/*ARGSUSED*/
+int
+so_getsockname_notsupp(struct sonode *so, struct sockaddr *sa,
+ socklen_t *len, struct cred *cr)
+{
+ return (EOPNOTSUPP);
+}
+
+/*
+ * Stub getpeername operation: ignores its arguments and always returns
+ * EOPNOTSUPP.
+ */
+/*ARGSUSED*/
+int
+so_getpeername_notsupp(struct sonode *so, struct sockaddr *addr,
+ socklen_t *addrlen, boolean_t accept, struct cred *cr)
+{
+ return (EOPNOTSUPP);
+}
+
+/*
+ * Stub shutdown operation: ignores its arguments and always returns
+ * EOPNOTSUPP.
+ */
+/*ARGSUSED*/
+int
+so_shutdown_notsupp(struct sonode *so, int how, struct cred *cr)
+{
+ return (EOPNOTSUPP);
+}
+
+/*
+ * Stub sendmblk operation: ignores its arguments and always returns
+ * EOPNOTSUPP.
+ *
+ * The message header parameter is spelled "struct nmsghdr" to match the
+ * prototype in sockcommon.h and the other sendmsg/sendmblk operations.
+ */
+/*ARGSUSED*/
+int
+so_sendmblk_notsupp(struct sonode *so, struct nmsghdr *msg, int fflag,
+    struct cred *cr, mblk_t **mpp)
+{
+	return (EOPNOTSUPP);
+}
+
+/*
+ * Generic Socket Ops
+ */
+
+/*
+ * Generic socket init operation: a thin wrapper that forwards to
+ * socket_init_common() (note the different argument order) and returns
+ * its result.
+ */
+/* ARGSUSED */
+int
+so_init(struct sonode *so, struct sonode *pso, struct cred *cr, int flags)
+{
+ return (socket_init_common(so, pso, flags, cr));
+}
+
+/*
+ * Generic bind operation.
+ *
+ * For AF_INET and AF_INET6 sockets the address length and family are
+ * validated here (EINVAL / EAFNOSUPPORT on mismatch). If NL7C (global
+ * zone only) or KSSL is configured for the requested address, the socket
+ * falls back to TPI and the bind is re-issued through SOP_BIND().
+ * Otherwise, and for a NULL "name" or any other address family, the
+ * request is passed to the protocol's sd_bind downcall.
+ *
+ * "flags" must be _SOBIND_XPG4_2 or _SOBIND_SOCKBSD. Returns EINVAL if
+ * SS_CANTSENDMORE is set and the X/Open checks are not being skipped.
+ * For AF_INET the sa_family of "name" is overwritten with so_family.
+ */
+int
+so_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen,
+ int flags, struct cred *cr)
+{
+ int error;
+
+ SO_BLOCK_FALLBACK(so, SOP_BIND(so, name, namelen, flags, cr));
+
+ ASSERT(flags == _SOBIND_XPG4_2 || flags == _SOBIND_SOCKBSD);
+
+ /* X/Open requires this check */
+ if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
+ if (xnet_check_print) {
+ printf("sockfs: X/Open bind state check "
+ "caused EINVAL\n");
+ }
+ error = EINVAL;
+ goto done;
+ }
+
+ /*
+ * a bind to a NULL address is interpreted as unbind. So just
+ * do the downcall.
+ */
+ if (name == NULL)
+ goto dobind;
+
+ switch (so->so_family) {
+ case AF_INET:
+ if ((size_t)namelen != sizeof (sin_t)) {
+ error = name->sa_family != so->so_family ?
+ EAFNOSUPPORT : EINVAL;
+ eprintsoline(so, error);
+ goto done;
+ }
+
+ if ((flags & _SOBIND_XPG4_2) &&
+ (name->sa_family != so->so_family)) {
+ /*
+ * This check has to be made for X/Open
+ * sockets however application failures have
+ * been observed when it is applied to
+ * all sockets.
+ */
+ error = EAFNOSUPPORT;
+ eprintsoline(so, error);
+ goto done;
+ }
+ /*
+ * Force a zero sa_family to match so_family.
+ *
+ * Some programs like inetd(1M) don't set the
+ * family field. Other programs leave
+ * sin_family set to garbage - SunOS 4.X does
+ * not check the family field on a bind.
+ * We use the family field that
+ * was passed in to the socket() call.
+ */
+ name->sa_family = so->so_family;
+ break;
+
+ case AF_INET6: {
+#ifdef DEBUG
+ sin6_t *sin6 = (sin6_t *)name;
+#endif
+ if ((size_t)namelen != sizeof (sin6_t)) {
+ error = name->sa_family != so->so_family ?
+ EAFNOSUPPORT : EINVAL;
+ eprintsoline(so, error);
+ goto done;
+ }
+
+ if (name->sa_family != so->so_family) {
+ /*
+ * With IPv6 we require the family to match
+ * unlike in IPv4.
+ */
+ error = EAFNOSUPPORT;
+ eprintsoline(so, error);
+ goto done;
+ }
+#ifdef DEBUG
+ /*
+ * Verify that apps don't forget to clear
+ * sin6_scope_id etc
+ */
+ if (sin6->sin6_scope_id != 0 &&
+ !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) {
+ zcmn_err(getzoneid(), CE_WARN,
+ "bind with uninitialized sin6_scope_id "
+ "(%d) on socket. Pid = %d\n",
+ (int)sin6->sin6_scope_id,
+ (int)curproc->p_pid);
+ }
+ if (sin6->__sin6_src_id != 0) {
+ zcmn_err(getzoneid(), CE_WARN,
+ "bind with uninitialized __sin6_src_id "
+ "(%d) on socket. Pid = %d\n",
+ (int)sin6->__sin6_src_id,
+ (int)curproc->p_pid);
+ }
+#endif /* DEBUG */
+
+ break;
+ }
+ default:
+ /* Just pass the request to the protocol */
+ goto dobind;
+ }
+
+ /*
+ * First we check if either NCA or KSSL has been enabled for
+ * the requested address, and if so, we fall back to TPI.
+ * If neither of those two services are enabled, then we just
+ * pass the request to the protocol.
+ *
+ * Note that KSSL can only be enabled on a socket if NCA is NOT
+ * enabled for that socket, hence the else-statement below.
+ */
+ if (nl7c_enabled && ((so->so_family == AF_INET ||
+ so->so_family == AF_INET6) &&
+ nl7c_lookup_addr(name, namelen) != NULL)) {
+ /*
+ * NL7C is not supported in non-global zones,
+ * we enforce this restriction here.
+ */
+ if (so->so_zoneid == GLOBAL_ZONEID) {
+ /* NCA should be used, so fall back to TPI */
+ error = so_tpi_fallback(so, cr);
+ SO_UNBLOCK_FALLBACK(so);
+ if (error)
+ return (error);
+ else
+ return (SOP_BIND(so, name, namelen, flags, cr));
+ }
+ } else if (so->so_type == SOCK_STREAM) {
+ /* Check if KSSL has been configured for this address */
+ kssl_ent_t ent;
+ kssl_endpt_type_t type;
+ struct T_bind_req bind_req;
+ mblk_t *mp;
+
+ /*
+ * TODO: Check with KSSL team if we could add a function call
+ * that only queries whether KSSL is enabled for the given
+ * address.
+ */
+ bind_req.PRIM_type = T_BIND_REQ;
+ bind_req.ADDR_length = namelen;
+ bind_req.ADDR_offset = (t_scalar_t)sizeof (bind_req);
+ mp = soallocproto2(&bind_req, sizeof (bind_req),
+ name, namelen, 0, _ALLOC_SLEEP);
+
+ type = kssl_check_proxy(mp, so, &ent);
+ freemsg(mp);
+
+ if (type != KSSL_NO_PROXY) {
+ /*
+ * KSSL has been configured for this address, so
+ * we must fall back to TPI.
+ */
+ kssl_release_ent(ent, so, type);
+ error = so_tpi_fallback(so, cr);
+ SO_UNBLOCK_FALLBACK(so);
+ if (error)
+ return (error);
+ else
+ return (SOP_BIND(so, name, namelen, flags, cr));
+ }
+ }
+
+dobind:
+ error = (*so->so_downcalls->sd_bind)
+ (so->so_proto_handle, name, namelen, cr);
+done:
+ SO_UNBLOCK_FALLBACK(so);
+
+ return (error);
+}
+
+/*
+ * Generic listen operation: hands the request to the protocol's
+ * sd_listen downcall and returns its result.  so_lock must not be held
+ * by the caller.
+ */
+int
+so_listen(struct sonode *so, int backlog, struct cred *cr)
+{
+	int error;
+
+	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
+	SO_BLOCK_FALLBACK(so, SOP_LISTEN(so, backlog, cr));
+
+	error = 0;
+	error = (*so->so_downcalls->sd_listen)(so->so_proto_handle,
+	    backlog, cr);
+
+	SO_UNBLOCK_FALLBACK(so);
+	return (error);
+}
+
+
+/*
+ * Generic connect operation.
+ *
+ * A pending socket error (for example one left behind by an earlier
+ * non-blocking operation) is returned first.  Otherwise the protocol's
+ * sd_connect downcall is made and, if it reports EINPROGRESS, the result
+ * of so_wait_connected() is returned instead.  so_lock must not be held
+ * by the caller.
+ */
+int
+so_connect(struct sonode *so, const struct sockaddr *name,
+    socklen_t namelen, int fflag, int flags, struct cred *cr)
+{
+	sock_connid_t id;
+	int error = 0;
+
+	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
+	SO_BLOCK_FALLBACK(so, SOP_CONNECT(so, name, namelen, fflag, flags, cr));
+
+	/* Pick up any pending error before talking to the protocol */
+	if (so->so_error != 0) {
+		mutex_enter(&so->so_lock);
+		error = sogeterr(so, B_TRUE);
+		mutex_exit(&so->so_lock);
+	}
+
+	if (error == 0) {
+		error = (*so->so_downcalls->sd_connect)(so->so_proto_handle,
+		    name, namelen, &id, cr);
+
+		/* Connection still in progress: wait for it if allowed */
+		if (error == EINPROGRESS) {
+			error = so_wait_connected(so,
+			    fflag & (FNONBLOCK|FNDELAY), id);
+		}
+	}
+
+	SO_UNBLOCK_FALLBACK(so);
+	return (error);
+}
+
+/*
+ * Generic accept operation.
+ *
+ * Dequeues a connection from the accept queue and completes it with the
+ * protocol's sd_accept downcall.  On success the new socket is returned
+ * through "nsop"; on any failure *nsop is NULL.  A socket that is not
+ * listening yields EOPNOTSUPP for SOCK_DGRAM/SOCK_RAW and EINVAL
+ * otherwise.  If sd_accept fails the dequeued socket is closed and
+ * destroyed.
+ */
+/*ARGSUSED*/
+int
+so_accept(struct sonode *so, int fflag, struct cred *cr, struct sonode **nsop)
+{
+	struct sonode *nso;
+	int error = 0;
+
+	*nsop = NULL;
+
+	SO_BLOCK_FALLBACK(so, SOP_ACCEPT(so, fflag, cr, nsop));
+
+	if (!(so->so_state & SS_ACCEPTCONN)) {
+		SO_UNBLOCK_FALLBACK(so);
+		if (so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW)
+			return (EOPNOTSUPP);
+		return (EINVAL);
+	}
+
+	error = so_acceptq_dequeue(so, (fflag & (FNONBLOCK|FNDELAY)), &nso);
+	if (error == 0) {
+		ASSERT(nso != NULL);
+
+		/* Let the protocol finish the accept */
+		error = (*so->so_downcalls->sd_accept)(so->so_proto_handle,
+		    nso->so_proto_handle, (sock_upper_handle_t)nso, cr);
+		if (error == 0) {
+			*nsop = nso;
+		} else {
+			(void) socket_close(nso, 0, cr);
+			socket_destroy(nso);
+		}
+	}
+
+	SO_UNBLOCK_FALLBACK(so);
+	return (error);
+}
+
+/*
+ * Generic sendmsg operation: hand the data described by "uiop" to the
+ * protocol, either directly through the sd_send_uio downcall or, when
+ * the protocol does not provide one, by copying it into mblks with
+ * socopyinuio() and calling sd_send.
+ *
+ * Returns EOPNOTSUPP if control data is supplied without MSG_XPG4_2 or
+ * if MSG_OOB is requested on a socket without SM_EXDATA, EMSGSIZE if an
+ * SM_ATOMIC send exceeds sopp_maxpsz (unless that is -1), EPIPE once
+ * SS_CANTSENDMORE is set, a pending socket error, or whatever error the
+ * flow-control wait, the copyin or the protocol downcall reports.
+ */
+int
+so_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
+ struct cred *cr)
+{
+ int error, flags;
+ boolean_t dontblock;
+ ssize_t orig_resid;
+ mblk_t *mp;
+
+ SO_BLOCK_FALLBACK(so, SOP_SENDMSG(so, msg, uiop, cr));
+
+ flags = msg->msg_flags;
+ error = 0;
+ dontblock = (flags & MSG_DONTWAIT) ||
+ (uiop->uio_fmode & (FNONBLOCK|FNDELAY));
+
+ if (!(flags & MSG_XPG4_2) && msg->msg_controllen != 0) {
+ /*
+ * Old way of passing fd's is not supported
+ */
+ SO_UNBLOCK_FALLBACK(so);
+ return (EOPNOTSUPP);
+ }
+
+ if ((so->so_mode & SM_ATOMIC) &&
+ uiop->uio_resid > so->so_proto_props.sopp_maxpsz &&
+ so->so_proto_props.sopp_maxpsz != -1) {
+ SO_UNBLOCK_FALLBACK(so);
+ return (EMSGSIZE);
+ }
+
+ /*
+ * For atomic sends we will only do one iteration.
+ */
+ do {
+ if (so->so_state & SS_CANTSENDMORE) {
+ error = EPIPE;
+ break;
+ }
+
+ if (so->so_error != 0) {
+ mutex_enter(&so->so_lock);
+ error = sogeterr(so, B_TRUE);
+ mutex_exit(&so->so_lock);
+ if (error != 0)
+ break;
+ }
+
+ /*
+ * Send down OOB messages even if the send path is being
+ * flow controlled (assuming the protocol supports OOB data).
+ */
+ if (flags & MSG_OOB) {
+ if ((so->so_mode & SM_EXDATA) == 0) {
+ error = EOPNOTSUPP;
+ break;
+ }
+ } else if (so->so_snd_qfull) {
+ /*
+ * Need to wait until the protocol is ready to receive
+ * more data for transmission.
+ */
+ if ((error = so_snd_wait_qnotfull(so, dontblock)) != 0)
+ break;
+ }
+
+ /*
+ * Time to send data to the protocol. We either copy the
+ * data into mblks or pass the uio directly to the protocol.
+ * We decide what to do based on the available down calls.
+ */
+ if (so->so_downcalls->sd_send_uio != NULL) {
+ error = (*so->so_downcalls->sd_send_uio)
+ (so->so_proto_handle, uiop, msg, cr);
+ if (error != 0)
+ break;
+ } else {
+ /* save the resid in case of failure */
+ orig_resid = uiop->uio_resid;
+
+ if ((mp = socopyinuio(uiop,
+ so->so_proto_props.sopp_maxpsz,
+ so->so_proto_props.sopp_wroff,
+ so->so_proto_props.sopp_maxblk,
+ so->so_proto_props.sopp_tail, &error)) == NULL) {
+ break;
+ }
+ ASSERT(uiop->uio_resid >= 0);
+
+ error = (*so->so_downcalls->sd_send)
+ (so->so_proto_handle, mp, msg, cr);
+ if (error != 0) {
+ /*
+ * The send failed. We do not have to free the
+ * mblks, because that is the protocol's
+ * responsibility. However, uio_resid must
+ * remain accurate, so adjust that here.
+ */
+ uiop->uio_resid = orig_resid;
+ break;
+ }
+ }
+ } while (uiop->uio_resid > 0);
+
+ SO_UNBLOCK_FALLBACK(so);
+
+ return (error);
+}
+
+/*
+ * Generic sendmblk operation: send the mblk chain *mpp to the protocol
+ * through the sd_send downcall, splitting the b_cont chain so that a
+ * piece is cut off before the mblk that would take it past sopp_maxpsz.
+ *
+ * *mpp is advanced as pieces are sent; on return it points at whatever
+ * has not been handed to the protocol (NULL when everything was sent).
+ * Returns EOPNOTSUPP if the protocol has no sd_send downcall, EMSGSIZE
+ * if an SM_ATOMIC message exceeds sopp_maxpsz (unless that is -1), EPIPE
+ * once SS_CANTSENDMORE is set, a pending socket error, or the error
+ * from the flow-control wait or the protocol downcall.
+ */
+int
+so_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag,
+ struct cred *cr, mblk_t **mpp)
+{
+ int error;
+ boolean_t dontblock;
+ size_t size;
+ mblk_t *mp = *mpp;
+
+ SO_BLOCK_FALLBACK(so, SOP_SENDMBLK(so, msg, fflag, cr, mpp));
+
+ error = 0;
+ dontblock = (msg->msg_flags & MSG_DONTWAIT) ||
+ (fflag & (FNONBLOCK|FNDELAY));
+ size = msgdsize(mp);
+
+ if (so->so_downcalls->sd_send == NULL) {
+ SO_UNBLOCK_FALLBACK(so);
+ return (EOPNOTSUPP);
+ }
+
+ if ((so->so_mode & SM_ATOMIC) &&
+ size > so->so_proto_props.sopp_maxpsz &&
+ so->so_proto_props.sopp_maxpsz != -1) {
+ SO_UNBLOCK_FALLBACK(so);
+ return (EMSGSIZE);
+ }
+
+ while (mp != NULL) {
+ mblk_t *nmp, *last_mblk;
+ size_t mlen;
+
+ if (so->so_state & SS_CANTSENDMORE) {
+ error = EPIPE;
+ break;
+ }
+ if (so->so_error != 0) {
+ mutex_enter(&so->so_lock);
+ error = sogeterr(so, B_TRUE);
+ mutex_exit(&so->so_lock);
+ if (error != 0)
+ break;
+ }
+ if (so->so_snd_qfull) {
+ /*
+ * Need to wait until the protocol is ready to receive
+ * more data for transmission.
+ */
+ if ((error = so_snd_wait_qnotfull(so, dontblock)) != 0)
+ break;
+ }
+
+ /*
+ * We only allow so_maxpsz of data to be sent down to
+ * the protocol at a time.
+ */
+ mlen = MBLKL(mp);
+ nmp = mp->b_cont;
+ last_mblk = mp;
+ /* Find where to cut; nmp ends up as the head of the remainder */
+ while (nmp != NULL) {
+ mlen += MBLKL(nmp);
+ if (mlen > so->so_proto_props.sopp_maxpsz) {
+ last_mblk->b_cont = NULL;
+ break;
+ }
+ last_mblk = nmp;
+ nmp = nmp->b_cont;
+ }
+
+ error = (*so->so_downcalls->sd_send)
+ (so->so_proto_handle, mp, msg, cr);
+ if (error != 0) {
+ /*
+ * The send failed. The protocol will free the mblks
+ * that were sent down. Let the caller deal with the
+ * rest.
+ */
+ *mpp = nmp;
+ break;
+ }
+
+ *mpp = mp = nmp;
+ }
+
+ SO_UNBLOCK_FALLBACK(so);
+
+ return (error);
+}
+
+/*
+ * Generic shutdown operation.
+ *
+ * On a socket that is not connected this returns ENOTCONN, or 0 when
+ * the X/Open checks are being skipped (xnet_skip_checks).  Otherwise the
+ * protocol's sd_shutdown downcall is made and, if it succeeds and the
+ * receive side is being shut down (how != SHUT_WR), the receive buffer
+ * is flushed once any active reader has finished.
+ */
+int
+so_shutdown(struct sonode *so, int how, struct cred *cr)
+{
+	/*
+	 * Must start out as 0: the "not connected" path below leaves
+	 * error untouched when xnet_skip_checks is set, and would
+	 * otherwise return an uninitialized value.
+	 */
+	int error = 0;
+
+	SO_BLOCK_FALLBACK(so, SOP_SHUTDOWN(so, how, cr));
+
+	/*
+	 * SunOS 4.X has no check for datagram sockets.
+	 * 5.X checks that it is connected (ENOTCONN)
+	 * X/Open requires that we check the connected state.
+	 */
+	if (!(so->so_state & SS_ISCONNECTED)) {
+		if (!xnet_skip_checks) {
+			error = ENOTCONN;
+			if (xnet_check_print) {
+				printf("sockfs: X/Open shutdown check "
+				    "caused ENOTCONN\n");
+			}
+		}
+		goto done;
+	}
+
+	error = ((*so->so_downcalls->sd_shutdown)(so->so_proto_handle,
+	    how, cr));
+
+	/*
+	 * Protocol agreed to shutdown. We need to flush the
+	 * receive buffer if the receive side is being shutdown.
+	 */
+	if (error == 0 && how != SHUT_WR) {
+		mutex_enter(&so->so_lock);
+		/* wait for active reader to finish */
+		(void) so_lock_read(so, 0);
+
+		so_rcv_flush(so);
+
+		so_unlock_read(so);
+		mutex_exit(&so->so_lock);
+	}
+
+done:
+	SO_UNBLOCK_FALLBACK(so);
+	return (error);
+}
+
+/*
+ * Generic getsockname operation: passes the request to the protocol's
+ * sd_getsockname downcall and returns its result.
+ */
+int
+so_getsockname(struct sonode *so, struct sockaddr *addr,
+ socklen_t *addrlen, struct cred *cr)
+{
+ int error;
+
+ SO_BLOCK_FALLBACK(so, SOP_GETSOCKNAME(so, addr, addrlen, cr));
+
+ error = (*so->so_downcalls->sd_getsockname)
+ (so->so_proto_handle, addr, addrlen, cr);
+
+ SO_UNBLOCK_FALLBACK(so);
+ return (error);
+}
+
+/*
+ * so_getpeername - pass a getpeername request to the protocol.
+ *
+ * When 'accept' is set the request always goes to the protocol. Otherwise
+ * the socket has to be connected (ENOTCONN), and unless the X/Open checks
+ * are skipped it must still be able to send (EINVAL).
+ */
+int
+so_getpeername(struct sonode *so, struct sockaddr *addr,
+    socklen_t *addrlen, boolean_t accept, struct cred *cr)
+{
+	int error;
+
+	SO_BLOCK_FALLBACK(so, SOP_GETPEERNAME(so, addr, addrlen, accept, cr));
+
+	if (!accept && !(so->so_state & SS_ISCONNECTED)) {
+		error = ENOTCONN;
+	} else if (!accept && (so->so_state & SS_CANTSENDMORE) &&
+	    !xnet_skip_checks) {
+		/* X/Open: no peer name once the send side is shut down */
+		error = EINVAL;
+		if (xnet_check_print)
+			printf("sockfs: X/Open getpeername check => EINVAL\n");
+	} else {
+		error = so->so_downcalls->sd_getpeername(so->so_proto_handle,
+		    addr, addrlen, cr);
+	}
+
+	SO_UNBLOCK_FALLBACK(so);
+
+	return (error);
+}
+
+/*
+ * so_getsockopt - retrieve the value of a socket option.
+ *
+ * socket_getopt_common() is tried first; a negative return from it means
+ * that the request has to be passed to the protocol. If the protocol
+ * answers ENOPROTOOPT for one of the SOL_SOCKET options listed below, a
+ * zeroed value of the right size is returned instead of an error.
+ */
+int
+so_getsockopt(struct sonode *so, int level, int option_name,
+    void *optval, socklen_t *optlenp, int flags, struct cred *cr)
+{
+	int error = 0;
+
+	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
+	SO_BLOCK_FALLBACK(so,
+	    SOP_GETSOCKOPT(so, level, option_name, optval, optlenp, flags, cr));
+
+	error = socket_getopt_common(so, level, option_name, optval, optlenp);
+	if (error >= 0)
+		goto done;
+
+	error = so->so_downcalls->sd_getsockopt(so->so_proto_handle, level,
+	    option_name, optval, optlenp, cr);
+	if (error != ENOPROTOOPT || level != SOL_SOCKET)
+		goto done;
+
+	/*
+	 * A protocol may refuse to set a socket level option it does not
+	 * support, but getting such an option is not allowed to fail.
+	 * This matches the previous sockfs behavior.
+	 */
+	switch (option_name) {
+	case SO_LINGER:
+		if (*optlenp >= (t_uscalar_t)sizeof (struct linger)) {
+			bzero(optval, sizeof (struct linger));
+			*optlenp = sizeof (struct linger);
+			error = 0;
+		} else {
+			error = EINVAL;
+		}
+		break;
+	case SO_RCVTIMEO:
+	case SO_SNDTIMEO:
+		if (*optlenp >= (t_uscalar_t)sizeof (struct timeval)) {
+			bzero(optval, sizeof (struct timeval));
+			*optlenp = sizeof (struct timeval);
+			error = 0;
+		} else {
+			error = EINVAL;
+		}
+		break;
+	case SO_SND_BUFINFO:
+		if (*optlenp >= (t_uscalar_t)sizeof (struct so_snd_bufinfo)) {
+			bzero(optval, sizeof (struct so_snd_bufinfo));
+			*optlenp = sizeof (struct so_snd_bufinfo);
+			error = 0;
+		} else {
+			error = EINVAL;
+		}
+		break;
+	case SO_DEBUG:
+	case SO_REUSEADDR:
+	case SO_KEEPALIVE:
+	case SO_DONTROUTE:
+	case SO_BROADCAST:
+	case SO_USELOOPBACK:
+	case SO_OOBINLINE:
+	case SO_DGRAM_ERRIND:
+	case SO_SNDBUF:
+	case SO_RCVBUF:
+		/* plain integer options read back as 0 */
+		error = 0;
+		*((int32_t *)optval) = 0;
+		*optlenp = sizeof (int32_t);
+		break;
+	default:
+		/* anything else keeps the protocol's ENOPROTOOPT */
+		break;
+	}
+
+done:
+	SO_UNBLOCK_FALLBACK(so);
+	return (error);
+}
+
+/*
+ * so_setsockopt - set a socket option.
+ *
+ * SO_RCVTIMEO and SO_SNDTIMEO at the SOL_SOCKET level are stored in the
+ * sonode (converted to clock ticks) and never reach the protocol. All
+ * other options are passed to the protocol's sd_setsockopt downcall.
+ */
+int
+so_setsockopt(struct sonode *so, int level, int option_name,
+    const void *optval, socklen_t optlen, struct cred *cr)
+{
+	int error = 0;
+
+	SO_BLOCK_FALLBACK(so,
+	    SOP_SETSOCKOPT(so, level, option_name, optval, optlen, cr));
+
+	/* X/Open: options can not be set once the send side is shut down */
+	if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
+		SO_UNBLOCK_FALLBACK(so);
+		if (xnet_check_print)
+			printf("sockfs: X/Open setsockopt check => EINVAL\n");
+		return (EINVAL);
+	}
+
+	if (level == SOL_SOCKET &&
+	    (option_name == SO_RCVTIMEO || option_name == SO_SNDTIMEO)) {
+		struct timeval *tl = (struct timeval *)optval;
+		clock_t t_usec;
+		clock_t ticks;
+
+		if (optlen != (t_uscalar_t)sizeof (struct timeval)) {
+			error = EINVAL;
+			goto done;
+		}
+
+		t_usec = tl->tv_sec * 1000 * 1000 + tl->tv_usec;
+		ticks = drv_usectohz(t_usec);
+
+		mutex_enter(&so->so_lock);
+		if (option_name == SO_SNDTIMEO)
+			so->so_sndtimeo = ticks;
+		else
+			so->so_rcvtimeo = ticks;
+		mutex_exit(&so->so_lock);
+
+		error = 0;
+		goto done;
+	}
+
+	error = so->so_downcalls->sd_setsockopt(so->so_proto_handle, level,
+	    option_name, optval, optlen, cr);
+
+done:
+	SO_UNBLOCK_FALLBACK(so);
+	return (error);
+}
+
+/*
+ * so_ioctl - ioctl entry point.
+ *
+ * A pending socket error is returned (and consumed) first. After that the
+ * request is offered to socket_ioctl_common(), then to
+ * socket_strioc_common(), and finally to the protocol's sd_ioctl downcall;
+ * each stage is only tried when the previous one returned a negative value.
+ */
+int
+so_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode,
+    struct cred *cr, int32_t *rvalp)
+{
+	int error = 0;
+
+	SO_BLOCK_FALLBACK(so, SOP_IOCTL(so, cmd, arg, mode, cr, rvalp));
+
+	/*
+	 * An earlier non-blocking operation may have left an error behind;
+	 * report it now.
+	 */
+	if (so->so_error != 0) {
+		mutex_enter(&so->so_lock);
+		error = sogeterr(so, B_TRUE);
+		mutex_exit(&so->so_lock);
+		if (error != 0)
+			goto done;
+	}
+
+	error = socket_ioctl_common(so, cmd, arg, mode, cr, rvalp);
+	if (error < 0) {
+		/*
+		 * Note that the strioc call may make the socket fall back
+		 * to TPI, when that is supported.
+		 */
+		error = socket_strioc_common(so, cmd, arg, mode, cr, rvalp);
+		if (error < 0) {
+			error = so->so_downcalls->sd_ioctl(so->so_proto_handle,
+			    cmd, arg, mode, rvalp, cr);
+		}
+	}
+
+done:
+	SO_UNBLOCK_FALLBACK(so);
+
+	return (error);
+}
+
+/*
+ * so_poll - poll entry point.
+ *
+ * Arguments:
+ *	so       - socket being polled
+ *	events   - events the caller is interested in
+ *	anyyet   - non-zero if the caller does not need the pollhead
+ *	reventsp - set to the subset of 'events' that is currently true
+ *	phpp     - set to the socket's pollhead when nothing is ready and
+ *	           anyyet is zero
+ *
+ * Always returns 0.
+ */
+int
+so_poll(struct sonode *so, short events, int anyyet, short *reventsp,
+    struct pollhead **phpp)
+{
+	int sostate = so->so_state;
+
+	*reventsp = 0;
+
+	/* A pending error makes the socket both readable and writable */
+	if (so->so_error != 0 &&
+	    (events & (POLLIN|POLLRDNORM|POLLOUT)) != 0) {
+		*reventsp = (POLLIN|POLLRDNORM|POLLOUT) & events;
+		return (0);
+	}
+
+	/*
+	 * POLLOUT is reported when the send queue is not full and the
+	 * socket is able to send, i.e. it is either connected or of a
+	 * kind that does not need a connection.
+	 */
+	if (!so->so_snd_qfull &&
+	    (!(so->so_mode & SM_CONNREQUIRED) || (sostate & SS_ISCONNECTED))) {
+		*reventsp |= POLLOUT & events;
+	}
+
+	/*
+	 * POLLIN is reported for queued data, for a socket that will not
+	 * receive any more data, and for a listener with connections
+	 * waiting to be accepted.
+	 */
+
+	/* Connections waiting to be accepted */
+	if (so->so_acceptq_len > 0)
+		*reventsp |= (POLLIN|POLLRDNORM) & events;
+
+	/* Data; note that so_downcalls is null for sctp */
+	if (so->so_downcalls == NULL || so->so_downcalls->sd_poll == NULL) {
+		if (SO_HAVE_DATA(so))
+			*reventsp |= (POLLIN|POLLRDNORM) & events;
+
+		/* Urgent data */
+		if ((sostate & SS_OOBPEND) != 0)
+			*reventsp |= (POLLRDBAND) & events;
+	} else {
+		*reventsp |= (*so->so_downcalls->sd_poll)
+		    (so->so_proto_handle, events & SO_PROTO_POLLEV, anyyet,
+		    CRED()) & events;
+		ASSERT((*reventsp & ~events) == 0);
+		/* the protocol's events must not be checked again below */
+		events &= ~SO_PROTO_POLLEV;
+	}
+
+	if (*reventsp != 0 || anyyet)
+		return (0);
+
+	/* Nothing ready; look at the read side once more, now under lock */
+	if (events & (POLLIN|POLLRDNORM)) {
+		int readable;
+
+		mutex_enter(&so->so_lock);
+		readable = (SO_HAVE_DATA(so) || so->so_acceptq_len > 0);
+		if (!readable)
+			so->so_pollev |= SO_POLLEV_IN;
+		mutex_exit(&so->so_lock);
+
+		if (readable) {
+			*reventsp |= (POLLIN|POLLRDNORM) & events;
+			return (0);
+		}
+	}
+	*phpp = &so->so_poll_list;
+
+	return (0);
+}
+
+/*
+ * Generic Upcalls
+ */
+/*
+ * so_connected - upcall made by the protocol once a connection is
+ * established.
+ *
+ * Records the peer's credentials and pid (when credentials are supplied)
+ * and the connection id, marks the socket connected and notifies waiters.
+ * so_lock is taken here and is not dropped by this function itself;
+ * presumably so_notify_connected() releases it (confirm).
+ */
+void
+so_connected(sock_upper_handle_t sock_handle, sock_connid_t id,
+    cred_t *peer_cred, pid_t peer_cpid)
+{
+	struct sonode *so;
+
+	so = (struct sonode *)sock_handle;
+
+	mutex_enter(&so->so_lock);
+	ASSERT(so->so_proto_handle != NULL);
+
+	if (peer_cred != NULL) {
+		/* drop the hold on any credentials recorded earlier */
+		if (so->so_peercred != NULL)
+			crfree(so->so_peercred);
+		crhold(peer_cred);
+		so->so_peercred = peer_cred;
+		so->so_cpid = peer_cpid;
+	}
+
+	so->so_proto_connid = id;
+	soisconnected(so);
+
+	/* wake up whoever is waiting for the connection to come up */
+	so_notify_connected(so);
+}
+
+/*
+ * so_disconnected - upcall made by the protocol when a connection is gone.
+ *
+ * Records the connection id, marks the socket disconnected with 'error'
+ * and notifies waiters. so_lock is taken here and is not dropped by this
+ * function itself; presumably so_notify_disconnected() releases it
+ * (confirm). Always returns 0.
+ */
+int
+so_disconnected(sock_upper_handle_t sock_handle, sock_connid_t id, int error)
+{
+	struct sonode *so;
+
+	so = (struct sonode *)sock_handle;
+
+	mutex_enter(&so->so_lock);
+	so->so_proto_connid = id;
+	soisdisconnected(so, error);
+	so_notify_disconnected(so, error);
+
+	return (0);
+}
+
+/*
+ * so_opctl - upcall used by the protocol to change the socket's state.
+ *
+ * Actions:
+ *	SOCK_OPCTL_ENAB_ACCEPT - mark the socket as accepting connections;
+ *	                         arg is the backlog
+ *	SOCK_OPCTL_SHUT_SEND   - no more data can be sent
+ *	SOCK_OPCTL_SHUT_RECV   - no more data will be received
+ *
+ * For the two shutdown actions so_lock is taken here and not dropped by
+ * this function itself; presumably the so_notify_*() call releases it
+ * (confirm).
+ */
+void
+so_opctl(sock_upper_handle_t sock_handle, sock_opctl_action_t action,
+    uintptr_t arg)
+{
+	struct sonode *so;
+
+	so = (struct sonode *)sock_handle;
+
+	switch (action) {
+	case SOCK_OPCTL_ENAB_ACCEPT:
+		mutex_enter(&so->so_lock);
+		so->so_state |= SS_ACCEPTCONN;
+		so->so_backlog = (unsigned int)arg;
+		mutex_exit(&so->so_lock);
+		break;
+	case SOCK_OPCTL_SHUT_SEND:
+		mutex_enter(&so->so_lock);
+		socantsendmore(so);
+		so_notify_disconnecting(so);
+		break;
+	case SOCK_OPCTL_SHUT_RECV:
+		mutex_enter(&so->so_lock);
+		socantrcvmore(so);
+		so_notify_eof(so);
+		break;
+	default:
+		/* unknown action */
+		ASSERT(0);
+		break;
+	}
+}
+
+/*
+ * so_txq_full - upcall used by the protocol to report that its transmit
+ * queue has become full (qfull set) or has room again (qfull clear).
+ *
+ * In the latter case writers are notified as well; so_lock is taken for
+ * that and is not dropped by this function itself, so presumably
+ * so_notify_writable() releases it (confirm).
+ */
+void
+so_txq_full(sock_upper_handle_t sock_handle, boolean_t qfull)
+{
+	struct sonode *so;
+
+	so = (struct sonode *)sock_handle;
+
+	if (!qfull) {
+		so_snd_qnotfull(so);
+		mutex_enter(&so->so_lock);
+		so_notify_writable(so);
+		return;
+	}
+
+	so_snd_qfull(so);
+}
+
+/*
+ * so_newconn - upcall made by the protocol for an incoming connection.
+ *
+ * Arguments:
+ *	parenthandle   - the listening socket
+ *	proto_handle   - protocol's handle for the new connection
+ *	sock_downcalls - downcalls to be used for the new connection
+ *	peer_cred      - credentials of the peer, or NULL
+ *	peer_cpid      - pid of the peer; only recorded with peer_cred
+ *	sock_upcallsp  - set to the upcalls the protocol has to use for
+ *	                 the new connection
+ *
+ * Returns:
+ *	The new socket as an upper handle, or NULL if the listener is not
+ *	accepting connections, its accept queue has reached the backlog,
+ *	or the new socket could not be created.
+ */
+sock_upper_handle_t
+so_newconn(sock_upper_handle_t parenthandle,
+    sock_lower_handle_t proto_handle, sock_downcalls_t *sock_downcalls,
+    struct cred *peer_cred, pid_t peer_cpid, sock_upcalls_t **sock_upcallsp)
+{
+	struct sonode *lso = (struct sonode *)parenthandle;
+	struct sonode *newso;
+	int error;
+
+	ASSERT(proto_handle != NULL);
+
+	if ((lso->so_state & SS_ACCEPTCONN) == 0)
+		return (NULL);
+	if (lso->so_acceptq_len >= lso->so_backlog)
+		return (NULL);
+
+	newso = socket_newconn(lso, proto_handle, sock_downcalls,
+	    SOCKET_NOSLEEP, &error);
+	if (newso == NULL)
+		return (NULL);
+
+	if (peer_cred != NULL) {
+		crhold(peer_cred);
+		newso->so_peercred = peer_cred;
+		newso->so_cpid = peer_cpid;
+	}
+
+	(void) so_acceptq_enqueue(lso, newso);
+
+	/*
+	 * so_lock is not dropped here; presumably so_notify_newconn()
+	 * releases it (confirm).
+	 */
+	mutex_enter(&lso->so_lock);
+	so_notify_newconn(lso);
+
+	*sock_upcallsp = &so_upcalls;
+
+	return ((sock_upper_handle_t)newso);
+}
+
+/*
+ * so_set_prop - upcall used by the protocol to update socket properties.
+ *
+ * Every property whose SOCKOPT_* bit is set in soppp->sopp_flags is copied
+ * into the sonode's so_proto_props while holding so_lock. The zero-copy
+ * flags are translated to their ST* equivalents rather than copied.
+ *
+ * On DEBUG kernels soppp->sopp_flags is modified: all the known bits are
+ * cleared so that an unknown one trips the ASSERT.
+ */
+void
+so_set_prop(sock_upper_handle_t sock_handle, struct sock_proto_props *soppp)
+{
+	struct sonode *so = (struct sonode *)sock_handle;
+	struct sock_proto_props *cur;
+
+	mutex_enter(&so->so_lock);
+
+	cur = &so->so_proto_props;
+
+	if (soppp->sopp_flags & SOCKOPT_MAXBLK)
+		cur->sopp_maxblk = soppp->sopp_maxblk;
+	if (soppp->sopp_flags & SOCKOPT_WROFF)
+		cur->sopp_wroff = soppp->sopp_wroff;
+	if (soppp->sopp_flags & SOCKOPT_TAIL)
+		cur->sopp_tail = soppp->sopp_tail;
+	if (soppp->sopp_flags & SOCKOPT_RCVHIWAT)
+		cur->sopp_rxhiwat = soppp->sopp_rxhiwat;
+	if (soppp->sopp_flags & SOCKOPT_RCVLOWAT)
+		cur->sopp_rxlowat = soppp->sopp_rxlowat;
+	if (soppp->sopp_flags & SOCKOPT_MAXPSZ)
+		cur->sopp_maxpsz = soppp->sopp_maxpsz;
+	if (soppp->sopp_flags & SOCKOPT_MINPSZ)
+		cur->sopp_minpsz = soppp->sopp_minpsz;
+
+	if (soppp->sopp_flags & SOCKOPT_ZCOPY) {
+		/* "safe" wins if both safe and unsafe are requested */
+		if (soppp->sopp_zcopyflag & ZCVMSAFE) {
+			cur->sopp_zcopyflag |= STZCVMSAFE;
+			cur->sopp_zcopyflag &= ~STZCVMUNSAFE;
+		} else if (soppp->sopp_zcopyflag & ZCVMUNSAFE) {
+			cur->sopp_zcopyflag |= STZCVMUNSAFE;
+			cur->sopp_zcopyflag &= ~STZCVMSAFE;
+		}
+
+		if (soppp->sopp_zcopyflag & COPYCACHED)
+			cur->sopp_zcopyflag |= STRCOPYCACHED;
+	}
+
+	if (soppp->sopp_flags & SOCKOPT_OOBINLINE)
+		cur->sopp_oobinline = soppp->sopp_oobinline;
+	if (soppp->sopp_flags & SOCKOPT_RCVTIMER)
+		cur->sopp_rcvtimer = soppp->sopp_rcvtimer;
+	if (soppp->sopp_flags & SOCKOPT_RCVTHRESH)
+		cur->sopp_rcvthresh = soppp->sopp_rcvthresh;
+	if (soppp->sopp_flags & SOCKOPT_MAXADDRLEN)
+		cur->sopp_maxaddrlen = soppp->sopp_maxaddrlen;
+
+	mutex_exit(&so->so_lock);
+
+#ifdef DEBUG
+	/* every bit the protocol passed in must have been handled above */
+	soppp->sopp_flags &= ~(SOCKOPT_MAXBLK | SOCKOPT_WROFF | SOCKOPT_TAIL |
+	    SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT | SOCKOPT_MAXPSZ |
+	    SOCKOPT_ZCOPY | SOCKOPT_OOBINLINE | SOCKOPT_RCVTIMER |
+	    SOCKOPT_RCVTHRESH | SOCKOPT_MAXADDRLEN | SOCKOPT_MINPSZ);
+	ASSERT(soppp->sopp_flags == 0);
+#endif
+}
+
+/*
+ * so_queue_msg - upcall used by the protocol to hand received data to sockfs.
+ *
+ * Arguments:
+ *	sock_handle - the sonode, as an opaque upper handle
+ *	mp          - message to queue, or NULL (see below)
+ *	msg_size    - number of data bytes in mp
+ *	flags       - MSG_OOB selects the out-of-band path
+ *	errorp      - must not be NULL; set to 0, ENOSPC or EOPNOTSUPP
+ *	force_pushp - optional in/out argument. On input it says whether the
+ *	              reader has to be notified right away (B_TRUE is assumed
+ *	              when it is NULL); it is set to B_TRUE on output when
+ *	              the reader was notified.
+ *
+ * A NULL mp with a non-zero msg_size only generates a data (or OOB)
+ * notification; a NULL mp with msg_size 0 only performs the receive
+ * space check.
+ *
+ * Returns:
+ *	The space left in the receive buffer, or -1 with *errorp set to
+ *	ENOSPC when the receive buffer is full and the socket has been
+ *	marked flow controlled. -1 with *errorp set to EOPNOTSUPP is returned
+ *	when the socket is falling back (mp is not freed in that case).
+ *	0 is returned for OOB data, for notification-only calls and when
+ *	the data was dropped because the socket can not receive any more.
+ */
+/* ARGSUSED */
+ssize_t
+so_queue_msg(sock_upper_handle_t sock_handle, mblk_t *mp,
+    size_t msg_size, int flags, int *errorp, boolean_t *force_pushp)
+{
+	struct sonode *so = (struct sonode *)sock_handle;
+	boolean_t force_push = B_TRUE;
+	int space_left;
+	sodirect_t *sodp = so->so_direct;	/* may be NULL */
+
+	ASSERT(errorp != NULL);
+	*errorp = 0;
+	if (mp == NULL) {
+		if (msg_size > 0) {
+			ASSERT(so->so_downcalls->sd_recv_uio != NULL);
+			mutex_enter(&so->so_lock);
+			/* the notify functions will drop the lock */
+			if (flags & MSG_OOB)
+				so_notify_oobdata(so, IS_SO_OOB_INLINE(so));
+			else
+				so_notify_data(so, msg_size);
+			return (0);
+		}
+		/*
+		 * recv space check
+		 */
+		mutex_enter(&so->so_lock);
+		space_left = so->so_rcvbuf - so->so_rcv_queued;
+		if (space_left <= 0) {
+			so->so_flowctrld = B_TRUE;
+			*errorp = ENOSPC;
+			space_left = -1;
+		}
+		goto done_unlock;
+	}
+
+	ASSERT(mp->b_next == NULL);
+	ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_PROTO);
+	ASSERT(msg_size == msgdsize(mp));
+
+	if (flags & MSG_OOB) {
+		so_queue_oob(sock_handle, mp, msg_size);
+		return (0);
+	}
+
+	if (force_pushp != NULL)
+		force_push = *force_pushp;
+
+	if (DB_TYPE(mp) == M_PROTO && !__TPI_PRIM_ISALIGNED(mp->b_rptr)) {
+		/* The read pointer is not aligned correctly for TPI */
+		zcmn_err(getzoneid(), CE_WARN,
+		    "sockfs: Unaligned TPI message received. rptr = %p\n",
+		    (void *)mp->b_rptr);
+		freemsg(mp);
+		/*
+		 * The socket need not have sodirect state (the checks
+		 * further down allow for a NULL sodp), so sod_lockp must
+		 * not be dereferenced unconditionally.
+		 */
+		if (sodp != NULL) {
+			mutex_enter(sodp->sod_lockp);
+			SOD_UIOAFINI(sodp);
+			mutex_exit(sodp->sod_lockp);
+		}
+
+		return (so->so_rcvbuf - so->so_rcv_queued);
+	}
+
+	mutex_enter(&so->so_lock);
+	if (so->so_state & (SS_FALLBACK_PENDING | SS_FALLBACK_COMP)) {
+		/* falling back; the message is left with the caller */
+		SOD_DISABLE(sodp);
+		mutex_exit(&so->so_lock);
+		*errorp = EOPNOTSUPP;
+		return (-1);
+	}
+	if (so->so_state & SS_CANTRCVMORE) {
+		/* the receive side is shut down; drop the data */
+		freemsg(mp);
+		SOD_DISABLE(sodp);
+		mutex_exit(&so->so_lock);
+		return (0);
+	}
+
+	/* process the mblk via I/OAT if capable */
+	if (sodp != NULL && (sodp->sod_state & SOD_ENABLED)) {
+		if (DB_TYPE(mp) == M_DATA) {
+			(void) sod_uioa_mblk_init(sodp, mp, msg_size);
+		} else {
+			SOD_UIOAFINI(sodp);
+		}
+	}
+
+	if (mp->b_next == NULL) {
+		so_enqueue_msg(so, mp, msg_size);
+	} else {
+		/* a b_next chain; queue the messages one at a time */
+		do {
+			mblk_t *nmp;
+
+			if ((nmp = mp->b_next) != NULL) {
+				mp->b_next = NULL;
+			}
+			so_enqueue_msg(so, mp, msgdsize(mp));
+			mp = nmp;
+		} while (mp != NULL);
+	}
+
+	space_left = so->so_rcvbuf - so->so_rcv_queued;
+	if (space_left <= 0) {
+		so->so_flowctrld = B_TRUE;
+		*errorp = ENOSPC;
+		space_left = -1;
+	}
+
+	if (force_push || so->so_rcv_queued >= so->so_rcv_thresh ||
+	    so->so_rcv_queued >= so->so_rcv_wanted ||
+	    (sodp != NULL && so->so_rcv_queued >= sodp->sod_want)) {
+		SOCKET_TIMER_CANCEL(so);
+		/*
+		 * so_notify_data will release the lock
+		 */
+		so_notify_data(so, so->so_rcv_queued);
+
+		if (force_pushp != NULL)
+			*force_pushp = B_TRUE;
+		goto done;
+	} else if (so->so_rcv_timer_tid == 0) {
+		/* Make sure the recv push timer is running */
+		SOCKET_TIMER_START(so);
+	}
+
+done_unlock:
+	mutex_exit(&so->so_lock);
+done:
+	return (space_left);
+}
+
+/*
+ * so_signal_oob - upcall used by the protocol to announce urgent data.
+ *
+ * 'offset' is where the urgent data is located relative to the bytes
+ * that are already queued; it is used to set the OOB mark. A SIGURG is
+ * generated as well.
+ *
+ * so_lock is taken here and not dropped by this function itself;
+ * presumably so_notify_oobsig() releases it (confirm).
+ */
+void
+so_signal_oob(sock_upper_handle_t sock_handle, ssize_t offset)
+{
+	struct sonode *so = (struct sonode *)sock_handle;
+
+	ASSERT(offset >= 0);
+
+	mutex_enter(&so->so_lock);
+	SOD_UIOAFINI(so->so_direct);
+
+	/*
+	 * Any older urgent data is superseded by the new one, which is
+	 * now pending.
+	 */
+	so->so_state &= ~(SS_HAVEOOBDATA|SS_HADOOBDATA);
+	so->so_state |= SS_OOBPEND;
+
+	if (so->so_oobmsg != NULL) {
+		dprintso(so, 1, ("sock: discarding old oob\n"));
+		freemsg(so->so_oobmsg);
+		so->so_oobmsg = NULL;
+	}
+
+	/* remember where the urgent byte is */
+	so->so_oobmark = so->so_rcv_queued + offset;
+	if (so->so_oobmark != 0)
+		so->so_state &= ~SS_RCVATMARK;
+	else
+		so->so_state |= SS_RCVATMARK;
+
+	so_notify_oobsig(so);
+}
+
+/*
+ * so_queue_oob - queue out-of-band data.
+ *
+ * The message goes onto the regular receive queue when OOB data is
+ * received inline; otherwise it is kept in so_oobmsg.
+ *
+ * so_lock is taken here and not dropped by this function itself;
+ * presumably so_notify_oobdata() releases it (confirm).
+ */
+static void
+so_queue_oob(sock_upper_handle_t sock_handle, mblk_t *mp, size_t len)
+{
+	struct sonode *so = (struct sonode *)sock_handle;
+
+	mutex_enter(&so->so_lock);
+	SOD_UIOAFINI(so->so_direct);
+
+	ASSERT(mp != NULL);
+	if (IS_SO_OOB_INLINE(so)) {
+		so_enqueue_msg(so, mp, len);
+	} else {
+		so->so_oobmsg = mp;
+		so->so_state |= SS_HAVEOOBDATA;
+	}
+
+	so_notify_oobdata(so, IS_SO_OOB_INLINE(so));
+}
+
+/*
+ * so_close - close the protocol end of the socket and discard whatever
+ * is left on the receive queue.
+ *
+ * Returns the result of the protocol's sd_close downcall.
+ */
+int
+so_close(struct sonode *so, int flag, struct cred *cr)
+{
+	int error;
+
+	error = so->so_downcalls->sd_close(so->so_proto_handle, flag, cr);
+
+	/*
+	 * The protocol will not make any more upcalls from here on, so
+	 * the receive queue can be emptied.
+	 */
+	mutex_enter(&so->so_lock);
+	so_rcv_flush(so);
+	mutex_exit(&so->so_lock);
+
+	return (error);
+}
+
+/*
+ * so_zcopy_notify - zero-copy notification upcall.
+ *
+ * Sets STZCNOTIFY in so_copyflag and wakes up all threads waiting on
+ * so_copy_cv.
+ */
+void
+so_zcopy_notify(sock_upper_handle_t sock_handle)
+{
+	struct sonode *so;
+
+	so = (struct sonode *)sock_handle;
+
+	mutex_enter(&so->so_lock);
+	so->so_copyflag |= STZCNOTIFY;
+	cv_broadcast(&so->so_copy_cv);
+	mutex_exit(&so->so_lock);
+}
+
+/*
+ * so_set_error - upcall used by the protocol to post an error on the
+ * socket.
+ *
+ * so_lock is taken here and not dropped by this function itself;
+ * presumably so_notify_error() releases it (confirm).
+ */
+void
+so_set_error(sock_upper_handle_t sock_handle, int error)
+{
+	struct sonode *so;
+
+	so = (struct sonode *)sock_handle;
+
+	mutex_enter(&so->so_lock);
+	soseterror(so, error);
+	so_notify_error(so);
+}
+
+/*
+ * so_recvmsg - read data from the socket
+ *
+ * There are two ways of obtaining data; either we ask the protocol to
+ * copy directly into the supplied buffer, or we copy data from the
+ * sonode's receive queue. The decision which one to use depends on
+ * whether the protocol has a sd_recv_uio down call.
+ *
+ * Arguments:
+ * so - socket to read from
+ * msg - on input msg_flags, msg_namelen and msg_controllen describe the
+ * request; on return msg_flags holds the returned flags, and
+ * msg_name/msg_control may point to buffers allocated here
+ * (presumably freed by the caller - verify against caller)
+ * uiop - where the data is copied to
+ * cr - credentials, only used by the sd_recv_uio down call
+ *
+ * Returns:
+ * 0 on success, otherwise an errno value: ENOTCONN, EOPNOTSUPP, EPROTO,
+ * or whatever the down call, the read lock, the dequeue or
+ * sod_rcv_done() returned.
+ */
+int
+so_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
+ struct cred *cr)
+{
+ rval_t rval;
+ int flags = 0;
+ t_uscalar_t controllen, namelen;
+ int error = 0;
+ int ret;
+ mblk_t *mctlp = NULL;
+ union T_primitives *tpr;
+ void *control;
+ ssize_t saved_resid;
+ struct uio *suiop;
+
+ SO_BLOCK_FALLBACK(so, SOP_RECVMSG(so, msg, uiop, cr));
+
+ if ((so->so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 &&
+ (so->so_mode & SM_CONNREQUIRED)) {
+ SO_UNBLOCK_FALLBACK(so);
+ return (ENOTCONN);
+ }
+
+ if (msg->msg_flags & MSG_PEEK)
+ msg->msg_flags &= ~MSG_WAITALL; /* MSG_WAITALL is ignored when peeking */
+
+ if (so->so_mode & SM_ATOMIC)
+ msg->msg_flags |= MSG_TRUNC;
+
+ if (msg->msg_flags & MSG_OOB) {
+ if ((so->so_mode & SM_EXDATA) == 0) {
+ error = EOPNOTSUPP;
+ } else if (so->so_downcalls->sd_recv_uio != NULL) {
+ error = (*so->so_downcalls->sd_recv_uio)
+ (so->so_proto_handle, uiop, msg, cr);
+ } else {
+ error = sorecvoob(so, msg, uiop, msg->msg_flags,
+ IS_SO_OOB_INLINE(so));
+ }
+ SO_UNBLOCK_FALLBACK(so);
+ return (error);
+ }
+
+ /*
+ * If the protocol has the recv down call, then pass the request
+ * down.
+ */
+ if (so->so_downcalls->sd_recv_uio != NULL) {
+ error = (*so->so_downcalls->sd_recv_uio)
+ (so->so_proto_handle, uiop, msg, cr);
+ SO_UNBLOCK_FALLBACK(so);
+ return (error);
+ }
+
+ /*
+ * Reading data from the socket buffer
+ *
+ * The request flags are kept in 'flags'; from here on msg_flags
+ * only carries the flags that are returned to the caller.
+ */
+ flags = msg->msg_flags;
+ msg->msg_flags = 0;
+
+ /*
+ * Set msg_controllen and msg_namelen to zero here to make it
+ * simpler in the cases that no control or name is returned.
+ */
+ controllen = msg->msg_controllen;
+ namelen = msg->msg_namelen;
+ msg->msg_controllen = 0;
+ msg->msg_namelen = 0;
+
+ mutex_enter(&so->so_lock);
+ /* Set SOREADLOCKED */
+ error = so_lock_read_intr(so,
+ uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? FNONBLOCK : 0));
+ mutex_exit(&so->so_lock);
+ if (error) {
+ SO_UNBLOCK_FALLBACK(so);
+ return (error);
+ }
+
+ suiop = sod_rcv_init(so, flags, &uiop);
+retry:
+ saved_resid = uiop->uio_resid; /* to detect whether this pass read anything */
+ error = so_dequeue_msg(so, &mctlp, uiop, &rval, flags);
+ if (error != 0) {
+ goto out;
+ }
+ /*
+ * For datagrams the MOREDATA flag is used to set MSG_TRUNC.
+ * For non-datagrams MOREDATA is used to set MSG_EOR.
+ */
+ ASSERT(!(rval.r_val1 & MORECTL));
+ if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC))
+ msg->msg_flags |= MSG_TRUNC;
+ if (mctlp == NULL) {
+ dprintso(so, 1, ("so_recvmsg: got M_DATA\n"));
+
+ mutex_enter(&so->so_lock);
+ /* Set MSG_EOR based on MOREDATA */
+ if (!(rval.r_val1 & MOREDATA)) {
+ if (so->so_state & SS_SAVEDEOR) {
+ msg->msg_flags |= MSG_EOR;
+ so->so_state &= ~SS_SAVEDEOR;
+ }
+ }
+ /*
+ * If some data was received (i.e. not EOF) and the
+ * read/recv* has not been satisfied wait for some more.
+ */
+ if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
+ uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
+ mutex_exit(&so->so_lock);
+ goto retry;
+ }
+
+ goto out_locked;
+ }
+ /* strsock_proto has already verified length and alignment */
+ tpr = (union T_primitives *)mctlp->b_rptr;
+ dprintso(so, 1, ("so_recvmsg: type %d\n", tpr->type));
+ switch (tpr->type) {
+ case T_DATA_IND: {
+ /*
+ * Set msg_flags to MSG_EOR based on
+ * MORE_flag and MOREDATA.
+ */
+ mutex_enter(&so->so_lock);
+ so->so_state &= ~SS_SAVEDEOR;
+ if (!(tpr->data_ind.MORE_flag & 1)) {
+ if (!(rval.r_val1 & MOREDATA))
+ msg->msg_flags |= MSG_EOR;
+ else
+ so->so_state |= SS_SAVEDEOR;
+ }
+ freemsg(mctlp);
+ /*
+ * If some data was received (i.e. not EOF) and the
+ * read/recv* has not been satisfied wait for some more.
+ */
+ if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
+ uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
+ mutex_exit(&so->so_lock);
+ goto retry;
+ }
+ goto out_locked;
+ }
+ case T_UNITDATA_IND: {
+ void *addr;
+ t_uscalar_t addrlen;
+ void *abuf;
+ t_uscalar_t optlen;
+ void *opt;
+
+ if (namelen != 0) {
+ /* Caller wants source address */
+ addrlen = tpr->unitdata_ind.SRC_length;
+ addr = sogetoff(mctlp, tpr->unitdata_ind.SRC_offset,
+ addrlen, 1);
+ if (addr == NULL) {
+ freemsg(mctlp);
+ error = EPROTO;
+ eprintsoline(so, error);
+ goto out;
+ }
+ ASSERT(so->so_family != AF_UNIX);
+ }
+ optlen = tpr->unitdata_ind.OPT_length;
+ if (optlen != 0) {
+ t_uscalar_t ncontrollen;
+
+ /*
+ * Extract any source address option.
+ * Determine how large cmsg buffer is needed.
+ */
+ opt = sogetoff(mctlp, tpr->unitdata_ind.OPT_offset,
+ optlen, __TPI_ALIGN_SIZE);
+
+ if (opt == NULL) {
+ freemsg(mctlp);
+ error = EPROTO;
+ eprintsoline(so, error);
+ goto out;
+ }
+ if (so->so_family == AF_UNIX)
+ so_getopt_srcaddr(opt, optlen, &addr, &addrlen);
+ ncontrollen = so_cmsglen(mctlp, opt, optlen,
+ !(flags & MSG_XPG4_2));
+ if (controllen != 0)
+ controllen = ncontrollen;
+ else if (ncontrollen != 0)
+ msg->msg_flags |= MSG_CTRUNC;
+ } else {
+ controllen = 0;
+ }
+
+ if (namelen != 0) {
+ /*
+ * Return address to caller.
+ * Caller handles truncation if length
+ * exceeds msg_namelen.
+ * NOTE: AF_UNIX NUL termination is ensured by
+ * the sender's copyin_name().
+ */
+ abuf = kmem_alloc(addrlen, KM_SLEEP);
+
+ bcopy(addr, abuf, addrlen);
+ msg->msg_name = abuf;
+ msg->msg_namelen = addrlen;
+ }
+
+ if (controllen != 0) {
+ /*
+ * Return control msg to caller.
+ * Caller handles truncation if length
+ * exceeds msg_controllen.
+ */
+ control = kmem_zalloc(controllen, KM_SLEEP);
+
+ error = so_opt2cmsg(mctlp, opt, optlen,
+ !(flags & MSG_XPG4_2), control, controllen);
+ if (error) {
+ freemsg(mctlp);
+ if (msg->msg_namelen != 0)
+ kmem_free(msg->msg_name,
+ msg->msg_namelen);
+ kmem_free(control, controllen);
+ eprintsoline(so, error);
+ goto out;
+ }
+ msg->msg_control = control;
+ msg->msg_controllen = controllen;
+ }
+
+ freemsg(mctlp);
+ goto out;
+ }
+ case T_OPTDATA_IND: {
+ struct T_optdata_req *tdr;
+ void *opt;
+ t_uscalar_t optlen;
+
+ tdr = (struct T_optdata_req *)mctlp->b_rptr;
+ optlen = tdr->OPT_length;
+ if (optlen != 0) {
+ t_uscalar_t ncontrollen;
+ /*
+ * Determine how large cmsg buffer is needed.
+ */
+ opt = sogetoff(mctlp,
+ tpr->optdata_ind.OPT_offset, optlen,
+ __TPI_ALIGN_SIZE);
+
+ if (opt == NULL) {
+ freemsg(mctlp);
+ error = EPROTO;
+ eprintsoline(so, error);
+ goto out;
+ }
+
+ ncontrollen = so_cmsglen(mctlp, opt, optlen,
+ !(flags & MSG_XPG4_2));
+ if (controllen != 0)
+ controllen = ncontrollen;
+ else if (ncontrollen != 0)
+ msg->msg_flags |= MSG_CTRUNC;
+ } else {
+ controllen = 0;
+ }
+
+ if (controllen != 0) {
+ /*
+ * Return control msg to caller.
+ * Caller handles truncation if length
+ * exceeds msg_controllen.
+ */
+ control = kmem_zalloc(controllen, KM_SLEEP);
+
+ error = so_opt2cmsg(mctlp, opt, optlen,
+ !(flags & MSG_XPG4_2), control, controllen);
+ if (error) {
+ freemsg(mctlp);
+ kmem_free(control, controllen);
+ eprintsoline(so, error);
+ goto out;
+ }
+ msg->msg_control = control;
+ msg->msg_controllen = controllen;
+ }
+
+ /*
+ * Set msg_flags to MSG_EOR based on
+ * DATA_flag and MOREDATA.
+ *
+ * NOTE(review): the code below reads data_ind.MORE_flag
+ * rather than optdata_ind.DATA_flag; presumably both fields
+ * are at the same offset in union T_primitives - confirm.
+ */
+ mutex_enter(&so->so_lock);
+ so->so_state &= ~SS_SAVEDEOR;
+ if (!(tpr->data_ind.MORE_flag & 1)) {
+ if (!(rval.r_val1 & MOREDATA))
+ msg->msg_flags |= MSG_EOR;
+ else
+ so->so_state |= SS_SAVEDEOR;
+ }
+ freemsg(mctlp);
+ /*
+ * If some data was received (i.e. not EOF) and the
+ * read/recv* has not been satisfied wait for some more.
+ * Not possible to wait if control info was received.
+ */
+ if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
+ controllen == 0 &&
+ uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
+ mutex_exit(&so->so_lock);
+ goto retry;
+ }
+ goto out_locked;
+ }
+ default:
+ cmn_err(CE_CONT, "so_recvmsg bad type %x \n",
+ tpr->type);
+ freemsg(mctlp);
+ error = EPROTO;
+ ASSERT(0); /* unexpected primitive; continues at "out" below */
+ }
+out: /* entered without so_lock held */
+ mutex_enter(&so->so_lock);
+out_locked: /* entered with so_lock held */
+ /* sod_lockp points to the sonode's so_lock */
+ ret = sod_rcv_done(so, suiop, uiop);
+ if (ret != 0 && error == 0)
+ error = ret;
+
+ so_unlock_read(so); /* Clear SOREADLOCKED */
+ mutex_exit(&so->so_lock);
+
+ SO_UNBLOCK_FALLBACK(so);
+
+ return (error);
+}
+
+sonodeops_t so_sonodeops = { /* maps each sop_* slot to its so_*() function */
+ so_init, /* sop_init */
+ so_accept, /* sop_accept */
+ so_bind, /* sop_bind */
+ so_listen, /* sop_listen */
+ so_connect, /* sop_connect */
+ so_recvmsg, /* sop_recvmsg */
+ so_sendmsg, /* sop_sendmsg */
+ so_sendmblk, /* sop_sendmblk */
+ so_getpeername, /* sop_getpeername */
+ so_getsockname, /* sop_getsockname */
+ so_shutdown, /* sop_shutdown */
+ so_getsockopt, /* sop_getsockopt */
+ so_setsockopt, /* sop_setsockopt */
+ so_ioctl, /* sop_ioctl */
+ so_poll, /* sop_poll */
+ so_close, /* sop_close */
+};
+
+sock_upcalls_t so_upcalls = { /* upcall vector; so_newconn() returns it via *sock_upcallsp */
+ so_newconn,
+ so_connected,
+ so_disconnected,
+ so_opctl,
+ so_queue_msg,
+ so_set_prop,
+ so_txq_full,
+ so_signal_oob,
+ so_zcopy_notify,
+ so_set_error
+};
diff --git a/usr/src/uts/common/fs/sockfs/sockcommon_subr.c b/usr/src/uts/common/fs/sockfs/sockcommon_subr.c
new file mode 100644
index 0000000000..c1cfa6bf5f
--- /dev/null
+++ b/usr/src/uts/common/fs/sockfs/sockcommon_subr.c
@@ -0,0 +1,1970 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/signal.h>
+#include <sys/cmn_err.h>
+
+#include <sys/stropts.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sockio.h>
+#include <sys/sodirect.h>
+#include <sys/strsubr.h>
+#include <sys/strsun.h>
+#include <sys/atomic.h>
+
+#include <fs/sockfs/sockcommon.h>
+#include <fs/sockfs/socktpi.h>
+#include <sys/ddi.h>
+#include <inet/ip.h>
+#include <sys/time.h>
+#include <sys/cmn_err.h>
+
+#ifdef SOCK_TEST
+extern int do_useracc;
+extern clock_t sock_test_timelimit;
+#endif /* SOCK_TEST */
+
+#define MBLK_PULL_LEN 64
+uint32_t so_mblk_pull_len = MBLK_PULL_LEN;
+
+#ifdef DEBUG
+boolean_t so_debug_length = B_FALSE;
+static boolean_t so_check_length(sonode_t *so);
+#endif
+
+/*
+ * Append nso to the tail of so's accept queue and wake up one thread
+ * waiting for a connection. The caller must hold so_acceptq_lock.
+ *
+ * Returns the length of the accept queue, the new entry included.
+ */
+int
+so_acceptq_enqueue_locked(struct sonode *so, struct sonode *nso)
+{
+	int qlen;
+
+	ASSERT(MUTEX_HELD(&so->so_acceptq_lock));
+	ASSERT(nso->so_acceptq_next == NULL);
+
+	/* link in at the tail */
+	*so->so_acceptq_tail = nso;
+	so->so_acceptq_tail = &nso->so_acceptq_next;
+	qlen = ++so->so_acceptq_len;
+
+	/* one new connection, so one consumer is enough */
+	cv_signal(&so->so_acceptq_cv);
+
+	return (qlen);
+}
+
+/*
+ * int so_acceptq_enqueue(struct sonode *so, struct sonode *nso)
+ *
+ * Put an incoming connection on the accept queue of a listening socket.
+ * so_acceptq_lock is taken for the duration of the operation.
+ *
+ * Arguments:
+ *   so	 - listening socket
+ *   nso - new connection
+ *
+ * Returns:
+ *   The number of connections on the queue, the new one included.
+ */
+int
+so_acceptq_enqueue(struct sonode *so, struct sonode *nso)
+{
+	int qlen;
+
+	mutex_enter(&so->so_acceptq_lock);
+	qlen = so_acceptq_enqueue_locked(so, nso);
+	mutex_exit(&so->so_acceptq_lock);
+
+	return (qlen);
+}
+
+/*
+ * Take the first connection off so's accept queue, waiting for one to
+ * arrive unless dontblock is set. The caller must hold so_acceptq_lock.
+ *
+ * Returns 0 with *nsop set to the connection. On failure *nsop is NULL
+ * and the return value is EWOULDBLOCK (queue empty and dontblock set) or
+ * EINTR (socket closing or falling back, or the wait was interrupted).
+ */
+static int
+so_acceptq_dequeue_locked(struct sonode *so, boolean_t dontblock,
+    struct sonode **nsop)
+{
+	struct sonode *nso = NULL;
+
+	*nsop = NULL;
+	ASSERT(MUTEX_HELD(&so->so_acceptq_lock));
+
+	for (;;) {
+		nso = so->so_acceptq_head;
+		if (nso != NULL)
+			break;
+
+		/*
+		 * A listening socket can not be reset or otherwise
+		 * disconnected, so so_error does not have to be looked
+		 * at. All that is left to decide is whether we are
+		 * allowed to wait.
+		 */
+		if (dontblock)
+			return (EWOULDBLOCK);
+		if (so->so_state & (SS_CLOSING | SS_FALLBACK_PENDING))
+			return (EINTR);
+
+		if (cv_wait_sig_swap(&so->so_acceptq_cv,
+		    &so->so_acceptq_lock) == 0)
+			return (EINTR);
+	}
+
+	ASSERT(nso != NULL);
+
+	/* unlink the head of the queue */
+	so->so_acceptq_head = nso->so_acceptq_next;
+	nso->so_acceptq_next = NULL;
+
+	if (so->so_acceptq_head == NULL) {
+		/* that was the last entry; reset the tail pointer */
+		ASSERT(so->so_acceptq_tail == &nso->so_acceptq_next);
+		so->so_acceptq_tail = &so->so_acceptq_head;
+	}
+	ASSERT(so->so_acceptq_len > 0);
+	--so->so_acceptq_len;
+
+	*nsop = nso;
+
+	return (0);
+}
+
+/*
+ * int so_acceptq_dequeue(struct sonode *, boolean_t, struct sonode **)
+ *
+ * Take a connection off the accept queue. so_acceptq_lock is taken for
+ * the duration of the operation.
+ *
+ * Arguments:
+ *   so	       - listening socket
+ *   dontblock - set if the caller must not sleep when the queue is empty
+ *   nsop      - value-return argument
+ *
+ * Return values:
+ *   0 if a connection was dequeued; nsop then points to the new
+ *   connection. A non-zero value is returned on failure, and nsop is
+ *   set to NULL.
+ *
+ * Note:
+ *   so_acceptq_dequeue() may return prematurely if the socket is falling
+ *   back to TPI.
+ */
+int
+so_acceptq_dequeue(struct sonode *so, boolean_t dontblock,
+    struct sonode **nsop)
+{
+	int rv;
+
+	mutex_enter(&so->so_acceptq_lock);
+	rv = so_acceptq_dequeue_locked(so, dontblock, nsop);
+	mutex_exit(&so->so_acceptq_lock);
+
+	return (rv);
+}
+
+/*
+ * void so_acceptq_flush(struct sonode *so)
+ *
+ * Destroy every connection that is still waiting on the accept queue of
+ * a listening socket, leaving the queue empty.
+ *
+ * Arguments:
+ *   so - listening socket
+ *
+ * Return values:
+ *   None.
+ *
+ * Note:
+ *   No locks are taken here. The caller has to make sure that neither
+ *   so_acceptq_enqueue() nor so_acceptq_dequeue() can be called while
+ *   the queue is being flushed; either the socket is in a state where
+ *   no operations can come in, or so_lock has to be held.
+ */
+void
+so_acceptq_flush(struct sonode *so)
+{
+	struct sonode *cur;
+	struct sonode *next;
+
+	for (cur = so->so_acceptq_head; cur != NULL; cur = next) {
+		next = cur->so_acceptq_next;
+		cur->so_acceptq_next = NULL;
+
+		/*
+		 * A socket on the accept queue has exactly one reference.
+		 * Drop it and get rid of the socket.
+		 */
+		ASSERT(cur->so_count == 1);
+		cur->so_count--;
+		socket_destroy(cur);
+	}
+
+	/* back to an empty queue */
+	so->so_acceptq_head = NULL;
+	so->so_acceptq_tail = &so->so_acceptq_head;
+	so->so_acceptq_len = 0;
+}
+
+/*
+ * Wait, with so_lock held, until the protocol has reported a connection
+ * id that is not older than 'id'.
+ *
+ * Returns 0 if the socket is connected. Otherwise EINPROGRESS (nonblock
+ * set and the connection is not there yet), EINTR (socket closing or
+ * falling back, or the wait was interrupted), the pending socket error,
+ * or ECONNREFUSED if the socket is not connected and no error is pending.
+ */
+int
+so_wait_connected_locked(struct sonode *so, boolean_t nonblock,
+    sock_connid_t id)
+{
+	ASSERT(MUTEX_HELD(&so->so_lock));
+
+	/*
+	 * We were told by the protocol that a connection attempt is under
+	 * way. Errors left over from earlier attempts have to be cleared
+	 * before we start waiting for the outcome of this one.
+	 */
+	if (so->so_error != 0) {
+		if (SOCK_CONNID_LT(so->so_proto_connid, id))
+			so->so_error = 0;
+	}
+
+	for (;;) {
+		if (!SOCK_CONNID_LT(so->so_proto_connid, id))
+			break;
+
+		if (nonblock)
+			return (EINPROGRESS);
+
+		if (so->so_state & (SS_CLOSING | SS_FALLBACK_PENDING))
+			return (EINTR);
+
+		if (cv_wait_sig_swap(&so->so_state_cv, &so->so_lock) == 0)
+			return (EINTR);
+	}
+
+	if (so->so_error != 0)
+		return (sogeterr(so, B_TRUE));
+
+	/*
+	 * A failed connect normally leaves an error in so_error. Another
+	 * thread may have consumed it before we got here though, in which
+	 * case we have to come up with a sensible error ourselves.
+	 */
+	return ((so->so_state & SS_ISCONNECTED) ? 0 : ECONNREFUSED);
+}
+
+/*
+ * int so_wait_connected(struct sonode *so, boolean_t nonblock,
+ *    sock_connid_t id)
+ *
+ * Wait until the socket is connected or an error has occurred.
+ *
+ * Arguments:
+ *   so       - socket
+ *   nonblock - B_TRUE if the caller must not sleep when the connection
+ *              has not yet been established
+ *   id       - connection id that was returned by the protocol when
+ *              the operation was started
+ *
+ * Returns:
+ *   0 if the connection attempt was successful, or an error indicating
+ *   why the connection attempt failed.
+ */
+int
+so_wait_connected(struct sonode *so, boolean_t nonblock, sock_connid_t id)
+{
+	int rc;
+
+	/* All the work is done by the locked variant under so_lock. */
+	mutex_enter(&so->so_lock);
+	rc = so_wait_connected_locked(so, nonblock, id);
+	mutex_exit(&so->so_lock);
+
+	return (rc);
+}
+
+/*
+ * Wait, with so_lock held, until the send queue is no longer marked full.
+ *
+ * Arguments:
+ *   so        - socket; so_lock must be held by the caller
+ *   dontblock - B_TRUE if the caller must not sleep
+ *
+ * Returns:
+ *   0           - the queue is not full
+ *   EPIPE       - the socket cannot send any more (SS_CANTSENDMORE)
+ *   EWOULDBLOCK - the queue is full and dontblock was set
+ *   EINTR       - the socket is closing or falling back, or the wait
+ *                 was interrupted by a signal
+ *   ETIME       - the send timeout (so_sndtimeo) expired
+ */
+int
+so_snd_wait_qnotfull_locked(struct sonode *so, boolean_t dontblock)
+{
+	clock_t deadline = 0;
+	boolean_t have_deadline = B_FALSE;
+	int error;
+
+	ASSERT(MUTEX_HELD(&so->so_lock));
+	while (so->so_snd_qfull) {
+		if (so->so_state & SS_CANTSENDMORE)
+			return (EPIPE);
+		if (dontblock)
+			return (EWOULDBLOCK);
+
+		if (so->so_state & (SS_CLOSING | SS_FALLBACK_PENDING))
+			return (EINTR);
+
+		if (so->so_sndtimeo == 0) {
+			/*
+			 * Zero means disable timeout.
+			 */
+			error = cv_wait_sig(&so->so_snd_cv, &so->so_lock);
+		} else {
+			/*
+			 * Compute the deadline only once. Recomputing it on
+			 * every pass would restart the full timeout each
+			 * time we are woken up and find the queue full
+			 * again, so the total wait could exceed so_sndtimeo
+			 * without bound.
+			 */
+			if (!have_deadline) {
+				time_to_wait(&deadline, so->so_sndtimeo);
+				have_deadline = B_TRUE;
+			}
+			error = cv_timedwait_sig(&so->so_snd_cv, &so->so_lock,
+			    deadline);
+		}
+		/* 0: interrupted by a signal, -1: timed out */
+		if (error == 0)
+			return (EINTR);
+		else if (error == -1)
+			return (ETIME);
+	}
+	return (0);
+}
+
+/*
+ * int so_snd_wait_qnotfull(struct sonode *so, boolean_t dontblock)
+ *
+ * Wait for the transport to notify us about send buffers becoming
+ * available. so_snd_wakeup is set for the duration of the wait.
+ * Returns 0 right away if the send queue is not marked full, otherwise
+ * the result of so_snd_wait_qnotfull_locked().
+ */
+int
+so_snd_wait_qnotfull(struct sonode *so, boolean_t dontblock)
+{
+	int rc;
+
+	mutex_enter(&so->so_lock);
+	if (!so->so_snd_qfull) {
+		/* Nothing to wait for. */
+		mutex_exit(&so->so_lock);
+		return (0);
+	}
+
+	so->so_snd_wakeup = B_TRUE;
+	rc = so_snd_wait_qnotfull_locked(so, dontblock);
+	so->so_snd_wakeup = B_FALSE;
+	mutex_exit(&so->so_lock);
+
+	return (rc);
+}
+
+/*
+ * Mark the send queue of the socket as full. The flag is updated
+ * under so_lock.
+ */
+void
+so_snd_qfull(struct sonode *so)
+{
+	kmutex_t *lockp = &so->so_lock;
+
+	mutex_enter(lockp);
+	so->so_snd_qfull = B_TRUE;
+	mutex_exit(lockp);
+}
+
+/*
+ * Mark the send queue of the socket as no longer full and wake up all
+ * threads sleeping on so_snd_cv. Both steps happen under so_lock.
+ */
+void
+so_snd_qnotfull(struct sonode *so)
+{
+	kmutex_t *lockp = &so->so_lock;
+
+	mutex_enter(lockp);
+	so->so_snd_qfull = B_FALSE;
+	/* every waiter gets a chance to re-check the queue state */
+	cv_broadcast(&so->so_snd_cv);
+	mutex_exit(lockp);
+}
+
+/*
+ * Change the process/process group to which SIGIO is sent.
+ *
+ * The caller must hold so_lock. A pid of 0 is stored without any check.
+ * Returns 0 on success, otherwise the error returned by kill(), in
+ * which case so_pgrp is left unchanged.
+ */
+int
+socket_chgpgrp(struct sonode *so, pid_t pid)
+{
+	int rc = 0;
+
+	ASSERT(MUTEX_HELD(&so->so_lock));
+
+	/*
+	 * Check permissions by sending signal 0 to the target. Note that
+	 * a failing kill also does a set_errno, which makes the system
+	 * call fail.
+	 */
+	if (pid != 0)
+		rc = kill(pid, 0);
+
+	if (rc == 0)
+		so->so_pgrp = pid;
+
+	return (rc);
+}
+
+
+/*
+ * Deliver the signals for `event' to a single process.
+ *
+ * A write event queues SIGPOLL together with a siginfo structure
+ * (POLL_OUT); a read event sends a plain SIGPOLL; an urgent event sends
+ * SIGURG. More than one event bit may be set in one call.
+ */
+/*ARGSUSED*/
+static void
+socket_sigproc(proc_t *proc, int event)
+{
+	k_siginfo_t sinfo;
+
+	ASSERT(event & (SOCKETSIG_WRITE | SOCKETSIG_READ | SOCKETSIG_URG));
+
+	if ((event & SOCKETSIG_WRITE) != 0) {
+		sinfo.si_band = 0;
+		sinfo.si_fd = 0;
+		sinfo.si_errno = 0;
+		sinfo.si_code = POLL_OUT;
+		sinfo.si_signo = SIGPOLL;
+		sigaddq(proc, NULL, &sinfo, KM_NOSLEEP);
+	}
+
+	if ((event & SOCKETSIG_READ) != 0)
+		sigtoproc(proc, NULL, SIGPOLL);
+
+	if ((event & SOCKETSIG_URG) != 0)
+		sigtoproc(proc, NULL, SIGURG);
+}
+
+/*
+ * Send the signal(s) for `event' to the owner recorded in so_pgrp.
+ *
+ * The caller must hold so_lock. Nothing is sent when no owner is set,
+ * or when the socket is not in async mode and the event is anything
+ * other than SOCKETSIG_URG. A positive so_pgrp names a single process,
+ * a negative one a process group.
+ */
+void
+socket_sendsig(struct sonode *so, int event)
+{
+	proc_t *p;
+
+	ASSERT(MUTEX_HELD(&so->so_lock));
+
+	if (so->so_pgrp == 0)
+		return;
+	if ((so->so_state & SS_ASYNC) == 0 && event != SOCKETSIG_URG)
+		return;
+
+	dprint(3, ("sending sig %d to %d\n", event, so->so_pgrp));
+
+	if (so->so_pgrp < 0) {
+		/*
+		 * Process group: pidlock stays held while the group
+		 * list is walked and each member is signalled.
+		 */
+		mutex_enter(&pidlock);
+		for (p = pgfind(-so->so_pgrp); p != NULL; p = p->p_pglink) {
+			mutex_enter(&p->p_lock);
+			socket_sigproc(p, event);
+			mutex_exit(&p->p_lock);
+		}
+		mutex_exit(&pidlock);
+		return;
+	}
+
+	/*
+	 * Single process.
+	 *
+	 * XXX This unfortunately still generates a signal when a fd is
+	 * closed but the proc is active.
+	 */
+	mutex_enter(&pidlock);
+	p = prfind(so->so_pgrp);
+	if (p != NULL) {
+		/* take p_lock before letting go of pidlock */
+		mutex_enter(&p->p_lock);
+		mutex_exit(&pidlock);
+		socket_sigproc(p, event);
+		mutex_exit(&p->p_lock);
+	} else {
+		mutex_exit(&pidlock);
+	}
+}
+
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+/*
+ * Copy user data described by uiop into a newly allocated mblk chain.
+ *
+ * Arguments:
+ *   uiop     - source of the data
+ *   iosize   - number of bytes to copy; INFPSZ or anything larger than
+ *              uio_resid means "everything that is left"
+ *   wroff    - space left unused at the front of every block
+ *   maxblk   - largest data size of one block; INFPSZ means no limit
+ *   tail_len - extra space allocated at the end of every block
+ *   errorp   - set to 0 on success, otherwise to the error
+ *
+ * Returns the head of the chain. On a uiomove() failure the chain is
+ * freed and NULL is returned. On an allocation failure *errorp is set
+ * to ENOMEM and whatever was already built (possibly NULL) is returned.
+ */
+mblk_t *
+socopyinuio(uio_t *uiop, ssize_t iosize, size_t wroff, ssize_t maxblk,
+    size_t tail_len, int *errorp)
+{
+	mblk_t *chain = NULL;
+	mblk_t **linkp = &chain;
+	mblk_t *bp;
+	ssize_t chunk;
+
+	ASSERT(iosize == INFPSZ || iosize > 0);
+
+	if (iosize == INFPSZ || iosize > uiop->uio_resid)
+		iosize = uiop->uio_resid;
+
+	if (maxblk == INFPSZ)
+		maxblk = iosize;
+
+	/* Degenerate requests: nothing can be copied, report success. */
+	if (iosize < 0 || maxblk < 0 || (maxblk == 0 && iosize > 0)) {
+		*errorp = 0;
+		return (NULL);
+	}
+
+	/*
+	 * The loop body always runs at least once, so an iosize of 0
+	 * yields a single empty block and a uiomove(9F) call that just
+	 * returns. Checking for that up front would only slow down the
+	 * far more common case of a non-zero iosize.
+	 */
+	for (;;) {
+		chunk = (maxblk < iosize) ? maxblk : iosize;
+		ASSERT(chunk >= 0);
+
+		bp = allocb(wroff + chunk + tail_len, BPRI_MED);
+		if (bp == NULL) {
+			*errorp = ENOMEM;
+			return (chain);
+		}
+		bp->b_rptr += wroff;
+		bp->b_wptr = bp->b_rptr + chunk;
+
+		/* link the block in before copying so a failure frees it */
+		*linkp = bp;
+		linkp = &bp->b_cont;
+
+		/* uiomove(9F) either returns 0 or EFAULT */
+		*errorp = uiomove(bp->b_rptr, (size_t)chunk, UIO_WRITE, uiop);
+		if (*errorp != 0) {
+			ASSERT(*errorp != ENOMEM);
+			freemsg(chain);
+			return (NULL);
+		}
+
+		iosize -= chunk;
+		if (iosize <= 0)
+			break;
+	}
+
+	*errorp = 0;
+	return (chain);
+}
+
+/*
+ * Copy data from the mblk chain `mp' out to uiop.
+ *
+ * At most max_read bytes are copied (INFPSZ, or anything larger than
+ * uio_resid, means "as much as uiop can take"). Blocks that have been
+ * fully consumed, and zero-length blocks reached along the way, are
+ * freed.
+ *
+ * Returns the part of the chain that was not consumed (NULL if nothing
+ * is left) with *errorp set to 0. If uiomove() fails the whole
+ * remaining chain is freed, *errorp is set to the error and NULL is
+ * returned.
+ */
+mblk_t *
+socopyoutuio(mblk_t *mp, struct uio *uiop, ssize_t max_read, int *errorp)
+{
+	mblk_t *spent;
+	ptrdiff_t n;
+	int rc;
+
+	ASSERT(mp->b_wptr >= mp->b_rptr);
+
+	/*
+	 * max_read is the offset of the oobmark, and a read must never
+	 * go past the oobmark.
+	 */
+	if (max_read == INFPSZ || max_read > uiop->uio_resid)
+		max_read = uiop->uio_resid;
+
+	for (;;) {
+		n = MIN(max_read, MBLKL(mp));
+		if (n != 0) {
+			ASSERT(n > 0);
+
+			rc = uiomove(mp->b_rptr, n, UIO_READ, uiop);
+			if (rc != 0) {
+				freemsg(mp);
+				*errorp = rc;
+				return (NULL);
+			}
+		}
+		mp->b_rptr += n;
+		max_read -= n;
+
+		/* drop every leading block that has no data left in it */
+		for (; mp != NULL && mp->b_rptr >= mp->b_wptr; mp = spent) {
+			spent = mp;
+			mp = mp->b_cont;
+			freeb(spent);
+			spent = mp;
+		}
+
+		if (mp == NULL || max_read <= 0)
+			break;
+	}
+
+	*errorp = 0;
+	return (mp);
+}
+
+/*
+ * Put a message back at the front of the so_rcv_q_head list.
+ *
+ * `last_tail' is stored in mp->b_prev. If the list was empty the
+ * message also becomes so_rcv_q_last_head.
+ */
+static void
+so_prepend_msg(struct sonode *so, mblk_t *mp, mblk_t *last_tail)
+{
+	mblk_t *oldhead = so->so_rcv_q_head;
+
+	ASSERT(last_tail != NULL);
+	ASSERT(!(DB_FLAGS(mp) & DBLK_UIOA));
+
+	mp->b_prev = last_tail;
+	mp->b_next = oldhead;
+
+	if (oldhead == NULL) {
+		/* first entry is also the last head */
+		ASSERT(so->so_rcv_q_last_head == NULL);
+		so->so_rcv_q_last_head = mp;
+	}
+#ifdef DEBUG
+	else {
+		ASSERT(!(DB_FLAGS(oldhead) & DBLK_UIOA));
+	}
+#endif
+	so->so_rcv_q_head = mp;
+
+#ifdef DEBUG
+	if (so_debug_length) {
+		mutex_enter(&so->so_lock);
+		ASSERT(so_check_length(so));
+		mutex_exit(&so->so_lock);
+	}
+#endif
+}
+
+/*
+ * Move a list of newly arrived messages (mp_head .. mp_last_head, linked
+ * through b_next) onto the so_rcv_q_head processing list.
+ *
+ * The head of each message keeps a pointer to its last mblk in b_prev
+ * (see the asserts below), which is what makes the appends below
+ * possible without walking the b_cont chain.
+ *
+ * If the processing list is empty the new list simply becomes the list.
+ * Otherwise one of three things happens:
+ *   - a single new M_DATA message whose DBLK_UIOA flag matches that of
+ *     the last queued M_DATA message is appended to that message;
+ *   - several new messages, where the first and the last queued message
+ *     both have DBLK_UIOA set, have their first message appended to the
+ *     last queued message and the rest linked in after it;
+ *   - in all other cases the new list is linked in after the last
+ *     queued message.
+ */
+static void
+process_new_message(struct sonode *so, mblk_t *mp_head, mblk_t *mp_last_head)
+{
+ ASSERT(mp_head->b_prev != NULL);
+ if (so->so_rcv_q_head == NULL) {
+ /* Empty processing list: adopt the new list as is. */
+ so->so_rcv_q_head = mp_head;
+ so->so_rcv_q_last_head = mp_last_head;
+ ASSERT(so->so_rcv_q_last_head->b_prev != NULL);
+ } else {
+ /* B_TRUE if both messages agree on the DBLK_UIOA flag. */
+ boolean_t flag_equal = ((DB_FLAGS(mp_head) & DBLK_UIOA) ==
+ (DB_FLAGS(so->so_rcv_q_last_head) & DBLK_UIOA));
+
+ if (mp_head->b_next == NULL &&
+ DB_TYPE(mp_head) == M_DATA &&
+ DB_TYPE(so->so_rcv_q_last_head) == M_DATA && flag_equal) {
+ /*
+ * Single data message: chain it onto the tail of the
+ * last queued message and update that message's
+ * tail pointer.
+ */
+ so->so_rcv_q_last_head->b_prev->b_cont = mp_head;
+ so->so_rcv_q_last_head->b_prev = mp_head->b_prev;
+ mp_head->b_prev = NULL;
+ } else if (flag_equal && (DB_FLAGS(mp_head) & DBLK_UIOA)) {
+ /*
+ * Append to last_head if more than one mblks, and both
+ * mp_head and last_head are I/OAT mblks.
+ */
+ ASSERT(mp_head->b_next != NULL);
+ so->so_rcv_q_last_head->b_prev->b_cont = mp_head;
+ so->so_rcv_q_last_head->b_prev = mp_head->b_prev;
+ mp_head->b_prev = NULL;
+
+ /* The remaining new messages follow as separate entries. */
+ so->so_rcv_q_last_head->b_next = mp_head->b_next;
+ mp_head->b_next = NULL;
+ so->so_rcv_q_last_head = mp_last_head;
+ } else {
+#ifdef DEBUG
+ {
+ /* Every new message head must carry a tail pointer. */
+ mblk_t *tmp_mblk;
+ tmp_mblk = mp_head;
+ while (tmp_mblk != NULL) {
+ ASSERT(tmp_mblk->b_prev != NULL);
+ tmp_mblk = tmp_mblk->b_next;
+ }
+ }
+#endif
+ /* Link the whole new list in behind the last queued message. */
+ so->so_rcv_q_last_head->b_next = mp_head;
+ so->so_rcv_q_last_head = mp_last_head;
+ }
+ }
+}
+
+/*
+ * Take the next message off the socket's receive queues and copy its
+ * data to uiop.
+ *
+ * Messages sitting on so_rcv_head are first moved, under so_lock, to the
+ * so_rcv_q_head list; that list is then worked on without the lock.
+ * Leading M_PROTO/M_PCPROTO blocks of the message are returned through
+ * *mctlp, the M_DATA part is copied out, never past the oobmark. What is
+ * left of a partially read message is put back at the front of the list
+ * unless MSG_PEEK or MSG_TRUNC was given.
+ *
+ * Arguments:
+ *   so    - socket
+ *   mctlp - set to the control part of the message, or to NULL
+ *   uiop  - destination of the data
+ *   rvalp - r_val1 is set to 0 at the start of every pass and to the
+ *           accumulated `more' flags (MOREDATA) on the way out through
+ *           the `done' label
+ *   flags - MSG_PEEK, MSG_TRUNC, MSG_DUPCTRL and MSG_DONTWAIT are
+ *           looked at
+ *
+ * Returns:
+ *   0 on success, otherwise an errno value: EWOULDBLOCK when there is no
+ *   data and the caller must not block, EINTR when the socket is closing
+ *   or falling back or a signal arrived, ETIME when so_rcvtimeo expired,
+ *   the pending socket error, or an error from uiomove()/strwaitbuf().
+ */
+int
+so_dequeue_msg(struct sonode *so, mblk_t **mctlp, struct uio *uiop,
+ rval_t *rvalp, int flags)
+{
+ mblk_t *mp, *nmp;
+ mblk_t *savemp, *savemptail;
+ mblk_t *new_msg_head;
+ mblk_t *new_msg_last_head;
+ mblk_t *last_tail;
+ boolean_t partial_read;
+ boolean_t reset_atmark = B_FALSE;
+ int more = 0;
+ int error;
+ ssize_t oobmark;
+ sodirect_t *sodp = so->so_direct;
+
+ partial_read = B_FALSE;
+ *mctlp = NULL;
+again:
+ mutex_enter(&so->so_lock);
+again1:
+ /* so_lock is held whenever control reaches this label. */
+#ifdef DEBUG
+ if (so_debug_length) {
+ ASSERT(so_check_length(so));
+ }
+#endif
+ /*
+ * First move messages from the dump area to processing area
+ */
+ if (sodp != NULL) {
+ /* No need to grab sod_lockp since it points to so_lock */
+ if (sodp->sod_state & SOD_ENABLED) {
+ ASSERT(sodp->sod_lockp == &so->so_lock);
+
+ if (sodp->sod_uioa.uioa_state & UIOA_ALLOC) {
+ /* nothing to uioamove */
+ sodp = NULL;
+ } else if (sodp->sod_uioa.uioa_state & UIOA_INIT) {
+ sodp->sod_uioa.uioa_state &= UIOA_CLR;
+ sodp->sod_uioa.uioa_state |= UIOA_ENABLED;
+ /*
+ * try to uioamove() the data that
+ * has already queued.
+ */
+ sod_uioa_so_init(so, sodp, uiop);
+ }
+ } else {
+ sodp = NULL;
+ }
+ }
+ /* Detach everything that arrived since the last pass. */
+ new_msg_head = so->so_rcv_head;
+ new_msg_last_head = so->so_rcv_last_head;
+ so->so_rcv_head = NULL;
+ so->so_rcv_last_head = NULL;
+ oobmark = so->so_oobmark;
+ /*
+ * We can release the lock as there can only be one reader
+ */
+ mutex_exit(&so->so_lock);
+
+ /* Remember that we started at the mark; used after the read. */
+ if (so->so_state & SS_RCVATMARK) {
+ reset_atmark = B_TRUE;
+ }
+ if (new_msg_head != NULL) {
+ process_new_message(so, new_msg_head, new_msg_last_head);
+ }
+ savemp = savemptail = NULL;
+ rvalp->r_val1 = 0;
+ error = 0;
+ mp = so->so_rcv_q_head;
+
+ /*
+ * Process the head message if there is one and either no receive
+ * timer is pending or enough data has been queued.
+ */
+ if (mp != NULL &&
+ (so->so_rcv_timer_tid == 0 ||
+ so->so_rcv_queued >= so->so_rcv_thresh)) {
+ partial_read = B_FALSE;
+
+ if (flags & MSG_PEEK) {
+ /*
+ * Peeking works on a copy; the original stays queued.
+ * If no copy can be made, wait for memory and retry.
+ */
+ if ((nmp = dupmsg(mp)) == NULL &&
+ (nmp = copymsg(mp)) == NULL) {
+ size_t size = msgsize(mp);
+
+ error = strwaitbuf(size, BPRI_HI);
+ if (error) {
+ return (error);
+ }
+ goto again;
+ }
+ mp = nmp;
+ } else {
+ /* Unlink the head message, remembering its tail. */
+ ASSERT(mp->b_prev != NULL);
+ last_tail = mp->b_prev;
+ mp->b_prev = NULL;
+ so->so_rcv_q_head = mp->b_next;
+ if (so->so_rcv_q_head == NULL) {
+ so->so_rcv_q_last_head = NULL;
+ }
+ mp->b_next = NULL;
+ }
+
+ ASSERT(mctlp != NULL);
+ /*
+ * First process PROTO or PCPROTO blocks, if any.
+ */
+ if (DB_TYPE(mp) != M_DATA) {
+ *mctlp = mp;
+ savemp = mp;
+ savemptail = mp;
+ ASSERT(DB_TYPE(mp) == M_PROTO ||
+ DB_TYPE(mp) == M_PCPROTO);
+ while (mp->b_cont != NULL &&
+ DB_TYPE(mp->b_cont) != M_DATA) {
+ ASSERT(DB_TYPE(mp->b_cont) == M_PROTO ||
+ DB_TYPE(mp->b_cont) == M_PCPROTO);
+ mp = mp->b_cont;
+ savemptail = mp;
+ }
+ /* Split the control part off from the data part. */
+ mp = savemptail->b_cont;
+ savemptail->b_cont = NULL;
+ }
+
+ ASSERT(DB_TYPE(mp) == M_DATA);
+ /*
+ * Now process DATA blocks, if any. Note that for sodirect
+ * enabled socket, uio_resid can be 0.
+ */
+ if (uiop->uio_resid >= 0) {
+ ssize_t copied = 0;
+
+ if (sodp != NULL && (DB_FLAGS(mp) & DBLK_UIOA)) {
+ mutex_enter(sodp->sod_lockp);
+ ASSERT(uiop == (uio_t *)&sodp->sod_uioa);
+ copied = sod_uioa_mblk(so, mp);
+ if (copied > 0)
+ partial_read = B_TRUE;
+ mutex_exit(sodp->sod_lockp);
+ /* mark this mblk as processed */
+ mp = NULL;
+ } else {
+ ssize_t oldresid = uiop->uio_resid;
+
+ /*
+ * Small first block: try to pull the message up
+ * into one block, which then is its own tail.
+ */
+ if (MBLKL(mp) < so_mblk_pull_len) {
+ if (pullupmsg(mp, -1) == 1) {
+ last_tail = mp;
+ }
+ }
+ /*
+ * Can not read beyond the oobmark
+ */
+ mp = socopyoutuio(mp, uiop,
+ oobmark == 0 ? INFPSZ : oobmark, &error);
+ if (error != 0) {
+ freemsg(*mctlp);
+ *mctlp = NULL;
+ more = 0;
+ goto done;
+ }
+ ASSERT(oldresid >= uiop->uio_resid);
+ copied = oldresid - uiop->uio_resid;
+ if (oldresid > uiop->uio_resid)
+ partial_read = B_TRUE;
+ }
+ ASSERT(copied >= 0);
+ if (copied > 0 && !(flags & MSG_PEEK)) {
+ /* Account for the consumed data under so_lock. */
+ mutex_enter(&so->so_lock);
+ so->so_rcv_queued -= copied;
+ ASSERT(so->so_oobmark >= 0);
+ if (so->so_oobmark > 0) {
+ so->so_oobmark -= copied;
+ ASSERT(so->so_oobmark >= 0);
+ if (so->so_oobmark == 0) {
+ /* The read reached the mark. */
+ ASSERT(so->so_state &
+ SS_OOBPEND);
+ so->so_oobmark = 0;
+ so->so_state |= SS_RCVATMARK;
+ }
+ }
+ if (so->so_flowctrld && so->so_rcv_queued <
+ so->so_rcvlowat) {
+ so->so_flowctrld = B_FALSE;
+ /* The downcall is made without so_lock. */
+ mutex_exit(&so->so_lock);
+ /*
+ * open up flow control
+ */
+ (*so->so_downcalls->sd_clr_flowctrl)
+ (so->so_proto_handle);
+ } else {
+ mutex_exit(&so->so_lock);
+ }
+ }
+ }
+ if (mp != NULL) { /* more data blocks in msg */
+ more |= MOREDATA;
+ if ((flags & (MSG_PEEK|MSG_TRUNC))) {
+ /*
+ * The rest is discarded. For MSG_TRUNC it was
+ * really dequeued, so adjust the queued count.
+ */
+ if (flags & MSG_TRUNC) {
+ mutex_enter(&so->so_lock);
+ so->so_rcv_queued -= msgdsize(mp);
+ mutex_exit(&so->so_lock);
+ }
+ freemsg(mp);
+ } else if (partial_read && !somsghasdata(mp)) {
+ /*
+ * Avoid queuing a zero-length tail part of
+ * a message. partial_read == 1 indicates that
+ * we read some of the message.
+ */
+ freemsg(mp);
+ more &= ~MOREDATA;
+ } else {
+ if (savemp != NULL &&
+ (flags & MSG_DUPCTRL)) {
+ mblk_t *nmp;
+ /*
+ * There should only be non data mblks
+ */
+ ASSERT(DB_TYPE(savemp) != M_DATA &&
+ DB_TYPE(savemptail) != M_DATA);
+try_again:
+ if ((nmp = dupmsg(savemp)) == NULL &&
+ (nmp = copymsg(savemp)) == NULL) {
+
+ size_t size = msgsize(savemp);
+
+ error = strwaitbuf(size,
+ BPRI_HI);
+ if (error != 0) {
+ /*
+ * In case we
+ * cannot copy
+ * control data
+ * free the remaining
+ * data.
+ */
+ freemsg(mp);
+ goto done;
+ }
+ goto try_again;
+ }
+
+ ASSERT(nmp != NULL);
+ ASSERT(DB_TYPE(nmp) != M_DATA);
+ /*
+ * Hand the copy to the caller and re-attach
+ * the original control part to the data that
+ * goes back on the queue.
+ */
+ savemptail->b_cont = mp;
+ *mctlp = nmp;
+ mp = savemp;
+ }
+ /*
+ * putback mp
+ */
+ so_prepend_msg(so, mp, last_tail);
+ }
+ }
+
+ /* fast check so_rcv_head if there is more data */
+ if (partial_read && !(so->so_state & SS_RCVATMARK) &&
+ *mctlp == NULL && uiop->uio_resid > 0 &&
+ !(flags & MSG_PEEK) && so->so_rcv_head != NULL) {
+ goto again;
+ }
+ } else if (!partial_read) {
+ /* Nothing was read so far: report an error or wait. */
+ mutex_enter(&so->so_lock);
+ if (so->so_error != 0) {
+ error = sogeterr(so, !(flags & MSG_PEEK));
+ mutex_exit(&so->so_lock);
+ return (error);
+ }
+ /*
+ * No pending data. Return right away for nonblocking
+ * socket, otherwise sleep waiting for data.
+ */
+ if (!(so->so_state & SS_CANTRCVMORE)) {
+ if ((uiop->uio_fmode & (FNDELAY|FNONBLOCK)) ||
+ (flags & MSG_DONTWAIT)) {
+ error = EWOULDBLOCK;
+ } else {
+ if (so->so_state & (SS_CLOSING |
+ SS_FALLBACK_PENDING)) {
+ mutex_exit(&so->so_lock);
+ error = EINTR;
+ goto done;
+ }
+
+ /* Data showed up in the meantime; start over. */
+ if (so->so_rcv_head != NULL) {
+ goto again1;
+ }
+ so->so_rcv_wakeup = B_TRUE;
+ so->so_rcv_wanted = uiop->uio_resid;
+ if (so->so_rcvtimeo == 0) {
+ /*
+ * Zero means disable timeout.
+ */
+ error = cv_wait_sig(&so->so_rcv_cv,
+ &so->so_lock);
+ } else {
+ clock_t now;
+ time_to_wait(&now, so->so_rcvtimeo);
+ error = cv_timedwait_sig(&so->so_rcv_cv,
+ &so->so_lock, now);
+ }
+ so->so_rcv_wakeup = B_FALSE;
+ so->so_rcv_wanted = 0;
+
+ /*
+ * 0: interrupted by a signal, -1: timed out,
+ * anything else: woken up, look at the queue again.
+ */
+ if (error == 0) {
+ error = EINTR;
+ } else if (error == -1) {
+ error = ETIME;
+ } else {
+ goto again1;
+ }
+ }
+ }
+ mutex_exit(&so->so_lock);
+ }
+ if (reset_atmark && partial_read && !(flags & MSG_PEEK)) {
+ /*
+ * We are past the mark, update state
+ * 4.3BSD and 4.4BSD clears the mark when peeking across it.
+ * The draft Posix socket spec states that the mark should
+ * not be cleared when peeking. We follow the latter.
+ */
+ mutex_enter(&so->so_lock);
+ ASSERT(so_verify_oobstate(so));
+ so->so_state &= ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK);
+ freemsg(so->so_oobmsg);
+ so->so_oobmsg = NULL;
+ ASSERT(so_verify_oobstate(so));
+ mutex_exit(&so->so_lock);
+ }
+ ASSERT(so->so_rcv_wakeup == B_FALSE);
+done:
+ /* so_lock is not held when control reaches this label. */
+ if (sodp != NULL) {
+ mutex_enter(sodp->sod_lockp);
+ if ((sodp->sod_state & SOD_ENABLED) &&
+ (sodp->sod_uioa.uioa_state & UIOA_ENABLED)) {
+ SOD_UIOAFINI(sodp);
+ if (sodp->sod_uioa.uioa_mbytes > 0) {
+ ASSERT(so->so_rcv_q_head != NULL ||
+ so->so_rcv_head != NULL);
+ so->so_rcv_queued -= sod_uioa_mblk(so, NULL);
+ /* Data was moved, so this is not a failure. */
+ if (error == EWOULDBLOCK)
+ error = 0;
+ }
+ }
+ mutex_exit(sodp->sod_lockp);
+ }
+#ifdef DEBUG
+ if (so_debug_length) {
+ mutex_enter(&so->so_lock);
+ ASSERT(so_check_length(so));
+ mutex_exit(&so->so_lock);
+ }
+#endif
+ rvalp->r_val1 = more;
+ return (error);
+}
+
+/*
+ * Add a message of msg_size bytes to the so_rcv_head list.
+ *
+ * The caller must hold so_lock. An M_DATA message is chained (b_cont)
+ * onto the last queued message if that one is M_DATA too and both agree
+ * on the DBLK_UIOA flag; otherwise it becomes a new entry (b_next). The
+ * b_prev pointer of the last head is always left pointing at the last
+ * mblk of that entry.
+ */
+void
+so_enqueue_msg(struct sonode *so, mblk_t *mp, size_t msg_size)
+{
+	mblk_t *lasthead = so->so_rcv_last_head;
+	mblk_t *tailmp;
+
+	ASSERT(MUTEX_HELD(&so->so_lock));
+
+#ifdef DEBUG
+	if (so_debug_length) {
+		ASSERT(so_check_length(so));
+	}
+#endif
+	so->so_rcv_queued += msg_size;
+
+	if (so->so_rcv_head == NULL) {
+		/* empty list */
+		ASSERT(lasthead == NULL);
+		so->so_rcv_head = mp;
+		so->so_rcv_last_head = mp;
+	} else if (DB_TYPE(mp) != M_DATA ||
+	    DB_TYPE(lasthead) != M_DATA ||
+	    (DB_FLAGS(mp) & DBLK_UIOA) != (DB_FLAGS(lasthead) & DBLK_UIOA)) {
+		/* cannot be merged: start a new entry at the end */
+		lasthead->b_next = mp;
+		so->so_rcv_last_head = mp;
+	} else {
+		/* merge into the last entry */
+		ASSERT(lasthead != NULL);
+		ASSERT(lasthead->b_prev != NULL);
+		lasthead->b_prev->b_cont = mp;
+	}
+
+	/* find the last mblk of what was just added */
+	for (tailmp = mp; tailmp->b_cont != NULL; tailmp = tailmp->b_cont)
+		;
+
+	so->so_rcv_last_head->b_prev = tailmp;
+#ifdef DEBUG
+	if (so_debug_length) {
+		ASSERT(so_check_length(so));
+	}
+#endif
+}
+
+/*
+ * Return B_TRUE if at least one M_DATA block of the message holds one
+ * or more bytes, B_FALSE otherwise (including for a NULL message).
+ */
+boolean_t
+somsghasdata(mblk_t *mp)
+{
+	mblk_t *bp = mp;
+
+	while (bp != NULL) {
+		if (bp->b_datap->db_type == M_DATA) {
+			ASSERT(bp->b_wptr >= bp->b_rptr);
+			if (bp->b_wptr > bp->b_rptr)
+				return (B_TRUE);
+		}
+		bp = bp->b_cont;
+	}
+
+	return (B_FALSE);
+}
+
+/*
+ * Flush the read side of sockfs.
+ *
+ * Frees a pending out-of-band message (clearing the related state) and
+ * every message on both receive lists, then resets the queue
+ * bookkeeping. so_lock must be held.
+ *
+ * The caller must be sure that a reader is not already active when the
+ * buffer is being flushed.
+ */
+void
+so_rcv_flush(struct sonode *so)
+{
+	mblk_t *cur;
+
+	ASSERT(MUTEX_HELD(&so->so_lock));
+
+	if (so->so_oobmsg != NULL) {
+		freemsg(so->so_oobmsg);
+		so->so_oobmsg = NULL;
+		so->so_oobmark = 0;
+		so->so_state &=
+		    ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_HADOOBDATA|SS_RCVATMARK);
+	}
+
+	/*
+	 * Free the messages on the processing list. b_next and b_prev
+	 * are list linkage here and are cleared before freemsg().
+	 */
+	for (cur = so->so_rcv_q_head; cur != NULL; cur = so->so_rcv_q_head) {
+		so->so_rcv_q_head = cur->b_next;
+		cur->b_next = cur->b_prev = NULL;
+		freemsg(cur);
+	}
+
+	/* Same for the messages that have not been processed yet. */
+	for (cur = so->so_rcv_head; cur != NULL; cur = so->so_rcv_head) {
+		so->so_rcv_head = cur->b_next;
+		cur->b_next = cur->b_prev = NULL;
+		freemsg(cur);
+	}
+
+	so->so_rcv_last_head = NULL;
+	so->so_rcv_head = NULL;
+	so->so_rcv_q_last_head = NULL;
+	so->so_rcv_q_head = NULL;
+	so->so_rcv_queued = 0;
+}
+
+/*
+ * Handle recv* calls that set MSG_OOB or MSG_OOB together with MSG_PEEK.
+ *
+ * Returns EINVAL if out-of-band data is delivered inline or there is no
+ * unconsumed out-of-band data pending, EWOULDBLOCK if the data has not
+ * arrived yet, otherwise the result of copying the data to uiop. If
+ * `msg' is given, its name/control lengths and flags are cleared.
+ */
+int
+sorecvoob(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, int flags,
+    boolean_t oob_inline)
+{
+	mblk_t *oobmp, *cur;
+	int error = 0;
+
+	dprintso(so, 1, ("sorecvoob(%p, %p, 0x%x)\n", (void *)so, (void *)msg,
+	    flags));
+
+	/*
+	 * There is never any oob data with addresses or control since
+	 * the T_EXDATA_IND does not carry any options.
+	 */
+	if (msg != NULL) {
+		msg->msg_flags = 0;
+		msg->msg_namelen = 0;
+		msg->msg_controllen = 0;
+	}
+
+	mutex_enter(&so->so_lock);
+	ASSERT(so_verify_oobstate(so));
+
+	if (oob_inline ||
+	    (so->so_state & (SS_OOBPEND|SS_HADOOBDATA)) != SS_OOBPEND) {
+		dprintso(so, 1, ("sorecvoob: inline or data consumed\n"));
+		mutex_exit(&so->so_lock);
+		return (EINVAL);
+	}
+	if ((so->so_state & SS_HAVEOOBDATA) == 0) {
+		dprintso(so, 1, ("sorecvoob: no data yet\n"));
+		mutex_exit(&so->so_lock);
+		return (EWOULDBLOCK);
+	}
+
+	ASSERT(so->so_oobmsg != NULL);
+	oobmp = so->so_oobmsg;
+
+	if ((flags & MSG_PEEK) == 0) {
+		/*
+		 * Update the state indicating that the data has been
+		 * consumed. Keep SS_OOBPEND set until data is consumed
+		 * past the mark.
+		 */
+		so->so_oobmsg = NULL;
+		so->so_state ^= SS_HAVEOOBDATA|SS_HADOOBDATA;
+	} else {
+		/*
+		 * Since recv* can not return ENOBUFS we can not use dupmsg.
+		 * Instead we revert to the consolidation private
+		 * allocb_wait plus bcopy, flattening the message into one
+		 * block.
+		 */
+		mblk_t *copy, *src;
+
+		copy = allocb_wait(msgdsize(oobmp), BPRI_MED, STR_NOSIG, NULL);
+		ASSERT(copy);
+
+		for (src = oobmp; src != NULL; src = src->b_cont) {
+			ssize_t len = MBLKL(src);
+
+			bcopy(src->b_rptr, copy->b_wptr, len);
+			copy->b_wptr += len;
+			ASSERT(copy->b_wptr <= copy->b_datap->db_lim);
+		}
+		oobmp = copy;
+	}
+	ASSERT(so_verify_oobstate(so));
+	mutex_exit(&so->so_lock);
+
+	/* Copy out block by block until the data or the space runs out. */
+	for (cur = oobmp; cur != NULL && uiop->uio_resid > 0;
+	    cur = cur->b_cont) {
+		ssize_t n = MBLKL(cur);
+
+		n = MIN(n, uiop->uio_resid);
+		if (n > 0)
+			error = uiomove(cur->b_rptr, n,
+			    UIO_READ, uiop);
+		if (error)
+			break;
+	}
+
+	/* Either the peek copy or the consumed original is freed here. */
+	ASSERT(oobmp->b_next == NULL && oobmp->b_prev == NULL);
+	freemsg(oobmp);
+	return (error);
+}
+
+/*
+ * Allocate and initialize a sonode.
+ *
+ * Arguments:
+ *   sp       - socket parameters; the upcall/downcall versions of its
+ *              socket module must match SOCK_UC_VERSION/SOCK_DC_VERSION
+ *   family, type, protocol - passed on to sonode_init()
+ *   version  - socket version; SOV_DEFAULT selects so_default_version
+ *   sflags   - SOCKET_NOSLEEP makes the allocation non-sleeping
+ *   errorp   - set to EINVAL on a version mismatch and to ENOMEM if
+ *              the allocation fails; not written on success
+ *   cr       - not used here
+ *
+ * Returns the new sonode, or NULL on failure.
+ */
+/* ARGSUSED */
+struct sonode *
+socket_sonode_create(struct sockparams *sp, int family, int type,
+    int protocol, int version, int sflags, int *errorp, struct cred *cr)
+{
+	sonode_t *so;
+	int kmflags;
+
+	/*
+	 * Choose the right set of sonodeops based on the upcall and
+	 * down call version that the protocol has provided
+	 */
+	if (SOCK_UC_VERSION != sp->sp_smod_info->smod_uc_version ||
+	    SOCK_DC_VERSION != sp->sp_smod_info->smod_dc_version) {
+		/*
+		 * mismatch
+		 */
+#ifdef DEBUG
+		/*
+		 * cmn_err() does not append a newline at the CE_CONT
+		 * level, so the message has to carry its own; without it
+		 * the next console/log message runs on from this one.
+		 */
+		cmn_err(CE_CONT,
+		    "protocol and socket module version mismatch\n");
+#endif
+		*errorp = EINVAL;
+		return (NULL);
+	}
+
+	kmflags = (sflags & SOCKET_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
+
+	so = kmem_cache_alloc(socket_cache, kmflags);
+	if (so == NULL) {
+		*errorp = ENOMEM;
+		return (NULL);
+	}
+
+	sonode_init(so, sp, family, type, protocol, &so_sonodeops);
+
+	if (version == SOV_DEFAULT)
+		version = so_default_version;
+
+	so->so_version = (short)version;
+
+	/*
+	 * Set the default protocol properties: the default receive
+	 * water marks, and INFPSZ for the maximum packet and block
+	 * sizes. A protocol can change these values later if it wants.
+	 */
+	so->so_proto_props.sopp_rxhiwat = SOCKET_RECVHIWATER;
+	so->so_proto_props.sopp_rxlowat = SOCKET_RECVLOWATER;
+	so->so_proto_props.sopp_maxpsz = INFPSZ;
+	so->so_proto_props.sopp_maxblk = INFPSZ;
+
+	return (so);
+}
+
+/*
+ * Common initialization of a new socket.
+ *
+ * With a parent (`pso', passive open) the new socket inherits its basic
+ * state from the listener. Without one (active open) a lower protocol
+ * handle is created and activated, and, if the requested protocol is
+ * not the one recorded in the socket parameters, SO_PROTOTYPE is set.
+ *
+ * Returns 0 on success; the error from the protocol's create function
+ * (ENOMEM if it gave none) if no handle could be created; and
+ * EPROTONOSUPPORT if setting SO_PROTOTYPE failed.
+ */
+int
+socket_init_common(struct sonode *so, struct sonode *pso, int flags, cred_t *cr)
+{
+	struct sockparams *params;
+	int create_err = 0;
+	int proto;
+
+	if (pso != NULL) {
+		/*
+		 * Passive open: copy state from the listener under its
+		 * lock. The new sonode needs no locking, since nobody
+		 * else can have a reference to it yet.
+		 */
+		mutex_enter(&pso->so_lock);
+
+		so->so_state |= SS_ISCONNECTED | (pso->so_state & SS_ASYNC);
+		so->so_pgrp = pso->so_pgrp;
+		so->so_sndtimeo = pso->so_sndtimeo;
+		so->so_rcvtimeo = pso->so_rcvtimeo;
+		/*
+		 * Make note of the socket level options. TCP and IP level
+		 * options are already inherited. We could do all this after
+		 * accept is successful but doing it here simplifies code and
+		 * no harm done for error case.
+		 */
+		so->so_options = pso->so_options & (SO_DEBUG|SO_REUSEADDR|
+		    SO_KEEPALIVE| SO_DONTROUTE|SO_BROADCAST|SO_USELOOPBACK|
+		    SO_OOBINLINE|SO_DGRAM_ERRIND|SO_LINGER);
+		so->so_proto_props = pso->so_proto_props;
+		so->so_mode = pso->so_mode;
+
+		mutex_exit(&pso->so_lock);
+
+		if (uioasync.enabled) {
+			sod_sock_init(so, NULL, NULL, NULL, &so->so_lock);
+		}
+		return (0);
+	}
+
+	/* Active open, so create a lower handle. */
+	params = so->so_sockparams;
+	so->so_proto_handle =
+	    params->sp_smod_info->smod_proto_create_func(so->so_family,
+	    so->so_type, so->so_protocol, &so->so_downcalls,
+	    &so->so_mode, &create_err, flags, cr);
+
+	if (so->so_proto_handle == NULL) {
+		ASSERT(create_err != 0);
+		/*
+		 * To be safe; if a lower handle cannot be created, and
+		 * the proto does not give a reason why, assume there
+		 * was a lack of memory.
+		 */
+		return ((create_err != 0) ? create_err : ENOMEM);
+	}
+
+	ASSERT(so->so_downcalls != NULL);
+	ASSERT(so->so_downcalls->sd_send != NULL ||
+	    so->so_downcalls->sd_send_uio != NULL);
+	if (so->so_downcalls->sd_recv_uio != NULL) {
+		ASSERT(so->so_downcalls->sd_poll != NULL);
+		so->so_pollev |= SO_POLLEV_ALWAYS;
+	}
+
+	/*
+	 * Only one version of the upcalls exists at this point, so the
+	 * default set is always the one passed down.
+	 */
+	(*so->so_downcalls->sd_activate)(so->so_proto_handle,
+	    (sock_upper_handle_t)so, &so_upcalls, 0, cr);
+
+	/* Wildcard */
+
+	/*
+	 * FIXME No need for this, the protocol can deal with it in
+	 * sd_create(). Should update ICMP.
+	 */
+	if (so->so_protocol == so->so_sockparams->sp_protocol)
+		return (0);
+
+	/*
+	 * Issue SO_PROTOTYPE setsockopt.
+	 */
+	proto = so->so_protocol;
+	if (socket_setsockopt(so, SOL_SOCKET, SO_PROTOTYPE,
+	    &proto, (t_uscalar_t)sizeof (proto), cr) == 0)
+		return (0);
+
+	/* The option could not be set: close the handle and clean up. */
+	(void) (*so->so_downcalls->sd_close)(so->so_proto_handle, 0, cr);
+
+	mutex_enter(&so->so_lock);
+	so_rcv_flush(so);
+	mutex_exit(&so->so_lock);
+
+	/*
+	 * Setsockopt often fails with ENOPROTOOPT but socket() should
+	 * fail with EPROTONOSUPPORT/EPROTOTYPE.
+	 */
+	return (EPROTONOSUPPORT);
+}
+
+/*
+ * int socket_ioctl_common(struct sonode *so, int cmd, intptr_t arg, int mode,
+ * struct cred *cr, int32_t *rvalp)
+ *
+ * Handle ioctls that manipulate basic socket state; non-blocking,
+ * async, etc.
+ *
+ * Arguments:
+ * so - socket
+ * cmd - ioctl command
+ * arg - command argument; for most commands the address the value
+ * is copied from or to
+ * mode - the FKIOCTL bit is passed on to so_copyin()/so_copyout()
+ * cr, rvalp - not used here
+ *
+ * Returns:
+ * < 0 - ioctl was not handled
+ * >= 0 - ioctl was handled, if > 0, then it is an errno
+ *
+ * Notes:
+ * Assumes the standard receive buffer is used to obtain info for
+ * NREAD.
+ */
+/* ARGSUSED */
+int
+socket_ioctl_common(struct sonode *so, int cmd, intptr_t arg, int mode,
+ struct cred *cr, int32_t *rvalp)
+{
+ switch (cmd) {
+ case FIONBIO: {
+ /* Set or clear SS_NDELAY according to the int32_t argument. */
+ int32_t value;
+
+ if (so_copyin((void *)arg, &value, sizeof (int32_t),
+ (mode & (int)FKIOCTL)))
+ return (EFAULT);
+
+ mutex_enter(&so->so_lock);
+ if (value) {
+ so->so_state |= SS_NDELAY;
+ } else {
+ so->so_state &= ~SS_NDELAY;
+ }
+ mutex_exit(&so->so_lock);
+ return (0);
+ }
+ case FIOASYNC: {
+ /* Set or clear SS_ASYNC according to the int32_t argument. */
+ int32_t value;
+
+ if (so_copyin((void *)arg, &value, sizeof (int32_t),
+ (mode & (int)FKIOCTL)))
+ return (EFAULT);
+
+ mutex_enter(&so->so_lock);
+
+ if (value) {
+ /* Turn on SIGIO */
+ so->so_state |= SS_ASYNC;
+ } else {
+ /* Turn off SIGIO */
+ so->so_state &= ~SS_ASYNC;
+ }
+ mutex_exit(&so->so_lock);
+
+ return (0);
+ }
+
+ case SIOCSPGRP:
+ case FIOSETOWN: {
+ /* Change the signal owner, unless it is already the one asked for. */
+ int error;
+ pid_t pid;
+
+ if (so_copyin((void *)arg, &pid, sizeof (pid_t),
+ (mode & (int)FKIOCTL)))
+ return (EFAULT);
+
+ mutex_enter(&so->so_lock);
+ error = (pid != so->so_pgrp) ? socket_chgpgrp(so, pid) : 0;
+ mutex_exit(&so->so_lock);
+ return (error);
+ }
+ case SIOCGPGRP:
+ case FIOGETOWN:
+ /* Report the current signal owner; so_pgrp is read without so_lock. */
+ if (so_copyout(&so->so_pgrp, (void *)arg,
+ sizeof (pid_t), (mode & (int)FKIOCTL)))
+ return (EFAULT);
+
+ return (0);
+ case SIOCATMARK: {
+ int retval;
+
+ /*
+ * Only protocols that support urgent data can handle ATMARK.
+ */
+ if ((so->so_mode & SM_EXDATA) == 0)
+ return (EINVAL);
+
+ /*
+ * If the protocol is maintaining its own buffer, then the
+ * request must be passed down.
+ */
+ if (so->so_downcalls->sd_recv_uio != NULL)
+ return (-1);
+
+ /* 1 if the read pointer is at the mark, 0 otherwise. */
+ retval = (so->so_state & SS_RCVATMARK) != 0;
+
+ if (so_copyout(&retval, (void *)arg, sizeof (int),
+ (mode & (int)FKIOCTL))) {
+ return (EFAULT);
+ }
+ return (0);
+ }
+
+ case FIONREAD: {
+ int retval;
+
+ /*
+ * If the protocol is maintaining its own buffer, then the
+ * request must be passed down.
+ */
+ if (so->so_downcalls->sd_recv_uio != NULL)
+ return (-1);
+
+ /* Number of queued bytes, capped at INT_MAX. */
+ retval = MIN(so->so_rcv_queued, INT_MAX);
+
+ if (so_copyout(&retval, (void *)arg,
+ sizeof (retval), (mode & (int)FKIOCTL))) {
+ return (EFAULT);
+ }
+ return (0);
+ }
+
+ case _I_GETPEERCRED: {
+ int error = 0;
+
+ /* Refused unless FKIOCTL is set; arg is then used as a pointer directly. */
+ if ((mode & FKIOCTL) == 0)
+ return (EINVAL);
+
+ mutex_enter(&so->so_lock);
+ if ((so->so_mode & SM_CONNREQUIRED) == 0) {
+ error = ENOTSUP;
+ } else if ((so->so_state & SS_ISCONNECTED) == 0) {
+ error = ENOTCONN;
+ } else if (so->so_peercred != NULL) {
+ k_peercred_t *kp = (k_peercred_t *)arg;
+ kp->pc_cr = so->so_peercred;
+ kp->pc_cpid = so->so_cpid;
+ /* A hold is taken on the credential that is handed out. */
+ crhold(so->so_peercred);
+ } else {
+ error = EINVAL;
+ }
+ mutex_exit(&so->so_lock);
+ return (error);
+ }
+ default:
+ /* Not one of ours. */
+ return (-1);
+ }
+}
+
+/*
+ * Process STREAMS related ioctls. If an I_PUSH/I_POP operation is
+ * specified then the socket will fall back to TPI and the ioctl is
+ * issued again through SOP_IOCTL().
+ *
+ * Returns:
+ *   < 0  - ioctl was not handled
+ *   >= 0 - ioctl was handled, if > 0, then it is an errno
+ */
+int
+socket_strioc_common(struct sonode *so, int cmd, intptr_t arg, int mode,
+    struct cred *cr, int32_t *rvalp)
+{
+	int rc;
+
+	switch (cmd) {
+	case I_PUSH:
+	case I_POP:
+		rc = so_tpi_fallback(so, cr);
+		if (rc != 0)
+			return (rc);
+
+		/* Reissue the ioctl */
+		ASSERT(so->so_rcv_q_head == NULL);
+		return (SOP_IOCTL(so, cmd, arg, mode, cr, rvalp));
+
+	case I_LOOK:
+		/* The module name reported is always "sockmod". */
+		if (so_copyout("sockmod", (void *)arg, strlen("sockmod") + 1,
+		    (mode & (int)FKIOCTL)) != 0)
+			return (EFAULT);
+		return (0);
+
+	case _I_INSERT:
+	case _I_REMOVE:
+	case I_FIND:
+	case I_LIST:
+		return (EOPNOTSUPP);
+
+	default:
+		break;
+	}
+
+	return (-1);
+}
+
+/*
+ * Common part of getsockopt() processing for SOL_SOCKET options.
+ *
+ * SO_ERROR, SO_DOMAIN, SO_TYPE, SO_ACCEPTCONN, SO_SNDTIMEO, SO_RCVTIMEO
+ * and SO_SND_BUFINFO are answered here from the sonode. For the other
+ * options listed below only the size of the caller's buffer is checked.
+ *
+ * Arguments:
+ * so - socket
+ * level - option level; anything but SOL_SOCKET is not handled
+ * option_name - option to retrieve
+ * optval - buffer the value is stored in
+ * optlenp - in: size of optval, out: size of the value stored
+ *
+ * Returns:
+ * < 0 - option was not (fully) handled here
+ * 0 - option was handled, optval/optlenp have been filled in
+ * EINVAL - the buffer is too small for the option
+ */
+int
+socket_getopt_common(struct sonode *so, int level, int option_name,
+ void *optval, socklen_t *optlenp)
+{
+ if (level != SOL_SOCKET)
+ return (-1);
+
+ switch (option_name) {
+ case SO_ERROR:
+ case SO_DOMAIN:
+ case SO_TYPE:
+ case SO_ACCEPTCONN: {
+ /* Options with an int32_t value taken from the sonode. */
+ int32_t value;
+ socklen_t optlen = *optlenp;
+
+ if (optlen < (t_uscalar_t)sizeof (int32_t)) {
+ return (EINVAL);
+ }
+
+ switch (option_name) {
+ case SO_ERROR:
+ /* B_TRUE is passed to sogeterr(); presumably it clears the error. */
+ mutex_enter(&so->so_lock);
+ value = sogeterr(so, B_TRUE);
+ mutex_exit(&so->so_lock);
+ break;
+ case SO_DOMAIN:
+ value = so->so_family;
+ break;
+ case SO_TYPE:
+ value = so->so_type;
+ break;
+ case SO_ACCEPTCONN:
+ if (so->so_state & SS_ACCEPTCONN)
+ value = SO_ACCEPTCONN;
+ else
+ value = 0;
+ break;
+ }
+
+ bcopy(&value, optval, sizeof (value));
+ *optlenp = sizeof (value);
+
+ return (0);
+ }
+ case SO_SNDTIMEO:
+ case SO_RCVTIMEO: {
+ /* The timeout is converted with drv_hztousec() and split into sec/usec. */
+ clock_t value;
+ socklen_t optlen = *optlenp;
+
+ if (optlen < (t_uscalar_t)sizeof (struct timeval)) {
+ return (EINVAL);
+ }
+ if (option_name == SO_RCVTIMEO)
+ value = drv_hztousec(so->so_rcvtimeo);
+ else
+ value = drv_hztousec(so->so_sndtimeo);
+ ((struct timeval *)(optval))->tv_sec = value / (1000 * 1000);
+ ((struct timeval *)(optval))->tv_usec = value % (1000 * 1000);
+ *optlenp = sizeof (struct timeval);
+ return (0);
+ }
+ case SO_DEBUG:
+ case SO_REUSEADDR:
+ case SO_KEEPALIVE:
+ case SO_DONTROUTE:
+ case SO_BROADCAST:
+ case SO_USELOOPBACK:
+ case SO_OOBINLINE:
+ case SO_SNDBUF:
+ case SO_RCVBUF:
+#ifdef notyet
+ case SO_SNDLOWAT:
+ case SO_RCVLOWAT:
+#endif /* notyet */
+ case SO_DGRAM_ERRIND: {
+ /*
+ * Only the buffer size is checked for these options; the
+ * break leads to the "not handled" return below.
+ */
+ socklen_t optlen = *optlenp;
+
+ if (optlen < (t_uscalar_t)sizeof (int32_t))
+ return (EINVAL);
+ break;
+ }
+ case SO_LINGER: {
+ /* Size check only, as above, but against struct linger. */
+ socklen_t optlen = *optlenp;
+
+ if (optlen < (t_uscalar_t)sizeof (struct linger))
+ return (EINVAL);
+ break;
+ }
+ case SO_SND_BUFINFO: {
+ /* Report the send-side protocol properties kept in the sonode. */
+ socklen_t optlen = *optlenp;
+
+ if (optlen < (t_uscalar_t)sizeof (struct so_snd_bufinfo))
+ return (EINVAL);
+ ((struct so_snd_bufinfo *)(optval))->sbi_wroff =
+ (so->so_proto_props).sopp_wroff;
+ ((struct so_snd_bufinfo *)(optval))->sbi_maxblk =
+ (so->so_proto_props).sopp_maxblk;
+ ((struct so_snd_bufinfo *)(optval))->sbi_maxpsz =
+ (so->so_proto_props).sopp_maxpsz;
+ ((struct so_snd_bufinfo *)(optval))->sbi_tail =
+ (so->so_proto_props).sopp_tail;
+ *optlenp = sizeof (struct so_snd_bufinfo);
+ return (0);
+ }
+ default:
+ break;
+ }
+
+ /* Unknown option, or one for which only the size was validated. */
+ return (-1);
+}
+
+/*
+ * Destroy a sonode: run sonode_fini() on it and then hand the memory back
+ * to socket_cache.
+ */
+void
+socket_sonode_destroy(struct sonode *so)
+{
+ sonode_fini(so);
+ kmem_cache_free(socket_cache, so);
+}
+
+/*
+ * Sleep on so_copy_cv until STZCNOTIFY shows up in so_copyflag.
+ *
+ * Returns 0 once the flag has been observed. Returns EINTR if the socket
+ * is found to be closing (SS_CLOSING) while waiting, or if cv_wait_sig()
+ * returns 0. STZCNOTIFY is cleared before returning on every path except
+ * the SS_CLOSING one.
+ */
+int
+so_zcopy_wait(struct sonode *so)
+{
+	int rval = 0;
+
+	mutex_enter(&so->so_lock);
+	for (;;) {
+		if (so->so_copyflag & STZCNOTIFY)
+			break;
+
+		if (so->so_state & SS_CLOSING) {
+			/* Bail out without touching so_copyflag. */
+			mutex_exit(&so->so_lock);
+			return (EINTR);
+		}
+
+		if (cv_wait_sig(&so->so_copy_cv, &so->so_lock) == 0) {
+			rval = EINTR;
+			break;
+		}
+	}
+	so->so_copyflag &= ~STZCNOTIFY;
+	mutex_exit(&so->so_lock);
+
+	return (rval);
+}
+
+/*
+ * Receive timer callback; arg is the sonode.
+ *
+ * Clears so_rcv_timer_tid and, if any receive data is queued, reports it
+ * through so_notify_data(). so_lock is taken here and is always released
+ * before returning, either directly or by so_notify_data().
+ */
+void
+so_timer_callback(void *arg)
+{
+	struct sonode *so = (struct sonode *)arg;
+
+	mutex_enter(&so->so_lock);
+	so->so_rcv_timer_tid = 0;
+
+	if (so->so_rcv_queued > 0) {
+		/* so_notify_data() drops so_lock on our behalf. */
+		so_notify_data(so, so->so_rcv_queued);
+		return;
+	}
+
+	mutex_exit(&so->so_lock);
+}
+
+#ifdef DEBUG
+/*
+ * Debug-only consistency check: sum msgdsize() over every message linked
+ * (via b_next) from so_rcv_q_head and from so_rcv_head, and report whether
+ * that total matches so_rcv_queued. The caller must hold so_lock.
+ */
+static boolean_t
+so_check_length(sonode_t *so)
+{
+	mblk_t *mp;
+	int total = 0;
+
+	ASSERT(MUTEX_HELD(&so->so_lock));
+
+	for (mp = so->so_rcv_q_head; mp != NULL; mp = mp->b_next)
+		total += msgdsize(mp);
+
+	for (mp = so->so_rcv_head; mp != NULL; mp = mp->b_next)
+		total += msgdsize(mp);
+
+	return ((total == so->so_rcv_queued) ? B_TRUE : B_FALSE);
+}
+#endif
+
+/*
+ * Return the smod_version recorded in the socket module info attached to
+ * the given sockparams entry. Both sp and sp->sp_smod_info must be non-NULL
+ * (asserted on DEBUG kernels only).
+ */
+int
+so_get_mod_version(struct sockparams *sp)
+{
+ ASSERT(sp != NULL && sp->sp_smod_info != NULL);
+ return (sp->sp_smod_info->smod_version);
+}
+
+/*
+ * so_start_fallback()
+ *
+ * Block new socket operations from coming in, and wait for active operations
+ * to complete. Threads that are sleeping will be woken up so they can get
+ * out of the way.
+ *
+ * The caller must be a reader on so_fallback_rwlock.
+ *
+ * Returns B_FALSE, with the lock still held as reader, if SS_FALLBACK_PENDING
+ * is already set. Otherwise sets SS_FALLBACK_PENDING and returns B_TRUE with
+ * so_fallback_rwlock held as writer.
+ */
+static boolean_t
+so_start_fallback(struct sonode *so)
+{
+ ASSERT(RW_READ_HELD(&so->so_fallback_rwlock));
+
+ mutex_enter(&so->so_lock);
+ if (so->so_state & SS_FALLBACK_PENDING) {
+ mutex_exit(&so->so_lock);
+ return (B_FALSE);
+ }
+ so->so_state |= SS_FALLBACK_PENDING;
+ /*
+ * Poke all threads that might be sleeping. Any operation that comes
+ * in after the cv_broadcast will observe the fallback pending flag
+ * which causes the call to return where it would normally sleep.
+ */
+ cv_broadcast(&so->so_state_cv); /* threads in connect() */
+ cv_broadcast(&so->so_rcv_cv); /* threads in recvmsg() */
+ cv_broadcast(&so->so_snd_cv); /* threads in sendmsg() */
+ mutex_enter(&so->so_acceptq_lock);
+ cv_broadcast(&so->so_acceptq_cv); /* threads in accept() */
+ mutex_exit(&so->so_acceptq_lock);
+ mutex_exit(&so->so_lock);
+
+ /*
+ * The main reason for the rw_tryupgrade call is to provide
+ * observability during the fallback process. We want to
+ * be able to see if there are pending operations.
+ */
+ if (rw_tryupgrade(&so->so_fallback_rwlock) == 0) {
+ /*
+ * It is safe to drop and reacquire the fallback lock, because
+ * we are guaranteed that another fallback cannot take place.
+ */
+ rw_exit(&so->so_fallback_rwlock);
+ DTRACE_PROBE1(pending__ops__wait, (struct sonode *), so);
+ rw_enter(&so->so_fallback_rwlock, RW_WRITER);
+ DTRACE_PROBE1(pending__ops__complete, (struct sonode *), so);
+ }
+
+ return (B_TRUE);
+}
+
+/*
+ * so_end_fallback()
+ *
+ * Allow socket operations back in.
+ *
+ * The caller must be a writer on so_fallback_rwlock. On return
+ * SS_FALLBACK_PENDING has been cleared and the lock has been downgraded,
+ * so the caller is a reader again.
+ */
+static void
+so_end_fallback(struct sonode *so)
+{
+ ASSERT(RW_ISWRITER(&so->so_fallback_rwlock));
+
+ mutex_enter(&so->so_lock);
+ so->so_state &= ~SS_FALLBACK_PENDING;
+ mutex_exit(&so->so_lock);
+
+ rw_downgrade(&so->so_fallback_rwlock);
+}
+
+/*
+ * so_quiesced_cb()
+ *
+ * Callback passed to the protocol during fallback. It is called once
+ * the endpoint is quiescent.
+ *
+ * No requests from the user, no notifications from the protocol, so it
+ * is safe to synchronize the state. Data can also be moved without
+ * risk for reordering.
+ *
+ * NOTE: urgent data is dropped on the floor.
+ *
+ * We do not need to hold so_lock, since there can be only one thread
+ * operating on the sonode.
+ *
+ * sock_handle is the sonode; q is the queue that all queued receive data
+ * is passed to with putnext(). The remaining arguments are handed
+ * unchanged to sotpi_update_state().
+ */
+static void
+so_quiesced_cb(sock_upper_handle_t sock_handle, queue_t *q,
+ struct T_capability_ack *tcap, struct sockaddr *laddr, socklen_t laddrlen,
+ struct sockaddr *faddr, socklen_t faddrlen, short opts)
+{
+ struct sonode *so = (struct sonode *)sock_handle;
+
+ sotpi_update_state(so, tcap, laddr, laddrlen, faddr, faddrlen, opts);
+
+ /* The timer cancel is the one step here performed under so_lock. */
+ mutex_enter(&so->so_lock);
+ SOCKET_TIMER_CANCEL(so);
+ mutex_exit(&so->so_lock);
+ /*
+ * Move data to the STREAM head.
+ */
+ if (so->so_rcv_head != NULL) {
+ /* Append the so_rcv_head chain to the end of the so_rcv_q chain. */
+ if (so->so_rcv_q_last_head == NULL)
+ so->so_rcv_q_head = so->so_rcv_head;
+ else
+ so->so_rcv_q_last_head->b_next = so->so_rcv_head;
+ so->so_rcv_q_last_head = so->so_rcv_last_head;
+ }
+
+ /* Unlink each message in turn and pass it along, keeping the count. */
+ while (so->so_rcv_q_head != NULL) {
+ mblk_t *mp = so->so_rcv_q_head;
+ size_t mlen = msgdsize(mp);
+
+ so->so_rcv_q_head = mp->b_next;
+ mp->b_next = NULL;
+ mp->b_prev = NULL;
+ so->so_rcv_queued -= mlen;
+ putnext(q, mp);
+ }
+ ASSERT(so->so_rcv_queued == 0);
+ so->so_rcv_head = NULL;
+ so->so_rcv_last_head = NULL;
+ so->so_rcv_q_head = NULL;
+ so->so_rcv_q_last_head = NULL;
+
+#ifdef DEBUG
+ if (so->so_oobmsg != NULL || so->so_oobmark > 0) {
+ cmn_err(CE_NOTE, "losing oob data due to tpi fallback\n");
+ }
+#endif
+ /* Urgent data is not carried over: free it and reset the mark. */
+ if (so->so_oobmsg != NULL) {
+ freemsg(so->so_oobmsg);
+ so->so_oobmsg = NULL;
+ }
+ so->so_oobmark = 0;
+
+ ASSERT(so->so_rcv_queued == 0);
+}
+
+/*
+ * so_tpi_fallback()
+ *
+ * This is the fallback initiation routine; things start here.
+ *
+ * Basic strategy:
+ * o Block new socket operations from coming in
+ * o Allocate/initiate info needed by TPI
+ * o Quiesce the connection, at which point we sync
+ * state and move data
+ * o Change operations (sonodeops) associated with the socket
+ * o Unblock threads waiting for the fallback to finish
+ *
+ * The caller must be a reader on so_fallback_rwlock (asserted by
+ * so_start_fallback()).
+ *
+ * Returns 0 on success. Returns EINVAL if the sockparams entry has no
+ * device or the socket module has no fallback function, or if
+ * sotpi_convert_sonode() fails; EAGAIN if so_start_fallback() reports that
+ * a fallback is already pending; otherwise the error set by
+ * sockparams_hold_ephemeral_bydev().
+ */
+int
+so_tpi_fallback(struct sonode *so, struct cred *cr)
+{
+ int error;
+ queue_t *q;
+ struct sockparams *sp;
+ struct sockparams *newsp;
+ so_proto_fallback_func_t fbfunc;
+ boolean_t direct;
+
+ error = 0;
+ sp = so->so_sockparams;
+ fbfunc = sp->sp_smod_info->smod_proto_fallback_func;
+
+ /*
+ * Fallback can only happen if there is a device associated
+ * with the sonode, and the socket module has a fallback function.
+ */
+ if (!SOCKPARAMS_HAS_DEVICE(sp) || fbfunc == NULL)
+ return (EINVAL);
+
+ /*
+ * Initiate fallback; upon success we know that no new requests
+ * will come in from the user.
+ */
+ if (!so_start_fallback(so))
+ return (EAGAIN);
+
+ newsp = sockparams_hold_ephemeral_bydev(so->so_family, so->so_type,
+ so->so_protocol, so->so_sockparams->sp_sdev_info.sd_devpath,
+ KM_SLEEP, &error);
+ if (error != 0)
+ goto out;
+
+ /* Turn off sodirect, if present, before converting the socket. */
+ if (so->so_direct != NULL) {
+ sodirect_t *sodp = so->so_direct;
+ mutex_enter(sodp->sod_lockp);
+
+ so->so_direct->sod_state &= ~SOD_ENABLED;
+ so->so_state &= ~SS_SODIRECT;
+ ASSERT(sodp->sod_uioafh == NULL);
+ mutex_exit(sodp->sod_lockp);
+ }
+
+ /* Turn sonode into a TPI socket */
+ q = sotpi_convert_sonode(so, newsp, &direct, cr);
+ if (q == NULL) {
+ zcmn_err(getzoneid(), CE_WARN,
+ "Failed to convert socket to TPI. Pid = %d\n",
+ curproc->p_pid);
+ SOCKPARAMS_DEC_REF(newsp);
+ error = EINVAL;
+ goto out;
+ }
+
+ /*
+ * Now tell the protocol to start using TPI. so_quiesced_cb will be
+ * called once it's safe to synchronize state.
+ */
+ DTRACE_PROBE1(proto__fallback__begin, struct sonode *, so);
+ /* FIXME assumes this cannot fail. TCP can fail to enter squeue */
+ (*fbfunc)(so->so_proto_handle, q, direct, so_quiesced_cb);
+ DTRACE_PROBE1(proto__fallback__end, struct sonode *, so);
+
+ /*
+ * Free all pending connection indications, i.e., socket_accept() has
+ * not yet pulled the connection off the queue. The transport sent
+ * a T_CONN_IND message for each pending connection to the STREAM head.
+ */
+ so_acceptq_flush(so);
+
+ mutex_enter(&so->so_lock);
+ so->so_state |= SS_FALLBACK_COMP;
+ mutex_exit(&so->so_lock);
+
+ /*
+ * Swap the sonode ops. Socket operations that come in once this
+ * is done will proceed without blocking.
+ */
+ so->so_ops = &sotpi_sonodeops;
+
+ /*
+ * Wake up any threads stuck in poll. This is needed since the poll
+ * head changes when the fallback happens (moves from the sonode to
+ * the STREAMS head).
+ */
+ pollwakeup(&so->so_poll_list, POLLERR);
+out:
+ /* Clears SS_FALLBACK_PENDING and downgrades the lock back to reader. */
+ so_end_fallback(so);
+
+ return (error);
+}
diff --git a/usr/src/uts/common/fs/sockfs/sockcommon_vnops.c b/usr/src/uts/common/fs/sockfs/sockcommon_vnops.c
new file mode 100644
index 0000000000..ffcecfa7c1
--- /dev/null
+++ b/usr/src/uts/common/fs/sockfs/sockcommon_vnops.c
@@ -0,0 +1,482 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/types.h>
+#include <sys/t_lock.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bitmap.h>
+#include <sys/debug.h>
+#include <sys/errno.h>
+#include <sys/strsubr.h>
+#include <sys/cmn_err.h>
+#include <sys/sysmacros.h>
+#include <sys/filio.h>
+#include <sys/flock.h>
+#include <sys/stat.h>
+#include <sys/share.h>
+
+#include <sys/vfs.h>
+#include <sys/vfs_opreg.h>
+
+#include <sys/sockio.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/strsun.h>
+
+#include <fs/sockfs/sockcommon.h>
+#include <fs/sockfs/socktpi.h>
+
+/*
+ * Generic vnode ops
+ */
+static int socket_vop_open(struct vnode **, int, struct cred *,
+ caller_context_t *);
+static int socket_vop_close(struct vnode *, int, int, offset_t,
+ struct cred *, caller_context_t *);
+static int socket_vop_read(struct vnode *, struct uio *, int,
+ struct cred *, caller_context_t *);
+static int socket_vop_write(struct vnode *, struct uio *, int,
+ struct cred *, caller_context_t *);
+static int socket_vop_ioctl(struct vnode *, int, intptr_t, int,
+ struct cred *, int32_t *, caller_context_t *);
+static int socket_vop_setfl(struct vnode *, int, int, cred_t *,
+ caller_context_t *);
+static int socket_vop_getattr(struct vnode *, struct vattr *, int,
+ struct cred *, caller_context_t *);
+static int socket_vop_setattr(struct vnode *, struct vattr *, int,
+ struct cred *, caller_context_t *);
+static int socket_vop_access(struct vnode *, int, int, struct cred *,
+ caller_context_t *);
+static int socket_vop_fsync(struct vnode *, int, struct cred *,
+ caller_context_t *);
+static void socket_vop_inactive(struct vnode *, struct cred *,
+ caller_context_t *);
+static int socket_vop_fid(struct vnode *, struct fid *,
+ caller_context_t *);
+static int socket_vop_seek(struct vnode *, offset_t, offset_t *,
+ caller_context_t *);
+static int socket_vop_poll(struct vnode *, short, int, short *,
+ struct pollhead **, caller_context_t *);
+
+extern int socket_close_internal(struct sonode *, int, cred_t *);
+extern void socket_destroy_internal(struct sonode *, cred_t *);
+
+/*
+ * Vnode operations for sockets. Each template entry pairs a VOPNAME_*
+ * operation name with the socket_vop_* handler defined in this file;
+ * VOPNAME_DISPOSE is mapped to fs_error. The table ends with a NULL, NULL
+ * entry.
+ *
+ * NOTE(review): socket_vnodeops is not assigned anywhere in this file;
+ * presumably it is built from the template elsewhere - confirm.
+ */
+struct vnodeops *socket_vnodeops;
+const fs_operation_def_t socket_vnodeops_template[] = {
+ VOPNAME_OPEN, { .vop_open = socket_vop_open },
+ VOPNAME_CLOSE, { .vop_close = socket_vop_close },
+ VOPNAME_READ, { .vop_read = socket_vop_read },
+ VOPNAME_WRITE, { .vop_write = socket_vop_write },
+ VOPNAME_IOCTL, { .vop_ioctl = socket_vop_ioctl },
+ VOPNAME_SETFL, { .vop_setfl = socket_vop_setfl },
+ VOPNAME_GETATTR, { .vop_getattr = socket_vop_getattr },
+ VOPNAME_SETATTR, { .vop_setattr = socket_vop_setattr },
+ VOPNAME_ACCESS, { .vop_access = socket_vop_access },
+ VOPNAME_FSYNC, { .vop_fsync = socket_vop_fsync },
+ VOPNAME_INACTIVE, { .vop_inactive = socket_vop_inactive },
+ VOPNAME_FID, { .vop_fid = socket_vop_fid },
+ VOPNAME_SEEK, { .vop_seek = socket_vop_seek },
+ VOPNAME_POLL, { .vop_poll = socket_vop_poll },
+ VOPNAME_DISPOSE, { .error = fs_error },
+ NULL, NULL
+};
+
+
+/*
+ * generic vnode ops
+ */
+
+/*
+ * Open of a socket vnode: take one more open reference on the sonode by
+ * incrementing so_count under so_lock. Always returns 0.
+ */
+/*ARGSUSED*/
+static int
+socket_vop_open(struct vnode **vpp, int flag, struct cred *cr,
+    caller_context_t *ct)
+{
+	struct vnode *vp = *vpp;
+	struct sonode *so = VTOSO(vp);
+
+	ASSERT(vp->v_type == VSOCK);
+
+	flag &= ~FCREAT;		/* paranoia */
+	mutex_enter(&so->so_lock);
+	so->so_count++;
+	/*
+	 * Check for wraparound while so_lock is still held, so that the
+	 * value examined is the one this thread just produced and not one
+	 * changed by a concurrent open or close.
+	 */
+	ASSERT(so->so_count != 0);
+	mutex_exit(&so->so_lock);
+
+	return (0);
+}
+
+/*
+ * Close of a socket vnode.
+ *
+ * Record locks and shares owned by the calling process are always cleaned
+ * up, as is STREAMS state when the vnode has a stream attached. Nothing
+ * else happens while count is greater than 1. Otherwise so_count is
+ * decremented, and when it reaches zero the connection shutdown is started
+ * through socket_close_internal(), whose result is returned.
+ */
+/*ARGSUSED*/
+static int
+socket_vop_close(struct vnode *vp, int flag, int count, offset_t offset,
+    struct cred *cr, caller_context_t *ct)
+{
+	struct sonode *so = VTOSO(vp);
+	boolean_t last_ref;
+
+	ASSERT(vp->v_type == VSOCK);
+
+	cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
+	cleanshares(vp, ttoproc(curthread)->p_pid);
+
+	if (vp->v_stream)
+		strclean(vp);
+
+	if (count > 1) {
+		dprint(2, ("socket_vop_close: count %d\n", count));
+		return (0);
+	}
+
+	mutex_enter(&so->so_lock);
+	last_ref = (--so->so_count == 0) ? B_TRUE : B_FALSE;
+	mutex_exit(&so->so_lock);
+
+	if (!last_ref)
+		return (0);
+
+	/*
+	 * Initiate connection shutdown.
+	 */
+	return (socket_close_internal(so, flag, cr));
+}
+
+/*
+ * read(2) on a socket: receive into uiop through socket_recvmsg() using an
+ * all-zero message header.
+ */
+/*ARGSUSED2*/
+static int
+socket_vop_read(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cr,
+    caller_context_t *ct)
+{
+	struct nmsghdr msg;
+	struct sonode *so = VTOSO(vp);
+
+	ASSERT(vp->v_type == VSOCK);
+
+	bzero(&msg, sizeof (msg));
+	return (socket_recvmsg(so, &msg, uiop, cr));
+}
+
+/*
+ * write(2) on a socket: send from uiop through socket_sendmsg(). The
+ * message header is zeroed, except that MSG_EOR is set in msg_flags when
+ * the socket is not in byte stream mode (SM_BYTESTREAM clear).
+ */
+/*ARGSUSED2*/
+static int
+socket_vop_write(struct vnode *vp, struct uio *uiop, int ioflag,
+    struct cred *cr, caller_context_t *ct)
+{
+	struct nmsghdr msg;
+	struct sonode *so = VTOSO(vp);
+
+	ASSERT(vp->v_type == VSOCK);
+
+	bzero(&msg, sizeof (msg));
+
+	/* Anything that is not a byte stream gets an end-of-record mark. */
+	if ((so->so_mode & SM_BYTESTREAM) == 0)
+		msg.msg_flags = MSG_EOR;
+
+	return (socket_sendmsg(so, &msg, uiop, cr));
+}
+
+/*
+ * ioctl(2) on a socket: pass the request straight to socket_ioctl() and
+ * return its result.
+ */
+/*ARGSUSED4*/
+static int
+socket_vop_ioctl(struct vnode *vp, int cmd, intptr_t arg, int mode,
+    struct cred *cr, int32_t *rvalp, caller_context_t *ct)
+{
+	struct sonode *so = VTOSO(vp);
+	int error;
+
+	ASSERT(vp->v_type == VSOCK);
+
+	error = socket_ioctl(so, cmd, arg, mode, cr, rvalp);
+	return (error);
+}
+
+/*
+ * fcntl(F_SETFL) handler. Any flags are allowed. FNDELAY and FNONBLOCK are
+ * mirrored into so_state (SS_NDELAY / SS_NONBLOCK) so that they can be
+ * inherited from listener to acceptor.
+ *
+ * When FASYNC changes and the socket is not SOV_STREAM, a FIOASYNC ioctl
+ * is issued and its result is returned; in all other cases 0 is returned.
+ */
+/* ARGSUSED */
+static int
+socket_vop_setfl(vnode_t *vp, int oflags, int nflags, cred_t *cr,
+    caller_context_t *ct)
+{
+	struct sonode *so = VTOSO(vp);
+	int async;
+	int32_t rv;
+
+	ASSERT(vp->v_type == VSOCK);
+
+	mutex_enter(&so->so_lock);
+	if ((nflags & FNDELAY) == 0)
+		so->so_state &= ~SS_NDELAY;
+	else
+		so->so_state |= SS_NDELAY;
+	if ((nflags & FNONBLOCK) == 0)
+		so->so_state &= ~SS_NONBLOCK;
+	else
+		so->so_state |= SS_NONBLOCK;
+	mutex_exit(&so->so_lock);
+
+	/* Treat an already asynchronous socket as if FASYNC had been set. */
+	if (so->so_state & SS_ASYNC)
+		oflags |= FASYNC;
+
+	/*
+	 * Sets/clears the SS_ASYNC flag based on the presence/absence
+	 * of the FASYNC flag passed to fcntl(F_SETFL).
+	 * This exists solely for BSD fcntl() FASYNC compatibility.
+	 * Nothing to do if FASYNC is unchanged or this is a SOV_STREAM socket.
+	 */
+	if (((oflags ^ nflags) & FASYNC) == 0 || so->so_version == SOV_STREAM)
+		return (0);
+
+	/*
+	 * For non-TPI sockets all we have to do is set/remove the
+	 * SS_ASYNC bit, but for TPI it is more involved. For that
+	 * reason we delegate the job to the protocol's ioctl handler.
+	 */
+	async = nflags & FASYNC;
+	return (socket_ioctl(so, FIOASYNC, (intptr_t)&async, FKIOCTL,
+	    cr, &rv));
+}
+
+
+/*
+ * Get the made up attributes for the vnode.
+ * 4.3BSD returns the current time for all the timestamps.
+ * 4.4BSD returns 0 for all the timestamps.
+ * Here the access, modification and change times kept in the sotpi_info_t
+ * are reported when SOCK_IS_NONSTR() is false; otherwise they are zero.
+ *
+ * Just like in BSD there is no effect on the underlying file system node
+ * bound to an AF_UNIX pathname.
+ *
+ * When sockmod has been popped this will act just like a stream. Since
+ * a socket is always a clone there is no need to inspect the attributes
+ * of the "realvp".
+ */
+/* ARGSUSED */
+int
+socket_vop_getattr(struct vnode *vp, struct vattr *vap, int flags,
+    struct cred *cr, caller_context_t *ct)
+{
+	static int sonode_shift = 0;
+	struct sonode *so;
+	dev_t sock_fsid;
+
+	/*
+	 * Calculate (once) how far a sonode pointer can be shifted down
+	 * while still keeping it unique; used for va_nodeid below.
+	 */
+	if (sonode_shift == 0)
+		sonode_shift = highbit(sizeof (struct sonode));
+	ASSERT(sonode_shift > 0);
+
+	so = VTOSO(vp);
+	sock_fsid = sockdev;
+
+	if (so->so_version != SOV_STREAM) {
+		vap->va_type = vp->v_type;
+		vap->va_mode = S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|
+		    S_IROTH|S_IWOTH;
+	} else {
+		/*
+		 * The imaginary "sockmod" has been popped - act
+		 * as a stream
+		 */
+		vap->va_type = VCHR;
+		vap->va_mode = 0;
+	}
+	vap->va_gid = 0;
+	vap->va_uid = 0;
+	vap->va_fsid = sock_fsid;
+
+	/*
+	 * If the va_nodeid is > MAX_USHORT, then i386 stats might fail.
+	 * So we shift down the sonode pointer to try and get the most
+	 * uniqueness into 16-bits.
+	 */
+	vap->va_nodeid = ((ino_t)so >> sonode_shift) & 0xFFFF;
+	vap->va_nlink = 0;
+	vap->va_size = 0;
+
+	/*
+	 * We need to zero out the va_rdev to avoid some fstats getting
+	 * EOVERFLOW. This also mimics SunOS 4.x and BSD behavior.
+	 */
+	vap->va_rdev = (dev_t)0;
+	vap->va_blksize = MAXBSIZE;
+	vap->va_nblocks = btod(vap->va_size);
+
+	if (SOCK_IS_NONSTR(so)) {
+		vap->va_atime.tv_sec = 0;
+		vap->va_mtime.tv_sec = 0;
+		vap->va_ctime.tv_sec = 0;
+	} else {
+		sotpi_info_t *sti = SOTOTPI(so);
+
+		mutex_enter(&so->so_lock);
+		vap->va_atime.tv_sec = sti->sti_atime;
+		vap->va_mtime.tv_sec = sti->sti_mtime;
+		vap->va_ctime.tv_sec = sti->sti_ctime;
+		mutex_exit(&so->so_lock);
+	}
+
+	vap->va_atime.tv_nsec = 0;
+	vap->va_mtime.tv_nsec = 0;
+	vap->va_ctime.tv_nsec = 0;
+	vap->va_seq = 0;
+
+	return (0);
+}
+
+/*
+ * Set attributes.
+ * Just like in BSD there is no effect on the underlying file system node
+ * bound to an AF_UNIX pathname.
+ *
+ * When sockmod has been popped this will act just like a stream. Since
+ * a socket is always a clone there is no need to modify the attributes
+ * of the "realvp".
+ *
+ * Only the time stamps are acted upon, and only when SOCK_IS_NONSTR() is
+ * false. Always returns 0.
+ */
+/* ARGSUSED */
+int
+socket_vop_setattr(struct vnode *vp, struct vattr *vap, int flags,
+    struct cred *cr, caller_context_t *ct)
+{
+	struct sonode *so = VTOSO(vp);
+	sotpi_info_t *sti;
+
+	/* Non-STREAMS sockets keep no time stamps to update. */
+	if (SOCK_IS_NONSTR(so))
+		return (0);
+
+	sti = SOTOTPI(so);
+
+	mutex_enter(&so->so_lock);
+	if (vap->va_mask & AT_ATIME)
+		sti->sti_atime = vap->va_atime.tv_sec;
+	if (vap->va_mask & AT_MTIME) {
+		/* A modification also stamps the change time with "now". */
+		sti->sti_mtime = vap->va_mtime.tv_sec;
+		sti->sti_ctime = gethrestime_sec();
+	}
+	mutex_exit(&so->so_lock);
+
+	return (0);
+}
+
+/*
+ * Check if user is allowed to access vp. For non-STREAMS based sockets,
+ * there might not be a device attached to the file system. So for those
+ * types of sockets there are no permissions to check and 0 is returned.
+ * For all other sockets the check is made against the device vnode
+ * recorded in the sockparams entry.
+ *
+ * XXX Should there be some other mechanism to check access rights?
+ */
+/*ARGSUSED*/
+int
+socket_vop_access(struct vnode *vp, int mode, int flags, struct cred *cr,
+    caller_context_t *ct)
+{
+	struct sonode *so = VTOSO(vp);
+
+	if (SOCK_IS_NONSTR(so))
+		return (0);
+
+	ASSERT(so->so_sockparams->sp_sdev_info.sd_vnode != NULL);
+	return (VOP_ACCESS(so->so_sockparams->sp_sdev_info.sd_vnode,
+	    mode, flags, cr, NULL));
+}
+
+/*
+ * 4.3BSD and 4.4BSD fail a fsync on a socket with EINVAL.
+ * This code does the same to be compatible and also to not give an
+ * application the impression that the data has actually been "synced"
+ * to the other end of the connection.
+ *
+ * All arguments are ignored; the return value is always EINVAL.
+ */
+/* ARGSUSED */
+int
+socket_vop_fsync(struct vnode *vp, int syncflag, struct cred *cr,
+ caller_context_t *ct)
+{
+ return (EINVAL);
+}
+
+/*
+ * Inactive handler for socket vnodes. Drops one hold on the vnode under
+ * v_lock; if that was the last hold, the sonode is torn down through
+ * socket_destroy_internal(). Panics if v_count is already below 1 on entry.
+ */
+/*ARGSUSED*/
+static void
+socket_vop_inactive(struct vnode *vp, struct cred *cr, caller_context_t *ct)
+{
+ struct sonode *so = VTOSO(vp);
+
+ ASSERT(vp->v_type == VSOCK);
+
+ mutex_enter(&vp->v_lock);
+ /*
+ * If no one has reclaimed the vnode, remove from the
+ * cache now.
+ */
+ if (vp->v_count < 1)
+ cmn_err(CE_PANIC, "socket_inactive: Bad v_count");
+
+ /*
+ * Drop the temporary hold by vn_rele now
+ */
+ if (--vp->v_count != 0) {
+ /* Someone else still holds the vnode; leave it alone. */
+ mutex_exit(&vp->v_lock);
+ return;
+ }
+ mutex_exit(&vp->v_lock);
+
+
+ ASSERT(!vn_has_cached_data(vp));
+
+ /* socket specific clean-up */
+ socket_destroy_internal(so, cr);
+}
+
+/*
+ * File identifiers are not provided for sockets: all arguments are ignored
+ * and EINVAL is always returned.
+ */
+/* ARGSUSED */
+int
+socket_vop_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
+{
+ return (EINVAL);
+}
+
+/*
+ * Sockets are not seekable.
+ * (and there is a bug to fix STREAMS to make them fail this as well).
+ *
+ * All arguments are ignored; the return value is always ESPIPE.
+ */
+/*ARGSUSED*/
+int
+socket_vop_seek(struct vnode *vp, offset_t ooff, offset_t *noffp,
+ caller_context_t *ct)
+{
+ return (ESPIPE);
+}
+
+/*
+ * poll(2) on a socket: pass the request straight to socket_poll() and
+ * return its result.
+ */
+/*ARGSUSED*/
+static int
+socket_vop_poll(struct vnode *vp, short events, int anyyet, short *reventsp,
+    struct pollhead **phpp, caller_context_t *ct)
+{
+	struct sonode *so = VTOSO(vp);
+	int error;
+
+	ASSERT(vp->v_type == VSOCK);
+
+	error = socket_poll(so, events, anyyet, reventsp, phpp);
+	return (error);
+}
diff --git a/usr/src/uts/common/fs/sockfs/socknotify.c b/usr/src/uts/common/fs/sockfs/socknotify.c
new file mode 100644
index 0000000000..788efa9ff5
--- /dev/null
+++ b/usr/src/uts/common/fs/sockfs/socknotify.c
@@ -0,0 +1,379 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/stropts.h>
+#include <sys/socketvar.h>
+#include <sys/ksocket.h>
+#include <io/ksocket/ksocket_impl.h>
+#include <fs/sockfs/sockcommon.h>
+
+/*
+ * There can only be a single thread waiting for data (enforced by
+ * so_lock_read()), whereas for write there might be multiple threads
+ * waiting for transmit buffers. So therefore we use cv_broadcast for
+ * write and cv_signal for read.
+ *
+ * Each macro does nothing unless the corresponding so_*_wakeup flag is
+ * set, and clears that flag before signalling the condition variable.
+ *
+ * Both macros expand to a brace-enclosed block rather than a single
+ * statement, so "if (x) SO_WAKEUP_READER(so); else ..." will not compile;
+ * wrap such a use in braces.
+ */
+#define SO_WAKEUP_READER(so) { \
+ if ((so)->so_rcv_wakeup) { \
+ (so)->so_rcv_wakeup = B_FALSE; \
+ cv_signal(&(so)->so_rcv_cv); \
+ } \
+}
+
+#define SO_WAKEUP_WRITER(so) { \
+ if ((so)->so_snd_wakeup) { \
+ (so)->so_snd_wakeup = B_FALSE; \
+ cv_broadcast(&(so)->so_snd_cv); \
+ } \
+}
+
+static int i_so_notify_last_rx(struct sonode *, int *, int *);
+static int i_so_notify_last_tx(struct sonode *, int *, int *);
+
+/*
+ * The notification functions must be called with so_lock held,
+ * and they will all *drop* so_lock before returning.
+ */
+
+/*
+ * The connection has been established; notify whoever is waiting for it.
+ *
+ * Kernel sockets get the "connected" ksocket callback. All other sockets
+ * get SOCKETSIG_WRITE and, once so_lock has been dropped, a POLLOUT
+ * pollwakeup().
+ *
+ * Entered with so_lock held; returns with it released.
+ */
+void
+so_notify_connected(struct sonode *so)
+{
+	ASSERT(MUTEX_HELD(&so->so_lock));
+
+	if (!IS_KERNEL_SOCKET(so)) {
+		socket_sendsig(so, SOCKETSIG_WRITE);
+		mutex_exit(&so->so_lock);
+		pollwakeup(&so->so_poll_list, POLLOUT);
+	} else {
+		KSOCKET_CALLBACK(so, connected, 0);
+		mutex_exit(&so->so_lock);
+	}
+
+	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
+}
+
+/*
+ * The socket is disconnecting, so no more data can be sent. Wake up
+ * anyone that is waiting to send data.
+ *
+ * Kernel sockets have their writers woken and get the "cantsendmore"
+ * callback. For other sockets the final write notification is made through
+ * i_so_notify_last_tx(); the signal and poll wakeup are sent only if that
+ * notification had not been made before.
+ *
+ * Entered with so_lock held; returns with it released.
+ */
+void
+so_notify_disconnecting(struct sonode *so)
+{
+	int poll_events = 0;
+	int sig_events = 0;
+
+	ASSERT(MUTEX_HELD(&so->so_lock));
+
+	if (IS_KERNEL_SOCKET(so)) {
+		SO_WAKEUP_WRITER(so);
+		KSOCKET_CALLBACK(so, cantsendmore, 0);
+		mutex_exit(&so->so_lock);
+	} else {
+		int first_time;
+
+		first_time = i_so_notify_last_tx(so, &poll_events,
+		    &sig_events);
+		if (first_time)
+			socket_sendsig(so, sig_events);
+		mutex_exit(&so->so_lock);
+		if (first_time)
+			pollwakeup(&so->so_poll_list, poll_events);
+	}
+
+	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
+}
+
+/*
+ * The socket is disconnected, so no more data can be sent or received.
+ * Wake up anyone that is waiting to send or receive data.
+ *
+ * The final write and read notifications are made first. Kernel sockets
+ * then get the "disconnected" callback with the given error; other sockets
+ * get whatever signal and poll events those final notifications produced.
+ *
+ * Entered with so_lock held; returns with it released.
+ */
+void
+so_notify_disconnected(struct sonode *so, int error)
+{
+	int poll_events = 0;
+	int sig_events = 0;
+
+	ASSERT(MUTEX_HELD(&so->so_lock));
+
+	(void) i_so_notify_last_tx(so, &poll_events, &sig_events);
+	(void) i_so_notify_last_rx(so, &poll_events, &sig_events);
+
+	if (!IS_KERNEL_SOCKET(so)) {
+		if (sig_events != 0)
+			socket_sendsig(so, sig_events);
+		mutex_exit(&so->so_lock);
+		if (poll_events != 0)
+			pollwakeup(&so->so_poll_list, poll_events);
+	} else {
+		KSOCKET_CALLBACK(so, disconnected, error);
+		mutex_exit(&so->so_lock);
+	}
+
+	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
+}
+
+/*
+ * The socket is writeable. Wake up anyone waiting to send data.
+ *
+ * Sleeping writers are woken for every kind of socket. Kernel sockets
+ * additionally get the "cansend" callback; other sockets get
+ * SOCKETSIG_WRITE and, after so_lock is dropped, a POLLOUT pollwakeup().
+ *
+ * Entered with so_lock held; returns with it released.
+ */
+void
+so_notify_writable(struct sonode *so)
+{
+	ASSERT(MUTEX_HELD(&so->so_lock));
+
+	SO_WAKEUP_WRITER(so);
+
+	if (!IS_KERNEL_SOCKET(so)) {
+		socket_sendsig(so, SOCKETSIG_WRITE);
+		mutex_exit(&so->so_lock);
+		pollwakeup(&so->so_poll_list, POLLOUT);
+	} else {
+		KSOCKET_CALLBACK(so, cansend, 0);
+		mutex_exit(&so->so_lock);
+	}
+
+	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
+}
+
+/*
+ * Data is available, so wake up anyone waiting for data.
+ *
+ * A sleeping reader is woken for every kind of socket. Kernel sockets get
+ * the "newdata" callback with qlen. Other sockets get SOCKETSIG_READ, plus
+ * a POLLIN|POLLRDNORM pollwakeup() when SO_POLLEV_IN or SO_POLLEV_ALWAYS is
+ * set in so_pollev; SO_POLLEV_IN is cleared in that case.
+ *
+ * Entered with so_lock held; returns with it released.
+ */
+void
+so_notify_data(struct sonode *so, size_t qlen)
+{
+	ASSERT(MUTEX_HELD(&so->so_lock));
+
+	SO_WAKEUP_READER(so);
+
+	if (IS_KERNEL_SOCKET(so)) {
+		KSOCKET_CALLBACK(so, newdata, qlen);
+		mutex_exit(&so->so_lock);
+	} else {
+		boolean_t wake_pollers;
+
+		socket_sendsig(so, SOCKETSIG_READ);
+
+		wake_pollers = (so->so_pollev &
+		    (SO_POLLEV_IN|SO_POLLEV_ALWAYS)) ? B_TRUE : B_FALSE;
+		if (wake_pollers)
+			so->so_pollev &= ~SO_POLLEV_IN;
+		mutex_exit(&so->so_lock);
+
+		/* pollwakeup() is only ever called without so_lock held. */
+		if (wake_pollers)
+			pollwakeup(&so->so_poll_list, POLLIN|POLLRDNORM);
+	}
+
+	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
+}
+
+/*
+ * Transient error. Wake up anyone waiting to send or receive data.
+ *
+ * Sleeping writers and readers are woken for every kind of socket. Kernel
+ * sockets get the "error" callback. Other sockets get both the write and
+ * read signals, have SO_POLLEV_IN cleared, and get a
+ * POLLOUT|POLLIN|POLLRDNORM pollwakeup() once so_lock has been dropped.
+ *
+ * Entered with so_lock held; returns with it released.
+ */
+void
+so_notify_error(struct sonode *so)
+{
+	ASSERT(MUTEX_HELD(&so->so_lock));
+
+	SO_WAKEUP_WRITER(so);
+	SO_WAKEUP_READER(so);
+
+	if (!IS_KERNEL_SOCKET(so)) {
+		socket_sendsig(so, SOCKETSIG_WRITE|SOCKETSIG_READ);
+		so->so_pollev &= ~SO_POLLEV_IN;
+		mutex_exit(&so->so_lock);
+		pollwakeup(&so->so_poll_list, POLLOUT|POLLIN|POLLRDNORM);
+	} else {
+		KSOCKET_CALLBACK(so, error, 0);
+		mutex_exit(&so->so_lock);
+	}
+
+	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
+}
+
+/*
+ * Out-of-band data is incoming, notify any interested parties.
+ *
+ * Sends SOCKETSIG_URG and, after so_lock has been dropped, issues a
+ * POLLRDBAND pollwakeup().
+ *
+ * Like the other notification functions, this must be entered with so_lock
+ * held and returns with it released; both conditions are now asserted so
+ * that a caller breaking the contract is caught on DEBUG kernels.
+ */
+void
+so_notify_oobsig(struct sonode *so)
+{
+	ASSERT(MUTEX_HELD(&so->so_lock));
+
+	socket_sendsig(so, SOCKETSIG_URG);
+	mutex_exit(&so->so_lock);
+	pollwakeup(&so->so_poll_list, POLLRDBAND);
+
+	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
+}
+
+/*
+ * Received out-of-band data. If the OOB data is delivered inline, then
+ * in addition to the regular OOB notification, anyone waiting for normal
+ * data is also notified.
+ *
+ * Kernel sockets get the "oobdata" callback. Other sockets get a
+ * POLLRDBAND pollwakeup(); when the data is inline they also get
+ * SOCKETSIG_READ, have SO_POLLEV_IN cleared, and POLLIN|POLLRDNORM is
+ * added to the poll events.
+ *
+ * Entered with so_lock held; returns with it released.
+ */
+void
+so_notify_oobdata(struct sonode *so, boolean_t oob_inline)
+{
+	ASSERT(MUTEX_HELD(&so->so_lock));
+	SOD_UIOAFINI(so->so_direct);
+
+	/*
+	 * Wake a sleeping reader while so_lock is still held. The wakeup
+	 * tests and clears so_rcv_wakeup, so doing it after the lock has
+	 * been dropped can race with a reader that is about to sleep and
+	 * lose the wakeup.
+	 */
+	if (oob_inline) {
+		SO_WAKEUP_READER(so);
+	}
+
+	if (IS_KERNEL_SOCKET(so)) {
+		KSOCKET_CALLBACK(so, oobdata, 0);
+		mutex_exit(&so->so_lock);
+	} else if (oob_inline) {
+		socket_sendsig(so, SOCKETSIG_READ);
+		so->so_pollev &= ~SO_POLLEV_IN;
+		mutex_exit(&so->so_lock);
+		pollwakeup(&so->so_poll_list,
+		    POLLRDBAND|POLLIN|POLLRDNORM);
+	} else {
+		mutex_exit(&so->so_lock);
+		pollwakeup(&so->so_poll_list, POLLRDBAND);
+	}
+
+	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
+}
+
+/*
+ * End-of-file has been reached, so the peer will send no new data. Wake up
+ * anyone that is waiting for data.
+ *
+ * The final read notification is made first. Kernel sockets then have
+ * their reader woken and get the "cantrecvmore" callback; other sockets
+ * get whatever signal and poll events the final notification produced.
+ *
+ * Entered with so_lock held; returns with it released.
+ */
+void
+so_notify_eof(struct sonode *so)
+{
+	int poll_events = 0;
+	int sig_events = 0;
+
+	ASSERT(MUTEX_HELD(&so->so_lock));
+
+	(void) i_so_notify_last_rx(so, &poll_events, &sig_events);
+
+	if (!IS_KERNEL_SOCKET(so)) {
+		if (sig_events != 0)
+			socket_sendsig(so, sig_events);
+		mutex_exit(&so->so_lock);
+		if (poll_events != 0)
+			pollwakeup(&so->so_poll_list, poll_events);
+	} else {
+		SO_WAKEUP_READER(so);
+		KSOCKET_CALLBACK(so, cantrecvmore, 0);
+		mutex_exit(&so->so_lock);
+	}
+
+	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
+}
+
+/*
+ * Wake up anyone waiting for a new connection.
+ *
+ * Kernel sockets get the "newconn" callback with so_rcv_queued. Other
+ * sockets get SOCKETSIG_READ, plus a POLLIN|POLLRDNORM pollwakeup() when
+ * SO_POLLEV_IN or SO_POLLEV_ALWAYS is set in so_pollev; SO_POLLEV_IN is
+ * cleared in that case.
+ *
+ * Entered with so_lock held; returns with it released.
+ */
+void
+so_notify_newconn(struct sonode *so)
+{
+	ASSERT(MUTEX_HELD(&so->so_lock));
+
+	if (IS_KERNEL_SOCKET(so)) {
+		KSOCKET_CALLBACK(so, newconn, so->so_rcv_queued);
+		mutex_exit(&so->so_lock);
+	} else {
+		boolean_t wake_pollers;
+
+		socket_sendsig(so, SOCKETSIG_READ);
+
+		wake_pollers = (so->so_pollev &
+		    (SO_POLLEV_IN|SO_POLLEV_ALWAYS)) ? B_TRUE : B_FALSE;
+		if (wake_pollers)
+			so->so_pollev &= ~SO_POLLEV_IN;
+		mutex_exit(&so->so_lock);
+
+		/* pollwakeup() is only ever called without so_lock held. */
+		if (wake_pollers)
+			pollwakeup(&so->so_poll_list, POLLIN|POLLRDNORM);
+	}
+
+	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
+}
+
+/*
+ * User initiated shutdown/close, wake anyone that is trying to do
+ * an operation that is no longer possible.
+ *
+ * At least one of SS_CANTSENDMORE and SS_CANTRCVMORE must be set. The final
+ * write and/or read notification is made for whichever directions are shut
+ * down, and the resulting signal and poll events are delivered.
+ *
+ * Entered with so_lock held; returns with it released.
+ */
+void
+so_notify_shutdown(struct sonode *so)
+{
+	int poll_events = 0;
+	int sig_events = 0;
+
+	ASSERT(MUTEX_HELD(&so->so_lock));
+	ASSERT(so->so_state & (SS_CANTSENDMORE|SS_CANTRCVMORE));
+
+	if (so->so_state & SS_CANTSENDMORE) {
+		(void) i_so_notify_last_tx(so, &poll_events, &sig_events);
+	}
+	if (so->so_state & SS_CANTRCVMORE) {
+		(void) i_so_notify_last_rx(so, &poll_events, &sig_events);
+	}
+
+	if (sig_events != 0) {
+		socket_sendsig(so, sig_events);
+	}
+	mutex_exit(&so->so_lock);
+
+	/* The poll wakeup is made after so_lock has been dropped. */
+	if (poll_events != 0) {
+		pollwakeup(&so->so_poll_list, poll_events);
+	}
+
+	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
+}
+
+/*
+ * No more data will be coming in, and this will be the last notification
+ * made.
+ *
+ * If SS_SENTLASTREADSIG is already set nothing is done and 0 is returned.
+ * Otherwise the receive timer is cancelled, a sleeping reader is woken,
+ * SS_SENTLASTREADSIG is set, SO_POLLEV_IN is cleared, the read poll and
+ * signal events are OR-ed into *pollev and *sigev, and 1 is returned.
+ */
+static int
+i_so_notify_last_rx(struct sonode *so, int *pollev, int *sigev)
+{
+	/* The last read notification is only ever made once. */
+	if (so->so_state & SS_SENTLASTREADSIG)
+		return (0);
+
+	SOCKET_TIMER_CANCEL(so);
+	SO_WAKEUP_READER(so);
+	so->so_state |= SS_SENTLASTREADSIG;
+	so->so_pollev &= ~SO_POLLEV_IN;
+
+	*pollev |= POLLIN|POLLRDNORM;
+	*sigev |= SOCKETSIG_READ;
+
+	return (1);
+}
+
+/*
+ * The socket is un-writeable. Make one last notification.
+ *
+ * If SS_SENTLASTWRITESIG is already set nothing is done and 0 is returned.
+ * Otherwise sleeping writers are woken, SS_SENTLASTWRITESIG is set, the
+ * write poll and signal events are OR-ed into *pollev and *sigev, and 1 is
+ * returned.
+ */
+static int
+i_so_notify_last_tx(struct sonode *so, int *pollev, int *sigev)
+{
+	/* The last write notification is only ever made once. */
+	if (so->so_state & SS_SENTLASTWRITESIG)
+		return (0);
+
+	SO_WAKEUP_WRITER(so);
+	so->so_state |= SS_SENTLASTWRITESIG;
+
+	*pollev |= POLLOUT;
+	*sigev |= SOCKETSIG_WRITE;
+
+	return (1);
+}
diff --git a/usr/src/uts/common/fs/sockfs/sockparams.c b/usr/src/uts/common/fs/sockfs/sockparams.c
new file mode 100644
index 0000000000..2e1d11c64e
--- /dev/null
+++ b/usr/src/uts/common/fs/sockfs/sockparams.c
@@ -0,0 +1,723 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/types.h>
+#include <sys/t_lock.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/sysmacros.h>
+#include <sys/cmn_err.h>
+#include <sys/list.h>
+
+#include <sys/stropts.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+
+#include <fs/sockfs/sockcommon.h>
+#include <fs/sockfs/socktpi.h>
+
+/*
+ * Socket Parameters
+ *
+ * Socket parameter (struct sockparams) entries represent the socket types
+ * available on the system.
+ *
+ * Flags (sp_flags):
+ *
+ * SOCKPARAMS_EPHEMERAL: A temporary sockparams entry that will be deleted
+ * as soon as its ref count drops to zero. In addition, ephemeral entries will
+ * never be hooked onto the global sockparams list. Ephemeral entries are
+ * created when an application requests to create a socket using an
+ * application supplied device path, or when a socket is falling back to TPI.
+ *
+ * Lock order:
+ * The lock order is splist_lock -> sp_lock.
+ * The lock order is sp_ephem_lock -> sp_lock.
+ */
+extern int kobj_path_exists(char *, int);
+extern void nl7c_init(void);
+extern int sockfs_defer_nl7c_init;
+
+static int sockparams_sdev_init(struct sockparams *, char *, int);
+static void sockparams_sdev_fini(struct sockparams *);
+
+/*
+ * Global sockparams list (populated via soconfig(1M)).
+ */
+static list_t sphead;
+static krwlock_t splist_lock;
+
+/*
+ * List of ephemeral sockparams.
+ */
+static list_t sp_ephem_list;
+static krwlock_t sp_ephem_lock;
+
+/*
+ * Match criteria used by sockparams_find(). Family and type must always
+ * match; the criteria select how the protocol (and optionally a device
+ * path or module name) is compared.
+ */
+typedef enum sp_match_criteria {
+ SP_MATCH_EXACT, /* family, type & proto must match */
+ SP_MATCH_WILDCARD, /* family & type must match; entry proto 0 is a fallback */
+ SP_MATCH_INC_DEV, /* same as exact, but dev must also match */
+ SP_MATCH_INC_MOD /* same as exact, but mod must also match */
+} sp_match_criteria_t;
+
+
+/*
+ * Set up the global and the ephemeral sockparams lists together with
+ * the reader/writer locks that protect them.
+ */
+void
+sockparams_init(void)
+{
+	/* Global list, populated through soconfig(). */
+	rw_init(&splist_lock, NULL, RW_DEFAULT, NULL);
+	list_create(&sphead, sizeof (struct sockparams),
+	    offsetof(struct sockparams, sp_node));
+
+	/* List of ephemeral entries. */
+	rw_init(&sp_ephem_lock, NULL, RW_DEFAULT, NULL);
+	list_create(&sp_ephem_list, sizeof (struct sockparams),
+	    offsetof(struct sockparams, sp_node));
+}
+
+/*
+ * sockparams_create(int family, int type, int protocol, char *modname,
+ *     char *devpath, int devpathlen, int flags, int kmflags, int *errorp)
+ *
+ * Allocate and initialize a new sockparams entry. The entry is not put
+ * on any list and starts out with a reference count of zero.
+ *
+ * Arguments:
+ *   family, type, protocol: the socket type the entry describes.
+ *   modname: name of the socket module for this socket type. May be
+ *	      NULL when a device path is given, in which case the TPI
+ *	      module name (SOTPI_SMOD_NAME) is used.
+ *   devpath: path to the STREAMS device. May be NULL for transports
+ *	      that are not STREAMS based, or that cannot fall back to
+ *	      STREAMS.
+ *   devpathlen: size of the devpath buffer, as later handed to
+ *	      kmem_free(). A value of 0 means devpath was allocated
+ *	      statically and must not be freed.
+ *   flags  : only SOCKPARAMS_EPHEMERAL is accepted.
+ *   kmflags: KM_{NO,}SLEEP
+ *   errorp : value-return argument, holds the error on failure.
+ *
+ * Returns:
+ *   The new entry with *errorp set to 0, or NULL with *errorp set to
+ *   EINVAL (bad flags, or neither modname nor devpath given), ENOMEM,
+ *   or the error returned while setting up the device.
+ *
+ * Notes:
+ *   On failure modname is freed, and devpath is freed if devpathlen
+ *   is non-zero.
+ */
+struct sockparams *
+sockparams_create(int family, int type, int protocol, char *modname,
+    char *devpath, int devpathlen, int flags, int kmflags, int *errorp)
+{
+	struct sockparams *sp = NULL;
+	size_t tpi_namesz;
+
+	ASSERT((flags & ~SOCKPARAMS_EPHEMERAL) == 0);
+
+	/*
+	 * Unknown flags are rejected, and at least one of module name
+	 * and device path has to be supplied.
+	 */
+	if ((flags & ~SOCKPARAMS_EPHEMERAL) != 0 ||
+	    (modname == NULL && devpath == NULL)) {
+		*errorp = EINVAL;
+		goto error;
+	}
+
+	if ((sp = kmem_zalloc(sizeof (*sp), kmflags)) == NULL) {
+		*errorp = ENOMEM;
+		goto error;
+	}
+	sp->sp_family = family;
+	sp->sp_type = type;
+	sp->sp_protocol = protocol;
+	sp->sp_refcnt = 0;
+	sp->sp_flags = flags;
+
+	if (modname == NULL) {
+		/* No module supplied; fall back to the TPI module name. */
+		tpi_namesz = strlen(SOTPI_SMOD_NAME) + 1;
+		modname = kmem_zalloc(tpi_namesz, kmflags);
+		if (modname == NULL) {
+			*errorp = ENOMEM;
+			goto error;
+		}
+		(void) sprintf(modname, "%s", SOTPI_SMOD_NAME);
+	}
+	sp->sp_smod_name = modname;
+
+	if (devpath != NULL) {
+		/* Set up the device entry. */
+		*errorp = sockparams_sdev_init(sp, devpath, devpathlen);
+		if (*errorp != 0)
+			goto error;
+	}
+
+	mutex_init(&sp->sp_lock, NULL, MUTEX_DEFAULT, NULL);
+	*errorp = 0;
+	return (sp);
+
+error:
+	ASSERT(*errorp != 0);
+	/* The strings are consumed by this routine, also on failure. */
+	if (modname != NULL)
+		kmem_free(modname, strlen(modname) + 1);
+	if (devpathlen != 0)
+		kmem_free(devpath, devpathlen);
+	if (sp != NULL)
+		kmem_free(sp, sizeof (*sp));
+	return (NULL);
+}
+
+/*
+ * Initialize the STREAMS device part of a sockparams entry: look up the
+ * vnode for devpath and record the vnode, the path and the path length
+ * in the entry.
+ *
+ * Returns 0 on success, otherwise the error from sogetvp(); the entry
+ * is left untouched in that case.
+ */
+static int
+sockparams_sdev_init(struct sockparams *sp, char *devpath, int devpathlen)
+{
+	vnode_t *devvp = NULL;
+	int err;
+
+	ASSERT(devpath != NULL);
+
+	err = sogetvp(devpath, &devvp, UIO_SYSSPACE);
+	if (err != 0) {
+		dprint(0, ("sockparams_sdev_init: vp %s failed with %d\n",
+		    devpath, err));
+		return (err);
+	}
+	ASSERT(devvp != NULL);
+
+	sp->sp_sdev_info.sd_devpathlen = devpathlen;
+	sp->sp_sdev_info.sd_devpath = devpath;
+	sp->sp_sdev_info.sd_vnode = devvp;
+
+	return (0);
+}
+
+/*
+ * sockparams_destroy(struct sockparams *sp)
+ *
+ * Tear down a sockparams entry: release the STREAMS device state, drop
+ * the socket module reference (if one is held), free the module name
+ * and finally free the entry itself.
+ *
+ * Arguments:
+ *   sp: the entry to destroy. It must be unreferenced and must not be
+ *	 linked on a list.
+ *
+ * Returns:
+ *   Nothing.
+ *
+ * Locking:
+ *   The sp_lock of the entry can not be held.
+ */
+void
+sockparams_destroy(struct sockparams *sp)
+{
+	ASSERT(sp->sp_refcnt == 0);
+	ASSERT(!list_link_active(&sp->sp_node));
+
+	sockparams_sdev_fini(sp);
+
+	/* Drop the module reference before the module name is freed. */
+	if (sp->sp_smod_info != NULL) {
+		SMOD_DEC_REF(sp, sp->sp_smod_info);
+		sp->sp_smod_info = NULL;
+	}
+
+	kmem_free(sp->sp_smod_name, strlen(sp->sp_smod_name) + 1);
+	sp->sp_smod_name = NULL;
+
+	mutex_destroy(&sp->sp_lock);
+	kmem_free(sp, sizeof (*sp));
+}
+
+/*
+ * Clean up the STREAMS device part of a sockparams entry: release the
+ * device vnode and free the device path, unless the path was allocated
+ * statically (sd_devpathlen == 0).
+ */
+static void
+sockparams_sdev_fini(struct sockparams *sp)
+{
+	sdev_info_t *sdp = &sp->sp_sdev_info;
+
+	/* Entries without a STREAMS device have nothing to clean up. */
+	if (!SOCKPARAMS_HAS_DEVICE(sp))
+		return;
+
+	if (sdp->sd_vnode != NULL)
+		VN_RELE(sdp->sd_vnode);
+	sdp->sd_vnode = NULL;
+
+	if (sdp->sd_devpathlen != 0)
+		kmem_free(sdp->sd_devpath, sdp->sd_devpathlen);
+	sdp->sd_devpath = NULL;
+}
+
+/*
+ * Look for a matching sockparams entry on the given list.
+ *
+ * Family and type must always match. An entry whose protocol also
+ * matches is returned right away if it satisfies the criteria (for
+ * SP_MATCH_INC_DEV / SP_MATCH_INC_MOD the device path / module name is
+ * compared against name). With SP_MATCH_WILDCARD, the last entry seen
+ * with protocol 0 is returned when no entry matches the protocol.
+ *
+ * name is only examined for SP_MATCH_INC_DEV and SP_MATCH_INC_MOD.
+ *
+ * The caller must hold the associated list lock.
+ */
+static struct sockparams *
+sockparams_find(list_t *list, int family, int type, int protocol,
+    enum sp_match_criteria crit, const char *name)
+{
+	struct sockparams *cur;
+	struct sockparams *fallback = NULL;
+
+	for (cur = list_head(list); cur != NULL; cur = list_next(list, cur)) {
+		if (cur->sp_family != family || cur->sp_type != type)
+			continue;
+
+		if (cur->sp_protocol != protocol) {
+			/* best match so far */
+			if (crit == SP_MATCH_WILDCARD &&
+			    cur->sp_protocol == 0)
+				fallback = cur;
+			continue;
+		}
+
+		/* family, type and protocol all match */
+		switch (crit) {
+		case SP_MATCH_EXACT:
+		case SP_MATCH_WILDCARD:
+			return (cur);
+		case SP_MATCH_INC_DEV:
+			if (cur->sp_sdev_info.sd_devpath != NULL &&
+			    strcmp(cur->sp_sdev_info.sd_devpath, name) == 0)
+				return (cur);
+			break;
+		case SP_MATCH_INC_MOD:
+			if (strcmp(cur->sp_smod_name, name) == 0)
+				return (cur);
+			break;
+		default:
+			break;
+		}
+	}
+
+	return (fallback);
+}
+
+/*
+ * sockparams_hold_ephemeral()
+ *
+ * Returns an ephemeral sockparams entry of the requested family, type and
+ * protocol. The entry is returned held, and the caller is responsible for
+ * dropping the reference using SOCKPARAMS_DEC_REF() once done.
+ *
+ * All ephemeral entries are on one list (sp_ephem_list). If there is an
+ * entry on the list that matches the search criteria, then a reference is
+ * placed on that entry. Otherwise, a new entry is created and inserted
+ * in the list. The entry is removed from the list when the last reference
+ * is dropped.
+ *
+ * Arguments:
+ *   family, type, protocol: the socket type.
+ *   name  : device path (tpi == B_TRUE) or module name (tpi == B_FALSE).
+ *	     The string is copied; the caller keeps ownership.
+ *   tpi   : selects whether name refers to a device or a module.
+ *   kmflag: KM_{NO,}SLEEP
+ *   errorp: value-return argument; 0 on success.
+ *
+ * Returns:
+ *   The held entry, or NULL with *errorp set to ENOMEM, ENXIO (the
+ *   socket module could not be looked up) or the error reported by
+ *   sockparams_create().
+ */
+static struct sockparams *
+sockparams_hold_ephemeral(int family, int type, int protocol,
+    const char *name, boolean_t tpi, int kmflag, int *errorp)
+{
+	struct sockparams *sp = NULL;
+	struct sockparams *newsp = NULL;
+	char *namebuf = NULL;
+	int namelen = 0;
+	sp_match_criteria_t crit = (tpi) ? SP_MATCH_INC_DEV : SP_MATCH_INC_MOD;
+
+	*errorp = 0;
+
+	/*
+	 * First look for an existing entry
+	 */
+	rw_enter(&sp_ephem_lock, RW_READER);
+	sp = sockparams_find(&sp_ephem_list, family, type, protocol,
+	    crit, name);
+	if (sp != NULL) {
+		SOCKPARAMS_INC_REF(sp);
+		rw_exit(&sp_ephem_lock);
+
+		return (sp);
+	}
+	rw_exit(&sp_ephem_lock);
+
+	/*
+	 * No existing entry. Build a new one from a private copy of the
+	 * name; sockparams_create() consumes the copy, also on failure.
+	 */
+	namelen = strlen(name) + 1;
+	namebuf = kmem_alloc(namelen, kmflag);
+	if (namebuf == NULL) {
+		*errorp = ENOMEM;
+		return (NULL);
+	}
+
+	/* namelen includes the terminating NUL, so the copy is terminated. */
+	(void) strncpy(namebuf, name, namelen);
+	if (tpi) {
+		newsp = sockparams_create(family, type,
+		    protocol, NULL, namebuf, namelen,
+		    SOCKPARAMS_EPHEMERAL, kmflag, errorp);
+	} else {
+		newsp = sockparams_create(family, type,
+		    protocol, namebuf, NULL, 0,
+		    SOCKPARAMS_EPHEMERAL, kmflag, errorp);
+	}
+
+	if (newsp == NULL) {
+		ASSERT(*errorp != 0);
+		return (NULL);
+	}
+
+	/*
+	 * Time to load the socket module.
+	 */
+	ASSERT(newsp->sp_smod_info == NULL);
+	newsp->sp_smod_info = smod_lookup_byname(newsp->sp_smod_name);
+	if (newsp->sp_smod_info == NULL) {
+		/* Failed to load */
+		sockparams_destroy(newsp);
+		*errorp = ENXIO;
+		return (NULL);
+	}
+
+	/*
+	 * The sockparams entry was created, now try to add it
+	 * to the list. We need to hold the lock as a WRITER.
+	 * The list lock was dropped above, so the lookup has to be
+	 * repeated before inserting.
+	 */
+	rw_enter(&sp_ephem_lock, RW_WRITER);
+	sp = sockparams_find(&sp_ephem_list, family, type, protocol,
+	    crit, name);
+	if (sp != NULL) {
+		/*
+		 * Someone has requested a matching entry, so just
+		 * place a hold on it and release the entry we alloc'ed.
+		 */
+		SOCKPARAMS_INC_REF(sp);
+		rw_exit(&sp_ephem_lock);
+
+		sockparams_destroy(newsp);
+	} else {
+		SOCKPARAMS_INC_REF(newsp);
+		list_insert_tail(&sp_ephem_list, newsp);
+		rw_exit(&sp_ephem_lock);
+
+		sp = newsp;
+	}
+	ASSERT(*errorp == 0);
+
+	return (sp);
+}
+
+/*
+ * Hold (creating it if needed) the ephemeral sockparams entry for the
+ * given socket type whose STREAMS device path is dev.
+ */
+struct sockparams *
+sockparams_hold_ephemeral_bydev(int family, int type, int protocol,
+    const char *dev, int kmflag, int *errorp)
+{
+	struct sockparams *sp;
+
+	sp = sockparams_hold_ephemeral(family, type, protocol, dev, B_TRUE,
+	    kmflag, errorp);
+	return (sp);
+}
+
+/*
+ * Hold (creating it if needed) the ephemeral sockparams entry for the
+ * given socket type whose socket module name is mod.
+ */
+struct sockparams *
+sockparams_hold_ephemeral_bymod(int family, int type, int protocol,
+    const char *mod, int kmflag, int *errorp)
+{
+	struct sockparams *sp;
+
+	sp = sockparams_hold_ephemeral(family, type, protocol, mod, B_FALSE,
+	    kmflag, errorp);
+	return (sp);
+}
+
+/*
+ * Called when the last socket using the ephemeral entry is dropping
+ * its reference. To maintain lock order (sp_ephem_lock -> sp_lock) the
+ * sockparams lock must be dropped before calling this function. As a
+ * result, a new reference might have been placed on the entry in the
+ * meantime, in which case only the count is decremented. If the count
+ * reaches zero the entry is unlinked and destroyed.
+ */
+void
+sockparams_ephemeral_drop_last_ref(struct sockparams *sp)
+{
+	boolean_t last;
+
+	ASSERT(sp->sp_flags & SOCKPARAMS_EPHEMERAL);
+	ASSERT(MUTEX_NOT_HELD(&sp->sp_lock));
+
+	rw_enter(&sp_ephem_lock, RW_WRITER);
+	mutex_enter(&sp->sp_lock);
+
+	sp->sp_refcnt--;
+	last = (sp->sp_refcnt == 0) ? B_TRUE : B_FALSE;
+	if (last)
+		list_remove(&sp_ephem_list, sp);
+
+	mutex_exit(&sp->sp_lock);
+	rw_exit(&sp_ephem_lock);
+
+	/* Destroy only once both locks have been released. */
+	if (last)
+		sockparams_destroy(sp);
+}
+
+/*
+ * sockparams_add(struct sockparams *sp)
+ *
+ * Tries to add the given sockparams entry to the tail of the global
+ * list.
+ *
+ * Arguments:
+ *   sp: the (non-ephemeral) sockparams entry to add
+ *
+ * Returns:
+ *   0 on success. EEXIST if the list already holds an entry with the
+ *   same family, type and protocol; sp is not inserted in that case.
+ *
+ * Locking:
+ *   The caller can not be holding splist_lock.
+ */
+static int
+sockparams_add(struct sockparams *sp)
+{
+	int error = 0;
+
+	ASSERT(!(sp->sp_flags & SOCKPARAMS_EPHEMERAL));
+
+	rw_enter(&splist_lock, RW_WRITER);
+	if (sockparams_find(&sphead, sp->sp_family, sp->sp_type,
+	    sp->sp_protocol, SP_MATCH_EXACT, NULL) != NULL)
+		error = EEXIST;
+	else
+		list_insert_tail(&sphead, sp);
+	rw_exit(&splist_lock);
+
+	return (error);
+}
+
+/*
+ * sockparams_delete(int family, int type, int protocol)
+ *
+ * Removes the sockparams entry for a specific family, type and protocol
+ * from the global list and destroys it, provided that no one is holding
+ * a reference to it.
+ *
+ * Arguments:
+ *   family, type, protocol: the socket type that should be removed.
+ *
+ * Returns:
+ *   0 on success, ENXIO if there is no exactly matching entry, EBUSY
+ *   if the entry is still referenced (it is then left on the list).
+ *
+ * Locking:
+ *   Caller can not be holding splist_lock or the sp_lock of
+ *   any sockparams entry.
+ */
+static int
+sockparams_delete(int family, int type, int protocol)
+{
+	struct sockparams *sp;
+	boolean_t busy;
+
+	rw_enter(&splist_lock, RW_WRITER);
+	sp = sockparams_find(&sphead, family, type, protocol, SP_MATCH_EXACT,
+	    NULL);
+	if (sp == NULL) {
+		rw_exit(&splist_lock);
+		return (ENXIO);
+	}
+
+	/* Entries that are still referenced must stay on the list. */
+	mutex_enter(&sp->sp_lock);
+	busy = (sp->sp_refcnt != 0) ? B_TRUE : B_FALSE;
+	mutex_exit(&sp->sp_lock);
+	if (busy) {
+		rw_exit(&splist_lock);
+		return (EBUSY);
+	}
+
+	/* Unlink under the list lock, destroy after dropping it. */
+	list_remove(&sphead, sp);
+	rw_exit(&splist_lock);
+
+	sockparams_destroy(sp);
+	return (0);
+}
+
+/*
+ * soconfig(int family, int type, int protocol,
+ *     char *devpath, int devpathlen, char *module)
+ *
+ * Add or delete an entry to the sockparams table.
+ * When devpath and module both are NULL, it will delete an entry.
+ *
+ * Arguments:
+ *   family, type, protocol: the tuple in question
+ *   devpath: STREAMS device path. Can be NULL for module based sockets.
+ *   module : Name of the socket module. Can be NULL for STREAMS
+ *	      based sockets.
+ *   devpathlen: length of the devpath string, or 0 if devpath
+ *	      was statically allocated.
+ *
+ * Returns:
+ *   0 on success, otherwise the error from sockparams_delete(),
+ *   sockparams_create() or sockparams_add().
+ *
+ * Note:
+ *   This routine assumes that the caller has kmem_alloced
+ *   devpath (if devpathlen > 0) and module for this routine to
+ *   consume.
+ */
+int
+soconfig(int family, int type, int protocol,
+    char *devpath, int devpathlen, char *module)
+{
+	struct sockparams *sp;
+	int error = 0;
+
+	/* Both strings are optional, so neither may be printed unguarded. */
+	dprint(0, ("soconfig(%d,%d,%d,%s,%d,%s)\n",
+	    family, type, protocol,
+	    devpath == NULL ? "NULL" : devpath, devpathlen,
+	    module == NULL ? "NULL" : module));
+
+	if (sockfs_defer_nl7c_init) {
+		nl7c_init();
+		sockfs_defer_nl7c_init = 0;
+	}
+
+	if (devpath == NULL && module == NULL) {
+		/*
+		 * Delete existing entry,
+		 * both socket module and STREAMS device.
+		 */
+		error = sockparams_delete(family, type, protocol);
+	} else {
+		/*
+		 * Adding an entry
+		 * sockparams_create frees mod name and devpath upon failure.
+		 */
+		sp = sockparams_create(family, type, protocol, module,
+		    devpath, devpathlen, 0, KM_SLEEP, &error);
+
+		if (sp != NULL) {
+			error = sockparams_add(sp);
+			/* Not added (duplicate): release the new entry. */
+			if (error != 0)
+				sockparams_destroy(sp);
+		}
+	}
+
+	return (error);
+}
+
+/*
+ * solookup(int family, int type, int protocol, struct sockparams **spp)
+ *
+ * Lookup an entry in the sockparams list based on the triple. The returned
+ * entry either exactly matches the given tuple, or it is the 'default' entry
+ * for the given <family, type>. A default entry is one with a protocol
+ * value of zero.
+ *
+ * Arguments:
+ *   family, type, protocol: tuple to search for
+ *   spp: Value-return argument
+ *
+ * Returns:
+ *   If an entry is found, 0 is returned and *spp is set to point to the
+ *   entry, which is returned held. In case an entry is not found, *spp is
+ *   set to NULL, and an error code is returned. The errors are (in
+ *   decreasing precedence):
+ *	EAFNOSUPPORT - address family not in list
+ *	EPROTONOSUPPORT - address family supported but not protocol.
+ *	EPROTOTYPE - address family and protocol supported but not socket type.
+ *   ENXIO is returned if an entry was found but its socket module could
+ *   not be looked up.
+ *
+ * TODO: should use ddi_modopen()/ddi_modclose()
+ */
+int
+solookup(int family, int type, int protocol, struct sockparams **spp)
+{
+	struct sockparams *sp = NULL;
+	int error = 0;
+
+	*spp = NULL;
+	rw_enter(&splist_lock, RW_READER);
+
+	/*
+	 * Search the sockparams list for an appropriate entry.
+	 * Hopefully we find an entry that matches the exact family,
+	 * type and protocol specified by the user, in which case
+	 * we return that entry. However, we also keep track of
+	 * the default entry for a specific family and type, the
+	 * entry of which would have a protocol value of 0.
+	 */
+	sp = sockparams_find(&sphead, family, type, protocol, SP_MATCH_WILDCARD,
+	    NULL);
+
+	if (sp == NULL) {
+		int found = 0;
+
+		/*
+		 * Determine correct error code:
+		 * 0 - family unknown, 1 - family known,
+		 * 2 - family and protocol known.
+		 */
+		for (sp = list_head(&sphead); sp != NULL;
+		    sp = list_next(&sphead, sp)) {
+			if (sp->sp_family == family && found < 1)
+				found = 1;
+			if (sp->sp_family == family &&
+			    sp->sp_protocol == protocol && found < 2)
+				found = 2;
+		}
+		rw_exit(&splist_lock);
+
+		switch (found) {
+		case 0:
+			error = EAFNOSUPPORT;
+			break;
+		case 1:
+			error = EPROTONOSUPPORT;
+			break;
+		case 2:
+			error = EPROTOTYPE;
+			break;
+		}
+		return (error);
+	}
+
+	/*
+	 * An entry was found.
+	 *
+	 * We put a hold on the entry early on, so if the
+	 * sockmod is not loaded, and we have to exit
+	 * splist_lock to call modload(), we know that the
+	 * sockparams entry won't go away. That way we don't
+	 * have to look up the entry once we come back from
+	 * modload().
+	 */
+	SOCKPARAMS_INC_REF(sp);
+	rw_exit(&splist_lock);
+
+	if (sp->sp_smod_info == NULL) {
+		/*
+		 * NOTE(review): smod_info_t is assumed to be the type of
+		 * sp_smod_info (declared outside this file) - confirm.
+		 */
+		smod_info_t *smod = smod_lookup_byname(sp->sp_smod_name);
+
+		if (smod == NULL) {
+			/*
+			 * We put a hold on the sockparams entry
+			 * earlier, hoping everything would work out.
+			 * That obviously did not happen, so release
+			 * the hold here.
+			 */
+			SOCKPARAMS_DEC_REF(sp);
+			/*
+			 * We should probably mark the sockparams as
+			 * "bad", and redo the lookup skipping the
+			 * "bad" entries. I.e., sp->sp_mod_state |= BAD,
+			 * return (solookup(...))
+			 */
+			return (ENXIO);
+		}
+
+		/*
+		 * splist_lock is no longer held, so another thread may
+		 * have looked up the module for this entry at the same
+		 * time. Only the first one gets to install its module
+		 * reference; a later one drops the reference it just
+		 * obtained so that the entry holds exactly one.
+		 */
+		mutex_enter(&sp->sp_lock);
+		if (sp->sp_smod_info == NULL) {
+			sp->sp_smod_info = smod;
+		} else {
+			SMOD_DEC_REF(sp, smod);
+		}
+		mutex_exit(&sp->sp_lock);
+	}
+
+	/*
+	 * Alright, we have a valid sockparams entry.
+	 */
+	*spp = sp;
+	return (0);
+}
diff --git a/usr/src/uts/common/fs/sockfs/socksctp.c b/usr/src/uts/common/fs/sockfs/socksctp.c
deleted file mode 100644
index a5763b0b5f..0000000000
--- a/usr/src/uts/common/fs/sockfs/socksctp.c
+++ /dev/null
@@ -1,2773 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#include <sys/types.h>
-#include <sys/t_lock.h>
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/buf.h>
-#include <sys/vfs.h>
-#include <sys/vfs_opreg.h>
-#include <sys/vnode.h>
-#include <sys/debug.h>
-#include <sys/errno.h>
-#include <sys/stropts.h>
-#include <sys/cmn_err.h>
-#include <sys/sysmacros.h>
-
-#include <sys/project.h>
-#include <sys/tihdr.h>
-#include <sys/strsubr.h>
-
-#include <sys/socket.h>
-#include <sys/socketvar.h>
-#include <sys/strsun.h>
-
-#include <netinet/sctp.h>
-#include <inet/sctp_itf.h>
-#include "socksctp.h"
-
-/*
- * SCTP sockfs sonode operations, 1-1 socket
- */
-static int sosctp_accept(struct sonode *, int, struct sonode **);
-static int sosctp_listen(struct sonode *, int);
-static int sosctp_connect(struct sonode *, const struct sockaddr *, socklen_t,
- int, int);
-static int sosctp_sendmsg(struct sonode *, struct nmsghdr *, struct uio *);
-static int sosctp_getpeername(struct sonode *);
-static int sosctp_getsockname(struct sonode *);
-static int sosctp_shutdown(struct sonode *, int);
-static int sosctp_getsockopt(struct sonode *, int, int, void *, socklen_t *,
- int);
-static int sosctp_setsockopt(struct sonode *, int, int, const void *,
- socklen_t);
-
-/*
- * SCTP sockfs sonode operations, 1-N socket
- */
-static int sosctp_seq_connect(struct sonode *, const struct sockaddr *,
- socklen_t, int, int);
-static int sosctp_seq_sendmsg(struct sonode *, struct nmsghdr *, struct uio *);
-
-/*
- * Socket upcalls, 1-1 socket connection
- */
-static void *sctp_sock_newconn(void *parenthandle, void *connind);
-static void sctp_sock_connected(void *handle);
-static int sctp_sock_disconnected(void *handle, int error);
-static void sctp_sock_disconnecting(void *handle);
-static int sctp_sock_recv(void *handle, mblk_t *mp, int flags);
-static void sctp_sock_xmitted(void *handle, int txqueued);
-static void sctp_sock_properties(void *handle, int wroff, size_t maxblk);
-
-/*
- * Socket association upcalls, 1-N socket connection
- */
-static void *sctp_assoc_newconn(void *parenthandle, void *connind);
-static void sctp_assoc_connected(void *handle);
-static int sctp_assoc_disconnected(void *handle, int error);
-static void sctp_assoc_disconnecting(void *handle);
-static int sctp_assoc_recv(void *handle, mblk_t *mp, int flags);
-static void sctp_assoc_xmitted(void *handle, int txqueued);
-static void sctp_assoc_properties(void *handle, int wroff, size_t maxblk);
-
-static kmem_cache_t *sosctp_sockcache;
-kmem_cache_t *sosctp_assoccache;
-
-/*
- * Socket operations vector installed by sosctp_makevp() on SOCK_STREAM
- * (1-1) SCTP sockets.
- */
-sonodeops_t sosctp_sonodeops = {
- sosctp_accept, /* sop_accept */
- sosctp_bind, /* sop_bind */
- sosctp_listen, /* sop_listen */
- sosctp_connect, /* sop_connect */
- sosctp_recvmsg, /* sop_recvmsg */
- sosctp_sendmsg, /* sop_sendmsg */
- sosctp_getpeername, /* sop_getpeername */
- sosctp_getsockname, /* sop_getsockname */
- sosctp_shutdown, /* sop_shutdown */
- sosctp_getsockopt, /* sop_getsockopt */
- sosctp_setsockopt /* sop_setsockopt */
-};
-
-/*
- * Socket operations vector installed by sosctp_makevp() on
- * SOCK_SEQPACKET (1-N) SCTP sockets. It differs from sosctp_sonodeops
- * only in the connect and sendmsg entries.
- */
-sonodeops_t sosctp_seq_sonodeops = {
- sosctp_accept, /* sop_accept */
- sosctp_bind, /* sop_bind */
- sosctp_listen, /* sop_listen */
- sosctp_seq_connect, /* sop_connect */
- sosctp_recvmsg, /* sop_recvmsg */
- sosctp_seq_sendmsg, /* sop_sendmsg */
- sosctp_getpeername, /* sop_getpeername */
- sosctp_getsockname, /* sop_getsockname */
- sosctp_shutdown, /* sop_shutdown */
- sosctp_getsockopt, /* sop_getsockopt */
- sosctp_setsockopt /* sop_setsockopt */
-};
-
-/*
- * Upcall vector built from the sctp_sock_*() handlers (the 1-1 socket
- * connection upcalls). The initializer is positional, so the entries
- * must stay in the member order of sctp_upcalls_t.
- */
-sctp_upcalls_t sosctp_sock_upcalls = {
- sctp_sock_newconn,
- sctp_sock_connected,
- sctp_sock_disconnected,
- sctp_sock_disconnecting,
- sctp_sock_recv,
- sctp_sock_xmitted,
- sctp_sock_properties
-};
-
-/*
- * Upcall vector built from the sctp_assoc_*() handlers (the 1-N socket
- * association upcalls). The initializer is positional, so the entries
- * must stay in the member order of sctp_upcalls_t.
- */
-sctp_upcalls_t sosctp_assoc_upcalls = {
- sctp_assoc_newconn,
- sctp_assoc_connected,
- sctp_assoc_disconnected,
- sctp_assoc_disconnecting,
- sctp_assoc_recv,
- sctp_assoc_xmitted,
- sctp_assoc_properties
-};
-
-/*
- * kmem cache constructor for struct sctp_sonode objects (registered for
- * the "sctpsock" cache in sosctp_init()).
- *
- * Clears the pointer fields of the embedded sonode, allocates a vnode
- * and ties it to the sonode, and initializes the locks and condition
- * variables. Returns 0 on success, -1 if the vnode cannot be allocated.
- */
-/*ARGSUSED*/
-static int
-sosctp_sock_constructor(void *buf, void *cdrarg, int kmflags)
-{
- struct sctp_sonode *ss = buf;
- struct sonode *so = &ss->ss_so;
- struct vnode *vp;
-
- ss->ss_type = SOSCTP_SOCKET;
- so->so_oobmsg = NULL;
- so->so_ack_mp = NULL;
- so->so_conn_ind_head = NULL;
- so->so_conn_ind_tail = NULL;
- so->so_discon_ind_mp = NULL;
- so->so_ux_bound_vp = NULL;
- so->so_unbind_mp = NULL;
- so->so_ops = NULL;
- so->so_accessvp = NULL;
- so->so_priv = NULL;
-
- so->so_nl7c_flags = 0;
- so->so_nl7c_uri = NULL;
- so->so_nl7c_rcv_mp = NULL;
-
- so->so_direct = NULL;
-
- /* The vnode is allocated with the caller's kmflags, so it can fail. */
- vp = vn_alloc(kmflags);
- if (vp == NULL) {
- return (-1);
- }
- so->so_vnode = vp;
-
- vn_setops(vp, socksctp_vnodeops);
- vp->v_data = (caddr_t)so;
-
- /* Empty receive list: the tail pointer refers back to the head. */
- ss->ss_rxdata = NULL;
- ss->ss_rxtail = &ss->ss_rxdata;
-
- mutex_init(&so->so_lock, NULL, MUTEX_DEFAULT, NULL);
- mutex_init(&so->so_plumb_lock, NULL, MUTEX_DEFAULT, NULL);
- cv_init(&so->so_state_cv, NULL, CV_DEFAULT, NULL);
- cv_init(&so->so_ack_cv, NULL, CV_DEFAULT, NULL);
- cv_init(&so->so_connind_cv, NULL, CV_DEFAULT, NULL);
- cv_init(&so->so_want_cv, NULL, CV_DEFAULT, NULL);
-
- cv_init(&ss->ss_txdata_cv, NULL, CV_DEFAULT, NULL);
- cv_init(&ss->ss_rxdata_cv, NULL, CV_DEFAULT, NULL);
-
- return (0);
-}
-
-/*
- * kmem cache destructor for struct sctp_sonode objects (registered for
- * the "sctpsock" cache in sosctp_init()).
- *
- * Asserts that the object has been returned in its constructed state,
- * then frees the vnode and destroys the locks and condition variables
- * set up by sosctp_sock_constructor().
- */
-/*ARGSUSED*/
-static void
-sosctp_sock_destructor(void *buf, void *cdrarg)
-{
- struct sctp_sonode *ss = buf;
- struct sonode *so = &ss->ss_so;
- struct vnode *vp = SOTOV(so);
-
- ASSERT(so->so_direct == NULL);
-
- ASSERT(so->so_nl7c_flags == 0);
- ASSERT(so->so_nl7c_uri == NULL);
- ASSERT(so->so_nl7c_rcv_mp == NULL);
-
- ASSERT(so->so_oobmsg == NULL);
- ASSERT(so->so_ack_mp == NULL);
- ASSERT(so->so_conn_ind_head == NULL);
- ASSERT(so->so_conn_ind_tail == NULL);
- ASSERT(so->so_discon_ind_mp == NULL);
- ASSERT(so->so_ux_bound_vp == NULL);
- ASSERT(so->so_unbind_mp == NULL);
- /* so_ops is either still unset or one of the two SCTP vectors. */
- ASSERT(so->so_ops == NULL ||
- so->so_ops == &sosctp_sonodeops ||
- so->so_ops == &sosctp_seq_sonodeops);
-
- ASSERT(ss->ss_rxdata == NULL);
-
- ASSERT(vn_matchops(vp, socksctp_vnodeops));
- ASSERT(vp->v_data == (caddr_t)so);
-
- vn_free(vp);
-
- mutex_destroy(&so->so_lock);
- mutex_destroy(&so->so_plumb_lock);
- cv_destroy(&so->so_state_cv);
- cv_destroy(&so->so_ack_cv);
- cv_destroy(&so->so_connind_cv);
- cv_destroy(&so->so_want_cv);
- cv_destroy(&ss->ss_txdata_cv);
- cv_destroy(&ss->ss_rxdata_cv);
-}
-
-/*
- * Build the SCTP socket vnode operations from their template and create
- * the kmem caches for SCTP sockets ("sctpsock") and associations
- * ("sctp_assoc").
- *
- * Returns 0 on success, or the error from vn_make_ops(), in which case
- * no cache is created.
- */
-int
-sosctp_init(void)
-{
- int error;
-
- error = vn_make_ops("socksctp", socksctp_vnodeops_template,
- &socksctp_vnodeops);
- if (error != 0) {
- zcmn_err(GLOBAL_ZONEID, CE_WARN,
- "sosctp_init: bad vnode ops template");
- return (error);
- }
-
- /* Only the socket cache has a constructor and a destructor. */
- sosctp_sockcache = kmem_cache_create("sctpsock",
- sizeof (struct sctp_sonode), 0, sosctp_sock_constructor,
- sosctp_sock_destructor, NULL, NULL, NULL, 0);
- sosctp_assoccache = kmem_cache_create("sctp_assoc",
- sizeof (struct sctp_soassoc), 0, NULL, NULL, NULL, NULL, NULL, 0);
- return (0);
-}
-
-/*
- * Allocate an SCTP sonode from sosctp_sockcache and initialize it and
- * its vnode for the given domain, type and protocol.
- *
- * Returns the vnode of the new socket, or NULL if the cache allocation
- * fails. SOCK_STREAM sockets get sosctp_sonodeops; any other type is
- * asserted to be SOCK_SEQPACKET and gets sosctp_seq_sonodeops.
- */
-static struct vnode *
-sosctp_makevp(struct vnode *accessvp, int domain, int type, int protocol,
- int kmflags)
-{
- struct sctp_sonode *ss;
- struct sonode *so;
- struct vnode *vp;
- time_t now;
-
- ss = kmem_cache_alloc(sosctp_sockcache, kmflags);
- if (ss == NULL) {
- return (NULL);
- }
- so = &ss->ss_so;
- so->so_cache = sosctp_sockcache;
- so->so_obj = ss;
- vp = SOTOV(so);
- now = gethrestime_sec();
-
- so->so_flag = 0;
- so->so_accessvp = accessvp;
- so->so_dev = accessvp->v_rdev;
-
- so->so_state = 0;
- so->so_mode = 0;
-
- so->so_fsid = sockdev;
- so->so_atime = now;
- so->so_mtime = now;
- so->so_ctime = now;
- so->so_count = 0;
-
- so->so_family = domain;
- so->so_type = type;
- so->so_protocol = protocol;
- so->so_pushcnt = 0;
-
- so->so_options = 0;
- so->so_linger.l_onoff = 0;
- so->so_linger.l_linger = 0;
- so->so_sndbuf = 0;
- so->so_rcvbuf = 0;
- so->so_error = 0;
- so->so_delayed_error = 0;
-
- ASSERT(so->so_oobmsg == NULL);
- so->so_oobcnt = 0;
- so->so_oobsigcnt = 0;
- so->so_pgrp = 0;
- so->so_provinfo = NULL;
-
- /* Local and peer addresses live in buffers inside the sctp_sonode. */
- so->so_laddr_sa = (struct sockaddr *)&ss->ss_laddr;
- so->so_faddr_sa = (struct sockaddr *)&ss->ss_faddr;
- so->so_laddr_maxlen = so->so_faddr_maxlen = sizeof (ss->ss_laddr);
- so->so_laddr_len = so->so_faddr_len = 0;
- so->so_eaddr_mp = NULL;
- so->so_delayed_error = 0;
-
- so->so_peercred = NULL;
-
- ASSERT(so->so_ack_mp == NULL);
- ASSERT(so->so_conn_ind_head == NULL);
- ASSERT(so->so_conn_ind_tail == NULL);
- ASSERT(so->so_ux_bound_vp == NULL);
- ASSERT(so->so_unbind_mp == NULL);
-
- vn_reinit(vp);
- vp->v_vfsp = rootvfs;
- vp->v_type = VSOCK;
- vp->v_rdev = so->so_dev;
-
- ss->ss_maxassoc = 0;
- ss->ss_assoccnt = 0;
- ss->ss_assocs = NULL;
-
- if (type == SOCK_STREAM) {
- so->so_ops = &sosctp_sonodeops;
- } else {
- ASSERT(type == SOCK_SEQPACKET);
- so->so_ops = &sosctp_seq_sonodeops;
- /* The result of sosctp_aid_grow() is deliberately ignored here. */
- mutex_enter(&so->so_lock);
- (void) sosctp_aid_grow(ss, 1, kmflags);
- mutex_exit(&so->so_lock);
- }
- ss->ss_rxqueued = 0;
- ss->ss_txqueued = 0;
- ss->ss_wroff = 0;
- ss->ss_wrsize = strmsgsz;
- bzero(&ss->ss_poll_list, sizeof (ss->ss_poll_list));
-
- vn_exists(vp);
- return (vp);
-}
-
-/*
- * Creates a sctp socket data structure.
- * tso is non-NULL if it's passive open.
- *
- * Returns the new sonode, or NULL with *errorp set: EINVAL for
- * SOV_STREAM, ENOMEM if the passive-open allocation fails, or the error
- * from VOP_OPEN(). Requests that are not AF_INET/AF_INET6 with
- * SOCK_STREAM/SOCK_SEQPACKET are handed to sotpi_create().
- */
-struct sonode *
-sosctp_create(vnode_t *accessvp, int domain, int type, int protocol,
- int version, struct sonode *tso, int *errorp)
-{
- struct sonode *so;
- vnode_t *vp;
- int error;
- int soflags;
- cred_t *cr;
-
- if (version == SOV_STREAM) {
- *errorp = EINVAL;
- return (NULL);
- }
- ASSERT(accessvp != NULL);
-
- /*
- * We only support two types of SCTP socket. Let sotpi_create()
- * handle all other cases, such as raw socket.
- */
- if (!(domain == AF_INET || domain == AF_INET6) ||
- !(type == SOCK_STREAM || type == SOCK_SEQPACKET)) {
- return (sotpi_create(accessvp, domain, type, protocol, version,
- NULL, errorp));
- }
-
- if (tso == NULL) {
- /* Active open: allocate with KM_SLEEP. */
- vp = sosctp_makevp(accessvp, domain, type, protocol, KM_SLEEP);
- ASSERT(vp != NULL);
-
- soflags = FREAD | FWRITE;
- } else {
- /* Passive open: allocate with KM_NOSLEEP, which can fail. */
- vp = sosctp_makevp(accessvp, domain, type, protocol,
- KM_NOSLEEP);
- if (vp == NULL) {
- /*
- * sosctp_makevp() only fails when there is no memory.
- */
- *errorp = ENOMEM;
- return (NULL);
- }
- soflags = FREAD | FWRITE | SO_ACCEPTOR;
- }
- /*
- * This function may be called in interrupt context, and CRED()
- * will be NULL. In this case, pass in kcred to VOP_OPEN().
- */
- if ((cr = CRED()) == NULL)
- cr = kcred;
- if ((error = VOP_OPEN(&vp, soflags, cr, NULL)) != 0) {
- VN_RELE(vp);
- *errorp = error;
- return (NULL);
- }
- so = VTOSO(vp);
-
- dprint(2, ("sosctp_create: %p domain %d type %d\n",
- (void *)so, domain, type));
-
- if (version == SOV_DEFAULT) {
- version = so_default_version;
- }
- so->so_version = (short)version;
-
- return (so);
-}
-
-/*
- * Free SCTP socket data structure.
- * Closes incoming connections which were never accepted, frees
- * resources.
- *
- * Queued receive data is freed, every pending connection indication is
- * closed and released, the association id array is freed, and the
- * sonode is finally handed to sockfree().
- */
-void
-sosctp_free(struct sonode *so)
-{
- struct sctp_sonode *ss = SOTOSSO(so);
- struct sonode *nso;
- mblk_t *mp;
-
- mutex_enter(&so->so_lock);
-
- /*
- * Need to clear these out so that sockfree() doesn't think that
- * there's memory in need of free'ing.
- */
- so->so_laddr_sa = so->so_faddr_sa = NULL;
- so->so_laddr_len = so->so_laddr_maxlen = 0;
- so->so_faddr_len = so->so_faddr_maxlen = 0;
-
- /* Drop all messages still queued on the receive list. */
- while ((mp = ss->ss_rxdata) != NULL) {
- ss->ss_rxdata = mp->b_next;
- mp->b_next = NULL;
- freemsg(mp);
- /* Redundant: the loop condition reloads mp. */
- mp = ss->ss_rxdata;
- }
- ss->ss_rxtail = &ss->ss_rxdata;
-
-
- /*
- * Close connections that were never accepted. so_lock is dropped
- * around the close of each one and re-taken afterwards.
- */
- while ((mp = so->so_conn_ind_head) != NULL) {
- so->so_conn_ind_head = mp->b_next;
- mutex_exit(&so->so_lock);
- mp->b_next = NULL;
- /* The message body carries a pointer to the new sonode. */
- nso = *(struct sonode **)mp->b_rptr;
-
- (void) VOP_CLOSE(SOTOV(nso), 0, 1, 0, CRED(), NULL);
- vn_invalid(SOTOV(nso));
- VN_RELE(SOTOV(nso));
-
- freeb(mp);
- mutex_enter(&so->so_lock);
- }
- so->so_conn_ind_tail = NULL;
- so->so_state &= ~SS_HASCONNIND;
-
- if (ss->ss_assocs != NULL) {
- ASSERT(ss->ss_assoccnt == 0);
- kmem_free(ss->ss_assocs,
- ss->ss_maxassoc * sizeof (struct sctp_sa_id));
- }
- mutex_exit(&so->so_lock);
-
- sockfree(so);
-}
-
-/*
- * Accept incoming connection.
- *
- * lso must be a listening (SS_ACCEPTCONN) socket of type SOCK_STREAM;
- * otherwise EINVAL or EOPNOTSUPP is returned. The next pending
- * connection is obtained through sowaitconnind() and the listener's
- * ss_rxqueued count is decremented under so_lock. If the peer address
- * of the new socket cannot be fetched, the new socket is closed and
- * released and an error is returned. On success the new sonode is
- * stored in *nsop and 0 is returned.
- */
-static int
-sosctp_accept(struct sonode *lso, int fflag, struct sonode **nsop)
-{
- int error = 0;
- mblk_t *mp;
- struct sonode *nso;
-
- if (!(lso->so_state & SS_ACCEPTCONN)) {
- /*
- * Not a listen socket.
- */
- eprintsoline(lso, EINVAL);
- return (EINVAL);
- }
- if (lso->so_type != SOCK_STREAM) {
- /*
- * Cannot accept() connections from SOCK_SEQPACKET type
- * socket.
- */
- eprintsoline(lso, EOPNOTSUPP);
- return (EOPNOTSUPP);
- }
-
- /*
- * Returns right away if socket is nonblocking.
- */
- error = sowaitconnind(lso, fflag, &mp);
- if (error != 0) {
- eprintsoline(lso, error);
- return (error);
- }
- nso = *(struct sonode **)mp->b_rptr; /* payload is the new sonode pointer */
- freeb(mp);
-
- mutex_enter(&lso->so_lock);
- ASSERT(SOTOSSO(lso)->ss_rxqueued > 0);
- --SOTOSSO(lso)->ss_rxqueued;
- mutex_exit(&lso->so_lock);
-
- /*
- * accept() needs the remote address right away.
- * Since sosctp_getpeername() is called with the
- * socket lock released, the connection may
- * get aborted before we return from the
- * routine. So, we need to handle an aborted
- * socket connection here.
- */
- error = sosctp_getpeername(nso);
- if (error != 0) {
- vnode_t *nvp;
- nvp = SOTOV(nso);
- (void) VOP_CLOSE(nvp, 0, 1, 0, CRED(), NULL);
- VN_RELE(nvp);
-
- /*
- * We can't return ENOTCONN to accept. accept
- * either returns a connected socket in case no error
- * has occurred, or the connection which is getting
- * accepted is being aborted. This is the reason we
- * return ECONNABORTED in case sosctp_getpeername()
- * returns ENOTCONN.
- */
- return ((error == ENOTCONN) ? ECONNABORTED : error);
- }
-
- dprint(2, ("sosctp_accept: new %p\n", (void *)nso));
-
- *nsop = nso;
- return (0);
-}
-
-/*
- * Bind the socket's local endpoint to the supplied address.
- *
- * When flags carries _SOBIND_LOCK_HELD the caller already owns so_lock
- * and SOLOCKED, and both are still held on return; otherwise they are
- * acquired and released here. so_lock is dropped for the duration of
- * the call into sctp_bind(). Returns 0 on success or an errno value.
- */
-int
-sosctp_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen,
-    int flags)
-{
-	boolean_t self_locked;
-	int rv = 0;
-
-	self_locked = ((flags & _SOBIND_LOCK_HELD) == 0) ? B_TRUE : B_FALSE;
-
-	if (self_locked) {
-		mutex_enter(&so->so_lock);
-		so_lock_single(so);	/* Set SOLOCKED */
-		/* LINTED - statement has no conseq */
-	} else {
-		ASSERT(MUTEX_HELD(&so->so_lock));
-		ASSERT(so->so_flag & SOLOCKED);
-	}
-
-	/*
-	 * An SCTP socket may be bound only once, and binding without
-	 * an address is not supported.
-	 */
-	if (name == NULL || namelen == 0 || (so->so_state & SS_ISBOUND)) {
-		rv = EINVAL;
-		eprintsoline(so, rv);
-		goto out;
-	}
-
-	/* X/Open requires this check */
-	if (so->so_state & SS_CANTSENDMORE) {
-		rv = EINVAL;
-		goto out;
-	}
-
-	/*
-	 * Address family validation is left to the protocol module;
-	 * call into it without holding so_lock.
-	 */
-	mutex_exit(&so->so_lock);
-	rv = sctp_bind(so->so_priv, name, namelen);
-	mutex_enter(&so->so_lock);
-
-	if (rv == 0) {
-		so->so_state |= SS_ISBOUND;
-		/* LINTED - statement has no conseq */
-	} else {
-		eprintsoline(so, rv);
-	}
-
-out:
-	if (self_locked) {
-		so_unlock_single(so, SOLOCKED);
-		mutex_exit(&so->so_lock);
-		/* LINTED - statement has no conseq */
-	} else {
-		/* The caller took the lock, so the caller releases it. */
-		ASSERT(MUTEX_HELD(&so->so_lock));
-		ASSERT(so->so_flag & SOLOCKED);
-	}
-	return (rv);
-}
-
-/*
- * Put the socket into the listening state.
- *
- * A negative backlog is treated as 0. If the socket is already
- * listening only the backlog is updated; otherwise sctp_listen() is
- * called with so_lock dropped and, on success, the socket is marked
- * SS_ACCEPTCONN and SS_ISBOUND. Returns 0 or an errno value.
- */
-static int
-sosctp_listen(struct sonode *so, int backlog)
-{
-	int rv = 0;
-
-	if (backlog < 0)
-		backlog = 0;
-
-	mutex_enter(&so->so_lock);
-	so_lock_single(so);
-
-	if (so->so_state & (SS_ISCONNECTING | SS_ISCONNECTED |
-	    SS_ISDISCONNECTING | SS_CANTRCVMORE | SS_CANTSENDMORE)) {
-		/*
-		 * A socket that is connecting, connected, or already
-		 * shut down cannot become a listener.
-		 */
-		rv = EINVAL;
-		eprintsoline(so, rv);
-	} else if (so->so_state & SS_ACCEPTCONN) {
-		/*
-		 * Already listening: this call only changes the backlog,
-		 * so the protocol module does not need to hear about it.
-		 */
-		so->so_backlog = backlog;
-	} else {
-		mutex_exit(&so->so_lock);
-		rv = sctp_listen(so->so_priv);
-		mutex_enter(&so->so_lock);
-
-		if (rv == 0) {
-			so->so_state |= (SS_ACCEPTCONN|SS_ISBOUND);
-			so->so_backlog = backlog;
-			/* LINTED - statement has no conseq */
-		} else {
-			eprintsoline(so, rv);
-		}
-	}
-
-	so_unlock_single(so, SOLOCKED);
-	mutex_exit(&so->so_lock);
-
-	return (rv);
-}
-
-/*
- * Active open.
- *
- * Only used for SOCK_STREAM sockets (asserted). Under so_lock/SOLOCKED
- * the socket state is validated: EISCONN if already connected, EALREADY
- * if a connect is in progress, EOPNOTSUPP if listening, any pending
- * so_error, and EINVAL if the socket is disconnecting or shut down or
- * if no address was supplied. The socket is then marked connecting,
- * so_lock is dropped for the call into sctp_connect(), and on success
- * sosctp_waitconnected() is called with fflag.
- *
- * 0, EINPROGRESS, EALREADY and EINTR leave SS_ISCONNECTING untouched
- * and mark the socket bound; every other error clears SS_ISCONNECTING.
- * EHOSTUNREACH is reported as ENETUNREACH when flags contains
- * _SOCONNECT_XPG4_2.
- */
-static int
-sosctp_connect(struct sonode *so, const struct sockaddr *name,
- socklen_t namelen, int fflag, int flags)
-{
- int error;
-
- ASSERT(so->so_type == SOCK_STREAM);
-
- mutex_enter(&so->so_lock);
- so_lock_single(so);
-
- /*
- * Can't connect() after listen(), or if the socket is already
- * connected.
- */
- if (so->so_state & (SS_ACCEPTCONN|SS_ISCONNECTED|SS_ISCONNECTING)) {
- if (so->so_state & SS_ISCONNECTED) {
- error = EISCONN;
- } else if (so->so_state & SS_ISCONNECTING) {
- error = EALREADY;
- } else {
- error = EOPNOTSUPP;
- }
- eprintsoline(so, error);
- goto done;
- }
-
- /*
- * Check for failure of an earlier call
- */
- if (so->so_error != 0) {
- error = sogeterr(so);
- eprintsoline(so, error);
- goto done;
- }
-
- /*
- * Connection is closing, or closed, don't allow reconnect.
- * TCP allows this to proceed, but the socket remains unwriteable.
- * BSD returns EINVAL.
- */
- if (so->so_state & (SS_ISDISCONNECTING|SS_CANTRCVMORE|
- SS_CANTSENDMORE)) {
- error = EINVAL;
- eprintsoline(so, error);
- goto done;
- }
- if (name == NULL || namelen == 0) {
- error = EINVAL;
- eprintsoline(so, error);
- goto done;
- }
- soisconnecting(so);
-
- mutex_exit(&so->so_lock);
-
- error = sctp_connect(so->so_priv, name, namelen);
-
- mutex_enter(&so->so_lock);
- if (error == 0) {
- /*
- * Allow other threads to access the socket
- */
- error = sosctp_waitconnected(so, fflag);
- }
- switch (error) {
- case 0:
- case EINPROGRESS:
- case EALREADY:
- case EINTR:
- /* Non-fatal errors */
- so->so_state |= SS_ISBOUND;
- break;
- case EHOSTUNREACH:
- if (flags & _SOCONNECT_XPG4_2) {
- /*
- * X/Open specification contains a requirement that
- * ENETUNREACH be returned but does not require
- * EHOSTUNREACH. In order to keep the test suite
- * happy we mess with the errno here.
- */
- error = ENETUNREACH;
- }
- /* FALLTHRU */
-
- default:
- /* clear SS_ISCONNECTING in case it was set */
- so->so_state &= ~SS_ISCONNECTING;
- break;
- }
-done:
- so_unlock_single(so, SOLOCKED);
- mutex_exit(&so->so_lock);
- return (error);
-}
-
-/*
- * Active open for 1-N sockets, create a new association and
- * call connect on that.
- * If the parent hasn't been bound yet (this is the first association),
- * make it so.
- *
- * Only used for SOCK_SEQPACKET sockets (asserted). name/namelen give
- * the peer address and must be non-NULL/non-zero (EINVAL otherwise).
- * fflag is handed to sosctp_assoc_createconn(). When flags contains
- * _SOCONNECT_XPG4_2, EHOSTUNREACH is reported as ENETUNREACH.
- * The association reference obtained from sosctp_assoc_createconn()
- * is dropped before returning. Returns 0 or an errno value.
- */
-static int
-sosctp_seq_connect(struct sonode *so, const struct sockaddr *name,
-    socklen_t namelen, int fflag, int flags)
-{
-	/*
-	 * Start from NULL so that the reference release below never acts
-	 * on an indeterminate pointer should sosctp_assoc_createconn()
-	 * fail without storing through its out-parameter.
-	 */
-	struct sctp_soassoc *ssa = NULL;
-	struct sctp_sonode *ss;
-	int error;
-
-	ASSERT(so->so_type == SOCK_SEQPACKET);
-
-	mutex_enter(&so->so_lock);
-	so_lock_single(so);
-
-	if (name == NULL || namelen == 0) {
-		error = EINVAL;
-		eprintsoline(so, error);
-		goto done;
-	}
-
-	ss = SOTOSSO(so);
-
-	error = sosctp_assoc_createconn(ss, name, namelen, NULL, 0, fflag,
-	    &ssa);
-	if ((error == EHOSTUNREACH) && (flags & _SOCONNECT_XPG4_2)) {
-		/* X/Open callers expect ENETUNREACH here. */
-		error = ENETUNREACH;
-	}
-	if (ssa != NULL) {
-		SSA_REFRELE(ss, ssa);
-	}
-
-done:
-	so_unlock_single(so, SOLOCKED);
-	mutex_exit(&so->so_lock);
-	return (error);
-}
-
-/*
- * Receive data.
- *
- * Delivers the message at the head of ss_rxdata into uiop. The source
- * address (when msg_namelen is non-zero) and ancillary data (when
- * msg_controllen is non-zero) are taken from the T_unitdata_ind at the
- * front of the message; msg_name is allocated here with kmem_alloc().
- * Only one reader is admitted at a time (so_lock_read_intr()). With no
- * data queued the call returns EWOULDBLOCK for non-blocking callers,
- * waits on ss_rxdata_cv otherwise, or returns 0 with empty name and
- * control lengths once SS_CANTRCVMORE is set.
- *
- * After the copy, the per-socket (ss_rxqueued) or per-association
- * (ssa_rxqueued) count is reduced by the bytes read and sctp_recvd() is
- * called when the watermark test below passes.
- *
- * Returns 0 or an errno value; msg->msg_flags is rewritten with
- * MSG_CTRUNC, MSG_NOTIFICATION and MSG_EOR as they apply.
- */
-int
-sosctp_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop)
-{
- struct sctp_sonode *ss = SOTOSSO(so);
- struct sctp_soassoc *ssa = NULL;
- int flags, error = 0;
- struct T_unitdata_ind *tind;
- int len, count, readcnt = 0, rxqueued;
- boolean_t consumed = B_FALSE;
- void *opt;
- mblk_t *mp, *mdata;
-
- flags = msg->msg_flags;
- msg->msg_flags = 0;
-
- if (so->so_type == SOCK_STREAM) {
- if (!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|
- SS_CANTRCVMORE))) {
- return (ENOTCONN);
- }
- } else {
- /* For 1-N socket, recv() cannot be used. */
- if (msg->msg_namelen == 0)
- return (EOPNOTSUPP);
- /*
- * If there are no associations, and no new connections are
- * coming in, there's not going to be new messages coming
- * in either.
- */
- if (ss->ss_rxdata == NULL && ss->ss_assoccnt == 0 &&
- !(so->so_state & SS_ACCEPTCONN)) {
- return (ENOTCONN);
- }
- }
-
- /*
- * out-of-band data not supported.
- */
- if (flags & MSG_OOB) {
- return (EOPNOTSUPP);
- }
-
- /*
- * flag possibilities:
- *
- * MSG_PEEK Don't consume data
- * MSG_WAITALL Wait for full quantity of data (ignored if MSG_PEEK)
- * MSG_DONTWAIT Non-blocking (same as FNDELAY | FNONBLOCK)
- *
- * MSG_WAITALL can return less than the full buffer if either
- *
- * 1. we would block and we are non-blocking
- * 2. a full message cannot be delivered
- *
- * Given that we always get a full message from proto below,
- * MSG_WAITALL is not meaningful.
- */
-
- mutex_enter(&so->so_lock);
-
- /*
- * Allow just one reader at a time.
- */
- error = so_lock_read_intr(so,
- uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? FNONBLOCK : 0));
- if (error) {
- mutex_exit(&so->so_lock);
- return (error);
- }
-again:
- mp = ss->ss_rxdata;
- if (mp != NULL) {
- if (so->so_type == SOCK_SEQPACKET) {
- ssa = *(struct sctp_soassoc **)DB_BASE(mp);
- }
- mutex_exit(&so->so_lock);
-
- tind = (struct T_unitdata_ind *)mp->b_rptr;
-
- len = tind->SRC_length;
-
- if (msg->msg_namelen > 0 && len > 0) {
-
- opt = sogetoff(mp, tind->SRC_offset, len, 1);
-
- ASSERT(opt != NULL);
-
- msg->msg_name = kmem_alloc(len, KM_SLEEP);
- msg->msg_namelen = len;
-
- bcopy(opt, msg->msg_name, len);
- } else {
- msg->msg_namelen = 0;
- }
-
- len = tind->OPT_length;
- if (msg->msg_controllen == 0) {
- if (len > 0) {
- msg->msg_flags |= MSG_CTRUNC;
- }
- } else if (len > 0) {
- opt = sogetoff(mp, tind->OPT_offset, len,
- __TPI_ALIGN_SIZE);
-
- ASSERT(opt != NULL);
- sosctp_pack_cmsg(opt, msg, len);
- } else {
- msg->msg_controllen = 0;
- }
-
- if (mp->b_flag & SCTP_NOTIFICATION) {
- msg->msg_flags |= MSG_NOTIFICATION;
- }
-
- mdata = mp->b_cont;
- while (mdata != NULL) {
- len = MBLKL(mdata);
- count = MIN(uiop->uio_resid, len);
-
- error = uiomove(mdata->b_rptr, count, UIO_READ, uiop);
- /*
- * We will re-read this message the next time.
- */
- if (error != 0) {
- if (msg->msg_namelen > 0) {
- kmem_free(msg->msg_name,
- msg->msg_namelen);
- }
- if (msg->msg_controllen > 0) {
- kmem_free(msg->msg_control,
- msg->msg_controllen);
- }
- mutex_enter(&so->so_lock);
- so_unlock_read(so);
- mutex_exit(&so->so_lock);
- return (error);
- }
- if (!(flags & MSG_PEEK))
- readcnt += count;
- if (uiop->uio_resid == 0) {
- mblk_t *mp1 = ss->ss_rxdata;
- mblk_t *mp2 = mp1->b_cont;
-#ifdef DEBUG
- int rcnt = readcnt;
-#endif
-
- /* Finished with this message? */
- if (count == len && mdata->b_cont == NULL)
- break;
- /*
- * Remove the bits that have been read, the
- * next read will start from where we left
- * off.
- *
- * NOTE(review): nothing in this trimming path
- * tests MSG_PEEK, and readcnt (hence rcnt) stays
- * 0 for a peek -- confirm that a peek which
- * fills the caller's buffer is meant to get here.
- */
- while (mp1->b_cont != mdata) {
-#ifdef DEBUG
- ASSERT(rcnt > MBLKL(mp1->b_cont));
- rcnt -= MBLKL(mp1->b_cont);
-#endif
- mp1 = mp1->b_cont;
- }
-#ifdef DEBUG
- ASSERT(rcnt == count);
-#endif
- if (len > count)
- mp1->b_cont->b_rptr += count;
- else
- mp1 = mp1->b_cont;
- mutex_enter(&so->so_lock);
- if (mp2 != mp1->b_cont) {
- ss->ss_rxdata->b_cont = mp1->b_cont;
- mp1->b_cont = NULL;
- freemsg(mp2);
- }
- goto done;
- }
- mdata = mdata->b_cont;
- }
- if (!(mp->b_flag & SCTP_PARTIAL_DATA))
- msg->msg_flags |= MSG_EOR;
- /*
- * Consume this message
- */
-consume: /* not the target of any goto in this function */
- mutex_enter(&so->so_lock);
- if (!(flags & MSG_PEEK)) {
- ss->ss_rxdata = mp->b_next;
- if (ss->ss_rxtail == &mp->b_next) {
- ss->ss_rxtail = &ss->ss_rxdata;
- }
- mp->b_next = NULL;
- freemsg(mp);
- consumed = B_TRUE;
- }
- } else {
- /*
- * No pending data. Return right away for nonblocking
- * socket, otherwise sleep waiting for data.
- */
- if (!(so->so_state & SS_CANTRCVMORE)) {
- if ((uiop->uio_fmode & (FNDELAY|FNONBLOCK)) ||
- (flags & MSG_DONTWAIT)) {
- error = EWOULDBLOCK;
- } else {
- if (!cv_wait_sig(&ss->ss_rxdata_cv,
- &so->so_lock)) {
- error = EINTR;
- } else {
- goto again;
- }
- }
- } else {
- msg->msg_controllen = 0;
- msg->msg_namelen = 0;
- }
- }
-done:
- /*
- * Determine if we need to update SCTP about the buffer
- * space. For performance reason, we cannot update SCTP
- * every time a message is read. The socket buffer low
- * watermark is used as the threshold.
- */
- if (ssa == NULL) {
- rxqueued = ss->ss_rxqueued;
-
- ss->ss_rxqueued = rxqueued - readcnt;
- count = so->so_rcvbuf - ss->ss_rxqueued;
-
- ASSERT(ss->ss_rxdata != NULL || ss->ss_rxqueued == 0);
-
- so_unlock_read(so);
- mutex_exit(&so->so_lock);
-
- if (readcnt > 0 && (((count > 0) &&
- (rxqueued >= so->so_rcvlowat)) ||
- (ss->ss_rxqueued == 0))) {
- /*
- * If amount of queued data is higher than watermark,
- * update SCTP's idea of available buffer space.
- */
- sctp_recvd(so->so_priv, count);
- }
- } else {
- rxqueued = ssa->ssa_rxqueued;
-
- ssa->ssa_rxqueued = rxqueued - readcnt;
- count = so->so_rcvbuf - ssa->ssa_rxqueued;
-
- so_unlock_read(so);
-
- if (readcnt > 0 &&
- (((count > 0) && (rxqueued >= so->so_rcvlowat)) ||
- (ssa->ssa_rxqueued == 0))) {
- /*
- * If amount of queued data is higher than watermark,
- * update SCTP's idea of available buffer space.
- */
- mutex_exit(&so->so_lock);
-
- sctp_recvd(ssa->ssa_conn, count);
-
- mutex_enter(&so->so_lock);
- }
- if (consumed) {
- SSA_REFRELE(ss, ssa);
- }
- mutex_exit(&so->so_lock);
- }
-
- return (error);
-}
-
-/*
- * Copy count bytes of user data from uiop into a chain of freshly
- * allocated mblks linked behind hdr_mp through b_cont.
- *
- * Every mblk carries at most blk_size bytes of payload and leaves wroff
- * bytes unused in front of it. When an allocation fails, non-blocking
- * callers (FNDELAY/FNONBLOCK in uio_fmode, or MSG_DONTWAIT in flags) get
- * EAGAIN; everyone else waits in strwaitbuf() and retries. Blocks that
- * were already linked behind hdr_mp stay linked when an error is
- * returned. Returns 0 or an errno value.
- */
-int
-sosctp_uiomove(mblk_t *hdr_mp, ssize_t count, ssize_t blk_size, int wroff,
-    struct uio *uiop, int flags, cred_t *cr)
-{
-	mblk_t *tail = hdr_mp;
-	mblk_t *blk;
-	ssize_t chunk;
-	int rv;
-
-	/* One iteration per data block until the whole message is copied. */
-	for (; count > 0; count -= chunk) {
-		chunk = MIN(count, blk_size);
-
-		/*
-		 * SCTP may split a message over several packets, so each
-		 * block reserves wroff bytes ahead of the data for whatever
-		 * SCTP wants to place there.
-		 */
-		for (;;) {
-			blk = allocb_cred(chunk + wroff, cr);
-			if (blk != NULL)
-				break;
-
-			if ((uiop->uio_fmode & (FNDELAY|FNONBLOCK)) ||
-			    (flags & MSG_DONTWAIT)) {
-				return (EAGAIN);
-			}
-
-			rv = strwaitbuf(chunk + wroff, BPRI_MED);
-			if (rv != 0)
-				return (rv);
-		}
-
-		blk->b_datap->db_cpid = curproc->p_pid;
-		ASSERT(wroff <= blk->b_datap->db_lim - blk->b_wptr);
-		blk->b_rptr += wroff;
-
-		rv = uiomove(blk->b_rptr, chunk, UIO_WRITE, uiop);
-		if (rv != 0) {
-			freeb(blk);
-			return (rv);
-		}
-		blk->b_wptr = blk->b_rptr + chunk;
-
-		/* Append the filled block to the end of the chain. */
-		tail->b_cont = blk;
-		tail = blk;
-	}
-	return (0);
-}
-
-/*
- * Send message.
- *
- * SOCK_STREAM only (asserted). MSG_OOB is rejected with EOPNOTSUPP.
- * An SCTP_SNDRCV control message carrying MSG_EOF starts a graceful
- * shutdown instead of sending data; MSG_EOF together with MSG_ABORT, or
- * MSG_EOF with a non-empty uio, is EINVAL.
- *
- * Otherwise the call waits (unless non-blocking) until ss_txqueued is
- * below so_sndbuf, charges the message length to ss_txqueued, connects
- * to msg_name first if the socket is neither connecting nor connected,
- * then builds the header with sctp_alloc_hdr(), copies the data in with
- * sosctp_uiomove() and passes the chain to sctp_sendmsg(). On any
- * failure after the charge, ss_txqueued is credited back and
- * ss_txdata_cv is broadcast.
- *
- * Returns 0 or an errno value. SIGPIPE is posted to the current thread
- * when EPIPE is returned with SS_CANTSENDMORE set.
- */
-static int
-sosctp_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop)
-{
- struct sctp_sonode *ss = SOTOSSO(so);
- mblk_t *mctl;
- struct cmsghdr *cmsg;
- struct sctp_sndrcvinfo *sinfo;
- int optlen, flags, fflag;
- ssize_t count, msglen;
- int error;
-
- ASSERT(so->so_type == SOCK_STREAM);
-
- flags = msg->msg_flags;
- if (flags & MSG_OOB) {
- /*
- * No out-of-band data support.
- */
- return (EOPNOTSUPP);
- }
-
- if (msg->msg_controllen != 0) {
- optlen = msg->msg_controllen;
- cmsg = sosctp_find_cmsg(msg->msg_control, optlen, SCTP_SNDRCV);
- if (cmsg != NULL) {
- if (cmsg->cmsg_len <
- (sizeof (*sinfo) + sizeof (*cmsg))) {
- eprintsoline(so, EINVAL);
- return (EINVAL);
- }
- sinfo = (struct sctp_sndrcvinfo *)(cmsg + 1);
-
- /* Both flags should not be set together. */
- if ((sinfo->sinfo_flags & MSG_EOF) &&
- (sinfo->sinfo_flags & MSG_ABORT)) {
- eprintsoline(so, EINVAL);
- return (EINVAL);
- }
-
- /* Initiate a graceful shutdown. */
- if (sinfo->sinfo_flags & MSG_EOF) {
- /* Can't include data in MSG_EOF message. */
- if (uiop->uio_resid != 0) {
- eprintsoline(so, EINVAL);
- return (EINVAL);
- }
-
- /*
- * This is the same sequence as done in
- * shutdown(SHUT_WR).
- */
- mutex_enter(&so->so_lock);
- so_lock_single(so);
- socantsendmore(so);
- cv_broadcast(&ss->ss_txdata_cv);
- so->so_state |= SS_ISDISCONNECTING;
- mutex_exit(&so->so_lock);
-
- pollwakeup(&ss->ss_poll_list, POLLOUT);
- sctp_recvd(so->so_priv, so->so_rcvbuf);
- error = sctp_disconnect(so->so_priv);
-
- mutex_enter(&so->so_lock);
- so_unlock_single(so, SOLOCKED);
- mutex_exit(&so->so_lock);
- return (error);
- }
- }
- } else {
- optlen = 0;
- }
-
- mutex_enter(&so->so_lock);
- for (;;) {
- if (so->so_state & SS_CANTSENDMORE) {
- mutex_exit(&so->so_lock);
- tsignal(curthread, SIGPIPE);
- return (EPIPE);
- }
-
- if (so->so_error != 0) {
- error = sogeterr(so);
- mutex_exit(&so->so_lock);
- return (error);
- }
-
- if (ss->ss_txqueued < so->so_sndbuf)
- break;
-
- /*
- * Xmit window full in a blocking socket.
- */
- if ((uiop->uio_fmode & (FNDELAY|FNONBLOCK)) ||
- (flags & MSG_DONTWAIT)) {
- mutex_exit(&so->so_lock);
- return (EAGAIN);
- } else {
- /*
- * Wait for space to become available and try again.
- */
- error = cv_wait_sig(&ss->ss_txdata_cv, &so->so_lock);
- if (!error) { /* signal */
- mutex_exit(&so->so_lock);
- return (EINTR);
- }
- }
- }
- msglen = count = uiop->uio_resid;
-
- /* Don't allow sending a message larger than the send buffer size. */
- if (msglen > so->so_sndbuf) {
- mutex_exit(&so->so_lock);
- return (EMSGSIZE);
- }
-
- /*
- * Update TX buffer usage here so that we can lift the socket lock.
- */
- ss->ss_txqueued += msglen;
-
- /*
- * Allow piggybacking data on handshake messages (SS_ISCONNECTING).
- */
- if (!(so->so_state & (SS_ISCONNECTING | SS_ISCONNECTED))) {
- /*
- * We need to check here for listener so that the
- * same error will be returned as with a TCP socket.
- * In this case, sosctp_connect() returns EOPNOTSUPP
- * while a TCP socket returns ENOTCONN instead. Catch it
- * here to have the same behavior as a TCP socket.
- *
- * We also need to make sure that the peer address is
- * provided before we attempt to do the connect.
- */
- if ((so->so_state & SS_ACCEPTCONN) ||
- msg->msg_name == NULL) {
- mutex_exit(&so->so_lock);
- error = ENOTCONN;
- goto error_nofree;
- }
- mutex_exit(&so->so_lock);
- fflag = uiop->uio_fmode;
- if (flags & MSG_DONTWAIT) {
- fflag |= FNDELAY;
- }
- error = sosctp_connect(so, msg->msg_name, msg->msg_namelen,
- fflag, (so->so_version == SOV_XPG4_2) * _SOCONNECT_XPG4_2);
- if (error) {
- /*
- * Check for non-fatal errors, socket connected
- * while the lock had been lifted.
- */
- if (error != EISCONN && error != EALREADY) {
- goto error_nofree;
- }
- error = 0;
- }
- } else {
- mutex_exit(&so->so_lock);
- }
-
- mctl = sctp_alloc_hdr(msg->msg_name, msg->msg_namelen,
- msg->msg_control, optlen, SCTP_CAN_BLOCK);
- if (mctl == NULL) {
- error = EINTR;
- goto error_nofree;
- }
-
- /* Copy in the message. */
- if ((error = sosctp_uiomove(mctl, count, ss->ss_wrsize, ss->ss_wroff,
- uiop, flags, CRED())) != 0) {
- goto error_ret;
- }
- error = sctp_sendmsg(so->so_priv, mctl, 0);
- if (error == 0)
- return (0);
-
-error_ret:
- freemsg(mctl);
-error_nofree:
- mutex_enter(&so->so_lock);
- ss->ss_txqueued -= msglen; /* give back the space charged above */
- cv_broadcast(&ss->ss_txdata_cv);
- if ((error == EPIPE) && (so->so_state & SS_CANTSENDMORE)) {
- /*
- * We received shutdown between the time lock was
- * lifted and call to sctp_sendmsg().
- */
- mutex_exit(&so->so_lock);
- tsignal(curthread, SIGPIPE);
- return (EPIPE);
- }
- mutex_exit(&so->so_lock);
- return (error);
-}
-
-/*
- * Send message on 1-N socket. Connects automatically if there is
- * no association.
- *
- * SOCK_SEQPACKET only (asserted). The target association is selected
- * by sinfo_assoc_id from an SCTP_SNDRCV control message when one is
- * present and non-zero; otherwise a new association is created towards
- * msg_name. A reference on the association is held while it is in use
- * and dropped before returning.
- *
- * MSG_EOF in sinfo_flags starts a graceful shutdown of the association;
- * it cannot be combined with data or with an implicit connect (EINVAL).
- *
- * For a normal send the call waits (unless non-blocking) until
- * ssa_txqueued is below so_sndbuf, charges the message length to
- * ssa_txqueued, and hands the data to sctp_sendmsg() with so_lock
- * dropped. On failure the charge is returned and ss_txdata_cv is
- * broadcast.
- *
- * Returns 0 or an errno value. SIGPIPE is posted to the current thread
- * when EPIPE is returned because the association has SS_CANTSENDMORE.
- */
-static int
-sosctp_seq_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop)
-{
-	struct sctp_sonode *ss;
-	/*
-	 * Start from NULL so the "ssa == NULL" test after a failed
-	 * sosctp_assoc_createconn() never reads an indeterminate pointer.
-	 */
-	struct sctp_soassoc *ssa = NULL;
-	struct cmsghdr *cmsg;
-	struct sctp_sndrcvinfo *sinfo;
-	int aid = 0;
-	mblk_t *mctl;
-	int namelen, optlen, flags;
-	ssize_t count, msglen;
-	int error;
-	uint16_t s_flags = 0;
-
-	ASSERT(so->so_type == SOCK_SEQPACKET);
-
-	/*
-	 * There shouldn't be problems with alignment, as the memory for
-	 * msg_control was alloced with kmem_alloc.
-	 */
-	cmsg = sosctp_find_cmsg(msg->msg_control, msg->msg_controllen,
-	    SCTP_SNDRCV);
-	if (cmsg != NULL) {
-		if (cmsg->cmsg_len < (sizeof (*sinfo) + sizeof (*cmsg))) {
-			eprintsoline(so, EINVAL);
-			return (EINVAL);
-		}
-		sinfo = (struct sctp_sndrcvinfo *)(cmsg + 1);
-		s_flags = sinfo->sinfo_flags;
-		aid = sinfo->sinfo_assoc_id;
-	}
-
-	ss = SOTOSSO(so);
-	namelen = msg->msg_namelen;
-
-	if (msg->msg_controllen > 0) {
-		optlen = msg->msg_controllen;
-	} else {
-		optlen = 0;
-	}
-
-	mutex_enter(&so->so_lock);
-
-	/*
-	 * If there is no association id, connect to address specified
-	 * in msg_name. Otherwise look up the association using the id.
-	 */
-	if (aid == 0) {
-		/*
-		 * Connect and shutdown cannot be done together, so check for
-		 * MSG_EOF.
-		 */
-		if (msg->msg_name == NULL || namelen == 0 ||
-		    (s_flags & MSG_EOF)) {
-			error = EINVAL;
-			eprintsoline(so, error);
-			goto done;
-		}
-		flags = uiop->uio_fmode;
-		if (msg->msg_flags & MSG_DONTWAIT) {
-			flags |= FNDELAY;
-		}
-		so_lock_single(so);
-		error = sosctp_assoc_createconn(ss, msg->msg_name, namelen,
-		    msg->msg_control, optlen, flags, &ssa);
-		if (error) {
-			if ((so->so_version == SOV_XPG4_2) &&
-			    (error == EHOSTUNREACH)) {
-				error = ENETUNREACH;
-			}
-			if (ssa == NULL) {
-				/*
-				 * Fatal error during connect(). Bail out.
-				 * If ssa exists, it means that the handshake
-				 * is in progress.
-				 */
-				eprintsoline(so, error);
-				so_unlock_single(so, SOLOCKED);
-				goto done;
-			}
-			/*
-			 * All the errors are non-fatal ones, don't return
-			 * e.g. EINPROGRESS from sendmsg().
-			 */
-			error = 0;
-		}
-		so_unlock_single(so, SOLOCKED);
-	} else {
-		if ((error = sosctp_assoc(ss, aid, &ssa)) != 0) {
-			eprintsoline(so, error);
-			goto done;
-		}
-	}
-
-	/*
-	 * Now we have an association.
-	 */
-	flags = msg->msg_flags;
-
-	/*
-	 * MSG_EOF initiates graceful shutdown.
-	 */
-	if (s_flags & MSG_EOF) {
-		if (uiop->uio_resid) {
-			/*
-			 * Can't include data in MSG_EOF message.
-			 */
-			error = EINVAL;
-		} else {
-			/*
-			 * Mark the association while so_lock is still held:
-			 * ssa_state is examined under so_lock by the send
-			 * loop below, so it must not be modified after the
-			 * lock has been dropped.
-			 */
-			ssa->ssa_state |= SS_ISDISCONNECTING;
-			mutex_exit(&so->so_lock);
-			sctp_recvd(ssa->ssa_conn, so->so_rcvbuf);
-			error = sctp_disconnect(ssa->ssa_conn);
-			mutex_enter(&so->so_lock);
-		}
-		goto refrele;
-	}
-
-	for (;;) {
-		if (ssa->ssa_state & SS_CANTSENDMORE) {
-			SSA_REFRELE(ss, ssa);
-			mutex_exit(&so->so_lock);
-			tsignal(curthread, SIGPIPE);
-			return (EPIPE);
-		}
-
-		if (ssa->ssa_error != 0) {
-			/* Report the pending error once and clear it. */
-			error = ssa->ssa_error;
-			ssa->ssa_error = 0;
-			goto refrele;
-		}
-
-		if (ssa->ssa_txqueued < so->so_sndbuf)
-			break;
-
-		if ((uiop->uio_fmode & (FNDELAY|FNONBLOCK)) ||
-		    (flags & MSG_DONTWAIT)) {
-			error = EAGAIN;
-			goto refrele;
-		} else {
-			/*
-			 * Wait for space to become available and try again.
-			 */
-			error = cv_wait_sig(&ss->ss_txdata_cv, &so->so_lock);
-			if (!error) { /* signal */
-				error = EINTR;
-				goto refrele;
-			}
-		}
-	}
-
-	msglen = count = uiop->uio_resid;
-
-	/* Don't allow sending a message larger than the send buffer size. */
-	if (msglen > so->so_sndbuf) {
-		error = EMSGSIZE;
-		goto refrele;
-	}
-
-	/*
-	 * Update TX buffer usage here so that we can lift the socket lock.
-	 */
-	ssa->ssa_txqueued += msglen;
-
-	mutex_exit(&so->so_lock);
-
-	mctl = sctp_alloc_hdr(msg->msg_name, namelen, msg->msg_control,
-	    optlen, SCTP_CAN_BLOCK);
-	if (mctl == NULL) {
-		error = EINTR;
-		goto lock_rele;
-	}
-
-	/* Copy in the message. */
-	if ((error = sosctp_uiomove(mctl, count, ssa->ssa_wrsize,
-	    ssa->ssa_wroff, uiop, flags, CRED())) != 0) {
-		goto lock_rele;
-	}
-	error = sctp_sendmsg(ssa->ssa_conn, mctl, 0);
-lock_rele:
-	mutex_enter(&so->so_lock);
-	if (error != 0) {
-		/* mctl is NULL when the header allocation itself failed. */
-		if (mctl != NULL) {
-			freemsg(mctl);
-		}
-		/* Return the space charged above and wake blocked senders. */
-		ssa->ssa_txqueued -= msglen;
-		cv_broadcast(&ss->ss_txdata_cv);
-		if ((error == EPIPE) && (ssa->ssa_state & SS_CANTSENDMORE)) {
-			/*
-			 * We received shutdown between the time lock was
-			 * lifted and call to sctp_sendmsg().
-			 */
-			SSA_REFRELE(ss, ssa);
-			mutex_exit(&so->so_lock);
-			tsignal(curthread, SIGPIPE);
-			return (EPIPE);
-		}
-	}
-
-refrele:
-	SSA_REFRELE(ss, ssa);
-done:
-	mutex_exit(&so->so_lock);
-	return (error);
-}
-
-/*
- * Fetch the peer's address into so_faddr_sa / so_faddr_len.
- *
- * Returns EOPNOTSUPP for anything other than a SOCK_STREAM socket,
- * ENOTCONN when the socket is not connected, and otherwise whatever
- * sctp_getpeername() returns.
- */
-static int
-sosctp_getpeername(struct sonode *so)
-{
-	/* SEQPACKET can have multiple end-points. */
-	if (so->so_type != SOCK_STREAM)
-		return (EOPNOTSUPP);
-
-	if ((so->so_state & SS_ISCONNECTED) == 0)
-		return (ENOTCONN);
-
-	return (sctp_getpeername(so->so_priv, so->so_faddr_sa,
-	    &so->so_faddr_len));
-}
-
-/*
- * Fetch the local address into so_laddr_sa / so_laddr_len.
- *
- * A bound socket asks the protocol via sctp_getsockname(), with so_lock
- * released. An unbound socket reports an all-zero address of the
- * right size for its family, with only sa_family filled in, and
- * returns 0.
- */
-static int
-sosctp_getsockname(struct sonode *so)
-{
-	mutex_enter(&so->so_lock);
-
-	if (so->so_state & SS_ISBOUND) {
-		mutex_exit(&so->so_lock);
-		return (sctp_getsockname(so->so_priv, so->so_laddr_sa,
-		    &so->so_laddr_len));
-	}
-
-	/* Not bound: zero address, except for the address family. */
-	bzero(so->so_laddr_sa, so->so_laddr_maxlen);
-	if (so->so_family == AF_INET6) {
-		so->so_laddr_len = sizeof (struct sockaddr_in6);
-	} else {
-		so->so_laddr_len = sizeof (struct sockaddr_in);
-	}
-	so->so_laddr_sa->sa_family = so->so_family;
-
-	mutex_exit(&so->so_lock);
-	return (0);
-}
-
-/*
- * Called from shutdown().
- *
- * Not supported on SOCK_SEQPACKET sockets (EOPNOTSUPP) and requires a
- * connected socket (ENOTCONN). how selects SHUT_RD, SHUT_WR or
- * SHUT_RDWR; any other value is EINVAL. Waiters and pollers are woken
- * for each direction that this call newly shut down, and when the send
- * side was newly shut down sctp_disconnect() is called with so_lock
- * dropped. EWOULDBLOCK from sctp_disconnect() is reported as success.
- */
-static int
-sosctp_shutdown(struct sonode *so, int how)
-{
- struct sctp_sonode *ss = SOTOSSO(so);
- uint_t state_change;
- int error = 0;
- short wakesig = 0;
-
- if (so->so_type == SOCK_SEQPACKET) {
- return (EOPNOTSUPP);
- }
- mutex_enter(&so->so_lock);
- so_lock_single(so);
-
- /*
- * SunOS 4.X has no check for datagram sockets.
- * 5.X checks that it is connected (ENOTCONN)
- * X/Open requires that we check the connected state.
- */
- if (!(so->so_state & SS_ISCONNECTED)) {
- error = ENOTCONN;
- goto done;
- }
-
- /*
- * Record the current state and then perform any state changes.
- * Then use the difference between the old and new states to
- * determine which needs to be done.
- */
- state_change = so->so_state;
-
- switch (how) {
- case SHUT_RD:
- socantrcvmore(so);
- break;
- case SHUT_WR:
- socantsendmore(so);
- break;
- case SHUT_RDWR:
- socantsendmore(so);
- socantrcvmore(so);
- break;
- default:
- error = EINVAL;
- goto done;
- }
-
- state_change = so->so_state & ~state_change; /* bits newly set by this call */
-
- if (state_change & SS_CANTRCVMORE) {
- if (ss->ss_rxdata == NULL) {
- cv_signal(&ss->ss_rxdata_cv);
- }
- wakesig = POLLIN|POLLRDNORM;
-
- sosctp_sendsig(ss, SCTPSIG_READ);
- }
- if (state_change & SS_CANTSENDMORE) {
- cv_broadcast(&ss->ss_txdata_cv);
- wakesig |= POLLOUT;
-
- so->so_state |= SS_ISDISCONNECTING;
- }
- mutex_exit(&so->so_lock);
-
- pollwakeup(&ss->ss_poll_list, wakesig);
-
- if (state_change & SS_CANTSENDMORE) {
- sctp_recvd(so->so_priv, so->so_rcvbuf);
- error = sctp_disconnect(so->so_priv);
- }
- mutex_enter(&so->so_lock);
-done:
- so_unlock_single(so, SOLOCKED);
- mutex_exit(&so->so_lock);
-
- /*
- * HACK: sctp_disconnect() may return EWOULDBLOCK. But this error is
- * not documented in standard socket API. Catch it here.
- */
- if (error == EWOULDBLOCK)
- error = 0;
- return (error);
-}
-
-/*
- * Get socket options.
- *
- * For SOL_SOCKET the caller's buffer size (*optlenp) is validated for
- * the options listed below, and most values are answered directly from
- * state recorded in the sonode without calling into SCTP. SO_LINGER
- * and SOL_SOCKET options not handled here are passed to sctp_get_opt();
- * for SO_LINGER the recorded so_linger is returned if that call fails.
- * IPPROTO_SCTP level requests are rejected with EINVAL. Any other
- * level is passed to sctp_get_opt() with so_lock dropped.
- *
- * On success *optlenp is set to the number of bytes copied to optval,
- * which never exceeds the size passed in. Returns 0 or an errno value.
- */
-/*ARGSUSED5*/
-static int
-sosctp_getsockopt(struct sonode *so, int level, int option_name,
- void *optval, socklen_t *optlenp, int flags)
-{
- int error = 0;
- void *option = NULL;
- socklen_t maxlen = *optlenp;
- socklen_t len;
- socklen_t optlen;
- uint32_t value;
- uint8_t buffer[4];
- void *optbuf = &buffer;
-
- mutex_enter(&so->so_lock);
-
- if (level == SOL_SOCKET) {
- switch (option_name) {
- /* Not supported options */
- case SO_SNDTIMEO:
- case SO_RCVTIMEO:
- case SO_EXCLBIND:
- error = ENOPROTOOPT;
- eprintsoline(so, error);
- goto done;
-
- case SO_TYPE:
- case SO_ERROR:
- case SO_DEBUG:
- case SO_ACCEPTCONN:
- case SO_REUSEADDR:
- case SO_KEEPALIVE:
- case SO_DONTROUTE:
- case SO_BROADCAST:
- case SO_USELOOPBACK:
- case SO_OOBINLINE:
- case SO_SNDBUF:
- case SO_RCVBUF:
- case SO_SNDLOWAT:
- case SO_RCVLOWAT:
- case SO_DGRAM_ERRIND:
- case SO_PROTOTYPE:
- case SO_DOMAIN:
- if (maxlen < (t_uscalar_t)sizeof (int32_t)) {
- error = EINVAL;
- eprintsoline(so, error);
- goto done;
- }
- break;
- case SO_LINGER:
- if (maxlen < (t_uscalar_t)sizeof (struct linger)) {
- error = EINVAL;
- eprintsoline(so, error);
- goto done;
- }
- break;
- }
- len = (t_uscalar_t)sizeof (uint32_t); /* Default */
- option = &value;
-
- /*
- * Most of the SOL_SOCKET level option values are also
- * recorded in sockfs. So we can return the recorded value
- * here without calling into SCTP.
- */
- switch (option_name) {
- case SO_TYPE:
- value = so->so_type;
- goto copyout;
-
- case SO_ERROR:
- value = sogeterr(so);
- goto copyout;
-
- case SO_ACCEPTCONN:
- value = (so->so_state & SS_ACCEPTCONN) ?
- SO_ACCEPTCONN : 0;
- goto copyout;
-
- case SO_DEBUG:
- case SO_REUSEADDR:
- case SO_KEEPALIVE:
- case SO_DONTROUTE:
- case SO_BROADCAST:
- case SO_USELOOPBACK:
- case SO_OOBINLINE:
- case SO_DGRAM_ERRIND:
- value = (so->so_options & option_name);
- goto copyout;
-
- case SO_SNDBUF:
- value = so->so_sndbuf;
- goto copyout;
-
- case SO_RCVBUF:
- value = so->so_rcvbuf;
- goto copyout;
-
- case SO_SNDLOWAT:
- value = so->so_sndlowat;
- goto copyout;
-
- case SO_RCVLOWAT:
- value = so->so_rcvlowat;
- goto copyout;
-
- case SO_PROTOTYPE:
- value = IPPROTO_SCTP;
- goto copyout;
-
- case SO_DOMAIN:
- value = so->so_family;
- goto copyout;
-
- case SO_LINGER:
- option = &so->so_linger;
- len = (t_uscalar_t)sizeof (struct linger);
- break;
-
- default:
- option = NULL;
- break;
- }
- }
- if (level == IPPROTO_SCTP) {
- /*
- * Should go through ioctl().
- */
- error = EINVAL;
- goto done;
- }
- if (maxlen > sizeof (buffer)) {
- optbuf = kmem_alloc(maxlen, KM_SLEEP);
- }
- optlen = maxlen;
- mutex_exit(&so->so_lock);
- /*
- * If the resulting optlen is greater than the provided maxlen, then
- * we silently truncate.
- */
- error = sctp_get_opt(so->so_priv, level, option_name, optbuf, &optlen);
- mutex_enter(&so->so_lock);
- if (error != 0) {
- if (option == NULL) {
- /* We have no fallback value */
- eprintsoline(so, error);
- goto free;
- }
- error = 0;
- goto copyout;
- }
-
- option = optbuf;
- len = optlen;
-
-copyout:
- len = MIN(len, maxlen);
- bcopy(option, optval, len);
- *optlenp = len;
-
-free:
- if (optbuf != &buffer) {
- kmem_free(optbuf, maxlen); /* size matches the kmem_alloc() above */
- }
-done:
- mutex_exit(&so->so_lock);
- return (error);
-}
-
-/*
- * Set socket options
- */
-static int
-sosctp_setsockopt(struct sonode *so, int level, int option_name,
- const void *optval, t_uscalar_t optlen)
-{
- struct sctp_sonode *ss = SOTOSSO(so);
- struct sctp_soassoc *ssa = NULL;
- sctp_assoc_t id;
- int error, rc;
- void *conn = NULL;
-
- /* X/Open requires this check */
- if (so->so_state & SS_CANTSENDMORE) {
- return (EINVAL);
- }
- if ((option_name == SCTP_UC_SWAP) && (level == IPPROTO_SCTP)) {
- error = EOPNOTSUPP;
- eprintsoline(so, error);
- return (error);
- }
-
- /* Caller allocates aligned optval, or passes null */
- ASSERT(((uintptr_t)optval & (sizeof (t_scalar_t) - 1)) == 0);
-
- /* No SCTP options should be zero-length */
- if (optlen == 0) {
- error = EINVAL;
- eprintsoline(so, error);
- return (error);
- }
-
- mutex_enter(&so->so_lock);
- so_lock_single(so);
-
- /*
- * For some SCTP level options, one can select the association this
- * applies to.
- */
- if (so->so_type == SOCK_STREAM) {
- conn = so->so_priv;
- } else {
- /*
- * SOCK_SEQPACKET only
- */
- id = 0;
- if (level == IPPROTO_SCTP) {
- switch (option_name) {
- case SCTP_RTOINFO:
- case SCTP_ASSOCINFO:
- case SCTP_SET_PEER_PRIMARY_ADDR:
- case SCTP_PRIMARY_ADDR:
- case SCTP_PEER_ADDR_PARAMS:
- /*
- * Association ID is the first element
- * params struct
- */
- if (optlen < sizeof (sctp_assoc_t)) {
- error = EINVAL;
- eprintsoline(so, error);
- goto done;
- }
- id = *(sctp_assoc_t *)optval;
- break;
- case SCTP_DEFAULT_SEND_PARAM:
- if (optlen != sizeof (struct sctp_sndrcvinfo)) {
- error = EINVAL;
- eprintsoline(so, error);
- goto done;
- }
- id = ((struct sctp_sndrcvinfo *)
- optval)->sinfo_assoc_id;
- break;
- case SCTP_INITMSG:
- /*
- * Only applies to future associations
- */
- conn = so->so_priv;
- break;
- default:
- break;
- }
- } else if (level == SOL_SOCKET) {
- if (option_name == SO_LINGER) {
- error = EOPNOTSUPP;
- eprintsoline(so, error);
- goto done;
- }
- /*
- * These 2 options are applied to all associations.
- * The other socket level options are only applied
- * to the socket (not associations).
- */
- if ((option_name != SO_RCVBUF) &&
- (option_name != SO_SNDBUF)) {
- conn = so->so_priv;
- }
- } else {
- conn = NULL;
- }
-
- /*
- * If association ID was specified, do op on that assoc.
- * Otherwise set the default setting of a socket.
- */
- if (id != 0) {
- if ((error = sosctp_assoc(ss, id, &ssa)) != 0) {
- eprintsoline(so, error);
- goto done;
- }
- conn = ssa->ssa_conn;
- }
- }
- dprint(2, ("sosctp_setsockopt %p (%d) - conn %p %d %d id:%d\n",
- (void *)ss, so->so_type, conn, level, option_name, id));
-
- ASSERT(ssa == NULL || (ssa != NULL && conn != NULL));
- if (conn != NULL) {
- mutex_exit(&so->so_lock);
- error = sctp_set_opt(conn, level, option_name, optval, optlen);
- mutex_enter(&so->so_lock);
- if (ssa != NULL)
- SSA_REFRELE(ss, ssa);
- } else {
- /*
- * 1-N socket, and we have to apply the operation to ALL
- * associations. Like with anything of this sort, the
- * problem is what to do if the operation fails.
- * Just try to apply the setting to everyone, but store
- * error number if someone returns such. And since we are
- * looping through all possible aids, some of them can be
- * invalid. We just ignore this kind (sosctp_assoc()) of
- * errors.
- */
- sctp_assoc_t aid;
-
- mutex_exit(&so->so_lock);
- error = sctp_set_opt(so->so_priv, level, option_name, optval,
- optlen);
- mutex_enter(&so->so_lock);
- for (aid = 1; aid < ss->ss_maxassoc; aid++) {
- if (sosctp_assoc(ss, aid, &ssa) != 0)
- continue;
- mutex_exit(&so->so_lock);
- rc = sctp_set_opt(ssa->ssa_conn, level, option_name,
- optval, optlen);
- mutex_enter(&so->so_lock);
- SSA_REFRELE(ss, ssa);
- if (error == 0) {
- error = rc;
- }
- }
- }
- /*
- * Check for SOL_SOCKET options and record their values.
- * If we know about a SOL_SOCKET parameter and the transport
- * failed it with TBADOPT or TOUTSTATE (i.e. ENOPROTOOPT or
- * EPROTO) we let the setsockopt succeed.
- */
- if (level == SOL_SOCKET) {
- boolean_t handled = B_FALSE;
-
- /* Check parameters */
- switch (option_name) {
- case SO_DEBUG:
- case SO_REUSEADDR:
- case SO_KEEPALIVE:
- case SO_DONTROUTE:
- case SO_BROADCAST:
- case SO_USELOOPBACK:
- case SO_OOBINLINE:
- case SO_SNDBUF:
- case SO_RCVBUF:
- case SO_SNDLOWAT:
- case SO_RCVLOWAT:
- case SO_DGRAM_ERRIND:
- if (optlen != (t_uscalar_t)sizeof (int32_t)) {
- error = EINVAL;
- eprintsoline(so, error);
- goto done;
- }
- ASSERT(optval);
- handled = B_TRUE;
- break;
- case SO_LINGER:
- if (optlen != (t_uscalar_t)sizeof (struct linger)) {
- error = EINVAL;
- eprintsoline(so, error);
- goto done;
- }
- ASSERT(optval);
- handled = B_TRUE;
- break;
- }
-
-#define intvalue (*(int32_t *)optval)
-
- switch (option_name) {
- case SO_SNDTIMEO:
- case SO_RCVTIMEO:
- case SO_EXCLBIND:
- case SO_TYPE:
- case SO_ERROR:
- case SO_ACCEPTCONN:
- case SO_PROTOTYPE:
- case SO_DOMAIN:
- /* Can't be set */
- error = ENOPROTOOPT;
- goto done;
- case SO_LINGER: {
- struct linger *l = (struct linger *)optval;
-
- so->so_linger.l_linger = l->l_linger;
- if (l->l_onoff) {
- so->so_linger.l_onoff = SO_LINGER;
- so->so_options |= SO_LINGER;
- } else {
- so->so_linger.l_onoff = 0;
- so->so_options &= ~SO_LINGER;
- }
- break;
- }
-
- case SO_DEBUG:
- case SO_REUSEADDR:
- case SO_KEEPALIVE:
- case SO_DONTROUTE:
- case SO_BROADCAST:
- case SO_USELOOPBACK:
- case SO_OOBINLINE:
- case SO_DGRAM_ERRIND:
- if (intvalue != 0) {
- dprintso(so, 1,
- ("sosctp_setsockopt: setting 0x%x\n",
- option_name));
- so->so_options |= option_name;
- } else {
- dprintso(so, 1,
- ("sosctp_setsockopt: clearing 0x%x\n",
- option_name));
- so->so_options &= ~option_name;
- }
- break;
- /*
- * The following options are only returned by us when
- * the sctp_set_opt fails.
- * XXX XPG 4.2 applications retrieve SO_RCVBUF from
- * sockfs since the transport might adjust the value
- * and not return exactly what was set by the
- * application.
- */
- case SO_SNDBUF:
- so->so_sndbuf = intvalue;
- if (so->so_sndlowat > so->so_sndbuf) {
- so->so_sndlowat = so->so_sndbuf;
- }
- break;
- case SO_RCVBUF:
- so->so_rcvbuf = intvalue;
- if (so->so_rcvlowat > so->so_rcvbuf) {
- so->so_rcvlowat = so->so_rcvbuf;
- }
- break;
- case SO_SNDLOWAT:
- so->so_sndlowat = intvalue;
- if (so->so_sndlowat > so->so_sndbuf) {
- so->so_sndlowat = so->so_sndbuf;
- }
- break;
- case SO_RCVLOWAT:
- so->so_rcvlowat = intvalue;
- if (so->so_rcvlowat > so->so_rcvbuf) {
- so->so_rcvlowat = so->so_rcvbuf;
- }
- break;
- }
-#undef intvalue
-
- if (error != 0) {
- if ((error == ENOPROTOOPT || error == EPROTO ||
- error == EINVAL) && handled) {
- dprintso(so, 1,
- ("sosctp_setsockopt: ignoring error %d "
- "for 0x%x\n", error, option_name));
- error = 0;
- }
- }
- }
-
-done:
- so_unlock_single(so, SOLOCKED);
- mutex_exit(&so->so_lock);
-
- return (error);
-}
-
-/*
- * Upcalls from SCTP
- */
-
-/*
- * Upcall: incoming connection on a listening 1-1 (SOCK_STREAM) socket.
- *
- * parenthandle is the listener's sctp_sonode. connind is the transport's
- * handle for the new connection; it is stored as the new socket's so_priv.
- * A new sonode is created with sosctp_create(), inherits the listener's
- * properties, and a pointer to it is queued on the listener inside an
- * M_PROTO mblk through soqueueconnind().
- *
- * Returns the new sctp_sonode, or NULL if the backlog is full or if the
- * mblk or the new socket cannot be allocated.
- */
-static void *
-sctp_sock_newconn(void *parenthandle, void *connind)
-{
- struct sctp_sonode *lss = parenthandle;
- struct sonode *lso = &lss->ss_so;
- struct sonode *nso;
- struct sctp_sonode *nss;
- mblk_t *mp;
- int error;
-
- ASSERT(lso->so_state & SS_ACCEPTCONN);
- ASSERT(lso->so_priv != NULL); /* closed conn */
- ASSERT(lso->so_type == SOCK_STREAM);
-
- /*
- * Check current # of queued conns against backlog.
- * NOTE(review): ss_rxqueued is read here without so_lock but is
- * incremented under so_lock below - confirm this upcall is
- * serialized by the caller.
- */
- if (lss->ss_rxqueued >= lso->so_backlog) {
- return (NULL);
- }
-
- /*
- * Need to create a new socket. The mblk only has to carry one
- * pointer (to the new sonode), hence sizeof (nso).
- */
- mp = allocb(sizeof (nso), BPRI_MED);
- if (mp == NULL) {
- eprintsoline(lso, ENOMEM);
- return (NULL);
- }
- DB_TYPE(mp) = M_PROTO;
-
- /*
- * Take a hold on the access vnode for the new socket; it is
- * released again right below if sosctp_create() fails.
- */
- VN_HOLD(lso->so_accessvp);
- nso = sosctp_create(lso->so_accessvp, lso->so_family, lso->so_type,
- lso->so_protocol, lso->so_version, lso, &error);
- if (nso == NULL) {
- VN_RELE(lso->so_accessvp);
- freeb(mp);
- eprintsoline(lso, error);
- return (NULL);
- }
-
- dprint(2, ("sctp_stream_newconn: new %p\n", (void *)nso));
-
- nss = SOTOSSO(nso);
-
- /*
- * Inherit socket properties. The listener's lock is taken first,
- * then the new socket's lock.
- */
- mutex_enter(&lso->so_lock);
- mutex_enter(&nso->so_lock);
-
- nso->so_state |= (SS_ISBOUND | SS_ISCONNECTED |
- (lso->so_state & SS_ASYNC));
- sosctp_so_inherit(lss, nss);
- nso->so_priv = connind;
-
- mutex_exit(&nso->so_lock);
-
- /* On a listener ss_rxqueued counts queued connections. */
- ++lss->ss_rxqueued;
- mutex_exit(&lso->so_lock);
-
- /*
- * Copy pointer to new socket to connind queue message
- */
- *(struct sonode **)mp->b_wptr = nso;
- mp->b_wptr += sizeof (nso);
-
- /*
- * Wake people who're waiting for incoming conns. Note that
- * soqueueconnind gets so_lock, so it must be called unlocked.
- */
- soqueueconnind(lso, mp);
- pollwakeup(&lss->ss_poll_list, POLLIN|POLLRDNORM);
-
- mutex_enter(&lso->so_lock);
- sosctp_sendsig(lss, SCTPSIG_READ);
- mutex_exit(&lso->so_lock);
-
- return (nss);
-}
-
-/*
- * This is the upcall function for a 1-N (SOCK_SEQPACKET) socket when a new
- * association is created. Note that the first argument (handle) is of type
- * sctp_sonode *, which is the one changed to a listener for new
- * associations. All the other upcalls for a 1-N socket take sctp_soassoc *
- * as handle. The only exception is the su_properties upcall, which
- * can take both types as handle.
- *
- * connind is the transport's handle for the new association and is stored
- * in ssa_conn. Returns the new sctp_soassoc, or NULL if the association id
- * array cannot be grown or the sctp_soassoc cannot be allocated (both
- * attempts use KM_NOSLEEP).
- */
-static void *
-sctp_assoc_newconn(void *parenthandle, void *connind)
-{
- struct sctp_sonode *lss = (struct sctp_sonode *)parenthandle;
- struct sonode *lso = &lss->ss_so;
- struct sctp_soassoc *ssa;
- sctp_assoc_t id;
-
- ASSERT(lss->ss_type == SOSCTP_SOCKET);
- ASSERT(lso->so_state & SS_ACCEPTCONN);
- ASSERT(lso->so_priv != NULL); /* closed conn */
- ASSERT(lso->so_type == SOCK_SEQPACKET);
-
- mutex_enter(&lso->so_lock);
-
- if ((id = sosctp_aid_get(lss)) == -1) {
- /*
- * Array not large enough; increase size.
- */
- if (sosctp_aid_grow(lss, lss->ss_maxassoc, KM_NOSLEEP) < 0) {
- mutex_exit(&lso->so_lock);
- return (NULL);
- }
- id = sosctp_aid_get(lss);
- ASSERT(id != -1);
- }
-
- /*
- * Create soassoc for this connection. The id is only reserved
- * after this succeeds, so nothing has to be undone on failure.
- */
- ssa = sosctp_assoc_create(lss, KM_NOSLEEP);
- if (ssa == NULL) {
- mutex_exit(&lso->so_lock);
- return (NULL);
- }
- sosctp_aid_reserve(lss, id, 1);
- lss->ss_assocs[id].ssi_assoc = ssa;
- ++lss->ss_assoccnt;
- ssa->ssa_id = id;
- ssa->ssa_conn = connind;
- ssa->ssa_state = (SS_ISBOUND | SS_ISCONNECTED);
- /* Start from the socket-wide write offset and block size. */
- ssa->ssa_wroff = lss->ss_wroff;
- ssa->ssa_wrsize = lss->ss_wrsize;
-
- mutex_exit(&lso->so_lock);
-
- return (ssa);
-}
-
-/*
- * Upcall for a 1-1 (SOCK_STREAM) socket: an outgoing connection has been
- * established.  Marks the socket connected, raises the write signal and
- * wakes pollers waiting for POLLOUT.
- */
-static void
-sctp_sock_connected(void *handle)
-{
-	struct sctp_sonode *ssnode = handle;
-	struct sonode *node = &ssnode->ss_so;
-
-	ASSERT(node->so_type == SOCK_STREAM);
-
-	mutex_enter(&node->so_lock);
-	ASSERT(node->so_priv);	/* closed conn */
-	ASSERT(!(node->so_state & SS_ACCEPTCONN));
-
-	soisconnected(node);
-	sosctp_sendsig(ssnode, SCTPSIG_WRITE);
-	mutex_exit(&node->so_lock);
-
-	/*
-	 * With the lock dropped, wake anyone polling for the connection
-	 * to come up.
-	 */
-	pollwakeup(&ssnode->ss_poll_list, POLLOUT);
-}
-
-/*
- * Upcall for a 1-N (SOCK_SEQPACKET) socket: the association behind
- * handle has been established.  The state change itself is done by
- * sosctp_assoc_isconnected() under the owning socket's so_lock.
- */
-static void
-sctp_assoc_connected(void *handle)
-{
-	struct sctp_soassoc *assoc = handle;
-	struct sonode *node = &assoc->ssa_sonode->ss_so;
-
-	ASSERT(node->so_type == SOCK_SEQPACKET);
-	ASSERT(assoc->ssa_conn);
-
-	mutex_enter(&node->so_lock);
-	sosctp_assoc_isconnected(assoc);
-	mutex_exit(&node->so_lock);
-}
-
-/*
- * Upcall for a 1-1 (SOCK_STREAM) socket: the connection is gone, either
- * because of an error or after a normal shutdown handshake.  Unlike TCP
- * there is no half-closed connection.
- *
- * Always returns 0.
- */
-static int
-sctp_sock_disconnected(void *handle, int error)
-{
-	struct sctp_sonode *ssnode = handle;
-	struct sonode *node = &ssnode->ss_so;
-	int sigs;
-
-	ASSERT(node->so_type == SOCK_STREAM);
-
-	mutex_enter(&node->so_lock);
-	ASSERT(node->so_priv != NULL);	/* closed conn */
-
-	/*
-	 * Wake everybody: the reader is signalled only when no receive
-	 * data is queued, the writers unconditionally.
-	 */
-	if (ssnode->ss_rxdata == NULL)
-		cv_signal(&ssnode->ss_rxdata_cv);
-	cv_broadcast(&ssnode->ss_txdata_cv);
-
-	/*
-	 * Signal only the directions that were not already shut down,
-	 * so that no signal is sent twice.
-	 */
-	sigs = 0;
-	if ((node->so_state & SS_CANTRCVMORE) == 0)
-		sigs |= SCTPSIG_READ;
-	if ((node->so_state & SS_CANTSENDMORE) == 0)
-		sigs |= SCTPSIG_WRITE;
-	if (sigs != 0)
-		sosctp_sendsig(ssnode, sigs);
-
-	soisdisconnected(node, error);
-	mutex_exit(&node->so_lock);
-
-	pollwakeup(&ssnode->ss_poll_list, POLLIN|POLLRDNORM|POLLOUT);
-
-	return (0);
-}
-
-/*
- * Upcall for a 1-N (SOCK_SEQPACKET) socket: the association behind
- * handle got disconnected.
- *
- * Drops one reference on the association.  Returns 1 when that was the
- * only reference left (ssa_conn is cleared first in that case), else 0.
- */
-static int
-sctp_assoc_disconnected(void *handle, int error)
-{
-	struct sctp_soassoc *assoc = handle;
-	struct sctp_sonode *ssnode = assoc->ssa_sonode;
-	struct sonode *node = &assoc->ssa_sonode->ss_so;
-	int last_ref = 0;
-
-	ASSERT(node->so_type == SOCK_SEQPACKET);
-	ASSERT(assoc->ssa_conn != NULL);
-
-	mutex_enter(&node->so_lock);
-	sosctp_assoc_isdisconnected(assoc, error);
-
-	if (assoc->ssa_refcnt == 1) {
-		last_ref = 1;
-		assoc->ssa_conn = NULL;
-	}
-	/* The association is not touched again after this release. */
-	SSA_REFRELE(SOTOSSO(node), assoc);
-
-	cv_broadcast(&ssnode->ss_txdata_cv);
-	mutex_exit(&node->so_lock);
-
-	return (last_ref);
-}
-
-/*
- * Upcall for a 1-1 (SOCK_STREAM) socket: the peer sent a shutdown.
- * From here on writes to the socket are not allowed, although
- * notifications (e.g. for data which never got sent) may still arrive.
- */
-static void
-sctp_sock_disconnecting(void *handle)
-{
-	struct sctp_sonode *ssnode = handle;
-	struct sonode *node = &ssnode->ss_so;
-
-	ASSERT(node->so_type == SOCK_STREAM);
-
-	mutex_enter(&node->so_lock);
-	ASSERT(node->so_priv != NULL);	/* closed conn */
-
-	/* The socket can no longer be written to; release the writers. */
-	cv_broadcast(&ssnode->ss_txdata_cv);
-
-	/* Skip the signal if the socket was already un-writeable. */
-	if ((node->so_state & SS_CANTSENDMORE) == 0)
-		sosctp_sendsig(ssnode, SCTPSIG_WRITE);
-
-	node->so_state &= ~(SS_ISCONNECTING);
-	node->so_state |= SS_CANTSENDMORE;
-
-	/* Release threads waiting for a socket state change. */
-	cv_broadcast(&node->so_state_cv);
-	mutex_exit(&node->so_lock);
-
-	pollwakeup(&ssnode->ss_poll_list, POLLOUT);
-}
-
-/*
- * Upcall for a 1-N (SOCK_SEQPACKET) socket: the association behind
- * handle is being shut down.  The state change itself is done by
- * sosctp_assoc_isdisconnecting() under the owning socket's so_lock.
- */
-static void
-sctp_assoc_disconnecting(void *handle)
-{
-	struct sctp_soassoc *assoc = handle;
-	struct sonode *node = &assoc->ssa_sonode->ss_so;
-
-	ASSERT(node->so_type == SOCK_SEQPACKET);
-	ASSERT(assoc->ssa_conn != NULL);
-
-	mutex_enter(&node->so_lock);
-	sosctp_assoc_isdisconnecting(assoc);
-	mutex_exit(&node->so_lock);
-}
-
-/*
- * Upcall: incoming data for a 1-1 (SOCK_STREAM) socket.
- *
- * mp is an M_PROTO T_UNITDATA_IND header mblk with the payload chained on
- * b_cont; flags is stored in mp->b_flag for later use by sockfs. The
- * message is appended to the socket's receive list and readers/pollers
- * are woken.
- *
- * Returns so_rcvbuf minus the bytes now queued. If the socket can no
- * longer receive, the message is freed and so_rcvbuf is returned.
- */
-static int
-sctp_sock_recv(void *handle, mblk_t *mp, int flags)
-{
- struct sctp_sonode *ss = handle;
- struct sonode *so = &ss->ss_so;
- int msglen;
-#if defined(DEBUG) && !defined(lint)
- union T_primitives *tpr;
-#endif
-
- ASSERT(so->so_type == SOCK_STREAM);
- ASSERT(mp != NULL);
- ASSERT(!(so->so_state & SS_ACCEPTCONN));
-
- /*
- * Should be getting T_unitdata_ind's only.
- * Must have address as part of packet.
- */
-#if defined(DEBUG) && !defined(lint)
- tpr = (union T_primitives *)mp->b_rptr;
- ASSERT((DB_TYPE(mp) == M_PROTO) &&
- (tpr->type == T_UNITDATA_IND));
- ASSERT((tpr->unitdata_ind.SRC_length));
-#endif
-
- /*
- * First mblk has only the unitdata_ind header, so the payload
- * length is the size of the rest of the chain.
- */
- msglen = msgsize(mp->b_cont);
-
- mutex_enter(&so->so_lock);
- ASSERT(so->so_priv); /* closed conn */
-
- if (so->so_state & SS_CANTRCVMORE) {
- mutex_exit(&so->so_lock);
- freemsg(mp);
- return (so->so_rcvbuf);
- }
- /* Only signal the reader when the receive list was empty. */
- if (ss->ss_rxdata == NULL) {
- cv_signal(&ss->ss_rxdata_cv);
- }
- /* Append mp to the tail of the b_next-linked receive list. */
- *ss->ss_rxtail = mp;
- ss->ss_rxtail = &mp->b_next;
- ss->ss_rxqueued += msglen;
-
- sosctp_sendsig(ss, SCTPSIG_READ);
-
- /*
- * Override b_flag for SCTP sockfs internal use
- */
- mp->b_flag = (short)flags;
-
- mutex_exit(&so->so_lock);
-
- pollwakeup(&ss->ss_poll_list, POLLIN|POLLRDNORM);
-
- /*
- * NOTE(review): so_rcvbuf and ss_rxqueued are read here after
- * so_lock has been dropped - confirm a slightly stale value is
- * acceptable to the transport.
- */
- return (so->so_rcvbuf - ss->ss_rxqueued);
-}
-
-/*
- * Upcall: incoming data or notification for one association of a 1-N
- * (SOCK_SEQPACKET) socket.
- *
- * mp is an M_PROTO T_UNITDATA_IND header mblk with the payload chained on
- * b_cont; flags is stored in mp->b_flag. The association id is patched
- * into the notification, or into an SCTP_SNDRCV ancillary item if one is
- * present, before the message is queued on the socket's receive list.
- * A reference on the association is taken for the queued message and a
- * pointer to the association is stored at DB_BASE(mp).
- *
- * Returns so_rcvbuf minus the bytes now queued for this association.
- */
-static int
-sctp_assoc_recv(void *handle, mblk_t *mp, int flags)
-{
- struct sctp_soassoc *ssa = handle;
- struct sctp_sonode *ss = ssa->ssa_sonode;
- struct sonode *so = &ss->ss_so;
- struct T_unitdata_ind *tind;
- int msglen;
- mblk_t *mp2;
- union sctp_notification *sn;
- struct sctp_sndrcvinfo *sinfo;
-
- ASSERT(ssa->ssa_type == SOSCTP_ASSOC);
- ASSERT(so->so_type == SOCK_SEQPACKET);
- ASSERT(ssa->ssa_conn != NULL); /* closed conn */
- ASSERT(mp != NULL);
-
- /*
- * Should be getting T_unitdata_ind's only.
- * Must have address as part of packet.
- */
- tind = (struct T_unitdata_ind *)mp->b_rptr;
- ASSERT((DB_TYPE(mp) == M_PROTO) &&
- (tind->PRIM_type == T_UNITDATA_IND));
- ASSERT(tind->SRC_length);
-
- /*
- * First mblk has only the unitdata_ind header, so the payload
- * length is the size of the rest of the chain.
- */
- msglen = msgsize(mp->b_cont);
-
- mutex_enter(&so->so_lock);
-
- /*
- * Override b_flag for SCTP sockfs internal use
- */
- mp->b_flag = (short)flags;
-
- /*
- * For notify messages, need to fill in association id.
- * For data messages, sndrcvinfo could be in ancillary data.
- */
- if (flags & SCTP_NOTIFICATION) {
- /* The notification itself lives in the second mblk. */
- mp2 = mp->b_cont;
- sn = (union sctp_notification *)mp2->b_rptr;
- switch (sn->sn_header.sn_type) {
- case SCTP_ASSOC_CHANGE:
- sn->sn_assoc_change.sac_assoc_id = ssa->ssa_id;
- break;
- case SCTP_PEER_ADDR_CHANGE:
- sn->sn_paddr_change.spc_assoc_id = ssa->ssa_id;
- break;
- case SCTP_REMOTE_ERROR:
- sn->sn_remote_error.sre_assoc_id = ssa->ssa_id;
- break;
- case SCTP_SEND_FAILED:
- sn->sn_send_failed.ssf_assoc_id = ssa->ssa_id;
- break;
- case SCTP_SHUTDOWN_EVENT:
- sn->sn_shutdown_event.sse_assoc_id = ssa->ssa_id;
- break;
- case SCTP_ADAPTATION_INDICATION:
- sn->sn_adaptation_event.sai_assoc_id = ssa->ssa_id;
- break;
- case SCTP_PARTIAL_DELIVERY_EVENT:
- sn->sn_pdapi_event.pdapi_assoc_id = ssa->ssa_id;
- break;
- default:
- /* Unknown notification type: queued with no id filled in. */
- ASSERT(0);
- break;
- }
- } else {
- if (tind->OPT_length > 0) {
- struct cmsghdr *cmsg;
- char *cend;
-
- /*
- * Walk the options area of the header mblk looking
- * for the first SCTP_SNDRCV item.
- */
- cmsg = (struct cmsghdr *)
- ((uchar_t *)mp->b_rptr + tind->OPT_offset);
- cend = (char *)cmsg + tind->OPT_length;
- for (;;) {
- /* Stop if the header or the item overruns the area. */
- if ((char *)(cmsg + 1) > cend ||
- ((char *)cmsg + cmsg->cmsg_len) > cend) {
- break;
- }
- if ((cmsg->cmsg_level == IPPROTO_SCTP) &&
- (cmsg->cmsg_type == SCTP_SNDRCV)) {
- sinfo = (struct sctp_sndrcvinfo *)
- (cmsg + 1);
- sinfo->sinfo_assoc_id = ssa->ssa_id;
- break;
- }
- /* A zero-length item would loop forever; bail out. */
- if (cmsg->cmsg_len > 0) {
- cmsg = (struct cmsghdr *)
- ((uchar_t *)cmsg + cmsg->cmsg_len);
- } else {
- break;
- }
- }
- }
- }
-
- /*
- * SCTP has reserved space in the header for storing a pointer.
- * Put the pointer to association there, and queue the data.
- * The reference taken here belongs to the queued message.
- */
- SSA_REFHOLD(ssa);
- ASSERT((mp->b_rptr - DB_BASE(mp)) >= sizeof (ssa));
- *(struct sctp_soassoc **)DB_BASE(mp) = ssa;
-
- /* Only signal the reader when the receive list was empty. */
- if (ss->ss_rxdata == NULL) {
- cv_signal(&ss->ss_rxdata_cv);
- }
- /* Append mp to the tail of the b_next-linked receive list. */
- *ss->ss_rxtail = mp;
- ss->ss_rxtail = &mp->b_next;
- ssa->ssa_rxqueued += msglen;
-
- sosctp_sendsig(ss, SCTPSIG_READ);
-
- mutex_exit(&so->so_lock);
-
- pollwakeup(&ss->ss_poll_list, POLLIN|POLLRDNORM);
-
- return (so->so_rcvbuf - ssa->ssa_rxqueued);
-}
-
-/*
- * Upcall for a 1-1 (SOCK_STREAM) socket: queued TX data got
- * acknowledged, which frees up space in the TX queue.  txqueued is the
- * new amount of queued data.
- */
-static void
-sctp_sock_xmitted(void *handle, int txqueued)
-{
-	struct sctp_sonode *ssnode = handle;
-	struct sonode *node = &ssnode->ss_so;
-	boolean_t was_writeable;
-	boolean_t now_writeable;
-
-	mutex_enter(&node->so_lock);
-	ASSERT(node->so_priv != NULL);	/* closed conn */
-
-	/* Remember whether we were below the low watermark beforehand. */
-	was_writeable = (ssnode->ss_txqueued < node->so_sndlowat) ?
-	    B_TRUE : B_FALSE;
-	ssnode->ss_txqueued = txqueued;
-
-	/* Blocked writers are woken in every case. */
-	cv_broadcast(&ssnode->ss_txdata_cv);
-
-	/*
-	 * Signal and poll wakeup happen only on the transition from
-	 * "not writeable" to "queued data below the watermark".
-	 */
-	now_writeable = (!was_writeable &&
-	    (ssnode->ss_txqueued < node->so_sndlowat)) ? B_TRUE : B_FALSE;
-
-	if (now_writeable)
-		sosctp_sendsig(ssnode, SCTPSIG_WRITE);
-	mutex_exit(&node->so_lock);
-
-	if (now_writeable)
-		pollwakeup(&ssnode->ss_poll_list, POLLOUT);
-}
-
-/*
- * Upcall for a 1-N (SOCK_SEQPACKET) socket: queued TX data of the
- * association behind handle got acknowledged.  Records the new amount
- * of queued data and wakes blocked writers.
- */
-static void
-sctp_assoc_xmitted(void *handle, int txqueued)
-{
-	struct sctp_soassoc *assoc = handle;
-	struct sctp_sonode *ssnode = assoc->ssa_sonode;
-
-	ASSERT(assoc->ssa_type == SOSCTP_ASSOC);
-	ASSERT(ssnode->ss_so.so_type == SOCK_SEQPACKET);
-	ASSERT(assoc->ssa_conn != NULL);
-
-	mutex_enter(&ssnode->ss_so.so_lock);
-	assoc->ssa_txqueued = txqueued;
-	/* Writers wait on the socket-wide condition variable. */
-	cv_broadcast(&ssnode->ss_txdata_cv);
-	mutex_exit(&ssnode->ss_so.so_lock);
-}
-
-/*
- * Upcall for a 1-1 (SOCK_STREAM) socket: SCTP tells the socket the
- * write offset and the amount of TX data per mblk.  A value of zero
- * leaves the corresponding setting as it is.
- */
-static void
-sctp_sock_properties(void *handle, int wroff, size_t maxblk)
-{
-	struct sctp_sonode *ssnode = handle;
-
-	ASSERT(ssnode->ss_so.so_type == SOCK_STREAM);
-
-	mutex_enter(&ssnode->ss_so.so_lock);
-	ASSERT(ssnode->ss_so.so_priv != NULL);	/* closed conn */
-
-	if (wroff != 0)
-		ssnode->ss_wroff = wroff;
-	if (maxblk != 0)
-		ssnode->ss_wrsize = maxblk;
-
-	mutex_exit(&ssnode->ss_so.so_lock);
-}
-
-/*
- * Properties upcall for a 1-N (SOCK_SEQPACKET) socket.  handle is
- * either a sctp_soassoc or a sctp_sonode; the type field tells which.
- * (Presumably ssa_type and ss_type sit at the same position in both
- * structures - verify against socksctp.h.)  A value of zero leaves the
- * corresponding setting as it is.
- */
-static void
-sctp_assoc_properties(void *handle, int wroff, size_t maxblk)
-{
-	struct sctp_soassoc *assoc = handle;
-	struct sctp_sonode *ssnode;
-	boolean_t is_assoc;
-
-	is_assoc = (assoc->ssa_type == SOSCTP_ASSOC) ? B_TRUE : B_FALSE;
-	ssnode = is_assoc ? assoc->ssa_sonode : (struct sctp_sonode *)handle;
-
-	mutex_enter(&ssnode->ss_so.so_lock);
-
-	if (wroff != 0) {
-		if (is_assoc)
-			assoc->ssa_wroff = wroff;
-		else
-			ssnode->ss_wroff = wroff;
-	}
-	if (maxblk != 0) {
-		if (is_assoc)
-			assoc->ssa_wrsize = maxblk;
-		else
-			ssnode->ss_wrsize = maxblk;
-	}
-
-	mutex_exit(&ssnode->ss_so.so_lock);
-}
diff --git a/usr/src/uts/common/fs/sockfs/socksctpvnops.c b/usr/src/uts/common/fs/sockfs/socksctpvnops.c
deleted file mode 100644
index b59bb8d163..0000000000
--- a/usr/src/uts/common/fs/sockfs/socksctpvnops.c
+++ /dev/null
@@ -1,875 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#include <sys/types.h>
-#include <sys/t_lock.h>
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/buf.h>
-#include <sys/debug.h>
-#include <sys/errno.h>
-#include <sys/uio.h>
-#include <sys/vfs.h>
-#include <sys/vfs_opreg.h>
-#include <sys/vnode.h>
-#include <sys/stropts.h>
-#include <sys/cmn_err.h>
-#include <sys/sysmacros.h>
-#include <sys/stream.h>
-#include <sys/strsun.h>
-
-#include <sys/socket.h>
-#include <sys/socketvar.h>
-
-#include <sys/project.h>
-#include <sys/strsubr.h>
-
-#include <fs/fs_subr.h>
-
-#include <sys/esunddi.h>
-#include <sys/ddi.h>
-
-#include <sys/filio.h>
-#include <sys/sockio.h>
-
-#include <netinet/sctp.h>
-#include <inet/sctp_itf.h>
-#include "socksctp.h"
-
-/*
- * SCTP sockfs vnode operations.
- *
- * The operations implemented in this file are declared first; the
- * template below binds them to their VOPNAME_* slots. getattr, setattr,
- * access, fsync, fid and seek reuse the socktpi_* implementations, and
- * dispose is routed to fs_error.
- */
-static int socksctpv_open(struct vnode **, int, struct cred *,
- caller_context_t *);
-static int socksctpv_close(struct vnode *, int, int, offset_t,
- struct cred *, caller_context_t *);
-static int socksctpv_read(struct vnode *, struct uio *, int, struct cred *,
- caller_context_t *);
-static int socksctpv_write(struct vnode *, struct uio *, int, struct cred *,
- caller_context_t *);
-static int socksctpv_ioctl(struct vnode *, int, intptr_t, int,
- struct cred *, int32_t *, caller_context_t *);
-static int socksctp_setfl(vnode_t *, int, int, cred_t *, caller_context_t *);
-static void socksctpv_inactive(struct vnode *, struct cred *,
- caller_context_t *);
-static int socksctpv_poll(struct vnode *, short, int, short *,
- struct pollhead **, caller_context_t *);
-
-/* Name/function pairs, terminated by a NULL, NULL entry. */
-const fs_operation_def_t socksctp_vnodeops_template[] = {
- VOPNAME_OPEN, { .vop_open = socksctpv_open },
- VOPNAME_CLOSE, { .vop_close = socksctpv_close },
- VOPNAME_READ, { .vop_read = socksctpv_read },
- VOPNAME_WRITE, { .vop_write = socksctpv_write },
- VOPNAME_IOCTL, { .vop_ioctl = socksctpv_ioctl },
- VOPNAME_SETFL, { .vop_setfl = socksctp_setfl },
- VOPNAME_GETATTR, { .vop_getattr = socktpi_getattr },
- VOPNAME_SETATTR, { .vop_setattr = socktpi_setattr },
- VOPNAME_ACCESS, { .vop_access = socktpi_access },
- VOPNAME_FSYNC, { .vop_fsync = socktpi_fsync },
- VOPNAME_INACTIVE, { .vop_inactive = socksctpv_inactive },
- VOPNAME_FID, { .vop_fid = socktpi_fid },
- VOPNAME_SEEK, { .vop_seek = socktpi_seek },
- VOPNAME_POLL, { .vop_poll = socksctpv_poll },
- VOPNAME_DISPOSE, { .error = fs_error },
- NULL, NULL
-};
-/* NOTE(review): presumably built from the template at init time - the assignment is not visible here. */
-struct vnodeops *socksctp_vnodeops;
-
-/*
- * Open an SCTP socket vnode.
- *
- * Takes one open reference (so_count) on the socket.  For an acceptor
- * open (SO_ACCEPTOR in flag) the protocol control block already exists
- * and nothing else is done.  Otherwise a new SCTP endpoint is created
- * with the upcall table matching the socket type (1-1 for SOCK_STREAM,
- * 1-N for SOCK_SEQPACKET) and the buffer limits reported by the
- * transport are recorded in the sonode.
- *
- * Returns 0 on success, or ENOMEM if sctp_create() fails; in that case
- * the open reference is dropped again and the socket's buffer limits
- * are left untouched.
- */
-/*ARGSUSED3*/
-static int
-socksctpv_open(struct vnode **vpp, int flag, struct cred *cr,
-    caller_context_t *ct)
-{
-	struct sonode *so;
-	struct sctp_sonode *ss;
-	struct vnode *vp = *vpp;
-	sctp_sockbuf_limits_t sbl;
-	sctp_upcalls_t *upcalls;
-
-	flag &= ~FCREAT;		/* paranoia */
-
-	so = VTOSO(vp);
-	ss = SOTOSSO(so);
-
-	mutex_enter(&so->so_lock);
-	so->so_count++;			/* one more open reference */
-	ASSERT(so->so_count != 0);	/* wraparound */
-	mutex_exit(&so->so_lock);
-
-	ASSERT(vp->v_type == VSOCK);
-
-	if (flag & SO_ACCEPTOR) {
-		ASSERT(so->so_type == SOCK_STREAM);
-		/*
-		 * Protocol control block already created
-		 */
-		return (0);
-	}
-
-	/*
-	 * Active open.
-	 */
-	if (so->so_type == SOCK_STREAM) {
-		upcalls = &sosctp_sock_upcalls;
-	} else {
-		ASSERT(so->so_type == SOCK_SEQPACKET);
-		upcalls = &sosctp_assoc_upcalls;
-	}
-	so->so_priv = sctp_create(ss, NULL, so->so_family, SCTP_CAN_BLOCK,
-	    upcalls, &sbl, cr);
-	if (so->so_priv == NULL) {
-		/*
-		 * sbl is a local that this function never initializes, so
-		 * it must not be copied into the sonode when the
-		 * transport could not be created.  Undo the open
-		 * reference and fail.
-		 */
-		mutex_enter(&so->so_lock);
-		ASSERT(so->so_count > 0);
-		so->so_count--;		/* one less open reference */
-		mutex_exit(&so->so_lock);
-		return (ENOMEM);
-	}
-
-	/* Record the buffer sizes and low watermarks the transport chose. */
-	so->so_rcvbuf = sbl.sbl_rxbuf;
-	so->so_rcvlowat = sbl.sbl_rxlowat;
-	so->so_sndbuf = sbl.sbl_txbuf;
-	so->so_sndlowat = sbl.sbl_txlowat;
-
-	return (0);
-}
-
-/*
- * Close an SCTP socket vnode.
- *
- * File locks and shares held by the calling process are always cleaned.
- * If count is greater than 1 nothing else is done. Otherwise one open
- * reference (so_count) is dropped, and when it reaches zero the socket is
- * marked disconnected and the transport endpoint plus every association
- * still in the socket's table are disconnected.
- *
- * Always returns 0.
- */
-/*ARGSUSED*/
-static int
-socksctpv_close(struct vnode *vp, int flag, int count, offset_t offset,
- struct cred *cr, caller_context_t *ct)
-{
- struct sonode *so;
- struct sctp_sonode *ss;
- struct sctp_sa_id *ssi;
- struct sctp_soassoc *ssa;
- int sendsig = 0;
- int32_t i;
-
- so = VTOSO(vp);
- ss = SOTOSSO(so);
-
- cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
- cleanshares(vp, ttoproc(curthread)->p_pid);
-
- ASSERT(vp->v_stream == NULL);
- if (count > 1) {
- dprint(2, ("socksctpv_close: count %d\n", count));
- return (0);
- }
-
- mutex_enter(&so->so_lock);
- so_lock_single(so); /* Set SOLOCKED */
- ASSERT(so->so_count > 0);
- so->so_count--; /* one fewer open reference */
-
- dprint(2, ("socksctpv_close: %p so_count %d\n", (void *)so,
- so->so_count));
-
- if (so->so_count == 0) {
- /*
- * Need to set flags as there might be ops in progress on
- * this socket.
- *
- * If socket already disconnected/disconnecting,
- * don't send signal (again).
- */
- if (!(so->so_state & SS_CANTRCVMORE))
- sendsig |= SCTPSIG_READ;
- if (!(so->so_state & SS_CANTSENDMORE))
- sendsig |= SCTPSIG_WRITE;
- soisdisconnected(so, 0);
- /* so_lock is dropped around the calls into the transport. */
- mutex_exit(&so->so_lock);
-
- /*
- * Initiate connection shutdown. Update SCTP's receive
- * window.
- */
- sctp_recvd(so->so_priv, so->so_rcvbuf - ss->ss_rxqueued);
- (void) sctp_disconnect(so->so_priv);
-
- /*
- * New associations can't come in, but old ones might get
- * closed in upcall. Protect against that by taking a reference
- * on the association.
- */
- mutex_enter(&so->so_lock);
- ssi = ss->ss_assocs;
- for (i = 0; i < ss->ss_maxassoc; i++, ssi++) {
- if ((ssa = ssi->ssi_assoc) != NULL) {
- SSA_REFHOLD(ssa);
- sosctp_assoc_isdisconnected(ssa, 0);
- mutex_exit(&so->so_lock);
-
- sctp_recvd(ssa->ssa_conn, so->so_rcvbuf -
- ssa->ssa_rxqueued);
- (void) sctp_disconnect(ssa->ssa_conn);
-
- mutex_enter(&so->so_lock);
- SSA_REFRELE(ss, ssa);
- }
- }
- if (sendsig != 0) {
- sosctp_sendsig(ss, sendsig);
- }
- /* pollwakeup() is called with so_lock dropped. */
- mutex_exit(&so->so_lock);
- pollwakeup(&ss->ss_poll_list, POLLIN|POLLRDNORM|POLLOUT);
- mutex_enter(&so->so_lock);
- }
- so_unlock_single(so, SOLOCKED);
- mutex_exit(&so->so_lock);
-
- return (0);
-}
-
-/*
- * read(2) on an SCTP socket.  Only 1-1 (SOCK_STREAM) sockets can be
- * read this way; anything else gets EOPNOTSUPP.  The work is done by
- * sosctp_recvmsg() with a message header that asks for neither a name
- * nor control data.
- */
-/*ARGSUSED2*/
-static int
-socksctpv_read(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cr,
-    caller_context_t *ct)
-{
-	struct nmsghdr msg;
-	struct sonode *node;
-
-	node = VTOSO(vp);
-	if (node->so_type != SOCK_STREAM)
-		return (EOPNOTSUPP);
-
-	ASSERT(vp->v_type == VSOCK);
-	so_update_attrs(node, SOACC);
-
-	/* Only these three fields are set up for the receive path. */
-	msg.msg_flags = 0;
-	msg.msg_controllen = 0;
-	msg.msg_namelen = 0;
-
-	return (sosctp_recvmsg(node, &msg, uiop));
-}
-
-/*
- * write(2) on an SCTP socket: send data, see sosctp_sendmsg().
- *
- * Only 1-1 (SOCK_STREAM) sockets can be written this way. The whole
- * uio is sent as one message.
- *
- * Returns 0 on success, or:
- * EOPNOTSUPP - not a SOCK_STREAM socket
- * EPIPE - socket cannot send any more (SIGPIPE is also posted)
- * EAGAIN - send window full and the file is non-blocking
- * EINTR - interrupted while waiting for send window space, or the
- * message header could not be allocated
- * ENOTCONN - socket is neither connecting nor connected
- * EMSGSIZE - message is larger than the send buffer
- * or the pending socket error, or the error returned by
- * sosctp_uiomove() or sctp_sendmsg().
- */
-/*ARGSUSED2*/
-static int
-socksctpv_write(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cr,
- caller_context_t *ct)
-{
- struct sctp_sonode *ss;
- struct sonode *so;
- mblk_t *head;
- ssize_t count, msglen;
- int error;
-
- so = VTOSO(vp);
- ss = SOTOSSO(so);
-
- if (so->so_type != SOCK_STREAM) {
- return (EOPNOTSUPP);
- }
-
- mutex_enter(&so->so_lock);
-
- /*
- * Loop until there is room in the send window. The state and
- * error checks are repeated after every wakeup.
- */
- for (;;) {
- if (so->so_state & SS_CANTSENDMORE) {
- mutex_exit(&so->so_lock);
- tsignal(curthread, SIGPIPE);
- return (EPIPE);
- }
-
- if (so->so_error != 0) {
- error = sogeterr(so);
- if (error != 0) {
- mutex_exit(&so->so_lock);
- return (error);
- }
- }
-
- if (ss->ss_txqueued < so->so_sndbuf)
- break;
-
- if (uiop->uio_fmode & (FNDELAY|FNONBLOCK)) {
- mutex_exit(&so->so_lock);
- return (EAGAIN);
- } else {
- /*
- * Xmit window full in a blocking socket.
- * Wait for space to become available and try again.
- */
- error = cv_wait_sig(&ss->ss_txdata_cv, &so->so_lock);
- if (error == 0) { /* signal */
- mutex_exit(&so->so_lock);
- return (EINTR);
- }
- }
- }
-
- if (!(so->so_state & (SS_ISCONNECTING | SS_ISCONNECTED))) {
- mutex_exit(&so->so_lock);
- return (ENOTCONN);
- }
-
- msglen = count = uiop->uio_resid;
- /* Don't allow sending a message larger than the send buffer size. */
- if (msglen > so->so_sndbuf) {
- mutex_exit(&so->so_lock);
- return (EMSGSIZE);
- }
- /*
- * Account for the message before dropping the lock; the amount
- * is taken back at error_ret if the send fails.
- */
- ss->ss_txqueued += msglen;
-
- mutex_exit(&so->so_lock);
-
- if (count == 0) {
- return (0);
- }
-
- head = sctp_alloc_hdr(NULL, 0, NULL, 0, SCTP_CAN_BLOCK);
- if (head == NULL) {
- error = EINTR;
- goto error_ret;
- }
-
- /* Copy in the message. */
- if ((error = sosctp_uiomove(head, count, ss->ss_wrsize, ss->ss_wroff,
- uiop, 0, cr)) != 0) {
- goto error_ret;
- }
- so_update_attrs(so, SOMOD);
-
- error = sctp_sendmsg(so->so_priv, head, 0);
- if (error == 0)
- return (0);
-
-error_ret:
- /* Undo the accounting and let other blocked writers retry. */
- mutex_enter(&so->so_lock);
- ss->ss_txqueued -= msglen;
- cv_broadcast(&ss->ss_txdata_cv);
- mutex_exit(&so->so_lock);
- /* head is NULL here when sctp_alloc_hdr() failed. */
- freemsg(head);
- return (error);
-}
-
-/*ARGSUSED4*/
-static int
-socksctpv_ioctl(struct vnode *vp, int cmd, intptr_t arg, int mode,
- struct cred *cr, int32_t *rvalp, caller_context_t *ct)
-{
- struct sonode *so;
- struct sctp_sonode *ss;
- int32_t value;
- int error;
- int intval;
- pid_t pid;
- struct sctp_soassoc *ssa;
- void *conn;
- void *buf;
- STRUCT_DECL(sctpopt, opt);
- uint32_t optlen;
- int buflen;
-
- so = VTOSO(vp);
- ss = SOTOSSO(so);
-
- /* handle socket specific ioctls */
- switch (cmd) {
- case FIONBIO:
- if (so_copyin((void *)arg, &value, sizeof (int32_t),
- (mode & (int)FKIOCTL))) {
- return (EFAULT);
- }
- mutex_enter(&so->so_lock);
- if (value) {
- so->so_state |= SS_NDELAY;
- } else {
- so->so_state &= ~SS_NDELAY;
- }
- mutex_exit(&so->so_lock);
- return (0);
-
- case FIOASYNC:
- if (so_copyin((void *)arg, &value, sizeof (int32_t),
- (mode & (int)FKIOCTL))) {
- return (EFAULT);
- }
- mutex_enter(&so->so_lock);
-
- if (value) {
- /* Turn on SIGIO */
- so->so_state |= SS_ASYNC;
- } else {
- /* Turn off SIGIO */
- so->so_state &= ~SS_ASYNC;
- }
- mutex_exit(&so->so_lock);
- return (0);
-
- case SIOCSPGRP:
- case FIOSETOWN:
- if (so_copyin((void *)arg, &pid, sizeof (pid_t),
- (mode & (int)FKIOCTL))) {
- return (EFAULT);
- }
- mutex_enter(&so->so_lock);
-
- error = (pid != so->so_pgrp) ? sosctp_chgpgrp(ss, pid) : 0;
- mutex_exit(&so->so_lock);
- return (error);
-
- case SIOCGPGRP:
- case FIOGETOWN:
- if (so_copyout(&so->so_pgrp, (void *)arg,
- sizeof (pid_t), (mode & (int)FKIOCTL)))
- return (EFAULT);
- return (0);
-
- case SIOCATMARK:
- /*
- * No support for urgent data.
- */
- intval = 0;
-
- if (so_copyout(&intval, (void *)arg, sizeof (int),
- (mode & (int)FKIOCTL)))
- return (EFAULT);
- return (0);
-
- /* from strioctl */
- case FIONREAD:
- /*
- * Return number of bytes of data in all data messages
- * in queue in "arg".
- * For stream socket, amount of available data.
- * For sock_dgram, # of available bytes + addresses.
- */
- intval = (so->so_state & SS_ACCEPTCONN) ? 0 :
- MIN(ss->ss_rxqueued, INT_MAX);
- if (so_copyout(&intval, (void *)arg, sizeof (intval),
- (mode & (int)FKIOCTL)))
- return (EFAULT);
- return (0);
-
- case SIOCSCTPGOPT:
- STRUCT_INIT(opt, mode);
-
- if (so_copyin((void *)arg, STRUCT_BUF(opt), STRUCT_SIZE(opt),
- (mode & (int)FKIOCTL))) {
- return (EFAULT);
- }
- if ((optlen = STRUCT_FGET(opt, sopt_len)) > SO_MAXARGSIZE)
- return (EINVAL);
-
- /*
- * Find the correct sctp_t based on whether it is 1-N socket
- * or not.
- */
- intval = STRUCT_FGET(opt, sopt_aid);
- mutex_enter(&so->so_lock);
- if ((so->so_type == SOCK_SEQPACKET) && intval) {
- if ((error = sosctp_assoc(ss, intval, &ssa)) != 0) {
- mutex_exit(&so->so_lock);
- return (error);
- }
- conn = ssa->ssa_conn;
- ASSERT(conn != NULL);
- } else {
- conn = so->so_priv;
- ssa = NULL;
- }
- mutex_exit(&so->so_lock);
-
- /* Copyin the option buffer and then call sctp_get_opt(). */
- buflen = optlen;
- /* Let's allocate a buffer enough to hold an int */
- if (buflen < sizeof (uint32_t))
- buflen = sizeof (uint32_t);
- buf = kmem_alloc(buflen, KM_SLEEP);
- if (so_copyin(STRUCT_FGETP(opt, sopt_val), buf, optlen,
- (mode & (int)FKIOCTL))) {
- if (ssa != NULL) {
- mutex_enter(&so->so_lock);
- SSA_REFRELE(ss, ssa);
- mutex_exit(&so->so_lock);
- }
- kmem_free(buf, buflen);
- return (EFAULT);
- }
- /* The option level has to be IPPROTO_SCTP */
- error = sctp_get_opt(conn, IPPROTO_SCTP,
- STRUCT_FGET(opt, sopt_name), buf, &optlen);
- if (ssa != NULL) {
- mutex_enter(&so->so_lock);
- SSA_REFRELE(ss, ssa);
- mutex_exit(&so->so_lock);
- }
- optlen = MIN(buflen, optlen);
- /* No error, copyout the result with the correct buf len. */
- if (error == 0) {
- STRUCT_FSET(opt, sopt_len, optlen);
- if (so_copyout(STRUCT_BUF(opt), (void *)arg,
- STRUCT_SIZE(opt), (mode & (int)FKIOCTL))) {
- error = EFAULT;
- } else if (so_copyout(buf, STRUCT_FGETP(opt, sopt_val),
- optlen, (mode & (int)FKIOCTL))) {
- error = EFAULT;
- }
- }
- kmem_free(buf, buflen);
- return (error);
-
- case SIOCSCTPSOPT:
- STRUCT_INIT(opt, mode);
-
- if (so_copyin((void *)arg, STRUCT_BUF(opt), STRUCT_SIZE(opt),
- (mode & (int)FKIOCTL))) {
- return (EFAULT);
- }
- if ((optlen = STRUCT_FGET(opt, sopt_len)) > SO_MAXARGSIZE)
- return (EINVAL);
-
- /*
- * Find the correct sctp_t based on whether it is 1-N socket
- * or not.
- */
- intval = STRUCT_FGET(opt, sopt_aid);
- mutex_enter(&so->so_lock);
- if (intval != 0) {
- if ((error = sosctp_assoc(ss, intval, &ssa)) != 0) {
- mutex_exit(&so->so_lock);
- return (error);
- }
- conn = ssa->ssa_conn;
- ASSERT(conn != NULL);
- } else {
- conn = so->so_priv;
- ssa = NULL;
- }
- mutex_exit(&so->so_lock);
-
- /* Copyin the option buffer and then call sctp_set_opt(). */
- buf = kmem_alloc(optlen, KM_SLEEP);
- if (so_copyin(STRUCT_FGETP(opt, sopt_val), buf, optlen,
- (mode & (int)FKIOCTL))) {
- if (ssa != NULL) {
- mutex_enter(&so->so_lock);
- SSA_REFRELE(ss, ssa);
- mutex_exit(&so->so_lock);
- }
- kmem_free(buf, intval);
- return (EFAULT);
- }
- /* The option level has to be IPPROTO_SCTP */
- error = sctp_set_opt(conn, IPPROTO_SCTP,
- STRUCT_FGET(opt, sopt_name), buf, optlen);
- if (ssa) {
- mutex_enter(&so->so_lock);
- SSA_REFRELE(ss, ssa);
- mutex_exit(&so->so_lock);
- }
- kmem_free(buf, optlen);
- return (error);
-
- case SIOCSCTPPEELOFF: {
- struct sonode *nso;
- struct sctp_uc_swap us;
- int nfd;
- struct file *nfp;
- struct vnode *nvp = NULL, *accessvp;
-
- dprint(2, ("sctppeeloff %p\n", (void *)ss));
-
- if (so->so_type != SOCK_SEQPACKET) {
- return (EOPNOTSUPP);
- }
- if (so_copyin((void *)arg, &intval, sizeof (intval),
- (mode & (int)FKIOCTL))) {
- return (EFAULT);
- }
- if (intval == 0) {
- return (EINVAL);
- }
-
- /*
- * Find accessvp. This is different from parent's vp,
- * as the socket type is different.
- */
- accessvp = solookup(so->so_family, SOCK_STREAM,
- so->so_protocol, NULL, &error);
- if (accessvp == NULL) {
- return (error);
- }
-
- /*
- * Allocate the user fd.
- */
- if ((nfd = ufalloc(0)) == -1) {
- eprintsoline(so, EMFILE);
- return (EMFILE);
- }
-
- /*
- * Copy the fd out.
- */
- if (so_copyout(&nfd, (void *)arg, sizeof (nfd),
- (mode & (int)FKIOCTL))) {
- error = EFAULT;
- goto err;
- }
- mutex_enter(&so->so_lock);
-
- /*
- * Don't use sosctp_assoc() in order to peel off disconnected
- * associations.
- */
- ssa = ((uint32_t)intval >= ss->ss_maxassoc) ? NULL :
- ss->ss_assocs[intval].ssi_assoc;
- if (ssa == NULL) {
- mutex_exit(&so->so_lock);
- error = EINVAL;
- goto err;
- }
- SSA_REFHOLD(ssa);
-
- nso = sosctp_create(accessvp, so->so_family, SOCK_STREAM,
- so->so_protocol, so->so_version, so, &error);
- if (nso == NULL) {
- SSA_REFRELE(ss, ssa);
- mutex_exit(&so->so_lock);
- goto err;
- }
- nvp = SOTOV(nso);
- so_lock_single(so);
- mutex_exit(&so->so_lock);
- us.sus_handle = SOTOSSO(nso);
- us.sus_upcalls = &sosctp_sock_upcalls;
-
- /*
- * Upcalls to new socket are blocked for the duration of
- * downcall.
- */
- mutex_enter(&nso->so_lock);
-
- error = sctp_set_opt(ssa->ssa_conn, IPPROTO_SCTP, SCTP_UC_SWAP,
- &us, sizeof (us));
- if (error) {
- goto peelerr;
- }
- error = falloc(nvp, FWRITE|FREAD, &nfp, NULL);
- if (error) {
- goto peelerr;
- }
-
- /*
- * fill in the entries that falloc reserved
- */
- nfp->f_vnode = nvp;
- mutex_exit(&nfp->f_tlock);
- setf(nfd, nfp);
-
- mutex_enter(&so->so_lock);
-
- sosctp_assoc_move(ss, SOTOSSO(nso), ssa);
-
- mutex_exit(&nso->so_lock);
-
- ssa->ssa_conn = NULL;
- sosctp_assoc_free(ss, ssa);
-
- so_unlock_single(so, SOLOCKED);
- mutex_exit(&so->so_lock);
-
- return (0);
-
-err:
- setf(nfd, NULL);
- eprintsoline(so, error);
- return (error);
-
-peelerr:
- mutex_exit(&nso->so_lock);
- mutex_enter(&so->so_lock);
- ASSERT(nso->so_count == 1);
- nso->so_count = 0;
- so_unlock_single(so, SOLOCKED);
- SSA_REFRELE(ss, ssa);
- mutex_exit(&so->so_lock);
- /* held in VOP_OPEN() */
- ddi_rele_driver(getmajor(nso->so_dev));
- setf(nfd, NULL);
- ASSERT(nvp->v_count == 1);
- VN_RELE(nvp);
- eprintsoline(so, error);
- return (error);
- }
- default:
- return (EINVAL);
- }
-}
-
-/*
- * VOP_SETFL handler.  Every flag combination is accepted; the only thing
- * recorded is whether FNDELAY and FNONBLOCK are present in nflags, mirrored
- * into so_state as SS_NDELAY and SS_NONBLOCK so that they can be inherited
- * from listener to acceptor.  Always returns 0.
- */
-/* ARGSUSED */
-static int
-socksctp_setfl(vnode_t *vp, int oflags, int nflags, cred_t *cr,
-    caller_context_t *ct)
-{
-	struct sonode *sock = VTOSO(vp);
-
-	mutex_enter(&sock->so_lock);
-
-	/* Mirror FNDELAY into SS_NDELAY. */
-	if (!(nflags & FNDELAY)) {
-		sock->so_state &= ~SS_NDELAY;
-	} else {
-		sock->so_state |= SS_NDELAY;
-	}
-
-	/* Mirror FNONBLOCK into SS_NONBLOCK. */
-	if (!(nflags & FNONBLOCK)) {
-		sock->so_state &= ~SS_NONBLOCK;
-	} else {
-		sock->so_state |= SS_NONBLOCK;
-	}
-
-	mutex_exit(&sock->so_lock);
-	return (0);
-}
-
-/*
- * VOP_INACTIVE handler for an SCTP socket vnode.
- *
- * Drops one hold from v_count under v_lock and returns if other holds
- * remain.  Otherwise it walks ss_assocs, calling sctp_close() on each
- * association that is still present, then closes the socket's own
- * so_priv connection (if set) and releases the sonode with sosctp_free().
- * Panics if v_count is already below 1 on entry.
- */
-/*ARGSUSED*/
-static void
-socksctpv_inactive(struct vnode *vp, struct cred *cr, caller_context_t *ct)
-{
- struct sonode *so;
- struct sctp_sonode *ss;
- struct sctp_sa_id *ssi;
- struct sctp_soassoc *ssa;
- int32_t i;
-
- so = VTOSO(vp);
- ss = SOTOSSO(so);
-
- mutex_enter(&vp->v_lock);
- /*
- * If no one has reclaimed the vnode, remove from the
- * cache now.
- */
- if (vp->v_count < 1)
- cmn_err(CE_PANIC, "socksctpv_inactive: Bad v_count");
-
- /*
- * Drop the temporary hold by vn_rele now
- */
- if (--vp->v_count != 0) {
- mutex_exit(&vp->v_lock);
- return;
- }
- mutex_exit(&vp->v_lock);
-
- /* We are the sole owner of so now */
-
- /*
- * New associations can't come in, but old ones might get
- * closed in upcall. Protect against that by taking a reference
- * on the association.
- */
- mutex_enter(&so->so_lock);
-
- ssi = ss->ss_assocs;
- for (i = 0; i < ss->ss_maxassoc; i++, ssi++) {
- if ((ssa = ssi->ssi_assoc) != NULL) {
- SSA_REFHOLD(ssa);
- /* so_lock is not held across the call into sctp_close() */
- mutex_exit(&so->so_lock);
-
- sctp_close(ssa->ssa_conn);
-
- /* Retake so_lock before detaching and freeing the association */
- mutex_enter(&so->so_lock);
- ssa->ssa_conn = NULL;
- sosctp_assoc_free(ss, ssa);
- }
- }
- mutex_exit(&so->so_lock);
-
- ASSERT(!vn_has_cached_data(vp));
- /* Close the connection attached to the socket itself, if there is one */
- if (so->so_priv) {
- sctp_close(so->so_priv);
- }
- so->so_priv = NULL;
- sosctp_free(so);
-}
-
-/*
- * VOP_POLL handler for an SCTP socket vnode.
- *
- * Stores the subset of `events' that is currently ready in *reventsp and
- * always returns 0.  When nothing is ready and `anyyet' is zero, *phpp is
- * pointed at the socket's ss_poll_list.
- *
- * Check socktpi_poll() on why so_lock is not held in this function.
- */
-/*ARGSUSED5*/
-static int
-socksctpv_poll(struct vnode *vp, short events, int anyyet, short *reventsp,
- struct pollhead **phpp, caller_context_t *ct)
-{
- struct sonode *so;
- struct sctp_sonode *ss;
- short origevents = events;
- int so_state;
-
- so = VTOSO(vp);
- ss = SOTOSSO(so);
- /* Single unlocked snapshot of so_state, used for the state tests below */
- so_state = so->so_state;
-
- ASSERT(vp->v_type == VSOCK);
- ASSERT(vp->v_stream == NULL);
- ASSERT(so->so_version != SOV_STREAM);
-
- if (!(so_state & SS_ISCONNECTED) && (so->so_type == SOCK_STREAM)) {
- /*
- * Not connected yet - turn off write side events
- */
- events &= ~(POLLOUT|POLLWRBAND);
- }
-
- /*
- * Check for errors
- *
- * With an error pending, every requested POLLIN/POLLRDNORM/POLLOUT
- * bit is reported. The test uses origevents, so POLLOUT is reported
- * even if it was masked out of `events' above.
- */
- if (so->so_error != 0 &&
- ((POLLIN|POLLRDNORM|POLLOUT) & origevents) != 0) {
- *reventsp = (POLLIN|POLLRDNORM|POLLOUT) & origevents;
- return (0);
- }
-
- *reventsp = 0;
-
- /*
- * Don't mark socket as writable until TX queued data is
- * below watermark.
- *
- * Only SOCK_STREAM sockets apply the watermark test; any other
- * socket type is reported writable whenever POLLOUT was requested.
- */
- if (so->so_type == SOCK_STREAM) {
- if (ss->ss_txqueued < so->so_sndlowat) {
- *reventsp |= POLLOUT & events;
- }
- } else {
- *reventsp |= POLLOUT & events;
- }
- /* Readable when ss_rxdata is non-zero */
- if (ss->ss_rxdata) {
- *reventsp |= (POLLIN|POLLRDNORM) & events;
- }
- /* SS_HASCONNIND or SS_CANTRCVMORE also counts as readable */
- if ((so_state & (SS_HASCONNIND|SS_CANTRCVMORE)) != 0) {
- *reventsp |= (POLLIN|POLLRDNORM) & events;
- }
-
- /* Nothing ready and no earlier fd reported events: hand back our pollhead */
- if (!*reventsp && !anyyet) {
- *phpp = &ss->ss_poll_list;
- }
-
- return (0);
-}
diff --git a/usr/src/uts/common/fs/sockfs/socksdp.h b/usr/src/uts/common/fs/sockfs/socksdp.h
deleted file mode 100755
index 68231bb0e5..0000000000
--- a/usr/src/uts/common/fs/sockfs/socksdp.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SOCKSDP_H_
-#define _SOCKSDP_H_
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * SDP socket structure.
- *
- * The opaque pointer passed in upcalls is a pointer to sdp_sonode.
- *
- * The generic sonode is embedded as ss_so; the SOTOSDO() macro in this
- * header recovers the enclosing sdp_sonode from a pointer to that member.
- */
-struct sdp_sonode {
- int ss_type; /* sonode or soassoc */
- struct sonode ss_so; /* embedded generic socket node */
- struct sockaddr_in6 ss_laddr; /* can fit both v4 & v6 */
- struct sockaddr_in6 ss_faddr; /* presumably the peer address - confirm */
- int ss_rxqueued; /* queued # of conn */
- struct pollhead ss_poll_list; /* pollhead used by poll and pollwakeup */
-};
-
-extern sdp_upcalls_t sosdp_sock_upcalls;
-extern struct vnodeops *socksdp_vnodeops;
-extern const fs_operation_def_t socksdp_vnodeops_template[];
-
-extern void sosdp_free(struct sonode *so);
-extern int sosdp_chgpgrp(struct sdp_sonode *ss, pid_t pid);
-extern void sosdp_sendsig(struct sdp_sonode *ss, int event);
-
-extern int sosdp_bind(struct sonode *so, struct sockaddr *name,
- socklen_t namelen, int flags);
-extern int sosdp_recvmsg(struct sonode *, struct nmsghdr *, struct uio *);
-
-extern int sosdp_waitconnected(struct sonode *so, int fmode);
-
-extern void sosdp_so_inherit(struct sdp_sonode *lss, struct sdp_sonode *nss);
-
-/*
- * Data structure types.
- */
-#define SOSDP_SOCKET 0x1
-
-#define SOTOSDO(so) ((struct sdp_sonode *)(((char *)so) - \
- offsetof(struct sdp_sonode, ss_so)))
-
-/*
- * Event flags to sosdp_sendsig().
- */
-#define SDPSIG_WRITE 0x1
-#define SDPSIG_READ 0x2
-#define SDPSIG_URG 0x4
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SOCKSDP_H_ */
diff --git a/usr/src/uts/common/fs/sockfs/socksdpsubr.c b/usr/src/uts/common/fs/sockfs/socksdpsubr.c
deleted file mode 100755
index 357c61db3d..0000000000
--- a/usr/src/uts/common/fs/sockfs/socksdpsubr.c
+++ /dev/null
@@ -1,214 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/types.h>
-#include <sys/t_lock.h>
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/debug.h>
-#include <sys/errno.h>
-#include <sys/strsubr.h>
-#include <sys/cmn_err.h>
-#include <sys/sysmacros.h>
-
-#include <sys/vfs.h>
-#include <sys/vfs_opreg.h>
-
-#include <sys/socket.h>
-#include <sys/socketvar.h>
-#include <sys/strsun.h>
-#include <sys/signal.h>
-
-#include <inet/sdp_itf.h>
-#include "socksdp.h"
-
-
-/*
- * Wait until the socket is connected or there is an error.
- * fmode should contain any nonblocking flags.
- *
- * The caller must hold so_lock; it may be dropped and retaken while
- * sleeping on so_state_cv.
- *
- * Returns:
- *	0		SS_ISCONNECTED is set, or the socket is neither
- *			connected nor connecting and has no pending error
- *	EINPROGRESS	still connecting and fmode has FNDELAY or FNONBLOCK
- *	EINTR		the wait was interrupted by a signal and the socket
- *			is still neither connected nor in error
- *	other		the pending socket error, as returned by sogeterr()
- */
-int
-sosdp_waitconnected(struct sonode *so, int fmode)
-{
-	/*
-	 * Start from 0 so that every exit path returns a defined value.
-	 * Without this, leaving the loop with no pending error and with
-	 * SS_ISCONNECTED clear returned an uninitialized stack value.
-	 */
-	int error = 0;
-
-	ASSERT(MUTEX_HELD(&so->so_lock));
-	ASSERT((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) ||
-	    so->so_error != 0);
-
-	while ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) ==
-	    SS_ISCONNECTING && so->so_error == 0) {
-
-		dprint(3, ("waiting for SS_ISCONNECTED on %p\n", (void *)so));
-		if (fmode & (FNDELAY|FNONBLOCK))
-			return (EINPROGRESS);
-
-		if (!cv_wait_sig_swap(&so->so_state_cv, &so->so_lock)) {
-			/*
-			 * Return EINTR and let the application use
-			 * nonblocking techniques for detecting when
-			 * the connection has been established.
-			 */
-			error = EINTR;
-			break;
-		}
-		dprint(3, ("awoken on %p\n", (void *)so));
-	}
-
-	/*
-	 * A pending socket error, or a completed connection, takes
-	 * precedence over an EINTR recorded above.
-	 */
-	if (so->so_error != 0) {
-		error = sogeterr(so);
-		ASSERT(error != 0);
-		dprint(3, ("sosdp_waitconnected: error %d\n", error));
-	} else if (so->so_state & SS_ISCONNECTED) {
-		error = 0;
-	}
-	return (error);
-}
-
-
-/*
- * Set the process or process group that receives SIGIO for this socket.
- *
- * The caller must hold the socket's so_lock.  A non-zero pid is first
- * checked by sending it signal 0; if that fails the kill() result is
- * returned and so_pgrp is left alone.  (When kill fails it also does a
- * set_errno, causing the system call to fail.)  A pid of 0 is stored
- * without any check.  Returns 0 on success.
- */
-int
-sosdp_chgpgrp(struct sdp_sonode *ss, pid_t pid)
-{
-	struct sonode *sock = &ss->ss_so;
-
-	ASSERT(MUTEX_HELD(&ss->ss_so.so_lock));
-
-	if (pid != 0) {
-		/* Permissions check by sending signal 0. */
-		int rc = kill(pid, 0);
-
-		if (rc != 0)
-			return (rc);
-	}
-
-	sock->so_pgrp = pid;
-	return (0);
-}
-
-
-/*
- * Generate a SIGIO, for 'writable' events include siginfo structure,
- * for read events just send the signal.
- *
- * `event' is a mask of SDPSIG_* flags; each flag that is set is handled
- * independently, so one call may post more than one signal:
- *	SDPSIG_WRITE	SIGPOLL queued with a siginfo whose si_code is POLL_OUT
- *	SDPSIG_READ	plain SIGPOLL
- *	SDPSIG_URG	plain SIGURG
- * The callers in this file invoke it with proc->p_lock held.
- */
-/*ARGSUSED*/
-static void
-sosdp_sigproc(proc_t *proc, int event)
-{
- k_siginfo_t info;
-
- if (event & SDPSIG_WRITE) {
- /* Only these five siginfo fields are filled in before queueing */
- info.si_signo = SIGPOLL;
- info.si_code = POLL_OUT;
- info.si_errno = 0;
- info.si_fd = 0;
- info.si_band = 0;
- sigaddq(proc, NULL, &info, KM_NOSLEEP);
- }
- if (event & SDPSIG_READ) {
- sigtoproc(proc, NULL, SIGPOLL);
- }
- if (event & SDPSIG_URG) {
- sigtoproc(proc, NULL, SIGURG);
- }
-}
-
-/*
- * Deliver the signals for `event' (a mask of SDPSIG_* flags) to the owner
- * recorded in so_pgrp.  The caller must hold so_lock.
- *
- * Nothing is sent when so_pgrp is 0, or when SS_ASYNC is clear and `event'
- * is anything other than exactly SDPSIG_URG.  A positive so_pgrp is looked
- * up as a single process with prfind(); a negative one is negated and
- * looked up as a process group with pgfind(), and every process on the
- * p_pglink chain is signalled.
- */
-void
-sosdp_sendsig(struct sdp_sonode *ss, int event)
-{
- proc_t *proc;
- struct sonode *so = &ss->ss_so;
-
- ASSERT(MUTEX_HELD(&ss->ss_so.so_lock));
-
- if (so->so_pgrp == 0 || (!(so->so_state & SS_ASYNC) &&
- event != SDPSIG_URG)) {
- return;
- }
-
- dprint(3, ("sending sig %d to %d\n", event, so->so_pgrp));
-
- if (so->so_pgrp > 0) {
- /*
- * XXX This unfortunately still generates
- * a signal when a fd is closed but
- * the proc is active.
- */
- mutex_enter(&pidlock);
- proc = prfind(so->so_pgrp);
- if (proc == NULL) {
- mutex_exit(&pidlock);
- return;
- }
- /* p_lock is taken before pidlock is dropped */
- mutex_enter(&proc->p_lock);
- mutex_exit(&pidlock);
- sosdp_sigproc(proc, event);
- mutex_exit(&proc->p_lock);
- } else {
- /*
- * Send to process group. Hold pidlock across
- * calls to sosdp_sigproc().
- */
- pid_t pgrp = -so->so_pgrp;
-
- mutex_enter(&pidlock);
- proc = pgfind(pgrp);
- while (proc != NULL) {
- mutex_enter(&proc->p_lock);
- sosdp_sigproc(proc, event);
- mutex_exit(&proc->p_lock);
- proc = proc->p_pglink;
- }
- mutex_exit(&pidlock);
- }
-}
-
-
-/*
- * Copy the inheritable socket properties from the socket embedded in lss
- * to the socket embedded in nss: the inheritable subset of so_options,
- * the send/receive buffer sizes, the low-water marks and the SIGIO owner
- * (so_pgrp).
- */
-void
-sosdp_so_inherit(struct sdp_sonode *lss, struct sdp_sonode *nss)
-{
-	struct sonode *src = &lss->ss_so;
-	struct sonode *dst = &nss->ss_so;
-
-	/* Buffer sizes and low-water marks. */
-	dst->so_rcvbuf = src->so_rcvbuf;
-	dst->so_rcvlowat = src->so_rcvlowat;
-	dst->so_sndbuf = src->so_sndbuf;
-	dst->so_sndlowat = src->so_sndlowat;
-
-	/* Owner for SIGIO/SIGURG delivery. */
-	dst->so_pgrp = src->so_pgrp;
-
-	/* Only the options named here carry over; all others start clear. */
-	dst->so_options = src->so_options &
-	    (SO_DEBUG | SO_REUSEADDR | SO_KEEPALIVE | SO_DONTROUTE |
-	    SO_BROADCAST | SO_USELOOPBACK | SO_OOBINLINE | SO_DGRAM_ERRIND |
-	    SO_LINGER);
-}
diff --git a/usr/src/uts/common/fs/sockfs/socksdpvnops.c b/usr/src/uts/common/fs/sockfs/socksdpvnops.c
deleted file mode 100644
index 395599daab..0000000000
--- a/usr/src/uts/common/fs/sockfs/socksdpvnops.c
+++ /dev/null
@@ -1,535 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#include <sys/types.h>
-#include <sys/t_lock.h>
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/buf.h>
-#include <sys/debug.h>
-#include <sys/errno.h>
-#include <sys/uio.h>
-#include <sys/vfs.h>
-#include <sys/vfs_opreg.h>
-#include <sys/vnode.h>
-#include <sys/stropts.h>
-#include <sys/cmn_err.h>
-#include <sys/sysmacros.h>
-#include <sys/stream.h>
-#include <sys/strsun.h>
-
-#include <sys/socket.h>
-#include <sys/socketvar.h>
-
-#include <sys/project.h>
-#include <sys/strsubr.h>
-
-#include <fs/fs_subr.h>
-
-#include <sys/esunddi.h>
-#include <sys/ddi.h>
-
-#include <sys/filio.h>
-#include <sys/sockio.h>
-
-#include <inet/sdp_itf.h>
-#include "socksdp.h"
-
-/*
- * SDP sockfs vnode operations
- */
-static int socksdpv_open(struct vnode **, int, struct cred *,
- caller_context_t *);
-static int socksdpv_close(struct vnode *, int, int, offset_t,
- struct cred *, caller_context_t *);
-static int socksdpv_read(struct vnode *, struct uio *, int, struct cred *,
- caller_context_t *);
-static int socksdpv_write(struct vnode *, struct uio *, int, struct cred *,
- caller_context_t *);
-static int socksdpv_ioctl(struct vnode *, int, intptr_t, int,
- struct cred *, int32_t *, caller_context_t *);
-static int socksdp_setfl(vnode_t *, int, int, cred_t *, caller_context_t *);
-static void socksdpv_inactive(struct vnode *, struct cred *,
- caller_context_t *);
-static int socksdpv_poll(struct vnode *, short, int, short *,
- struct pollhead **, caller_context_t *);
-
-/*
- * Operation table for SDP socket vnodes.  open, close, read, write, ioctl,
- * setfl, inactive and poll use the SDP-specific handlers in this file;
- * getattr, setattr, access, fsync, fid and seek use the socktpi_*
- * implementations; dispose is routed to fs_error.  The table ends with a
- * NULL, NULL entry.
- */
-const fs_operation_def_t socksdp_vnodeops_template[] = {
- VOPNAME_OPEN, { .vop_open = socksdpv_open },
- VOPNAME_CLOSE, { .vop_close = socksdpv_close },
- VOPNAME_READ, { .vop_read = socksdpv_read },
- VOPNAME_WRITE, { .vop_write = socksdpv_write },
- VOPNAME_IOCTL, { .vop_ioctl = socksdpv_ioctl },
- VOPNAME_SETFL, { .vop_setfl = socksdp_setfl },
- VOPNAME_GETATTR, { .vop_getattr = socktpi_getattr },
- VOPNAME_SETATTR, { .vop_setattr = socktpi_setattr },
- VOPNAME_ACCESS, { .vop_access = socktpi_access },
- VOPNAME_FSYNC, { .vop_fsync = socktpi_fsync },
- VOPNAME_INACTIVE, { .vop_inactive = socksdpv_inactive },
- VOPNAME_FID, { .vop_fid = socktpi_fid },
- VOPNAME_SEEK, { .vop_seek = socktpi_seek },
- VOPNAME_POLL, { .vop_poll = socksdpv_poll },
- VOPNAME_DISPOSE, { .error = fs_error },
- NULL, NULL
-};
-struct vnodeops *socksdp_vnodeops;
-
-/*
- * VOP_OPEN handler for an SDP socket vnode.
- *
- * Always takes one open reference (so_count++).  With SO_ACCEPTOR set in
- * `flag' nothing else is done and 0 is returned.  Otherwise sdp_create()
- * is called to create the connection stored in so_priv, and the buffer
- * limits it reports are copied into the sonode.  If sdp_create() returns
- * NULL the open reference is dropped again and the error is returned.
- */
-/*ARGSUSED3*/
-static int
-socksdpv_open(struct vnode **vpp, int flag, struct cred *cr,
- caller_context_t *ct)
-{
- struct sonode *so;
- struct sdp_sonode *ss;
- struct vnode *vp = *vpp;
- int error = EPROTONOSUPPORT; /* in case sdpib fails to load */
- sdp_sockbuf_limits_t sbl;
- sdp_upcalls_t *upcalls;
-
- flag &= ~FCREAT; /* paranoia */
-
- so = VTOSO(vp);
- ss = SOTOSDO(so);
-
- mutex_enter(&so->so_lock);
- so->so_count++; /* one more open reference */
- ASSERT(so->so_count != 0); /* wraparound */
- mutex_exit(&so->so_lock);
-
- ASSERT(vp->v_type == VSOCK);
-
- /* Acceptor open: no sdp_create() call is made on this path */
- if (flag & SO_ACCEPTOR) {
- ASSERT(so->so_type == SOCK_STREAM);
- return (0);
- }
-
- /*
- * Active open.
- */
- upcalls = &sosdp_sock_upcalls;
-
- /*
- * When the necessary hardware is not available, the sdp_create stub
- * will evaluate to nomod_zero, which leaves 'error' untouched. Hence
- * the EPROTONOSUPPORT above. A successful call to sdp_create clears
- * the error.
- */
- so->so_priv = sdp_create(ss, NULL, so->so_family, SDP_CAN_BLOCK,
- upcalls, &sbl, cr, &error);
- if (so->so_priv == NULL) {
- ASSERT(error != 0);
- mutex_enter(&so->so_lock);
- ASSERT(so->so_count > 0);
- so->so_count--; /* one less open reference */
- mutex_exit(&so->so_lock);
- return (error);
- }
- /* Adopt the buffer sizes and low-water marks reported in sbl */
- so->so_rcvbuf = sbl.sbl_rxbuf;
- so->so_rcvlowat = sbl.sbl_rxlowat;
- so->so_sndbuf = sbl.sbl_txbuf;
- so->so_sndlowat = sbl.sbl_txlowat;
-
- return (error);
-}
-
-/*
- * VOP_CLOSE handler for an SDP socket vnode.
- *
- * Record locks and shares held by the calling process are cleaned up
- * first.  If `count' is greater than 1 nothing more is done and 0 is
- * returned.  Otherwise one open reference is dropped; when so_count
- * reaches 0 the socket is marked disconnected, sdp_disconnect() is
- * called, SIGIO-style signals are sent for whichever directions were
- * still open, and pollers are woken.
- *
- * Returns the sdp_disconnect() result when the last reference went away,
- * 0 otherwise.
- */
-/*ARGSUSED*/
-static int
-socksdpv_close(struct vnode *vp, int flag, int count, offset_t offset,
- struct cred *cr, caller_context_t *ct)
-{
- int sendsig = 0;
- int error = 0;
- struct sonode *so;
- struct sdp_sonode *ss;
-
- so = VTOSO(vp);
- ss = SOTOSDO(so);
-
- cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
- cleanshares(vp, ttoproc(curthread)->p_pid);
-
- ASSERT(vp->v_stream == NULL);
- if (count > 1) {
- dprint(2, ("socksdpv_close: count %d\n", count));
- return (0);
- }
-
- mutex_enter(&so->so_lock);
- so_lock_single(so); /* Set SOLOCKED */
- ASSERT(so->so_count > 0);
- so->so_count--; /* one fewer open reference */
-
- dprint(2, ("socksdpv_close: %p so_count %d\n", (void *)so,
- so->so_count));
-
- if (so->so_count == 0) {
- /*
- * Need to set flags as there might be ops in progress on
- * this socket.
- *
- * If socket already disconnected/disconnecting,
- * don't send signal (again).
- */
- if (!(so->so_state & SS_CANTRCVMORE))
- sendsig |= SDPSIG_READ;
- if (!(so->so_state & SS_CANTSENDMORE))
- sendsig |= SDPSIG_WRITE;
- soisdisconnected(so, 0);
- /* so_lock is released around the call into sdp_disconnect() */
- mutex_exit(&so->so_lock);
-
- /*
- * Initiate connection shutdown.
- */
- error = sdp_disconnect(so->so_priv, flag);
-
- mutex_enter(&so->so_lock);
- if (sendsig != 0)
- sosdp_sendsig(ss, sendsig);
- mutex_exit(&so->so_lock);
-
- pollwakeup(&ss->ss_poll_list, POLLIN|POLLRDNORM|POLLOUT);
- }
- /* Undo the so_lock_single() taken above */
- mutex_enter(&so->so_lock);
- so_unlock_single(so, SOLOCKED);
- mutex_exit(&so->so_lock);
-
- return (error);
-}
-
-/*
- * VOP_READ handler for an SDP socket vnode.  Only SOCK_STREAM sockets are
- * supported; any other type gets EOPNOTSUPP.  The read is carried out as a
- * receive with no name, no control data and no flags, and the result of
- * sosdp_recvmsg() is returned.
- */
-/*ARGSUSED2*/
-static int
-socksdpv_read(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cr,
-    caller_context_t *ct)
-{
-	struct nmsghdr msg;
-	struct sonode *sock;
-
-	sock = VTOSO(vp);
-	if (sock->so_type != SOCK_STREAM)
-		return (EOPNOTSUPP);
-
-	ASSERT(vp->v_type == VSOCK);
-
-	/* Plain read: no address, no ancillary data, no flags. */
-	msg.msg_flags = 0;
-	msg.msg_controllen = 0;
-	msg.msg_namelen = 0;
-
-	so_update_attrs(sock, SOACC);
-	return (sosdp_recvmsg(sock, &msg, uiop));
-}
-
-/*
- * Send data, see sosdp_sendmsg()
- *
- * VOP_WRITE handler for an SDP socket vnode.  Checks made under so_lock,
- * in order:
- *	SS_CANTSENDMORE set		post SIGPIPE to the thread, return EPIPE
- *	pending so_error		return the error from sogeterr()
- *	not connecting or connected	return ENOTCONN
- * A zero-length write returns 0 without calling sdp_send().  Otherwise the
- * sdp_send() result is returned; MSG_DONTWAIT is passed when uio_fmode has
- * FNDELAY or FNONBLOCK.
- */
-/*ARGSUSED2*/
-static int
-socksdpv_write(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cr,
- caller_context_t *ct)
-{
- struct sonode *so;
- ssize_t count;
- int error;
- int flags = 0;
-
- so = VTOSO(vp);
-
- mutex_enter(&so->so_lock);
- if (so->so_state & SS_CANTSENDMORE) {
- mutex_exit(&so->so_lock);
- tsignal(curthread, SIGPIPE);
- return (EPIPE);
- }
-
- if (so->so_error != 0) {
- error = sogeterr(so);
- /* If sogeterr() reports 0 the write carries on */
- if (error != 0) {
- mutex_exit(&so->so_lock);
- return (error);
- }
- }
-
- if (uiop->uio_fmode & (FNDELAY|FNONBLOCK))
- flags |= MSG_DONTWAIT;
-
- if (!(so->so_state & (SS_ISCONNECTING | SS_ISCONNECTED))) {
- mutex_exit(&so->so_lock);
- return (ENOTCONN);
- }
- count = uiop->uio_resid;
- mutex_exit(&so->so_lock);
-
- if (count == 0) {
- return (0);
- }
- so_update_attrs(so, SOMOD);
-
- /* so_lock is not held across the call into sdp_send() */
- error = sdp_send(so->so_priv, NULL, count, flags, uiop);
- return (error);
-}
-
-/*
- * VOP_IOCTL handler for an SDP socket vnode.
- *
- * Commands handled:
- *	FIONBIO			set/clear SS_NDELAY from an int32_t argument
- *	FIOASYNC		set/clear SS_ASYNC from an int32_t argument
- *	SIOCSPGRP, FIOSETOWN	change the SIGIO owner via sosdp_chgpgrp()
- *	SIOCGPGRP, FIOGETOWN	copy out so_pgrp
- *	SIOCATMARK		pass to sdp_ioctl(), copy out the int result
- *	SIOCSENABLESDP		pass to sdp_ioctl(), copy the int32_t back out
- *	FIONREAD		copy out sdp_polldata(SDP_READ), or 0 when
- *				SS_ACCEPTCONN is set
- * Any other command returns EINVAL.  A failed copyin or copyout returns
- * EFAULT.
- */
-/*ARGSUSED4*/
-static int
-socksdpv_ioctl(struct vnode *vp, int cmd, intptr_t arg, int mode,
- struct cred *cr, int32_t *rvalp, caller_context_t *ct)
-{
- struct sonode *so;
- struct sdp_sonode *ss;
- int32_t value;
- int error, intval;
- pid_t pid;
-
- so = VTOSO(vp);
- ss = SOTOSDO(so);
-
- /* handle socket specific ioctls */
- switch (cmd) {
- case FIONBIO:
- if (so_copyin((void *)arg, &value, sizeof (int32_t),
- (mode & (int)FKIOCTL))) {
- return (EFAULT);
- }
- mutex_enter(&so->so_lock);
- if (value != 0) {
- so->so_state |= SS_NDELAY;
- } else {
- so->so_state &= ~SS_NDELAY;
- }
- mutex_exit(&so->so_lock);
- return (0);
-
- case FIOASYNC:
- if (so_copyin((void *)arg, &value, sizeof (int32_t),
- (mode & (int)FKIOCTL))) {
- return (EFAULT);
- }
- mutex_enter(&so->so_lock);
-
- if (value) {
- /* Turn on SIGIO */
- so->so_state |= SS_ASYNC;
- } else {
- /* Turn off SIGIO */
- so->so_state &= ~SS_ASYNC;
- }
- mutex_exit(&so->so_lock);
- return (0);
-
- case SIOCSPGRP:
- case FIOSETOWN:
- if (so_copyin((void *)arg, &pid, sizeof (pid_t),
- (mode & (int)FKIOCTL))) {
- return (EFAULT);
- }
- mutex_enter(&so->so_lock);
-
- /* Setting the owner it already has skips the permission check */
- error = (pid != so->so_pgrp) ? sosdp_chgpgrp(ss, pid) : 0;
- mutex_exit(&so->so_lock);
- return (error);
-
- case SIOCGPGRP:
- case FIOGETOWN:
- /* so_pgrp is read here without taking so_lock */
- if (so_copyout(&so->so_pgrp, (void *)arg,
- sizeof (pid_t), (mode & (int)FKIOCTL)))
- return (EFAULT);
- return (0);
-
- case SIOCATMARK:
- intval = 0;
- /*
- * NOTE(review): the sdp_ioctl() result stored in `error' is
- * never examined; this case returns 0 unless the copyout
- * fails. Confirm that ignoring it is intended.
- */
- error = sdp_ioctl(so->so_priv, cmd, &intval, cr);
- if (so_copyout(&intval, (void *)arg, sizeof (int),
- (mode & (int)FKIOCTL)))
- return (EFAULT);
- return (0);
-
-
- case SIOCSENABLESDP: {
- int32_t enable;
-
- /*
- * System wide enable SDP
- */
-
- if (so_copyin((void *)arg, &enable, sizeof (int32_t),
- mode & (int)FKIOCTL))
- return (EFAULT);
-
- /*
- * NOTE(review): as with SIOCATMARK, the sdp_ioctl() result is
- * stored in `error' but not returned to the caller.
- */
- error = sdp_ioctl(so->so_priv, cmd, &enable, cr);
- if (so_copyout(&enable, (void *)arg,
- sizeof (int32_t), (mode & (int)FKIOCTL)))
- return (EFAULT);
- return (0);
- }
- /* from strioctl */
- case FIONREAD:
- /*
- * Return number of bytes of data in all data messages
- * in queue in "arg".
- * For stream socket, amount of available data.
- */
- /* SS_ACCEPTCONN is tested before so_lock is taken */
- if (so->so_state & SS_ACCEPTCONN) {
- intval = 0;
- } else {
- mutex_enter(&so->so_lock);
- intval = sdp_polldata(so->so_priv, SDP_READ);
- mutex_exit(&so->so_lock);
- }
- if (so_copyout(&intval, (void *)arg, sizeof (intval),
- (mode & (int)FKIOCTL)))
- return (EFAULT);
- return (0);
- default:
- return (EINVAL);
- }
-
-}
-
-/*
- * VOP_SETFL handler.  Allow any flags; the presence or absence of FNDELAY
- * and FNONBLOCK in nflags is recorded in so_state (as SS_NDELAY and
- * SS_NONBLOCK) so that they can be inherited from listener to acceptor.
- * Always returns 0.
- */
-/* ARGSUSED */
-static int
-socksdp_setfl(vnode_t *vp, int oflags, int nflags, cred_t *cr,
-    caller_context_t *ct)
-{
-	struct sonode *sock = VTOSO(vp);
-
-	mutex_enter(&sock->so_lock);
-
-	/* SS_NDELAY tracks FNDELAY. */
-	if ((nflags & FNDELAY) == 0) {
-		sock->so_state &= ~SS_NDELAY;
-	} else {
-		sock->so_state |= SS_NDELAY;
-	}
-
-	/* SS_NONBLOCK tracks FNONBLOCK. */
-	if ((nflags & FNONBLOCK) == 0) {
-		sock->so_state &= ~SS_NONBLOCK;
-	} else {
-		sock->so_state |= SS_NONBLOCK;
-	}
-
-	mutex_exit(&sock->so_lock);
-	return (0);
-}
-
-/*
- * VOP_INACTIVE handler for an SDP socket vnode.
- *
- * Drops one hold from v_count under v_lock and returns if other holds
- * remain.  Otherwise the connection in so_priv (if set) is closed with
- * sdp_close() and the sonode is released with sosdp_free().  Panics if
- * v_count is already below 1 on entry.
- */
-/*ARGSUSED*/
-static void
-socksdpv_inactive(struct vnode *vp, struct cred *cr, caller_context_t *ct)
-{
- struct sonode *so;
-
- so = VTOSO(vp);
-
- mutex_enter(&vp->v_lock);
- /*
- * If no one has reclaimed the vnode, remove from the
- * cache now.
- */
- if (vp->v_count < 1)
- cmn_err(CE_PANIC, "socksdpv_inactive: Bad v_count");
-
- /*
- * Drop the temporary hold by vn_rele now
- */
- if (--vp->v_count != 0) {
- mutex_exit(&vp->v_lock);
- return;
- }
- mutex_exit(&vp->v_lock);
-
- /* We are the sole owner of so now */
-
- ASSERT(!vn_has_cached_data(vp));
- if (so->so_priv) {
- sdp_close(so->so_priv);
- }
- /* Clear the handle before the sonode is handed to sosdp_free() */
- so->so_priv = NULL;
- sosdp_free(so);
-}
-
-/*
- * VOP_POLL handler for an SDP socket vnode.
- *
- * Stores the subset of `events' that is currently ready in *reventsp and
- * always returns 0.  When nothing is ready and `anyyet' is zero, *phpp is
- * pointed at the socket's ss_poll_list.  Sockets that are not SOCK_STREAM
- * never report any event here unless an error is pending.
- *
- * Check socktpi_poll() on why so_lock is not held in this function.
- */
-/*ARGSUSED5*/
-static int
-socksdpv_poll(struct vnode *vp, short events, int anyyet, short *reventsp,
- struct pollhead **phpp, caller_context_t *ct)
-{
- struct sonode *so;
- struct sdp_sonode *ss;
- short origevents = events;
- int so_state;
-
- so = VTOSO(vp);
- ss = SOTOSDO(so);
- /* Single unlocked snapshot of so_state, used for the state tests below */
- so_state = so->so_state;
-
-
- ASSERT(vp->v_type == VSOCK);
- ASSERT(vp->v_stream == NULL);
- ASSERT(so->so_version != SOV_STREAM);
-
- if (!(so_state & SS_ISCONNECTED) && (so->so_type == SOCK_STREAM)) {
- /*
- * Not connected yet - turn off write side events
- */
- events &= ~(POLLOUT|POLLWRBAND);
- }
-
- /*
- * Check for errors
- *
- * With an error pending, every requested POLLIN/POLLRDNORM/POLLOUT
- * bit is reported. The test uses origevents, so POLLOUT is reported
- * even if it was masked out of `events' above.
- */
- if (so->so_error != 0 &&
- ((POLLIN|POLLRDNORM|POLLOUT) & origevents) != 0) {
- *reventsp = (POLLIN|POLLRDNORM|POLLOUT) & origevents;
- return (0);
- }
-
- *reventsp = 0;
-
- /*
- * Don't mark socket as writable until TX queued data is
- * below watermark.
- *
- * Writability is whatever sdp_polldata(SDP_XMIT) reports.
- */
- if (so->so_type == SOCK_STREAM) {
- if (sdp_polldata(so->so_priv, SDP_XMIT)) {
- *reventsp |= POLLOUT & events;
- }
- } else {
- /* Not a stream socket: skip the read-side checks entirely */
- *reventsp = 0;
- goto done;
- }
-
- /* Readable when sdp_polldata(SDP_READ) is non-zero */
- if (sdp_polldata(so->so_priv, SDP_READ)) {
- *reventsp |= (POLLIN|POLLRDNORM) & events;
- }
-
- /* SS_HASCONNIND or SS_CANTRCVMORE also counts as readable */
- if ((so_state & (SS_HASCONNIND|SS_CANTRCVMORE)) != 0) {
- *reventsp |= (POLLIN|POLLRDNORM) & events;
- }
-
-done:
- /* Nothing ready and no earlier fd reported events: hand back our pollhead */
- if (!*reventsp && !anyyet) {
- *phpp = &ss->ss_poll_list;
- }
-
- return (0);
-}
diff --git a/usr/src/uts/common/fs/sockfs/sockssl.c b/usr/src/uts/common/fs/sockfs/sockssl.c
index 037805e6da..8df1d3fe58 100644
--- a/usr/src/uts/common/fs/sockfs/sockssl.c
+++ b/usr/src/uts/common/fs/sockfs/sockssl.c
@@ -23,8 +23,6 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
@@ -43,8 +41,9 @@
#include <sys/sockio.h>
#include <sys/socketvar.h>
-#include <inet/kssl/ksslapi.h>
+#include <fs/sockfs/socktpi.h>
+#include <inet/kssl/ksslapi.h>
/*
* This routine is registered with the stream head to be called by kstrgetmsg()
@@ -61,7 +60,7 @@ strsock_kssl_input(vnode_t *vp, mblk_t *mp,
strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
{
struct sonode *so = VTOSO(vp);
- kssl_ctx_t kssl_ctx = so->so_kssl_ctx;
+ kssl_ctx_t kssl_ctx = SOTOTPI(so)->sti_kssl_ctx;
kssl_cmd_t kssl_cmd;
mblk_t *out;
@@ -101,7 +100,7 @@ strsock_kssl_output(vnode_t *vp, mblk_t *mp,
strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
{
struct sonode *so = VTOSO(vp);
- kssl_ctx_t kssl_ctx = so->so_kssl_ctx;
+ kssl_ctx_t kssl_ctx = SOTOTPI(so)->sti_kssl_ctx;
mblk_t *recmp;
dprintso(so, 1, ("strsock_kssl_output(%p, %p)\n",
diff --git a/usr/src/uts/common/fs/sockfs/sockstr.c b/usr/src/uts/common/fs/sockfs/sockstr.c
index b783a27251..71c8d4c49c 100644
--- a/usr/src/uts/common/fs/sockfs/sockstr.c
+++ b/usr/src/uts/common/fs/sockfs/sockstr.c
@@ -51,13 +51,15 @@
#include <sys/cmn_err.h>
#include <sys/proc.h>
#include <sys/ddi.h>
-#include <sys/kmem_impl.h>
#include <sys/suntpi.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/socketvar.h>
+#include <sys/sodirect.h>
#include <netinet/in.h>
+#include <inet/common.h>
+#include <inet/proto_set.h>
#include <sys/tiuser.h>
#define _SUN_TPI_VERSION 2
@@ -67,6 +69,8 @@
#include <c2/audit.h>
+#include <fs/sockfs/socktpi.h>
+#include <fs/sockfs/socktpi_impl.h>
#include <sys/dcopy.h>
int so_default_version = SOV_SOCKSTREAM;
@@ -115,13 +119,9 @@ static mblk_t *strsock_proto(vnode_t *vp, mblk_t *mp,
static mblk_t *strsock_misc(vnode_t *vp, mblk_t *mp,
strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
strsigset_t *allmsgsigs, strpollset_t *pollwakeups);
-
-static int tlitosyserr(int terr);
-
/*
- * Sodirect kmem_cache and put/wakeup functions.
+ * STREAMS based sodirect put/wakeup functions.
*/
-struct kmem_cache *socktpi_sod_cache;
static int sodput(sodirect_t *, mblk_t *);
static void sodwakeup(sodirect_t *);
@@ -131,10 +131,7 @@ static void sodwakeup(sodirect_t *);
int
sostr_init()
{
- /* Allocate sodirect_t kmem_cache */
- socktpi_sod_cache = kmem_cache_create("socktpi_sod_cache",
- sizeof (sodirect_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
-
+ sod_init();
return (0);
}
@@ -151,15 +148,16 @@ so_sock2stream(struct sonode *so)
queue_t *rq;
mblk_t *mp;
int error = 0;
+ sotpi_info_t *sti = SOTOTPI(so);
- ASSERT(MUTEX_HELD(&so->so_plumb_lock));
+ ASSERT(MUTEX_HELD(&sti->sti_plumb_lock));
mutex_enter(&so->so_lock);
so_lock_single(so);
ASSERT(so->so_version != SOV_STREAM);
- if (so->so_state & SS_DIRECT) {
+ if (sti->sti_direct) {
mblk_t **mpp;
int rval;
@@ -175,9 +173,9 @@ so_sock2stream(struct sonode *so)
"_SIOCSOCKFALLBACK failed\n", (void *)so));
goto exit;
}
- so->so_state &= ~SS_DIRECT;
+ sti->sti_direct = 0;
- for (mpp = &so->so_conn_ind_head; (mp = *mpp) != NULL;
+ for (mpp = &sti->sti_conn_ind_head; (mp = *mpp) != NULL;
mpp = &mp->b_next) {
struct T_conn_ind *conn_ind;
@@ -236,7 +234,7 @@ so_sock2stream(struct sonode *so)
}
so->so_version = SOV_STREAM;
- so->so_priv = NULL;
+ so->so_proto_handle = NULL;
/*
* Remove the hooks in the stream head to avoid queuing more
@@ -251,20 +249,20 @@ so_sock2stream(struct sonode *so)
* on the queue - the behavior of urgent data after a switch is
* left undefined.
*/
- so->so_error = so->so_delayed_error = 0;
+ so->so_error = sti->sti_delayed_error = 0;
freemsg(so->so_oobmsg);
so->so_oobmsg = NULL;
- so->so_oobsigcnt = so->so_oobcnt = 0;
+ sti->sti_oobsigcnt = sti->sti_oobcnt = 0;
so->so_state &= ~(SS_RCVATMARK|SS_OOBPEND|SS_HAVEOOBDATA|SS_HADOOBDATA|
- SS_HASCONNIND|SS_SAVEDEOR);
+ SS_SAVEDEOR);
ASSERT(so_verify_oobstate(so));
- freemsg(so->so_ack_mp);
- so->so_ack_mp = NULL;
+ freemsg(sti->sti_ack_mp);
+ sti->sti_ack_mp = NULL;
/*
- * Flush the T_DISCON_IND on so_discon_ind_mp.
+ * Flush the T_DISCON_IND on sti_discon_ind_mp.
*/
so_flush_discon_ind(so);
@@ -272,16 +270,15 @@ so_sock2stream(struct sonode *so)
* Move any queued T_CONN_IND messages to stream head queue.
*/
rq = RD(strvp2wq(vp));
- while ((mp = so->so_conn_ind_head) != NULL) {
- so->so_conn_ind_head = mp->b_next;
+ while ((mp = sti->sti_conn_ind_head) != NULL) {
+ sti->sti_conn_ind_head = mp->b_next;
mp->b_next = NULL;
- if (so->so_conn_ind_head == NULL) {
- ASSERT(so->so_conn_ind_tail == mp);
- so->so_conn_ind_tail = NULL;
+ if (sti->sti_conn_ind_head == NULL) {
+ ASSERT(sti->sti_conn_ind_tail == mp);
+ sti->sti_conn_ind_tail = NULL;
}
dprintso(so, 0,
- ("so_sock2stream(%p): moving T_CONN_IND\n",
- (void *)so));
+ ("so_sock2stream(%p): moving T_CONN_IND\n", (void *)so));
/* Drop lock across put() */
mutex_exit(&so->so_lock);
@@ -311,14 +308,15 @@ void
so_stream2sock(struct sonode *so)
{
struct vnode *vp = SOTOV(so);
+ sotpi_info_t *sti = SOTOTPI(so);
- ASSERT(MUTEX_HELD(&so->so_plumb_lock));
+ ASSERT(MUTEX_HELD(&sti->sti_plumb_lock));
mutex_enter(&so->so_lock);
so_lock_single(so);
ASSERT(so->so_version == SOV_STREAM);
so->so_version = SOV_SOCKSTREAM;
- so->so_pushcnt = 0;
+ sti->sti_pushcnt = 0;
mutex_exit(&so->so_lock);
/*
@@ -350,7 +348,7 @@ so_stream2sock(struct sonode *so)
mutex_enter(&so->so_lock);
/*
- * Flush the T_DISCON_IND on so_discon_ind_mp.
+ * Flush the T_DISCON_IND on sti_discon_ind_mp.
*/
so_flush_discon_ind(so);
so_unlock_read(so); /* Clear SOREADLOCKED */
@@ -388,25 +386,18 @@ so_removehooks(struct sonode *so)
*/
}
-/*
- * Initialize the streams side of a socket including
- * T_info_req/ack processing. If tso is not NULL its values are used thereby
- * avoiding the T_INFO_REQ.
- */
-int
-so_strinit(struct sonode *so, struct sonode *tso)
+void
+so_basic_strinit(struct sonode *so)
{
struct vnode *vp = SOTOV(so);
struct stdata *stp;
mblk_t *mp;
- int error;
-
- dprintso(so, 1, ("so_strinit(%p)\n", (void *)so));
+ sotpi_info_t *sti = SOTOTPI(so);
/* Preallocate an unbind_req message */
mp = soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP);
mutex_enter(&so->so_lock);
- so->so_unbind_mp = mp;
+ sti->sti_unbind_mp = mp;
#ifdef DEBUG
so->so_options = so_default_options;
#endif /* DEBUG */
@@ -414,6 +405,40 @@ so_strinit(struct sonode *so, struct sonode *tso)
so_installhooks(so);
+ stp = vp->v_stream;
+ /*
+ * Have to keep minpsz at zero in order to allow write/send of zero
+ * bytes.
+ */
+ mutex_enter(&stp->sd_lock);
+ if (stp->sd_qn_minpsz == 1)
+ stp->sd_qn_minpsz = 0;
+ mutex_exit(&stp->sd_lock);
+
+ /*
+ * If sodirect capable and no fallback is pending, initialize sodirect_t.
+ * Note, SS_SODIRECT is set in socktpi_open().
+ */
+ if ((so->so_state & SS_SODIRECT) &&
+ !(so->so_state & SS_FALLBACK_PENDING)) {
+ sod_sock_init(so, stp, sodput, sodwakeup, &stp->sd_lock);
+ }
+}
+
+/*
+ * Initialize the streams side of a socket including
+ * T_info_req/ack processing. If tso is not NULL its values are used thereby
+ * avoiding the T_INFO_REQ.
+ */
+int
+so_strinit(struct sonode *so, struct sonode *tso)
+{
+ sotpi_info_t *sti = SOTOTPI(so);
+ sotpi_info_t *tsti;
+ int error;
+
+ so_basic_strinit(so);
+
/*
* The T_CAPABILITY_REQ should be the first message sent down because
* at least TCP has a fast-path for this which avoids timeouts while
@@ -424,19 +449,21 @@ so_strinit(struct sonode *so, struct sonode *tso)
if (error)
return (error);
} else {
+ tsti = SOTOTPI(tso);
+
mutex_enter(&so->so_lock);
- so->so_tsdu_size = tso->so_tsdu_size;
- so->so_etsdu_size = tso->so_etsdu_size;
- so->so_addr_size = tso->so_addr_size;
- so->so_opt_size = tso->so_opt_size;
- so->so_tidu_size = tso->so_tidu_size;
- so->so_serv_type = tso->so_serv_type;
+ sti->sti_tsdu_size = tsti->sti_tsdu_size;
+ sti->sti_etsdu_size = tsti->sti_etsdu_size;
+ sti->sti_addr_size = tsti->sti_addr_size;
+ sti->sti_opt_size = tsti->sti_opt_size;
+ sti->sti_tidu_size = tsti->sti_tidu_size;
+ sti->sti_serv_type = tsti->sti_serv_type;
so->so_mode = tso->so_mode & ~SM_ACCEPTOR_ID;
mutex_exit(&so->so_lock);
/* the following do_tcapability may update so->so_mode */
- if ((tso->so_serv_type != T_CLTS) &&
- !(tso->so_state & SS_DIRECT)) {
+ if ((tsti->sti_serv_type != T_CLTS) &&
+ (sti->sti_direct == 0)) {
error = do_tcapability(so, TC1_ACCEPTOR_ID);
if (error)
return (error);
@@ -448,73 +475,19 @@ so_strinit(struct sonode *so, struct sonode *tso)
* We set the addr_size to something to allocate a the address
* structures.
*/
- if (so->so_addr_size == 0) {
+ if (sti->sti_addr_size == 0) {
so->so_state |= SS_ISBOUND | SS_ISCONNECTED;
/* Address size can vary with address families. */
if (so->so_family == AF_INET6)
- so->so_addr_size =
+ sti->sti_addr_size =
(t_scalar_t)sizeof (struct sockaddr_in6);
else
- so->so_addr_size =
+ sti->sti_addr_size =
(t_scalar_t)sizeof (struct sockaddr_in);
- ASSERT(so->so_unbind_mp);
+ ASSERT(sti->sti_unbind_mp);
}
- /*
- * Allocate the addresses.
- */
- ASSERT(so->so_laddr_sa == NULL && so->so_faddr_sa == NULL);
- ASSERT(so->so_laddr_len == 0 && so->so_faddr_len == 0);
- so->so_laddr_maxlen = so->so_faddr_maxlen =
- P2ROUNDUP(so->so_addr_size, KMEM_ALIGN);
- so->so_laddr_sa = kmem_alloc(so->so_laddr_maxlen * 2, KM_SLEEP);
- so->so_faddr_sa = (struct sockaddr *)((caddr_t)so->so_laddr_sa
- + so->so_laddr_maxlen);
-
- if (so->so_family == AF_UNIX) {
- /*
- * Initialize AF_UNIX related fields.
- */
- bzero(&so->so_ux_laddr, sizeof (so->so_ux_laddr));
- bzero(&so->so_ux_faddr, sizeof (so->so_ux_faddr));
- }
-
- stp = vp->v_stream;
- /*
- * Have to keep minpsz at zero in order to allow write/send of zero
- * bytes.
- */
- mutex_enter(&stp->sd_lock);
- if (stp->sd_qn_minpsz == 1)
- stp->sd_qn_minpsz = 0;
- mutex_exit(&stp->sd_lock);
- /*
- * If sodirect capable allocate and initialize sodirect_t.
- * Note, SS_SODIRECT is set in socktpi_open().
- */
- if (so->so_state & SS_SODIRECT) {
- sodirect_t *sodp;
-
- ASSERT(so->so_direct == NULL);
-
- sodp = kmem_cache_alloc(socktpi_sod_cache, KM_SLEEP);
- sodp->sod_state = SOD_ENABLED | SOD_WAKE_NOT;
- sodp->sod_want = 0;
- sodp->sod_q = RD(stp->sd_wrq);
- sodp->sod_enqueue = sodput;
- sodp->sod_wakeup = sodwakeup;
- sodp->sod_uioafh = NULL;
- sodp->sod_uioaft = NULL;
- sodp->sod_lockp = &stp->sd_lock;
- /*
- * Remainder of the sod_uioa members are left uninitialized
- * but will be initialized later by uioainit() before uioa
- * is enabled.
- */
- sodp->sod_uioa.uioa_state = UIOA_ALLOC;
- so->so_direct = sodp;
- stp->sd_sodirect = sodp;
- }
+ so_alloc_addr(so, sti->sti_addr_size);
return (0);
}
@@ -522,25 +495,28 @@ so_strinit(struct sonode *so, struct sonode *tso)
static void
copy_tinfo(struct sonode *so, struct T_info_ack *tia)
{
- so->so_tsdu_size = tia->TSDU_size;
- so->so_etsdu_size = tia->ETSDU_size;
- so->so_addr_size = tia->ADDR_size;
- so->so_opt_size = tia->OPT_size;
- so->so_tidu_size = tia->TIDU_size;
- so->so_serv_type = tia->SERV_type;
+ sotpi_info_t *sti = SOTOTPI(so);
+
+ sti->sti_tsdu_size = tia->TSDU_size;
+ sti->sti_etsdu_size = tia->ETSDU_size;
+ sti->sti_addr_size = tia->ADDR_size;
+ sti->sti_opt_size = tia->OPT_size;
+ sti->sti_tidu_size = tia->TIDU_size;
+ sti->sti_serv_type = tia->SERV_type;
switch (tia->CURRENT_state) {
case TS_UNBND:
break;
case TS_IDLE:
so->so_state |= SS_ISBOUND;
- so->so_laddr_len = 0;
- so->so_state &= ~SS_LADDR_VALID;
+ sti->sti_laddr_len = 0;
+ sti->sti_laddr_valid = 0;
break;
case TS_DATA_XFER:
so->so_state |= SS_ISBOUND|SS_ISCONNECTED;
- so->so_laddr_len = 0;
- so->so_faddr_len = 0;
- so->so_state &= ~(SS_LADDR_VALID | SS_FADDR_VALID);
+ sti->sti_laddr_len = 0;
+ sti->sti_faddr_len = 0;
+ sti->sti_laddr_valid = 0;
+ sti->sti_faddr_valid = 0;
break;
}
@@ -550,11 +526,11 @@ copy_tinfo(struct sonode *so, struct T_info_ack *tia)
* and SM_EXDATA, SM_OPTDATA, and SM_BYTESTREAM)
* from the info ack.
*/
- if (so->so_serv_type == T_CLTS) {
+ if (sti->sti_serv_type == T_CLTS) {
so->so_mode |= SM_ATOMIC | SM_ADDR;
} else {
so->so_mode |= SM_CONNREQUIRED;
- if (so->so_etsdu_size != 0 && so->so_etsdu_size != -2)
+ if (sti->sti_etsdu_size != 0 && sti->sti_etsdu_size != -2)
so->so_mode |= SM_EXDATA;
}
if (so->so_type == SOCK_SEQPACKET || so->so_type == SOCK_RAW) {
@@ -563,9 +539,9 @@ copy_tinfo(struct sonode *so, struct T_info_ack *tia)
}
if (so->so_family == AF_UNIX) {
so->so_mode |= SM_FDPASSING | SM_OPTDATA;
- if (so->so_addr_size == -1) {
+ if (sti->sti_addr_size == -1) {
/* MAXPATHLEN + soun_family + nul termination */
- so->so_addr_size = (t_scalar_t)(MAXPATHLEN +
+ sti->sti_addr_size = (t_scalar_t)(MAXPATHLEN +
sizeof (short) + 1);
}
if (so->so_type == SOCK_STREAM) {
@@ -573,60 +549,62 @@ copy_tinfo(struct sonode *so, struct T_info_ack *tia)
* Make it into a byte-stream transport.
* SOCK_SEQPACKET sockets are unchanged.
*/
- so->so_tsdu_size = 0;
+ sti->sti_tsdu_size = 0;
}
- } else if (so->so_addr_size == -1) {
+ } else if (sti->sti_addr_size == -1) {
/*
* Logic extracted from sockmod - have to pick some max address
* length in order to preallocate the addresses.
*/
- so->so_addr_size = SOA_DEFSIZE;
+ sti->sti_addr_size = SOA_DEFSIZE;
}
- if (so->so_tsdu_size == 0)
+ if (sti->sti_tsdu_size == 0)
so->so_mode |= SM_BYTESTREAM;
}
static int
check_tinfo(struct sonode *so)
{
+ sotpi_info_t *sti = SOTOTPI(so);
+
/* Consistency checks */
- if (so->so_type == SOCK_DGRAM && so->so_serv_type != T_CLTS) {
+ if (so->so_type == SOCK_DGRAM && sti->sti_serv_type != T_CLTS) {
eprintso(so, ("service type and socket type mismatch\n"));
eprintsoline(so, EPROTO);
return (EPROTO);
}
- if (so->so_type == SOCK_STREAM && so->so_serv_type == T_CLTS) {
+ if (so->so_type == SOCK_STREAM && sti->sti_serv_type == T_CLTS) {
eprintso(so, ("service type and socket type mismatch\n"));
eprintsoline(so, EPROTO);
return (EPROTO);
}
- if (so->so_type == SOCK_SEQPACKET && so->so_serv_type == T_CLTS) {
+ if (so->so_type == SOCK_SEQPACKET && sti->sti_serv_type == T_CLTS) {
eprintso(so, ("service type and socket type mismatch\n"));
eprintsoline(so, EPROTO);
return (EPROTO);
}
if (so->so_family == AF_INET &&
- so->so_addr_size != (t_scalar_t)sizeof (struct sockaddr_in)) {
+ sti->sti_addr_size != (t_scalar_t)sizeof (struct sockaddr_in)) {
eprintso(so,
("AF_INET must have sockaddr_in address length. Got %d\n",
- so->so_addr_size));
+ sti->sti_addr_size));
eprintsoline(so, EMSGSIZE);
return (EMSGSIZE);
}
if (so->so_family == AF_INET6 &&
- so->so_addr_size != (t_scalar_t)sizeof (struct sockaddr_in6)) {
+ sti->sti_addr_size != (t_scalar_t)sizeof (struct sockaddr_in6)) {
eprintso(so,
("AF_INET6 must have sockaddr_in6 address length. Got %d\n",
- so->so_addr_size));
+ sti->sti_addr_size));
eprintsoline(so, EMSGSIZE);
return (EMSGSIZE);
}
dprintso(so, 1, (
"tinfo: serv %d tsdu %d, etsdu %d, addr %d, opt %d, tidu %d\n",
- so->so_serv_type, so->so_tsdu_size, so->so_etsdu_size,
- so->so_addr_size, so->so_opt_size,
- so->so_tidu_size));
+ sti->sti_serv_type, sti->sti_tsdu_size, sti->sti_etsdu_size,
+ sti->sti_addr_size, sti->sti_opt_size,
+ sti->sti_tidu_size));
dprintso(so, 1, ("tinfo: so_state %s\n",
pr_state(so->so_state, so->so_mode)));
return (0);
@@ -646,7 +624,7 @@ do_tinfo(struct sonode *so)
ASSERT(MUTEX_NOT_HELD(&so->so_lock));
if (so_no_tinfo) {
- so->so_addr_size = 0;
+ SOTOTPI(so)->sti_addr_size = 0;
return (0);
}
@@ -697,16 +675,17 @@ do_tcapability(struct sonode *so, t_uscalar_t cap_bits1)
struct T_capability_ack *tca;
mblk_t *mp;
int error;
+ sotpi_info_t *sti = SOTOTPI(so);
ASSERT(cap_bits1 != 0);
ASSERT((cap_bits1 & ~(TC1_ACCEPTOR_ID | TC1_INFO)) == 0);
ASSERT(MUTEX_NOT_HELD(&so->so_lock));
- if (so->so_provinfo->tpi_capability == PI_NO)
+ if (sti->sti_provinfo->tpi_capability == PI_NO)
return (do_tinfo(so));
if (so_no_tinfo) {
- so->so_addr_size = 0;
+ sti->sti_addr_size = 0;
if ((cap_bits1 &= ~TC1_INFO) == 0)
return (0);
}
@@ -737,10 +716,10 @@ do_tcapability(struct sonode *so, t_uscalar_t cap_bits1)
if ((error = sowaitprim(so, T_CAPABILITY_REQ, T_CAPABILITY_ACK,
(t_uscalar_t)sizeof (*tca), &mp, sock_capability_timeout * hz))) {
mutex_exit(&so->so_lock);
- PI_PROVLOCK(so->so_provinfo);
- if (so->so_provinfo->tpi_capability == PI_DONTKNOW)
- so->so_provinfo->tpi_capability = PI_NO;
- PI_PROVUNLOCK(so->so_provinfo);
+ PI_PROVLOCK(sti->sti_provinfo);
+ if (sti->sti_provinfo->tpi_capability == PI_DONTKNOW)
+ sti->sti_provinfo->tpi_capability = PI_NO;
+ PI_PROVUNLOCK(sti->sti_provinfo);
ASSERT((so->so_mode & SM_ACCEPTOR_ID) == 0);
if (cap_bits1 & TC1_INFO) {
/*
@@ -758,27 +737,14 @@ do_tcapability(struct sonode *so, t_uscalar_t cap_bits1)
return (0);
}
- if (so->so_provinfo->tpi_capability == PI_DONTKNOW) {
- PI_PROVLOCK(so->so_provinfo);
- so->so_provinfo->tpi_capability = PI_YES;
- PI_PROVUNLOCK(so->so_provinfo);
- }
-
ASSERT(mp);
tca = (struct T_capability_ack *)mp->b_rptr;
ASSERT((cap_bits1 & TC1_INFO) == (tca->CAP_bits1 & TC1_INFO));
+ so_proc_tcapability_ack(so, tca);
cap_bits1 = tca->CAP_bits1;
- if (cap_bits1 & TC1_ACCEPTOR_ID) {
- so->so_acceptor_id = tca->ACCEPTOR_id;
- so->so_mode |= SM_ACCEPTOR_ID;
- }
-
- if (cap_bits1 & TC1_INFO)
- copy_tinfo(so, &tca->INFO_ack);
-
mutex_exit(&so->so_lock);
freemsg(mp);
@@ -789,17 +755,41 @@ do_tcapability(struct sonode *so, t_uscalar_t cap_bits1)
}
/*
- * Retrieve and clear the socket error.
+ * Process a T_CAPABILITY_ACK
+ */
+void
+so_proc_tcapability_ack(struct sonode *so, struct T_capability_ack *tca)
+{
+ sotpi_info_t *sti = SOTOTPI(so);
+
+ if (sti->sti_provinfo->tpi_capability == PI_DONTKNOW) {
+ PI_PROVLOCK(sti->sti_provinfo);
+ sti->sti_provinfo->tpi_capability = PI_YES;
+ PI_PROVUNLOCK(sti->sti_provinfo);
+ }
+
+ if (tca->CAP_bits1 & TC1_ACCEPTOR_ID) {
+ sti->sti_acceptor_id = tca->ACCEPTOR_id;
+ so->so_mode |= SM_ACCEPTOR_ID;
+ }
+
+ if (tca->CAP_bits1 & TC1_INFO)
+ copy_tinfo(so, &tca->INFO_ack);
+}
+
+/*
+ * Retrieve the socket error; clear it only when clear_err is set.
*/
int
-sogeterr(struct sonode *so)
+sogeterr(struct sonode *so, boolean_t clear_err)
{
int error;
ASSERT(MUTEX_HELD(&so->so_lock));
error = so->so_error;
- so->so_error = 0;
+ if (clear_err)
+ so->so_error = 0;
return (error);
}
@@ -898,8 +888,7 @@ void
soisdisconnected(struct sonode *so, int error)
{
ASSERT(MUTEX_HELD(&so->so_lock));
- so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING|
- SS_LADDR_VALID|SS_FADDR_VALID);
+ so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE);
so->so_error = (ushort_t)error;
if (so->so_peercred != NULL) {
@@ -935,7 +924,7 @@ void
socantsendmore(struct sonode *so)
{
ASSERT(MUTEX_HELD(&so->so_lock));
- so->so_state = so->so_state & ~SS_FADDR_VALID | SS_CANTSENDMORE;
+ so->so_state |= SS_CANTSENDMORE;
cv_broadcast(&so->so_state_cv);
}
@@ -1013,13 +1002,11 @@ sowaitprim(struct sonode *so, t_scalar_t request_prim, t_scalar_t ack_prim,
if (tpr->error_ack.TLI_error == TSYSERR) {
error = tpr->error_ack.UNIX_error;
} else {
- error = tlitosyserr(tpr->error_ack.TLI_error);
+ error = proto_tlitosyserr(tpr->error_ack.TLI_error);
}
dprintso(so, 0, ("error_ack for %d: %d/%d ->%d\n",
- tpr->error_ack.ERROR_prim,
- tpr->error_ack.TLI_error,
- tpr->error_ack.UNIX_error,
- error));
+ tpr->error_ack.ERROR_prim, tpr->error_ack.TLI_error,
+ tpr->error_ack.UNIX_error, error));
freemsg(mp);
return (error);
}
@@ -1029,13 +1016,11 @@ sowaitprim(struct sonode *so, t_scalar_t request_prim, t_scalar_t ack_prim,
#ifdef DEBUG
if (tpr->type == T_ERROR_ACK) {
dprintso(so, 0, ("error_ack for %d: %d/%d\n",
- tpr->error_ack.ERROR_prim,
- tpr->error_ack.TLI_error,
+ tpr->error_ack.ERROR_prim, tpr->error_ack.TLI_error,
tpr->error_ack.UNIX_error));
} else if (tpr->type == T_OK_ACK) {
dprintso(so, 0, ("ok_ack for %d, expected %d for %d\n",
- tpr->ok_ack.CORRECT_prim,
- ack_prim, request_prim));
+ tpr->ok_ack.CORRECT_prim, ack_prim, request_prim));
} else {
dprintso(so, 0,
("unexpected primitive %d, expected %d for %d\n",
@@ -1066,11 +1051,13 @@ sowaitokack(struct sonode *so, t_scalar_t request_prim)
}
/*
- * Queue a received TPI ack message on so_ack_mp.
+ * Queue a received TPI ack message on sti_ack_mp.
*/
void
soqueueack(struct sonode *so, mblk_t *mp)
{
+ sotpi_info_t *sti = SOTOTPI(so);
+
if (DB_TYPE(mp) != M_PCPROTO) {
zcmn_err(getzoneid(), CE_WARN,
"sockfs: received unexpected M_PROTO TPI ack. Prim %d\n",
@@ -1080,13 +1067,13 @@ soqueueack(struct sonode *so, mblk_t *mp)
}
mutex_enter(&so->so_lock);
- if (so->so_ack_mp != NULL) {
- dprintso(so, 1, ("so_ack_mp already set\n"));
- freemsg(so->so_ack_mp);
- so->so_ack_mp = NULL;
+ if (sti->sti_ack_mp != NULL) {
+ dprintso(so, 1, ("sti_ack_mp already set\n"));
+ freemsg(sti->sti_ack_mp);
+ sti->sti_ack_mp = NULL;
}
- so->so_ack_mp = mp;
- cv_broadcast(&so->so_ack_cv);
+ sti->sti_ack_mp = mp;
+ cv_broadcast(&sti->sti_ack_cv);
mutex_exit(&so->so_lock);
}
@@ -1096,9 +1083,11 @@ soqueueack(struct sonode *so, mblk_t *mp)
int
sowaitack(struct sonode *so, mblk_t **mpp, clock_t wait)
{
+ sotpi_info_t *sti = SOTOTPI(so);
+
ASSERT(MUTEX_HELD(&so->so_lock));
- while (so->so_ack_mp == NULL) {
+ while (sti->sti_ack_mp == NULL) {
#ifdef SOCK_TEST
if (wait == 0 && sock_test_timelimit != 0)
wait = sock_test_timelimit;
@@ -1110,16 +1099,16 @@ sowaitack(struct sonode *so, mblk_t **mpp, clock_t wait)
clock_t now;
time_to_wait(&now, wait);
- if (cv_timedwait(&so->so_ack_cv, &so->so_lock,
+ if (cv_timedwait(&sti->sti_ack_cv, &so->so_lock,
now) == -1) {
eprintsoline(so, ETIME);
return (ETIME);
}
}
else
- cv_wait(&so->so_ack_cv, &so->so_lock);
+ cv_wait(&sti->sti_ack_cv, &so->so_lock);
}
- *mpp = so->so_ack_mp;
+ *mpp = sti->sti_ack_mp;
#ifdef DEBUG
{
union T_primitives *tpr;
@@ -1135,16 +1124,18 @@ sowaitack(struct sonode *so, mblk_t **mpp, clock_t wait)
tpr->type == T_OPTMGMT_ACK);
}
#endif /* DEBUG */
- so->so_ack_mp = NULL;
+ sti->sti_ack_mp = NULL;
return (0);
}
/*
- * Queue a received T_CONN_IND message on so_conn_ind_head/tail.
+ * Queue a received T_CONN_IND message on sti_conn_ind_head/tail.
*/
void
soqueueconnind(struct sonode *so, mblk_t *mp)
{
+ sotpi_info_t *sti = SOTOTPI(so);
+
if (DB_TYPE(mp) != M_PROTO) {
zcmn_err(getzoneid(), CE_WARN,
"sockfs: received unexpected M_PCPROTO T_CONN_IND\n");
@@ -1154,17 +1145,15 @@ soqueueconnind(struct sonode *so, mblk_t *mp)
mutex_enter(&so->so_lock);
ASSERT(mp->b_next == NULL);
- if (so->so_conn_ind_head == NULL) {
- so->so_conn_ind_head = mp;
- so->so_state |= SS_HASCONNIND;
+ if (sti->sti_conn_ind_head == NULL) {
+ sti->sti_conn_ind_head = mp;
} else {
- ASSERT(so->so_state & SS_HASCONNIND);
- ASSERT(so->so_conn_ind_tail->b_next == NULL);
- so->so_conn_ind_tail->b_next = mp;
+ ASSERT(sti->sti_conn_ind_tail->b_next == NULL);
+ sti->sti_conn_ind_tail->b_next = mp;
}
- so->so_conn_ind_tail = mp;
+ sti->sti_conn_ind_tail = mp;
/* Wakeup a single consumer of the T_CONN_IND */
- cv_signal(&so->so_connind_cv);
+ cv_signal(&so->so_acceptq_cv);
mutex_exit(&so->so_lock);
}
@@ -1177,37 +1166,43 @@ int
sowaitconnind(struct sonode *so, int fmode, mblk_t **mpp)
{
mblk_t *mp;
+ sotpi_info_t *sti = SOTOTPI(so);
int error = 0;
ASSERT(MUTEX_NOT_HELD(&so->so_lock));
mutex_enter(&so->so_lock);
check_error:
if (so->so_error) {
- error = sogeterr(so);
+ error = sogeterr(so, B_TRUE);
if (error) {
mutex_exit(&so->so_lock);
return (error);
}
}
- if (so->so_conn_ind_head == NULL) {
+ if (sti->sti_conn_ind_head == NULL) {
if (fmode & (FNDELAY|FNONBLOCK)) {
error = EWOULDBLOCK;
goto done;
}
- if (!cv_wait_sig_swap(&so->so_connind_cv, &so->so_lock)) {
+
+ if (so->so_state & SS_CLOSING) {
+ error = EINTR;
+ goto done;
+ }
+
+ if (!cv_wait_sig_swap(&so->so_acceptq_cv, &so->so_lock)) {
error = EINTR;
goto done;
}
goto check_error;
}
- mp = so->so_conn_ind_head;
- so->so_conn_ind_head = mp->b_next;
+ mp = sti->sti_conn_ind_head;
+ sti->sti_conn_ind_head = mp->b_next;
mp->b_next = NULL;
- if (so->so_conn_ind_head == NULL) {
- ASSERT(so->so_conn_ind_tail == mp);
- so->so_conn_ind_tail = NULL;
- so->so_state &= ~SS_HASCONNIND;
+ if (sti->sti_conn_ind_head == NULL) {
+ ASSERT(sti->sti_conn_ind_tail == mp);
+ sti->sti_conn_ind_tail = NULL;
}
*mpp = mp;
done:
@@ -1225,31 +1220,32 @@ soflushconnind(struct sonode *so, t_scalar_t seqno)
{
mblk_t *prevmp, *mp;
struct T_conn_ind *tci;
+ sotpi_info_t *sti = SOTOTPI(so);
mutex_enter(&so->so_lock);
- for (prevmp = NULL, mp = so->so_conn_ind_head; mp != NULL;
+ for (prevmp = NULL, mp = sti->sti_conn_ind_head; mp != NULL;
prevmp = mp, mp = mp->b_next) {
tci = (struct T_conn_ind *)mp->b_rptr;
if (tci->SEQ_number == seqno) {
dprintso(so, 1,
("t_discon_ind: found T_CONN_IND %d\n", seqno));
/* Deleting last? */
- if (so->so_conn_ind_tail == mp) {
- so->so_conn_ind_tail = prevmp;
+ if (sti->sti_conn_ind_tail == mp) {
+ sti->sti_conn_ind_tail = prevmp;
}
if (prevmp == NULL) {
/* Deleting first */
- so->so_conn_ind_head = mp->b_next;
+ sti->sti_conn_ind_head = mp->b_next;
} else {
prevmp->b_next = mp->b_next;
}
mp->b_next = NULL;
- if (so->so_conn_ind_head == NULL) {
- ASSERT(so->so_conn_ind_tail == NULL);
- so->so_state &= ~SS_HASCONNIND;
- } else {
- ASSERT(so->so_conn_ind_tail != NULL);
- }
+
+ ASSERT((sti->sti_conn_ind_head == NULL &&
+ sti->sti_conn_ind_tail == NULL) ||
+ (sti->sti_conn_ind_head != NULL &&
+ sti->sti_conn_ind_tail != NULL));
+
so->so_error = ECONNABORTED;
mutex_exit(&so->so_lock);
@@ -1295,6 +1291,9 @@ sowaitconnected(struct sonode *so, int fmode, int nosig)
if (fmode & (FNDELAY|FNONBLOCK))
return (EINPROGRESS);
+ if (so->so_state & SS_CLOSING)
+ return (EINTR);
+
if (nosig)
cv_wait(&so->so_state_cv, &so->so_lock);
else if (!cv_wait_sig_swap(&so->so_state_cv, &so->so_lock)) {
@@ -1309,7 +1308,7 @@ sowaitconnected(struct sonode *so, int fmode, int nosig)
}
if (so->so_error != 0) {
- error = sogeterr(so);
+ error = sogeterr(so, B_TRUE);
ASSERT(error != 0);
dprintso(so, 1, ("sowaitconnected: error %d\n", error));
return (error);
@@ -1335,11 +1334,13 @@ static void
so_oob_sig(struct sonode *so, int extrasig,
strsigset_t *signals, strpollset_t *pollwakeups)
{
+ sotpi_info_t *sti = SOTOTPI(so);
+
ASSERT(MUTEX_HELD(&so->so_lock));
ASSERT(so_verify_oobstate(so));
- ASSERT(so->so_oobsigcnt >= so->so_oobcnt);
- if (so->so_oobsigcnt > so->so_oobcnt) {
+ ASSERT(sti->sti_oobsigcnt >= sti->sti_oobcnt);
+ if (sti->sti_oobsigcnt > sti->sti_oobcnt) {
/*
* Signal has already been generated once for this
* urgent "event". However, since TCP can receive updated
@@ -1353,9 +1354,9 @@ so_oob_sig(struct sonode *so, int extrasig,
return;
}
- so->so_oobsigcnt++;
- ASSERT(so->so_oobsigcnt > 0); /* Wraparound */
- ASSERT(so->so_oobsigcnt > so->so_oobcnt);
+ sti->sti_oobsigcnt++;
+ ASSERT(sti->sti_oobsigcnt > 0); /* Wraparound */
+ ASSERT(sti->sti_oobsigcnt > sti->sti_oobcnt);
/*
* Record (for select/poll) that urgent data is pending.
@@ -1385,15 +1386,17 @@ static mblk_t *
so_oob_exdata(struct sonode *so, mblk_t *mp,
strsigset_t *signals, strpollset_t *pollwakeups)
{
+ sotpi_info_t *sti = SOTOTPI(so);
+
ASSERT(MUTEX_HELD(&so->so_lock));
ASSERT(so_verify_oobstate(so));
- ASSERT(so->so_oobsigcnt > so->so_oobcnt);
+ ASSERT(sti->sti_oobsigcnt > sti->sti_oobcnt);
- so->so_oobcnt++;
- ASSERT(so->so_oobcnt > 0); /* wraparound? */
- ASSERT(so->so_oobsigcnt >= so->so_oobcnt);
+ sti->sti_oobcnt++;
+ ASSERT(sti->sti_oobcnt > 0); /* wraparound? */
+ ASSERT(sti->sti_oobsigcnt >= sti->sti_oobcnt);
/*
* Set MSGMARK for SIOCATMARK.
@@ -1412,11 +1415,13 @@ static mblk_t *
so_oob_data(struct sonode *so, mblk_t *mp,
strsigset_t *signals, strpollset_t *pollwakeups)
{
+ sotpi_info_t *sti = SOTOTPI(so);
+
ASSERT(MUTEX_HELD(&so->so_lock));
ASSERT(so_verify_oobstate(so));
- ASSERT(so->so_oobsigcnt >= so->so_oobcnt);
+ ASSERT(sti->sti_oobsigcnt >= sti->sti_oobcnt);
ASSERT(mp != NULL);
/*
* For OOBINLINE we keep the data in the T_EXDATA_IND.
@@ -1439,7 +1444,7 @@ so_oob_data(struct sonode *so, mblk_t *mp,
/*
* Caller must hold the mutex.
* For delayed processing, save the T_DISCON_IND received
- * from below on so_discon_ind_mp.
+ * from below on sti_discon_ind_mp.
* When the message is processed the framework will call:
* (*func)(so, mp);
*/
@@ -1448,14 +1453,16 @@ so_save_discon_ind(struct sonode *so,
mblk_t *mp,
void (*func)(struct sonode *so, mblk_t *))
{
+ sotpi_info_t *sti = SOTOTPI(so);
+
ASSERT(MUTEX_HELD(&so->so_lock));
/*
* Discard new T_DISCON_IND if we have already received another.
- * Currently the earlier message can either be on so_discon_ind_mp
+ * Currently the earlier message can either be on sti_discon_ind_mp
* or being processed.
*/
- if (so->so_discon_ind_mp != NULL || (so->so_flag & SOASYNC_UNBIND)) {
+ if (sti->sti_discon_ind_mp != NULL || (so->so_flag & SOASYNC_UNBIND)) {
zcmn_err(getzoneid(), CE_WARN,
"sockfs: received unexpected additional T_DISCON_IND\n");
freemsg(mp);
@@ -1463,13 +1470,13 @@ so_save_discon_ind(struct sonode *so,
}
mp->b_prev = (mblk_t *)func;
mp->b_next = NULL;
- so->so_discon_ind_mp = mp;
+ sti->sti_discon_ind_mp = mp;
}
/*
* Caller must hold the mutex and make sure that either SOLOCKED
* or SOASYNC_UNBIND is set. Called from so_unlock_single().
- * Perform delayed processing of T_DISCON_IND message on so_discon_ind_mp.
+ * Perform delayed processing of T_DISCON_IND message on sti_discon_ind_mp.
* Need to ensure that strsock_proto() will not end up sleeping for
* SOASYNC_UNBIND, while executing this function.
*/
@@ -1478,13 +1485,14 @@ so_drain_discon_ind(struct sonode *so)
{
mblk_t *bp;
void (*func)(struct sonode *so, mblk_t *);
+ sotpi_info_t *sti = SOTOTPI(so);
ASSERT(MUTEX_HELD(&so->so_lock));
ASSERT(so->so_flag & (SOLOCKED|SOASYNC_UNBIND));
- /* Process T_DISCON_IND on so_discon_ind_mp */
- if ((bp = so->so_discon_ind_mp) != NULL) {
- so->so_discon_ind_mp = NULL;
+ /* Process T_DISCON_IND on sti_discon_ind_mp */
+ if ((bp = sti->sti_discon_ind_mp) != NULL) {
+ sti->sti_discon_ind_mp = NULL;
func = (void (*)())bp->b_prev;
bp->b_prev = NULL;
@@ -1502,20 +1510,21 @@ so_drain_discon_ind(struct sonode *so)
/*
* Caller must hold the mutex.
- * Remove the T_DISCON_IND on so_discon_ind_mp.
+ * Remove the T_DISCON_IND on sti_discon_ind_mp.
*/
void
so_flush_discon_ind(struct sonode *so)
{
mblk_t *bp;
+ sotpi_info_t *sti = SOTOTPI(so);
ASSERT(MUTEX_HELD(&so->so_lock));
/*
- * Remove T_DISCON_IND mblk at so_discon_ind_mp.
+ * Remove T_DISCON_IND mblk at sti_discon_ind_mp.
*/
- if ((bp = so->so_discon_ind_mp) != NULL) {
- so->so_discon_ind_mp = NULL;
+ if ((bp = sti->sti_discon_ind_mp) != NULL) {
+ sti->sti_discon_ind_mp = NULL;
bp->b_prev = NULL;
freemsg(bp);
}
@@ -1526,9 +1535,9 @@ so_flush_discon_ind(struct sonode *so)
*
* This function is used to process the T_DISCON_IND message. It does
* immediate processing when called from strsock_proto and delayed
- * processing of discon_ind saved on so_discon_ind_mp when called from
+ * processing of discon_ind saved on sti_discon_ind_mp when called from
* so_drain_discon_ind. When a T_DISCON_IND message is saved in
- * so_discon_ind_mp for delayed processing, this function is registered
+ * sti_discon_ind_mp for delayed processing, this function is registered
* as the callback function to process the message.
*
* SOASYNC_UNBIND should be held in this function, during the non-blocking
@@ -1549,6 +1558,7 @@ strsock_discon_ind(struct sonode *so, mblk_t *discon_mp)
struct T_unbind_req *ubr;
mblk_t *mp;
int error;
+ sotpi_info_t *sti = SOTOTPI(so);
ASSERT(MUTEX_HELD(&so->so_lock));
ASSERT(discon_mp);
@@ -1571,6 +1581,8 @@ strsock_discon_ind(struct sonode *so, mblk_t *discon_mp)
* is the errno name space.
*/
soisdisconnected(so, tpr->discon_ind.DISCON_reason);
+ sti->sti_laddr_valid = 0;
+ sti->sti_faddr_valid = 0;
/*
* Unbind with the transport without blocking.
@@ -1581,14 +1593,14 @@ strsock_discon_ind(struct sonode *so, mblk_t *discon_mp)
*
* If the socket is not bound, no need to unbind.
*/
- mp = so->so_unbind_mp;
+ mp = sti->sti_unbind_mp;
if (mp == NULL) {
ASSERT(!(so->so_state & SS_ISBOUND));
mutex_exit(&so->so_lock);
} else if (!(so->so_state & SS_ISBOUND)) {
mutex_exit(&so->so_lock);
} else {
- so->so_unbind_mp = NULL;
+ sti->sti_unbind_mp = NULL;
/*
* Is another T_DISCON_IND being processed.
@@ -1602,7 +1614,8 @@ strsock_discon_ind(struct sonode *so, mblk_t *discon_mp)
*/
so->so_flag |= SOASYNC_UNBIND;
ASSERT(!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)));
- so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN|SS_LADDR_VALID);
+ so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN);
+ sti->sti_laddr_valid = 0;
mutex_exit(&so->so_lock);
/*
@@ -1686,8 +1699,10 @@ strsock_proto(vnode_t *vp, mblk_t *mp,
{
union T_primitives *tpr;
struct sonode *so;
+ sotpi_info_t *sti;
so = VTOSO(vp);
+ sti = SOTOTPI(so);
dprintso(so, 1, ("strsock_proto(%p, %p)\n", (void *)vp, (void *)mp));
@@ -1849,11 +1864,11 @@ strsock_proto(vnode_t *vp, mblk_t *mp,
*/
struct sockaddr_in *faddr, *sin;
- /* Prevent so_faddr_sa from changing while accessed */
+ /* Prevent sti_faddr_sa from changing while accessed */
mutex_enter(&so->so_lock);
- ASSERT(so->so_faddr_len ==
+ ASSERT(sti->sti_faddr_len ==
(socklen_t)sizeof (struct sockaddr_in));
- faddr = (struct sockaddr_in *)so->so_faddr_sa;
+ faddr = (struct sockaddr_in *)sti->sti_faddr_sa;
sin = (struct sockaddr_in *)addr;
if (addrlen !=
(t_uscalar_t)sizeof (struct sockaddr_in) ||
@@ -1866,11 +1881,10 @@ strsock_proto(vnode_t *vp, mblk_t *mp,
dprintso(so, 0,
("sockfs: T_UNITDATA_IND mismatch: %s",
pr_addr(so->so_family,
- (struct sockaddr *)addr,
- addrlen)));
+ (struct sockaddr *)addr, addrlen)));
dprintso(so, 0, (" - %s\n",
- pr_addr(so->so_family, so->so_faddr_sa,
- (t_uscalar_t)so->so_faddr_len)));
+ pr_addr(so->so_family, sti->sti_faddr_sa,
+ (t_uscalar_t)sti->sti_faddr_len)));
#endif /* DEBUG */
mutex_exit(&so->so_lock);
freemsg(mp);
@@ -1885,11 +1899,11 @@ strsock_proto(vnode_t *vp, mblk_t *mp,
struct sockaddr_in6 *faddr6, *sin6;
static struct in6_addr zeroes; /* inits to all zeros */
- /* Prevent so_faddr_sa from changing while accessed */
+ /* Prevent sti_faddr_sa from changing while accessed */
mutex_enter(&so->so_lock);
- ASSERT(so->so_faddr_len ==
+ ASSERT(sti->sti_faddr_len ==
(socklen_t)sizeof (struct sockaddr_in6));
- faddr6 = (struct sockaddr_in6 *)so->so_faddr_sa;
+ faddr6 = (struct sockaddr_in6 *)sti->sti_faddr_sa;
sin6 = (struct sockaddr_in6 *)addr;
/* XXX could we get a mapped address ::ffff:0.0.0.0 ? */
if (addrlen !=
@@ -1904,11 +1918,10 @@ strsock_proto(vnode_t *vp, mblk_t *mp,
dprintso(so, 0,
("sockfs: T_UNITDATA_IND mismatch: %s",
pr_addr(so->so_family,
- (struct sockaddr *)addr,
- addrlen)));
+ (struct sockaddr *)addr, addrlen)));
dprintso(so, 0, (" - %s\n",
- pr_addr(so->so_family, so->so_faddr_sa,
- (t_uscalar_t)so->so_faddr_len)));
+ pr_addr(so->so_family, sti->sti_faddr_sa,
+ (t_uscalar_t)sti->sti_faddr_len)));
#endif /* DEBUG */
mutex_exit(&so->so_lock);
freemsg(mp);
@@ -2008,6 +2021,7 @@ strsock_proto(vnode_t *vp, mblk_t *mp,
if (so_getopt_unix_close(opt, optlen)) {
mutex_enter(&so->so_lock);
socantsendmore(so);
+ sti->sti_faddr_valid = 0;
mutex_exit(&so->so_lock);
strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
freemsg(mp);
@@ -2045,7 +2059,7 @@ strsock_proto(vnode_t *vp, mblk_t *mp,
*/
dprintso(so, 1,
("T_EXDATA_IND(%p): counts %d/%d state %s\n",
- (void *)vp, so->so_oobsigcnt, so->so_oobcnt,
+ (void *)vp, sti->sti_oobsigcnt, sti->sti_oobcnt,
pr_state(so->so_state, so->so_mode)));
if (msgdsize(mp->b_cont) == 0) {
@@ -2113,8 +2127,8 @@ strsock_proto(vnode_t *vp, mblk_t *mp,
* adjust the OOB count and OOB signal count
* just incremented for the new OOB data.
*/
- so->so_oobcnt--;
- so->so_oobsigcnt--;
+ sti->sti_oobcnt--;
+ sti->sti_oobsigcnt--;
mutex_exit(QLOCK(qp));
mutex_exit(&so->so_lock);
return (NULL);
@@ -2141,15 +2155,15 @@ strsock_proto(vnode_t *vp, mblk_t *mp,
dprintso(so, 1,
("after outofline T_EXDATA_IND(%p): "
"counts %d/%d poll 0x%x sig 0x%x state %s\n",
- (void *)vp, so->so_oobsigcnt,
- so->so_oobcnt, *pollwakeups, *allmsgsigs,
+ (void *)vp, sti->sti_oobsigcnt,
+ sti->sti_oobcnt, *pollwakeups, *allmsgsigs,
pr_state(so->so_state, so->so_mode)));
} else {
dprintso(so, 1,
("after inline T_EXDATA_IND(%p): "
"counts %d/%d poll 0x%x sig 0x%x state %s\n",
- (void *)vp, so->so_oobsigcnt,
- so->so_oobcnt, *pollwakeups, *allmsgsigs,
+ (void *)vp, sti->sti_oobsigcnt,
+ sti->sti_oobcnt, *pollwakeups, *allmsgsigs,
pr_state(so->so_state, so->so_mode)));
}
#endif /* DEBUG */
@@ -2194,13 +2208,15 @@ strsock_proto(vnode_t *vp, mblk_t *mp,
* For AF_UNIX require the identical length.
*/
if (so->so_family == AF_UNIX ?
- addrlen != (t_uscalar_t)sizeof (so->so_ux_laddr) :
- addrlen > (t_uscalar_t)so->so_faddr_maxlen) {
+ addrlen != (t_uscalar_t)sizeof (sti->sti_ux_laddr) :
+ addrlen > (t_uscalar_t)sti->sti_faddr_maxlen) {
zcmn_err(getzoneid(), CE_WARN,
"sockfs: T_conn_con with different "
"length %u/%d\n",
addrlen, conn_con->RES_length);
soisdisconnected(so, EPROTO);
+ sti->sti_laddr_valid = 0;
+ sti->sti_faddr_valid = 0;
mutex_exit(&so->so_lock);
strsetrerror(SOTOV(so), 0, 0, sogetrderr);
strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
@@ -2240,10 +2256,10 @@ strsock_proto(vnode_t *vp, mblk_t *mp,
* Save for getpeername.
*/
if (so->so_family != AF_UNIX) {
- so->so_faddr_len = (socklen_t)addrlen;
- ASSERT(so->so_faddr_len <= so->so_faddr_maxlen);
- bcopy(addr, so->so_faddr_sa, addrlen);
- so->so_state |= SS_FADDR_VALID;
+ sti->sti_faddr_len = (socklen_t)addrlen;
+ ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen);
+ bcopy(addr, sti->sti_faddr_sa, addrlen);
+ sti->sti_faddr_valid = 1;
}
if (so->so_peercred != NULL)
@@ -2275,7 +2291,7 @@ strsock_proto(vnode_t *vp, mblk_t *mp,
case T_CONN_IND:
/*
* Verify the min size and queue the message on
- * the so_conn_ind_head/tail list.
+ * the sti_conn_ind_head/tail list.
*/
if (MBLKL(mp) < sizeof (struct T_conn_ind)) {
zcmn_err(getzoneid(), CE_WARN,
@@ -2301,7 +2317,7 @@ strsock_proto(vnode_t *vp, mblk_t *mp,
tpr->type = T_CONN_IND;
- fbso = kssl_find_fallback(so->so_kssl_ent);
+ fbso = kssl_find_fallback(sti->sti_kssl_ent);
/*
* No fallback: the remote will timeout and
@@ -2391,6 +2407,7 @@ strsock_proto(vnode_t *vp, mblk_t *mp,
if ((so->so_state & SS_CANTRCVMORE) &&
(so->so_family == AF_UNIX)) {
socantsendmore(so);
+ sti->sti_faddr_valid = 0;
mutex_exit(&so->so_lock);
strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
dprintso(so, 1,
@@ -2468,7 +2485,7 @@ strsock_proto(vnode_t *vp, mblk_t *mp,
/* Compare just IP address and port */
struct sockaddr_in *sin1, *sin2;
- sin1 = (struct sockaddr_in *)so->so_faddr_sa;
+ sin1 = (struct sockaddr_in *)sti->sti_faddr_sa;
sin2 = (struct sockaddr_in *)addr;
if (addrlen == sizeof (struct sockaddr_in) &&
sin1->sin_port == sin2->sin_port &&
@@ -2481,7 +2498,7 @@ strsock_proto(vnode_t *vp, mblk_t *mp,
/* Compare just IP address and port. Not flow */
struct sockaddr_in6 *sin1, *sin2;
- sin1 = (struct sockaddr_in6 *)so->so_faddr_sa;
+ sin1 = (struct sockaddr_in6 *)sti->sti_faddr_sa;
sin2 = (struct sockaddr_in6 *)addr;
if (addrlen == sizeof (struct sockaddr_in6) &&
sin1->sin6_port == sin2->sin6_port &&
@@ -2491,16 +2508,16 @@ strsock_proto(vnode_t *vp, mblk_t *mp,
break;
}
case AF_UNIX:
- faddr = &so->so_ux_faddr;
+ faddr = &sti->sti_ux_faddr;
faddr_len =
- (t_uscalar_t)sizeof (so->so_ux_faddr);
+ (t_uscalar_t)sizeof (sti->sti_ux_faddr);
if (faddr_len == addrlen &&
bcmp(addr, faddr, addrlen) == 0)
match = B_TRUE;
break;
default:
- faddr = so->so_faddr_sa;
- faddr_len = (t_uscalar_t)so->so_faddr_len;
+ faddr = sti->sti_faddr_sa;
+ faddr_len = (t_uscalar_t)sti->sti_faddr_len;
if (faddr_len == addrlen &&
bcmp(addr, faddr, addrlen) == 0)
match = B_TRUE;
@@ -2512,11 +2529,10 @@ strsock_proto(vnode_t *vp, mblk_t *mp,
dprintso(so, 0,
("sockfs: T_UDERR_IND mismatch: %s - ",
pr_addr(so->so_family,
- (struct sockaddr *)addr,
- addrlen)));
+ (struct sockaddr *)addr, addrlen)));
dprintso(so, 0, ("%s\n",
- pr_addr(so->so_family, so->so_faddr_sa,
- so->so_faddr_len)));
+ pr_addr(so->so_family, sti->sti_faddr_sa,
+ sti->sti_faddr_len)));
#endif /* DEBUG */
mutex_exit(&so->so_lock);
freemsg(mp);
@@ -2545,8 +2561,8 @@ strsock_proto(vnode_t *vp, mblk_t *mp,
}
/*
* If the application asked for delayed errors
- * record the T_UDERROR_IND so_eaddr_mp and the reason in
- * so_delayed_error for delayed error posting. If the reason
+ * record the T_UDERROR_IND sti_eaddr_mp and the reason in
+ * sti_delayed_error for delayed error posting. If the reason
* is zero use ECONNRESET.
* Note that delayed error indications do not make sense for
* AF_UNIX sockets since sendto checks that the destination
@@ -2557,15 +2573,15 @@ strsock_proto(vnode_t *vp, mblk_t *mp,
freemsg(mp);
return (NULL);
}
- if (so->so_eaddr_mp != NULL)
- freemsg(so->so_eaddr_mp);
+ if (sti->sti_eaddr_mp != NULL)
+ freemsg(sti->sti_eaddr_mp);
- so->so_eaddr_mp = mp;
+ sti->sti_eaddr_mp = mp;
if (tudi->ERROR_type != 0)
error = tudi->ERROR_type;
else
error = ECONNRESET;
- so->so_delayed_error = (ushort_t)error;
+ sti->sti_delayed_error = (ushort_t)error;
mutex_exit(&so->so_lock);
return (NULL);
}
@@ -2700,8 +2716,10 @@ strsock_misc(vnode_t *vp, mblk_t *mp,
strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
{
struct sonode *so;
+ sotpi_info_t *sti;
so = VTOSO(vp);
+ sti = SOTOTPI(so);
dprintso(so, 1, ("strsock_misc(%p, %p, 0x%x)\n",
(void *)vp, (void *)mp, DB_TYPE(mp)));
@@ -2724,15 +2742,14 @@ strsock_misc(vnode_t *vp, mblk_t *mp,
mutex_enter(&so->so_lock);
dprintso(so, 1,
("SIGURG(%p): counts %d/%d state %s\n",
- (void *)vp, so->so_oobsigcnt,
- so->so_oobcnt,
+ (void *)vp, sti->sti_oobsigcnt, sti->sti_oobcnt,
pr_state(so->so_state, so->so_mode)));
so_oob_sig(so, 1, allmsgsigs, pollwakeups);
dprintso(so, 1,
("after SIGURG(%p): counts %d/%d "
" poll 0x%x sig 0x%x state %s\n",
- (void *)vp, so->so_oobsigcnt,
- so->so_oobcnt, *pollwakeups, *allmsgsigs,
+ (void *)vp, sti->sti_oobsigcnt, sti->sti_oobcnt,
+ *pollwakeups, *allmsgsigs,
pr_state(so->so_state, so->so_mode)));
mutex_exit(&so->so_lock);
}
@@ -2873,53 +2890,118 @@ bad:
return (error);
}
+/*
+ * Wrapper for getmsg. If the socket has been converted to a stream
+ * pass the request to the stream head.
+ */
+int
+sock_getmsg(
+ struct vnode *vp,
+ struct strbuf *mctl,
+ struct strbuf *mdata,
+ uchar_t *prip,
+ int *flagsp,
+ int fmode,
+ rval_t *rvp
+)
+{
+ struct sonode *so;
+
+ ASSERT(vp->v_type == VSOCK);
+ /*
+ * Use the stream head to find the real socket vnode.
+ * This is needed when namefs sits above sockfs. Some
+ * sockets (like SCTP) are not streams.
+ */
+ if (!vp->v_stream) {
+ return (ENOSTR);
+ }
+ ASSERT(vp->v_stream->sd_vnode);
+ vp = vp->v_stream->sd_vnode;
+ ASSERT(vn_matchops(vp, socket_vnodeops));
+ so = VTOSO(vp);
+ dprintso(so, 1, ("sock_getmsg(%p) %s\n",
+ (void *)so, pr_state(so->so_state, so->so_mode)));
+
+ if (so->so_version == SOV_STREAM) {
+ /* The imaginary "sockmod" has been popped - act as a stream */
+ return (strgetmsg(vp, mctl, mdata, prip, flagsp, fmode, rvp));
+ }
+ eprintsoline(so, ENOSTR);
+ return (ENOSTR);
+}
/*
- * Translate a TLI(/XTI) error into a system error as best we can.
+ * Wrapper for putmsg. If the socket has been converted to a stream
+ * pass the request to the stream head.
+ *
+ * Note that a while a regular socket (SOV_SOCKSTREAM) does support the
+ * streams ioctl set it does not support putmsg and getmsg.
+ * Allowing putmsg would prevent sockfs from tracking the state of
+ * the socket/transport and would also invalidate the locking in sockfs.
*/
-static const int tli_errs[] = {
- 0, /* no error */
- EADDRNOTAVAIL, /* TBADADDR */
- ENOPROTOOPT, /* TBADOPT */
- EACCES, /* TACCES */
- EBADF, /* TBADF */
- EADDRNOTAVAIL, /* TNOADDR */
- EPROTO, /* TOUTSTATE */
- ECONNABORTED, /* TBADSEQ */
- 0, /* TSYSERR - will never get */
- EPROTO, /* TLOOK - should never be sent by transport */
- EMSGSIZE, /* TBADDATA */
- EMSGSIZE, /* TBUFOVFLW */
- EPROTO, /* TFLOW */
- EWOULDBLOCK, /* TNODATA */
- EPROTO, /* TNODIS */
- EPROTO, /* TNOUDERR */
- EINVAL, /* TBADFLAG */
- EPROTO, /* TNOREL */
- EOPNOTSUPP, /* TNOTSUPPORT */
- EPROTO, /* TSTATECHNG */
- /* following represent error namespace expansion with XTI */
- EPROTO, /* TNOSTRUCTYPE - never sent by transport */
- EPROTO, /* TBADNAME - never sent by transport */
- EPROTO, /* TBADQLEN - never sent by transport */
- EADDRINUSE, /* TADDRBUSY */
- EBADF, /* TINDOUT */
- EBADF, /* TPROVMISMATCH */
- EBADF, /* TRESQLEN */
- EBADF, /* TRESADDR */
- EPROTO, /* TQFULL - never sent by transport */
- EPROTO, /* TPROTO */
-};
+int
+sock_putmsg(
+ struct vnode *vp,
+ struct strbuf *mctl,
+ struct strbuf *mdata,
+ uchar_t pri,
+ int flag,
+ int fmode
+)
+{
+ struct sonode *so;
-static int
-tlitosyserr(int terr)
+ ASSERT(vp->v_type == VSOCK);
+ /*
+ * Use the stream head to find the real socket vnode.
+ * This is needed when namefs sits above sockfs.
+ */
+ if (!vp->v_stream) {
+ return (ENOSTR);
+ }
+ ASSERT(vp->v_stream->sd_vnode);
+ vp = vp->v_stream->sd_vnode;
+ ASSERT(vn_matchops(vp, socket_vnodeops));
+ so = VTOSO(vp);
+
+ dprintso(so, 1, ("sock_putmsg(%p) %s\n",
+ (void *)so, pr_state(so->so_state, so->so_mode)));
+
+ if (so->so_version == SOV_STREAM) {
+ /* The imaginary "sockmod" has been popped - act as a stream */
+ return (strputmsg(vp, mctl, mdata, pri, flag, fmode));
+ }
+ eprintsoline(so, ENOSTR);
+ return (ENOSTR);
+}
+
+/*
+ * Special function called only from f_getfl().
+ * Returns FASYNC if the SS_ASYNC flag is set on a socket, else 0.
+ * No locks are acquired here, so it is safe to use while uf_lock is held.
+ * This exists solely for BSD fcntl() FASYNC compatibility.
+ */
+int
+sock_getfasync(vnode_t *vp)
{
- ASSERT(terr != TSYSERR);
- if (terr >= (sizeof (tli_errs) / sizeof (tli_errs[0])))
- return (EPROTO);
+ struct sonode *so;
+
+ ASSERT(vp->v_type == VSOCK);
+ /*
+ * For stream model, v_stream is used; For non-stream, v_stream always
+ * equals NULL
+ */
+ if (vp->v_stream != NULL)
+ so = VTOSO(vp->v_stream->sd_vnode);
else
- return (tli_errs[terr]);
+ so = VTOSO(vp);
+
+ if (so->so_version == SOV_STREAM || !(so->so_state & SS_ASYNC))
+ return (0);
+
+ return (FASYNC);
}
/*
diff --git a/usr/src/uts/common/fs/sockfs/socksubr.c b/usr/src/uts/common/fs/sockfs/socksubr.c
index 33a6841f16..b82adb1789 100644
--- a/usr/src/uts/common/fs/sockfs/socksubr.c
+++ b/usr/src/uts/common/fs/sockfs/socksubr.c
@@ -73,6 +73,9 @@
#include <c2/audit.h>
#include <fs/sockfs/nl7c.h>
+#include <fs/sockfs/sockcommon.h>
+#include <fs/sockfs/socktpi.h>
+#include <fs/sockfs/socktpi_impl.h>
/*
* Macros that operate on struct cmsghdr.
@@ -88,18 +91,16 @@
((uintptr_t)(cmsg) + (cmsg)->cmsg_len <= (uintptr_t)(end)))
#define SO_LOCK_WAKEUP_TIME 3000 /* Wakeup time in milliseconds */
-static struct kmem_cache *socktpi_cache, *socktpi_unix_cache;
-struct kmem_cache *socktpi_sod_cache;
-
dev_t sockdev; /* For fsid in getattr */
int sockfs_defer_nl7c_init = 0;
-struct sockparams *sphead;
-krwlock_t splist_lock;
struct socklist socklist;
+struct kmem_cache *socket_cache;
+
static int sockfs_update(kstat_t *, int);
static int sockfs_snapshot(kstat_t *, void *, int);
+extern smod_info_t *sotpi_smod_create(void);
extern void sendfile_init();
@@ -124,7 +125,7 @@ struct k_sockinfo {
* Translate from a device pathname (e.g. "/dev/tcp") to a vnode.
* Returns with the vnode held.
*/
-static int
+int
sogetvp(char *devpath, vnode_t **vpp, int uioflag)
{
struct snode *csp;
@@ -133,6 +134,7 @@ sogetvp(char *devpath, vnode_t **vpp, int uioflag)
int error;
ASSERT(uioflag == UIO_SYSSPACE || uioflag == UIO_USERSPACE);
+
/*
* Lookup the underlying filesystem vnode.
*/
@@ -179,382 +181,6 @@ sogetvp(char *devpath, vnode_t **vpp, int uioflag)
}
/*
- * Add or delete (latter if devpath is NULL) an enter to the sockparams
- * table. If devpathlen is zero the devpath with not be kmem_freed. Otherwise
- * this routine assumes that the caller has kmem_alloced devpath/devpathlen
- * for this routine to consume.
- * The zero devpathlen could be used if the kernel wants to create entries
- * itself by calling sockconfig(1,2,3, "/dev/tcp", 0);
- */
-int
-soconfig(int domain, int type, int protocol,
- char *devpath, int devpathlen)
-{
- struct sockparams **spp;
- struct sockparams *sp;
- int error = 0;
-
- dprint(0, ("soconfig(%d,%d,%d,%s,%d)\n",
- domain, type, protocol, devpath, devpathlen));
-
- if (sockfs_defer_nl7c_init) {
- nl7c_init();
- sockfs_defer_nl7c_init = 0;
- }
-
- /*
- * Look for an existing match.
- */
- rw_enter(&splist_lock, RW_WRITER);
- for (spp = &sphead; (sp = *spp) != NULL; spp = &sp->sp_next) {
- if (sp->sp_domain == domain &&
- sp->sp_type == type &&
- sp->sp_protocol == protocol) {
- break;
- }
- }
- if (devpath == NULL) {
- ASSERT(devpathlen == 0);
-
- /* Delete existing entry */
- if (sp == NULL) {
- error = ENXIO;
- goto done;
- }
- /* Unlink and free existing entry */
- *spp = sp->sp_next;
- ASSERT(sp->sp_vnode);
- VN_RELE(sp->sp_vnode);
- if (sp->sp_devpathlen != 0)
- kmem_free(sp->sp_devpath, sp->sp_devpathlen);
- kmem_free(sp, sizeof (*sp));
- } else {
- vnode_t *vp;
-
- /* Add new entry */
- if (sp != NULL) {
- error = EEXIST;
- goto done;
- }
-
- error = sogetvp(devpath, &vp, UIO_SYSSPACE);
- if (error) {
- dprint(0, ("soconfig: vp %s failed with %d\n",
- devpath, error));
- goto done;
- }
-
- dprint(0, ("soconfig: %s => vp %p, dev 0x%lx\n",
- devpath, (void *)vp, vp->v_rdev));
-
- sp = kmem_alloc(sizeof (*sp), KM_SLEEP);
- sp->sp_domain = domain;
- sp->sp_type = type;
- sp->sp_protocol = protocol;
- sp->sp_devpath = devpath;
- sp->sp_devpathlen = devpathlen;
- sp->sp_vnode = vp;
- sp->sp_next = NULL;
- *spp = sp;
- }
-done:
- rw_exit(&splist_lock);
- if (error) {
- if (devpath != NULL)
- kmem_free(devpath, devpathlen);
-#ifdef SOCK_DEBUG
- eprintline(error);
-#endif /* SOCK_DEBUG */
- }
- return (error);
-}
-
-/*
- * Lookup an entry in the sockparams list based on the triple.
- * If no entry is found and devpath is not NULL translate devpath to a
- * vnode. Note that devpath is a pointer to a user address!
- * Returns with the vnode held.
- *
- * When this routine uses devpath it does not create an entry in the sockparams
- * list since this routine can run on behalf of any user and one user
- * should not be able to effect the transport used by another user.
- *
- * In order to return the correct error this routine has to do wildcard scans
- * of the list. The errors are (in decreasing precedence):
- * EAFNOSUPPORT - address family not in list
- * EPROTONOSUPPORT - address family supported but not protocol.
- * EPROTOTYPE - address family and protocol supported but not socket type.
- */
-vnode_t *
-solookup(int domain, int type, int protocol, char *devpath, int *errorp)
-{
- struct sockparams *sp;
- int error;
- vnode_t *vp;
-
- rw_enter(&splist_lock, RW_READER);
- for (sp = sphead; sp != NULL; sp = sp->sp_next) {
- if (sp->sp_domain == domain &&
- sp->sp_type == type &&
- sp->sp_protocol == protocol) {
- break;
- }
- }
- if (sp == NULL) {
- dprint(0, ("solookup(%d,%d,%d) not found\n",
- domain, type, protocol));
- if (devpath == NULL) {
- /* Determine correct error code */
- int found = 0;
-
- for (sp = sphead; sp != NULL; sp = sp->sp_next) {
- if (sp->sp_domain == domain && found < 1)
- found = 1;
- if (sp->sp_domain == domain &&
- sp->sp_protocol == protocol && found < 2)
- found = 2;
- }
- rw_exit(&splist_lock);
- switch (found) {
- case 0:
- *errorp = EAFNOSUPPORT;
- break;
- case 1:
- *errorp = EPROTONOSUPPORT;
- break;
- case 2:
- *errorp = EPROTOTYPE;
- break;
- }
- return (NULL);
- }
- rw_exit(&splist_lock);
-
- /*
- * Return vp based on devpath.
- * Do not enter into table to avoid random users
- * modifying the sockparams list.
- */
- error = sogetvp(devpath, &vp, UIO_USERSPACE);
- if (error) {
- dprint(0, ("solookup: vp %p failed with %d\n",
- (void *)devpath, error));
- *errorp = EPROTONOSUPPORT;
- return (NULL);
- }
- dprint(0, ("solookup: %p => vp %p, dev 0x%lx\n",
- (void *)devpath, (void *)vp, vp->v_rdev));
-
- return (vp);
- }
- dprint(0, ("solookup(%d,%d,%d) vp %p devpath %s\n",
- domain, type, protocol, (void *)sp->sp_vnode, sp->sp_devpath));
-
- vp = sp->sp_vnode;
- VN_HOLD(vp);
- rw_exit(&splist_lock);
- return (vp);
-}
-
-/*
- * Return a socket vnode.
- *
- * Assumes that the caller is "passing" an VN_HOLD for accessvp i.e.
- * when the socket is freed a VN_RELE will take place.
- *
- * Note that sockets assume that the driver will clone (either itself
- * or by using the clone driver) i.e. a socket() call will always
- * result in a new vnode being created.
- */
-struct vnode *
-makesockvp(struct vnode *accessvp, int domain, int type, int protocol)
-{
- kmem_cache_t *cp;
- struct sonode *so;
- struct vnode *vp;
- time_t now;
- dev_t dev;
-
- cp = (domain == AF_UNIX) ? socktpi_unix_cache : socktpi_cache;
- so = kmem_cache_alloc(cp, KM_SLEEP);
- so->so_cache = cp;
- so->so_obj = so;
- vp = SOTOV(so);
- now = gethrestime_sec();
-
- so->so_flag = 0;
- ASSERT(so->so_accessvp == NULL);
- so->so_accessvp = accessvp;
- dev = accessvp->v_rdev;
-
- /*
- * Record in so_flag that it is a clone.
- */
- if (getmajor(dev) == clone_major) {
- so->so_flag |= SOCLONE;
- }
- so->so_dev = dev;
-
- so->so_state = 0;
- so->so_mode = 0;
-
- so->so_fsid = sockdev;
- so->so_atime = now;
- so->so_mtime = now;
- so->so_ctime = now; /* Never modified */
- so->so_count = 0;
-
- so->so_family = (short)domain;
- so->so_type = (short)type;
- so->so_protocol = (short)protocol;
- so->so_pushcnt = 0;
-
- so->so_options = 0;
- so->so_linger.l_onoff = 0;
- so->so_linger.l_linger = 0;
- so->so_sndbuf = 0;
- so->so_rcvbuf = 0;
- so->so_sndlowat = 0;
- so->so_rcvlowat = 0;
-#ifdef notyet
- so->so_sndtimeo = 0;
- so->so_rcvtimeo = 0;
-#endif /* notyet */
- so->so_error = 0;
- so->so_delayed_error = 0;
-
- ASSERT(so->so_oobmsg == NULL);
- so->so_oobcnt = 0;
- so->so_oobsigcnt = 0;
- so->so_pgrp = 0;
- so->so_provinfo = NULL;
-
- ASSERT(so->so_laddr_sa == NULL && so->so_faddr_sa == NULL);
- so->so_laddr_len = so->so_faddr_len = 0;
- so->so_laddr_maxlen = so->so_faddr_maxlen = 0;
- so->so_eaddr_mp = NULL;
- so->so_priv = NULL;
-
- so->so_peercred = NULL;
-
- ASSERT(so->so_ack_mp == NULL);
- ASSERT(so->so_conn_ind_head == NULL);
- ASSERT(so->so_conn_ind_tail == NULL);
- ASSERT(so->so_ux_bound_vp == NULL);
- ASSERT(so->so_unbind_mp == NULL);
-
- vn_reinit(vp);
- vp->v_vfsp = rootvfs;
- vp->v_type = VSOCK;
- vp->v_rdev = so->so_dev;
- vn_exists(vp);
-
- return (vp);
-}
-
-void
-sockfree(struct sonode *so)
-{
- mblk_t *mp;
- vnode_t *vp;
-
- ASSERT(so->so_count == 0);
- ASSERT(so->so_accessvp);
- ASSERT(so->so_discon_ind_mp == NULL);
-
- vp = so->so_accessvp;
- VN_RELE(vp);
-
- /*
- * Protect so->so_[lf]addr_sa so that sockfs_snapshot() can safely
- * indirect them. It also uses so_accessvp as a validity test.
- */
- mutex_enter(&so->so_lock);
-
- so->so_accessvp = NULL;
-
- if (so->so_laddr_sa) {
- ASSERT((caddr_t)so->so_faddr_sa ==
- (caddr_t)so->so_laddr_sa + so->so_laddr_maxlen);
- ASSERT(so->so_faddr_maxlen == so->so_laddr_maxlen);
- so->so_state &= ~(SS_LADDR_VALID | SS_FADDR_VALID);
- kmem_free(so->so_laddr_sa, so->so_laddr_maxlen * 2);
- so->so_laddr_sa = NULL;
- so->so_laddr_len = so->so_laddr_maxlen = 0;
- so->so_faddr_sa = NULL;
- so->so_faddr_len = so->so_faddr_maxlen = 0;
- }
-
- mutex_exit(&so->so_lock);
-
- if ((mp = so->so_eaddr_mp) != NULL) {
- freemsg(mp);
- so->so_eaddr_mp = NULL;
- so->so_delayed_error = 0;
- }
- if ((mp = so->so_ack_mp) != NULL) {
- freemsg(mp);
- so->so_ack_mp = NULL;
- }
- if ((mp = so->so_conn_ind_head) != NULL) {
- mblk_t *mp1;
-
- while (mp) {
- mp1 = mp->b_next;
- mp->b_next = NULL;
- freemsg(mp);
- mp = mp1;
- }
- so->so_conn_ind_head = so->so_conn_ind_tail = NULL;
- so->so_state &= ~SS_HASCONNIND;
- }
-#ifdef DEBUG
- mutex_enter(&so->so_lock);
- ASSERT(so_verify_oobstate(so));
- mutex_exit(&so->so_lock);
-#endif /* DEBUG */
- if ((mp = so->so_oobmsg) != NULL) {
- freemsg(mp);
- so->so_oobmsg = NULL;
- so->so_state &= ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_HADOOBDATA);
- }
-
- if ((mp = so->so_nl7c_rcv_mp) != NULL) {
- so->so_nl7c_rcv_mp = NULL;
- freemsg(mp);
- }
- so->so_nl7c_rcv_rval = 0;
- if (so->so_nl7c_uri != NULL) {
- nl7c_urifree(so);
- /* urifree() cleared nl7c_uri */
- }
- if (so->so_nl7c_flags) {
- so->so_nl7c_flags = 0;
- }
-
- if (so->so_direct != NULL) {
- sodirect_t *sodp = so->so_direct;
-
- ASSERT(sodp->sod_uioafh == NULL);
-
- so->so_direct = NULL;
- kmem_cache_free(socktpi_sod_cache, sodp);
- }
-
- ASSERT(so->so_ux_bound_vp == NULL);
- if ((mp = so->so_unbind_mp) != NULL) {
- freemsg(mp);
- so->so_unbind_mp = NULL;
- }
- vn_invalid(SOTOV(so));
-
- if (so->so_peercred != NULL)
- crfree(so->so_peercred);
-
- kmem_cache_free(so->so_cache, so->so_obj);
-}
-
-/*
* Update the accessed, updated, or changed times in an sonode
* with the current time.
*
@@ -569,133 +195,20 @@ so_update_attrs(struct sonode *so, int flag)
{
time_t now = gethrestime_sec();
+ if (SOCK_IS_NONSTR(so))
+ return;
+
mutex_enter(&so->so_lock);
so->so_flag |= flag;
if (flag & SOACC)
- so->so_atime = now;
+ SOTOTPI(so)->sti_atime = now;
if (flag & SOMOD)
- so->so_mtime = now;
+ SOTOTPI(so)->sti_mtime = now;
mutex_exit(&so->so_lock);
}
-/*ARGSUSED*/
-static int
-socktpi_constructor(void *buf, void *cdrarg, int kmflags)
-{
- struct sonode *so = buf;
- struct vnode *vp;
-
- vp = so->so_vnode = vn_alloc(kmflags);
- if (vp == NULL) {
- return (-1);
- }
- vn_setops(vp, socktpi_vnodeops);
- vp->v_data = so;
-
- so->so_direct = NULL;
-
- so->so_nl7c_flags = 0;
- so->so_nl7c_uri = NULL;
- so->so_nl7c_rcv_mp = NULL;
-
- so->so_oobmsg = NULL;
- so->so_ack_mp = NULL;
- so->so_conn_ind_head = NULL;
- so->so_conn_ind_tail = NULL;
- so->so_discon_ind_mp = NULL;
- so->so_ux_bound_vp = NULL;
- so->so_unbind_mp = NULL;
- so->so_accessvp = NULL;
- so->so_laddr_sa = NULL;
- so->so_faddr_sa = NULL;
- so->so_ops = &sotpi_sonodeops;
-
- mutex_init(&so->so_lock, NULL, MUTEX_DEFAULT, NULL);
- mutex_init(&so->so_plumb_lock, NULL, MUTEX_DEFAULT, NULL);
- cv_init(&so->so_state_cv, NULL, CV_DEFAULT, NULL);
- cv_init(&so->so_ack_cv, NULL, CV_DEFAULT, NULL);
- cv_init(&so->so_connind_cv, NULL, CV_DEFAULT, NULL);
- cv_init(&so->so_want_cv, NULL, CV_DEFAULT, NULL);
-
- return (0);
-}
-
-/*ARGSUSED1*/
-static void
-socktpi_destructor(void *buf, void *cdrarg)
-{
- struct sonode *so = buf;
- struct vnode *vp = SOTOV(so);
-
- ASSERT(so->so_direct == NULL);
-
- ASSERT(so->so_nl7c_flags == 0);
- ASSERT(so->so_nl7c_uri == NULL);
- ASSERT(so->so_nl7c_rcv_mp == NULL);
-
- ASSERT(so->so_oobmsg == NULL);
- ASSERT(so->so_ack_mp == NULL);
- ASSERT(so->so_conn_ind_head == NULL);
- ASSERT(so->so_conn_ind_tail == NULL);
- ASSERT(so->so_discon_ind_mp == NULL);
- ASSERT(so->so_ux_bound_vp == NULL);
- ASSERT(so->so_unbind_mp == NULL);
- ASSERT(so->so_ops == &sotpi_sonodeops);
-
- ASSERT(vn_matchops(vp, socktpi_vnodeops));
- ASSERT(vp->v_data == so);
-
- vn_free(vp);
-
- mutex_destroy(&so->so_lock);
- mutex_destroy(&so->so_plumb_lock);
- cv_destroy(&so->so_state_cv);
- cv_destroy(&so->so_ack_cv);
- cv_destroy(&so->so_connind_cv);
- cv_destroy(&so->so_want_cv);
-}
-
-static int
-socktpi_unix_constructor(void *buf, void *cdrarg, int kmflags)
-{
- int retval;
-
- if ((retval = socktpi_constructor(buf, cdrarg, kmflags)) == 0) {
- struct sonode *so = (struct sonode *)buf;
-
- mutex_enter(&socklist.sl_lock);
-
- so->so_next = socklist.sl_list;
- so->so_prev = NULL;
- if (so->so_next != NULL)
- so->so_next->so_prev = so;
- socklist.sl_list = so;
-
- mutex_exit(&socklist.sl_lock);
-
- }
- return (retval);
-}
-
-static void
-socktpi_unix_destructor(void *buf, void *cdrarg)
-{
- struct sonode *so = (struct sonode *)buf;
-
- mutex_enter(&socklist.sl_lock);
-
- if (so->so_next != NULL)
- so->so_next->so_prev = so->so_prev;
- if (so->so_prev != NULL)
- so->so_prev->so_next = so->so_next;
- else
- socklist.sl_list = so->so_next;
-
- mutex_exit(&socklist.sl_lock);
-
- socktpi_destructor(buf, cdrarg);
-}
-
+extern so_create_func_t sock_comm_create_function;
+extern so_destroy_func_t sock_comm_destroy_function;
/*
* Init function called when sockfs is loaded.
*/
@@ -716,21 +229,20 @@ sockinit(int fstype, char *name)
return (error);
}
- error = vn_make_ops(name, socktpi_vnodeops_template, &socktpi_vnodeops);
+ error = vn_make_ops(name, socket_vnodeops_template,
+ &socket_vnodeops);
if (error != 0) {
- err_str = "sockinit: bad sock vnode ops template";
+ err_str = "sockinit: bad socket vnode ops template";
/* vn_make_ops() does not reset socktpi_vnodeops on failure. */
- socktpi_vnodeops = NULL;
+ socket_vnodeops = NULL;
goto failure;
}
- error = sosctp_init();
- if (error != 0) {
- err_str = NULL;
- goto failure;
- }
+ socket_cache = kmem_cache_create("socket_cache",
+ sizeof (struct sonode), 0, sonode_constructor,
+ sonode_destructor, NULL, NULL, NULL, 0);
- error = sosdp_init();
+ error = socktpi_init();
if (error != 0) {
err_str = NULL;
goto failure;
@@ -743,21 +255,18 @@ sockinit(int fstype, char *name)
}
/*
- * Create sonode caches. We create a special one for AF_UNIX so
- * that we can track them for netstat(1m).
+ * Set up the default create and destroy functions
*/
- socktpi_cache = kmem_cache_create("socktpi_cache",
- sizeof (struct sonode), 0, socktpi_constructor,
- socktpi_destructor, NULL, NULL, NULL, 0);
-
- socktpi_unix_cache = kmem_cache_create("socktpi_unix_cache",
- sizeof (struct sonode), 0, socktpi_unix_constructor,
- socktpi_unix_destructor, NULL, NULL, NULL, 0);
+ sock_comm_create_function = socket_sonode_create;
+ sock_comm_destroy_function = socket_sonode_destroy;
/*
* Build initial list mapping socket parameters to vnode.
*/
- rw_init(&splist_lock, NULL, RW_DEFAULT, NULL);
+ smod_init();
+ smod_add(sotpi_smod_create());
+
+ sockparams_init();
/*
* If sockets are needed before init runs /sbin/soconfig
@@ -786,8 +295,8 @@ sockinit(int fstype, char *name)
failure:
(void) vfs_freevfsops_by_type(fstype);
- if (socktpi_vnodeops != NULL)
- vn_freevnodeops(socktpi_vnodeops);
+ if (socket_vnodeops != NULL)
+ vn_freevnodeops(socket_vnodeops);
if (err_str != NULL)
zcmn_err(GLOBAL_ZONEID, CE_WARN, err_str);
return (error);
@@ -820,15 +329,18 @@ so_unlock_single(struct sonode *so, int flag)
ASSERT(flag & (SOLOCKED|SOASYNC_UNBIND));
ASSERT((flag & ~(SOLOCKED|SOASYNC_UNBIND)) == 0);
ASSERT(so->so_flag & flag);
-
/*
- * Process the T_DISCON_IND on so_discon_ind_mp.
+ * Process the T_DISCON_IND on sti_discon_ind_mp.
*
* Call to so_drain_discon_ind will result in so_lock
* being dropped and re-acquired later.
*/
- if (so->so_discon_ind_mp != NULL)
- so_drain_discon_ind(so);
+ if (!SOCK_IS_NONSTR(so)) {
+ sotpi_info_t *sti = SOTOTPI(so);
+
+ if (sti->sti_discon_ind_mp != NULL)
+ so_drain_discon_ind(so);
+ }
if (so->so_flag & SOWANT)
cv_broadcast(&so->so_want_cv);
@@ -1076,7 +588,7 @@ so_addr_verify(struct sonode *so, const struct sockaddr *name,
break;
}
case AF_UNIX:
- if (so->so_state & SS_FADDR_NOXLATE) {
+ if (SOTOTPI(so)->sti_faddr_noxlate) {
return (0);
}
if (namelen < (socklen_t)sizeof (short)) {
@@ -1122,13 +634,14 @@ so_ux_addr_xlate(struct sonode *so, struct sockaddr *name,
vnode_t *vp;
void *addr;
socklen_t addrlen;
+ sotpi_info_t *sti = SOTOTPI(so);
dprintso(so, 1, ("so_ux_addr_xlate(%p, %p, %d, %d)\n",
(void *)so, (void *)name, namelen, checkaccess));
ASSERT(name != NULL);
ASSERT(so->so_family == AF_UNIX);
- ASSERT(!(so->so_state & SS_FADDR_NOXLATE));
+ ASSERT(!sti->sti_faddr_noxlate);
ASSERT(namelen >= (socklen_t)sizeof (short));
ASSERT(name->sa_family == AF_UNIX);
soun = (struct sockaddr_un *)name;
@@ -1147,10 +660,10 @@ so_ux_addr_xlate(struct sonode *so, struct sockaddr *name,
* closed by the time the T_CONN_REQ or T_UNIDATA_REQ reaches the
* transport the message will get an error or be dropped.
*/
- so->so_ux_faddr.soua_vp = vp;
- so->so_ux_faddr.soua_magic = SOU_MAGIC_EXPLICIT;
- addr = &so->so_ux_faddr;
- addrlen = (socklen_t)sizeof (so->so_ux_faddr);
+ sti->sti_ux_faddr.soua_vp = vp;
+ sti->sti_ux_faddr.soua_magic = SOU_MAGIC_EXPLICIT;
+ addr = &sti->sti_ux_faddr;
+ addrlen = (socklen_t)sizeof (sti->sti_ux_faddr);
dprintso(so, 1, ("ux_xlate UNIX: addrlen %d, vp %p\n",
addrlen, (void *)vp));
VN_RELE(vp);
@@ -2007,8 +1520,6 @@ pr_state(uint_t state, uint_t mode)
(void) strcat(buf, "ASYNC ");
if (state & SS_ACCEPTCONN)
(void) strcat(buf, "ACCEPTCONN ");
- if (state & SS_HASCONNIND)
- (void) strcat(buf, "HASCONNIND ");
if (state & SS_SAVEDEOR)
(void) strcat(buf, "SAVEDEOR ");
@@ -2021,9 +1532,6 @@ pr_state(uint_t state, uint_t mode)
if (state & SS_HADOOBDATA)
(void) strcat(buf, "HADOOBDATA ");
- if (state & SS_FADDR_NOXLATE)
- (void) strcat(buf, "FADDR_NOXLATE ");
-
if (mode & SM_PRIV)
(void) strcat(buf, "PRIV ");
if (mode & SM_ATOMIC)
@@ -2102,6 +1610,8 @@ pr_addr(int family, struct sockaddr *addr, t_uscalar_t addrlen)
int
so_verify_oobstate(struct sonode *so)
{
+ boolean_t havemark;
+
ASSERT(MUTEX_HELD(&so->so_lock));
/*
@@ -2120,28 +1630,29 @@ so_verify_oobstate(struct sonode *so)
case SS_HADOOBDATA:
break;
default:
- printf("Bad oob state 1 (%p): counts %d/%d state %s\n",
- (void *)so, so->so_oobsigcnt,
- so->so_oobcnt, pr_state(so->so_state, so->so_mode));
+ printf("Bad oob state 1 (%p): state %s\n",
+ (void *)so, pr_state(so->so_state, so->so_mode));
return (0);
}
/* SS_RCVATMARK should only be set when SS_OOBPEND is set */
if ((so->so_state & (SS_RCVATMARK|SS_OOBPEND)) == SS_RCVATMARK) {
- printf("Bad oob state 2 (%p): counts %d/%d state %s\n",
- (void *)so, so->so_oobsigcnt,
- so->so_oobcnt, pr_state(so->so_state, so->so_mode));
+ printf("Bad oob state 2 (%p): state %s\n",
+ (void *)so, pr_state(so->so_state, so->so_mode));
return (0);
}
/*
- * (so_oobsigcnt != 0 or SS_RCVATMARK) iff SS_OOBPEND
+ * (havemark != 0 or SS_RCVATMARK) iff SS_OOBPEND
+ * For TPI, the presence of a "mark" is indicated by sti_oobsigcnt.
*/
- if (!EQUIV((so->so_oobsigcnt != 0) || (so->so_state & SS_RCVATMARK),
+ havemark = (SOCK_IS_NONSTR(so)) ? so->so_oobmark > 0 :
+ SOTOTPI(so)->sti_oobsigcnt > 0;
+
+ if (!EQUIV(havemark || (so->so_state & SS_RCVATMARK),
so->so_state & SS_OOBPEND)) {
- printf("Bad oob state 3 (%p): counts %d/%d state %s\n",
- (void *)so, so->so_oobsigcnt,
- so->so_oobcnt, pr_state(so->so_state, so->so_mode));
+ printf("Bad oob state 3 (%p): state %s\n",
+ (void *)so, pr_state(so->so_state, so->so_mode));
return (0);
}
@@ -2150,21 +1661,23 @@ so_verify_oobstate(struct sonode *so)
*/
if (!(so->so_options & SO_OOBINLINE) &&
!EQUIV(so->so_oobmsg != NULL, so->so_state & SS_HAVEOOBDATA)) {
- printf("Bad oob state 4 (%p): counts %d/%d state %s\n",
- (void *)so, so->so_oobsigcnt,
- so->so_oobcnt, pr_state(so->so_state, so->so_mode));
+ printf("Bad oob state 4 (%p): state %s\n",
+ (void *)so, pr_state(so->so_state, so->so_mode));
return (0);
}
- if (so->so_oobsigcnt < so->so_oobcnt) {
+
+ if (!SOCK_IS_NONSTR(so) &&
+ SOTOTPI(so)->sti_oobsigcnt < SOTOTPI(so)->sti_oobcnt) {
printf("Bad oob state 5 (%p): counts %d/%d state %s\n",
- (void *)so, so->so_oobsigcnt,
- so->so_oobcnt, pr_state(so->so_state, so->so_mode));
+ (void *)so, SOTOTPI(so)->sti_oobsigcnt,
+ SOTOTPI(so)->sti_oobcnt,
+ pr_state(so->so_state, so->so_mode));
return (0);
}
+
return (1);
}
#undef EQUIV
-
#endif /* DEBUG */
/* initialize sockfs zone specific kstat related items */
@@ -2224,8 +1737,8 @@ sockfs_update(kstat_t *ksp, int rw)
return (EACCES);
}
- for (so = socklist.sl_list; so != NULL; so = so->so_next) {
- if (so->so_accessvp != NULL && so->so_zoneid == myzoneid) {
+ for (so = socklist.sl_list; so != NULL; so = SOTOTPI(so)->sti_next_so) {
+ if (so->so_count != 0 && so->so_zoneid == myzoneid) {
nactive++;
}
}
@@ -2243,6 +1756,7 @@ sockfs_snapshot(kstat_t *ksp, void *buf, int rw)
struct k_sockinfo *pksi; /* where we put sockinfo data */
t_uscalar_t sn_len; /* soa_len */
zoneid_t myzoneid = (zoneid_t)(uintptr_t)ksp->ks_private;
+ sotpi_info_t *sti;
ASSERT((zoneid_t)(uintptr_t)ksp->ks_private == getzoneid());
@@ -2257,9 +1771,10 @@ sockfs_snapshot(kstat_t *ksp, void *buf, int rw)
* info into buf, in k_sockinfo format.
*/
pksi = (struct k_sockinfo *)buf;
- for (ns = 0, so = socklist.sl_list; so != NULL; so = so->so_next) {
+ ns = 0;
+ for (so = socklist.sl_list; so != NULL; so = SOTOTPI(so)->sti_next_so) {
/* only stuff active sonodes and the same zone: */
- if (so->so_accessvp == NULL || so->so_zoneid != myzoneid) {
+ if (so->so_count == 0 || so->so_zoneid != myzoneid) {
continue;
}
@@ -2271,50 +1786,54 @@ sockfs_snapshot(kstat_t *ksp, void *buf, int rw)
break;
}
+ sti = SOTOTPI(so);
/* copy important info into buf: */
pksi->ks_si.si_size = sizeof (struct k_sockinfo);
pksi->ks_si.si_family = so->so_family;
pksi->ks_si.si_type = so->so_type;
pksi->ks_si.si_flag = so->so_flag;
pksi->ks_si.si_state = so->so_state;
- pksi->ks_si.si_serv_type = so->so_serv_type;
- pksi->ks_si.si_ux_laddr_sou_magic = so->so_ux_laddr.soua_magic;
- pksi->ks_si.si_ux_faddr_sou_magic = so->so_ux_faddr.soua_magic;
- pksi->ks_si.si_laddr_soa_len = so->so_laddr.soa_len;
- pksi->ks_si.si_faddr_soa_len = so->so_faddr.soa_len;
+ pksi->ks_si.si_serv_type = sti->sti_serv_type;
+ pksi->ks_si.si_ux_laddr_sou_magic =
+ sti->sti_ux_laddr.soua_magic;
+ pksi->ks_si.si_ux_faddr_sou_magic =
+ sti->sti_ux_faddr.soua_magic;
+ pksi->ks_si.si_laddr_soa_len = sti->sti_laddr.soa_len;
+ pksi->ks_si.si_faddr_soa_len = sti->sti_faddr.soa_len;
pksi->ks_si.si_szoneid = so->so_zoneid;
+ pksi->ks_si.si_faddr_noxlate = sti->sti_faddr_noxlate;
mutex_enter(&so->so_lock);
- if (so->so_laddr_sa != NULL) {
- ASSERT(so->so_laddr_sa->sa_data != NULL);
- sn_len = so->so_laddr_len;
+ if (sti->sti_laddr_sa != NULL) {
+ ASSERT(sti->sti_laddr_sa->sa_data != NULL);
+ sn_len = sti->sti_laddr_len;
ASSERT(sn_len <= sizeof (short) +
sizeof (pksi->ks_si.si_laddr_sun_path));
pksi->ks_si.si_laddr_family =
- so->so_laddr_sa->sa_family;
+ sti->sti_laddr_sa->sa_family;
if (sn_len != 0) {
/* AF_UNIX socket names are NULL terminated */
(void) strncpy(pksi->ks_si.si_laddr_sun_path,
- so->so_laddr_sa->sa_data,
+ sti->sti_laddr_sa->sa_data,
sizeof (pksi->ks_si.si_laddr_sun_path));
sn_len = strlen(pksi->ks_si.si_laddr_sun_path);
}
pksi->ks_si.si_laddr_sun_path[sn_len] = 0;
}
- if (so->so_faddr_sa != NULL) {
- ASSERT(so->so_faddr_sa->sa_data != NULL);
- sn_len = so->so_faddr_len;
+ if (sti->sti_faddr_sa != NULL) {
+ ASSERT(sti->sti_faddr_sa->sa_data != NULL);
+ sn_len = sti->sti_faddr_len;
ASSERT(sn_len <= sizeof (short) +
sizeof (pksi->ks_si.si_faddr_sun_path));
pksi->ks_si.si_faddr_family =
- so->so_faddr_sa->sa_family;
+ sti->sti_faddr_sa->sa_family;
if (sn_len != 0) {
(void) strncpy(pksi->ks_si.si_faddr_sun_path,
- so->so_faddr_sa->sa_data,
+ sti->sti_faddr_sa->sa_data,
sizeof (pksi->ks_si.si_faddr_sun_path));
sn_len = strlen(pksi->ks_si.si_faddr_sun_path);
}
@@ -2325,9 +1844,9 @@ sockfs_snapshot(kstat_t *ksp, void *buf, int rw)
(void) sprintf(pksi->ks_straddr[0], "%p", (void *)so);
(void) sprintf(pksi->ks_straddr[1], "%p",
- (void *)so->so_ux_laddr.soua_vp);
+ (void *)sti->sti_ux_laddr.soua_vp);
(void) sprintf(pksi->ks_straddr[2], "%p",
- (void *)so->so_ux_faddr.soua_vp);
+ (void *)sti->sti_ux_faddr.soua_vp);
ns++;
pksi++;
@@ -2389,3 +1908,23 @@ out:
return (cnt);
}
}
+
+/*
+ * Copy `size' bytes from `from' to `to'.
+ *
+ * A non-zero `fromkernel' means the source is moved with bcopy() and the
+ * call reports success (0).  Otherwise the copy is delegated to xcopyin()
+ * and its return value is handed back to the caller unchanged.
+ */
+int
+so_copyin(const void *from, void *to, size_t size, int fromkernel)
+{
+	if (!fromkernel)
+		return (xcopyin(from, to, size));
+
+	bcopy(from, to, size);
+	return (0);
+}
+
+/*
+ * Copy `size' bytes from `from' to `to'.
+ *
+ * A non-zero `tokernel' means the data is moved with bcopy() and the call
+ * reports success (0).  Otherwise the copy is delegated to xcopyout() and
+ * its return value is handed back to the caller unchanged.
+ */
+int
+so_copyout(const void *from, void *to, size_t size, int tokernel)
+{
+	if (!tokernel)
+		return (xcopyout(from, to, size));
+
+	bcopy(from, to, size);
+	return (0);
+}
diff --git a/usr/src/uts/common/fs/sockfs/socksyscalls.c b/usr/src/uts/common/fs/sockfs/socksyscalls.c
index 95f4f5738d..4d0929f39b 100644
--- a/usr/src/uts/common/fs/sockfs/socksyscalls.c
+++ b/usr/src/uts/common/fs/sockfs/socksyscalls.c
@@ -64,7 +64,10 @@
#include <vm/seg.h>
#include <vm/seg_map.h>
#include <vm/seg_kpm.h>
+
#include <fs/sockfs/nl7c.h>
+#include <fs/sockfs/sockcommon.h>
+#include <fs/sockfs/socktpi.h>
#ifdef SOCK_TEST
int do_useracc = 1; /* Controlled by setting SO_DEBUG to 4 */
@@ -90,115 +93,39 @@ extern int xnet_truncate_print;
* devpath for the kernel to use.
*/
int
-so_socket(int domain, int type, int protocol, char *devpath, int version)
+so_socket(int family, int type, int protocol, char *devpath, int version)
{
- vnode_t *accessvp;
struct sonode *so;
vnode_t *vp;
struct file *fp;
int fd;
int error;
- boolean_t wildcard = B_FALSE;
- int saved_error = 0;
- int sdomain = domain;
-
- dprint(1, ("so_socket(%d,%d,%d,%p,%d)\n",
- domain, type, protocol, (void *)devpath, version));
-
- if (domain == AF_NCA) {
- /*
- * The request is for an NCA socket so for NL7C use the
- * INET domain instead and mark NL7C_AF_NCA below.
- */
- domain = AF_INET;
- /*
- * NL7C is not supported in non-global zones,
- * we enforce this restriction here.
- */
- if (getzoneid() != GLOBAL_ZONEID) {
- return (set_errno(ENOTSUP));
- }
- }
-
- accessvp = solookup(domain, type, protocol, devpath, &error);
- if (accessvp == NULL) {
- /*
- * If there is either an EPROTONOSUPPORT or EPROTOTYPE error
- * it makes sense doing the wildcard lookup since the
- * protocol might not be in the table.
- */
- if (devpath != NULL || protocol == 0 ||
- !(error == EPROTONOSUPPORT || error == EPROTOTYPE))
- return (set_errno(error));
- saved_error = error;
+ if (devpath != NULL) {
+ char *buf;
+ size_t kdevpathlen = 0;
- /*
- * Try wildcard lookup. Never use devpath for wildcards.
- */
- accessvp = solookup(domain, type, 0, NULL, &error);
- if (accessvp == NULL) {
- /*
- * Can't find in kernel table - have library
- * fall back to /etc/netconfig and tell us
- * the devpath (The library will do this if it didn't
- * already pass in a devpath).
- */
- if (saved_error != 0)
- error = saved_error;
+ buf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+ if ((error = copyinstr(devpath, buf,
+ MAXPATHLEN, &kdevpathlen)) != 0) {
+ kmem_free(buf, MAXPATHLEN);
return (set_errno(error));
}
- wildcard = B_TRUE;
- }
-
- /* Check the device policy */
- if ((error = secpolicy_spec_open(CRED(),
- accessvp, FREAD|FWRITE)) != 0) {
- return (set_errno(error));
- }
-
- if (protocol == IPPROTO_SCTP) {
- so = sosctp_create(accessvp, domain, type, protocol, version,
- NULL, &error);
- } else if (protocol == PROTO_SDP) {
- so = sosdp_create(accessvp, domain, type, protocol, version,
- NULL, &error);
+ so = socket_create(family, type, protocol, buf, NULL,
+ SOCKET_SLEEP, version, CRED(), &error);
+ kmem_free(buf, MAXPATHLEN);
} else {
- so = sotpi_create(accessvp, domain, type, protocol, version,
- NULL, &error);
+ so = socket_create(family, type, protocol, NULL, NULL,
+ SOCKET_SLEEP, version, CRED(), &error);
}
- if (so == NULL) {
+ if (so == NULL)
return (set_errno(error));
- }
- if (sdomain == AF_NCA && domain == AF_INET) {
- so->so_nl7c_flags = NL7C_AF_NCA;
- }
- vp = SOTOV(so);
- if (wildcard) {
- /*
- * Issue SO_PROTOTYPE setsockopt.
- */
- error = SOP_SETSOCKOPT(so, SOL_SOCKET, SO_PROTOTYPE,
- &protocol,
- (t_uscalar_t)sizeof (protocol));
- if (error) {
- (void) VOP_CLOSE(vp, 0, 1, 0, CRED(), NULL);
- VN_RELE(vp);
- /*
- * Setsockopt often fails with ENOPROTOOPT but socket()
- * should fail with EPROTONOSUPPORT/EPROTOTYPE.
- */
- if (saved_error != 0 && error == ENOPROTOOPT)
- error = saved_error;
- else
- error = EPROTONOSUPPORT;
- return (set_errno(error));
- }
- }
+ /* Allocate a file descriptor for the socket */
+ vp = SOTOV(so);
if (error = falloc(vp, FWRITE|FREAD, &fp, &fd)) {
- (void) VOP_CLOSE(vp, 0, 1, 0, CRED(), NULL);
- VN_RELE(vp);
+ (void) socket_close(so, 0, CRED());
+ socket_destroy(so);
return (set_errno(error));
}
@@ -402,6 +329,8 @@ so_socketpair(int sv[2])
int error;
struct sockaddr_ux *name;
size_t namelen;
+ sotpi_info_t *sti1;
+ sotpi_info_t *sti2;
dprint(1, ("so_socketpair(%p)\n", (void *)sv));
@@ -425,6 +354,9 @@ so_socketpair(int sv[2])
goto done;
}
+ sti1 = SOTOTPI(so1);
+ sti2 = SOTOTPI(so2);
+
/*
* The code below makes assumptions about the "sockfs" implementation.
* So make sure that the correct implementation is really used.
@@ -437,12 +369,12 @@ so_socketpair(int sv[2])
* Bind both sockets and connect them with each other.
* Need to allocate name/namelen for soconnect.
*/
- error = SOP_BIND(so1, NULL, 0, _SOBIND_UNSPEC);
+ error = socket_bind(so1, NULL, 0, _SOBIND_UNSPEC, CRED());
if (error) {
eprintsoline(so1, error);
goto done;
}
- error = SOP_BIND(so2, NULL, 0, _SOBIND_UNSPEC);
+ error = socket_bind(so2, NULL, 0, _SOBIND_UNSPEC, CRED());
if (error) {
eprintsoline(so2, error);
goto done;
@@ -450,21 +382,21 @@ so_socketpair(int sv[2])
namelen = sizeof (struct sockaddr_ux);
name = kmem_alloc(namelen, KM_SLEEP);
name->sou_family = AF_UNIX;
- name->sou_addr = so2->so_ux_laddr;
- error = SOP_CONNECT(so1,
+ name->sou_addr = sti2->sti_ux_laddr;
+ error = socket_connect(so1,
(struct sockaddr *)name,
(socklen_t)namelen,
- 0, _SOCONNECT_NOXLATE);
+ 0, _SOCONNECT_NOXLATE, CRED());
if (error) {
kmem_free(name, namelen);
eprintsoline(so1, error);
goto done;
}
- name->sou_addr = so1->so_ux_laddr;
- error = SOP_CONNECT(so2,
+ name->sou_addr = sti1->sti_ux_laddr;
+ error = socket_connect(so2,
(struct sockaddr *)name,
(socklen_t)namelen,
- 0, _SOCONNECT_NOXLATE);
+ 0, _SOCONNECT_NOXLATE, CRED());
kmem_free(name, namelen);
if (error) {
eprintsoline(so2, error);
@@ -487,17 +419,18 @@ so_socketpair(int sv[2])
int nfd;
/*
- * We could simply call SOP_LISTEN() here (which would do the
+ * We could simply call socket_listen() here (which would do the
* binding automatically) if the code didn't rely on passing
- * _SOBIND_NOXLATE to the TPI implementation of SOP_BIND().
+ * _SOBIND_NOXLATE to the TPI implementation of socket_bind().
*/
- error = SOP_BIND(so1, NULL, 0, _SOBIND_UNSPEC|_SOBIND_NOXLATE|
- _SOBIND_LISTEN|_SOBIND_SOCKETPAIR);
+ error = socket_bind(so1, NULL, 0, _SOBIND_UNSPEC|
+ _SOBIND_NOXLATE|_SOBIND_LISTEN|_SOBIND_SOCKETPAIR,
+ CRED());
if (error) {
eprintsoline(so1, error);
goto done;
}
- error = SOP_BIND(so2, NULL, 0, _SOBIND_UNSPEC);
+ error = socket_bind(so2, NULL, 0, _SOBIND_UNSPEC, CRED());
if (error) {
eprintsoline(so2, error);
goto done;
@@ -506,20 +439,19 @@ so_socketpair(int sv[2])
namelen = sizeof (struct sockaddr_ux);
name = kmem_alloc(namelen, KM_SLEEP);
name->sou_family = AF_UNIX;
- name->sou_addr = so1->so_ux_laddr;
- error = SOP_CONNECT(so2,
+ name->sou_addr = sti1->sti_ux_laddr;
+ error = socket_connect(so2,
(struct sockaddr *)name,
(socklen_t)namelen,
- FNONBLOCK, _SOCONNECT_NOXLATE);
+ FNONBLOCK, _SOCONNECT_NOXLATE, CRED());
kmem_free(name, namelen);
if (error) {
if (error != EINPROGRESS) {
- eprintsoline(so2, error);
- goto done;
+ eprintsoline(so2, error); goto done;
}
}
- error = SOP_ACCEPT(so1, 0, &nso);
+ error = socket_accept(so1, 0, CRED(), &nso);
if (error) {
eprintsoline(so1, error);
goto done;
@@ -529,17 +461,17 @@ so_socketpair(int sv[2])
mutex_enter(&so2->so_lock);
error = sowaitconnected(so2, 0, 1);
mutex_exit(&so2->so_lock);
- nvp = SOTOV(nso);
if (error != 0) {
- (void) VOP_CLOSE(nvp, 0, 1, 0, CRED(), NULL);
- VN_RELE(nvp);
+ (void) socket_close(nso, 0, CRED());
+ socket_destroy(nso);
eprintsoline(so2, error);
goto done;
}
+ nvp = SOTOV(nso);
if (error = falloc(nvp, FWRITE|FREAD, &nfp, &nfd)) {
- (void) VOP_CLOSE(nvp, 0, 1, 0, CRED(), NULL);
- VN_RELE(nvp);
+ (void) socket_close(nso, 0, CRED());
+ socket_destroy(nso);
eprintsoline(nso, error);
goto done;
}
@@ -603,13 +535,13 @@ bind(int sock, struct sockaddr *name, socklen_t namelen, int version)
switch (version) {
default:
- error = SOP_BIND(so, name, namelen, 0);
+ error = socket_bind(so, name, namelen, 0, CRED());
break;
case SOV_XPG4_2:
- error = SOP_BIND(so, name, namelen, _SOBIND_XPG4_2);
+ error = socket_bind(so, name, namelen, _SOBIND_XPG4_2, CRED());
break;
case SOV_SOCKBSD:
- error = SOP_BIND(so, name, namelen, _SOBIND_SOCKBSD);
+ error = socket_bind(so, name, namelen, _SOBIND_SOCKBSD, CRED());
break;
}
done:
@@ -635,7 +567,7 @@ listen(int sock, int backlog, int version)
if ((so = getsonode(sock, &error, NULL)) == NULL)
return (set_errno(error));
- error = SOP_LISTEN(so, backlog);
+ error = socket_listen(so, backlog, CRED());
releasef(sock);
if (error)
@@ -655,6 +587,8 @@ accept(int sock, struct sockaddr *name, socklen_t *namelenp, int version)
struct vnode *nvp;
struct file *nfp;
int nfd;
+ struct sockaddr *addrp;
+ socklen_t addrlen;
dprint(1, ("accept(%d, %p, %p)\n",
sock, (void *)name, (void *)namelenp));
@@ -681,15 +615,15 @@ accept(int sock, struct sockaddr *name, socklen_t *namelenp, int version)
}
/*
- * Allocate the user fd before SOP_ACCEPT() in order to
- * catch EMFILE errors before calling SOP_ACCEPT().
+ * Allocate the user fd before socket_accept() in order to
+ * catch EMFILE errors before calling socket_accept().
*/
if ((nfd = ufalloc(0)) == -1) {
eprintsoline(so, EMFILE);
releasef(sock);
return (set_errno(EMFILE));
}
- error = SOP_ACCEPT(so, fp->f_flag, &nso);
+ error = socket_accept(so, fp->f_flag, CRED(), &nso);
releasef(sock);
if (error) {
setf(nfd, NULL);
@@ -698,34 +632,32 @@ accept(int sock, struct sockaddr *name, socklen_t *namelenp, int version)
nvp = SOTOV(nso);
- /*
- * so_faddr_sa can not go away even though we are not holding so_lock.
- * However, in theory its content could change from underneath us.
- * But this is not possible in practice since it can only
- * change due to either some socket system call
- * or due to a T_CONN_CON being received from the stream head.
- * Since the falloc/setf have not yet been done no thread
- * can do any system call on nso and T_CONN_CON can not arrive
- * on a socket that is already connected.
- * Thus there is no reason to hold so_lock here.
- *
- * SOP_ACCEPT() is required to have set the valid bit for the faddr,
- * but it could be instantly cleared by a disconnect from the transport.
- * For that reason we ignore it here.
- */
ASSERT(MUTEX_NOT_HELD(&nso->so_lock));
- error = copyout_name(name, namelen, namelenp,
- nso->so_faddr_sa, (socklen_t)nso->so_faddr_len);
+ if (namelen != 0) {
+ addrlen = so->so_max_addr_len;
+ addrp = (struct sockaddr *)kmem_alloc(addrlen, KM_SLEEP);
+
+ if ((error = socket_getpeername(nso, (struct sockaddr *)addrp,
+ &addrlen, B_TRUE, CRED())) == 0) {
+ error = copyout_name(name, namelen, namelenp,
+ addrp, addrlen);
+ } else {
+ ASSERT(error == EINVAL || error == ENOTCONN);
+ error = ECONNABORTED;
+ }
+ kmem_free(addrp, so->so_max_addr_len);
+ }
+
if (error) {
setf(nfd, NULL);
- (void) VOP_CLOSE(nvp, 0, 1, 0, CRED(), NULL);
- VN_RELE(nvp);
+ (void) socket_close(nso, 0, CRED());
+ socket_destroy(nso);
return (set_errno(error));
}
if (error = falloc(NULL, FWRITE|FREAD, &nfp, NULL)) {
setf(nfd, NULL);
- (void) VOP_CLOSE(nvp, 0, 1, 0, CRED(), NULL);
- VN_RELE(nvp);
+ (void) socket_close(nso, 0, CRED());
+ socket_destroy(nso);
eprintsoline(so, error);
return (set_errno(error));
}
@@ -790,8 +722,8 @@ connect(int sock, struct sockaddr *name, socklen_t namelen, int version)
} else
name = NULL;
- error = SOP_CONNECT(so, name, namelen, fp->f_flag,
- (version != SOV_XPG4_2) ? 0 : _SOCONNECT_XPG4_2);
+ error = socket_connect(so, name, namelen, fp->f_flag,
+ (version != SOV_XPG4_2) ? 0 : _SOCONNECT_XPG4_2, CRED());
releasef(sock);
if (name)
kmem_free(name, (size_t)namelen);
@@ -813,7 +745,7 @@ shutdown(int sock, int how, int version)
if ((so = getsonode(sock, &error, NULL)) == NULL)
return (set_errno(error));
- error = SOP_SHUTDOWN(so, how);
+ error = socket_shutdown(so, how, CRED());
releasef(sock);
if (error)
@@ -857,13 +789,12 @@ recvit(int sock,
msg->msg_flags = flags & (MSG_OOB | MSG_PEEK | MSG_WAITALL |
MSG_DONTWAIT | MSG_XPG4_2);
- error = SOP_RECVMSG(so, msg, uiop);
+ error = socket_recvmsg(so, msg, uiop, CRED());
if (error) {
releasef(sock);
return (set_errno(error));
}
lwp_stat_update(LWP_STAT_MSGRCV, 1);
- so_update_attrs(so, SOACC);
releasef(sock);
error = copyout_name(name, namelen, namelenp,
@@ -1198,7 +1129,7 @@ sendit(int sock, struct nmsghdr *msg, struct uio *uiop, int flags)
len = uiop->uio_resid;
msg->msg_flags = flags;
- error = SOP_SENDMSG(so, msg, uiop);
+ error = socket_sendmsg(so, msg, uiop, CRED());
done1:
if (control != NULL)
kmem_free(control, controllen);
@@ -1211,7 +1142,6 @@ done3:
return (set_errno(error));
}
lwp_stat_update(LWP_STAT_MSGSND, 1);
- so_update_attrs(so, SOMOD);
releasef(sock);
return (len - uiop->uio_resid);
}
@@ -1413,12 +1343,8 @@ getpeername(int sock, struct sockaddr *name, socklen_t *namelenp, int version)
struct sonode *so;
int error;
socklen_t namelen;
- union {
- struct sockaddr_in sin;
- struct sockaddr_in6 sin6;
- } sin; /* Temporary buffer, common case */
- void *addr; /* Temporary buffer, uncommon case */
- socklen_t addrlen, size;
+ socklen_t sock_addrlen;
+ struct sockaddr *sock_addrp;
dprint(1, ("getpeername(%d, %p, %p)\n",
sock, (void *)name, (void *)namelenp));
@@ -1432,44 +1358,16 @@ getpeername(int sock, struct sockaddr *name, socklen_t *namelenp, int version)
error = EFAULT;
goto rel_out;
}
- /*
- * If a connect or accept has been done, unless we're an Xnet socket,
- * the remote address has already been updated in so_faddr_sa.
- */
- if (so->so_version != SOV_SOCKSTREAM && so->so_version != SOV_SOCKBSD ||
- !(so->so_state & SS_FADDR_VALID)) {
- if ((error = SOP_GETPEERNAME(so)) != 0)
- goto rel_out;
- }
+ sock_addrlen = so->so_max_addr_len;
+ sock_addrp = (struct sockaddr *)kmem_alloc(sock_addrlen, KM_SLEEP);
- if (so->so_faddr_maxlen <= sizeof (sin)) {
- size = 0;
- addr = &sin;
- } else {
- /*
- * Allocate temporary to avoid holding so_lock across
- * copyout
- */
- size = so->so_faddr_maxlen;
- addr = kmem_alloc(size, KM_SLEEP);
+ if ((error = socket_getpeername(so, sock_addrp, &sock_addrlen,
+ B_FALSE, CRED())) == 0) {
+ ASSERT(sock_addrlen <= so->so_max_addr_len);
+ error = copyout_name(name, namelen, namelenp,
+ (void *)sock_addrp, sock_addrlen);
}
- /* Prevent so_faddr_sa/len from changing while accessed */
- mutex_enter(&so->so_lock);
- if (!(so->so_state & SS_ISCONNECTED)) {
- mutex_exit(&so->so_lock);
- error = ENOTCONN;
- goto free_out;
- }
- addrlen = so->so_faddr_len;
- bcopy(so->so_faddr_sa, addr, addrlen);
- mutex_exit(&so->so_lock);
-
- ASSERT(MUTEX_NOT_HELD(&so->so_lock));
- error = copyout_name(name, namelen, namelenp, addr,
- (so->so_state & SS_FADDR_NOXLATE) ? 0 : addrlen);
-free_out:
- if (size != 0)
- kmem_free(addr, size);
+ kmem_free(sock_addrp, so->so_max_addr_len);
rel_out:
releasef(sock);
bad: return (error != 0 ? set_errno(error) : 0);
@@ -1482,13 +1380,8 @@ getsockname(int sock, struct sockaddr *name,
{
struct sonode *so;
int error;
- socklen_t namelen;
- union {
- struct sockaddr_in sin;
- struct sockaddr_in6 sin6;
- } sin; /* Temporary buffer, common case */
- void *addr; /* Temporary buffer, uncommon case */
- socklen_t addrlen, size;
+ socklen_t namelen, sock_addrlen;
+ struct sockaddr *sock_addrp;
dprint(1, ("getsockname(%d, %p, %p)\n",
sock, (void *)name, (void *)namelenp));
@@ -1503,39 +1396,16 @@ getsockname(int sock, struct sockaddr *name,
goto rel_out;
}
- /*
- * If a bind or accept has been done, unless we're an Xnet endpoint,
- * the local address has already been updated in so_laddr_sa.
- */
- if ((so->so_version != SOV_SOCKSTREAM &&
- so->so_version != SOV_SOCKBSD) ||
- !(so->so_state & SS_LADDR_VALID)) {
- if ((error = SOP_GETSOCKNAME(so)) != 0)
- goto rel_out;
- }
-
- if (so->so_laddr_maxlen <= sizeof (sin)) {
- size = 0;
- addr = &sin;
- } else {
- /*
- * Allocate temporary to avoid holding so_lock across
- * copyout
- */
- size = so->so_laddr_maxlen;
- addr = kmem_alloc(size, KM_SLEEP);
+ sock_addrlen = so->so_max_addr_len;
+ sock_addrp = (struct sockaddr *)kmem_alloc(sock_addrlen, KM_SLEEP);
+ if ((error = socket_getsockname(so, sock_addrp, &sock_addrlen,
+ CRED())) == 0) {
+ ASSERT(MUTEX_NOT_HELD(&so->so_lock));
+ ASSERT(sock_addrlen <= so->so_max_addr_len);
+ error = copyout_name(name, namelen, namelenp,
+ (void *)sock_addrp, sock_addrlen);
}
- /* Prevent so_laddr_sa/len from changing while accessed */
- mutex_enter(&so->so_lock);
- addrlen = so->so_laddr_len;
- bcopy(so->so_laddr_sa, addr, addrlen);
- mutex_exit(&so->so_lock);
-
- ASSERT(MUTEX_NOT_HELD(&so->so_lock));
- error = copyout_name(name, namelen, namelenp,
- addr, addrlen);
- if (size != 0)
- kmem_free(addr, size);
+ kmem_free(sock_addrp, so->so_max_addr_len);
rel_out:
releasef(sock);
bad: return (error != 0 ? set_errno(error) : 0);
@@ -1577,8 +1447,9 @@ getsockopt(int sock,
}
optval = kmem_alloc(optlen, KM_SLEEP);
optlen_res = optlen;
- error = SOP_GETSOCKOPT(so, level, option_name, optval,
- &optlen_res, (version != SOV_XPG4_2) ? 0 : _SOGETSOCKOPT_XPG4_2);
+ error = socket_getsockopt(so, level, option_name, optval,
+ &optlen_res, (version != SOV_XPG4_2) ? 0 : _SOGETSOCKOPT_XPG4_2,
+ CRED());
releasef(sock);
if (error) {
kmem_free(optval, optlen);
@@ -1633,8 +1504,8 @@ setsockopt(int sock,
} else
option_len = 0;
- error = SOP_SETSOCKOPT(so, level, option_name, optval,
- (t_uscalar_t)option_len);
+ error = socket_setsockopt(so, level, option_name, optval,
+ (t_uscalar_t)option_len, CRED());
done1:
if (optval != buffer)
kmem_free(optval, (size_t)option_len);
@@ -1646,51 +1517,140 @@ done2:
}
/*
- * Add config info when devpath is non-NULL; delete info when devpath is NULL.
- * devpath is a user address.
+ * Add config info when name is non-NULL; delete info when name is NULL.
+ * name may be either a device path or a module name, and is a user address.
*/
int
-sockconfig(int domain, int type, int protocol, char *devpath)
+sockconfig(int family, int type, int protocol, char *name)
{
- char *kdevpath; /* Copied in devpath string */
- size_t kdevpathlen;
+ char *kdevpath = NULL; /* Copied in devpath string */
+ char *kmodule = NULL;
+ size_t pathlen = 0;
int error = 0;
dprint(1, ("sockconfig(%d, %d, %d, %p)\n",
- domain, type, protocol, (void *)devpath));
+ family, type, protocol, (void *)name));
if (secpolicy_net_config(CRED(), B_FALSE) != 0)
return (set_errno(EPERM));
- if (devpath == NULL) {
- /* Deleting an entry */
- kdevpath = NULL;
- kdevpathlen = 0;
- } else {
+ /*
+ * By default set the kdevpath and kmodule to NULL to delete an entry.
+ * Otherwise when name is not NULL, set the kdevpath or kmodule
+ * value to add an entry.
+ */
+ if (name != NULL) {
/*
* Adding an entry.
- * Copyin the devpath.
+ * Copyin the name.
* This also makes it possible to check for too long pathnames.
- * Compress the space needed for the devpath before passing it
+ * Compress the space needed for the name before passing it
* to soconfig - soconfig will store the string until
* the configuration is removed.
*/
char *buf;
-
buf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
- if ((error = copyinstr(devpath, buf, MAXPATHLEN,
- &kdevpathlen)) != 0) {
+ if ((error = copyinstr(name, buf, MAXPATHLEN, &pathlen)) != 0) {
kmem_free(buf, MAXPATHLEN);
goto done;
}
+ if (strncmp(buf, "/dev", strlen("/dev")) == 0) {
+ /* For device */
- kdevpath = kmem_alloc(kdevpathlen, KM_SLEEP);
- bcopy(buf, kdevpath, kdevpathlen);
- kdevpath[kdevpathlen - 1] = '\0';
+ /*
+ * Special handling for NCA:
+ *
+ * DEV_NCA is never opened even if an application
+ * requests AF_NCA. The device opened is instead a
+ * predefined AF_INET transport (NCA_INET_DEV).
+ *
+ * Prior to Volo (PSARC/2007/587) NCA would determine
+ * the device using a lookup, which worked then because
+ * all protocols were based on TPI. Since TPI is no
+ * longer the default, we have to explicitly state
+ * which device to use.
+ */
+ if (strcmp(buf, NCA_DEV) == 0) {
+ /* only support entry <28, 2, 0> */
+ if (family != AF_NCA || type != SOCK_STREAM ||
+ protocol != 0) {
+ kmem_free(buf, MAXPATHLEN);
+ error = EINVAL;
+ goto done;
+ }
+
+ pathlen = strlen(NCA_INET_DEV) + 1;
+ kdevpath = kmem_alloc(pathlen, KM_SLEEP);
+ bcopy(NCA_INET_DEV, kdevpath, pathlen);
+ kdevpath[pathlen - 1] = '\0';
+ } else {
+ kdevpath = kmem_alloc(pathlen, KM_SLEEP);
+ bcopy(buf, kdevpath, pathlen);
+ kdevpath[pathlen - 1] = '\0';
+ }
+ } else {
+ /* For socket module */
+ kmodule = kmem_alloc(pathlen, KM_SLEEP);
+ bcopy(buf, kmodule, pathlen);
+ kmodule[pathlen - 1] = '\0';
+
+ pathlen = 0;
+ if (strcmp(kmodule, "tcp") == 0) {
+ /* Get the tcp device name for fallback */
+ if (family == 2) {
+ pathlen = strlen("/dev/tcp") + 1;
+ kdevpath = kmem_alloc(pathlen,
+ KM_SLEEP);
+ bcopy("/dev/tcp", kdevpath,
+ pathlen);
+ kdevpath[pathlen - 1] = '\0';
+ } else {
+ ASSERT(family == 26);
+ pathlen = strlen("/dev/tcp6") + 1;
+ kdevpath = kmem_alloc(pathlen,
+ KM_SLEEP);
+ bcopy("/dev/tcp6", kdevpath, pathlen);
+ kdevpath[pathlen - 1] = '\0';
+ }
+ } else if (strcmp(kmodule, "udp") == 0) {
+ /* Get the udp device name for fallback */
+ if (family == 2) {
+ pathlen = strlen("/dev/udp") + 1;
+ kdevpath = kmem_alloc(pathlen,
+ KM_SLEEP);
+ bcopy("/dev/udp", kdevpath, pathlen);
+ kdevpath[pathlen - 1] = '\0';
+ } else {
+ ASSERT(family == 26);
+ pathlen = strlen("/dev/udp6") + 1;
+ kdevpath = kmem_alloc(pathlen,
+ KM_SLEEP);
+ bcopy("/dev/udp6", kdevpath, pathlen);
+ kdevpath[pathlen - 1] = '\0';
+ }
+ } else if (strcmp(kmodule, "icmp") == 0) {
+ /* Get the icmp device name for fallback */
+ if (family == 2) {
+ pathlen = strlen("/dev/rawip") + 1;
+ kdevpath = kmem_alloc(pathlen,
+ KM_SLEEP);
+ bcopy("/dev/rawip", kdevpath, pathlen);
+ kdevpath[pathlen - 1] = '\0';
+ } else {
+ ASSERT(family == 26);
+ pathlen = strlen("/dev/rawip6") + 1;
+ kdevpath = kmem_alloc(pathlen,
+ KM_SLEEP);
+ bcopy("/dev/rawip6", kdevpath, pathlen);
+ kdevpath[pathlen - 1] = '\0';
+ }
+ }
+ }
kmem_free(buf, MAXPATHLEN);
}
- error = soconfig(domain, type, protocol, kdevpath, (int)kdevpathlen);
+ error = soconfig(family, type, protocol, kdevpath, (int)pathlen,
+ kmodule);
done:
if (error) {
eprintline(error);
@@ -1961,9 +1921,15 @@ snf_async_read(snf_req_t *sr)
*/
so = VTOSO(vp);
stp = vp->v_stream;
- wroff = (int)(stp->sd_wroff);
- maxblk = (int)(stp->sd_maxblk);
- extra = wroff + (int)(stp->sd_tail);
+ if (stp == NULL) {
+ wroff = so->so_proto_props.sopp_wroff;
+ maxblk = so->so_proto_props.sopp_maxblk;
+ extra = wroff + so->so_proto_props.sopp_tail;
+ } else {
+ wroff = (int)(stp->sd_wroff);
+ maxblk = (int)(stp->sd_maxblk);
+ extra = wroff + (int)(stp->sd_tail);
+ }
}
while ((size != 0) && (sr->sr_write_error == 0)) {
@@ -1975,7 +1941,8 @@ snf_async_read(snf_req_t *sr)
* need to adjust the size to the maximum
* SSL record size set in the stream head.
*/
- if (vp->v_type == VSOCK && so->so_kssl_ctx != NULL)
+ if (vp->v_type == VSOCK && !SOCK_IS_NONSTR(so) &&
+ SOTOTPI(so)->sti_kssl_ctx != NULL)
iosize = (int)MIN(iosize, maxblk);
if ((mp = allocb(iosize + extra, BPRI_MED)) == NULL) {
@@ -2066,7 +2033,7 @@ create_thread(int operation, struct vnode *vp, file_t *fp,
* store sd_qn_maxpsz into sr_maxpsz while we have stream head.
* stream might be closed before thread returns from snf_async_read.
*/
- if (stp->sd_qn_maxpsz > 0) {
+ if (stp != NULL && stp->sd_qn_maxpsz > 0) {
sr->sr_maxpsz = MIN(MAXBSIZE, stp->sd_qn_maxpsz);
} else {
sr->sr_maxpsz = MAXBSIZE;
@@ -2115,9 +2082,11 @@ snf_direct_io(file_t *fp, file_t *rfp, u_offset_t fileoff, u_offset_t size,
short fflag;
struct vnode *vp;
int ksize;
+ struct nmsghdr msg;
ksize = 0;
*count = 0;
+ bzero(&msg, sizeof (msg));
vp = fp->f_vnode;
fflag = fp->f_flag;
@@ -2138,8 +2107,11 @@ snf_direct_io(file_t *fp, file_t *rfp, u_offset_t fileoff, u_offset_t size,
}
iosize = MBLKL(mp);
- if ((error = kstrwritemp(vp, mp, fflag)) != 0) {
- freeb(mp);
+ error = socket_sendmblk(VTOSO(vp), &msg, fflag, CRED(), &mp);
+
+ if (error != 0) {
+ if (mp != NULL)
+ freeb(mp);
break;
}
ksize += iosize;
@@ -2233,10 +2205,13 @@ snf_segmap(file_t *fp, vnode_t *fvp, u_offset_t fileoff, u_offset_t size,
snf_smap_desbinfo *snfi;
struct vattr va;
boolean_t dowait = B_FALSE;
+ struct nmsghdr msg;
vp = fp->f_vnode;
fflag = fp->f_flag;
ksize = 0;
+ bzero(&msg, sizeof (msg));
+
for (;;) {
if (ISSIG(curthread, JUSTLOOKING)) {
error = EINTR;
@@ -2307,9 +2282,11 @@ snf_segmap(file_t *fp, vnode_t *fvp, u_offset_t fileoff, u_offset_t size,
mp->b_datap->db_struioflag |= STRUIO_ZCNOTIFY;
}
VOP_RWUNLOCK(fvp, V_WRITELOCK_FALSE, NULL);
- if ((error = kstrwritemp(vp, mp, fflag)) != 0) {
+ error = socket_sendmblk(VTOSO(vp), &msg, fflag, CRED(), &mp);
+ if (error != 0) {
*count = ksize;
- freemsg(mp);
+ if (mp != NULL)
+ freemsg(mp);
return (error);
}
ksize += iosize;
@@ -2335,16 +2312,22 @@ done:
stdata_t *stp;
stp = vp->v_stream;
- mutex_enter(&stp->sd_lock);
- while (!(stp->sd_flag & STZCNOTIFY)) {
- if (cv_wait_sig(&stp->sd_zcopy_wait,
- &stp->sd_lock) == 0) {
- error = EINTR;
- break;
+ if (stp == NULL) {
+ struct sonode *so;
+ so = VTOSO(vp);
+ error = so_zcopy_wait(so);
+ } else {
+ mutex_enter(&stp->sd_lock);
+ while (!(stp->sd_flag & STZCNOTIFY)) {
+ if (cv_wait_sig(&stp->sd_zcopy_wait,
+ &stp->sd_lock) == 0) {
+ error = EINTR;
+ break;
+ }
}
+ stp->sd_flag &= ~STZCNOTIFY;
+ mutex_exit(&stp->sd_lock);
}
- stp->sd_flag &= ~STZCNOTIFY;
- mutex_exit(&stp->sd_lock);
}
return (error);
}
@@ -2367,6 +2350,7 @@ snf_cache(file_t *fp, vnode_t *fvp, u_offset_t fileoff, u_offset_t size,
int maxblk = 0;
int wroff = 0;
struct sonode *so;
+ struct nmsghdr msg;
vp = fp->f_vnode;
if (vp->v_type == VSOCK) {
@@ -2377,11 +2361,17 @@ snf_cache(file_t *fp, vnode_t *fvp, u_offset_t fileoff, u_offset_t size,
*/
so = VTOSO(vp);
stp = vp->v_stream;
- wroff = (int)(stp->sd_wroff);
- maxblk = (int)(stp->sd_maxblk);
- extra = wroff + (int)(stp->sd_tail);
+ if (stp == NULL) {
+ wroff = so->so_proto_props.sopp_wroff;
+ maxblk = so->so_proto_props.sopp_maxblk;
+ extra = wroff + so->so_proto_props.sopp_tail;
+ } else {
+ wroff = (int)(stp->sd_wroff);
+ maxblk = (int)(stp->sd_maxblk);
+ extra = wroff + (int)(stp->sd_tail);
+ }
}
-
+ bzero(&msg, sizeof (msg));
fflag = fp->f_flag;
ksize = 0;
auio.uio_iov = &aiov;
@@ -2406,7 +2396,8 @@ snf_cache(file_t *fp, vnode_t *fvp, u_offset_t fileoff, u_offset_t size,
* need to adjust the size to the maximum
* SSL record size set in the stream head.
*/
- if (vp->v_type == VSOCK && so->so_kssl_ctx != NULL)
+ if (vp->v_type == VSOCK && !SOCK_IS_NONSTR(so) &&
+ SOTOTPI(so)->sti_kssl_ctx != NULL)
iosize = (int)MIN(iosize, maxblk);
if ((mp = allocb(iosize + extra, BPRI_MED)) == NULL) {
@@ -2434,9 +2425,13 @@ snf_cache(file_t *fp, vnode_t *fvp, u_offset_t fileoff, u_offset_t size,
mp->b_wptr = mp->b_rptr + iosize;
VOP_RWUNLOCK(fvp, V_WRITELOCK_FALSE, NULL);
- if ((error = kstrwritemp(vp, mp, fflag)) != 0) {
+
+ error = socket_sendmblk(VTOSO(vp), &msg, fflag, CRED(), &mp);
+
+ if (error != 0) {
*count = ksize;
- freeb(mp);
+ if (mp != NULL)
+ freeb(mp);
return (error);
}
ksize += iosize;
@@ -2540,14 +2535,17 @@ sosendfile64(file_t *fp, file_t *rfp, const struct ksendfilevec64 *sfv,
if (sfv_len >= MAXBSIZE && (sfv_len >= (va_size >> 1) ||
(sfv->sfv_flag & SFV_NOWAIT) || sfv_len >= 0x1000000) &&
!vn_has_flocks(fvp) && !(fvp->v_flag & VNOMAP)) {
- if ((stp->sd_copyflag & (STZCVMSAFE|STZCVMUNSAFE)) == 0) {
+ uint_t copyflag;
+ copyflag = stp != NULL ? stp->sd_copyflag :
+ VTOSO(vp)->so_proto_props.sopp_zcopyflag;
+ if ((copyflag & (STZCVMSAFE|STZCVMUNSAFE)) == 0) {
int on = 1;
- if (SOP_SETSOCKOPT(VTOSO(vp), SOL_SOCKET,
- SO_SND_COPYAVOID, &on, sizeof (on)) == 0)
+ if (socket_setsockopt(VTOSO(vp), SOL_SOCKET,
+ SO_SND_COPYAVOID, &on, sizeof (on), CRED()) == 0)
dozcopy = B_TRUE;
} else {
- dozcopy = (stp->sd_copyflag & STZCVMSAFE);
+ dozcopy = copyflag & STZCVMSAFE;
}
}
if (dozcopy) {
@@ -2555,10 +2553,19 @@ sosendfile64(file_t *fp, file_t *rfp, const struct ksendfilevec64 *sfv,
error = snf_segmap(fp, fvp, sfv_off, (u_offset_t)sfv_len,
&count, ((sfv->sfv_flag & SFV_NOWAIT) != 0));
} else {
- if (stp->sd_qn_maxpsz == INFPSZ)
+ if (vp->v_type == VSOCK && stp == NULL) {
+ sonode_t *so = VTOSO(vp);
+ maxpsz = so->so_proto_props.sopp_maxpsz;
+ } else if (stp != NULL) {
+ maxpsz = stp->sd_qn_maxpsz;
+ } else {
+ maxpsz = maxphys;
+ }
+
+ if (maxpsz == INFPSZ)
maxpsz = maxphys;
else
- maxpsz = roundup(stp->sd_qn_maxpsz, MAXBSIZE);
+ maxpsz = roundup(maxpsz, MAXBSIZE);
sf_stats.ss_file_cached++;
error = snf_cache(fp, fvp, sfv_off, (u_offset_t)sfv_len,
maxpsz, &count);
@@ -2613,7 +2620,7 @@ sendto32(int32_t sock, caddr32_t buffer, size32_t len, int32_t flags,
int
soaccept(struct sonode *so, int fflag, struct sonode **nsop)
{
- return (SOP_ACCEPT(so, fflag, nsop));
+ return (socket_accept(so, fflag, CRED(), nsop));
}
int
@@ -2622,9 +2629,9 @@ sobind(struct sonode *so, struct sockaddr *name, socklen_t namelen,
{
int error;
- error = SOP_BIND(so, name, namelen, flags);
+ error = socket_bind(so, name, namelen, flags, CRED());
if (error == 0 && backlog != 0)
- return (SOP_LISTEN(so, backlog));
+ return (socket_listen(so, backlog, CRED()));
return (error);
}
@@ -2632,59 +2639,48 @@ sobind(struct sonode *so, struct sockaddr *name, socklen_t namelen,
int
solisten(struct sonode *so, int backlog)
{
- return (SOP_LISTEN(so, backlog));
+ return (socket_listen(so, backlog, CRED()));
}
int
soconnect(struct sonode *so, const struct sockaddr *name, socklen_t namelen,
int fflag, int flags)
{
- return (SOP_CONNECT(so, name, namelen, fflag, flags));
+ return (socket_connect(so, name, namelen, fflag, flags, CRED()));
}
int
sorecvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop)
{
- return (SOP_RECVMSG(so, msg, uiop));
+ return (socket_recvmsg(so, msg, uiop, CRED()));
}
int
sosendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop)
{
- return (SOP_SENDMSG(so, msg, uiop));
-}
-
-int
-sogetpeername(struct sonode *so)
-{
- return (SOP_GETPEERNAME(so));
-}
-
-int
-sogetsockname(struct sonode *so)
-{
- return (SOP_GETSOCKNAME(so));
+ return (socket_sendmsg(so, msg, uiop, CRED()));
}
int
soshutdown(struct sonode *so, int how)
{
- return (SOP_SHUTDOWN(so, how));
+ return (socket_shutdown(so, how, CRED()));
}
int
sogetsockopt(struct sonode *so, int level, int option_name, void *optval,
socklen_t *optlenp, int flags)
{
- return (SOP_GETSOCKOPT(so, level, option_name, optval, optlenp,
- flags));
+ return (socket_getsockopt(so, level, option_name, optval, optlenp,
+ flags, CRED()));
}
int
sosetsockopt(struct sonode *so, int level, int option_name, const void *optval,
t_uscalar_t optlen)
{
- return (SOP_SETSOCKOPT(so, level, option_name, optval, optlen));
+ return (socket_setsockopt(so, level, option_name, optval, optlen,
+ CRED()));
}
/*
@@ -2692,9 +2688,25 @@ sosetsockopt(struct sonode *so, int level, int option_name, const void *optval,
* able to handle the creation of TPI sockfs sockets.
*/
struct sonode *
-socreate(vnode_t *accessvp, int domain, int type, int protocol, int version,
- struct sonode *tso, int *errorp)
+socreate(struct sockparams *sp, int family, int type, int protocol, int version,
+ int *errorp)
{
- return (sotpi_create(accessvp, domain, type, protocol, version, tso,
- errorp));
+ struct sonode *so;
+
+ ASSERT(sp != NULL);
+
+ so = sp->sp_smod_info->smod_sock_create_func(sp, family, type, protocol,
+ version, SOCKET_SLEEP, errorp, CRED());
+ if (so == NULL) {
+ SOCKPARAMS_DEC_REF(sp);
+ } else {
+ if ((*errorp = SOP_INIT(so, NULL, CRED(), SOCKET_SLEEP)) == 0) {
+ /* Cannot fail, only bumps so_count */
+ (void) VOP_OPEN(&SOTOV(so), FREAD|FWRITE, CRED(), NULL);
+ } else {
+ socket_destroy(so);
+ so = NULL;
+ }
+ }
+ return (so);
}
diff --git a/usr/src/uts/common/fs/sockfs/socktpi.c b/usr/src/uts/common/fs/sockfs/socktpi.c
index f27c34578b..01873727f8 100644
--- a/usr/src/uts/common/fs/sockfs/socktpi.c
+++ b/usr/src/uts/common/fs/sockfs/socktpi.c
@@ -32,6 +32,7 @@
#include <sys/conf.h>
#include <sys/cred.h>
#include <sys/kmem.h>
+#include <sys/kmem_impl.h>
#include <sys/sysmacros.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
@@ -45,6 +46,7 @@
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/strsun.h>
+#include <sys/suntpi.h>
#include <sys/ddi.h>
#include <sys/esunddi.h>
#include <sys/flock.h>
@@ -81,6 +83,10 @@
#include <inet/kssl/ksslapi.h>
+#include <fs/sockfs/sockcommon.h>
+#include <fs/sockfs/socktpi.h>
+#include <fs/sockfs/socktpi_impl.h>
+
/*
* Possible failures when memory can't be allocated. The documented behavior:
*
@@ -170,13 +176,29 @@ int xnet_skip_checks = 0;
int xnet_check_print = 0;
int xnet_truncate_print = 0;
+static void sotpi_destroy(struct sonode *);
+static struct sonode *sotpi_create(struct sockparams *, int, int, int, int,
+ int, int *, cred_t *cr);
+
+static boolean_t sotpi_info_create(struct sonode *, int);
+static void sotpi_info_init(struct sonode *);
+static void sotpi_info_fini(struct sonode *);
+static void sotpi_info_destroy(struct sonode *);
+
+/*
+ * Do direct function call to the transport layer below; this would
+ * also allow the transport to utilize read-side synchronous stream
+ * interface if necessary. This is a /etc/system tunable that must
+ * not be modified on a running system. By default this is enabled
+ * for performance reasons and may be disabled for debugging purposes.
+ */
+boolean_t socktpi_direct = B_TRUE;
+
+static struct kmem_cache *socktpi_cache, *socktpi_unix_cache;
+
extern void sigintr(k_sigset_t *, int);
extern void sigunintr(k_sigset_t *);
-extern void *nl7c_lookup_addr(void *, t_uscalar_t);
-extern void *nl7c_add_addr(void *, t_uscalar_t);
-extern void nl7c_listener_addr(void *, struct sonode *);
-
/* Sockets acting as an in-kernel SSL proxy */
extern mblk_t *strsock_kssl_input(vnode_t *, mblk_t *, strwakeup_t *,
strsigset_t *, strsigset_t *, strpollset_t *);
@@ -189,62 +211,198 @@ extern int sodput(sodirect_t *, mblk_t *);
extern void sodwakeup(sodirect_t *);
/* TPI sockfs sonode operations */
-static int sotpi_accept(struct sonode *, int, struct sonode **);
-static int sotpi_bind(struct sonode *, struct sockaddr *, socklen_t,
+int sotpi_init(struct sonode *, struct sonode *, struct cred *,
int);
+static int sotpi_accept(struct sonode *, int, struct cred *,
+ struct sonode **);
+static int sotpi_bind(struct sonode *, struct sockaddr *, socklen_t,
+ int, struct cred *);
+static int sotpi_listen(struct sonode *, int, struct cred *);
static int sotpi_connect(struct sonode *, const struct sockaddr *,
- socklen_t, int, int);
-static int sotpi_listen(struct sonode *, int);
+ socklen_t, int, int, struct cred *);
+extern int sotpi_recvmsg(struct sonode *, struct nmsghdr *,
+ struct uio *, struct cred *);
static int sotpi_sendmsg(struct sonode *, struct nmsghdr *,
- struct uio *);
-static int sotpi_shutdown(struct sonode *, int);
-static int sotpi_getsockname(struct sonode *);
+ struct uio *, struct cred *);
+static int sotpi_sendmblk(struct sonode *, struct nmsghdr *, int,
+ struct cred *, mblk_t **);
static int sosend_dgramcmsg(struct sonode *, struct sockaddr *, socklen_t,
struct uio *, void *, t_uscalar_t, int);
static int sodgram_direct(struct sonode *, struct sockaddr *,
socklen_t, struct uio *, int);
+extern int sotpi_getpeername(struct sonode *, struct sockaddr *,
+ socklen_t *, boolean_t, struct cred *);
+static int sotpi_getsockname(struct sonode *, struct sockaddr *,
+ socklen_t *, struct cred *);
+static int sotpi_shutdown(struct sonode *, int, struct cred *);
+extern int sotpi_getsockopt(struct sonode *, int, int, void *,
+ socklen_t *, int, struct cred *);
+extern int sotpi_setsockopt(struct sonode *, int, int, const void *,
+ socklen_t, struct cred *);
+static int sotpi_ioctl(struct sonode *, int, intptr_t, int, struct cred *,
+ int32_t *);
+static int socktpi_plumbioctl(struct vnode *, int, intptr_t, int,
+ struct cred *, int32_t *);
+static int sotpi_poll(struct sonode *, short, int, short *,
+ struct pollhead **);
+static int sotpi_close(struct sonode *, int, struct cred *);
+
+static int i_sotpi_info_constructor(sotpi_info_t *);
+static void i_sotpi_info_destructor(sotpi_info_t *);
sonodeops_t sotpi_sonodeops = {
+ sotpi_init, /* sop_init */
sotpi_accept, /* sop_accept */
sotpi_bind, /* sop_bind */
sotpi_listen, /* sop_listen */
sotpi_connect, /* sop_connect */
sotpi_recvmsg, /* sop_recvmsg */
sotpi_sendmsg, /* sop_sendmsg */
+ sotpi_sendmblk, /* sop_sendmblk */
sotpi_getpeername, /* sop_getpeername */
sotpi_getsockname, /* sop_getsockname */
sotpi_shutdown, /* sop_shutdown */
sotpi_getsockopt, /* sop_getsockopt */
- sotpi_setsockopt /* sop_setsockopt */
+ sotpi_setsockopt, /* sop_setsockopt */
+ sotpi_ioctl, /* sop_ioctl */
+ sotpi_poll, /* sop_poll */
+ sotpi_close, /* sop_close */
};
/*
+ * Return a TPI socket vnode.
+ *
+ * Note that sockets assume that the driver will clone (either itself
+ * or by using the clone driver) i.e. a socket() call will always
+ * result in a new vnode being created.
+ */
+
+/*
* Common create code for socket and accept. If tso is set the values
* from that node is used instead of issuing a T_INFO_REQ.
- *
- * Assumes that the caller has a VN_HOLD on accessvp.
- * The VN_RELE will occur either when sotpi_create() fails or when
- * the returned sonode is freed.
*/
-struct sonode *
-sotpi_create(vnode_t *accessvp, int domain, int type, int protocol, int version,
- struct sonode *tso, int *errorp)
+
+/* ARGSUSED */
+static struct sonode *
+sotpi_create(struct sockparams *sp, int family, int type, int protocol,
+ int version, int sflags, int *errorp, cred_t *cr)
{
struct sonode *so;
- vnode_t *vp;
- int flags, error;
+ kmem_cache_t *cp;
+ int sfamily = family;
- ASSERT(accessvp != NULL);
- vp = makesockvp(accessvp, domain, type, protocol);
- ASSERT(vp != NULL);
- so = VTOSO(vp);
+ ASSERT(sp->sp_sdev_info.sd_vnode != NULL);
+
+ if (family == AF_NCA) {
+ /*
+ * The request is for an NCA socket so for NL7C use the
+ * INET domain instead and mark NL7C_AF_NCA below.
+ */
+ family = AF_INET;
+ /*
+ * NL7C is not supported in the non-global zone,
+ * we enforce this restriction here.
+ */
+ if (getzoneid() != GLOBAL_ZONEID) {
+ *errorp = ENOTSUP;
+ return (NULL);
+ }
+ }
+
+ /*
+ * To be compatible with the old TPI socket implementation, ignore
+ * the sleep flag (sflags) passed in.
+ */
+ cp = (family == AF_UNIX) ? socktpi_unix_cache : socktpi_cache;
+ so = kmem_cache_alloc(cp, KM_SLEEP);
+ if (so == NULL) {
+ *errorp = ENOMEM;
+ return (NULL);
+ }
+
+ sonode_init(so, sp, family, type, protocol, &sotpi_sonodeops);
+ sotpi_info_init(so);
+
+ if (sfamily == AF_NCA) {
+ SOTOTPI(so)->sti_nl7c_flags = NL7C_AF_NCA;
+ }
+
+ if (version == SOV_DEFAULT)
+ version = so_default_version;
+
+ so->so_version = (short)version;
+ *errorp = 0;
+
+ return (so);
+}
+
+static void
+sotpi_destroy(struct sonode *so)
+{
+ kmem_cache_t *cp;
+ struct sockparams *origsp;
+
+ /*
+ * If there is a new dealloc function (i.e. smod_destroy_func),
+ * then it should check the correctness of the ops.
+ */
+
+ ASSERT(so->so_ops == &sotpi_sonodeops);
+
+ origsp = SOTOTPI(so)->sti_orig_sp;
+
+ sotpi_info_fini(so);
+
+ if (so->so_state & SS_FALLBACK_COMP) {
+ /*
+ * A fallback happened, which means that a sotpi_info_t struct
+ * was allocated (as opposed to being allocated from the TPI
+ * sonode cache). Therefore we explicitly free the struct
+ * here.
+ */
+ sotpi_info_destroy(so);
+ ASSERT(origsp != NULL);
+
+ origsp->sp_smod_info->smod_sock_destroy_func(so);
+ SOCKPARAMS_DEC_REF(origsp);
+ } else {
+ sonode_fini(so);
+ cp = (so->so_family == AF_UNIX) ? socktpi_unix_cache :
+ socktpi_cache;
+ kmem_cache_free(cp, so);
+ }
+}
+
+/* ARGSUSED1 */
+int
+sotpi_init(struct sonode *so, struct sonode *tso, struct cred *cr, int flags)
+{
+ major_t maj;
+ dev_t newdev;
+ struct vnode *vp;
+ int error = 0;
+ struct stdata *stp;
+
+ sotpi_info_t *sti = SOTOTPI(so);
+
+ dprint(1, ("sotpi_init()\n"));
+
+ /*
+ * Overwrite the sleep flag passed in, but that is OK
+ * as a TPI socket does not honor the sleep flag.
+ */
+ flags |= FREAD|FWRITE;
- flags = FREAD|FWRITE;
+ /*
+ * Record in so_flag that it is a clone.
+ */
+ if (getmajor(sti->sti_dev) == clone_major)
+ so->so_flag |= SOCLONE;
- if ((type == SOCK_STREAM || type == SOCK_DGRAM) &&
- (domain == AF_INET || domain == AF_INET6) &&
- (protocol == IPPROTO_TCP || protocol == IPPROTO_UDP ||
- protocol == IPPROTO_IP)) {
+ if ((so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM) &&
+ (so->so_family == AF_INET || so->so_family == AF_INET6) &&
+ (so->so_protocol == IPPROTO_TCP || so->so_protocol == IPPROTO_UDP ||
+ so->so_protocol == IPPROTO_IP)) {
/* Tell tcp or udp that it's talking to sockets */
flags |= SO_SOCKSTR;
@@ -253,25 +411,25 @@ sotpi_create(vnode_t *accessvp, int domain, int type, int protocol, int version,
* make direct calls between sockfs and transport.
* The final decision is left to socktpi_open().
*/
- so->so_state |= SS_DIRECT;
+ sti->sti_direct = 1;
ASSERT(so->so_type != SOCK_DGRAM || tso == NULL);
if (so->so_type == SOCK_STREAM && tso != NULL) {
- if (tso->so_state & SS_DIRECT) {
+ if (SOTOTPI(tso)->sti_direct) {
/*
- * Inherit SS_DIRECT from listener and pass
+ * Inherit sti_direct from listener and pass
* SO_ACCEPTOR open flag to tcp, indicating
* that this is an accept fast-path instance.
*/
flags |= SO_ACCEPTOR;
} else {
/*
- * SS_DIRECT is not set on listener, meaning
+ * sti_direct is not set on listener, meaning
* that the listener has been converted from
* a socket to a stream. Ensure that the
* acceptor inherits these settings.
*/
- so->so_state &= ~SS_DIRECT;
+ sti->sti_direct = 0;
flags &= ~SO_SOCKSTR;
}
}
@@ -284,30 +442,157 @@ sotpi_create(vnode_t *accessvp, int domain, int type, int protocol, int version,
flags |= SO_SOCKSTR;
}
- /* Initialize the kernel SSL proxy fields */
- so->so_kssl_type = KSSL_NO_PROXY;
- so->so_kssl_ent = NULL;
- so->so_kssl_ctx = NULL;
+ vp = SOTOV(so);
+ newdev = vp->v_rdev;
+ maj = getmajor(newdev);
+ ASSERT(STREAMSTAB(maj));
- if (error = socktpi_open(&vp, flags, CRED(), NULL)) {
- VN_RELE(vp);
- *errorp = error;
- return (NULL);
- }
+ error = stropen(vp, &newdev, flags, cr);
- if (error = so_strinit(so, tso)) {
- (void) VOP_CLOSE(vp, 0, 1, 0, CRED(), NULL);
- VN_RELE(vp);
- *errorp = error;
- return (NULL);
- }
+ stp = vp->v_stream;
+ if (error == 0) {
+ if (so->so_flag & SOCLONE)
+ ASSERT(newdev != vp->v_rdev);
+ mutex_enter(&so->so_lock);
+ sti->sti_dev = newdev;
+ vp->v_rdev = newdev;
+ mutex_exit(&so->so_lock);
- if (version == SOV_DEFAULT)
- version = so_default_version;
+ if (stp->sd_flag & STRISTTY) {
+ /*
+ * this is a post SVR4 tty driver - a socket can not
+ * be a controlling terminal. Fail the open.
+ */
+ (void) sotpi_close(so, flags, cr);
+ return (ENOTTY); /* XXX */
+ }
- so->so_version = (short)version;
+ ASSERT(stp->sd_wrq != NULL);
+ sti->sti_provinfo = tpi_findprov(stp->sd_wrq);
- return (so);
+ /*
+ * If caller is interested in doing direct function call
+ * interface to/from transport module, probe the module
+ * directly beneath the streamhead to see if it qualifies.
+ *
+ * We turn off the direct interface when qualifications fail.
+ * In the acceptor case, we simply turn off the sti_direct
+ * flag on the socket. We do the fallback after the accept
+ * has completed, before the new socket is returned to the
+ * application.
+ */
+ if (sti->sti_direct) {
+ queue_t *tq = stp->sd_wrq->q_next;
+
+ /*
+ * sti_direct is currently supported and tested
+ * only for tcp/udp; this is the main reason to
+ * have the following assertions.
+ */
+ ASSERT(so->so_family == AF_INET ||
+ so->so_family == AF_INET6);
+ ASSERT(so->so_protocol == IPPROTO_UDP ||
+ so->so_protocol == IPPROTO_TCP ||
+ so->so_protocol == IPPROTO_IP);
+ ASSERT(so->so_type == SOCK_DGRAM ||
+ so->so_type == SOCK_STREAM);
+
+ /*
+ * Abort direct call interface if the module directly
+ * underneath the stream head is not defined with the
+ * _D_DIRECT flag. This could happen in the tcp or
+ * udp case, when some other module is autopushed
+ * above it, or for some reasons the expected module
+ * isn't purely D_MP (which is the main requirement).
+ *
+ * Else, sti_direct is valid. If the read-side Q has
+ * _QSODIRECT set and uioasync is enabled, then set
+ * SS_SODIRECT to enable sodirect.
+ */
+ if (!socktpi_direct || !(tq->q_flag & _QDIRECT) ||
+ !(_OTHERQ(tq)->q_flag & _QDIRECT)) {
+ int rval;
+
+ /* Continue on without direct calls */
+ sti->sti_direct = 0;
+
+ /*
+ * Cannot issue ioctl on fallback socket since
+ * there is no conn associated with the queue.
+ * The fallback downcall will notify the proto
+ * of the change.
+ */
+ if (!(flags & SO_ACCEPTOR) &&
+ !(flags & SO_FALLBACK)) {
+ if ((error = strioctl(vp,
+ _SIOCSOCKFALLBACK, 0, 0, K_TO_K,
+ cr, &rval)) != 0) {
+ (void) sotpi_close(so, flags,
+ cr);
+ return (error);
+ }
+ }
+ } else if ((_OTHERQ(tq)->q_flag & _QSODIRECT) &&
+ uioasync.enabled) {
+ /* Enable sodirect */
+ so->so_state |= SS_SODIRECT;
+ }
+ }
+
+ if (flags & SO_FALLBACK) {
+ /*
+ * The stream created does not have a conn.
+ * Do stream setup after the conn has been assigned.
+ */
+ return (error);
+ }
+ if (error = so_strinit(so, tso)) {
+ (void) sotpi_close(so, flags, cr);
+ return (error);
+ }
+
+ /* Wildcard */
+ if (so->so_protocol != so->so_sockparams->sp_protocol) {
+ int protocol = so->so_protocol;
+ /*
+ * Issue SO_PROTOTYPE setsockopt.
+ */
+ error = sotpi_setsockopt(so, SOL_SOCKET, SO_PROTOTYPE,
+ &protocol, (t_uscalar_t)sizeof (protocol), cr);
+ if (error != 0) {
+ (void) sotpi_close(so, flags, cr);
+ /*
+ * Setsockopt often fails with ENOPROTOOPT but
+ * socket() should fail with
+ * EPROTONOSUPPORT/EPROTOTYPE.
+ */
+ return (EPROTONOSUPPORT);
+ }
+ }
+
+ } else {
+ /*
+ * While the same socket can not be reopened (unlike specfs)
+ * the stream head sets STREOPENFAIL when the autopush fails.
+ */
+ if ((stp != NULL) &&
+ (stp->sd_flag & STREOPENFAIL)) {
+ /*
+ * Open failed part way through.
+ */
+ mutex_enter(&stp->sd_lock);
+ stp->sd_flag &= ~STREOPENFAIL;
+ mutex_exit(&stp->sd_lock);
+ (void) sotpi_close(so, flags, cr);
+ return (error);
+ /*NOTREACHED*/
+ }
+ ASSERT(stp == NULL);
+ }
+ TRACE_4(TR_FAC_SOCKFS, TR_SOCKFS_OPEN,
+ "sockfs open:maj %d vp %p so %p error %d",
+ maj, vp, so, error);
+ return (error);
}
/*
@@ -318,15 +603,16 @@ sotpi_create(vnode_t *accessvp, int domain, int type, int protocol, int version,
static void
so_automatic_bind(struct sonode *so)
{
+ sotpi_info_t *sti = SOTOTPI(so);
ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6);
ASSERT(MUTEX_HELD(&so->so_lock));
ASSERT(!(so->so_state & SS_ISBOUND));
- ASSERT(so->so_unbind_mp);
+ ASSERT(sti->sti_unbind_mp);
- ASSERT(so->so_laddr_len <= so->so_laddr_maxlen);
- bzero(so->so_laddr_sa, so->so_laddr_len);
- so->so_laddr_sa->sa_family = so->so_family;
+ ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen);
+ bzero(sti->sti_laddr_sa, sti->sti_laddr_len);
+ sti->sti_laddr_sa->sa_family = so->so_family;
so->so_state |= SS_ISBOUND;
}
@@ -353,9 +639,10 @@ so_automatic_bind(struct sonode *so)
* - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected
* and no listen() has been done.
*/
+/* ARGSUSED */
static int
sotpi_bindlisten(struct sonode *so, struct sockaddr *name,
- socklen_t namelen, int backlog, int flags)
+ socklen_t namelen, int backlog, int flags, struct cred *cr)
{
struct T_bind_req bind_req;
struct T_bind_ack *bind_ack;
@@ -370,6 +657,7 @@ sotpi_bindlisten(struct sonode *so, struct sockaddr *name,
t_scalar_t PRIM_type = O_T_BIND_REQ;
boolean_t tcp_udp_xport;
void *nl7c = NULL;
+ sotpi_info_t *sti = SOTOTPI(so);
dprintso(so, 1, ("sotpi_bindlisten(%p, %p, %d, %d, 0x%x) %s\n",
(void *)so, (void *)name, namelen, backlog, flags,
@@ -390,10 +678,10 @@ sotpi_bindlisten(struct sonode *so, struct sockaddr *name,
* before binding. This message allocated when the socket is
* created but it might be have been consumed.
*/
- if (so->so_unbind_mp == NULL) {
+ if (sti->sti_unbind_mp == NULL) {
dprintso(so, 1, ("sobind: allocating unbind_req\n"));
/* NOTE: holding so_lock while sleeping */
- so->so_unbind_mp =
+ sti->sti_unbind_mp =
soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP);
}
@@ -405,17 +693,17 @@ sotpi_bindlisten(struct sonode *so, struct sockaddr *name,
ASSERT(name == NULL && namelen == 0);
if (so->so_family == AF_UNIX) {
- ASSERT(so->so_ux_bound_vp);
- addr = &so->so_ux_laddr;
- addrlen = (t_uscalar_t)sizeof (so->so_ux_laddr);
+ ASSERT(sti->sti_ux_bound_vp);
+ addr = &sti->sti_ux_laddr;
+ addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr);
dprintso(so, 1, ("sobind rebind UNIX: addrlen %d, "
"addr 0x%p, vp %p\n",
addrlen,
(void *)((struct so_ux_addr *)addr)->soua_vp,
- (void *)so->so_ux_bound_vp));
+ (void *)sti->sti_ux_bound_vp));
} else {
- addr = so->so_laddr_sa;
- addrlen = (t_uscalar_t)so->so_laddr_len;
+ addr = sti->sti_laddr_sa;
+ addrlen = (t_uscalar_t)sti->sti_laddr_len;
}
} else if (flags & _SOBIND_UNSPEC) {
ASSERT(name == NULL && namelen == 0);
@@ -436,21 +724,21 @@ sotpi_bindlisten(struct sonode *so, struct sockaddr *name,
* Use an address with same size as struct sockaddr
* just like BSD.
*/
- so->so_laddr_len =
+ sti->sti_laddr_len =
(socklen_t)sizeof (struct sockaddr);
- ASSERT(so->so_laddr_len <= so->so_laddr_maxlen);
- bzero(so->so_laddr_sa, so->so_laddr_len);
- so->so_laddr_sa->sa_family = so->so_family;
+ ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen);
+ bzero(sti->sti_laddr_sa, sti->sti_laddr_len);
+ sti->sti_laddr_sa->sa_family = so->so_family;
/*
* Pass down an address with the implicit bind
* magic number and the rest all zeros.
* The transport will return a unique address.
*/
- so->so_ux_laddr.soua_vp = NULL;
- so->so_ux_laddr.soua_magic = SOU_MAGIC_IMPLICIT;
- addr = &so->so_ux_laddr;
- addrlen = (t_uscalar_t)sizeof (so->so_ux_laddr);
+ sti->sti_ux_laddr.soua_vp = NULL;
+ sti->sti_ux_laddr.soua_magic = SOU_MAGIC_IMPLICIT;
+ addr = &sti->sti_ux_laddr;
+ addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr);
break;
case AF_INET:
@@ -459,12 +747,12 @@ sotpi_bindlisten(struct sonode *so, struct sockaddr *name,
* An unspecified bind in TPI has a NULL address.
* Set the address in sockfs to have the sa_family.
*/
- so->so_laddr_len = (so->so_family == AF_INET) ?
+ sti->sti_laddr_len = (so->so_family == AF_INET) ?
(socklen_t)sizeof (sin_t) :
(socklen_t)sizeof (sin6_t);
- ASSERT(so->so_laddr_len <= so->so_laddr_maxlen);
- bzero(so->so_laddr_sa, so->so_laddr_len);
- so->so_laddr_sa->sa_family = so->so_family;
+ ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen);
+ bzero(sti->sti_laddr_sa, sti->sti_laddr_len);
+ sti->sti_laddr_sa->sa_family = so->so_family;
addr = NULL;
addrlen = 0;
break;
@@ -478,8 +766,8 @@ sotpi_bindlisten(struct sonode *so, struct sockaddr *name,
* protocol families. For example, AF_X25 does not
* have a family field.
*/
- bzero(so->so_laddr_sa, so->so_laddr_len);
- so->so_laddr_len = 0; /* XXX correct? */
+ bzero(sti->sti_laddr_sa, sti->sti_laddr_len);
+ sti->sti_laddr_len = 0; /* XXX correct? */
addr = NULL;
addrlen = 0;
break;
@@ -525,6 +813,7 @@ sotpi_bindlisten(struct sonode *so, struct sockaddr *name,
goto done;
}
}
+
/* X/Open requires this check */
if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
if (xnet_check_print) {
@@ -656,7 +945,7 @@ sotpi_bindlisten(struct sonode *so, struct sockaddr *name,
break;
}
- if (namelen > (t_uscalar_t)so->so_laddr_maxlen) {
+ if (namelen > (t_uscalar_t)sti->sti_laddr_maxlen) {
error = ENAMETOOLONG;
eprintsoline(so, error);
goto done;
@@ -664,26 +953,26 @@ sotpi_bindlisten(struct sonode *so, struct sockaddr *name,
/*
* Save local address.
*/
- so->so_laddr_len = (socklen_t)namelen;
- ASSERT(so->so_laddr_len <= so->so_laddr_maxlen);
- bcopy(name, so->so_laddr_sa, namelen);
+ sti->sti_laddr_len = (socklen_t)namelen;
+ ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen);
+ bcopy(name, sti->sti_laddr_sa, namelen);
- addr = so->so_laddr_sa;
- addrlen = (t_uscalar_t)so->so_laddr_len;
+ addr = sti->sti_laddr_sa;
+ addrlen = (t_uscalar_t)sti->sti_laddr_len;
switch (so->so_family) {
case AF_INET6:
case AF_INET:
break;
case AF_UNIX: {
struct sockaddr_un *soun =
- (struct sockaddr_un *)so->so_laddr_sa;
+ (struct sockaddr_un *)sti->sti_laddr_sa;
struct vnode *vp, *rvp;
struct vattr vattr;
- ASSERT(so->so_ux_bound_vp == NULL);
+ ASSERT(sti->sti_ux_bound_vp == NULL);
/*
* Create vnode for the specified path name.
- * Keep vnode held with a reference in so_ux_bound_vp.
+ * Keep vnode held with a reference in sti_ux_bound_vp.
* Use the vnode pointer as the address used in the
* bind with the transport.
*
@@ -691,7 +980,7 @@ sotpi_bindlisten(struct sonode *so, struct sockaddr *name,
* not observe the umask.
*/
/* MAXPATHLEN + soun_family + nul termination */
- if (so->so_laddr_len >
+ if (sti->sti_laddr_len >
(socklen_t)(MAXPATHLEN + sizeof (short) + 1)) {
error = ENAMETOOLONG;
eprintsoline(so, error);
@@ -712,7 +1001,7 @@ sotpi_bindlisten(struct sonode *so, struct sockaddr *name,
/*
* Establish pointer from the underlying filesystem
* vnode to the socket node.
- * so_ux_bound_vp and v_stream->sd_vnode form the
+ * sti_ux_bound_vp and v_stream->sd_vnode form the
* cross-linkage between the underlying filesystem
* node and the socket node.
*/
@@ -726,7 +1015,7 @@ sotpi_bindlisten(struct sonode *so, struct sockaddr *name,
ASSERT(SOTOV(so)->v_stream);
mutex_enter(&vp->v_lock);
vp->v_stream = SOTOV(so)->v_stream;
- so->so_ux_bound_vp = vp;
+ sti->sti_ux_bound_vp = vp;
mutex_exit(&vp->v_lock);
/*
@@ -734,13 +1023,14 @@ sotpi_bindlisten(struct sonode *so, struct sockaddr *name,
* (together with the magic number to avoid conflicts
* with implicit binds) in the transport provider.
*/
- so->so_ux_laddr.soua_vp = (void *)so->so_ux_bound_vp;
- so->so_ux_laddr.soua_magic = SOU_MAGIC_EXPLICIT;
- addr = &so->so_ux_laddr;
- addrlen = (t_uscalar_t)sizeof (so->so_ux_laddr);
+ sti->sti_ux_laddr.soua_vp =
+ (void *)sti->sti_ux_bound_vp;
+ sti->sti_ux_laddr.soua_magic = SOU_MAGIC_EXPLICIT;
+ addr = &sti->sti_ux_laddr;
+ addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr);
dprintso(so, 1, ("sobind UNIX: addrlen %d, addr %p\n",
addrlen,
- ((struct so_ux_addr *)addr)->soua_vp));
+ (void *)((struct so_ux_addr *)addr)->soua_vp));
break;
}
} /* end switch (so->so_family) */
@@ -771,14 +1061,14 @@ sotpi_bindlisten(struct sonode *so, struct sockaddr *name,
if (nl7c_enabled && ((addr != NULL &&
(so->so_family == AF_INET || so->so_family == AF_INET6) &&
(nl7c = nl7c_lookup_addr(addr, addrlen))) ||
- so->so_nl7c_flags == NL7C_AF_NCA)) {
+ sti->sti_nl7c_flags == NL7C_AF_NCA)) {
/*
* NL7C is not supported in non-global zones,
* we enforce this restriction here.
*/
if (so->so_zoneid == GLOBAL_ZONEID) {
/* An NL7C socket, mark it */
- so->so_nl7c_flags |= NL7C_ENABLED;
+ sti->sti_nl7c_flags |= NL7C_ENABLED;
if (nl7c == NULL) {
/*
* Was an AF_NCA bind() so add it to the
@@ -789,6 +1079,7 @@ sotpi_bindlisten(struct sonode *so, struct sockaddr *name,
} else
nl7c = NULL;
}
+
/*
* We send a T_BIND_REQ for TCP/UDP since we know it supports it,
* for other transports we will send in a O_T_BIND_REQ.
@@ -804,9 +1095,9 @@ sotpi_bindlisten(struct sonode *so, struct sockaddr *name,
/* NOTE: holding so_lock while sleeping */
mp = soallocproto2(&bind_req, sizeof (bind_req),
addr, addrlen, 0, _ALLOC_SLEEP);
- so->so_state &= ~SS_LADDR_VALID;
+ sti->sti_laddr_valid = 0;
- /* Done using so_laddr_sa - can drop the lock */
+ /* Done using sti_laddr_sa - can drop the lock */
mutex_exit(&so->so_lock);
/*
@@ -820,13 +1111,15 @@ sotpi_bindlisten(struct sonode *so, struct sockaddr *name,
(so->so_family == AF_INET || so->so_family == AF_INET6) &&
so->so_type == SOCK_STREAM) {
- if (so->so_kssl_ent != NULL) {
- kssl_release_ent(so->so_kssl_ent, so, so->so_kssl_type);
- so->so_kssl_ent = NULL;
+ if (sti->sti_kssl_ent != NULL) {
+ kssl_release_ent(sti->sti_kssl_ent, so,
+ sti->sti_kssl_type);
+ sti->sti_kssl_ent = NULL;
}
- so->so_kssl_type = kssl_check_proxy(mp, so, &so->so_kssl_ent);
- switch (so->so_kssl_type) {
+ sti->sti_kssl_type = kssl_check_proxy(mp, so,
+ &sti->sti_kssl_ent);
+ switch (sti->sti_kssl_type) {
case KSSL_NO_PROXY:
break;
@@ -865,11 +1158,11 @@ skip_transport:
/* Mark as bound. This will be undone if we detect errors below. */
if (flags & _SOBIND_NOXLATE) {
ASSERT(so->so_family == AF_UNIX);
- so->so_state |= SS_FADDR_NOXLATE;
+ sti->sti_faddr_noxlate = 1;
}
ASSERT(!(so->so_state & SS_ISBOUND) || (flags & _SOBIND_REBIND));
so->so_state |= SS_ISBOUND;
- ASSERT(so->so_unbind_mp);
+ ASSERT(sti->sti_unbind_mp);
/* note that we've already set SS_ACCEPTCONN above */
@@ -879,7 +1172,7 @@ skip_transport:
* in return.
*/
addrlen = (t_uscalar_t)(so->so_family == AF_UNIX ?
- sizeof (so->so_ux_laddr) : so->so_laddr_len);
+ sizeof (sti->sti_ux_laddr) : sti->sti_laddr_len);
bind_ack = (struct T_bind_ack *)mp->b_rptr;
/*
@@ -965,7 +1258,7 @@ skip_transport:
sin_t *rname, *aname;
rname = (sin_t *)addr;
- aname = (sin_t *)so->so_laddr_sa;
+ aname = (sin_t *)sti->sti_laddr_sa;
/*
* Take advantage of the alignment
@@ -990,7 +1283,7 @@ skip_transport:
*/
if (aname->sin_port == 0)
aname->sin_port = rname->sin_port;
- so->so_state |= SS_LADDR_VALID;
+ sti->sti_laddr_valid = 1;
break;
}
if (aname->sin_port != 0 &&
@@ -1031,31 +1324,31 @@ skip_transport:
break;
}
case AF_UNIX:
- if (bcmp(addr, &so->so_ux_laddr, addrlen) != 0) {
+ if (bcmp(addr, &sti->sti_ux_laddr, addrlen) != 0) {
freemsg(mp);
error = EADDRINUSE;
eprintsoline(so, error);
eprintso(so,
("addrlen %d, addr 0x%x, vp %p\n",
addrlen, *((int *)addr),
- (void *)so->so_ux_bound_vp));
+ (void *)sti->sti_ux_bound_vp));
goto done;
}
- so->so_state |= SS_LADDR_VALID;
+ sti->sti_laddr_valid = 1;
break;
default:
/*
* NOTE: This assumes that addresses can be
* byte-compared for equivalence.
*/
- if (bcmp(addr, so->so_laddr_sa, addrlen) != 0) {
+ if (bcmp(addr, sti->sti_laddr_sa, addrlen) != 0) {
freemsg(mp);
error = EADDRINUSE;
eprintsoline(so, error);
goto done;
}
/*
- * Don't mark SS_LADDR_VALID, as we cannot be
+ * Don't mark sti_laddr_valid, as we cannot be
* sure that the returned address is the real
* bound address when talking to an unknown
* transport.
@@ -1071,8 +1364,8 @@ skip_transport:
* caching info here is much better performance than
* a TPI/STREAMS trip to the transport for getsockname.
* Any which can't for some reason _must_ _not_ set
- * LADDR_VALID here for the caching version of getsockname
- * to not break;
+ * sti_laddr_valid here for the caching version of
+ * getsockname to not break;
*/
switch (so->so_family) {
case AF_UNIX:
@@ -1080,18 +1373,18 @@ skip_transport:
* Record the address bound with the transport
* for use by socketpair.
*/
- bcopy(addr, &so->so_ux_laddr, addrlen);
- so->so_state |= SS_LADDR_VALID;
+ bcopy(addr, &sti->sti_ux_laddr, addrlen);
+ sti->sti_laddr_valid = 1;
break;
case AF_INET:
case AF_INET6:
- ASSERT(so->so_laddr_len <= so->so_laddr_maxlen);
- bcopy(addr, so->so_laddr_sa, so->so_laddr_len);
- so->so_state |= SS_LADDR_VALID;
+ ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen);
+ bcopy(addr, sti->sti_laddr_sa, sti->sti_laddr_len);
+ sti->sti_laddr_valid = 1;
break;
default:
/*
- * Don't mark SS_LADDR_VALID, as we cannot be
+ * Don't mark sti_laddr_valid, as we cannot be
* sure that the returned address is the real
* bound address when talking to an unknown
* transport.
@@ -1131,7 +1424,6 @@ done:
so_unlock_single(so, SOLOCKED);
mutex_exit(&so->so_lock);
} else {
- /* If the caller held the lock don't release it here */
ASSERT(MUTEX_HELD(&so->so_lock));
ASSERT(so->so_flag & SOLOCKED);
}
@@ -1141,13 +1433,13 @@ done:
/* bind the socket */
static int
sotpi_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen,
- int flags)
+ int flags, struct cred *cr)
{
if ((flags & _SOBIND_SOCKETPAIR) == 0)
- return (sotpi_bindlisten(so, name, namelen, 0, flags));
+ return (sotpi_bindlisten(so, name, namelen, 0, flags, cr));
flags &= ~_SOBIND_SOCKETPAIR;
- return (sotpi_bindlisten(so, name, namelen, 1, flags));
+ return (sotpi_bindlisten(so, name, namelen, 1, flags, cr));
}
/*
@@ -1162,6 +1454,7 @@ sotpi_unbind(struct sonode *so, int flags)
struct T_unbind_req unbind_req;
int error = 0;
mblk_t *mp;
+ sotpi_info_t *sti = SOTOTPI(so);
dprintso(so, 1, ("sotpi_unbind(%p, 0x%x) %s\n",
(void *)so, flags, pr_state(so->so_state, so->so_mode)));
@@ -1211,26 +1504,26 @@ sotpi_unbind(struct sonode *so, int flags)
*/
vnode_t *vp;
- if ((vp = so->so_ux_bound_vp) != NULL) {
+ if ((vp = sti->sti_ux_bound_vp) != NULL) {
/* Undo any SSL proxy setup */
if ((so->so_family == AF_INET ||
so->so_family == AF_INET6) &&
(so->so_type == SOCK_STREAM) &&
- (so->so_kssl_ent != NULL)) {
- kssl_release_ent(so->so_kssl_ent, so,
- so->so_kssl_type);
- so->so_kssl_ent = NULL;
- so->so_kssl_type = KSSL_NO_PROXY;
+ (sti->sti_kssl_ent != NULL)) {
+ kssl_release_ent(sti->sti_kssl_ent, so,
+ sti->sti_kssl_type);
+ sti->sti_kssl_ent = NULL;
+ sti->sti_kssl_type = KSSL_NO_PROXY;
}
-
- so->so_ux_bound_vp = NULL;
+ sti->sti_ux_bound_vp = NULL;
vn_rele_stream(vp);
}
/* Clear out address */
- so->so_laddr_len = 0;
+ sti->sti_laddr_len = 0;
}
- so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN|SS_LADDR_VALID);
+ so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN);
+ sti->sti_laddr_valid = 0;
done:
@@ -1246,15 +1539,17 @@ done:
* For TPI conforming transports this has to first unbind with the transport
* and then bind again using the new backlog.
*/
+/* ARGSUSED */
int
-sotpi_listen(struct sonode *so, int backlog)
+sotpi_listen(struct sonode *so, int backlog, struct cred *cr)
{
int error = 0;
+ sotpi_info_t *sti = SOTOTPI(so);
dprintso(so, 1, ("sotpi_listen(%p, %d) %s\n",
(void *)so, backlog, pr_state(so->so_state, so->so_mode)));
- if (so->so_serv_type == T_CLTS)
+ if (sti->sti_serv_type == T_CLTS)
return (EOPNOTSUPP);
/*
@@ -1276,24 +1571,6 @@ sotpi_listen(struct sonode *so, int backlog)
mutex_enter(&so->so_lock);
so_lock_single(so); /* Set SOLOCKED */
- if (backlog < 0)
- backlog = 0;
- /*
- * Use the same qlimit as in BSD. BSD checks the qlimit
- * before queuing the next connection implying that a
- * listen(sock, 0) allows one connection to be queued.
- * BSD also uses 1.5 times the requested backlog.
- *
- * XNS Issue 4 required a strict interpretation of the backlog.
- * This has been waived subsequently for Issue 4 and the change
- * incorporated in XNS Issue 5. So we aren't required to do
- * anything special for XPG apps.
- */
- if (backlog >= (INT_MAX - 1) / 3)
- backlog = INT_MAX;
- else
- backlog = backlog * 3 / 2 + 1;
-
/*
* If the listen doesn't change the backlog we do nothing.
* This avoids an EPROTO error from the transport.
@@ -1311,7 +1588,7 @@ sotpi_listen(struct sonode *so, int backlog)
goto done;
}
error = sotpi_bindlisten(so, NULL, 0, backlog,
- _SOBIND_UNSPEC|_SOBIND_LOCK_HELD|_SOBIND_LISTEN);
+ _SOBIND_UNSPEC|_SOBIND_LOCK_HELD|_SOBIND_LISTEN, cr);
} else if (backlog > 0) {
/*
* AF_INET{,6} hack to avoid losing the port.
@@ -1327,7 +1604,7 @@ sotpi_listen(struct sonode *so, int backlog)
goto done;
}
error = sotpi_bindlisten(so, NULL, 0, backlog,
- _SOBIND_REBIND|_SOBIND_LOCK_HELD|_SOBIND_LISTEN);
+ _SOBIND_REBIND|_SOBIND_LOCK_HELD|_SOBIND_LISTEN, cr);
} else {
so->so_state |= SS_ACCEPTCONN;
so->so_backlog = backlog;
@@ -1349,7 +1626,7 @@ done:
* the current use of sodisconnect(seqno == -1) is only for shutdown
* so there is no point (and potentially incorrect) to unbind.
*/
-int
+static int
sodisconnect(struct sonode *so, t_scalar_t seqno, int flags)
{
struct T_discon_req discon_req;
@@ -1406,8 +1683,9 @@ sodisconnect(struct sonode *so, t_scalar_t seqno, int flags)
* is allowed to complete. However, it is not possible to
* assert that SS_ISCONNECTED|SS_ISCONNECTING are set.
*/
- so->so_state &=
- ~(SS_ISCONNECTED|SS_ISCONNECTING|SS_LADDR_VALID|SS_FADDR_VALID);
+ so->so_state &= ~(SS_ISCONNECTED|SS_ISCONNECTING);
+ SOTOTPI(so)->sti_laddr_valid = 0;
+ SOTOTPI(so)->sti_faddr_valid = 0;
done:
if (!(flags & _SODISCONNECT_LOCK_HELD)) {
so_unlock_single(so, SOLOCKED);
@@ -1420,8 +1698,10 @@ done:
return (error);
}
+/* ARGSUSED */
int
-sotpi_accept(struct sonode *so, int fflag, struct sonode **nsop)
+sotpi_accept(struct sonode *so, int fflag, struct cred *cr,
+ struct sonode **nsop)
{
struct T_conn_ind *conn_ind;
struct T_conn_res *conn_res;
@@ -1436,6 +1716,8 @@ sotpi_accept(struct sonode *so, int fflag, struct sonode **nsop)
t_scalar_t PRIM_type;
t_scalar_t SEQ_number;
size_t sinlen;
+ sotpi_info_t *sti = SOTOTPI(so);
+ sotpi_info_t *nsti;
dprintso(so, 1, ("sotpi_accept(%p, 0x%x, %p) %s\n",
(void *)so, fflag, (void *)nsop,
@@ -1454,7 +1736,7 @@ again:
if ((error = sowaitconnind(so, fflag, &mp)) != 0)
goto e_bad;
- ASSERT(mp);
+ ASSERT(mp != NULL);
conn_ind = (struct T_conn_ind *)mp->b_rptr;
ctxmp = mp->b_cont;
@@ -1475,8 +1757,7 @@ again:
switch (so->so_family) {
case AF_INET:
case AF_INET6:
- if ((optlen == sizeof (intptr_t)) &&
- ((so->so_state & SS_DIRECT) != 0)) {
+ if ((optlen == sizeof (intptr_t)) && (sti->sti_direct != 0)) {
bcopy(mp->b_rptr + conn_ind->OPT_offset,
&opt, conn_ind->OPT_length);
} else {
@@ -1489,7 +1770,7 @@ again:
* problems when sockfs sends a normal T_CONN_RES
* message down the new stream.
*/
- if (so->so_state & SS_DIRECT) {
+ if (sti->sti_direct) {
int rval;
/*
* For consistency we inform tcp to disable
@@ -1498,7 +1779,7 @@ again:
* because no data will ever travel upstream
* on the listening socket.
*/
- so->so_state &= ~SS_DIRECT;
+ sti->sti_direct = 0;
(void) strioctl(SOTOV(so), _SIOCSOCKFALLBACK,
0, 0, K_TO_K, CRED(), &rval);
}
@@ -1519,7 +1800,7 @@ again:
}
}
if (so->so_family == AF_UNIX) {
- if (!(so->so_state & SS_FADDR_NOXLATE)) {
+ if (!sti->sti_faddr_noxlate) {
src = NULL;
srclen = 0;
}
@@ -1533,9 +1814,7 @@ again:
/*
* Create the new socket.
*/
- VN_HOLD(so->so_accessvp);
- nso = sotpi_create(so->so_accessvp, so->so_family, so->so_type,
- so->so_protocol, so->so_version, so, &error);
+ nso = socket_newconn(so, NULL, NULL, SOCKET_SLEEP, &error);
if (nso == NULL) {
ASSERT(error != 0);
/*
@@ -1549,6 +1828,7 @@ again:
goto e_disc_unl;
}
nvp = SOTOV(nso);
+ nsti = SOTOTPI(nso);
/*
* If the transport sent up an SSL connection context, then attach
@@ -1561,7 +1841,7 @@ again:
* This kssl_ctx_t is already held for us by the transport.
* So, we don't need to do a kssl_hold_ctx() here.
*/
- nso->so_kssl_ctx = *((kssl_ctx_t *)ctxmp->b_rptr);
+ nsti->sti_kssl_ctx = *((kssl_ctx_t *)ctxmp->b_rptr);
freemsg(ctxmp);
mp->b_cont = NULL;
strsetrwputdatahooks(nvp, strsock_kssl_input,
@@ -1572,7 +1852,6 @@ again:
mutex_enter(nso->so_direct->sod_lockp);
SOD_DISABLE(nso->so_direct);
mutex_exit(nso->so_direct->sod_lockp);
- nso->so_direct = NULL;
}
}
#ifdef DEBUG
@@ -1591,16 +1870,16 @@ again:
* NOTE: AF_UNIX NUL termination is ensured by the sender's
* copyin_name().
*/
- if (srclen > (t_uscalar_t)nso->so_faddr_maxlen) {
+ if (srclen > (t_uscalar_t)nsti->sti_faddr_maxlen) {
error = EINVAL;
freemsg(mp);
eprintsoline(so, error);
goto disconnect_vp_unlocked;
}
- nso->so_faddr_len = (socklen_t)srclen;
- ASSERT(so->so_faddr_len <= so->so_faddr_maxlen);
- bcopy(src, nso->so_faddr_sa, srclen);
- nso->so_state |= SS_FADDR_VALID;
+ nsti->sti_faddr_len = (socklen_t)srclen;
+ ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen);
+ bcopy(src, nsti->sti_faddr_sa, srclen);
+ nsti->sti_faddr_valid = 1;
if ((DB_REF(mp) > 1) || MBLKSIZE(mp) <
(sizeof (struct T_conn_res) + sizeof (intptr_t))) {
@@ -1654,7 +1933,8 @@ again:
mutex_exit(&nso->so_lock);
} else {
/* Perform NULL bind with the transport provider. */
- if ((error = sotpi_bind(nso, NULL, 0, _SOBIND_UNSPEC)) != 0) {
+ if ((error = sotpi_bind(nso, NULL, 0, _SOBIND_UNSPEC,
+ cr)) != 0) {
ASSERT(error != ENOBUFS);
freemsg(mp);
eprintsoline(nso, error);
@@ -1671,7 +1951,8 @@ again:
* can access the new socket thus we relax the locking.
*/
nso->so_pgrp = so->so_pgrp;
- nso->so_state |= so->so_state & (SS_ASYNC|SS_FADDR_NOXLATE);
+ nso->so_state |= so->so_state & SS_ASYNC;
+ nsti->sti_faddr_noxlate = sti->sti_faddr_noxlate;
if (nso->so_pgrp != 0) {
if ((error = so_set_events(nso, nvp, CRED())) != 0) {
@@ -1695,7 +1976,12 @@ again:
if (nso->so_options & SO_LINGER)
nso->so_linger = so->so_linger;
- if ((so->so_state & SS_DIRECT) != 0) {
+ /*
+ * Note that the following sti_direct code path should be
+ * removed once we are confident that the direct sockets
+ * do not result in any degradation.
+ */
+ if (sti->sti_direct) {
ASSERT(opt != NULL);
@@ -1731,22 +2017,23 @@ again:
sin = (sin_t *)(ack_mp->b_rptr +
sizeof (struct T_ok_ack));
- bcopy(sin, nso->so_laddr_sa, sizeof (sin_t));
- nso->so_laddr_len = sizeof (sin_t);
+ bcopy(sin, nsti->sti_laddr_sa, sizeof (sin_t));
+ nsti->sti_laddr_len = sizeof (sin_t);
} else {
sin6_t *sin6;
sin6 = (sin6_t *)(ack_mp->b_rptr +
sizeof (struct T_ok_ack));
- bcopy(sin6, nso->so_laddr_sa, sizeof (sin6_t));
- nso->so_laddr_len = sizeof (sin6_t);
+ bcopy(sin6, nsti->sti_laddr_sa, sizeof (sin6_t));
+ nsti->sti_laddr_len = sizeof (sin6_t);
}
freemsg(ack_mp);
- nso->so_state |= SS_ISCONNECTED | SS_LADDR_VALID;
- nso->so_priv = opt;
+ nso->so_state |= SS_ISCONNECTED;
+ nso->so_proto_handle = (sock_lower_handle_t)opt;
+ nsti->sti_laddr_valid = 1;
- if (so->so_nl7c_flags & NL7C_ENABLED) {
+ if (sti->sti_nl7c_flags & NL7C_ENABLED) {
/*
* A NL7C marked listen()er so the new socket
* inherits the listen()er's NL7C state, except
@@ -1755,14 +2042,15 @@ again:
* Only call NL7C to process the new socket if
* the listen socket allows blocking i/o.
*/
- nso->so_nl7c_flags = so->so_nl7c_flags & (~NL7C_POLLIN);
+ nsti->sti_nl7c_flags =
+ sti->sti_nl7c_flags & (~NL7C_POLLIN);
if (so->so_state & (SS_NONBLOCK|SS_NDELAY)) {
/*
* Nonblocking accept() just make it
* persist to defer processing to the
* read-side syscall (e.g. read).
*/
- nso->so_nl7c_flags |= NL7C_SOPERSIST;
+ nsti->sti_nl7c_flags |= NL7C_SOPERSIST;
} else if (nl7c_process(nso, B_FALSE)) {
/*
* NL7C has completed processing on the
@@ -1782,12 +2070,12 @@ again:
/*
* It's possible, through the use of autopush for example,
- * that the acceptor stream may not support SS_DIRECT
- * semantics. If the new socket does not support SS_DIRECT
+ * that the acceptor stream may not support sti_direct
+ * semantics. If the new socket does not support sti_direct
* we issue a _SIOCSOCKFALLBACK to inform the transport
* as we would in the I_PUSH case.
*/
- if (!(nso->so_state & SS_DIRECT)) {
+ if (nsti->sti_direct == 0) {
int rval;
if ((error = strioctl(SOTOV(nso), _SIOCSOCKFALLBACK,
@@ -1842,7 +2130,7 @@ again:
conn_res->PRIM_type = O_T_CONN_RES;
PRIM_type = O_T_CONN_RES;
} else {
- conn_res->ACCEPTOR_id = nso->so_acceptor_id;
+ conn_res->ACCEPTOR_id = nsti->sti_acceptor_id;
conn_res->PRIM_type = T_CONN_RES;
PRIM_type = T_CONN_RES;
}
@@ -1871,27 +2159,28 @@ again:
* If there is a sin/sin6 appended onto the T_OK_ACK use
* that to set the local address. If this is not present
* then we zero out the address and don't set the
- * SS_LADDR_VALID bit. For AF_UNIX endpoints we copy over
+ * sti_laddr_valid bit. For AF_UNIX endpoints we copy over
* the pathname from the listening socket.
*/
sinlen = (nso->so_family == AF_INET) ? sizeof (sin_t) : sizeof (sin6_t);
if ((nso->so_family == AF_INET) || (nso->so_family == AF_INET6) &&
MBLKL(ack_mp) == (sizeof (struct T_ok_ack) + sinlen)) {
ack_mp->b_rptr += sizeof (struct T_ok_ack);
- bcopy(ack_mp->b_rptr, nso->so_laddr_sa, sinlen);
- nso->so_laddr_len = sinlen;
- nso->so_state |= SS_LADDR_VALID;
+ bcopy(ack_mp->b_rptr, nsti->sti_laddr_sa, sinlen);
+ nsti->sti_laddr_len = sinlen;
+ nsti->sti_laddr_valid = 1;
} else if (nso->so_family == AF_UNIX) {
ASSERT(so->so_family == AF_UNIX);
- nso->so_laddr_len = so->so_laddr_len;
- ASSERT(nso->so_laddr_len <= nso->so_laddr_maxlen);
- bcopy(so->so_laddr_sa, nso->so_laddr_sa, nso->so_laddr_len);
- nso->so_state |= SS_LADDR_VALID;
+ nsti->sti_laddr_len = sti->sti_laddr_len;
+ ASSERT(nsti->sti_laddr_len <= nsti->sti_laddr_maxlen);
+ bcopy(sti->sti_laddr_sa, nsti->sti_laddr_sa,
+ nsti->sti_laddr_len);
+ nsti->sti_laddr_valid = 1;
} else {
- nso->so_laddr_len = so->so_laddr_len;
- ASSERT(nso->so_laddr_len <= nso->so_laddr_maxlen);
- bzero(nso->so_laddr_sa, nso->so_addr_size);
- nso->so_laddr_sa->sa_family = nso->so_family;
+ nsti->sti_laddr_len = sti->sti_laddr_len;
+ ASSERT(nsti->sti_laddr_len <= nsti->sti_laddr_maxlen);
+ bzero(nsti->sti_laddr_sa, nsti->sti_addr_size);
+ nsti->sti_laddr_sa->sa_family = nso->so_family;
}
freemsg(ack_mp);
@@ -1953,7 +2242,8 @@ sotpi_connect(struct sonode *so,
const struct sockaddr *name,
socklen_t namelen,
int fflag,
- int flags)
+ int flags,
+ struct cred *cr)
{
struct T_conn_req conn_req;
int error = 0;
@@ -1963,6 +2253,7 @@ sotpi_connect(struct sonode *so,
void *addr;
socklen_t addrlen;
boolean_t need_unlock;
+ sotpi_info_t *sti = SOTOTPI(so);
dprintso(so, 1, ("sotpi_connect(%p, %p, %d, 0x%x, 0x%x) %s\n",
(void *)so, (void *)name, namelen, fflag, flags,
@@ -1971,13 +2262,13 @@ sotpi_connect(struct sonode *so,
/*
* Preallocate the T_CONN_REQ mblk before grabbing SOLOCKED to
* avoid sleeping for memory with SOLOCKED held.
- * We know that the T_CONN_REQ can't be larger than 2 * so_faddr_maxlen
+ * We know that the T_CONN_REQ can't be larger than 2 * sti_faddr_maxlen
* + sizeof (struct T_opthdr).
* (the AF_UNIX so_ux_addr_xlate() does not make the address
- * exceed so_faddr_maxlen).
+ * exceed sti_faddr_maxlen).
*/
mp = soallocproto(sizeof (struct T_conn_req) +
- 2 * so->so_faddr_maxlen + sizeof (struct T_opthdr), _ALLOC_INTR);
+ 2 * sti->sti_faddr_maxlen + sizeof (struct T_opthdr), _ALLOC_INTR);
if (mp == NULL) {
/*
* Connect can not fail with ENOBUFS. A signal was
@@ -2001,12 +2292,12 @@ sotpi_connect(struct sonode *so,
so_lock_single(so); /* Set SOLOCKED */
need_unlock = B_TRUE;
- if (so->so_unbind_mp == NULL) {
+ if (sti->sti_unbind_mp == NULL) {
dprintso(so, 1, ("sotpi_connect: allocating unbind_req\n"));
/* NOTE: holding so_lock while sleeping */
- so->so_unbind_mp =
+ sti->sti_unbind_mp =
soallocproto(sizeof (struct T_unbind_req), _ALLOC_INTR);
- if (so->so_unbind_mp == NULL) {
+ if (sti->sti_unbind_mp == NULL) {
error = EINTR;
goto done;
}
@@ -2034,7 +2325,7 @@ sotpi_connect(struct sonode *so,
so_automatic_bind(so);
} else {
error = sotpi_bind(so, NULL, 0,
- _SOBIND_UNSPEC|_SOBIND_LOCK_HELD);
+ _SOBIND_UNSPEC|_SOBIND_LOCK_HELD, cr);
if (error)
goto done;
}
@@ -2088,17 +2379,19 @@ sotpi_connect(struct sonode *so,
_SODISCONNECT_LOCK_HELD);
} else {
so->so_state &=
- ~(SS_ISCONNECTED | SS_ISCONNECTING |
- SS_FADDR_VALID);
- so->so_faddr_len = 0;
+ ~(SS_ISCONNECTED | SS_ISCONNECTING);
+ sti->sti_faddr_valid = 0;
+ sti->sti_faddr_len = 0;
}
+ /* Remove SOLOCKED since setsockopt will grab it */
so_unlock_single(so, SOLOCKED);
mutex_exit(&so->so_lock);
val = 0;
- (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DGRAM_ERRIND,
- &val, (t_uscalar_t)sizeof (val));
+ (void) sotpi_setsockopt(so, SOL_SOCKET,
+ SO_DGRAM_ERRIND, &val, (t_uscalar_t)sizeof (val),
+ cr);
mutex_enter(&so->so_lock);
so_lock_single(so); /* Set SOLOCKED */
@@ -2112,7 +2405,7 @@ sotpi_connect(struct sonode *so,
goto done;
}
/*
- * Mark the socket if so_faddr_sa represents the transport level
+ * Mark the socket if sti_faddr_sa represents the transport level
* address.
*/
if (flags & _SOCONNECT_NOXLATE) {
@@ -2126,7 +2419,7 @@ sotpi_connect(struct sonode *so,
soaddr_ux = (struct sockaddr_ux *)name;
name = (struct sockaddr *)&soaddr_ux->sou_addr;
namelen = sizeof (soaddr_ux->sou_addr);
- so->so_state |= SS_FADDR_NOXLATE;
+ sti->sti_faddr_noxlate = 1;
}
/*
@@ -2141,46 +2434,46 @@ sotpi_connect(struct sonode *so,
* transport providers that do not support TI_GETPEERNAME.
* Also used for cached foreign address for TCP and UDP.
*/
- if (namelen > (t_uscalar_t)so->so_faddr_maxlen) {
+ if (namelen > (t_uscalar_t)sti->sti_faddr_maxlen) {
error = EINVAL;
goto done;
}
- so->so_faddr_len = (socklen_t)namelen;
- ASSERT(so->so_faddr_len <= so->so_faddr_maxlen);
- bcopy(name, so->so_faddr_sa, namelen);
- so->so_state |= SS_FADDR_VALID;
+ sti->sti_faddr_len = (socklen_t)namelen;
+ ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen);
+ bcopy(name, sti->sti_faddr_sa, namelen);
+ sti->sti_faddr_valid = 1;
if (so->so_family == AF_UNIX) {
- if (so->so_state & SS_FADDR_NOXLATE) {
+ if (sti->sti_faddr_noxlate) {
/*
* Already have a transport internal address. Do not
* pass any (transport internal) source address.
*/
- addr = so->so_faddr_sa;
- addrlen = (t_uscalar_t)so->so_faddr_len;
+ addr = sti->sti_faddr_sa;
+ addrlen = (t_uscalar_t)sti->sti_faddr_len;
src = NULL;
srclen = 0;
} else {
/*
* Pass the sockaddr_un source address as an option
* and translate the remote address.
- * Holding so_lock thus so_laddr_sa can not change.
+ * Holding so_lock thus sti_laddr_sa can not change.
*/
- src = so->so_laddr_sa;
- srclen = (t_uscalar_t)so->so_laddr_len;
+ src = sti->sti_laddr_sa;
+ srclen = (t_uscalar_t)sti->sti_laddr_len;
dprintso(so, 1,
("sotpi_connect UNIX: srclen %d, src %p\n",
srclen, src));
error = so_ux_addr_xlate(so,
- so->so_faddr_sa, (socklen_t)so->so_faddr_len,
+ sti->sti_faddr_sa, (socklen_t)sti->sti_faddr_len,
(flags & _SOCONNECT_XPG4_2),
&addr, &addrlen);
if (error)
goto bad;
}
} else {
- addr = so->so_faddr_sa;
- addrlen = (t_uscalar_t)so->so_faddr_len;
+ addr = sti->sti_faddr_sa;
+ addrlen = (t_uscalar_t)sti->sti_faddr_len;
src = NULL;
srclen = 0;
}
@@ -2209,7 +2502,7 @@ sotpi_connect(struct sonode *so,
val = 1;
(void) sotpi_setsockopt(so, SOL_SOCKET, SO_DGRAM_ERRIND,
- &val, (t_uscalar_t)sizeof (val));
+ &val, (t_uscalar_t)sizeof (val), cr);
mutex_enter(&so->so_lock);
so_lock_single(so); /* Set SOLOCKED */
@@ -2225,8 +2518,8 @@ sotpi_connect(struct sonode *so,
*/
fflag = 0;
ASSERT(so->so_family != AF_UNIX);
- so->so_state &= ~SS_LADDR_VALID;
- } else if (so->so_laddr_len != 0) {
+ sti->sti_laddr_valid = 0;
+ } else if (sti->sti_laddr_len != 0) {
/*
* If the local address or port was "any" then it may be
* changed by the transport as a result of the
@@ -2234,21 +2527,22 @@ sotpi_connect(struct sonode *so,
*/
switch (so->so_family) {
case AF_INET:
- ASSERT(so->so_laddr_len == (socklen_t)sizeof (sin_t));
- if (((sin_t *)so->so_laddr_sa)->sin_addr.s_addr ==
+ ASSERT(sti->sti_laddr_len == (socklen_t)sizeof (sin_t));
+ if (((sin_t *)sti->sti_laddr_sa)->sin_addr.s_addr ==
INADDR_ANY ||
- ((sin_t *)so->so_laddr_sa)->sin_port == 0)
- so->so_state &= ~SS_LADDR_VALID;
+ ((sin_t *)sti->sti_laddr_sa)->sin_port == 0)
+ sti->sti_laddr_valid = 0;
break;
case AF_INET6:
- ASSERT(so->so_laddr_len == (socklen_t)sizeof (sin6_t));
+ ASSERT(sti->sti_laddr_len ==
+ (socklen_t)sizeof (sin6_t));
if (IN6_IS_ADDR_UNSPECIFIED(
- &((sin6_t *)so->so_laddr_sa) ->sin6_addr) ||
+ &((sin6_t *)sti->sti_laddr_sa) ->sin6_addr) ||
IN6_IS_ADDR_V4MAPPED_ANY(
- &((sin6_t *)so->so_laddr_sa)->sin6_addr) ||
- ((sin6_t *)so->so_laddr_sa)->sin6_port == 0)
- so->so_state &= ~SS_LADDR_VALID;
+ &((sin6_t *)sti->sti_laddr_sa)->sin6_addr) ||
+ ((sin6_t *)sti->sti_laddr_sa)->sin6_port == 0)
+ sti->sti_laddr_valid = 0;
break;
default:
@@ -2337,30 +2631,18 @@ done:
case EISCONN:
case EINTR:
/* Non-fatal errors */
- so->so_state &= ~SS_LADDR_VALID;
+ sti->sti_laddr_valid = 0;
/* FALLTHRU */
case 0:
break;
-
- case EHOSTUNREACH:
- if (flags & _SOCONNECT_XPG4_2) {
- /*
- * X/Open specification contains a requirement that
- * ENETUNREACH be returned but does not require
- * EHOSTUNREACH. In order to keep the test suite
- * happy we mess with the errno here.
- */
- error = ENETUNREACH;
- }
- /* FALLTHRU */
-
default:
ASSERT(need_unlock);
/*
* Fatal errors: clear SS_ISCONNECTING in case it was set,
* and invalidate local-address cache
*/
- so->so_state &= ~(SS_ISCONNECTING | SS_LADDR_VALID);
+ so->so_state &= ~SS_ISCONNECTING;
+ sti->sti_laddr_valid = 0;
/* A discon_ind might have already unbound us */
if ((flags & _SOCONNECT_DID_BIND) &&
(so->so_state & SS_ISBOUND)) {
@@ -2379,18 +2661,20 @@ done:
mutex_exit(&so->so_lock);
return (error);
-so_bad: error = sogeterr(so);
+so_bad: error = sogeterr(so, B_TRUE);
bad: eprintsoline(so, error);
goto done;
}
+/* ARGSUSED */
int
-sotpi_shutdown(struct sonode *so, int how)
+sotpi_shutdown(struct sonode *so, int how, struct cred *cr)
{
struct T_ordrel_req ordrel_req;
mblk_t *mp;
uint_t old_state, state_change;
int error = 0;
+ sotpi_info_t *sti = SOTOTPI(so);
dprintso(so, 1, ("sotpi_shutdown(%p, %d) %s\n",
(void *)so, how, pr_state(so->so_state, so->so_mode)));
@@ -2523,14 +2807,14 @@ sotpi_shutdown(struct sonode *so, int how)
* For SunOS 4.X compatibility we tell the other end
* that we are unable to receive at this point.
*/
- if (so->so_family == AF_UNIX && so->so_serv_type != T_CLTS)
+ if (so->so_family == AF_UNIX && sti->sti_serv_type != T_CLTS)
so_unix_close(so);
- if (so->so_serv_type == T_COTS)
+ if (sti->sti_serv_type == T_COTS)
error = sodisconnect(so, -1, _SODISCONNECT_LOCK_HELD);
}
if ((state_change & SS_CANTSENDMORE) &&
- (so->so_serv_type == T_COTS_ORD)) {
+ (sti->sti_serv_type == T_COTS_ORD)) {
/* Send an orderly release */
ordrel_req.PRIM_type = T_ORDREL_REQ;
@@ -2582,6 +2866,7 @@ so_unix_close(struct sonode *so)
int error;
struct T_opthdr toh;
mblk_t *mp;
+ sotpi_info_t *sti = SOTOTPI(so);
ASSERT(MUTEX_HELD(&so->so_lock));
@@ -2632,35 +2917,35 @@ so_unix_close(struct sonode *so)
/*
* Length and family checks.
*/
- error = so_addr_verify(so, so->so_faddr_sa,
- (t_uscalar_t)so->so_faddr_len);
+ error = so_addr_verify(so, sti->sti_faddr_sa,
+ (t_uscalar_t)sti->sti_faddr_len);
if (error) {
eprintsoline(so, error);
return;
}
- if (so->so_state & SS_FADDR_NOXLATE) {
+ if (sti->sti_faddr_noxlate) {
/*
* Already have a transport internal address. Do not
* pass any (transport internal) source address.
*/
- addr = so->so_faddr_sa;
- addrlen = (t_uscalar_t)so->so_faddr_len;
+ addr = sti->sti_faddr_sa;
+ addrlen = (t_uscalar_t)sti->sti_faddr_len;
src = NULL;
srclen = 0;
} else {
/*
* Pass the sockaddr_un source address as an option
* and translate the remote address.
- * Holding so_lock thus so_laddr_sa can not change.
+ * Holding so_lock thus sti_laddr_sa can not change.
*/
- src = so->so_laddr_sa;
- srclen = (socklen_t)so->so_laddr_len;
+ src = sti->sti_laddr_sa;
+ srclen = (socklen_t)sti->sti_laddr_len;
dprintso(so, 1,
("so_ux_close: srclen %d, src %p\n",
srclen, src));
error = so_ux_addr_xlate(so,
- so->so_faddr_sa,
- (socklen_t)so->so_faddr_len, 0,
+ sti->sti_faddr_sa,
+ (socklen_t)sti->sti_faddr_len, 0,
&addr, &addrlen);
if (error) {
eprintsoline(so, error);
@@ -2717,93 +3002,6 @@ so_unix_close(struct sonode *so)
}
/*
- * Handle recv* calls that set MSG_OOB or MSG_OOB together with MSG_PEEK.
- */
-int
-sorecvoob(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, int flags)
-{
- mblk_t *mp, *nmp;
- int error;
-
- dprintso(so, 1, ("sorecvoob(%p, %p, 0x%x)\n",
- (void *)so, (void *)msg, flags));
-
- /*
- * There is never any oob data with addresses or control since
- * the T_EXDATA_IND does not carry any options.
- */
- msg->msg_controllen = 0;
- msg->msg_namelen = 0;
-
- mutex_enter(&so->so_lock);
- ASSERT(so_verify_oobstate(so));
- if ((so->so_options & SO_OOBINLINE) ||
- (so->so_state & (SS_OOBPEND|SS_HADOOBDATA)) != SS_OOBPEND) {
- dprintso(so, 1, ("sorecvoob: inline or data consumed\n"));
- mutex_exit(&so->so_lock);
- return (EINVAL);
- }
- if (!(so->so_state & SS_HAVEOOBDATA)) {
- dprintso(so, 1, ("sorecvoob: no data yet\n"));
- mutex_exit(&so->so_lock);
- return (EWOULDBLOCK);
- }
- ASSERT(so->so_oobmsg != NULL);
- mp = so->so_oobmsg;
- if (flags & MSG_PEEK) {
- /*
- * Since recv* can not return ENOBUFS we can not use dupmsg.
- * Instead we revert to the consolidation private
- * allocb_wait plus bcopy.
- */
- mblk_t *mp1;
-
- mp1 = allocb_wait(msgdsize(mp), BPRI_MED, STR_NOSIG, NULL);
- ASSERT(mp1);
-
- while (mp != NULL) {
- ssize_t size;
-
- size = MBLKL(mp);
- bcopy(mp->b_rptr, mp1->b_wptr, size);
- mp1->b_wptr += size;
- ASSERT(mp1->b_wptr <= mp1->b_datap->db_lim);
- mp = mp->b_cont;
- }
- mp = mp1;
- } else {
- /*
- * Update the state indicating that the data has been consumed.
- * Keep SS_OOBPEND set until data is consumed past the mark.
- */
- so->so_oobmsg = NULL;
- so->so_state ^= SS_HAVEOOBDATA|SS_HADOOBDATA;
- }
- dprintso(so, 1,
- ("after recvoob(%p): counts %d/%d state %s\n",
- (void *)so, so->so_oobsigcnt,
- so->so_oobcnt, pr_state(so->so_state, so->so_mode)));
- ASSERT(so_verify_oobstate(so));
- mutex_exit(&so->so_lock);
-
- error = 0;
- nmp = mp;
- while (nmp != NULL && uiop->uio_resid > 0) {
- ssize_t n = MBLKL(nmp);
-
- n = MIN(n, uiop->uio_resid);
- if (n > 0)
- error = uiomove(nmp->b_rptr, n,
- UIO_READ, uiop);
- if (error)
- break;
- nmp = nmp->b_cont;
- }
- freemsg(mp);
- return (error);
-}
-
-/*
* Called by sotpi_recvmsg when reading a non-zero amount of data.
* In addition, the caller typically verifies that there is some
* potential state to clear by checking
@@ -2811,7 +3009,7 @@ sorecvoob(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, int flags)
* before calling this routine.
* Note that such a check can be made without holding so_lock since
* sotpi_recvmsg is single-threaded (using SOREADLOCKED) and only sotpi_recvmsg
- * decrements so_oobsigcnt.
+ * decrements sti_oobsigcnt.
*
* When data is read *after* the point that all pending
* oob data has been consumed the oob indication is cleared.
@@ -2823,13 +3021,15 @@ sorecvoob(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, int flags)
static void
sorecv_update_oobstate(struct sonode *so)
{
+ sotpi_info_t *sti = SOTOTPI(so);
+
mutex_enter(&so->so_lock);
ASSERT(so_verify_oobstate(so));
dprintso(so, 1,
("sorecv_update_oobstate: counts %d/%d state %s\n",
- so->so_oobsigcnt,
- so->so_oobcnt, pr_state(so->so_state, so->so_mode)));
- if (so->so_oobsigcnt == 0) {
+ sti->sti_oobsigcnt,
+ sti->sti_oobcnt, pr_state(so->so_state, so->so_mode)));
+ if (sti->sti_oobsigcnt == 0) {
/* No more pending oob indications */
so->so_state &= ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK);
freemsg(so->so_oobmsg);
@@ -2845,10 +3045,11 @@ sorecv_update_oobstate(struct sonode *so)
static int
nl7c_sorecv(struct sonode *so, mblk_t **rmp, uio_t *uiop, rval_t *rp)
{
+ sotpi_info_t *sti = SOTOTPI(so);
int error = 0;
mblk_t *tmp = NULL;
mblk_t *pmp = NULL;
- mblk_t *nmp = so->so_nl7c_rcv_mp;
+ mblk_t *nmp = sti->sti_nl7c_rcv_mp;
ASSERT(nmp != NULL);
@@ -2889,25 +3090,24 @@ nl7c_sorecv(struct sonode *so, mblk_t **rmp, uio_t *uiop, rval_t *rp)
if (pmp != NULL) {
/* Free any mblk_t(s) which we have consumed */
pmp->b_cont = NULL;
- freemsg(so->so_nl7c_rcv_mp);
+ freemsg(sti->sti_nl7c_rcv_mp);
}
- if ((so->so_nl7c_rcv_mp = nmp) == NULL) {
+ if ((sti->sti_nl7c_rcv_mp = nmp) == NULL) {
/* Last mblk_t so return the saved kstrgetmsg() rval/error */
if (error == 0) {
- rval_t *p = (rval_t *)&so->so_nl7c_rcv_rval;
+ rval_t *p = (rval_t *)&sti->sti_nl7c_rcv_rval;
error = p->r_v.r_v2;
p->r_v.r_v2 = 0;
}
- rp->r_vals = so->so_nl7c_rcv_rval;
- so->so_nl7c_rcv_rval = 0;
+ rp->r_vals = sti->sti_nl7c_rcv_rval;
+ sti->sti_nl7c_rcv_rval = 0;
} else {
/* More mblk_t(s) to process so no rval to return */
rp->r_vals = 0;
}
return (error);
}
-
/*
* Receive the next message on the queue.
* If msg_controllen is non-zero when called the caller is interested in
@@ -2917,8 +3117,10 @@ nl7c_sorecv(struct sonode *so, mblk_t **rmp, uio_t *uiop, rval_t *rp)
* The routine returns with msg_control and msg_name pointing to
* kmem_alloc'ed memory which the caller has to free.
*/
+/* ARGSUSED */
int
-sotpi_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop)
+sotpi_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
+ struct cred *cr)
{
union T_primitives *tpr;
mblk_t *mp;
@@ -2932,10 +3134,10 @@ sotpi_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop)
rval_t rval;
int flags;
clock_t timout;
- int first;
int error = 0;
+ int reterr = 0;
struct uio *suiop = NULL;
- sodirect_t *sodp = so->so_direct;
+ sotpi_info_t *sti = SOTOTPI(so);
flags = msg->msg_flags;
msg->msg_flags = 0;
@@ -2944,6 +3146,12 @@ sotpi_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop)
(void *)so, (void *)msg, flags,
pr_state(so->so_state, so->so_mode), so->so_error));
+ if (so->so_version == SOV_STREAM) {
+ so_update_attrs(so, SOACC);
+ /* The imaginary "sockmod" has been popped - act as a stream */
+ return (strread(SOTOV(so), uiop, cr));
+ }
+
/*
* If we are not connected because we have never been connected
* we return ENOTCONN. If we have been connected (but are no longer
@@ -2970,9 +3178,13 @@ sotpi_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop)
/* Check that the transport supports OOB */
if (!(so->so_mode & SM_EXDATA))
return (EOPNOTSUPP);
- return (sorecvoob(so, msg, uiop, flags));
+ so_update_attrs(so, SOACC);
+ return (sorecvoob(so, msg, uiop, flags,
+ (so->so_options & SO_OOBINLINE)));
}
+ so_update_attrs(so, SOACC);
+
/*
* Set msg_controllen and msg_namelen to zero here to make it
* simpler in the cases that no control or name is returned.
@@ -2989,31 +3201,32 @@ sotpi_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop)
/*
* If an NL7C enabled socket and not waiting for write data.
*/
- if ((so->so_nl7c_flags & (NL7C_ENABLED | NL7C_WAITWRITE)) ==
+ if ((sti->sti_nl7c_flags & (NL7C_ENABLED | NL7C_WAITWRITE)) ==
NL7C_ENABLED) {
- if (so->so_nl7c_uri) {
+ if (sti->sti_nl7c_uri) {
/* Close uri processing for a previous request */
nl7c_close(so);
}
- if ((so_state & SS_CANTRCVMORE) && so->so_nl7c_rcv_mp == NULL) {
+ if ((so_state & SS_CANTRCVMORE) &&
+ sti->sti_nl7c_rcv_mp == NULL) {
/* Nothing to process, EOF */
mutex_exit(&so->so_lock);
return (0);
- } else if (so->so_nl7c_flags & NL7C_SOPERSIST) {
+ } else if (sti->sti_nl7c_flags & NL7C_SOPERSIST) {
/* Persistent NL7C socket, try to process request */
boolean_t ret;
ret = nl7c_process(so,
(so->so_state & (SS_NONBLOCK|SS_NDELAY)));
- rval.r_vals = so->so_nl7c_rcv_rval;
+ rval.r_vals = sti->sti_nl7c_rcv_rval;
error = rval.r_v.r_v2;
if (error) {
/* Error of some sort, return it */
mutex_exit(&so->so_lock);
return (error);
}
- if (so->so_nl7c_flags &&
- ! (so->so_nl7c_flags & NL7C_WAITWRITE)) {
+ if (sti->sti_nl7c_flags &&
+ ! (sti->sti_nl7c_flags & NL7C_WAITWRITE)) {
/*
* Still an NL7C socket and no data
* to pass up to the caller.
@@ -3031,7 +3244,7 @@ sotpi_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop)
/*
* Not persistent so no further NL7C processing.
*/
- so->so_nl7c_flags = 0;
+ sti->sti_nl7c_flags = 0;
}
}
/*
@@ -3081,84 +3294,23 @@ sotpi_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop)
else
timout = -1;
opflag = pflag;
- first = 1;
- if (uiop->uio_resid >= uioasync.mincnt &&
- sodp != NULL && (sodp->sod_state & SOD_ENABLED) &&
- uioasync.enabled && !(flags & MSG_PEEK) &&
- !(so_state & SS_CANTRCVMORE)) {
- /*
- * Big enough I/O for uioa min setup and an sodirect socket
- * and sodirect enabled and uioa enabled and I/O will be done
- * and not EOF so initialize the sodirect_t uioa_t with "uiop".
- */
- mutex_enter(sodp->sod_lockp);
- if (!uioainit(uiop, &sodp->sod_uioa)) {
- /*
- * Successful uioainit() so the uio_t part of the
- * uioa_t will be used for all uio_t work to follow,
- * we save the original "uiop" in "suiop".
- */
- suiop = uiop;
- uiop = (uio_t *)&sodp->sod_uioa;
- /*
- * Before returning to the caller the passed in uio_t
- * "uiop" will be updated via a call to uioafini()
- * below.
- *
- * Note, the uioa.uioa_state isn't set to UIOA_ENABLED
- * here as first we have to uioamove() any currently
- * queued M_DATA mblk_t(s) so it will be done in
- * kstrgetmsg().
- */
- }
- /*
- * In either uioainit() success or not case note the number
- * of uio bytes the caller wants for sod framework and/or
- * transport (e.g. TCP) strategy.
- */
- sodp->sod_want = uiop->uio_resid;
- mutex_exit(sodp->sod_lockp);
- } else if (sodp != NULL && (sodp->sod_state & SOD_ENABLED)) {
- /*
- * No uioa but still using sodirect so note the number of
- * uio bytes the caller wants for sodirect framework and/or
- * transport (e.g. TCP) strategy.
- *
- * Note, sod_lockp not held, only writer is in this function
- * and only one thread at a time so not needed just to init.
- */
- sodp->sod_want = uiop->uio_resid;
- }
+ suiop = sod_rcv_init(so, flags, &uiop);
retry:
saved_resid = uiop->uio_resid;
pri = 0;
mp = NULL;
- if (so->so_nl7c_rcv_mp != NULL) {
+ if (sti->sti_nl7c_rcv_mp != NULL) {
/* Already kstrgetmsg()ed saved mblk(s) from NL7C */
error = nl7c_sorecv(so, &mp, uiop, &rval);
} else {
error = kstrgetmsg(SOTOV(so), &mp, uiop, &pri, &pflag,
timout, &rval);
}
- if (error) {
- switch (error) {
- case EINTR:
- case EWOULDBLOCK:
- if (!first)
- error = 0;
- break;
- case ETIME:
- /* Returned from kstrgetmsg when timeout expires */
- if (!first)
- error = 0;
- else
- error = EWOULDBLOCK;
- break;
- default:
- eprintsoline(so, error);
- break;
- }
+ if (error != 0) {
+ /* kstrgetmsg returns ETIME when timeout expires */
+ if (error == ETIME)
+ error = EWOULDBLOCK;
goto out;
}
/*
@@ -3198,7 +3350,6 @@ retry:
if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
mutex_exit(&so->so_lock);
- first = 0;
pflag = opflag | MSG_NOMARK;
goto retry;
}
@@ -3238,7 +3389,6 @@ retry:
if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
mutex_exit(&so->so_lock);
- first = 0;
pflag = opflag | MSG_NOMARK;
goto retry;
}
@@ -3436,7 +3586,6 @@ retry:
controllen == 0 &&
uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
mutex_exit(&so->so_lock);
- first = 0;
pflag = opflag | MSG_NOMARK;
goto retry;
}
@@ -3446,7 +3595,7 @@ retry:
dprintso(so, 1,
("sotpi_recvmsg: EXDATA_IND counts %d/%d consumed %ld "
"state %s\n",
- so->so_oobsigcnt, so->so_oobcnt,
+ sti->sti_oobsigcnt, sti->sti_oobcnt,
saved_resid - uiop->uio_resid,
pr_state(so->so_state, so->so_mode)));
/*
@@ -3476,8 +3625,8 @@ retry:
dprintso(so, 1,
("sotpi_recvmsg: consume EXDATA_IND "
"counts %d/%d state %s\n",
- so->so_oobsigcnt,
- so->so_oobcnt,
+ sti->sti_oobsigcnt,
+ sti->sti_oobcnt,
pr_state(so->so_state, so->so_mode)));
pflag = MSG_ANY | MSG_DELAYERROR;
@@ -3516,11 +3665,11 @@ retry:
*/
mutex_enter(&so->so_lock);
ASSERT(so_verify_oobstate(so));
- ASSERT(so->so_oobsigcnt >= so->so_oobcnt);
- ASSERT(so->so_oobsigcnt > 0);
- so->so_oobsigcnt--;
- ASSERT(so->so_oobcnt > 0);
- so->so_oobcnt--;
+ ASSERT(sti->sti_oobsigcnt >= sti->sti_oobcnt);
+ ASSERT(sti->sti_oobsigcnt > 0);
+ sti->sti_oobsigcnt--;
+ ASSERT(sti->sti_oobcnt > 0);
+ sti->sti_oobcnt--;
/*
* Since the T_EXDATA_IND has been removed from the stream
* head, but we have not read data past the mark,
@@ -3533,12 +3682,14 @@ retry:
mutex_exit(&so->so_lock);
dprintso(so, 1,
("sotpi_recvmsg: retry EXDATA_IND counts %d/%d state %s\n",
- so->so_oobsigcnt, so->so_oobcnt,
+ sti->sti_oobsigcnt, sti->sti_oobcnt,
pr_state(so->so_state, so->so_mode)));
pflag = opflag;
goto retry;
}
default:
+ cmn_err(CE_CONT, "sotpi_recvmsg: so %p prim %d mp %p\n",
+ (void *)so, tpr->type, (void *)mp);
ASSERT(0);
freemsg(mp);
error = EPROTO;
@@ -3549,35 +3700,13 @@ retry:
out:
mutex_enter(&so->so_lock);
out_locked:
- if (sodp != NULL) {
- /* Finish any sodirect and uioa processing */
- mutex_enter(sodp->sod_lockp);
- if (suiop != NULL) {
- /* Finish any uioa_t processing */
- int ret;
-
- ASSERT(uiop == (uio_t *)&sodp->sod_uioa);
- ret = uioafini(suiop, (uioa_t *)uiop);
- if (error == 0 && ret != 0) {
- /* If no error yet, set it */
- error = ret;
- }
- if ((mp = sodp->sod_uioafh) != NULL) {
- sodp->sod_uioafh = NULL;
- sodp->sod_uioaft = NULL;
- freemsg(mp);
- }
- }
- ASSERT(sodp->sod_uioafh == NULL);
- if (!(sodp->sod_state & SOD_WAKE_NOT)) {
- /* Awoke */
- sodp->sod_state &= SOD_WAKE_CLR;
- sodp->sod_state |= SOD_WAKE_NOT;
- }
- /* Last, clear sod_want value */
- sodp->sod_want = 0;
- mutex_exit(sodp->sod_lockp);
+ if (so->so_direct != NULL) {
+ mutex_enter(so->so_direct->sod_lockp);
+ reterr = sod_rcv_done(so, suiop, uiop);
+ mutex_exit(so->so_direct->sod_lockp);
}
+ if (reterr != 0 && error == 0)
+ error = reterr;
so_unlock_read(so); /* Clear SOREADLOCKED */
mutex_exit(&so->so_lock);
return (error);
@@ -3605,12 +3734,13 @@ sosend_dgramcmsg(struct sonode *so, struct sockaddr *name, socklen_t namelen,
t_uscalar_t optlen;
void *fds;
int fdlen;
+ sotpi_info_t *sti = SOTOTPI(so);
ASSERT(name && namelen);
ASSERT(control && controllen);
len = uiop->uio_resid;
- if (len > (ssize_t)so->so_tidu_size) {
+ if (len > (ssize_t)sti->sti_tidu_size) {
return (EMSGSIZE);
}
@@ -3630,7 +3760,7 @@ sosend_dgramcmsg(struct sonode *so, struct sockaddr *name, socklen_t namelen,
return (error);
}
if (so->so_family == AF_UNIX) {
- if (so->so_state & SS_FADDR_NOXLATE) {
+ if (sti->sti_faddr_noxlate) {
/*
* Already have a transport internal address. Do not
* pass any (transport internal) source address.
@@ -3644,14 +3774,14 @@ sosend_dgramcmsg(struct sonode *so, struct sockaddr *name, socklen_t namelen,
* Pass the sockaddr_un source address as an option
* and translate the remote address.
*
- * Note that this code does not prevent so_laddr_sa
+ * Note that this code does not prevent sti_laddr_sa
* from changing while it is being used. Thus
* if an unbind+bind occurs concurrently with this
* send the peer might see a partially new and a
* partially old "from" address.
*/
- src = so->so_laddr_sa;
- srclen = (t_uscalar_t)so->so_laddr_len;
+ src = sti->sti_laddr_sa;
+ srclen = (t_uscalar_t)sti->sti_laddr_len;
dprintso(so, 1,
("sosend_dgramcmsg UNIX: srclen %d, src %p\n",
srclen, src));
@@ -3762,24 +3892,20 @@ sosend_dgramcmsg(struct sonode *so, struct sockaddr *name, socklen_t namelen,
* Assumes caller has verified that SS_ISCONNECTED is set.
*/
static int
-sosend_svccmsg(struct sonode *so,
- struct uio *uiop,
- int more,
- void *control,
- t_uscalar_t controllen,
- int flags)
+sosend_svccmsg(struct sonode *so, struct uio *uiop, int more, void *control,
+ t_uscalar_t controllen, int flags)
{
struct T_optdata_req tdr;
mblk_t *mp;
int error;
ssize_t iosize;
- int first = 1;
int size;
struct fdbuf *fdbuf;
t_uscalar_t optlen;
void *fds;
int fdlen;
struct T_opthdr toh;
+ sotpi_info_t *sti = SOTOTPI(so);
dprintso(so, 1,
("sosend_svccmsg: resid %ld bytes\n", uiop->uio_resid));
@@ -3801,7 +3927,7 @@ sosend_svccmsg(struct sonode *so,
* Error for transports with zero tidu_size.
*/
tdr.PRIM_type = T_OPTDATA_REQ;
- iosize = so->so_tidu_size;
+ iosize = sti->sti_tidu_size;
if (iosize <= 0)
return (EMSGSIZE);
if (uiop->uio_resid > iosize) {
@@ -3843,7 +3969,7 @@ sosend_svccmsg(struct sonode *so,
* Caught a signal waiting for memory.
* Let send* return EINTR.
*/
- return (first ? EINTR : 0);
+ return (EINTR);
}
}
soappendmsg(mp, &tdr, sizeof (tdr));
@@ -3869,13 +3995,10 @@ sosend_svccmsg(struct sonode *so,
error = kstrputmsg(SOTOV(so), mp, uiop, iosize,
0, MSG_BAND, 0);
if (error) {
- if (!first && error == EWOULDBLOCK)
- return (0);
eprintsoline(so, error);
return (error);
}
control = NULL;
- first = 0;
if (uiop->uio_resid > 0) {
/*
* Recheck for fatal errors. Fail write even though
@@ -3883,13 +4006,12 @@ sosend_svccmsg(struct sonode *so,
* with strwrite semantics and BSD sockets semantics.
*/
if (so->so_state & SS_CANTSENDMORE) {
- tsignal(curthread, SIGPIPE);
eprintsoline(so, error);
return (EPIPE);
}
if (so->so_error != 0) {
mutex_enter(&so->so_lock);
- error = sogeterr(so);
+ error = sogeterr(so, B_TRUE);
mutex_exit(&so->so_lock);
if (error != 0) {
eprintsoline(so, error);
@@ -3920,11 +4042,12 @@ sosend_dgram(struct sonode *so, struct sockaddr *name, socklen_t namelen,
void *src;
socklen_t srclen;
ssize_t len;
+ sotpi_info_t *sti = SOTOTPI(so);
ASSERT(name != NULL && namelen != 0);
len = uiop->uio_resid;
- if (len > so->so_tidu_size) {
+ if (len > sti->sti_tidu_size) {
error = EMSGSIZE;
goto done;
}
@@ -3934,11 +4057,11 @@ sosend_dgram(struct sonode *so, struct sockaddr *name, socklen_t namelen,
if (error != 0)
goto done;
- if (so->so_state & SS_DIRECT)
+ if (sti->sti_direct)
return (sodgram_direct(so, name, namelen, uiop, flags));
if (so->so_family == AF_UNIX) {
- if (so->so_state & SS_FADDR_NOXLATE) {
+ if (sti->sti_faddr_noxlate) {
/*
* Already have a transport internal address. Do not
* pass any (transport internal) source address.
@@ -3952,14 +4075,14 @@ sosend_dgram(struct sonode *so, struct sockaddr *name, socklen_t namelen,
* Pass the sockaddr_un source address as an option
* and translate the remote address.
*
- * Note that this code does not prevent so_laddr_sa
+ * Note that this code does not prevent sti_laddr_sa
* from changing while it is being used. Thus
* if an unbind+bind occurs concurrently with this
* send the peer might see a partially new and a
* partially old "from" address.
*/
- src = so->so_laddr_sa;
- srclen = (socklen_t)so->so_laddr_len;
+ src = sti->sti_laddr_sa;
+ srclen = (socklen_t)sti->sti_laddr_len;
dprintso(so, 1,
("sosend_dgram UNIX: srclen %d, src %p\n",
srclen, src));
@@ -4048,17 +4171,14 @@ done:
* Assumes caller has verified that SS_ISCONNECTED is set.
*/
int
-sosend_svc(struct sonode *so,
- struct uio *uiop,
- t_scalar_t prim,
- int more,
- int sflag)
+sosend_svc(struct sonode *so, struct uio *uiop, t_scalar_t prim, int more,
+ int sflag)
{
struct T_data_req tdr;
mblk_t *mp;
int error;
ssize_t iosize;
- int first = 1;
+ sotpi_info_t *sti = SOTOTPI(so);
dprintso(so, 1,
("sosend_svc: %p, resid %ld bytes, prim %d, sflag 0x%x\n",
@@ -4077,7 +4197,7 @@ sosend_svc(struct sonode *so,
* Error for transports with zero tidu_size.
*/
tdr.PRIM_type = prim;
- iosize = so->so_tidu_size;
+ iosize = sti->sti_tidu_size;
if (iosize <= 0)
return (EMSGSIZE);
if (uiop->uio_resid > iosize) {
@@ -4097,21 +4217,15 @@ sosend_svc(struct sonode *so,
* Caught a signal waiting for memory.
* Let send* return EINTR.
*/
- if (first)
- return (EINTR);
- else
- return (0);
+ return (EINTR);
}
error = kstrputmsg(SOTOV(so), mp, uiop, iosize,
0, sflag | MSG_BAND, 0);
if (error) {
- if (!first && error == EWOULDBLOCK)
- return (0);
eprintsoline(so, error);
return (error);
}
- first = 0;
if (uiop->uio_resid > 0) {
/*
* Recheck for fatal errors. Fail write even though
@@ -4119,13 +4233,12 @@ sosend_svc(struct sonode *so,
* with strwrite semantics and BSD sockets semantics.
*/
if (so->so_state & SS_CANTSENDMORE) {
- tsignal(curthread, SIGPIPE);
eprintsoline(so, error);
return (EPIPE);
}
if (so->so_error != 0) {
mutex_enter(&so->so_lock);
- error = sogeterr(so);
+ error = sogeterr(so, B_TRUE);
mutex_exit(&so->so_lock);
if (error != 0) {
eprintsoline(so, error);
@@ -4145,7 +4258,8 @@ sosend_svc(struct sonode *so,
* after sending the message.
*/
static int
-sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop)
+sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
+ struct cred *cr)
{
int so_state;
int so_mode;
@@ -4154,22 +4268,28 @@ sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop)
t_uscalar_t namelen;
int dontroute;
int flags;
+ sotpi_info_t *sti = SOTOTPI(so);
dprintso(so, 1, ("sotpi_sendmsg(%p, %p, 0x%x) state %s, error %d\n",
(void *)so, (void *)msg, msg->msg_flags,
pr_state(so->so_state, so->so_mode), so->so_error));
+ if (so->so_version == SOV_STREAM) {
+ /* The imaginary "sockmod" has been popped - act as a stream */
+ so_update_attrs(so, SOMOD);
+ return (strwrite(SOTOV(so), uiop, cr));
+ }
+
mutex_enter(&so->so_lock);
so_state = so->so_state;
if (so_state & SS_CANTSENDMORE) {
mutex_exit(&so->so_lock);
- tsignal(curthread, SIGPIPE);
return (EPIPE);
}
if (so->so_error != 0) {
- error = sogeterr(so);
+ error = sogeterr(so, B_TRUE);
if (error != 0) {
mutex_exit(&so->so_lock);
return (error);
@@ -4194,15 +4314,15 @@ sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop)
namelen = 0;
} else {
/*
- * Note that this code does not prevent so_faddr_sa
+ * Note that this code does not prevent sti_faddr_sa
* from changing while it is being used. Thus
* if an "unconnect"+connect occurs concurrently with
* this send the datagram might be delivered to a
* garbled address.
*/
- ASSERT(so->so_faddr_sa);
- name = so->so_faddr_sa;
- namelen = (t_uscalar_t)so->so_faddr_len;
+ ASSERT(sti->sti_faddr_sa);
+ name = sti->sti_faddr_sa;
+ namelen = (t_uscalar_t)sti->sti_faddr_len;
}
} else {
if (!(so_state & SS_ISCONNECTED) &&
@@ -4227,7 +4347,7 @@ sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop)
if (!(so_state & SS_ISBOUND)) {
so_lock_single(so); /* Set SOLOCKED */
error = sotpi_bind(so, NULL, 0,
- _SOBIND_UNSPEC|_SOBIND_LOCK_HELD);
+ _SOBIND_UNSPEC|_SOBIND_LOCK_HELD, cr);
so_unlock_single(so, SOLOCKED);
if (error) {
mutex_exit(&so->so_lock);
@@ -4243,20 +4363,20 @@ sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop)
* If sending to some other address discard the delayed
* error indication.
*/
- if (so->so_delayed_error) {
+ if (sti->sti_delayed_error) {
struct T_uderror_ind *tudi;
void *addr;
t_uscalar_t addrlen;
boolean_t match = B_FALSE;
- ASSERT(so->so_eaddr_mp);
- error = so->so_delayed_error;
- so->so_delayed_error = 0;
- tudi = (struct T_uderror_ind *)so->so_eaddr_mp->b_rptr;
+ ASSERT(sti->sti_eaddr_mp);
+ error = sti->sti_delayed_error;
+ sti->sti_delayed_error = 0;
+ tudi =
+ (struct T_uderror_ind *)sti->sti_eaddr_mp->b_rptr;
addrlen = tudi->DEST_length;
- addr = sogetoff(so->so_eaddr_mp,
- tudi->DEST_offset,
- addrlen, 1);
+ addr = sogetoff(sti->sti_eaddr_mp,
+ tudi->DEST_offset, addrlen, 1);
ASSERT(addr); /* Checked by strsock_proto */
switch (so->so_family) {
case AF_INET: {
@@ -4292,8 +4412,8 @@ sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop)
match = B_TRUE;
}
if (match) {
- freemsg(so->so_eaddr_mp);
- so->so_eaddr_mp = NULL;
+ freemsg(sti->sti_eaddr_mp);
+ sti->sti_eaddr_mp = NULL;
mutex_exit(&so->so_lock);
#ifdef DEBUG
dprintso(so, 0,
@@ -4303,8 +4423,8 @@ sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop)
#endif /* DEBUG */
return (error);
}
- freemsg(so->so_eaddr_mp);
- so->so_eaddr_mp = NULL;
+ freemsg(sti->sti_eaddr_mp);
+ sti->sti_eaddr_mp = NULL;
}
}
mutex_exit(&so->so_lock);
@@ -4316,7 +4436,7 @@ sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop)
val = 1;
error = sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE,
- &val, (t_uscalar_t)sizeof (val));
+ &val, (t_uscalar_t)sizeof (val), cr);
if (error)
return (error);
dontroute = 1;
@@ -4328,6 +4448,7 @@ sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop)
}
if (msg->msg_controllen != 0) {
if (!(so_mode & SM_CONNREQUIRED)) {
+ so_update_attrs(so, SOMOD);
error = sosend_dgramcmsg(so, name, namelen, uiop,
msg->msg_control, msg->msg_controllen, flags);
} else {
@@ -4336,6 +4457,7 @@ sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop)
error = EOPNOTSUPP;
goto done;
}
+ so_update_attrs(so, SOMOD);
error = sosend_svccmsg(so, uiop,
!(flags & MSG_EOR),
msg->msg_control, msg->msg_controllen,
@@ -4344,6 +4466,7 @@ sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop)
goto done;
}
+ so_update_attrs(so, SOMOD);
if (!(so_mode & SM_CONNREQUIRED)) {
/*
* If there is no SO_DONTROUTE to turn off return immediately
@@ -4368,20 +4491,25 @@ sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop)
} else {
if (so_mode & SM_BYTESTREAM) {
/* Byte stream transport - use write */
-
dprintso(so, 1, ("sotpi_sendmsg: write\n"));
+
+ /* Send M_DATA messages */
+ if ((sti->sti_nl7c_flags & NL7C_ENABLED) &&
+ (error = nl7c_data(so, uiop)) >= 0) {
+ /* NL7C consumed the data */
+ return (error);
+ }
/*
* If there is no SO_DONTROUTE to turn off,
- * SS_DIRECT is on, and there is no flow
+ * sti_direct is on, and there is no flow
* control, we can take the fast path.
*/
- if (!dontroute &&
- (so_state & SS_DIRECT) &&
+ if (!dontroute && sti->sti_direct != 0 &&
canputnext(SOTOV(so)->v_stream->sd_wrq)) {
return (sostream_direct(so, uiop,
- NULL, CRED()));
+ NULL, cr));
}
- error = strwrite(SOTOV(so), uiop, CRED());
+ error = strwrite(SOTOV(so), uiop, cr);
goto done;
}
prim = T_DATA_REQ;
@@ -4404,12 +4532,129 @@ done:
val = 0;
(void) sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE,
- &val, (t_uscalar_t)sizeof (val));
+ &val, (t_uscalar_t)sizeof (val), cr);
}
return (error);
}
/*
+ * kstrwritemp() has semantics very similar to those of strwrite().
+ * The main difference is that it obtains mblks from the caller and
+ * does not copy from user buffers to kernel buffers the way
+ * strwrite() does.
+ *
+ * Currently, this routine is used by sendfile to send data allocated
+ * within the kernel without any copying. This interface does not use the
+ * synchronous stream interface as synch. stream interface implies
+ * copying.
+ *
+ * Arguments:
+ *   vp    - vnode to write to; it must have a stream (v_stream) and is
+ *           converted to its sonode with VTOSO().
+ *   mp    - the message to pass downstream.
+ *   fmode - file mode flags handed to strwaitq() whenever canputnext()
+ *           fails; FNDELAY is masked off first if the stream head has
+ *           OLDNDELAY set.
+ *
+ * Returns:
+ *   - whatever sostream_direct() returns when the direct fast path is
+ *     taken;
+ *   - 0 once mp has been handed to putnext();
+ *   - ECOMM if the stream head's sd_wputdatafunc hook returns NULL;
+ *   - the strgeterr() result (replaced by EPIPE when STPLEX is clear
+ *     and SW_SIGPIPE is set in sd_wput_opt) if that result is non-zero;
+ *   - otherwise the strwaitq() result, with ENOMEM mapped to EAGAIN.
+ */
+int
+kstrwritemp(struct vnode *vp, mblk_t *mp, ushort_t fmode)
+{
+ struct stdata *stp;
+ struct queue *wqp;
+ mblk_t *newmp;
+ char waitflag;
+ int tempmode;
+ int error = 0;
+ int done = 0;
+ struct sonode *so;
+ boolean_t direct;
+
+ ASSERT(vp->v_stream);
+ stp = vp->v_stream;
+
+ so = VTOSO(vp);
+ direct = _SOTOTPI(so)->sti_direct;
+
+ /*
+ * This is the sockfs direct fast path. canputnext() need
+ * not be accurate so we don't grab the sd_lock here. If
+ * we get flow-controlled, we grab sd_lock just before the
+ * do..while loop below to emulate what strwrite() does.
+ *
+ * The fast path is only taken when the socket's sti_direct is
+ * set and none of STWRERR, STRHUP or STPLEX is set in sd_flag.
+ */
+ wqp = stp->sd_wrq;
+ if (canputnext(wqp) && direct &&
+ !(stp->sd_flag & (STWRERR|STRHUP|STPLEX))) {
+ return (sostream_direct(so, NULL, mp, CRED()));
+ } else if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
+ /*
+ * Fast check of flags before acquiring the lock; the
+ * error itself is fetched with strgeterr() under sd_lock.
+ * A non-zero error is reported as EPIPE when STPLEX is
+ * clear and SW_SIGPIPE is set in sd_wput_opt. If
+ * strgeterr() returns 0 we fall through to the normal
+ * (possibly blocking) path below.
+ */
+ mutex_enter(&stp->sd_lock);
+ error = strgeterr(stp, STWRERR|STRHUP|STPLEX, 0);
+ mutex_exit(&stp->sd_lock);
+ if (error != 0) {
+ if (!(stp->sd_flag & STPLEX) &&
+ (stp->sd_wput_opt & SW_SIGPIPE)) {
+ error = EPIPE;
+ }
+ return (error);
+ }
+ }
+
+ waitflag = WRITEWAIT;
+ if (stp->sd_flag & OLDNDELAY)
+ tempmode = fmode & ~FNDELAY;
+ else
+ tempmode = fmode;
+
+ mutex_enter(&stp->sd_lock);
+ do {
+ if (canputnext(wqp)) {
+ mutex_exit(&stp->sd_lock);
+ if (stp->sd_wputdatafunc != NULL) {
+ newmp = (stp->sd_wputdatafunc)(vp, mp, NULL,
+ NULL, NULL, NULL);
+ if (newmp == NULL) {
+ /*
+ * The hook returned no message to
+ * send. The caller will free mp.
+ */
+ return (ECOMM);
+ }
+ mp = newmp;
+ }
+ putnext(wqp, mp);
+ return (0);
+ }
+ error = strwaitq(stp, waitflag, (ssize_t)0, tempmode, -1,
+ &done);
+ } while (error == 0 && !done);
+
+ mutex_exit(&stp->sd_lock);
+ /*
+ * EAGAIN tells the application to try again. ENOMEM
+ * is returned only if the memory allocation size
+ * exceeds the physical limits of the system. ENOMEM
+ * can't be true here.
+ *
+ * NOTE(review): if strwaitq() sets "done" while returning 0, the
+ * loop above exits and 0 is returned although mp was never passed
+ * to putnext() - confirm that callers cope with that case.
+ */
+ if (error == ENOMEM)
+ error = EAGAIN;
+ return (error);
+}
+/*
+ * Send the kernel message *mpp on a sotpi socket by way of kstrwritemp().
+ *
+ * The request is rejected, in this order, with:
+ *   EAFNOSUPPORT - so_family is neither AF_INET nor AF_INET6
+ *   EPIPE        - SS_CANTSENDMORE is set in so_state
+ *   EOPNOTSUPP   - so_type is not SOCK_STREAM
+ *   ENOTCONN     - SS_ISCONNECTED is not set in so_state
+ * These checks read so_state without taking so_lock.
+ *
+ * Otherwise the message is passed to kstrwritemp() together with
+ * so_vnode and fflag, and the result of that call is returned. On
+ * success *mpp is set to NULL; on failure *mpp is left untouched.
+ *
+ * msg and cr are not used here (hence ARGSUSED).
+ */
+/* ARGSUSED */
+static int
+sotpi_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag,
+ struct cred *cr, mblk_t **mpp)
+{
+ int error;
+
+ if (so->so_family != AF_INET && so->so_family != AF_INET6)
+ return (EAFNOSUPPORT);
+
+ if (so->so_state & SS_CANTSENDMORE)
+ return (EPIPE);
+
+ if (so->so_type != SOCK_STREAM)
+ return (EOPNOTSUPP);
+
+ if ((so->so_state & SS_ISCONNECTED) == 0)
+ return (ENOTCONN);
+
+ error = kstrwritemp(so->so_vnode, *mpp, fflag);
+ if (error == 0)
+ *mpp = NULL;
+ return (error);
+}
+
+/*
* Sending data on a datagram socket.
* Assumes caller has verified that SS_ISBOUND etc. are set.
*/
@@ -4429,6 +4674,7 @@ sodgram_direct(struct sonode *so, struct sockaddr *name,
queue_t *udp_wq;
boolean_t connected;
mblk_t *mpdata = NULL;
+ sotpi_info_t *sti = SOTOTPI(so);
ASSERT(name != NULL && namelen != 0);
ASSERT(!(so->so_mode & SM_CONNREQUIRED));
@@ -4438,7 +4684,7 @@ sodgram_direct(struct sonode *so, struct sockaddr *name,
/* Caller checked for proper length */
len = uiop->uio_resid;
- ASSERT(len <= so->so_tidu_size);
+ ASSERT(len <= sti->sti_tidu_size);
/* Length and family checks have been done by caller */
ASSERT(name->sa_family == so->so_family);
@@ -4640,22 +4886,34 @@ slow_send:
}
/*
- * Update so_faddr by asking the transport (unless AF_UNIX).
+ * Update sti_faddr by asking the transport (unless AF_UNIX).
*/
+/* ARGSUSED */
int
-sotpi_getpeername(struct sonode *so)
+sotpi_getpeername(struct sonode *so, struct sockaddr *name, socklen_t *namelen,
+ boolean_t accept, struct cred *cr)
{
struct strbuf strbuf;
int error = 0, res;
void *addr;
t_uscalar_t addrlen;
k_sigset_t smask;
+ sotpi_info_t *sti = SOTOTPI(so);
dprintso(so, 1, ("sotpi_getpeername(%p) %s\n",
(void *)so, pr_state(so->so_state, so->so_mode)));
+ ASSERT(*namelen > 0);
mutex_enter(&so->so_lock);
so_lock_single(so); /* Set SOLOCKED */
+
+ if (accept) {
+ bcopy(sti->sti_faddr_sa, name,
+ MIN(*namelen, sti->sti_faddr_len));
+ *namelen = sti->sti_faddr_noxlate ? 0: sti->sti_faddr_len;
+ goto done;
+ }
+
if (!(so->so_state & SS_ISCONNECTED)) {
error = ENOTCONN;
goto done;
@@ -4668,27 +4926,39 @@ sotpi_getpeername(struct sonode *so)
}
goto done;
}
+
+ if (sti->sti_faddr_valid) {
+ bcopy(sti->sti_faddr_sa, name,
+ MIN(*namelen, sti->sti_faddr_len));
+ *namelen = sti->sti_faddr_noxlate ? 0: sti->sti_faddr_len;
+ goto done;
+ }
+
#ifdef DEBUG
dprintso(so, 1, ("sotpi_getpeername (local): %s\n",
- pr_addr(so->so_family, so->so_faddr_sa,
- (t_uscalar_t)so->so_faddr_len)));
+ pr_addr(so->so_family, sti->sti_faddr_sa,
+ (t_uscalar_t)sti->sti_faddr_len)));
#endif /* DEBUG */
if (so->so_family == AF_UNIX) {
/* Transport has different name space - return local info */
+ if (sti->sti_faddr_noxlate)
+ *namelen = 0;
error = 0;
goto done;
}
- ASSERT(so->so_faddr_sa);
+ ASSERT(so->so_family != AF_UNIX && sti->sti_faddr_noxlate == 0);
+
+ ASSERT(sti->sti_faddr_sa);
/* Allocate local buffer to use with ioctl */
- addrlen = (t_uscalar_t)so->so_faddr_maxlen;
+ addrlen = (t_uscalar_t)sti->sti_faddr_maxlen;
mutex_exit(&so->so_lock);
addr = kmem_alloc(addrlen, KM_SLEEP);
/*
* Issue TI_GETPEERNAME with signals masked.
- * Put the result in so_faddr_sa so that getpeername works after
+ * Put the result in sti_faddr_sa so that getpeername works after
* a shutdown(output).
* If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted
* back to the socket.
@@ -4699,16 +4969,16 @@ sotpi_getpeername(struct sonode *so)
sigintr(&smask, 0);
res = 0;
- ASSERT(CRED());
+ ASSERT(cr);
error = strioctl(SOTOV(so), TI_GETPEERNAME, (intptr_t)&strbuf,
- 0, K_TO_K, CRED(), &res);
+ 0, K_TO_K, cr, &res);
sigunintr(&smask);
mutex_enter(&so->so_lock);
/*
* If there is an error record the error in so_error but don't fail
* the getpeername. Instead fallback on the recorded
- * so->so_faddr_sa.
+ * sti->sti_faddr_sa.
*/
if (error) {
/*
@@ -4732,16 +5002,19 @@ sotpi_getpeername(struct sonode *so)
error = 0;
} else if (res == 0 && strbuf.len > 0 &&
(so->so_state & SS_ISCONNECTED)) {
- ASSERT(strbuf.len <= (int)so->so_faddr_maxlen);
- so->so_faddr_len = (socklen_t)strbuf.len;
- bcopy(addr, so->so_faddr_sa, so->so_faddr_len);
- so->so_state |= SS_FADDR_VALID;
+ ASSERT(strbuf.len <= (int)sti->sti_faddr_maxlen);
+ sti->sti_faddr_len = (socklen_t)strbuf.len;
+ bcopy(addr, sti->sti_faddr_sa, sti->sti_faddr_len);
+ sti->sti_faddr_valid = 1;
+
+ bcopy(addr, name, MIN(*namelen, sti->sti_faddr_len));
+ *namelen = sti->sti_faddr_len;
}
kmem_free(addr, addrlen);
#ifdef DEBUG
dprintso(so, 1, ("sotpi_getpeername (tp): %s\n",
- pr_addr(so->so_family, so->so_faddr_sa,
- (t_uscalar_t)so->so_faddr_len)));
+ pr_addr(so->so_family, sti->sti_faddr_sa,
+ (t_uscalar_t)sti->sti_faddr_len)));
#endif /* DEBUG */
done:
so_unlock_single(so, SOLOCKED);
@@ -4750,42 +5023,39 @@ done:
}
/*
- * Update so_laddr by asking the transport (unless AF_UNIX).
+ * Update sti_laddr by asking the transport (unless AF_UNIX).
*/
int
-sotpi_getsockname(struct sonode *so)
+sotpi_getsockname(struct sonode *so, struct sockaddr *name, socklen_t *namelen,
+ struct cred *cr)
{
struct strbuf strbuf;
int error = 0, res;
void *addr;
t_uscalar_t addrlen;
k_sigset_t smask;
+ sotpi_info_t *sti = SOTOTPI(so);
dprintso(so, 1, ("sotpi_getsockname(%p) %s\n",
(void *)so, pr_state(so->so_state, so->so_mode)));
+ ASSERT(*namelen > 0);
mutex_enter(&so->so_lock);
so_lock_single(so); /* Set SOLOCKED */
- if (!(so->so_state & SS_ISBOUND) && so->so_family != AF_UNIX) {
- /* Return an all zero address except for the family */
- if (so->so_family == AF_INET)
- so->so_laddr_len = (socklen_t)sizeof (sin_t);
- else if (so->so_family == AF_INET6)
- so->so_laddr_len = (socklen_t)sizeof (sin6_t);
- ASSERT(so->so_laddr_len <= so->so_laddr_maxlen);
- bzero(so->so_laddr_sa, so->so_laddr_len);
- /*
- * Can not assume there is a sa_family for all
- * protocol families.
- */
- if (so->so_family == AF_INET || so->so_family == AF_INET6)
- so->so_laddr_sa->sa_family = so->so_family;
- }
+
#ifdef DEBUG
+
dprintso(so, 1, ("sotpi_getsockname (local): %s\n",
- pr_addr(so->so_family, so->so_laddr_sa,
- (t_uscalar_t)so->so_laddr_len)));
+ pr_addr(so->so_family, sti->sti_laddr_sa,
+ (t_uscalar_t)sti->sti_laddr_len)));
#endif /* DEBUG */
+ if (sti->sti_laddr_valid) {
+ bcopy(sti->sti_laddr_sa, name,
+ MIN(*namelen, sti->sti_laddr_len));
+ *namelen = sti->sti_laddr_len;
+ goto done;
+ }
+
if (so->so_family == AF_UNIX) {
/* Transport has different name space - return local info */
error = 0;
@@ -4796,14 +5066,15 @@ sotpi_getsockname(struct sonode *so)
error = 0;
goto done;
}
+
/* Allocate local buffer to use with ioctl */
- addrlen = (t_uscalar_t)so->so_laddr_maxlen;
+ addrlen = (t_uscalar_t)sti->sti_laddr_maxlen;
mutex_exit(&so->so_lock);
addr = kmem_alloc(addrlen, KM_SLEEP);
/*
* Issue TI_GETMYNAME with signals masked.
- * Put the result in so_laddr_sa so that getsockname works after
+ * Put the result in sti_laddr_sa so that getsockname works after
* a shutdown(output).
* If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted
* back to the socket.
@@ -4814,16 +5085,16 @@ sotpi_getsockname(struct sonode *so)
sigintr(&smask, 0);
res = 0;
- ASSERT(CRED());
+ ASSERT(cr);
error = strioctl(SOTOV(so), TI_GETMYNAME, (intptr_t)&strbuf,
- 0, K_TO_K, CRED(), &res);
+ 0, K_TO_K, cr, &res);
sigunintr(&smask);
mutex_enter(&so->so_lock);
/*
* If there is an error record the error in so_error but don't fail
* the getsockname. Instead fallback on the recorded
- * so->so_laddr_sa.
+ * sti->sti_laddr_sa.
*/
if (error) {
/*
@@ -4844,16 +5115,19 @@ sotpi_getsockname(struct sonode *so)
error = 0;
} else if (res == 0 && strbuf.len > 0 &&
(so->so_state & SS_ISBOUND)) {
- ASSERT(strbuf.len <= (int)so->so_laddr_maxlen);
- so->so_laddr_len = (socklen_t)strbuf.len;
- bcopy(addr, so->so_laddr_sa, so->so_laddr_len);
- so->so_state |= SS_LADDR_VALID;
+ ASSERT(strbuf.len <= (int)sti->sti_laddr_maxlen);
+ sti->sti_laddr_len = (socklen_t)strbuf.len;
+ bcopy(addr, sti->sti_laddr_sa, sti->sti_laddr_len);
+ sti->sti_laddr_valid = 1;
+
+ bcopy(addr, name, MIN(sti->sti_laddr_len, *namelen));
+ *namelen = sti->sti_laddr_len;
}
kmem_free(addr, addrlen);
#ifdef DEBUG
dprintso(so, 1, ("sotpi_getsockname (tp): %s\n",
- pr_addr(so->so_family, so->so_laddr_sa,
- (t_uscalar_t)so->so_laddr_len)));
+ pr_addr(so->so_family, sti->sti_laddr_sa,
+ (t_uscalar_t)sti->sti_laddr_len)));
#endif /* DEBUG */
done:
so_unlock_single(so, SOLOCKED);
@@ -4868,9 +5142,10 @@ done:
*
* On return, at most *optlenp bytes are copied to optval.
*/
+/* ARGSUSED */
int
sotpi_getsockopt(struct sonode *so, int level, int option_name,
- void *optval, socklen_t *optlenp, int flags)
+ void *optval, socklen_t *optlenp, int flags, struct cred *cr)
{
struct T_optmgmt_req optmgmt_req;
struct T_optmgmt_ack *optmgmt_ack;
@@ -4882,6 +5157,8 @@ sotpi_getsockopt(struct sonode *so, int level, int option_name,
t_uscalar_t maxlen = *optlenp;
t_uscalar_t len;
uint32_t value;
+ struct timeval tmo_val; /* used for SO_RCVTIMEO, SO_SNDTIMEO */
+ struct so_snd_bufinfo snd_bufinfo; /* used for zero copy */
dprintso(so, 1, ("sotpi_getsockopt(%p, 0x%x, 0x%x, %p, %p) %s\n",
(void *)so, level, option_name, optval, (void *)optlenp,
@@ -4914,8 +5191,6 @@ sotpi_getsockopt(struct sonode *so, int level, int option_name,
#ifdef notyet
case SO_SNDLOWAT:
case SO_RCVLOWAT:
- case SO_SNDTIMEO:
- case SO_RCVTIMEO:
#endif /* notyet */
case SO_DOMAIN:
case SO_DGRAM_ERRIND:
@@ -4925,6 +5200,14 @@ sotpi_getsockopt(struct sonode *so, int level, int option_name,
goto done2;
}
break;
+ case SO_RCVTIMEO:
+ case SO_SNDTIMEO:
+ if (maxlen < (t_uscalar_t)sizeof (struct timeval)) {
+ error = EINVAL;
+ eprintsoline(so, error);
+ goto done2;
+ }
+ break;
case SO_LINGER:
if (maxlen < (t_uscalar_t)sizeof (struct linger)) {
error = EINVAL;
@@ -4932,6 +5215,14 @@ sotpi_getsockopt(struct sonode *so, int level, int option_name,
goto done2;
}
break;
+ case SO_SND_BUFINFO:
+ if (maxlen < (t_uscalar_t)
+ sizeof (struct so_snd_bufinfo)) {
+ error = EINVAL;
+ eprintsoline(so, error);
+ goto done2;
+ }
+ break;
}
len = (t_uscalar_t)sizeof (uint32_t); /* Default */
@@ -4943,7 +5234,7 @@ sotpi_getsockopt(struct sonode *so, int level, int option_name,
goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */
case SO_ERROR:
- value = sogeterr(so);
+ value = sogeterr(so, B_TRUE);
option = &value;
goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */
@@ -5072,15 +5363,33 @@ sotpi_getsockopt(struct sonode *so, int level, int option_name,
value = so->so_rcvlowat;
option = &value;
break;
+#endif /* notyet */
case SO_SNDTIMEO:
- value = so->so_sndtimeo;
- option = &value;
+ case SO_RCVTIMEO: {
+ clock_t val;
+ if (option_name == SO_RCVTIMEO)
+ val = drv_hztousec(so->so_rcvtimeo);
+ else
+ val = drv_hztousec(so->so_sndtimeo);
+ tmo_val.tv_sec = val / (1000 * 1000);
+ tmo_val.tv_usec = val % (1000 * 1000);
+ option = &tmo_val;
+ len = (t_uscalar_t)sizeof (struct timeval);
break;
- case SO_RCVTIMEO:
- value = so->so_rcvtimeo;
- option = &value;
+ }
+ case SO_SND_BUFINFO: {
+ snd_bufinfo.sbi_wroff =
+ (so->so_proto_props).sopp_wroff;
+ snd_bufinfo.sbi_maxblk =
+ (so->so_proto_props).sopp_maxblk;
+ snd_bufinfo.sbi_maxpsz =
+ (so->so_proto_props).sopp_maxpsz;
+ snd_bufinfo.sbi_tail =
+ (so->so_proto_props).sopp_tail;
+ option = &snd_bufinfo;
+ len = (t_uscalar_t)sizeof (struct so_snd_bufinfo);
break;
-#endif /* notyet */
+ }
}
}
@@ -5159,6 +5468,7 @@ done:
done2:
so_unlock_single(so, SOLOCKED);
mutex_exit(&so->so_lock);
+
return (error);
}
@@ -5168,9 +5478,10 @@ done2:
* SOL_SOCKET options will not fail just because the T_SVR4_OPTMGMT_REQ fails -
* setsockopt has to work even if the transport does not support the option.
*/
+/* ARGSUSED */
int
sotpi_setsockopt(struct sonode *so, int level, int option_name,
- const void *optval, t_uscalar_t optlen)
+ const void *optval, t_uscalar_t optlen, struct cred *cr)
{
struct T_optmgmt_req optmgmt_req;
struct opthdr oh;
@@ -5182,7 +5493,6 @@ sotpi_setsockopt(struct sonode *so, int level, int option_name,
(void *)so, level, option_name, optval, optlen,
pr_state(so->so_state, so->so_mode)));
-
/* X/Open requires this check */
if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
if (xnet_check_print)
@@ -5190,12 +5500,6 @@ sotpi_setsockopt(struct sonode *so, int level, int option_name,
return (EINVAL);
}
- /* Caller allocates aligned optval, or passes null */
- ASSERT(((uintptr_t)optval & (sizeof (t_scalar_t) - 1)) == 0);
- /* If optval is null optlen is 0, and vice-versa */
- ASSERT(optval != NULL || optlen == 0);
- ASSERT(optlen != 0 || optval == NULL);
-
mutex_enter(&so->so_lock);
so_lock_single(so); /* Set SOLOCKED */
mutex_exit(&so->so_lock);
@@ -5207,8 +5511,9 @@ sotpi_setsockopt(struct sonode *so, int level, int option_name,
*/
if ((level == SOL_SOCKET || level == IPPROTO_TCP) &&
(so->so_family == AF_INET || so->so_family == AF_INET6) &&
- (so->so_version == SOV_SOCKSTREAM) && (so->so_priv != NULL)) {
- tcp_t *tcp = so->so_priv;
+ (so->so_version == SOV_SOCKSTREAM) &&
+ (so->so_proto_handle != NULL)) {
+ tcp_t *tcp = (tcp_t *)so->so_proto_handle;
boolean_t onoff;
#define intvalue (*(int32_t *)optval)
@@ -5233,6 +5538,18 @@ sotpi_setsockopt(struct sonode *so, int level, int option_name,
onoff = intvalue != 0;
handled = B_TRUE;
break;
+ case SO_SNDTIMEO:
+ case SO_RCVTIMEO:
+ if (optlen !=
+ (t_uscalar_t)sizeof (struct timeval)) {
+ error = EINVAL;
+ eprintsoline(so, error);
+ mutex_enter(&so->so_lock);
+ goto done2;
+ }
+ ASSERT(optval);
+ handled = B_TRUE;
+ break;
case SO_LINGER:
if (optlen !=
(t_uscalar_t)sizeof (struct linger)) {
@@ -5373,7 +5690,7 @@ sotpi_setsockopt(struct sonode *so, int level, int option_name,
mutex_enter(&so->so_lock);
if (error) {
eprintsoline(so, error);
- goto done;
+ goto done2;
}
error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK,
(t_uscalar_t)sizeof (struct T_optmgmt_ack), &mp, 0);
@@ -5406,8 +5723,6 @@ done:
#ifdef notyet
case SO_SNDLOWAT:
case SO_RCVLOWAT:
- case SO_SNDTIMEO:
- case SO_RCVTIMEO:
#endif /* notyet */
case SO_DGRAM_ERRIND:
if (optlen != (t_uscalar_t)sizeof (int32_t)) {
@@ -5418,6 +5733,16 @@ done:
ASSERT(optval);
handled = B_TRUE;
break;
+ case SO_SNDTIMEO:
+ case SO_RCVTIMEO:
+ if (optlen != (t_uscalar_t)sizeof (struct timeval)) {
+ error = EINVAL;
+ eprintsoline(so, error);
+ goto done2;
+ }
+ ASSERT(optval);
+ handled = B_TRUE;
+ break;
case SO_LINGER:
if (optlen != (t_uscalar_t)sizeof (struct linger)) {
error = EINVAL;
@@ -5474,19 +5799,19 @@ done:
case SO_DGRAM_ERRIND:
if (intvalue != 0) {
dprintso(so, 1,
- ("sotpi_setsockopt: setting 0x%x\n",
+ ("socket_setsockopt: setting 0x%x\n",
option_name));
so->so_options |= option_name;
} else {
dprintso(so, 1,
- ("sotpi_setsockopt: clearing 0x%x\n",
+ ("socket_setsockopt: clearing 0x%x\n",
option_name));
so->so_options &= ~option_name;
}
break;
/*
* The following options are only returned by us when the
- * T_SVR4_OPTMGMT_REQ fails.
+ * transport layer fails.
* XXX XPG 4.2 applications retrieve SO_RCVBUF from sockfs
* since the transport might adjust the value and not
* return exactly what was set by the application.
@@ -5497,6 +5822,9 @@ done:
case SO_RCVBUF:
so->so_rcvbuf = intvalue;
break;
+ case SO_RCVPSH:
+ so->so_rcv_timer_interval = intvalue;
+ break;
#ifdef notyet
/*
* We do not implement the semantics of these options
@@ -5508,13 +5836,17 @@ done:
case SO_RCVLOWAT:
so->so_rcvlowat = intvalue;
break;
+#endif /* notyet */
case SO_SNDTIMEO:
- so->so_sndtimeo = intvalue;
- break;
- case SO_RCVTIMEO:
- so->so_rcvtimeo = intvalue;
+ case SO_RCVTIMEO: {
+ struct timeval *tl = (struct timeval *)optval;
+ clock_t val = tl->tv_sec * 1000 * 1000 + tl->tv_usec;
+ if (option_name == SO_RCVTIMEO)
+ so->so_rcvtimeo = drv_usectohz(val);
+ else
+ so->so_sndtimeo = drv_usectohz(val);
break;
-#endif /* notyet */
+ }
}
#undef intvalue
@@ -5529,8 +5861,1121 @@ done:
}
}
done2:
-ret:
so_unlock_single(so, SOLOCKED);
mutex_exit(&so->so_lock);
return (error);
}
+
+/*
+ * Close the TPI side of a socket.
+ *
+ * Calls nl7c_close() if NL7C was enabled, tears down the stream (when one
+ * is still attached to the vnode) with strclose(), flushes the saved
+ * T_DISCON_IND and drops the clone driver hold for SOCLONE sockets.
+ * SOLOCKED is held throughout; so_lock itself is dropped around the
+ * stream teardown.
+ *
+ * Returns the strclose() result, or 0 when there was no stream to close.
+ */
+/* ARGSUSED */
+int
+sotpi_close(struct sonode *so, int flag, struct cred *cr)
+{
+ struct vnode *vp = SOTOV(so);
+ dev_t dev;
+ int error = 0;
+ sotpi_info_t *sti = SOTOTPI(so);
+
+ dprintso(so, 1, ("sotpi_close(%p, %x) %s\n",
+ (void *)vp, flag, pr_state(so->so_state, so->so_mode)));
+
+ /* Remember the device now; it is needed after the stream is gone. */
+ dev = sti->sti_dev;
+
+ ASSERT(STREAMSTAB(getmajor(dev)));
+
+ mutex_enter(&so->so_lock);
+ so_lock_single(so); /* Set SOLOCKED */
+
+ /*
+ * Only call NL7C's close on last open reference.
+ */
+ if (sti->sti_nl7c_flags & NL7C_ENABLED) {
+ sti->sti_nl7c_flags = 0;
+ nl7c_close(so);
+ }
+
+ /*
+ * Only call the close routine when the last open reference through
+ * any [s, v]node goes away.
+ */
+ if (vp->v_stream != NULL) {
+ vnode_t *ux_vp;
+
+ if (so->so_family == AF_UNIX) {
+ /* Could avoid this when CANTSENDMORE for !dgram */
+ so_unix_close(so);
+ }
+
+ /* so_lock is not held across the stream teardown below. */
+ mutex_exit(&so->so_lock);
+ /*
+ * Disassemble the linkage from the AF_UNIX underlying file
+ * system vnode to this socket (by atomically clearing
+ * v_stream in vn_rele_stream) before strclose clears sd_vnode
+ * and frees the stream head.
+ */
+ if ((ux_vp = sti->sti_ux_bound_vp) != NULL) {
+ ASSERT(ux_vp->v_stream);
+ sti->sti_ux_bound_vp = NULL;
+ vn_rele_stream(ux_vp);
+ }
+ if (so->so_family == AF_INET || so->so_family == AF_INET6) {
+ /* Remove the data hooks and release any KSSL state. */
+ strsetrwputdatahooks(SOTOV(so), NULL, NULL);
+ if (sti->sti_kssl_ent != NULL) {
+ kssl_release_ent(sti->sti_kssl_ent, so,
+ sti->sti_kssl_type);
+ sti->sti_kssl_ent = NULL;
+ }
+ if (sti->sti_kssl_ctx != NULL) {
+ kssl_release_ctx(sti->sti_kssl_ctx);
+ sti->sti_kssl_ctx = NULL;
+ }
+ sti->sti_kssl_type = KSSL_NO_PROXY;
+ }
+ error = strclose(vp, flag, cr);
+ vp->v_stream = NULL;
+ mutex_enter(&so->so_lock);
+ }
+
+ /*
+ * Flush the T_DISCON_IND on sti_discon_ind_mp.
+ */
+ so_flush_discon_ind(so);
+
+ so_unlock_single(so, SOLOCKED);
+ mutex_exit(&so->so_lock);
+
+ /*
+ * Needed for STREAMs.
+ * Decrement the device driver's reference count for streams
+ * opened via the clone dip. The driver was held in clone_open().
+ * The absence of clone_close() forces this asymmetry.
+ */
+ if (so->so_flag & SOCLONE)
+ ddi_rele_driver(getmajor(dev));
+
+ return (error);
+}
+
+/*
+ * ioctl entry point for TPI sockets.
+ *
+ * Stream plumbing ioctls (I_FIND, I_LIST, I_LOOK, I_POP, I_PUSH) are
+ * serialized under sti_plumb_lock and handed to socktpi_plumbioctl();
+ * _I_INSERT and _I_REMOVE are rejected with EOPNOTSUPP. When the socket
+ * is in SOV_STREAM mode every other command goes straight to strioctl().
+ * Otherwise the socket-specific commands are handled here and anything
+ * unrecognized is passed to strioctl(), except that STREAMS ioctls on a
+ * SOV_SOCKBSD socket fail with EOPNOTSUPP.
+ *
+ * Returns 0 on success or an errno value.
+ */
+static int
+sotpi_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode,
+ struct cred *cr, int32_t *rvalp)
+{
+ struct vnode *vp = SOTOV(so);
+ sotpi_info_t *sti = SOTOTPI(so);
+ int error = 0;
+
+ dprintso(so, 0, ("sotpi_ioctl: cmd 0x%x, arg 0x%lx, state %s\n",
+ cmd, arg, pr_state(so->so_state, so->so_mode)));
+
+ switch (cmd) {
+ case _I_INSERT:
+ case _I_REMOVE:
+ /*
+ * Since there's no compelling reason to support these ioctls
+ * on sockets, and doing so would increase the complexity
+ * markedly, prevent it.
+ */
+ return (EOPNOTSUPP);
+
+ case I_FIND:
+ case I_LIST:
+ case I_LOOK:
+ case I_POP:
+ case I_PUSH:
+ /*
+ * To prevent races and inconsistencies between the actual
+ * state of the stream and the state according to the sonode,
+ * we serialize all operations which modify or operate on the
+ * list of modules on the socket's stream.
+ */
+ mutex_enter(&sti->sti_plumb_lock);
+ error = socktpi_plumbioctl(vp, cmd, arg, mode, cr, rvalp);
+ mutex_exit(&sti->sti_plumb_lock);
+ return (error);
+
+ default:
+ if (so->so_version != SOV_STREAM)
+ break;
+
+ /*
+ * The imaginary "sockmod" has been popped; act as a stream.
+ */
+ return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp));
+ }
+
+ ASSERT(so->so_version != SOV_STREAM);
+
+ /*
+ * Process socket-specific ioctls.
+ */
+ switch (cmd) {
+ case FIONBIO: {
+ int32_t value;
+
+ if (so_copyin((void *)arg, &value, sizeof (int32_t),
+ (mode & (int)FKIOCTL)))
+ return (EFAULT);
+
+ /* Non-zero turns SS_NDELAY on, zero turns it off. */
+ mutex_enter(&so->so_lock);
+ if (value) {
+ so->so_state |= SS_NDELAY;
+ } else {
+ so->so_state &= ~SS_NDELAY;
+ }
+ mutex_exit(&so->so_lock);
+ return (0);
+ }
+
+ case FIOASYNC: {
+ int32_t value;
+
+ if (so_copyin((void *)arg, &value, sizeof (int32_t),
+ (mode & (int)FKIOCTL)))
+ return (EFAULT);
+
+ mutex_enter(&so->so_lock);
+ /*
+ * SS_ASYNC flag not already set correctly?
+ * (!value != !(so->so_state & SS_ASYNC))
+ * but some engineers find that too hard to read.
+ */
+ if (value == 0 && (so->so_state & SS_ASYNC) != 0 ||
+ value != 0 && (so->so_state & SS_ASYNC) == 0)
+ error = so_flip_async(so, vp, mode, cr);
+ mutex_exit(&so->so_lock);
+ return (error);
+ }
+
+ case SIOCSPGRP:
+ case FIOSETOWN: {
+ pid_t pgrp;
+
+ if (so_copyin((void *)arg, &pgrp, sizeof (pid_t),
+ (mode & (int)FKIOCTL)))
+ return (EFAULT);
+
+ mutex_enter(&so->so_lock);
+ dprintso(so, 1, ("setown: new %d old %d\n", pgrp, so->so_pgrp));
+ /* Any change? */
+ if (pgrp != so->so_pgrp)
+ error = so_set_siggrp(so, vp, pgrp, mode, cr);
+ mutex_exit(&so->so_lock);
+ return (error);
+ }
+ case SIOCGPGRP:
+ case FIOGETOWN:
+ /* Note: so_pgrp is read here without taking so_lock. */
+ if (so_copyout(&so->so_pgrp, (void *)arg,
+ sizeof (pid_t), (mode & (int)FKIOCTL)))
+ return (EFAULT);
+ return (0);
+
+ case SIOCATMARK: {
+ int retval;
+ uint_t so_state;
+
+ /*
+ * strwaitmark has a finite timeout after which it
+ * returns -1 if the mark state is undetermined.
+ * In order to avoid any race between the mark state
+ * in sockfs and the mark state in the stream head this
+ * routine loops until the mark state can be determined
+ * (or the urgent data indication has been removed by some
+ * other thread).
+ */
+ do {
+ /* Take a consistent snapshot of the state flags. */
+ mutex_enter(&so->so_lock);
+ so_state = so->so_state;
+ mutex_exit(&so->so_lock);
+ if (so_state & SS_RCVATMARK) {
+ retval = 1;
+ } else if (!(so_state & SS_OOBPEND)) {
+ /*
+ * No SIGURG has been generated -- there is no
+ * pending or present urgent data. Thus can't
+ * possibly be at the mark.
+ */
+ retval = 0;
+ } else {
+ /*
+ * Have the stream head wait until there is
+ * either some messages on the read queue, or
+ * STRATMARK or STRNOTATMARK gets set. The
+ * STRNOTATMARK flag is used so that the
+ * transport can send up a MSGNOTMARKNEXT
+ * M_DATA to indicate that it is not
+ * at the mark and additional data is not about
+ * to be sent upstream.
+ *
+ * If the mark state is undetermined this will
+ * return -1 and we will loop rechecking the
+ * socket state.
+ */
+ retval = strwaitmark(vp);
+ }
+ } while (retval == -1);
+
+ if (so_copyout(&retval, (void *)arg, sizeof (int),
+ (mode & (int)FKIOCTL)))
+ return (EFAULT);
+ return (0);
+ }
+
+ case I_FDINSERT:
+ case I_SENDFD:
+ case I_RECVFD:
+ case I_ATMARK:
+ case _SIOCSOCKFALLBACK:
+ /*
+ * These ioctls do not apply to sockets. I_FDINSERT can be
+ * used to send M_PROTO messages without modifying the socket
+ * state. I_SENDFD/RECVFD should not be used for socket file
+ * descriptor passing since they assume a twisted stream.
+ * SIOCATMARK must be used instead of I_ATMARK.
+ *
+ * _SIOCSOCKFALLBACK from an application should never be
+ * processed. It is only generated by socktpi_open() or
+ * in response to I_POP or I_PUSH.
+ */
+#ifdef DEBUG
+ zcmn_err(getzoneid(), CE_WARN,
+ "Unsupported STREAMS ioctl 0x%x on socket. "
+ "Pid = %d\n", cmd, curproc->p_pid);
+#endif /* DEBUG */
+ return (EOPNOTSUPP);
+
+ case _I_GETPEERCRED:
+ /* Only accepted from in-kernel callers (FKIOCTL set). */
+ if ((mode & FKIOCTL) == 0)
+ return (EINVAL);
+
+ mutex_enter(&so->so_lock);
+ if ((so->so_mode & SM_CONNREQUIRED) == 0) {
+ error = ENOTSUP;
+ } else if ((so->so_state & SS_ISCONNECTED) == 0) {
+ error = ENOTCONN;
+ } else if (so->so_peercred != NULL) {
+ /* arg is used directly as a kernel k_peercred_t pointer. */
+ k_peercred_t *kp = (k_peercred_t *)arg;
+ kp->pc_cr = so->so_peercred;
+ kp->pc_cpid = so->so_cpid;
+ /* The cred handed back carries its own hold. */
+ crhold(so->so_peercred);
+ } else {
+ error = EINVAL;
+ }
+ mutex_exit(&so->so_lock);
+ return (error);
+
+ default:
+ /*
+ * Do the higher-order bits of the ioctl cmd indicate
+ * that it is an I_* streams ioctl?
+ */
+ if ((cmd & 0xffffff00U) == STR &&
+ so->so_version == SOV_SOCKBSD) {
+#ifdef DEBUG
+ zcmn_err(getzoneid(), CE_WARN,
+ "Unsupported STREAMS ioctl 0x%x on socket. "
+ "Pid = %d\n", cmd, curproc->p_pid);
+#endif /* DEBUG */
+ return (EOPNOTSUPP);
+ }
+ return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp));
+ }
+}
+
+/*
+ * Handle plumbing-related ioctls (I_PUSH, I_POP, I_LIST, I_LOOK, I_FIND).
+ *
+ * The caller must hold sti_plumb_lock. sockfs pretends that a module
+ * named "sockmod" sits on the stream at position sti_pushcnt: popping it
+ * switches the socket to stream behaviour and pushing it switches back.
+ * SOV_SOCKBSD sockets do not support any of these ioctls.
+ *
+ * Returns 0 on success or an errno value; panics on any other cmd.
+ */
+static int
+socktpi_plumbioctl(struct vnode *vp, int cmd, intptr_t arg, int mode,
+ struct cred *cr, int32_t *rvalp)
+{
+ static const char sockmod_name[] = "sockmod";
+ struct sonode *so = VTOSO(vp);
+ char mname[FMNAMESZ + 1];
+ int error;
+ sotpi_info_t *sti = SOTOTPI(so);
+
+ ASSERT(MUTEX_HELD(&sti->sti_plumb_lock));
+
+ if (so->so_version == SOV_SOCKBSD)
+ return (EOPNOTSUPP);
+
+ if (so->so_version == SOV_STREAM) {
+ /*
+ * The imaginary "sockmod" has been popped - act as a stream.
+ * If this is a push of sockmod then change back to a socket.
+ */
+ if (cmd == I_PUSH) {
+ /* Kernel callers pass a kernel string, users a user one. */
+ error = ((mode & FKIOCTL) ? copystr : copyinstr)(
+ (void *)arg, mname, sizeof (mname), NULL);
+
+ if (error == 0 && strcmp(mname, sockmod_name) == 0) {
+ dprintso(so, 0, ("socktpi_ioctl: going to "
+ "socket version\n"));
+ so_stream2sock(so);
+ return (0);
+ }
+ }
+ return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp));
+ }
+
+ switch (cmd) {
+ case I_PUSH:
+ /*
+ * A socket marked sti_direct is first told to fall back via
+ * _SIOCSOCKFALLBACK; the push only proceeds if that succeeds.
+ */
+ if (sti->sti_direct) {
+ mutex_enter(&so->so_lock);
+ so_lock_single(so);
+ mutex_exit(&so->so_lock);
+
+ error = strioctl(vp, _SIOCSOCKFALLBACK, 0, 0, K_TO_K,
+ CRED(), rvalp);
+
+ mutex_enter(&so->so_lock);
+ if (error == 0)
+ sti->sti_direct = 0;
+ so_unlock_single(so, SOLOCKED);
+ mutex_exit(&so->so_lock);
+
+ if (error != 0)
+ return (error);
+ }
+
+ error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp);
+ if (error == 0)
+ sti->sti_pushcnt++;
+ return (error);
+
+ case I_POP:
+ if (sti->sti_pushcnt == 0) {
+ /* Emulate sockmod being popped */
+ dprintso(so, 0,
+ ("socktpi_ioctl: going to STREAMS version\n"));
+ return (so_sock2stream(so));
+ }
+
+ error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp);
+ if (error == 0)
+ sti->sti_pushcnt--;
+ return (error);
+
+ case I_LIST: {
+ struct str_mlist *kmlistp, *umlistp;
+ struct str_list kstrlist;
+ ssize_t kstrlistsize;
+ int i, nmods;
+
+ STRUCT_DECL(str_list, ustrlist);
+ STRUCT_INIT(ustrlist, mode);
+
+ /* No buffer supplied: only the module count is wanted. */
+ if (arg == NULL) {
+ error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp);
+ if (error == 0)
+ (*rvalp)++; /* Add one for sockmod */
+ return (error);
+ }
+
+ error = so_copyin((void *)arg, STRUCT_BUF(ustrlist),
+ STRUCT_SIZE(ustrlist), mode & FKIOCTL);
+ if (error != 0)
+ return (error);
+
+ nmods = STRUCT_FGET(ustrlist, sl_nmods);
+ if (nmods <= 0)
+ return (EINVAL);
+ /*
+ * Ceiling nmods at nstrpush to prevent someone from
+ * maliciously consuming lots of kernel memory.
+ */
+ nmods = MIN(nmods, nstrpush);
+
+ /* One extra slot so that sockmod can be inserted below. */
+ kstrlistsize = (nmods + 1) * sizeof (struct str_mlist);
+ kstrlist.sl_nmods = nmods;
+ kstrlist.sl_modlist = kmem_zalloc(kstrlistsize, KM_SLEEP);
+
+ error = strioctl(vp, cmd, (intptr_t)&kstrlist, mode, K_TO_K,
+ cr, rvalp);
+ if (error != 0)
+ goto done;
+
+ /*
+ * Considering the module list as a 0-based array of sl_nmods
+ * modules, sockmod should conceptually exist at slot
+ * sti_pushcnt. Insert sockmod at this location by sliding all
+ * of the module names after sti_pushcnt over by one. We know
+ * that there will be room to do this since we allocated
+ * sl_modlist with an additional slot.
+ */
+ for (i = kstrlist.sl_nmods; i > sti->sti_pushcnt; i--)
+ kstrlist.sl_modlist[i] = kstrlist.sl_modlist[i - 1];
+
+ (void) strcpy(kstrlist.sl_modlist[i].l_name, sockmod_name);
+ kstrlist.sl_nmods++;
+
+ /*
+ * Copy all of the entries out to ustrlist.
+ */
+ kmlistp = kstrlist.sl_modlist;
+ umlistp = STRUCT_FGETP(ustrlist, sl_modlist);
+ for (i = 0; i < nmods && i < kstrlist.sl_nmods; i++) {
+ error = so_copyout(kmlistp++, umlistp++,
+ sizeof (struct str_mlist), mode & FKIOCTL);
+ if (error != 0)
+ goto done;
+ }
+
+ /*
+ * Report how many entries were copied out by writing the
+ * count to the start of the caller's structure (presumably
+ * its sl_nmods member -- confirm against struct str_list).
+ */
+ error = so_copyout(&i, (void *)arg, sizeof (int32_t),
+ mode & FKIOCTL);
+ if (error == 0)
+ *rvalp = 0;
+ done:
+ kmem_free(kstrlist.sl_modlist, kstrlistsize);
+ return (error);
+ }
+ case I_LOOK:
+ /* With nothing pushed, the emulated sockmod is on top. */
+ if (sti->sti_pushcnt == 0) {
+ return (so_copyout(sockmod_name, (void *)arg,
+ sizeof (sockmod_name), mode & FKIOCTL));
+ }
+ return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp));
+
+ case I_FIND:
+ error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp);
+ if (error && error != EINVAL)
+ return (error);
+
+ /* if not found and string was sockmod return 1 */
+ if (*rvalp == 0 || error == EINVAL) {
+ error = ((mode & FKIOCTL) ? copystr : copyinstr)(
+ (void *)arg, mname, sizeof (mname), NULL);
+ if (error == ENAMETOOLONG)
+ error = EINVAL;
+
+ if (error == 0 && strcmp(mname, sockmod_name) == 0)
+ *rvalp = 1;
+ }
+ return (error);
+
+ default:
+ /* sotpi_ioctl() only dispatches the five commands above. */
+ panic("socktpi_plumbioctl: unknown ioctl %d", cmd);
+ break;
+ }
+
+ return (0);
+}
+
+/*
+ * Wrapper around the streams poll routine that implements socket poll
+ * semantics.
+ * The sockfs never calls pollwakeup itself - the stream head takes care
+ * of all pollwakeups. Since sockfs never holds so_lock when calling the
+ * stream head there can never be a deadlock due to holding so_lock across
+ * pollwakeup and acquiring so_lock in this routine.
+ *
+ * However, since the performance of VOP_POLL is critical we avoid
+ * acquiring so_lock here. This is based on two assumptions:
+ * - The poll implementation holds locks to serialize the VOP_POLL call
+ * and a pollwakeup for the same pollhead. This ensures that should
+ * e.g. so_state change during a socktpi_poll call the pollwakeup
+ * (which strsock_* and strrput conspire to issue) is issued after
+ * the state change. Thus the pollwakeup will block until VOP_POLL has
+ * returned and then wake up poll and have it call VOP_POLL again.
+ * - The reading of so_state without holding so_lock does not result in
+ * stale data that is older than the latest state change that has dropped
+ * so_lock. This is ensured by the mutex_exit issuing the appropriate
+ * memory barrier to force the data into the coherency domain.
+ *
+ * Returns 0 with the ready events stored in *reventsp, or the error
+ * returned by strpoll().
+ */
+static int
+sotpi_poll(
+ struct sonode *so,
+ short events,
+ int anyyet,
+ short *reventsp,
+ struct pollhead **phpp)
+{
+ short origevents = events;
+ struct vnode *vp = SOTOV(so);
+ int error;
+ int so_state = so->so_state; /* snapshot */
+ sotpi_info_t *sti = SOTOTPI(so);
+
+ dprintso(so, 0, ("socktpi_poll(%p): state %s err %d\n",
+ (void *)vp, pr_state(so_state, so->so_mode), so->so_error));
+
+ ASSERT(vp->v_type == VSOCK);
+ ASSERT(vp->v_stream != NULL);
+
+ if (so->so_version == SOV_STREAM) {
+ /* The imaginary "sockmod" has been popped - act as a stream */
+ return (strpoll(vp->v_stream, events, anyyet,
+ reventsp, phpp));
+ }
+
+ if (!(so_state & SS_ISCONNECTED) &&
+ (so->so_mode & SM_CONNREQUIRED)) {
+ /* Not connected yet - turn off write side events */
+ events &= ~(POLLOUT|POLLWRBAND);
+ }
+ /*
+ * Check for errors without calling strpoll if the caller wants them.
+ * In sockets the errors are represented as input/output events
+ * and there is no need to ask the stream head for this information.
+ * The caller's original mask is used here, so POLLOUT is reported
+ * even if it was just removed from events above.
+ */
+ if (so->so_error != 0 &&
+ ((POLLIN|POLLRDNORM|POLLOUT) & origevents) != 0) {
+ *reventsp = (POLLIN|POLLRDNORM|POLLOUT) & origevents;
+ return (0);
+ }
+ /*
+ * Ignore M_PROTO only messages such as the T_EXDATA_IND messages.
+ * These messages with only an M_PROTO/M_PCPROTO part and no M_DATA
+ * will not trigger a POLLIN event with POLLRDDATA set.
+ * The handling of urgent data (causing POLLRDBAND) is done by
+ * inspecting SS_OOBPEND below.
+ */
+ events |= POLLRDDATA;
+
+ /*
+ * After shutdown(output) a stream head write error is set.
+ * However, we should not return output events.
+ */
+ events |= POLLNOERR;
+ error = strpoll(vp->v_stream, events, anyyet,
+ reventsp, phpp);
+ if (error)
+ return (error);
+
+ ASSERT(!(*reventsp & POLLERR));
+
+ /*
+ * Notes on T_CONN_IND handling for sockets.
+ *
+ * If strpoll() returned without events, SR_POLLIN is guaranteed
+ * to be set, ensuring any subsequent strrput() runs pollwakeup().
+ *
+ * Since the so_lock is not held, soqueueconnind() may have run
+ * and a T_CONN_IND may be waiting. We now check for any queued
+ * T_CONN_IND msgs on sti_conn_ind_head and set appropriate events
+ * to ensure poll returns.
+ *
+ * However:
+ * If the T_CONN_IND hasn't arrived by the time strpoll() returns,
+ * when strrput() does run for an arriving M_PROTO with T_CONN_IND
+ * the following actions will occur; taken together they ensure the
+ * syscall will return.
+ *
+ * 1. If a socket, soqueueconnind() will queue the T_CONN_IND but if
+ * the accept() was run on a non-blocking socket sowaitconnind()
+ * may have already returned EWOULDBLOCK, so not be waiting to
+ * process the message. Additionally socktpi_poll() has probably
+ * proceeded past the sti_conn_ind_head check below.
+ * 2. strrput() runs pollwakeup()->pollnotify()->cv_signal() to wake
+ * this thread, however that could occur before poll_common()
+ * has entered cv_wait.
+ * 3. pollnotify() sets T_POLLWAKE, while holding the pc_lock.
+ *
+ * Before proceeding to cv_wait() in poll_common() for an event,
+ * poll_common() atomically checks for T_POLLWAKE under the pc_lock,
+ * and if set, re-calls strpoll() to ensure the late arriving
+ * T_CONN_IND is recognized, and pollsys() returns.
+ */
+
+ if (sti->sti_conn_ind_head != NULL)
+ *reventsp |= (POLLIN|POLLRDNORM) & events;
+
+ /* Unlike the snapshot above, this reads the current so_state. */
+ if (so->so_state & SS_OOBPEND)
+ *reventsp |= POLLRDBAND & events;
+
+ /* Data held back by NL7C also counts as readable. */
+ if (sti->sti_nl7c_rcv_mp != NULL) {
+ *reventsp |= (POLLIN|POLLRDNORM) & events;
+ }
+ /* Record in the NL7C flags that a read event was reported. */
+ if ((sti->sti_nl7c_flags & NL7C_ENABLED) &&
+ ((POLLIN|POLLRDNORM) & *reventsp)) {
+ sti->sti_nl7c_flags |= NL7C_POLLIN;
+ }
+
+ return (0);
+}
+
+/*
+ * kmem cache constructor for TPI sonodes (sotpi_sonode_t).
+ *
+ * Constructs the embedded sonode first, then the embedded sotpi_info_t,
+ * and finally points so_priv at the embedded TPI info. If the TPI info
+ * cannot be constructed the sonode is destructed again and the error is
+ * returned without touching the (now destructed) sonode any further.
+ *
+ * Returns 0 on success or the error from the failing constructor.
+ */
+/*ARGSUSED*/
+static int
+socktpi_constructor(void *buf, void *cdrarg, int kmflags)
+{
+	sotpi_sonode_t *st = (sotpi_sonode_t *)buf;
+	int error;
+
+	error = sonode_constructor(buf, cdrarg, kmflags);
+	if (error != 0)
+		return (error);
+
+	error = i_sotpi_info_constructor(&st->st_info);
+	if (error != 0) {
+		/* Undo the sonode construction and leave it alone. */
+		sonode_destructor(buf, cdrarg);
+		return (error);
+	}
+
+	st->st_sonode.so_priv = &st->st_info;
+
+	return (0);
+}
+
+/*
+ * kmem cache destructor for TPI sonodes; undoes socktpi_constructor().
+ * so_priv is detached from the embedded TPI info, after which the TPI
+ * info and then the sonode itself are destructed.
+ */
+/*ARGSUSED1*/
+static void
+socktpi_destructor(void *buf, void *cdrarg)
+{
+	sotpi_sonode_t *st = (sotpi_sonode_t *)buf;
+	sotpi_info_t *sti = &st->st_info;
+
+	/* so_priv must still point at the embedded TPI info. */
+	ASSERT(st->st_sonode.so_priv == &st->st_info);
+	st->st_sonode.so_priv = NULL;
+
+	i_sotpi_info_destructor(sti);
+	sonode_destructor(buf, cdrarg);
+}
+
+/*
+ * kmem cache constructor for AF_UNIX TPI sonodes. Runs the regular TPI
+ * constructor and, when that succeeds, links the new sonode at the head
+ * of the global socklist under sl_lock.
+ *
+ * Returns the result of socktpi_constructor().
+ */
+static int
+socktpi_unix_constructor(void *buf, void *cdrarg, int kmflags)
+{
+	struct sonode *so = (struct sonode *)buf;
+	struct sonode *head;
+	sotpi_info_t *sti;
+	int retval;
+
+	retval = socktpi_constructor(buf, cdrarg, kmflags);
+	if (retval != 0)
+		return (retval);
+
+	sti = SOTOTPI(so);
+
+	mutex_enter(&socklist.sl_lock);
+
+	/* Push onto the front of the doubly linked list. */
+	head = socklist.sl_list;
+	sti->sti_next_so = head;
+	sti->sti_prev_so = NULL;
+	if (head != NULL)
+		SOTOTPI(head)->sti_prev_so = so;
+	socklist.sl_list = so;
+
+	mutex_exit(&socklist.sl_lock);
+
+	return (retval);
+}
+
+/*
+ * kmem cache destructor for AF_UNIX TPI sonodes. Unlinks the sonode from
+ * the global socklist under sl_lock and then runs the regular TPI
+ * destructor.
+ */
+static void
+socktpi_unix_destructor(void *buf, void *cdrarg)
+{
+	struct sonode *so = (struct sonode *)buf;
+	sotpi_info_t *sti = SOTOTPI(so);
+	struct sonode *next, *prev;
+
+	mutex_enter(&socklist.sl_lock);
+
+	next = sti->sti_next_so;
+	prev = sti->sti_prev_so;
+
+	if (next != NULL)
+		SOTOTPI(next)->sti_prev_so = prev;
+
+	/* Without a predecessor this sonode was the list head. */
+	if (prev == NULL)
+		socklist.sl_list = next;
+	else
+		SOTOTPI(prev)->sti_next_so = next;
+
+	mutex_exit(&socklist.sl_lock);
+
+	socktpi_destructor(buf, cdrarg);
+}
+
+/*
+ * Set up the kmem caches that back TPI sonodes. AF_UNIX sockets get a
+ * cache of their own so that they can be tracked for netstat(1M).
+ *
+ * Always returns 0.
+ */
+int
+socktpi_init(void)
+{
+	const size_t nodesz = sizeof (struct sotpi_sonode);
+
+	socktpi_cache = kmem_cache_create("socktpi_cache", nodesz, 0,
+	    socktpi_constructor, socktpi_destructor, NULL, NULL, NULL, 0);
+
+	socktpi_unix_cache = kmem_cache_create("socktpi_unix_cache", nodesz,
+	    0, socktpi_unix_constructor, socktpi_unix_destructor, NULL, NULL,
+	    NULL, 0);
+
+	return (0);
+}
+
+/*
+ * Given a non-TPI sonode, allocate and prep it to be ready for TPI.
+ *
+ * Caller must still update state and mode using sotpi_update_state().
+ *
+ * On return *direct is B_TRUE if sotpi_init() left sti_direct set, and
+ * B_FALSE otherwise.
+ *
+ * Returns the STREAM queue that the protocol should use, or NULL if
+ * sotpi_init() fails.
+ */
+queue_t *
+sotpi_convert_sonode(struct sonode *so, struct sockparams *newsp,
+ boolean_t *direct, struct cred *cr)
+{
+ sotpi_info_t *sti;
+ struct sockparams *origsp = so->so_sockparams;
+ sock_lower_handle_t handle = so->so_proto_handle;
+ uint_t old_state = so->so_state;
+ struct stdata *stp;
+ struct vnode *vp;
+ queue_t *q;
+
+ *direct = B_FALSE;
+ so->so_sockparams = newsp;
+ /*
+ * Allocate and initialize fields required by TPI.
+ */
+ (void) sotpi_info_create(so, KM_SLEEP);
+ sotpi_info_init(so);
+
+ /*
+ * NOTE(review): on this failure path so_sockparams is left pointing
+ * at newsp rather than being restored to origsp -- confirm that the
+ * caller restores it.
+ */
+ if (sotpi_init(so, NULL, cr, SO_FALLBACK) != 0) {
+ sotpi_info_fini(so);
+ sotpi_info_destroy(so);
+ so->so_state = old_state;
+ return (NULL);
+ }
+ /* sotpi_init() must not have replaced the protocol handle. */
+ ASSERT(handle == so->so_proto_handle);
+ sti = SOTOTPI(so);
+ if (sti->sti_direct != 0)
+ *direct = B_TRUE;
+
+ /*
+ * Keep the original sp around so we can properly dispose of the
+ * sonode when the socket is being closed.
+ */
+ sti->sti_orig_sp = origsp;
+
+ so_basic_strinit(so); /* skips the T_CAPABILITY_REQ */
+ so_alloc_addr(so, so->so_max_addr_len);
+
+ /*
+ * If the application has done a SIOCSPGRP, make sure the
+ * STREAM head is aware. This needs to take place before
+ * the protocol starts sending up messages. Otherwise we
+ * might fail to generate SIGPOLL.
+ *
+ * It is possible that the application will receive duplicate
+ * signals if some were already generated for either data or
+ * connection indications.
+ */
+ if (so->so_pgrp != 0) {
+ mutex_enter(&so->so_lock);
+ /* If the events cannot be set, forget the process group. */
+ if (so_set_events(so, so->so_vnode, cr) != 0)
+ so->so_pgrp = 0;
+ mutex_exit(&so->so_lock);
+ }
+
+ /*
+ * Determine which queue to use.
+ */
+ vp = SOTOV(so);
+ stp = vp->v_stream;
+ ASSERT(stp != NULL);
+ q = stp->sd_wrq->q_next;
+
+ /*
+ * Skip any modules that may have been auto pushed when the device
+ * was opened
+ */
+ while (q->q_next != NULL)
+ q = q->q_next;
+ /* Hand back the read side of the last queue on the stream. */
+ q = _RD(q);
+
+ return (q);
+}
+
+/*
+ * Bring the TPI state of a sonode up to date after it has been prepared
+ * with sotpi_convert_sonode().
+ *
+ *	tcap		 passed to so_proc_tcapability_ack()
+ *	laddr, laddrlen	 local address to cache; skipped when laddrlen is 0
+ *	faddr, faddrlen	 peer address to cache; skipped when faddrlen is 0
+ *	opts		 option bits that are OR'ed into so_options
+ *
+ * A cached local address is marked valid only while the socket is bound,
+ * a cached peer address only while it is connected. The address buffers
+ * must already be large enough for the supplied addresses.
+ */
+void
+sotpi_update_state(struct sonode *so, struct T_capability_ack *tcap,
+    struct sockaddr *laddr, socklen_t laddrlen, struct sockaddr *faddr,
+    socklen_t faddrlen, short opts)
+{
+	sotpi_info_t *sti = SOTOTPI(so);
+
+	so_proc_tcapability_ack(so, tcap);
+
+	so->so_options |= opts;
+
+	/*
+	 * Determine whether the foreign and local address are valid.
+	 *
+	 * The state test is reduced to 0/1 before it is stored: storing the
+	 * raw masked value would silently become 0 if the flag member is
+	 * narrower than the state mask (e.g. a one-bit bit-field).
+	 */
+	if (laddrlen != 0) {
+		ASSERT(laddrlen <= sti->sti_laddr_maxlen);
+		sti->sti_laddr_len = laddrlen;
+		bcopy(laddr, sti->sti_laddr_sa, laddrlen);
+		sti->sti_laddr_valid = ((so->so_state & SS_ISBOUND) != 0);
+	}
+
+	if (faddrlen != 0) {
+		ASSERT(faddrlen <= sti->sti_faddr_maxlen);
+		sti->sti_faddr_len = faddrlen;
+		bcopy(faddr, sti->sti_faddr_sa, faddrlen);
+		sti->sti_faddr_valid =
+		    ((so->so_state & SS_ISCONNECTED) != 0);
+	}
+}
+
+/*
+ * Allocate the buffer used to cache the local and foreign addresses.
+ *
+ * maxlen is rounded up to a multiple of KMEM_ALIGN and a single buffer of
+ * twice that size is allocated: the local address occupies the first
+ * half and the foreign address the second. so_max_addr_len is updated to
+ * the rounded length. For AF_UNIX the sti_ux_laddr/sti_ux_faddr fields
+ * are zeroed as well.
+ */
+void
+so_alloc_addr(struct sonode *so, t_uscalar_t maxlen)
+{
+	sotpi_info_t *sti = SOTOTPI(so);
+
+	/* Nothing may be cached yet. */
+	ASSERT(sti->sti_laddr_sa == NULL && sti->sti_faddr_sa == NULL);
+	ASSERT(sti->sti_laddr_len == 0 && sti->sti_faddr_len == 0);
+
+	sti->sti_faddr_maxlen = P2ROUNDUP(maxlen, KMEM_ALIGN);
+	sti->sti_laddr_maxlen = sti->sti_faddr_maxlen;
+	so->so_max_addr_len = sti->sti_laddr_maxlen;
+
+	/* One allocation; the foreign address lives in the upper half. */
+	sti->sti_laddr_sa = kmem_alloc(sti->sti_laddr_maxlen * 2, KM_SLEEP);
+	sti->sti_faddr_sa = (struct sockaddr *)
+	    ((caddr_t)sti->sti_laddr_sa + sti->sti_laddr_maxlen);
+
+	if (so->so_family != AF_UNIX)
+		return;
+
+	/*
+	 * Initialize AF_UNIX related fields.
+	 */
+	bzero(&sti->sti_ux_laddr, sizeof (sti->sti_ux_laddr));
+	bzero(&sti->sti_ux_faddr, sizeof (sti->sti_ux_faddr));
+}
+
+
+/*
+ * Return the TPI information hanging off so_priv of the given sonode, or
+ * NULL when so itself is NULL. Asserts that the information is present
+ * and carries the expected magic number.
+ */
+sotpi_info_t *
+sotpi_sototpi(struct sonode *so)
+{
+	sotpi_info_t *sti = NULL;
+
+	if (so != NULL) {
+		sti = (sotpi_info_t *)so->so_priv;
+
+		ASSERT(sti != NULL);
+		ASSERT(sti->sti_magic == SOTPI_INFO_MAGIC);
+	}
+
+	return (sti);
+}
+
+/*
+ * Put a sotpi_info_t into its constructed state: stamp the magic number,
+ * clear the message, address and NL7C pointers and initialize the plumb
+ * lock and the ack condition variable.
+ *
+ * Always returns 0.
+ */
+static int
+i_sotpi_info_constructor(sotpi_info_t *sti)
+{
+	sti->sti_magic = SOTPI_INFO_MAGIC;
+
+	/* Synchronization objects. */
+	mutex_init(&sti->sti_plumb_lock, NULL, MUTEX_DEFAULT, NULL);
+	cv_init(&sti->sti_ack_cv, NULL, CV_DEFAULT, NULL);
+
+	/* No saved or queued messages. */
+	sti->sti_ack_mp = NULL;
+	sti->sti_discon_ind_mp = NULL;
+	sti->sti_unbind_mp = NULL;
+	sti->sti_conn_ind_head = sti->sti_conn_ind_tail = NULL;
+
+	/* No bound AF_UNIX vnode and no cached addresses. */
+	sti->sti_ux_bound_vp = NULL;
+	sti->sti_laddr_sa = NULL;
+	sti->sti_faddr_sa = NULL;
+
+	/* NL7C is off. */
+	sti->sti_nl7c_flags = 0;
+	sti->sti_nl7c_uri = NULL;
+	sti->sti_nl7c_rcv_mp = NULL;
+
+	return (0);
+}
+
+/*
+ * Counterpart of i_sotpi_info_constructor(). Asserts that the structure
+ * is back in its constructed state (no messages, bound vnode, cached
+ * addresses or NL7C state left behind) and then destroys the plumb lock
+ * and the ack condition variable. The structure itself is not freed.
+ */
+static void
+i_sotpi_info_destructor(sotpi_info_t *sti)
+{
+ ASSERT(sti->sti_magic == SOTPI_INFO_MAGIC);
+ ASSERT(sti->sti_ack_mp == NULL);
+ ASSERT(sti->sti_discon_ind_mp == NULL);
+ ASSERT(sti->sti_ux_bound_vp == NULL);
+ ASSERT(sti->sti_unbind_mp == NULL);
+
+ ASSERT(sti->sti_conn_ind_head == NULL);
+ ASSERT(sti->sti_conn_ind_tail == NULL);
+
+ ASSERT(sti->sti_laddr_sa == NULL);
+ ASSERT(sti->sti_faddr_sa == NULL);
+
+ ASSERT(sti->sti_nl7c_flags == 0);
+ ASSERT(sti->sti_nl7c_uri == NULL);
+ ASSERT(sti->sti_nl7c_rcv_mp == NULL);
+
+ mutex_destroy(&sti->sti_plumb_lock);
+ cv_destroy(&sti->sti_ack_cv);
+}
+
+/*
+ * Allocate a zeroed sotpi_info_t, construct it and attach it to the
+ * given sonode through so_priv. The sonode must not already have private
+ * data attached.
+ *
+ * Returns B_TRUE on success, B_FALSE if the allocation or construction
+ * failed (in which case nothing is attached).
+ */
+static boolean_t
+sotpi_info_create(struct sonode *so, int kmflags)
+{
+	sotpi_info_t *sti;
+
+	ASSERT(so->so_priv == NULL);
+
+	sti = kmem_zalloc(sizeof (*sti), kmflags);
+	if (sti != NULL && i_sotpi_info_constructor(sti) != 0) {
+		/* Construction failed; give the memory back. */
+		kmem_free(sti, sizeof (*sti));
+		sti = NULL;
+	}
+	if (sti == NULL)
+		return (B_FALSE);
+
+	so->so_priv = (void *)sti;
+	return (B_TRUE);
+}
+
+/*
+ * Reset the per-use fields of the TPI information attached to a sonode.
+ *
+ * The device number is taken from the sockparams' device vnode and is
+ * also stored in the socket vnode; the three timestamps are set to the
+ * current time; everything else is cleared. Fields owned by the
+ * constructor are only asserted to be clear.
+ */
+static void
+sotpi_info_init(struct sonode *so)
+{
+	sotpi_info_t *sti = SOTOTPI(so);
+	struct vnode *vp = SOTOV(so);
+	time_t now;
+
+	/* Nothing may be left over from a previous use. */
+	ASSERT(sti->sti_laddr_sa == NULL && sti->sti_faddr_sa == NULL);
+	ASSERT(sti->sti_ack_mp == NULL);
+	ASSERT(sti->sti_ux_bound_vp == NULL);
+	ASSERT(sti->sti_unbind_mp == NULL);
+	ASSERT(sti->sti_conn_ind_head == NULL);
+	ASSERT(sti->sti_conn_ind_tail == NULL);
+
+	/* Device identity. */
+	sti->sti_dev = so->so_sockparams->sp_sdev_info.sd_vnode->v_rdev;
+	vp->v_rdev = sti->sti_dev;
+
+	sti->sti_orig_sp = NULL;
+	sti->sti_provinfo = NULL;
+	sti->sti_pushcnt = 0;
+	sti->sti_direct = 0;
+
+	/* Access, modification and change times all start out as "now". */
+	now = gethrestime_sec();
+	sti->sti_atime = now;
+	sti->sti_mtime = now;
+	sti->sti_ctime = now;
+
+	/* No delayed error and no out-of-band data. */
+	sti->sti_eaddr_mp = NULL;
+	sti->sti_delayed_error = 0;
+	sti->sti_oobcnt = 0;
+	sti->sti_oobsigcnt = 0;
+
+	/* No cached addresses. */
+	sti->sti_laddr_sa = NULL;
+	sti->sti_faddr_sa = NULL;
+	sti->sti_laddr_maxlen = sti->sti_faddr_maxlen = 0;
+	sti->sti_laddr_len = sti->sti_faddr_len = 0;
+	sti->sti_laddr_valid = 0;
+	sti->sti_faddr_valid = 0;
+	sti->sti_faddr_noxlate = 0;
+
+	/* Initialize the kernel SSL proxy fields */
+	sti->sti_kssl_type = KSSL_NO_PROXY;
+	sti->sti_kssl_ent = NULL;
+	sti->sti_kssl_ctx = NULL;
+}
+
+/*
+ * Given a sonode, grab the TPI info and free any data.
+ *
+ * Releases the queued connection indications, the cached address buffer,
+ * the delayed-error, ack and unbind messages and any NL7C state, so that
+ * the structure can be handed to i_sotpi_info_destructor() afterwards.
+ * The sotpi_info_t itself is not freed here.
+ */
+static void
+sotpi_info_fini(struct sonode *so)
+{
+ sotpi_info_t *sti = SOTOTPI(so);
+ mblk_t *mp;
+
+ ASSERT(sti->sti_discon_ind_mp == NULL);
+
+ /* Free every queued connection indication (linked by b_next). */
+ if ((mp = sti->sti_conn_ind_head) != NULL) {
+ mblk_t *mp1;
+
+ while (mp) {
+ mp1 = mp->b_next;
+ mp->b_next = NULL;
+ freemsg(mp);
+ mp = mp1;
+ }
+ sti->sti_conn_ind_head = sti->sti_conn_ind_tail = NULL;
+ }
+
+ /*
+ * Protect sti_[lf]addr_sa so that sockfs_snapshot() can safely
+ * indirect them. It also uses so_count as a validity test.
+ */
+ mutex_enter(&so->so_lock);
+
+ if (sti->sti_laddr_sa) {
+ /* Both addresses share one buffer; see so_alloc_addr(). */
+ ASSERT((caddr_t)sti->sti_faddr_sa ==
+ (caddr_t)sti->sti_laddr_sa + sti->sti_laddr_maxlen);
+ ASSERT(sti->sti_faddr_maxlen == sti->sti_laddr_maxlen);
+ sti->sti_laddr_valid = 0;
+ sti->sti_faddr_valid = 0;
+ kmem_free(sti->sti_laddr_sa, sti->sti_laddr_maxlen * 2);
+ sti->sti_laddr_sa = NULL;
+ sti->sti_laddr_len = sti->sti_laddr_maxlen = 0;
+ sti->sti_faddr_sa = NULL;
+ sti->sti_faddr_len = sti->sti_faddr_maxlen = 0;
+ }
+
+ mutex_exit(&so->so_lock);
+
+ if ((mp = sti->sti_eaddr_mp) != NULL) {
+ freemsg(mp);
+ sti->sti_eaddr_mp = NULL;
+ sti->sti_delayed_error = 0;
+ }
+
+ if ((mp = sti->sti_ack_mp) != NULL) {
+ freemsg(mp);
+ sti->sti_ack_mp = NULL;
+ }
+
+ /* Drop whatever NL7C still holds for this socket. */
+ if ((mp = sti->sti_nl7c_rcv_mp) != NULL) {
+ sti->sti_nl7c_rcv_mp = NULL;
+ freemsg(mp);
+ }
+ sti->sti_nl7c_rcv_rval = 0;
+ if (sti->sti_nl7c_uri != NULL) {
+ nl7c_urifree(so);
+ /* urifree() cleared nl7c_uri */
+ }
+ if (sti->sti_nl7c_flags) {
+ sti->sti_nl7c_flags = 0;
+ }
+
+ ASSERT(sti->sti_ux_bound_vp == NULL);
+ if ((mp = sti->sti_unbind_mp) != NULL) {
+ freemsg(mp);
+ sti->sti_unbind_mp = NULL;
+ }
+}
+
+/*
+ * Tear down and free the TPI information that sotpi_info_create()
+ * attached to the sonode, leaving so_priv cleared.
+ */
+static void
+sotpi_info_destroy(struct sonode *so)
+{
+	sotpi_info_t *sti = SOTOTPI(so);
+
+	/* Destruct before freeing; the destructor still reads *sti. */
+	i_sotpi_info_destructor(sti);
+	kmem_free(sti, sizeof (sotpi_info_t));
+
+	so->so_priv = NULL;
+}
+
+/*
+ * Create the global sotpi socket module entry. It will never be freed.
+ *
+ * Returns a newly allocated smod_info_t named SOTPI_SMOD_NAME whose
+ * create/destroy hooks are sotpi_create() and sotpi_destroy(). Both
+ * allocations sleep, so this does not return NULL.
+ */
+smod_info_t *
+sotpi_smod_create(void)
+{
+	smod_info_t *smodp;
+	/* Room for the name and its terminating NUL. */
+	size_t namesz = strlen(SOTPI_SMOD_NAME) + 1;
+
+	smodp = kmem_zalloc(sizeof (*smodp), KM_SLEEP);
+	smodp->smod_name = kmem_zalloc(namesz, KM_SLEEP);
+	(void) strcpy(smodp->smod_name, SOTPI_SMOD_NAME);
+	/*
+	 * Initialize the refcnt to 1 so it will never be freed.
+	 */
+	smodp->smod_refcnt = 1;
+	smodp->smod_uc_version = SOCK_UC_VERSION;
+	smodp->smod_dc_version = SOCK_DC_VERSION;
+	smodp->smod_sock_create_func = &sotpi_create;
+	smodp->smod_sock_destroy_func = &sotpi_destroy;
+	return (smodp);
+}
diff --git a/usr/src/uts/common/fs/sockfs/socktpi.h b/usr/src/uts/common/fs/sockfs/socktpi.h
new file mode 100644
index 0000000000..4c1a5de268
--- /dev/null
+++ b/usr/src/uts/common/fs/sockfs/socktpi.h
@@ -0,0 +1,282 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SOCKFS_SOCKTPI_H
+#define _SOCKFS_SOCKTPI_H
+
+#include <inet/kssl/ksslapi.h>
+#include <sys/sodirect.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Internal representation used for addresses.
+ */
+struct soaddr {
+ struct sockaddr *soa_sa; /* Actual address */
+ t_uscalar_t soa_len; /* Address length in bytes */
+ t_uscalar_t soa_maxlen; /* Allocated length (basis for kmem_free) */
+};
+/* Maximum size address for transports that have ADDR_size == 1 */
+#define SOA_DEFSIZE 128
+
+struct sonode;
+
+/*
+ * TPI Sockets
+ * ======================
+ *
+ * A TPI socket can be created by the TPI socket module, or as a
+ * result of fallback. In either case, the TPI related information is
+ * stored in a sotpi_info_t. Sockets that are TPI based from the
+ * beginning will use a sotpi_sonode_t, but in the fallback case the
+ * sotpi_info_t will be allocated when needed. However, the so_priv
+ * field in the sonode will always point to the sotpi_info_t, and the
+ * structure should only be accessed via so_priv. Use SOTOTPI().
+ *
+ * A TPI socket always corresponds to a VCHR stream representing the
+ * transport provider (e.g. /dev/tcp). This information is retrieved
+ * from the kernel socket configuration table and accessible via
+ * so_sockparams->sp_sdev_info. sockfs uses this to perform
+ * VOP_ACCESS checks before allowing an open of the transport
+ * provider.
+ *
+ * AF_UNIX Sockets
+ * -------------------------
+ *
+ * When an AF_UNIX socket is bound to a pathname the sockfs creates a
+ * VSOCK vnode in the underlying file system. However, the vnodeops
+ * etc in this VNODE remain those of the underlying file system.
+ * Sockfs uses the v_stream pointer in the underlying file system
+ * VSOCK node to find the sonode bound to the pathname. The bound
+ * pathname vnode is accessed through sti_ux_bound_vp.
+ *
+ * Out of Band Data Handling
+ * -------------------------
+ *
+ * The counts (sti_oobcnt and sti_oobsigcnt) track the number of
+ * urgent indications that are (logically) queued on the stream head
+ * read queue. The urgent data is queued on the stream head
+ * as follows.
+ *
+ * In the normal case the SIGURG is not generated until
+ * the T_EXDATA_IND arrives at the stream head. However, transports
+ * that have an early indication that urgent data is pending
+ * (e.g. TCP receiving a "new" urgent pointer value) can send up
+ * an M_PCSIG/SIGURG message to generate the signal early.
+ *
+ * The mark is indicated by either:
+ * - a T_EXDATA_IND (with no M_DATA b_cont) with MSGMARK set.
+ * When this message is consumed by sorecvmsg the socket layer
+ * sets SS_RCVATMARK until data has been consumed past the mark.
+ * - a message with MSGMARKNEXT set (indicating that the
+ * first byte of the next message constitutes the mark). When
+ * the last byte of the MSGMARKNEXT message is consumed in
+ * the stream head the stream head sets STRATMARK. This flag
+ * is cleared when at least one byte is read. (Note that
+ * the MSGMARKNEXT messages can be of zero length when there
+ * is no previous data to which the marknext can be attached.)
+ *
+ * While the T_EXDATA_IND method is the common case which is used
+ * with all TPI transports, the MSGMARKNEXT method is needed to
+ * indicate the mark when e.g. the TCP urgent byte has not been
+ * received yet but the TCP urgent pointer has made TCP generate
+ * the M_PCSIG/SIGURG.
+ *
+ * The signal (the M_PCSIG carrying the SIGURG) and the mark
+ * indication can not be delivered as a single message, since
+ * the signal should be delivered as high priority and any mark
+ * indication must flow with the data. This implies that immediately
+ * when the SIGURG has been delivered if the stream head queue is
+ * empty it is impossible to determine if this will be the position
+ * of the mark. This race condition is resolved by using MSGNOTMARKNEXT
+ * messages and the STRNOTATMARK flag in the stream head. The
+ * SIOCATMARK code calls the stream head to wait for either a
+ * non-empty queue or one of the STR*ATMARK flags being set.
+ * This implies that any transport that is sending M_PCSIG(SIGURG)
+ * should send the appropriate MSGNOTMARKNEXT message (which can be
+ * zero length) after sending an M_PCSIG to prevent SIOCATMARK
+ * from sleeping unnecessarily.
+ */
+
+#define SOTPI_INFO_MAGIC 0x12345678
+
+/*
+ * Information used by TPI/STREAMS sockets
+ */
+typedef struct sotpi_info {
+ /*
+ * These fields are initialized once.
+ */
+ uint32_t sti_magic; /* always set to SOTPI_INFO_MAGIC */
+ dev_t sti_dev; /* device the sonode represents */
+
+ struct sockparams *sti_orig_sp; /* in case of fallback; the orig sp */
+
+ kmutex_t sti_plumb_lock; /* serializes plumbs, and the related */
+ /* sti_pushcnt */
+ short sti_pushcnt; /* Number of modules above "sockmod" */
+
+ kcondvar_t sti_ack_cv; /* wait for TPI acks */
+
+ uint8_t
+ sti_laddr_valid : 1, /* sti_laddr valid for user */
+ sti_faddr_valid : 1, /* sti_faddr valid for user */
+ sti_faddr_noxlate : 1, /* No xlation of faddr for AF_UNIX */
+
+ sti_direct : 1, /* transport is directly below */
+
+ sti_pad_to_bit7 : 4;
+
+ mblk_t *sti_ack_mp; /* TPI ack received from below */
+ mblk_t *sti_unbind_mp; /* Preallocated T_UNBIND_REQ message */
+
+ time_t sti_atime; /* time of last access */
+ time_t sti_mtime; /* time of last modification */
+ time_t sti_ctime; /* time of last attributes change */
+
+ ushort_t sti_delayed_error; /* From T_uderror_ind */
+ mblk_t *sti_eaddr_mp; /* for sti_delayed_error */
+ /* put here for delayed processing */
+
+ mblk_t *sti_conn_ind_head; /* b_next list of T_CONN_IND */
+ mblk_t *sti_conn_ind_tail;
+
+ uint_t sti_oobsigcnt; /* Number of SIGURG generated */
+ uint_t sti_oobcnt; /* Number of T_EXDATA_IND queued */
+
+ /* From T_info_ack */
+ t_uscalar_t sti_tsdu_size;
+ t_uscalar_t sti_etsdu_size;
+ t_scalar_t sti_addr_size;
+ t_uscalar_t sti_opt_size;
+ t_uscalar_t sti_tidu_size;
+ t_scalar_t sti_serv_type;
+
+ /* From T_capability_ack */
+ t_uscalar_t sti_acceptor_id;
+
+ /* Internal provider information */
+ struct tpi_provinfo *sti_provinfo;
+
+ /*
+ * The local and remote addresses have multiple purposes
+ * but one of the key reasons for their existence and careful
+ * tracking in sockfs is to support getsockname and getpeername
+ * when the transport does not handle the TI_GET*NAME ioctls
+ * and caching when it does (signalled by the sti_*_valid bits).
+ * When all transports support the new TPI (with T_ADDR_REQ)
+ * we can revisit this code.
+ *
+ * The other usage of sti_faddr is to keep the "connected to"
+ * address for datagram sockets.
+ *
+ * Finally, for AF_UNIX both local and remote addresses are used
+ * to record the sockaddr_un since we use a separate namespace
+ * in the loopback transport.
+ */
+ struct soaddr sti_laddr; /* Local address */
+ struct soaddr sti_faddr; /* Peer address */
+#define sti_laddr_sa sti_laddr.soa_sa
+#define sti_faddr_sa sti_faddr.soa_sa
+#define sti_laddr_len sti_laddr.soa_len
+#define sti_faddr_len sti_faddr.soa_len
+#define sti_laddr_maxlen sti_laddr.soa_maxlen
+#define sti_faddr_maxlen sti_faddr.soa_maxlen
+
+ /*
+ * For AF_UNIX sockets:
+ *
+ * sti_ux_laddr/faddr records the internal addresses used with the
+ * transport. sti_ux_bound_vp and v_stream->sd_vnode form the
+ * cross-linkage between the underlying fs vnode corresponding
+ * to the bound sockaddr_un and the socket node.
+ */
+ struct so_ux_addr sti_ux_laddr; /* laddr bound with the transport */
+ struct so_ux_addr sti_ux_faddr; /* temporary peer address */
+ struct vnode *sti_ux_bound_vp; /* bound AF_UNIX file system vnode */
+ struct sonode *sti_next_so; /* next sonode on socklist */
+ struct sonode *sti_prev_so; /* previous sonode on socklist */
+ mblk_t *sti_discon_ind_mp; /* T_DISCON_IND received from below */
+
+ /*
+ * For NL7C sockets:
+ *
+ * sti_nl7c_flags the NL7C state of URL processing.
+ *
+ * sti_nl7c_rcv_mp mblk_t chain of already received data to be
+ * passed up to the app after NL7C gives up on
+ * a socket.
+ *
+ * sti_nl7c_rcv_rval returned rval for last mblk_t from above.
+ *
+ * sti_nl7c_uri the URI currently being processed.
+ *
+ * sti_nl7c_rtime URI request gethrestime_sec().
+ *
+ * sti_nl7c_addr pointer returned by nl7c_addr_lookup().
+ */
+ uint64_t sti_nl7c_flags;
+ mblk_t *sti_nl7c_rcv_mp;
+ int64_t sti_nl7c_rcv_rval;
+ void *sti_nl7c_uri;
+ time_t sti_nl7c_rtime;
+ void *sti_nl7c_addr;
+
+ /* For sockets acting as an in-kernel SSL proxy */
+ kssl_endpt_type_t sti_kssl_type; /* is proxy/is proxied/none */
+ kssl_ent_t sti_kssl_ent; /* SSL config entry */
+ kssl_ctx_t sti_kssl_ctx; /* SSL session context */
+} sotpi_info_t;
+
+struct T_capability_ack;
+
+extern sonodeops_t sotpi_sonodeops;
+
+extern int socktpi_init(void);
+extern queue_t *sotpi_convert_sonode(struct sonode *, struct sockparams *,
+ boolean_t *, struct cred *);
+extern void sotpi_update_state(struct sonode *, struct T_capability_ack *,
+ struct sockaddr *, socklen_t, struct sockaddr *, socklen_t,
+ short);
+
+extern sotpi_info_t *sotpi_sototpi(struct sonode *);
+#ifdef DEBUG
+#define SOTOTPI(so) (sotpi_sototpi(so))
+#else
+#define SOTOTPI(so) ((sotpi_info_t *)(so)->so_priv)
+#endif
+
+/* for consumers outside sockfs */
+#define _SOTOTPI(so) ((sotpi_info_t *)(so)->so_priv)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SOCKFS_SOCKTPI_H */
diff --git a/usr/src/uts/common/fs/sockfs/socktpi_impl.h b/usr/src/uts/common/fs/sockfs/socktpi_impl.h
new file mode 100644
index 0000000000..aa0b04bf1c
--- /dev/null
+++ b/usr/src/uts/common/fs/sockfs/socktpi_impl.h
@@ -0,0 +1,99 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SOCKFS_SOCKTPI_IMPL_H
+#define _SOCKFS_SOCKTPI_IMPL_H
+
+#include <sys/socketvar.h>
+#include <fs/sockfs/socktpi.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * A sonode with its TPI state embedded; so_priv is always set to &st_info.
+ */
+typedef struct sotpi_sonode {
+ struct sonode st_sonode;
+ struct sotpi_info st_info;
+} sotpi_sonode_t;
+
+extern void so_proc_tcapability_ack(struct sonode *,
+ struct T_capability_ack *);
+extern void so_basic_strinit(struct sonode *);
+extern void so_alloc_addr(struct sonode *, t_uscalar_t);
+extern int so_set_events(struct sonode *, vnode_t *, cred_t *);
+extern int so_sock2stream(struct sonode *);
+extern void so_stream2sock(struct sonode *);
+
+extern int so_strinit(struct sonode *, struct sonode *);
+extern void so_update_attrs(struct sonode *, int);
+extern int sogetrderr(vnode_t *, int, int *);
+extern int sogetwrerr(vnode_t *, int, int *);
+extern int so_addr_verify(struct sonode *, const struct sockaddr *,
+ socklen_t);
+extern int so_ux_addr_xlate(struct sonode *, struct sockaddr *,
+ socklen_t, int, void **, socklen_t *);
+extern void so_unix_close(struct sonode *);
+
+extern int sowaitprim(struct sonode *, t_scalar_t, t_scalar_t,
+ t_uscalar_t, mblk_t **, clock_t);
+extern int sowaitokack(struct sonode *, t_scalar_t);
+extern int sowaitack(struct sonode *, mblk_t **, clock_t);
+extern void soqueueack(struct sonode *, mblk_t *);
+extern int sowaitconnind(struct sonode *, int, mblk_t **);
+extern void soqueueconnind(struct sonode *, mblk_t *);
+extern int soflushconnind(struct sonode *, t_scalar_t);
+extern void so_drain_discon_ind(struct sonode *);
+extern void so_flush_discon_ind(struct sonode *);
+
+extern mblk_t *soallocproto(size_t, int);
+extern mblk_t *soallocproto1(const void *, ssize_t, ssize_t, int);
+extern void soappendmsg(mblk_t *, const void *, ssize_t);
+extern mblk_t *soallocproto2(const void *, ssize_t, const void *, ssize_t,
+ ssize_t, int);
+extern mblk_t *soallocproto3(const void *, ssize_t, const void *, ssize_t,
+ const void *, ssize_t, ssize_t, int);
+
+extern int so_set_asyncsigs(vnode_t *, pid_t, int, int, cred_t *);
+extern int so_flip_async(struct sonode *, vnode_t *, int, cred_t *);
+extern int so_set_siggrp(struct sonode *, vnode_t *, pid_t, int, cred_t *);
+
+extern void so_installhooks(struct sonode *);
+
+extern int kstrwritemp(struct vnode *, mblk_t *, ushort_t);
+extern int sostream_direct(struct sonode *, struct uio *,
+ mblk_t *, cred_t *);
+extern int sosend_dgram(struct sonode *, struct sockaddr *,
+ socklen_t, struct uio *, int);
+extern int sosend_svc(struct sonode *, struct uio *, t_scalar_t, int, int);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SOCKFS_SOCKTPI_IMPL_H */
diff --git a/usr/src/uts/common/fs/sockfs/sockvnops.c b/usr/src/uts/common/fs/sockfs/sockvnops.c
deleted file mode 100644
index e9195c5e11..0000000000
--- a/usr/src/uts/common/fs/sockfs/sockvnops.c
+++ /dev/null
@@ -1,1438 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/types.h>
-#include <sys/thread.h>
-#include <sys/t_lock.h>
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/bitmap.h>
-#include <sys/buf.h>
-#include <sys/cmn_err.h>
-#include <sys/conf.h>
-#include <sys/debug.h>
-#include <sys/errno.h>
-#include <sys/time.h>
-#include <sys/fcntl.h>
-#include <sys/flock.h>
-#include <sys/file.h>
-#include <sys/kmem.h>
-#include <sys/mman.h>
-#include <sys/open.h>
-#include <sys/swap.h>
-#include <sys/sysmacros.h>
-#include <sys/uio.h>
-#include <sys/vfs.h>
-#include <sys/vfs_opreg.h>
-#include <sys/vnode.h>
-#include <sys/poll.h>
-#include <sys/stropts.h>
-#include <sys/stream.h>
-#include <sys/strsubr.h>
-#include <sys/strsun.h>
-#include <sys/suntpi.h>
-#include <sys/ioctl.h>
-#include <sys/sockio.h>
-#include <sys/filio.h>
-#include <sys/stat.h>
-#include <sys/proc.h>
-#include <sys/user.h>
-#include <sys/session.h>
-#include <sys/vmsystm.h>
-#include <sys/vtrace.h>
-#include <sys/policy.h>
-
-#include <sys/socket.h>
-#include <sys/socketvar.h>
-#include <netinet/in.h>
-#include <sys/un.h>
-
-#define _SUN_TPI_VERSION 2
-#include <sys/tihdr.h>
-
-#include <vm/seg.h>
-#include <vm/seg_map.h>
-#include <vm/page.h>
-#include <vm/pvn.h>
-#include <vm/seg_dev.h>
-#include <vm/seg_vn.h>
-
-#include <fs/fs_subr.h>
-
-#include <sys/esunddi.h>
-#include <sys/autoconf.h>
-
-#include <fs/sockfs/nl7c.h>
-#include <fs/sockfs/nl7curi.h>
-
-#include <inet/udp_impl.h>
-#include <inet/tcp_impl.h>
-
-#include <inet/kssl/ksslapi.h>
-
-static int socktpi_close(struct vnode *, int, int, offset_t, struct cred *,
- caller_context_t *);
-static int socktpi_read(struct vnode *, struct uio *, int, struct cred *,
- caller_context_t *);
-static int socktpi_write(struct vnode *, struct uio *, int, struct cred *,
- caller_context_t *);
-static int socktpi_plumbioctl(struct vnode *, int, intptr_t, int, struct cred *,
- int32_t *);
-static void socktpi_inactive(struct vnode *, struct cred *, caller_context_t *);
-static int socktpi_poll(struct vnode *, short, int, short *,
- struct pollhead **, caller_context_t *);
-
-struct vnodeops *socktpi_vnodeops;
-
-const fs_operation_def_t socktpi_vnodeops_template[] = {
- VOPNAME_OPEN, { .vop_open = socktpi_open },
- VOPNAME_CLOSE, { .vop_close = socktpi_close },
- VOPNAME_READ, { .vop_read = socktpi_read },
- VOPNAME_WRITE, { .vop_write = socktpi_write },
- VOPNAME_IOCTL, { .vop_ioctl = socktpi_ioctl },
- VOPNAME_SETFL, { .vop_setfl = socktpi_setfl },
- VOPNAME_GETATTR, { .vop_getattr = socktpi_getattr },
- VOPNAME_SETATTR, { .vop_setattr = socktpi_setattr },
- VOPNAME_ACCESS, { .vop_access = socktpi_access },
- VOPNAME_FSYNC, { .vop_fsync = socktpi_fsync },
- VOPNAME_INACTIVE, { .vop_inactive = socktpi_inactive },
- VOPNAME_FID, { .vop_fid = socktpi_fid },
- VOPNAME_SEEK, { .vop_seek = socktpi_seek },
- VOPNAME_POLL, { .vop_poll = socktpi_poll },
- VOPNAME_DISPOSE, { .error = fs_error },
- NULL, NULL
-};
-
-/*
- * Do direct function call to the transport layer below; this would
- * also allow the transport to utilize read-side synchronous stream
- * interface if necessary. This is a /etc/system tunable that must
- * not be modified on a running system. By default this is enabled
- * for performance reasons and may be disabled for debugging purposes.
- */
-boolean_t socktpi_direct = B_TRUE;
-
-/*
- * Open routine used by socket() call. Note that vn_open checks for
- * VSOCK and fails the open (and VOP_OPEN is fs_nosys). The VSOCK check is
- * needed since VSOCK type vnodes exist in various underlying filesystems as
- * a result of an AF_UNIX bind to a pathname.
- *
- * Sockets assume that the driver will clone (either itself
- * or by using the clone driver) i.e. a socket() call will always
- * result in a new vnode being created. This routine single-threads
- * open/closes for a given vnode which is probably not needed.
- */
-int
-socktpi_open(struct vnode **vpp, int flag, struct cred *cr,
- caller_context_t *ct)
-{
- major_t maj;
- dev_t newdev;
- struct vnode *vp = *vpp;
- struct sonode *so;
- int error = 0;
- struct stdata *stp;
-
- dprint(1, ("socktpi_open()\n"));
- flag &= ~FCREAT; /* paranoia */
-
- so = VTOSO(vp);
-
- mutex_enter(&so->so_lock);
- so->so_count++; /* one more open reference */
- ASSERT(so->so_count != 0); /* wraparound */
- if (so->so_count == 1)
- so->so_zoneid = getzoneid();
- mutex_exit(&so->so_lock);
-
- ASSERT(vp->v_type == VSOCK);
-
- newdev = vp->v_rdev;
- maj = getmajor(newdev);
- ASSERT(STREAMSTAB(maj));
-
- mutex_enter(&so->so_lock);
- so_lock_single(so); /* Set SOLOCKED */
- mutex_exit(&so->so_lock);
-
- error = stropen(vp, &newdev, flag, cr);
-
- stp = vp->v_stream;
- if (error == 0) {
- if (so->so_flag & SOCLONE)
- ASSERT(newdev != vp->v_rdev);
- mutex_enter(&so->so_lock);
- so->so_dev = newdev;
- vp->v_rdev = newdev;
- so_unlock_single(so, SOLOCKED);
- mutex_exit(&so->so_lock);
-
- if (stp->sd_flag & STRISTTY) {
- /*
- * this is a post SVR4 tty driver - a socket can not
- * be a controlling terminal. Fail the open.
- */
- (void) socktpi_close(vp, flag, 1, (offset_t)0, cr, ct);
- return (ENOTTY); /* XXX */
- }
-
- ASSERT(stp->sd_wrq != NULL);
- so->so_provinfo = tpi_findprov(stp->sd_wrq);
-
- /*
- * If caller is interested in doing direct function call
- * interface to/from transport module, probe the module
- * directly beneath the streamhead to see if it qualifies.
- *
- * We turn off the direct interface when qualifications fail.
- * In the acceptor case, we simply turn off the SS_DIRECT
- * flag on the socket. We do the fallback after the accept
- * has completed, before the new socket is returned to the
- * application.
- */
- if (so->so_state & SS_DIRECT) {
- queue_t *tq = stp->sd_wrq->q_next;
-
- /*
- * SS_DIRECT is currently supported and tested
- * only for tcp/udp; this is the main reason to
- * have the following assertions.
- */
- ASSERT(so->so_family == AF_INET ||
- so->so_family == AF_INET6);
- ASSERT(so->so_protocol == IPPROTO_UDP ||
- so->so_protocol == IPPROTO_TCP ||
- so->so_protocol == IPPROTO_IP);
- ASSERT(so->so_type == SOCK_DGRAM ||
- so->so_type == SOCK_STREAM);
-
- /*
- * Abort direct call interface if the module directly
- * underneath the stream head is not defined with the
- * _D_DIRECT flag. This could happen in the tcp or
- * udp case, when some other module is autopushed
- * above it, or for some reasons the expected module
- * isn't purely D_MP (which is the main requirement).
- *
- * Else, SS_DIRECT is valid. If the read-side Q has
- * _QSODIRECT set then and uioasync is enabled then
- * set SS_SODIRECT to enable sodirect.
- */
- if (!socktpi_direct || !(tq->q_flag & _QDIRECT) ||
- !(_OTHERQ(tq)->q_flag & _QDIRECT)) {
- int rval;
-
- /* Continue on without direct calls */
- so->so_state &= ~SS_DIRECT;
- if (!(flag & SO_ACCEPTOR)) {
- if ((error = strioctl(vp,
- _SIOCSOCKFALLBACK, 0, 0, K_TO_K,
- CRED(), &rval)) != 0) {
- (void) socktpi_close(vp, flag,
- 1, (offset_t)0, cr, ct);
- return (error);
- }
- }
- } else if ((_OTHERQ(tq)->q_flag & _QSODIRECT) &&
- uioasync.enabled) {
- /* Enable sodirect */
- so->so_state |= SS_SODIRECT;
- }
- }
- } else {
- /*
- * While the same socket can not be reopened (unlike specfs)
- * the stream head sets STREOPENFAIL when the autopush fails.
- */
- if ((stp != NULL) &&
- (stp->sd_flag & STREOPENFAIL)) {
- /*
- * Open failed part way through.
- */
- mutex_enter(&stp->sd_lock);
- stp->sd_flag &= ~STREOPENFAIL;
- mutex_exit(&stp->sd_lock);
-
- mutex_enter(&so->so_lock);
- so_unlock_single(so, SOLOCKED);
- mutex_exit(&so->so_lock);
- (void) socktpi_close(vp, flag, 1,
- (offset_t)0, cr, ct);
- return (error);
- /*NOTREACHED*/
- }
- ASSERT(stp == NULL);
- mutex_enter(&so->so_lock);
- so_unlock_single(so, SOLOCKED);
- ASSERT(so->so_count > 0);
- so->so_count--; /* one less open reference */
- mutex_exit(&so->so_lock);
- }
- TRACE_4(TR_FAC_SOCKFS, TR_SOCKFS_OPEN,
- "sockfs open:maj %d vp %p so %p error %d", maj,
- vp, so, error);
- return (error);
-}
-
-/*ARGSUSED2*/
-static int
-socktpi_close(
- struct vnode *vp,
- int flag,
- int count,
- offset_t offset,
- struct cred *cr,
- caller_context_t *ct)
-{
- struct sonode *so;
- dev_t dev;
- int error = 0;
-
- so = VTOSO(vp);
-
- dprintso(so, 1, ("socktpi_close(%p, %x, %d) %s\n",
- (void *)vp, flag, count, pr_state(so->so_state, so->so_mode)));
-
- cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
- cleanshares(vp, ttoproc(curthread)->p_pid);
- if (vp->v_stream)
- strclean(vp);
- if (count > 1)
- return (0);
-
- dev = so->so_dev;
-
- ASSERT(vp->v_type == VSOCK);
- ASSERT(STREAMSTAB(getmajor(dev)));
-
- mutex_enter(&so->so_lock);
- so_lock_single(so); /* Set SOLOCKED */
- ASSERT(so->so_count > 0);
- so->so_count--; /* one fewer open reference */
-
- /*
- * Only call NL7C's close on last open reference.
- */
- if (so->so_count == 0 && (so->so_nl7c_flags & NL7C_ENABLED)) {
- so->so_nl7c_flags = 0;
- nl7c_close(so);
- }
-
- /*
- * Only call the close routine when the last open reference through
- * any [s, v]node goes away.
- */
- if (so->so_count == 0 && vp->v_stream != NULL) {
- vnode_t *ux_vp;
-
- if (so->so_family == AF_UNIX) {
- /* Could avoid this when CANTSENDMORE for !dgram */
- so_unix_close(so);
- }
-
- mutex_exit(&so->so_lock);
- /*
- * Disassemble the linkage from the AF_UNIX underlying file
- * system vnode to this socket (by atomically clearing
- * v_stream in vn_rele_stream) before strclose clears sd_vnode
- * and frees the stream head.
- */
- if ((ux_vp = so->so_ux_bound_vp) != NULL) {
- ASSERT(ux_vp->v_stream);
- so->so_ux_bound_vp = NULL;
- vn_rele_stream(ux_vp);
- }
- if (so->so_family == AF_INET || so->so_family == AF_INET6) {
- strsetrwputdatahooks(SOTOV(so), NULL, NULL);
- if (so->so_kssl_ent != NULL) {
- kssl_release_ent(so->so_kssl_ent, so,
- so->so_kssl_type);
- so->so_kssl_ent = NULL;
- }
- if (so->so_kssl_ctx != NULL) {
- kssl_release_ctx(so->so_kssl_ctx);
- so->so_kssl_ctx = NULL;
- }
- so->so_kssl_type = KSSL_NO_PROXY;
- }
- error = strclose(vp, flag, cr);
- vp->v_stream = NULL;
- mutex_enter(&so->so_lock);
- }
-
- /*
- * Flush the T_DISCON_IND on so_discon_ind_mp.
- */
- if (so->so_count == 0)
- so_flush_discon_ind(so);
-
- so_unlock_single(so, SOLOCKED);
- mutex_exit(&so->so_lock);
-
- /*
- * Decrement the device driver's reference count for streams
- * opened via the clone dip. The driver was held in clone_open().
- * The absence of clone_close() forces this asymmetry.
- */
- if (so->so_flag & SOCLONE)
- ddi_rele_driver(getmajor(dev));
-
- return (error);
-}
-
-/*ARGSUSED2*/
-static int
-socktpi_read(
- struct vnode *vp,
- struct uio *uiop,
- int ioflag,
- struct cred *cr,
- caller_context_t *ct)
-{
- struct sonode *so = VTOSO(vp);
- struct nmsghdr lmsg;
-
- dprintso(so, 1, ("socktpi_read(%p) %s\n",
- (void *)so, pr_state(so->so_state, so->so_mode)));
-
- ASSERT(vp->v_type == VSOCK);
- so_update_attrs(so, SOACC);
-
- uiop->uio_extflg |= UIO_COPY_CACHED;
-
- if (so->so_version == SOV_STREAM) {
- /* The imaginary "sockmod" has been popped - act as a stream */
- return (strread(vp, uiop, cr));
- }
- lmsg.msg_namelen = 0;
- lmsg.msg_controllen = 0;
- lmsg.msg_flags = 0;
- return (sotpi_recvmsg(so, &lmsg, uiop));
-}
-
-/* ARGSUSED2 */
-static int
-socktpi_write(
- struct vnode *vp,
- struct uio *uiop,
- int ioflag,
- struct cred *cr,
- caller_context_t *ct)
-{
- struct sonode *so = VTOSO(vp);
- int so_state;
- int so_mode;
- int error;
-
- dprintso(so, 1, ("socktpi_write(%p) %s\n",
- (void *)so, pr_state(so->so_state, so->so_mode)));
-
- ASSERT(vp->v_type == VSOCK);
-
- if (so->so_family == AF_UNIX)
- uiop->uio_extflg |= UIO_COPY_CACHED;
- else
- uiop->uio_extflg &= ~UIO_COPY_CACHED;
- if (so->so_version == SOV_STREAM) {
- /* The imaginary "sockmod" has been popped - act as a stream */
- so_update_attrs(so, SOMOD);
- return (strwrite(vp, uiop, cr));
- }
- /* State checks */
- so_state = so->so_state;
- so_mode = so->so_mode;
- if (so_state & SS_CANTSENDMORE) {
- tsignal(curthread, SIGPIPE);
- return (EPIPE);
- }
-
- if (so->so_error != 0) {
- mutex_enter(&so->so_lock);
- error = sogeterr(so);
- if (error != 0) {
- mutex_exit(&so->so_lock);
- return (error);
- }
- mutex_exit(&so->so_lock);
- }
-
- if ((so_state & (SS_ISCONNECTED|SS_ISBOUND)) !=
- (SS_ISCONNECTED|SS_ISBOUND)) {
- if (so_mode & SM_CONNREQUIRED)
- return (ENOTCONN);
- else
- return (EDESTADDRREQ);
- }
-
- if (!(so_mode & SM_CONNREQUIRED)) {
- /*
- * Note that this code does not prevent so_faddr_sa
- * from changing while it is being used. Thus
- * if an "unconnect"+connect occurs concurrently with
- * this write the datagram might be delivered to a
- * garbled address.
- */
- so_update_attrs(so, SOMOD);
- return (sosend_dgram(so, so->so_faddr_sa,
- (t_uscalar_t)so->so_faddr_len, uiop, 0));
- }
- so_update_attrs(so, SOMOD);
-
- if (so_mode & SM_BYTESTREAM) {
- /* Send M_DATA messages */
- if ((so->so_nl7c_flags & NL7C_ENABLED) &&
- (error = nl7c_data(so, uiop)) >= 0) {
- /* NL7C consumed the data */
- return (error);
- }
- if ((so_state & SS_DIRECT) &&
- canputnext(vp->v_stream->sd_wrq)) {
- return (sostream_direct(so, uiop, NULL, cr));
- }
- return (strwrite(vp, uiop, cr));
- } else {
- /* Send T_DATA_REQ messages without MORE_flag set */
- return (sosend_svc(so, uiop, T_DATA_REQ, 0, 0));
- }
-}
-
-int
-so_copyin(const void *from, void *to, size_t size, int fromkernel)
-{
- if (fromkernel) {
- bcopy(from, to, size);
- return (0);
- }
- return (xcopyin(from, to, size));
-}
-
-int
-so_copyout(const void *from, void *to, size_t size, int tokernel)
-{
- if (tokernel) {
- bcopy(from, to, size);
- return (0);
- }
- return (xcopyout(from, to, size));
-}
-
-/*ARGSUSED6*/
-int
-socktpi_ioctl(struct vnode *vp, int cmd, intptr_t arg, int mode,
- struct cred *cr, int32_t *rvalp, caller_context_t *ct)
-{
- struct sonode *so = VTOSO(vp);
- int error = 0;
-
- ASSERT(vp->v_type == VSOCK);
- dprintso(so, 0, ("socktpi_ioctl: cmd 0x%x, arg 0x%lx, state %s\n",
- cmd, arg, pr_state(so->so_state, so->so_mode)));
-
- switch (cmd) {
- case _I_INSERT:
- case _I_REMOVE:
- /*
- * Since there's no compelling reason to support these ioctls
- * on sockets, and doing so would increase the complexity
- * markedly, prevent it.
- */
- return (EOPNOTSUPP);
-
- case I_FIND:
- case I_LIST:
- case I_LOOK:
- case I_POP:
- case I_PUSH:
- /*
- * To prevent races and inconsistencies between the actual
- * state of the stream and the state according to the sonode,
- * we serialize all operations which modify or operate on the
- * list of modules on the socket's stream.
- */
- mutex_enter(&so->so_plumb_lock);
- error = socktpi_plumbioctl(vp, cmd, arg, mode, cr, rvalp);
- mutex_exit(&so->so_plumb_lock);
- return (error);
-
- default:
- if (so->so_version != SOV_STREAM)
- break;
-
- /*
- * The imaginary "sockmod" has been popped; act as a stream.
- */
- return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp));
- }
-
- ASSERT(so->so_version != SOV_STREAM);
-
- /*
- * Process socket-specific ioctls.
- */
- switch (cmd) {
- case FIONBIO: {
- int32_t value;
-
- if (so_copyin((void *)arg, &value, sizeof (int32_t),
- (mode & (int)FKIOCTL)))
- return (EFAULT);
-
- mutex_enter(&so->so_lock);
- if (value) {
- so->so_state |= SS_NDELAY;
- } else {
- so->so_state &= ~SS_NDELAY;
- }
- mutex_exit(&so->so_lock);
- return (0);
- }
-
- case FIOASYNC: {
- int32_t value;
-
- if (so_copyin((void *)arg, &value, sizeof (int32_t),
- (mode & (int)FKIOCTL)))
- return (EFAULT);
-
- mutex_enter(&so->so_lock);
- /*
- * SS_ASYNC flag not already set correctly?
- * (!value != !(so->so_state & SS_ASYNC))
- * but some engineers find that too hard to read.
- */
- if (value == 0 && (so->so_state & SS_ASYNC) != 0 ||
- value != 0 && (so->so_state & SS_ASYNC) == 0)
- error = so_flip_async(so, vp, mode, cr);
- mutex_exit(&so->so_lock);
- return (error);
- }
-
- case SIOCSPGRP:
- case FIOSETOWN: {
- pid_t pgrp;
-
- if (so_copyin((void *)arg, &pgrp, sizeof (pid_t),
- (mode & (int)FKIOCTL)))
- return (EFAULT);
-
- mutex_enter(&so->so_lock);
- dprintso(so, 1, ("setown: new %d old %d\n", pgrp, so->so_pgrp));
- /* Any change? */
- if (pgrp != so->so_pgrp)
- error = so_set_siggrp(so, vp, pgrp, mode, cr);
- mutex_exit(&so->so_lock);
- return (error);
- }
- case SIOCGPGRP:
- case FIOGETOWN:
- if (so_copyout(&so->so_pgrp, (void *)arg,
- sizeof (pid_t), (mode & (int)FKIOCTL)))
- return (EFAULT);
- return (0);
-
- case SIOCATMARK: {
- int retval;
- uint_t so_state;
-
- /*
- * strwaitmark has a finite timeout after which it
- * returns -1 if the mark state is undetermined.
- * In order to avoid any race between the mark state
- * in sockfs and the mark state in the stream head this
- * routine loops until the mark state can be determined
- * (or the urgent data indication has been removed by some
- * other thread).
- */
- do {
- mutex_enter(&so->so_lock);
- so_state = so->so_state;
- mutex_exit(&so->so_lock);
- if (so_state & SS_RCVATMARK) {
- retval = 1;
- } else if (!(so_state & SS_OOBPEND)) {
- /*
- * No SIGURG has been generated -- there is no
- * pending or present urgent data. Thus can't
- * possibly be at the mark.
- */
- retval = 0;
- } else {
- /*
- * Have the stream head wait until there is
- * either some messages on the read queue, or
- * STRATMARK or STRNOTATMARK gets set. The
- * STRNOTATMARK flag is used so that the
- * transport can send up a MSGNOTMARKNEXT
- * M_DATA to indicate that it is not
- * at the mark and additional data is not about
- * to be send upstream.
- *
- * If the mark state is undetermined this will
- * return -1 and we will loop rechecking the
- * socket state.
- */
- retval = strwaitmark(vp);
- }
- } while (retval == -1);
-
- if (so_copyout(&retval, (void *)arg, sizeof (int),
- (mode & (int)FKIOCTL)))
- return (EFAULT);
- return (0);
- }
-
- case I_FDINSERT:
- case I_SENDFD:
- case I_RECVFD:
- case I_ATMARK:
- case _SIOCSOCKFALLBACK:
- /*
- * These ioctls do not apply to sockets. I_FDINSERT can be
- * used to send M_PROTO messages without modifying the socket
- * state. I_SENDFD/RECVFD should not be used for socket file
- * descriptor passing since they assume a twisted stream.
- * SIOCATMARK must be used instead of I_ATMARK.
- *
- * _SIOCSOCKFALLBACK from an application should never be
- * processed. It is only generated by socktpi_open() or
- * in response to I_POP or I_PUSH.
- */
-#ifdef DEBUG
- zcmn_err(getzoneid(), CE_WARN,
- "Unsupported STREAMS ioctl 0x%x on socket. "
- "Pid = %d\n", cmd, curproc->p_pid);
-#endif /* DEBUG */
- return (EOPNOTSUPP);
-
- case _I_GETPEERCRED:
- if ((mode & FKIOCTL) == 0)
- return (EINVAL);
-
- mutex_enter(&so->so_lock);
- if ((so->so_mode & SM_CONNREQUIRED) == 0) {
- error = ENOTSUP;
- } else if ((so->so_state & SS_ISCONNECTED) == 0) {
- error = ENOTCONN;
- } else if (so->so_peercred != NULL) {
- k_peercred_t *kp = (k_peercred_t *)arg;
- kp->pc_cr = so->so_peercred;
- kp->pc_cpid = so->so_cpid;
- crhold(so->so_peercred);
- } else {
- error = EINVAL;
- }
- mutex_exit(&so->so_lock);
- return (error);
-
- default:
- /*
- * Do the higher-order bits of the ioctl cmd indicate
- * that it is an I_* streams ioctl?
- */
- if ((cmd & 0xffffff00U) == STR &&
- so->so_version == SOV_SOCKBSD) {
-#ifdef DEBUG
- zcmn_err(getzoneid(), CE_WARN,
- "Unsupported STREAMS ioctl 0x%x on socket. "
- "Pid = %d\n", cmd, curproc->p_pid);
-#endif /* DEBUG */
- return (EOPNOTSUPP);
- }
- return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp));
- }
-}
-
-/*
- * Handle plumbing-related ioctls (I_PUSH, I_POP, I_LIST, I_LOOK and I_FIND)
- * on behalf of a socket, emulating the presence of an imaginary "sockmod"
- * module in the stream.
- *
- * The caller must hold so_plumb_lock; this is asserted on entry.
- *
- * SOV_SOCKBSD sockets always fail with EOPNOTSUPP. A SOV_STREAM socket
- * hands the request to strioctl() unchanged, except that an I_PUSH of
- * "sockmod" converts it back into a socket through so_stream2sock().
- * For the remaining socket versions each command is emulated as described
- * in the individual cases below; a command not listed there panics.
- *
- * Returns 0 on success, otherwise an errno value.
- */
-static int
-socktpi_plumbioctl(struct vnode *vp, int cmd, intptr_t arg, int mode,
- struct cred *cr, int32_t *rvalp)
-{
- static const char sockmod_name[] = "sockmod";
- struct sonode *so = VTOSO(vp);
- char mname[FMNAMESZ + 1];
- int error;
-
- ASSERT(MUTEX_HELD(&so->so_plumb_lock));
-
- if (so->so_version == SOV_SOCKBSD)
- return (EOPNOTSUPP);
-
- if (so->so_version == SOV_STREAM) {
- /*
- * The imaginary "sockmod" has been popped - act as a stream.
- * If this is a push of sockmod then change back to a socket.
- *
- * The module name is read with copystr() when FKIOCTL is set
- * in mode and with copyinstr() otherwise. If the name cannot
- * be read, or is not "sockmod", the request falls through to
- * strioctl() below.
- */
- if (cmd == I_PUSH) {
- error = ((mode & FKIOCTL) ? copystr : copyinstr)(
- (void *)arg, mname, sizeof (mname), NULL);
-
- if (error == 0 && strcmp(mname, sockmod_name) == 0) {
- dprintso(so, 0, ("socktpi_ioctl: going to "
- "socket version\n"));
- so_stream2sock(so);
- return (0);
- }
- }
- return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp));
- }
-
- switch (cmd) {
- case I_PUSH:
- if (so->so_state & SS_DIRECT) { /* issue _SIOCSOCKFALLBACK first */
- mutex_enter(&so->so_lock);
- so_lock_single(so);
- mutex_exit(&so->so_lock);
-
- error = strioctl(vp, _SIOCSOCKFALLBACK, 0, 0, K_TO_K,
- CRED(), rvalp);
-
- mutex_enter(&so->so_lock);
- if (error == 0)
- so->so_state &= ~SS_DIRECT; /* only cleared on success */
- so_unlock_single(so, SOLOCKED);
- mutex_exit(&so->so_lock);
-
- if (error != 0)
- return (error);
- }
-
- error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp);
- if (error == 0)
- so->so_pushcnt++; /* one more module pushed via this path */
- return (error);
-
- case I_POP:
- if (so->so_pushcnt == 0) {
- /* Emulate sockmod being popped */
- dprintso(so, 0,
- ("socktpi_ioctl: going to STREAMS version\n"));
- return (so_sock2stream(so));
- }
-
- error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp);
- if (error == 0)
- so->so_pushcnt--;
- return (error);
-
- case I_LIST: {
- struct str_mlist *kmlistp, *umlistp;
- struct str_list kstrlist;
- ssize_t kstrlistsize;
- int i, nmods;
-
- STRUCT_DECL(str_list, ustrlist);
- STRUCT_INIT(ustrlist, mode);
-
- if (arg == NULL) {
- error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp);
- if (error == 0)
- (*rvalp)++; /* Add one for sockmod */
- return (error);
- }
-
- error = so_copyin((void *)arg, STRUCT_BUF(ustrlist),
- STRUCT_SIZE(ustrlist), mode & FKIOCTL);
- if (error != 0)
- return (error);
-
- nmods = STRUCT_FGET(ustrlist, sl_nmods);
- if (nmods <= 0)
- return (EINVAL);
- /*
- * Ceiling nmods at nstrpush to prevent someone from
- * maliciously consuming lots of kernel memory.
- */
- nmods = MIN(nmods, nstrpush);
-
- kstrlistsize = (nmods + 1) * sizeof (struct str_mlist);
- kstrlist.sl_nmods = nmods;
- kstrlist.sl_modlist = kmem_zalloc(kstrlistsize, KM_SLEEP);
-
- error = strioctl(vp, cmd, (intptr_t)&kstrlist, mode, K_TO_K,
- cr, rvalp);
- if (error != 0)
- goto done;
-
- /*
- * Considering the module list as a 0-based array of sl_nmods
- * modules, sockmod should conceptually exist at slot
- * so_pushcnt. Insert sockmod at this location by sliding all
- * of the module names after so_pushcnt over by one. We know
- * that there will be room to do this since we allocated
- * sl_modlist with an additional slot.
- *
- * After the loop i is the slot that receives the sockmod
- * entry.
- */
- for (i = kstrlist.sl_nmods; i > so->so_pushcnt; i--)
- kstrlist.sl_modlist[i] = kstrlist.sl_modlist[i - 1];
-
- (void) strcpy(kstrlist.sl_modlist[i].l_name, sockmod_name);
- kstrlist.sl_nmods++;
-
- /*
- * Copy all of the entries out to ustrlist.
- *
- * At most nmods (the clamped, caller-supplied count) entries
- * are copied. The number actually copied is then written to
- * the start of the caller's structure at arg; presumably
- * sl_nmods is its first member -- TODO confirm.
- */
- kmlistp = kstrlist.sl_modlist;
- umlistp = STRUCT_FGETP(ustrlist, sl_modlist);
- for (i = 0; i < nmods && i < kstrlist.sl_nmods; i++) {
- error = so_copyout(kmlistp++, umlistp++,
- sizeof (struct str_mlist), mode & FKIOCTL);
- if (error != 0)
- goto done;
- }
-
- error = so_copyout(&i, (void *)arg, sizeof (int32_t),
- mode & FKIOCTL);
- if (error == 0)
- *rvalp = 0;
- done:
- kmem_free(kstrlist.sl_modlist, kstrlistsize); /* freed on every path */
- return (error);
- }
- case I_LOOK:
- if (so->so_pushcnt == 0) { /* nothing pushed: report "sockmod" itself */
- return (so_copyout(sockmod_name, (void *)arg,
- sizeof (sockmod_name), mode & FKIOCTL));
- }
- return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp));
-
- case I_FIND:
- error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp);
- if (error && error != EINVAL)
- return (error);
-
- /* if not found and string was sockmod return 1 */
- if (*rvalp == 0 || error == EINVAL) {
- error = ((mode & FKIOCTL) ? copystr : copyinstr)(
- (void *)arg, mname, sizeof (mname), NULL);
- if (error == ENAMETOOLONG)
- error = EINVAL; /* over-long names are reported as EINVAL */
-
- if (error == 0 && strcmp(mname, sockmod_name) == 0)
- *rvalp = 1;
- }
- return (error);
-
- default:
- panic("socktpi_plumbioctl: unknown ioctl %d", cmd);
- break;
- }
-
- return (0);
-}
-
-/*
- * Allow any flags. Record FNDELAY and FNONBLOCK so that they can be inherited
- * from listener to acceptor.
- *
- * SS_NDELAY and SS_NONBLOCK in so_state are set or cleared to mirror
- * FNDELAY and FNONBLOCK in nflags. Unless the socket is in SOV_STREAM mode,
- * SS_ASYNC is also brought in line with FASYNC through so_flip_async().
- *
- * Returns 0, or the error reported by so_flip_async(). oflags is only used
- * for debug output.
- */
-/* ARGSUSED */
-int
-socktpi_setfl(vnode_t *vp, int oflags, int nflags, cred_t *cr,
- caller_context_t *ct)
-{
- struct sonode *so;
- int error = 0;
-
- so = VTOSO(vp);
-
- dprintso(so, 0, ("socktpi_setfl: oflags 0x%x, nflags 0x%x, state %s\n",
- oflags, nflags, pr_state(so->so_state, so->so_mode)));
- mutex_enter(&so->so_lock);
- if (nflags & FNDELAY)
- so->so_state |= SS_NDELAY;
- else
- so->so_state &= ~SS_NDELAY;
- if (nflags & FNONBLOCK)
- so->so_state |= SS_NONBLOCK;
- else
- so->so_state &= ~SS_NONBLOCK;
- mutex_exit(&so->so_lock);
-
- /*
- * Sets/clears the SS_ASYNC flag based on the presence/absence
- * of the FASYNC flag passed to fcntl(F_SETFL).
- * This exists solely for BSD fcntl() FASYNC compatibility.
- *
- * The sonode is looked up again through the stream head's vnode
- * (sd_vnode); presumably vp need not be the socket's own vnode
- * here -- TODO confirm.
- */
- so = VTOSO(vp->v_stream->sd_vnode);
-
- if (so->so_version != SOV_STREAM) {
- mutex_enter(&so->so_lock);
-
- /*
- * SS_ASYNC flag not already set correctly?
- * (!(nflags & FASYNC) != !(so->so_state & SS_ASYNC))
- * but some engineers find that too hard to read.
- */
- if ((nflags & FASYNC) == 0 && (so->so_state & SS_ASYNC) != 0 ||
- (nflags & FASYNC) != 0 && (so->so_state & SS_ASYNC) == 0)
- error = so_flip_async(so, SOTOV(so), 0, CRED());
- mutex_exit(&so->so_lock);
- }
- return (error);
-}
-
-/*
- * Get the made up attributes for the vnode.
- * 4.3BSD returns the current time for all the timestamps.
- * 4.4BSD returns 0 for all the timestamps.
- * Here we use the access and modified times recorded in the sonode.
- *
- * Just like in BSD there is no effect on the underlying file system node
- * bound to an AF_UNIX pathname.
- *
- * When sockmod has been popped this will act just like a stream. Since
- * a socket is always a clone there is no need to inspect the attributes
- * of the "realvp".
- *
- * Fills in *vap and always returns 0. flags, cr and ct are not used.
- */
-/* ARGSUSED */
-int
-socktpi_getattr(
- struct vnode *vp,
- struct vattr *vap,
- int flags,
- struct cred *cr,
- caller_context_t *ct)
-{
- dev_t fsid;
- struct sonode *so;
- static int sonode_shift = 0;
-
- /*
- * Calculate the amount of bitshift to a sonode pointer which will
- * still keep it unique. See below.
- *
- * The value is computed on first use and cached in a function-level
- * static.
- */
- if (sonode_shift == 0)
- sonode_shift = highbit(sizeof (struct sonode));
- ASSERT(sonode_shift > 0);
-
- so = VTOSO(vp);
- fsid = so->so_fsid;
-
- if (so->so_version == SOV_STREAM) {
- /*
- * The imaginary "sockmod" has been popped - act
- * as a stream
- */
- vap->va_type = VCHR;
- vap->va_mode = 0;
- } else {
- vap->va_type = vp->v_type;
- vap->va_mode = S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|
- S_IROTH|S_IWOTH;
- }
- vap->va_uid = vap->va_gid = 0;
- vap->va_fsid = fsid;
- /*
- * If the va_nodeid is > MAX_USHORT, then i386 stats might fail.
- * So we shift down the sonode pointer to try and get the most
- * uniqueness into 16-bits.
- */
- vap->va_nodeid = ((ino_t)so >> sonode_shift) & 0xFFFF;
- vap->va_nlink = 0;
- vap->va_size = 0;
-
- /*
- * We need to zero out the va_rdev to avoid some fstats getting
- * EOVERFLOW. This also mimics SunOS 4.x and BSD behavior.
- */
- vap->va_rdev = (dev_t)0;
- vap->va_blksize = MAXBSIZE;
- vap->va_nblocks = btod(vap->va_size); /* va_size was set to 0 above */
-
- mutex_enter(&so->so_lock); /* timestamps are read under so_lock */
- vap->va_atime.tv_sec = so->so_atime;
- vap->va_mtime.tv_sec = so->so_mtime;
- vap->va_ctime.tv_sec = so->so_ctime;
- mutex_exit(&so->so_lock);
-
- vap->va_atime.tv_nsec = 0;
- vap->va_mtime.tv_nsec = 0;
- vap->va_ctime.tv_nsec = 0;
- vap->va_seq = 0;
-
- return (0);
-}
-
-/*
- * Set attributes.
- * Just like in BSD there is no effect on the underlying file system node
- * bound to an AF_UNIX pathname.
- *
- * When sockmod has been popped this will act just like a stream. Since
- * a socket is always a clone there is no need to modify the attributes
- * of the "realvp".
- *
- * Only AT_ATIME and AT_MTIME in vap->va_mask are acted upon; a change of
- * the modification time also sets so_ctime to the current time. Every
- * other attribute is ignored and 0 is always returned. flags, cr and ct
- * are not used.
- */
-/* ARGSUSED */
-int
-socktpi_setattr(
- struct vnode *vp,
- struct vattr *vap,
- int flags,
- struct cred *cr,
- caller_context_t *ct)
-{
- struct sonode *so = VTOSO(vp);
-
- /*
- * If times were changed, update sonode.
- * Only the seconds part (tv_sec) of each timestamp is stored.
- */
- mutex_enter(&so->so_lock);
- if (vap->va_mask & AT_ATIME)
- so->so_atime = vap->va_atime.tv_sec;
- if (vap->va_mask & AT_MTIME) {
- so->so_mtime = vap->va_mtime.tv_sec;
- so->so_ctime = gethrestime_sec();
- }
- mutex_exit(&so->so_lock);
-
- return (0);
-}
-
-int
-socktpi_access(struct vnode *vp, int mode, int flags, struct cred *cr,
- caller_context_t *ct)
-{
- struct vnode *accessvp;
- struct sonode *so = VTOSO(vp);
-
- if ((accessvp = so->so_accessvp) != NULL) /* so_accessvp set: let that vnode decide */
- return (VOP_ACCESS(accessvp, mode, flags, cr, ct));
- else
- return (0); /* Allow all access. */
-}
-
-/*
- * 4.3BSD and 4.4BSD fail a fsync on a socket with EINVAL.
- * This code does the same to be compatible and also to not give an
- * application the impression that the data has actually been "synced"
- * to the other end of the connection.
- *
- * Always returns EINVAL; none of the arguments are examined.
- */
-/* ARGSUSED */
-int
-socktpi_fsync(struct vnode *vp, int syncflag, struct cred *cr,
- caller_context_t *ct)
-{
- return (EINVAL);
-}
-
-/* ARGSUSED */
-static void
-socktpi_inactive(struct vnode *vp, struct cred *cr, caller_context_t *ct)
-{
- struct sonode *so = VTOSO(vp);
-
- mutex_enter(&vp->v_lock);
- /*
- * If no one has reclaimed the vnode, remove from the
- * cache now.
- *
- * A hold count below one on entry is treated as fatal.
- */
- if (vp->v_count < 1)
- cmn_err(CE_PANIC, "socktpi_inactive: Bad v_count");
-
- /*
- * Drop the temporary hold by vn_rele now
- *
- * If other holds remain after the decrement the sonode is left
- * alone and we simply return.
- */
- if (--vp->v_count != 0) {
- mutex_exit(&vp->v_lock);
- return;
- }
- mutex_exit(&vp->v_lock);
-
- /* We are the sole owner of so now */
-
- ASSERT(!vn_has_cached_data(vp));
- sockfree(so); /* last hold dropped: release the sonode */
-}
-
-/* ARGSUSED */
-int
-socktpi_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
-{
- return (EINVAL); /* unconditional; *fidp is never filled in */
-}
-
-/*
- * Sockets are not seekable.
- * (and there is a bug to fix STREAMS to make them fail this as well).
- *
- * Always returns ESPIPE; *noffp is left untouched.
- */
-/*ARGSUSED*/
-int
-socktpi_seek(struct vnode *vp, offset_t ooff, offset_t *noffp,
- caller_context_t *ct)
-{
- return (ESPIPE);
-}
-
-/*
- * Wrapper around the streams poll routine that implements socket poll
- * semantics.
- * Sockfs never calls pollwakeup() itself - the stream head takes care
- * of all pollwakeups. Since sockfs never holds so_lock when calling the
- * stream head there can never be a deadlock due to holding so_lock across
- * pollwakeup and acquiring so_lock in this routine.
- *
- * However, since the performance of VOP_POLL is critical we avoid
- * acquiring so_lock here. This is based on the following assumptions:
- * - The poll implementation holds locks to serialize the VOP_POLL call
- * and a pollwakeup for the same pollhead. This ensures that should
- * so_state etc change during a socktpi_poll() call, the pollwakeup()
- * (which strsock_* and strrput() conspire to issue) is issued after
- * the state change. Thus the pollwakeup will block until VOP_POLL has
- * returned, and then wake up poll and have it call VOP_POLL again.
- *
- * - The reading of so_state without holding so_lock does not result in
- * stale data (older than the latest state change that has dropped
- * so_lock). This is ensured as mutex_exit() issues the appropriate
- * memory barrier to force the data into the coherency domain.
- *
- * - Whilst so_state may change during the VOP_POLL call, (SS_HASCONNIND
- * may have been set by an arriving connection), the above two factors
- * guarantee validity of SS_ISCONNECTED/SM_CONNREQUIRED in the entry
- * time snapshot. In order to capture the arrival of a connection while
- * VOP_POLL was in progress, we then check real so_state, (so->so_state)
- * for SS_HASCONNIND and set appropriate events to ensure poll_common()
- * will not sleep.
- *
- * Returns 0 with the ready events stored in *reventsp, or the error
- * returned by strpoll().
- */
-/*ARGSUSED5*/
-static int
-socktpi_poll(
- struct vnode *vp,
- short events,
- int anyyet,
- short *reventsp,
- struct pollhead **phpp,
- caller_context_t *ct)
-{
- short origevents = events;
- struct sonode *so = VTOSO(vp);
- int error;
- int so_state = so->so_state; /* snapshot */
-
- dprintso(so, 0, ("socktpi_poll(%p): state %s err %d\n",
- (void *)vp, pr_state(so_state, so->so_mode), so->so_error));
-
- ASSERT(vp->v_type == VSOCK);
- ASSERT(vp->v_stream != NULL);
-
- if (so->so_version == SOV_STREAM) {
- /* The imaginary "sockmod" has been popped - act as a stream */
- return (strpoll(vp->v_stream, events, anyyet,
- reventsp, phpp));
- }
-
- if (!(so_state & SS_ISCONNECTED) &&
- (so->so_mode & SM_CONNREQUIRED)) {
- /* Not connected yet - turn off write side events */
- events &= ~(POLLOUT|POLLWRBAND);
- }
- /*
- * Check for errors without calling strpoll if the caller wants them.
- * In sockets the errors are represented as input/output events
- * and there is no need to ask the stream head for this information.
- *
- * origevents is used here, so POLLOUT is reported on error even if
- * it was masked out of events above.
- */
- if (so->so_error != 0 &&
- ((POLLIN|POLLRDNORM|POLLOUT) & origevents) != 0) {
- *reventsp = (POLLIN|POLLRDNORM|POLLOUT) & origevents;
- return (0);
- }
- /*
- * Ignore M_PROTO only messages such as the T_EXDATA_IND messages.
- * These messages with only an M_PROTO/M_PCPROTO part and no M_DATA
- * will not trigger a POLLIN event with POLLRDDATA set.
- * The handling of urgent data (causing POLLRDBAND) is done by
- * inspecting SS_OOBPEND below.
- */
- events |= POLLRDDATA;
-
- /*
- * After shutdown(output) a stream head write error is set.
- * However, we should not return output events.
- */
- events |= POLLNOERR;
- error = strpoll(vp->v_stream, events, anyyet,
- reventsp, phpp);
- if (error)
- return (error);
-
- ASSERT(!(*reventsp & POLLERR));
-
- /*
- * Notes on T_CONN_IND handling for sockets.
- *
- * If strpoll() returned without events, SR_POLLIN is guaranteed
- * to be set, ensuring any subsequent strrput() runs pollwakeup().
- *
- * Since the so_lock is not held, soqueueconnind() may have run
- * and a T_CONN_IND may be waiting. We now check for SS_HASCONNIND
- * in the current so_state and set appropriate events to ensure poll
- * returns.
- *
- * However:
- * If the T_CONN_IND hasn't arrived by the time strpoll() returns,
- * when strrput() does run for an arriving M_PROTO with T_CONN_IND
- * the following actions will occur; taken together they ensure the
- * syscall will return.
- *
- * 1. If a socket, soqueueconnind() will set SS_HASCONNIND but if
- * the accept() was run on a non-blocking socket sowaitconnind()
- * may have already returned EWOULDBLOCK, so not be waiting to
- * process the message. Additionally socktpi_poll() has probably
- * proceeded past the SS_HASCONNIND check below.
- * 2. strrput() runs pollwakeup()->pollnotify()->cv_signal() to wake
- * this thread, however that could occur before poll_common()
- * has entered cv_wait.
- * 3. pollnotify() sets T_POLLWAKE, while holding the pc_lock.
- *
- * Before proceeding to cv_wait() in poll_common() for an event,
- * poll_common() atomically checks for T_POLLWAKE under the pc_lock,
- * and if set, re-calls strpoll() to ensure the late arriving
- * T_CONN_IND is recognized, and pollsys() returns.
- */
- if (so->so_state & (SS_HASCONNIND|SS_OOBPEND)) { /* live state, not the snapshot */
- if (so->so_state & SS_HASCONNIND)
- *reventsp |= (POLLIN|POLLRDNORM) & events;
- if (so->so_state & SS_OOBPEND)
- *reventsp |= POLLRDBAND & events;
- }
-
- if (so->so_nl7c_rcv_mp != NULL) { /* so_nl7c_rcv_mp set: report as readable */
- *reventsp |= (POLLIN|POLLRDNORM) & events;
- }
- if ((so->so_nl7c_flags & NL7C_ENABLED) &&
- ((POLLIN|POLLRDNORM) & *reventsp)) {
- so->so_nl7c_flags |= NL7C_POLLIN; /* note: updated without so_lock */
- }
-
- return (0);
-}
-
-/*
- * Wrapper for getmsg. If the socket has been converted to a stream
- * pass the request to the stream head.
- *
- * Returns ENOSTR when vp has no stream or when the socket is not in
- * SOV_STREAM mode; otherwise returns whatever strgetmsg() returns.
- */
-int
-sock_getmsg(
- struct vnode *vp,
- struct strbuf *mctl,
- struct strbuf *mdata,
- uchar_t *prip,
- int *flagsp,
- int fmode,
- rval_t *rvp
-)
-{
- struct sonode *so;
-
- ASSERT(vp->v_type == VSOCK);
- /*
- * Use the stream head to find the real socket vnode.
- * This is needed when namefs sits above sockfs. Some
- * sockets (like SCTP) are not streams.
- */
- if (!vp->v_stream) {
- return (ENOSTR);
- }
- ASSERT(vp->v_stream->sd_vnode);
- vp = vp->v_stream->sd_vnode; /* from here on vp is the stream head's vnode */
- ASSERT(vn_matchops(vp, socktpi_vnodeops));
- so = VTOSO(vp);
-
- dprintso(so, 1, ("sock_getmsg(%p) %s\n",
- (void *)so, pr_state(so->so_state, so->so_mode)));
-
- if (so->so_version == SOV_STREAM) {
- /* The imaginary "sockmod" has been popped - act as a stream */
- return (strgetmsg(vp, mctl, mdata, prip, flagsp, fmode, rvp));
- }
- eprintsoline(so, ENOSTR);
- return (ENOSTR);
-}
-
-/*
- * Wrapper for putmsg. If the socket has been converted to a stream
- * pass the request to the stream head.
- *
- * Note that while a regular socket (SOV_SOCKSTREAM) does support the
- * streams ioctl set it does not support putmsg and getmsg.
- * Allowing putmsg would prevent sockfs from tracking the state of
- * the socket/transport and would also invalidate the locking in sockfs.
- *
- * Returns ENOSTR when vp has no stream or when the socket is not in
- * SOV_STREAM mode; otherwise returns whatever strputmsg() returns.
- */
-int
-sock_putmsg(
- struct vnode *vp,
- struct strbuf *mctl,
- struct strbuf *mdata,
- uchar_t pri,
- int flag,
- int fmode
-)
-{
- struct sonode *so;
-
- ASSERT(vp->v_type == VSOCK);
- /*
- * Use the stream head to find the real socket vnode.
- * This is needed when namefs sits above sockfs.
- */
- if (!vp->v_stream) {
- return (ENOSTR);
- }
- ASSERT(vp->v_stream->sd_vnode);
- vp = vp->v_stream->sd_vnode; /* from here on vp is the stream head's vnode */
- ASSERT(vn_matchops(vp, socktpi_vnodeops));
- so = VTOSO(vp);
-
- dprintso(so, 1, ("sock_putmsg(%p) %s\n",
- (void *)so, pr_state(so->so_state, so->so_mode)));
-
- if (so->so_version == SOV_STREAM) {
- /* The imaginary "sockmod" has been popped - act as a stream */
- return (strputmsg(vp, mctl, mdata, pri, flag, fmode));
- }
- eprintsoline(so, ENOSTR);
- return (ENOSTR);
-}
-
-/*
- * Special function called only from f_getfl().
- * Returns FASYNC if the SS_ASYNC flag is set on a socket, else 0.
- * No locks are acquired here, so it is safe to use while uf_lock is held.
- * This exists solely for BSD fcntl() FASYNC compatibility.
- *
- * A socket in SOV_STREAM mode always yields 0. vp->v_stream is
- * dereferenced without a NULL check, so the caller is expected to pass
- * a vnode that has a stream.
- */
-int
-sock_getfasync(vnode_t *vp)
-{
- struct sonode *so;
-
- ASSERT(vp->v_type == VSOCK);
- so = VTOSO(vp->v_stream->sd_vnode);
- if (so->so_version == SOV_STREAM || !(so->so_state & SS_ASYNC))
- return (0);
- return (FASYNC);
-}
diff --git a/usr/src/uts/common/inet/inetddi.c b/usr/src/uts/common/inet/inetddi.c
index 48a9e3aa2e..6b0cd5839a 100644
--- a/usr/src/uts/common/inet/inetddi.c
+++ b/usr/src/uts/common/inet/inetddi.c
@@ -23,7 +23,6 @@
* Use is subject to license terms.
*/
-
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/stream.h>
@@ -57,12 +56,23 @@
*
* Drivers that need to masquerade as IP should set INET_DEVMTFLAGS to
* IP_DEVMTFLAGS and set INET_DEVSTRTAB to ipinfo.
+ *
+ * The symbols that all socket modules must define are:
+ *
+ * INET_SOCKDESC The one-line description for this socket module
+ * INET_SOCK_PROTO_CREATE_FUNC The function used to create PCBs
+ *
+ * In addition, socket modules that can be converted to TPI must define:
+ *
+ * INET_SOCK_PROTO_FB_FUNC The function used to fallback to TPI
*/
#if !defined(INET_NAME)
#error inetddi.c: INET_NAME is not defined!
-#elif !defined(INET_DEVDESC) && !defined(INET_MODDESC)
-#error inetddi.c: at least one of INET_DEVDESC or INET_MODDESC must be defined!
+#elif !defined(INET_DEVDESC) && !defined(INET_MODDESC) && \
+ !defined(INET_SOCKDESC)
+#error inetddi.c: at least one of INET_DEVDESC or INET_MODDESC or \
+INET_SOCKDESC must be defined!
#elif defined(INET_DEVDESC) && !defined(INET_DEVSTRTAB)
#error inetddi.c: INET_DEVDESC is defined but INET_DEVSTRTAB is not!
#elif defined(INET_DEVDESC) && !defined(INET_DEVMTFLAGS)
@@ -73,6 +83,11 @@
#error inetddi.c: INET_MODDESC is defined but INET_MODSTRTAB is not!
#elif defined(INET_MODDESC) && !defined(INET_MODMTFLAGS)
#error inetddi.c: INET_MODDESC is defined but INET_MODMTFLAGS is not!
+#elif defined(INET_SOCKDESC) && !defined(SOCKMOD_VERSION)
+#error inetddi.c: INET_SOCKDESC is defined but SOCKMOD_VERSION is not!
+#elif defined(INET_SOCKDESC) && !defined(INET_SOCK_PROTO_CREATE_FUNC)
+#error inetddi.c: INET_SOCKDESC is defined but INET_SOCK_PROTO_CREATE_FUNC \
+is not!
#endif
#ifdef INET_DEVDESC
@@ -192,8 +207,39 @@ static struct modlstrmod modlstrmod = {
INET_MODDESC,
&fsw
};
+
#endif /* INET_MODDESC */
+#ifdef INET_SOCKDESC
+
+#ifdef INET_SOCK_PROTO_FB_FUNC
+static __smod_priv_t smodpriv = {
+ NULL, /* NOTE(review): meaning of the first two members is not visible here */
+ NULL,
+ INET_SOCK_PROTO_FB_FUNC /* function used to fall back to TPI */
+};
+#endif /* INET_SOCK_PROTO_FB_FUNC */
+
+static struct smod_reg_s smodreg = {
+ SOCKMOD_VERSION,
+ INET_NAME,
+ SOCK_UC_VERSION,
+ SOCK_DC_VERSION,
+ INET_SOCK_PROTO_CREATE_FUNC, /* function used to create PCBs */
+#ifdef INET_SOCK_PROTO_FB_FUNC
+ &smodpriv /* fallback function supplied */
+#else
+ NULL /* no fallback function: nothing private to register */
+#endif /* INET_SOCK_PROTO_FB_FUNC */
+};
+
+static struct modlsockmod modlsockmod = {
+ &mod_sockmodops,
+ INET_SOCKDESC, /* one-line description for this socket module */
+ &smodreg
+};
+#endif /* INET_SOCKDESC */
+
static struct modlinkage modlinkage = {
MODREV_1,
#ifdef INET_DEVDESC
@@ -202,5 +248,8 @@ static struct modlinkage modlinkage = {
#ifdef INET_MODDESC
&modlstrmod,
#endif
+#ifdef INET_SOCKDESC
+ &modlsockmod,
+#endif
NULL
};
diff --git a/usr/src/uts/common/inet/ip.h b/usr/src/uts/common/inet/ip.h
index c7ccff8a14..323c8fd0de 100644
--- a/usr/src/uts/common/inet/ip.h
+++ b/usr/src/uts/common/inet/ip.h
@@ -2771,7 +2771,7 @@ typedef struct ip_pktinfo {
#define ILL_LOOKUP_FAILED 1 /* Used as error code */
#define IPIF_LOOKUP_FAILED 2 /* Used as error code */
-#define ILL_CAN_LOOKUP(ill) \
+#define ILL_CAN_LOOKUP(ill) \
(!((ill)->ill_state_flags & (ILL_CONDEMNED | ILL_CHANGING)) || \
IAM_WRITER_ILL(ill))
@@ -2781,7 +2781,7 @@ typedef struct ip_pktinfo {
#define ILL_CAN_LOOKUP_WALKER(ill) \
(!((ill)->ill_state_flags & ILL_CONDEMNED))
-#define IPIF_CAN_LOOKUP(ipif) \
+#define IPIF_CAN_LOOKUP(ipif) \
(!((ipif)->ipif_state_flags & (IPIF_CONDEMNED | IPIF_CHANGING)) || \
IAM_WRITER_IPIF(ipif))
@@ -3166,11 +3166,15 @@ extern void icmp_unreachable(queue_t *, mblk_t *, uint8_t, zoneid_t,
ip_stack_t *);
extern mblk_t *ip_add_info(mblk_t *, ill_t *, uint_t, zoneid_t, ip_stack_t *);
extern mblk_t *ip_bind_v4(queue_t *, mblk_t *, conn_t *);
-extern int ip_bind_connected(conn_t *, mblk_t *, ipaddr_t *, uint16_t,
- ipaddr_t, uint16_t, boolean_t, boolean_t, boolean_t, boolean_t);
-extern boolean_t ip_bind_ipsec_policy_set(conn_t *, mblk_t *);
-extern int ip_bind_laddr(conn_t *, mblk_t *, ipaddr_t, uint16_t,
- boolean_t, boolean_t, boolean_t);
+extern boolean_t ip_bind_ipsec_policy_set(conn_t *, mblk_t *);
+extern int ip_bind_laddr_v4(conn_t *, mblk_t **, uint8_t, ipaddr_t,
+ uint16_t, boolean_t);
+extern int ip_proto_bind_laddr_v4(conn_t *, mblk_t **, uint8_t, ipaddr_t,
+ uint16_t, boolean_t);
+extern int ip_proto_bind_connected_v4(conn_t *, mblk_t **,
+ uint8_t, ipaddr_t *, uint16_t, ipaddr_t, uint16_t, boolean_t, boolean_t);
+extern int ip_bind_connected_v4(conn_t *, mblk_t **, uint8_t, ipaddr_t *,
+ uint16_t, ipaddr_t, uint16_t, boolean_t, boolean_t);
extern uint_t ip_cksum(mblk_t *, int, uint32_t);
extern int ip_close(queue_t *, int);
extern uint16_t ip_csum_hdr(ipha_t *);
@@ -3308,7 +3312,7 @@ extern boolean_t ip_md_hcksum_attr(struct multidata_s *, struct pdesc_s *,
uint32_t, uint32_t, uint32_t, uint32_t);
extern boolean_t ip_md_zcopy_attr(struct multidata_s *, struct pdesc_s *,
uint_t);
-extern mblk_t *ip_unbind(queue_t *, mblk_t *);
+extern void ip_unbind(conn_t *connp);
extern phyint_t *phyint_lookup_group(char *, boolean_t, ip_stack_t *);
extern phyint_t *phyint_lookup_group_ifindex(uint_t, ip_stack_t *);
@@ -3577,7 +3581,6 @@ extern void ip_squeue_quiesce_ring(ill_t *, ill_rx_ring_t *);
extern void ip_squeue_restart_ring(ill_t *, ill_rx_ring_t *);
extern void ip_squeue_clean_all(ill_t *);
-extern void ip_resume_tcp_bind(void *, mblk_t *, void *);
extern void tcp_wput(queue_t *, mblk_t *);
extern int ip_fill_mtuinfo(struct in6_addr *, in_port_t,
@@ -3635,6 +3638,8 @@ typedef void (*ipsq_func_t)(ipsq_t *, queue_t *, mblk_t *, void *);
#define SQTAG_IP_INPUT_RX_RING 39
#define SQTAG_SQUEUE_CHANGE 40
#define SQTAG_CONNECT_FINISH 41
+#define SQTAG_SYNCHRONOUS_OP 42
+#define SQTAG_TCP_SHUTDOWN_OUTPUT 43
#define NOT_OVER_IP(ip_wq) \
(ip_wq->q_next != NULL || \
@@ -3643,6 +3648,7 @@ typedef void (*ipsq_func_t)(ipsq_t *, queue_t *, mblk_t *, void *);
IP_MOD_NAME) != 0 || \
ip_wq->q_qinfo->qi_minfo->mi_idnum != IP_MOD_ID)
+/*
+ * Read the conn_flow_cntrld member of the structure connp points to.
+ * The argument is parenthesized so that an expression argument (for
+ * example "p + 1" or "cond ? a : b") binds before the -> operator.
+ */
+#define PROTO_FLOW_CNTRLD(connp) ((connp)->conn_flow_cntrld)
#endif /* _KERNEL */
#ifdef __cplusplus
diff --git a/usr/src/uts/common/inet/ip/icmp.c b/usr/src/uts/common/inet/ip/icmp.c
index 90cc6a51d5..c728a687d4 100644
--- a/usr/src/uts/common/inet/ip/icmp.c
+++ b/usr/src/uts/common/inet/ip/icmp.c
@@ -43,7 +43,9 @@
#include <sys/zone.h>
#include <sys/time.h>
+#include <sys/sockio.h>
#include <sys/socket.h>
+#include <sys/socketvar.h>
#include <sys/isa_defs.h>
#include <sys/suntpi.h>
#include <sys/xti_inet.h>
@@ -58,7 +60,7 @@
#include <inet/common.h>
#include <inet/ip.h>
#include <inet/ip6.h>
-#include <inet/mi.h>
+#include <inet/proto_set.h>
#include <inet/nd.h>
#include <inet/optcom.h>
#include <inet/snmpcom.h>
@@ -78,6 +80,7 @@
#include <inet/ip_if.h>
#include <inet/ip_impl.h>
+#include <sys/disp.h>
/*
* Synchronization notes:
@@ -99,41 +102,34 @@
*/
static void icmp_addr_req(queue_t *q, mblk_t *mp);
-static void icmp_bind(queue_t *q, mblk_t *mp);
-static void icmp_bind_proto(queue_t *q);
-static void icmp_bind_result(conn_t *, mblk_t *);
-static void icmp_bind_ack(conn_t *, mblk_t *mp);
-static void icmp_bind_error(conn_t *, mblk_t *mp);
+static void icmp_tpi_bind(queue_t *q, mblk_t *mp);
+static int icmp_bind_proto(conn_t *connp);
static int icmp_build_hdrs(icmp_t *icmp);
static void icmp_capability_req(queue_t *q, mblk_t *mp);
-static int icmp_close(queue_t *q);
-static void icmp_connect(queue_t *q, mblk_t *mp);
-static void icmp_disconnect(queue_t *q, mblk_t *mp);
+static int icmp_close(queue_t *q, int flags);
+static void icmp_tpi_connect(queue_t *q, mblk_t *mp);
+static void icmp_tpi_disconnect(queue_t *q, mblk_t *mp);
static void icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error,
int sys_error);
static void icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
t_scalar_t t_error, int sys_error);
-static void icmp_icmp_error(queue_t *q, mblk_t *mp);
-static void icmp_icmp_error_ipv6(queue_t *q, mblk_t *mp);
+static void icmp_icmp_error(conn_t *connp, mblk_t *mp);
+static void icmp_icmp_error_ipv6(conn_t *connp, mblk_t *mp);
static void icmp_info_req(queue_t *q, mblk_t *mp);
static void icmp_input(void *, mblk_t *, void *);
-static mblk_t *icmp_ip_bind_mp(icmp_t *icmp, t_scalar_t bind_prim,
- t_scalar_t addr_length, in_port_t);
-static int icmp_open(queue_t *q, dev_t *devp, int flag, int sflag,
- cred_t *credp, boolean_t isv6);
+static conn_t *icmp_open(int family, cred_t *credp, int *err, int flags);
static int icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag,
cred_t *credp);
static int icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag,
cred_t *credp);
-static void icmp_output(queue_t *q, mblk_t *mp);
static int icmp_unitdata_opt_process(queue_t *q, mblk_t *mp,
int *errorp, void *thisdg_attrs);
static boolean_t icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name);
-int icmp_opt_set(queue_t *q, uint_t optset_context,
+int icmp_opt_set(conn_t *connp, uint_t optset_context,
int level, int name, uint_t inlen,
uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
- void *thisdg_attrs, cred_t *cr, mblk_t *mblk);
-int icmp_opt_get(queue_t *q, int level, int name,
+ void *thisdg_attrs, cred_t *cr);
+int icmp_opt_get(conn_t *connp, int level, int name,
uchar_t *ptr);
static int icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr);
static boolean_t icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt);
@@ -144,10 +140,13 @@ static int icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name,
static int icmp_status_report(queue_t *q, mblk_t *mp, caddr_t cp,
cred_t *cr);
static void icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err);
-static void icmp_unbind(queue_t *q, mblk_t *mp);
+static void icmp_tpi_unbind(queue_t *q, mblk_t *mp);
static void icmp_wput(queue_t *q, mblk_t *mp);
-static void icmp_wput_ipv6(queue_t *q, mblk_t *mp, sin6_t *sin6,
- t_scalar_t tudr_optlen);
+static void icmp_wput_fallback(queue_t *q, mblk_t *mp);
+static int raw_ip_send_data_v6(queue_t *q, conn_t *connp, mblk_t *mp,
+ sin6_t *sin6, ip6_pkt_t *ipp);
+static int raw_ip_send_data_v4(queue_t *q, conn_t *connp, mblk_t *mp,
+ ipaddr_t v4dst, ip4_pkt_t *pktinfop);
static void icmp_wput_other(queue_t *q, mblk_t *mp);
static void icmp_wput_iocdata(queue_t *q, mblk_t *mp);
static void icmp_wput_restricted(queue_t *q, mblk_t *mp);
@@ -158,7 +157,16 @@ static void rawip_stack_fini(netstackid_t stackid, void *arg);
static void *rawip_kstat_init(netstackid_t stackid);
static void rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp);
static int rawip_kstat_update(kstat_t *kp, int rw);
+static void rawip_stack_shutdown(netstackid_t stackid, void *arg);
+static int rawip_do_getsockname(icmp_t *icmp, struct sockaddr *sa,
+ uint_t *salenp);
+static int rawip_do_getpeername(icmp_t *icmp, struct sockaddr *sa,
+ uint_t *salenp);
+int rawip_getsockname(sock_lower_handle_t, struct sockaddr *,
+ socklen_t *, cred_t *);
+int rawip_getpeername(sock_lower_handle_t, struct sockaddr *,
+ socklen_t *, cred_t *);
static struct module_info icmp_mod_info = {
5707, "icmp", 1, INFPSZ, 512, 128
@@ -177,7 +185,12 @@ static struct qinit icmprinitv6 = {
};
+/* Write-side qinit: icmp_wput is the only procedure supplied; every other slot is NULL */
static struct qinit icmpwinit = {
- (pfi_t)icmp_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL, &icmp_mod_info
+ (pfi_t)icmp_wput, NULL, NULL, NULL, NULL, &icmp_mod_info
+};
+
+/* ICMP entry point during fallback */
+static struct qinit icmp_fallback_sock_winit = {
+ (pfi_t)icmp_wput_fallback, NULL, NULL, NULL, NULL, &icmp_mod_info
};
/* For AF_INET aka /dev/icmp */
@@ -233,6 +246,11 @@ static icmpparam_t icmp_param_arr[] = {
#define is_recv_hiwat is_param_arr[6].icmp_param_value
#define is_max_buf is_param_arr[7].icmp_param_value
+static int rawip_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len);
+static int rawip_do_connect(conn_t *connp, const struct sockaddr *sa,
+ socklen_t len);
+static void rawip_post_ip_bind_connect(icmp_t *icmp, mblk_t *ire_mp, int error);
+
/*
* This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message
* passed to icmp_wput.
@@ -241,14 +259,17 @@ static icmpparam_t icmp_param_arr[] = {
* message is returned by ip_bind_v4/v6.
*/
static void
-icmp_bind(queue_t *q, mblk_t *mp)
+icmp_tpi_bind(queue_t *q, mblk_t *mp)
{
+ int error;
+ struct sockaddr *sa;
+ struct T_bind_req *tbr;
+ socklen_t len;
sin_t *sin;
sin6_t *sin6;
- mblk_t *mp1;
- struct T_bind_req *tbr;
- icmp_t *icmp;
+ icmp_t *icmp;
conn_t *connp = Q_TO_CONN(q);
+ mblk_t *mp1;
icmp = connp->conn_icmp;
if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) {
@@ -258,12 +279,14 @@ icmp_bind(queue_t *q, mblk_t *mp)
icmp_err_ack(q, mp, TPROTO, 0);
return;
}
+
if (icmp->icmp_state != TS_UNBND) {
(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
"icmp_bind: bad state, %d", icmp->icmp_state);
icmp_err_ack(q, mp, TOUTSTATE, 0);
return;
}
+
/*
* Reallocate the message to make sure we have enough room for an
* address and the protocol type.
@@ -274,9 +297,13 @@ icmp_bind(queue_t *q, mblk_t *mp)
return;
}
mp = mp1;
+
+ /* Reset the message type in preparation for shipping it back. */
+ DB_TYPE(mp) = M_PCPROTO;
tbr = (struct T_bind_req *)mp->b_rptr;
- switch (tbr->ADDR_length) {
- case 0: /* Generic request */
+ len = tbr->ADDR_length;
+ switch (len) {
+ case 0: /* request for a generic port */
tbr->ADDR_offset = sizeof (struct T_bind_req);
if (icmp->icmp_family == AF_INET) {
tbr->ADDR_length = sizeof (sin_t);
@@ -284,6 +311,8 @@ icmp_bind(queue_t *q, mblk_t *mp)
*sin = sin_null;
sin->sin_family = AF_INET;
mp->b_wptr = (uchar_t *)&sin[1];
+ sa = (struct sockaddr *)sin;
+ len = sizeof (sin_t);
} else {
ASSERT(icmp->icmp_family == AF_INET6);
tbr->ADDR_length = sizeof (sin6_t);
@@ -291,39 +320,21 @@ icmp_bind(queue_t *q, mblk_t *mp)
*sin6 = sin6_null;
sin6->sin6_family = AF_INET6;
mp->b_wptr = (uchar_t *)&sin6[1];
+ sa = (struct sockaddr *)sin6;
+ len = sizeof (sin6_t);
}
break;
- case sizeof (sin_t): /* Complete IP address */
- sin = (sin_t *)mi_offset_param(mp, tbr->ADDR_offset,
+
+ case sizeof (sin_t): /* Complete IPv4 address */
+ sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset,
sizeof (sin_t));
- if (sin == NULL || !OK_32PTR((char *)sin)) {
- icmp_err_ack(q, mp, TSYSERR, EINVAL);
- return;
- }
- if (icmp->icmp_family != AF_INET ||
- sin->sin_family != AF_INET) {
- icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
- return;
- }
break;
- case sizeof (sin6_t): /* Complete IP address */
- sin6 = (sin6_t *)mi_offset_param(mp, tbr->ADDR_offset,
- sizeof (sin6_t));
- if (sin6 == NULL || !OK_32PTR((char *)sin6)) {
- icmp_err_ack(q, mp, TSYSERR, EINVAL);
- return;
- }
- if (icmp->icmp_family != AF_INET6 ||
- sin6->sin6_family != AF_INET6) {
- icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
- return;
- }
- /* No support for mapped addresses on raw sockets */
- if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
- icmp_err_ack(q, mp, TSYSERR, EADDRNOTAVAIL);
- return;
- }
+
+ case sizeof (sin6_t): /* Complete IPv6 address */
+ sa = (struct sockaddr *)mi_offset_param(mp,
+ tbr->ADDR_offset, sizeof (sin6_t));
break;
+
default:
(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
"icmp_bind: bad ADDR_length %d", tbr->ADDR_length);
@@ -331,6 +342,37 @@ icmp_bind(queue_t *q, mblk_t *mp)
return;
}
+ error = rawip_do_bind(connp, sa, len);
+done:
+ ASSERT(mp->b_cont == NULL);
+ if (error != 0) {
+ if (error > 0) {
+ icmp_err_ack(q, mp, TSYSERR, error);
+ } else {
+ icmp_err_ack(q, mp, -error, 0);
+ }
+ } else {
+ tbr->PRIM_type = T_BIND_ACK;
+ qreply(q, mp);
+ }
+}
+
+static int
+rawip_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len)
+{
+ sin_t *sin;
+ sin6_t *sin6;
+ icmp_t *icmp;
+ int error = 0;
+ mblk_t *ire_mp;
+
+
+ icmp = connp->conn_icmp;
+
+ if (sa == NULL || !OK_32PTR((char *)sa)) {
+ return (EINVAL);
+ }
+
/*
* The state must be TS_UNBND. TPI mandates that users must send
* TPI primitives only 1 at a time and wait for the response before
@@ -338,24 +380,53 @@ icmp_bind(queue_t *q, mblk_t *mp)
*/
rw_enter(&icmp->icmp_rwlock, RW_WRITER);
if (icmp->icmp_state != TS_UNBND || icmp->icmp_pending_op != -1) {
- rw_exit(&icmp->icmp_rwlock);
- (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
- "icmp_bind: bad state, %d", icmp->icmp_state);
- icmp_err_ack(q, mp, TOUTSTATE, 0);
- return;
+ error = -TOUTSTATE;
+ goto done;
+ }
+
+ ASSERT(len != 0);
+ switch (len) {
+ case sizeof (sin_t): /* Complete IPv4 address */
+ sin = (sin_t *)sa;
+ if (sin->sin_family != AF_INET ||
+ icmp->icmp_family != AF_INET) {
+ /* TSYSERR, EAFNOSUPPORT */
+ error = EAFNOSUPPORT;
+ goto done;
+ }
+ break;
+ case sizeof (sin6_t): /* Complete IPv6 address */
+ sin6 = (sin6_t *)sa;
+ if (sin6->sin6_family != AF_INET6 ||
+ icmp->icmp_family != AF_INET6) {
+ /* TSYSERR, EAFNOSUPPORT */
+ error = EAFNOSUPPORT;
+ goto done;
+ }
+ /* No support for mapped addresses on raw sockets */
+ if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
+ /* TSYSERR, EADDRNOTAVAIL */
+ error = EADDRNOTAVAIL;
+ goto done;
+ }
+ break;
+
+ default:
+ /* TBADADDR */
+ error = EADDRNOTAVAIL;
+ goto done;
}
- icmp->icmp_pending_op = tbr->PRIM_type;
+ icmp->icmp_pending_op = T_BIND_REQ;
+ icmp->icmp_state = TS_IDLE;
/*
* Copy the source address into our icmp structure. This address
* may still be zero; if so, ip will fill in the correct address
* each time an outbound packet is passed to it.
* If we are binding to a broadcast or multicast address then
- * icmp_bind_ack will clear the source address when it receives
- * the T_BIND_ACK.
+ * rawip_post_ip_bind_connect will clear the source address.
*/
- icmp->icmp_state = TS_IDLE;
if (icmp->icmp_family == AF_INET) {
ASSERT(sin != NULL);
@@ -378,147 +449,136 @@ icmp_bind(queue_t *q, mblk_t *mp)
error = icmp_build_hdrs(icmp);
if (error != 0) {
icmp->icmp_pending_op = -1;
- rw_exit(&icmp->icmp_rwlock);
- icmp_err_ack(q, mp, TSYSERR, error);
- return;
+ /*
+ * TSYSERR
+ */
+ goto done;
}
}
- /*
- * Place protocol type in the O_T_BIND_REQ/T_BIND_REQ following
- * the address.
- */
- *mp->b_wptr++ = icmp->icmp_proto;
+
+ ire_mp = NULL;
if (!(V6_OR_V4_INADDR_ANY(icmp->icmp_v6src))) {
/*
- * Append a request for an IRE if src not 0 (INADDR_ANY)
+ * request an IRE if src not 0 (INADDR_ANY)
*/
- mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
- if (!mp->b_cont) {
+ ire_mp = allocb(sizeof (ire_t), BPRI_HI);
+ if (ire_mp == NULL) {
icmp->icmp_pending_op = -1;
- rw_exit(&icmp->icmp_rwlock);
- icmp_err_ack(q, mp, TSYSERR, ENOMEM);
- return;
+ error = ENOMEM;
+ goto done;
}
- mp->b_cont->b_wptr += sizeof (ire_t);
- mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;
+ DB_TYPE(ire_mp) = IRE_DB_REQ_TYPE;
}
+done:
rw_exit(&icmp->icmp_rwlock);
+ if (error != 0)
+ return (error);
- /* Pass the O_T_BIND_REQ/T_BIND_REQ to ip. */
- if (icmp->icmp_family == AF_INET6)
- mp = ip_bind_v6(q, mp, connp, NULL);
- else
- mp = ip_bind_v4(q, mp, connp);
-
- /* The above return NULL if the bind needs to be deferred */
- if (mp != NULL)
- icmp_bind_result(connp, mp);
- else
- CONN_INC_REF(connp);
+ if (icmp->icmp_family == AF_INET6) {
+ error = ip_proto_bind_laddr_v6(connp, &ire_mp, icmp->icmp_proto,
+ &sin6->sin6_addr, sin6->sin6_port, B_TRUE);
+ } else {
+ error = ip_proto_bind_laddr_v4(connp, &ire_mp, icmp->icmp_proto,
+ sin->sin_addr.s_addr, sin->sin_port, B_TRUE);
+ }
+ rawip_post_ip_bind_connect(icmp, ire_mp, error);
+ return (error);
}
-/*
- * Send message to IP to just bind to the protocol.
- */
+/*
+ * Finish a bind, connect or disconnect once the IP bind call has returned.
+ *
+ * icmp	- endpoint whose state is being settled
+ * ire_mp	- optional IRE mblk handed to the IP bind call (may be NULL);
+ *		  it is consumed (freed) here on every path
+ * error	- result of the IP bind call; 0 means success
+ *
+ * Runs under icmp_rwlock held as writer. On failure the endpoint falls
+ * back to TS_IDLE with the bound source (failed connect) or to TS_UNBND
+ * with zeroed source addresses (failed bind). On success the source
+ * address is cleared when a broadcast/multicast address was bound locally.
+ */
static void
-icmp_bind_proto(queue_t *q)
+rawip_post_ip_bind_connect(icmp_t *icmp, mblk_t *ire_mp, int error)
{
- mblk_t *mp;
- struct T_bind_req *tbr;
- icmp_t *icmp;
- conn_t *connp = Q_TO_CONN(q);
-
- icmp = connp->conn_icmp;
-
- mp = allocb(sizeof (struct T_bind_req) + sizeof (sin6_t) + 1,
- BPRI_MED);
- if (!mp) {
+ rw_enter(&icmp->icmp_rwlock, RW_WRITER);
+ if (icmp->icmp_state == TS_UNBND) {
+ /*
+ * not yet bound - bind sent by icmp_bind_proto.
+ */
+ rw_exit(&icmp->icmp_rwlock);
+ /* Every other exit frees ire_mp; do not leak it here. */
+ if (ire_mp != NULL)
+ freeb(ire_mp);
return;
}
- mp->b_datap->db_type = M_PROTO;
- tbr = (struct T_bind_req *)mp->b_rptr;
- tbr->PRIM_type = O_T_BIND_REQ; /* change to T_BIND_REQ ? */
- tbr->ADDR_offset = sizeof (struct T_bind_req);
-
- rw_enter(&icmp->icmp_rwlock, RW_WRITER);
- if (icmp->icmp_ipversion == IPV4_VERSION) {
- sin_t *sin;
+ ASSERT(icmp->icmp_pending_op != -1);
+ icmp->icmp_pending_op = -1;
- tbr->ADDR_length = sizeof (sin_t);
- sin = (sin_t *)&tbr[1];
- *sin = sin_null;
- sin->sin_family = AF_INET;
- mp->b_wptr = (uchar_t *)&sin[1];
+ if (error != 0) {
+ if (icmp->icmp_state == TS_DATA_XFER) {
+ /* Connect failed */
+ /* Revert back to the bound source */
+ icmp->icmp_v6src = icmp->icmp_bound_v6src;
+ icmp->icmp_state = TS_IDLE;
+ if (icmp->icmp_family == AF_INET6)
+ (void) icmp_build_hdrs(icmp);
+ } else {
+ V6_SET_ZERO(icmp->icmp_v6src);
+ V6_SET_ZERO(icmp->icmp_bound_v6src);
+ icmp->icmp_state = TS_UNBND;
+ if (icmp->icmp_family == AF_INET6)
+ (void) icmp_build_hdrs(icmp);
+ }
} else {
- sin6_t *sin6;
+ if (ire_mp != NULL && ire_mp->b_datap->db_type == IRE_DB_TYPE) {
+ ire_t *ire;
- ASSERT(icmp->icmp_ipversion == IPV6_VERSION);
- tbr->ADDR_length = sizeof (sin6_t);
- sin6 = (sin6_t *)&tbr[1];
- *sin6 = sin6_null;
- sin6->sin6_family = AF_INET6;
- mp->b_wptr = (uchar_t *)&sin6[1];
- }
+ ire = (ire_t *)ire_mp->b_rptr;
+ /*
+ * If a broadcast/multicast address was bound set
+ * the source address to 0.
+ * This ensures no datagrams with broadcast address
+ * as source address are emitted (which would violate
+ * RFC1122 - Hosts requirements)
+ * Note: we get IRE_BROADCAST for IPv6
+ * to "mark" a multicast local address.
+ */
- /* Place protocol type in the O_T_BIND_REQ following the address. */
- *mp->b_wptr++ = icmp->icmp_proto;
- rw_exit(&icmp->icmp_rwlock);
- /* Pass the O_T_BIND_REQ to ip. */
- if (icmp->icmp_family == AF_INET6)
- mp = ip_bind_v6(q, mp, connp, NULL);
- else
- mp = ip_bind_v4(q, mp, connp);
+ if (ire->ire_type == IRE_BROADCAST &&
+ icmp->icmp_state != TS_DATA_XFER) {
+ /*
+ * This was just a local bind to a
+ * MC/broadcast addr
+ */
+ V6_SET_ZERO(icmp->icmp_v6src);
+ if (icmp->icmp_family == AF_INET6)
+ (void) icmp_build_hdrs(icmp);
+ }
+ }
- /* The above return NULL if the bind needs to be deferred */
- if (mp != NULL)
- icmp_bind_result(connp, mp);
- else
- CONN_INC_REF(connp);
+ }
+ rw_exit(&icmp->icmp_rwlock);
+ if (ire_mp != NULL)
+ freeb(ire_mp);
}
/*
- * This is called from ip_wput_nondata to handle the results of a
- * deferred RAWIP bind. It is called once the bind has been completed.
+ * Bind the endpoint to its protocol number only.
+ *
+ * Calls ip_proto_bind_laddr_v4/v6 (chosen by icmp_family) with the
+ * all-zero address taken from sin_null/sin6_null and port 0, and passes
+ * no IRE request mblk. The result is handed to
+ * rawip_post_ip_bind_connect() and then returned to the caller.
*/
-void
-rawip_resume_bind(conn_t *connp, mblk_t *mp)
+static int
+icmp_bind_proto(conn_t *connp)
{
- ASSERT(connp != NULL && IPCL_IS_RAWIP(connp));
+ icmp_t *icmp;
+ int error;
+
+ icmp = connp->conn_icmp;
- icmp_bind_result(connp, mp);
+ if (icmp->icmp_family == AF_INET6)
+ error = ip_proto_bind_laddr_v6(connp, NULL, icmp->icmp_proto,
+ &sin6_null.sin6_addr, 0, B_TRUE);
+ else
+ error = ip_proto_bind_laddr_v4(connp, NULL, icmp->icmp_proto,
+ sin_null.sin_addr.s_addr, 0, B_TRUE);
- CONN_OPER_PENDING_DONE(connp);
+ /*
+ * NOTE(review): icmp_pending_op is not set here, yet the post routine
+ * asserts it is set whenever the state is not TS_UNBND - confirm
+ * callers only reach this path with no other operation pending.
+ */
+ rawip_post_ip_bind_connect(icmp, NULL, error);
+ return (error);
}
-/*
- * This routine handles each T_CONN_REQ message passed to icmp. It
- * associates a default destination address with the stream.
- *
- * This routine sends down a T_BIND_REQ to IP with the following mblks:
- * T_BIND_REQ - specifying local and remote address.
- * IRE_DB_REQ_TYPE - to get an IRE back containing ire_type and src
- * T_OK_ACK - for the T_CONN_REQ
- * T_CONN_CON - to keep the TPI user happy
- *
- * The connect completes in icmp_bind_result.
- * When a T_BIND_ACK is received information is extracted from the IRE
- * and the two appended messages are sent to the TPI user.
- * Should icmp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will
- * convert it to an error ack for the appropriate primitive.
- */
static void
-icmp_connect(queue_t *q, mblk_t *mp)
+icmp_tpi_connect(queue_t *q, mblk_t *mp)
{
- sin_t *sin;
- sin6_t *sin6;
- mblk_t *mp1, *mp2;
+ conn_t *connp = Q_TO_CONN(q);
struct T_conn_req *tcr;
icmp_t *icmp;
- ipaddr_t v4dst;
- in6_addr_t v6dst;
- uint32_t flowinfo;
- conn_t *connp = Q_TO_CONN(q);
+ struct sockaddr *sa;
+ socklen_t len;
+ int error;
icmp = connp->conn_icmp;
tcr = (struct T_conn_req *)mp->b_rptr;
@@ -533,54 +593,111 @@ icmp_connect(queue_t *q, mblk_t *mp)
return;
}
- switch (tcr->DEST_length) {
+ len = tcr->DEST_length;
+
+ switch (len) {
default:
icmp_err_ack(q, mp, TBADADDR, 0);
return;
-
case sizeof (sin_t):
- sin = (sin_t *)mi_offset_param(mp, tcr->DEST_offset,
+ sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
sizeof (sin_t));
- if (sin == NULL || !OK_32PTR((char *)sin)) {
- icmp_err_ack(q, mp, TSYSERR, EINVAL);
- return;
- }
- if (icmp->icmp_family != AF_INET ||
- sin->sin_family != AF_INET) {
- icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
- return;
- }
- v4dst = sin->sin_addr.s_addr;
- IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
- ASSERT(icmp->icmp_ipversion == IPV4_VERSION);
- icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
- icmp->icmp_ip_snd_options_len;
break;
-
case sizeof (sin6_t):
- sin6 = (sin6_t *)mi_offset_param(mp, tcr->DEST_offset,
- sizeof (sin6_t));
- if (sin6 == NULL || !OK_32PTR((char *)sin6)) {
- icmp_err_ack(q, mp, TSYSERR, EINVAL);
- return;
+ sa = (struct sockaddr *)mi_offset_param(mp,
+ tcr->DEST_offset, sizeof (sin6_t));
+ break;
+ }
+
+ error = proto_verify_ip_addr(icmp->icmp_family, sa, len);
+ if (error != 0) {
+ icmp_err_ack(q, mp, TSYSERR, error);
+ return;
+ }
+
+ error = rawip_do_connect(connp, sa, len);
+ if (error != 0) {
+ if (error < 0) {
+ icmp_err_ack(q, mp, -error, 0);
+ } else {
+ icmp_err_ack(q, mp, 0, error);
}
- if (icmp->icmp_family != AF_INET6 ||
- sin6->sin6_family != AF_INET6) {
- icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
+ } else {
+ mblk_t *mp1;
+
+ /*
+ * We have to send a connection confirmation to
+ * keep TLI happy.
+ */
+ if (icmp->icmp_family == AF_INET) {
+ mp1 = mi_tpi_conn_con(NULL, (char *)sa,
+ sizeof (sin_t), NULL, 0);
+ } else {
+ ASSERT(icmp->icmp_family == AF_INET6);
+ mp1 = mi_tpi_conn_con(NULL, (char *)sa,
+ sizeof (sin6_t), NULL, 0);
+ }
+ if (mp1 == NULL) {
+ rw_exit(&icmp->icmp_rwlock);
+ icmp_err_ack(q, mp, TSYSERR, ENOMEM);
return;
}
- /* No support for mapped addresses on raw sockets */
- if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
- icmp_err_ack(q, mp, TSYSERR, EADDRNOTAVAIL);
+
+ /*
+ * Send ok_ack for T_CONN_REQ
+ */
+ mp = mi_tpi_ok_ack_alloc(mp);
+ if (mp == NULL) {
+ /* Unable to reuse the T_CONN_REQ for the ack. */
+ freemsg(mp1);
+ icmp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM);
return;
}
- v6dst = sin6->sin6_addr;
- ASSERT(icmp->icmp_ipversion == IPV6_VERSION);
- icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len;
- flowinfo = sin6->sin6_flowinfo;
- break;
+ putnext(connp->conn_rq, mp);
+ putnext(connp->conn_rq, mp1);
+ }
+}
+
+static int
+rawip_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len)
+{
+ icmp_t *icmp;
+ sin_t *sin;
+ sin6_t *sin6;
+ mblk_t *ire_mp;
+ int error;
+ ipaddr_t v4dst;
+ in6_addr_t v6dst;
+
+ icmp = connp->conn_icmp;
+
+ if (sa == NULL || !OK_32PTR((char *)sa)) {
+ return (EINVAL);
+ }
+
+ ire_mp = allocb(sizeof (ire_t), BPRI_HI);
+ if (ire_mp == NULL)
+ return (ENOMEM);
+ DB_TYPE(ire_mp) = IRE_DB_REQ_TYPE;
+
+
+ ASSERT(sa != NULL && len != 0);
+
+ rw_enter(&icmp->icmp_rwlock, RW_WRITER);
+ if (icmp->icmp_state == TS_UNBND || icmp->icmp_pending_op != -1) {
+ rw_exit(&icmp->icmp_rwlock);
+ freeb(ire_mp);
+ return (-TOUTSTATE);
}
- if (icmp->icmp_ipversion == IPV4_VERSION) {
+
+ switch (len) {
+ case sizeof (sin_t):
+ sin = (sin_t *)sa;
+
+ ASSERT(icmp->icmp_family == AF_INET);
+ ASSERT(icmp->icmp_ipversion == IPV4_VERSION);
+
+ v4dst = sin->sin_addr.s_addr;
/*
* Interpret a zero destination to mean loopback.
* Update the T_CONN_REQ (sin/sin6) since it is used to
@@ -588,15 +705,16 @@ icmp_connect(queue_t *q, mblk_t *mp)
*/
if (v4dst == INADDR_ANY) {
v4dst = htonl(INADDR_LOOPBACK);
- IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
- if (icmp->icmp_family == AF_INET) {
- sin->sin_addr.s_addr = v4dst;
- } else {
- sin6->sin6_addr = v6dst;
- }
}
- icmp->icmp_v6dst = v6dst;
- icmp->icmp_flowinfo = 0;
+
+ IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
+ ASSERT(icmp->icmp_ipversion == IPV4_VERSION);
+ icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
+ icmp->icmp_ip_snd_options_len;
+ icmp->icmp_v6dst.sin6_addr = v6dst;
+ icmp->icmp_v6dst.sin6_family = AF_INET6;
+ icmp->icmp_v6dst.sin6_flowinfo = 0;
+ icmp->icmp_v6dst.sin6_port = 0;
/*
* If the destination address is multicast and
@@ -610,35 +728,42 @@ icmp_connect(queue_t *q, mblk_t *mp)
IN6_IPADDR_TO_V4MAPPED(icmp->icmp_multicast_if_addr,
&icmp->icmp_v6src);
}
- } else {
+ break;
+ case sizeof (sin6_t):
+ sin6 = (sin6_t *)sa;
+
+ /* No support for mapped addresses on raw sockets */
+ if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
+ rw_exit(&icmp->icmp_rwlock);
+ freeb(ire_mp);
+ return (EADDRNOTAVAIL);
+ }
+
ASSERT(icmp->icmp_ipversion == IPV6_VERSION);
+ ASSERT(icmp->icmp_family == AF_INET6);
+
+ icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len;
+
+ icmp->icmp_v6dst = *sin6;
+ icmp->icmp_v6dst.sin6_port = 0;
+
/*
* Interpret a zero destination to mean loopback.
* Update the T_CONN_REQ (sin/sin6) since it is used to
* generate the T_CONN_CON.
*/
- if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) {
- v6dst = ipv6_loopback;
- sin6->sin6_addr = v6dst;
+ if (IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6dst.sin6_addr)) {
+ icmp->icmp_v6dst.sin6_addr = ipv6_loopback;
}
- icmp->icmp_v6dst = v6dst;
- icmp->icmp_flowinfo = flowinfo;
/*
* If the destination address is multicast and
* an outgoing multicast interface has been set,
* then the ip bind logic will pick the correct source
* address (i.e. matching the outgoing multicast interface).
*/
+ break;
}
- rw_enter(&icmp->icmp_rwlock, RW_WRITER);
- if (icmp->icmp_state == TS_UNBND || icmp->icmp_pending_op != -1) {
- rw_exit(&icmp->icmp_rwlock);
- (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
- "icmp_connect: bad state, %d", icmp->icmp_state);
- icmp_err_ack(q, mp, TOUTSTATE, 0);
- return;
- }
icmp->icmp_pending_op = T_CONN_REQ;
if (icmp->icmp_state == TS_DATA_XFER) {
@@ -647,74 +772,22 @@ icmp_connect(queue_t *q, mblk_t *mp)
icmp->icmp_state = TS_IDLE;
}
- /*
- * Send down bind to IP to verify that there is a route
- * and to determine the source address.
- * This will come back as T_BIND_ACK with an IRE_DB_TYPE in rput.
- */
- if (icmp->icmp_family == AF_INET) {
- mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (ipa_conn_t),
- sin->sin_port);
- } else {
- ASSERT(icmp->icmp_family == AF_INET6);
- mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (ipa6_conn_t),
- sin6->sin6_port);
- }
- if (mp1 == NULL) {
- icmp->icmp_pending_op = -1;
- rw_exit(&icmp->icmp_rwlock);
- icmp_err_ack(q, mp, TSYSERR, ENOMEM);
- return;
- }
-
- /*
- * We also have to send a connection confirmation to
- * keep TLI happy. Prepare it for icmp_bind_result.
- */
- if (icmp->icmp_family == AF_INET) {
- mp2 = mi_tpi_conn_con(NULL, (char *)sin, sizeof (*sin), NULL,
- 0);
- } else {
- ASSERT(icmp->icmp_family == AF_INET6);
- mp2 = mi_tpi_conn_con(NULL, (char *)sin6, sizeof (*sin6), NULL,
- 0);
- }
- if (mp2 == NULL) {
- freemsg(mp1);
- icmp->icmp_pending_op = -1;
- rw_exit(&icmp->icmp_rwlock);
- icmp_err_ack(q, mp, TSYSERR, ENOMEM);
- return;
- }
-
- mp = mi_tpi_ok_ack_alloc(mp);
- if (mp == NULL) {
- /* Unable to reuse the T_CONN_REQ for the ack. */
- freemsg(mp2);
- icmp->icmp_pending_op = -1;
- rw_exit(&icmp->icmp_rwlock);
- icmp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM);
- return;
- }
-
icmp->icmp_state = TS_DATA_XFER;
rw_exit(&icmp->icmp_rwlock);
- /* Hang onto the T_OK_ACK and T_CONN_CON for later. */
- linkb(mp1, mp);
- linkb(mp1, mp2);
-
- mblk_setcred(mp1, connp->conn_cred);
- if (icmp->icmp_family == AF_INET)
- mp1 = ip_bind_v4(q, mp1, connp);
- else
- mp1 = ip_bind_v6(q, mp1, connp, NULL);
-
- /* The above return NULL if the bind needs to be deferred */
- if (mp1 != NULL)
- icmp_bind_result(connp, mp1);
- else
- CONN_INC_REF(connp);
+ if (icmp->icmp_family == AF_INET6) {
+ error = ip_proto_bind_connected_v6(connp, &ire_mp,
+ icmp->icmp_proto, &icmp->icmp_v6src, 0,
+ &icmp->icmp_v6dst.sin6_addr,
+ NULL, sin6->sin6_port, B_TRUE, B_TRUE);
+ } else {
+ error = ip_proto_bind_connected_v4(connp, &ire_mp,
+ icmp->icmp_proto, &V4_PART_OF_V6(icmp->icmp_v6src), 0,
+ V4_PART_OF_V6(icmp->icmp_v6dst.sin6_addr), sin->sin_port,
+ B_TRUE, B_TRUE);
+ }
+ rawip_post_ip_bind_connect(icmp, ire_mp, error);
+ return (error);
}
static void
@@ -733,6 +806,7 @@ icmp_close_free(conn_t *connp)
kmem_free(icmp->icmp_filter, sizeof (icmp6_filter_t));
icmp->icmp_filter = NULL;
}
+
/* Free memory associated with sticky options */
if (icmp->icmp_sticky_hdrs_len != 0) {
kmem_free(icmp->icmp_sticky_hdrs,
@@ -754,16 +828,18 @@ icmp_close_free(conn_t *connp)
}
static int
-icmp_close(queue_t *q)
+rawip_do_close(conn_t *connp)
{
- conn_t *connp = (conn_t *)q->q_ptr;
-
ASSERT(connp != NULL && IPCL_IS_RAWIP(connp));
ip_quiesce_conn(connp);
- qprocsoff(connp->conn_rq);
+ if (!IPCL_IS_NONSTR(connp)) {
+ qprocsoff(connp->conn_rq);
+ }
+ ASSERT(connp->conn_icmp->icmp_fallback_queue_head == NULL &&
+ connp->conn_icmp->icmp_fallback_queue_tail == NULL);
icmp_close_free(connp);
/*
@@ -778,11 +854,36 @@ icmp_close(queue_t *q)
*/
ASSERT(connp->conn_ref == 1);
- inet_minor_free(connp->conn_minor_arena, connp->conn_dev);
+ if (!IPCL_IS_NONSTR(connp)) {
+ inet_minor_free(connp->conn_minor_arena, connp->conn_dev);
+ } else {
+ ip_close_helper_stream(connp);
+ }
connp->conn_ref--;
ipcl_conn_destroy(connp);
+ return (0);
+}
+
+/*
+ * Close entry point for an icmp queue.
+ *
+ * With SO_FALLBACK set in flags only the minor number and the queue
+ * processing are released; otherwise the conn behind the queue is torn
+ * down through rawip_do_close(). Either way both q_ptr fields are cleared
+ * and 0 is returned.
+ */
+static int
+icmp_close(queue_t *q, int flags)
+{
+	if ((flags & SO_FALLBACK) == 0) {
+		/* Ordinary close: release the conn attached to this queue. */
+		(void) rawip_do_close(Q_TO_CONN(q));
+	} else {
+		/*
+		 * stream is being closed while in fallback
+		 * simply free the resources that were allocated
+		 */
+		inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr));
+		qprocsoff(q);
+	}
+
q->q_ptr = WR(q)->q_ptr = NULL;
return (0);
}
@@ -793,88 +894,102 @@ icmp_close(queue_t *q)
* in sending a T_BIND_REQ to IP to restore the binding to just
* the local address.
*
- * This routine sends down a T_BIND_REQ to IP with the following mblks:
- * T_BIND_REQ - specifying just the local address.
- * T_OK_ACK - for the T_DISCON_REQ
- *
- * The disconnect completes in icmp_bind_result.
- * When a T_BIND_ACK is received the appended T_OK_ACK is sent to the TPI user.
- * Should icmp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will
- * convert it to an error ack for the appropriate primitive.
+ * The disconnect completes in rawip_post_ip_bind_connect.
*/
-static void
-icmp_disconnect(queue_t *q, mblk_t *mp)
+static int
+icmp_do_disconnect(conn_t *connp)
{
icmp_t *icmp;
- mblk_t *mp1;
- conn_t *connp = Q_TO_CONN(q);
+ mblk_t *ire_mp;
+ int error;
icmp = connp->conn_icmp;
+
+ /*
+ * Returns 0 on success, a positive errno, or a negated TLI error
+ * (-TOUTSTATE) when the endpoint is not connected or busy.
+ *
+ * Allocate the IRE request before any endpoint state is modified, so
+ * that an allocation failure cannot leave icmp_pending_op stuck at
+ * T_DISCON_REQ (which would make every later operation fail with
+ * TOUTSTATE). Tag it as a request like the other allocation sites do.
+ */
+ ire_mp = allocb(sizeof (ire_t), BPRI_HI);
+ if (ire_mp == NULL)
+ return (ENOMEM);
+ DB_TYPE(ire_mp) = IRE_DB_REQ_TYPE;
+
rw_enter(&icmp->icmp_rwlock, RW_WRITER);
if (icmp->icmp_state != TS_DATA_XFER || icmp->icmp_pending_op != -1) {
rw_exit(&icmp->icmp_rwlock);
- (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
- "icmp_disconnect: bad state, %d", icmp->icmp_state);
- icmp_err_ack(q, mp, TOUTSTATE, 0);
- return;
+ freeb(ire_mp);
+ return (-TOUTSTATE);
}
icmp->icmp_pending_op = T_DISCON_REQ;
icmp->icmp_v6src = icmp->icmp_bound_v6src;
icmp->icmp_state = TS_IDLE;
- /*
- * Send down bind to IP to remove the full binding and revert
- * to the local address binding.
- */
- if (icmp->icmp_family == AF_INET) {
- mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (sin_t), 0);
- } else {
- ASSERT(icmp->icmp_family == AF_INET6);
- mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (sin6_t), 0);
- }
- if (mp1 == NULL) {
- icmp->icmp_pending_op = -1;
- rw_exit(&icmp->icmp_rwlock);
- icmp_err_ack(q, mp, TSYSERR, ENOMEM);
- return;
- }
- mp = mi_tpi_ok_ack_alloc(mp);
- if (mp == NULL) {
- /* Unable to reuse the T_DISCON_REQ for the ack. */
- icmp->icmp_pending_op = -1;
- rw_exit(&icmp->icmp_rwlock);
- icmp_err_ack_prim(q, mp1, T_DISCON_REQ, TSYSERR, ENOMEM);
- return;
- }
if (icmp->icmp_family == AF_INET6) {
- int error;
-
/* Rebuild the header template */
error = icmp_build_hdrs(icmp);
if (error != 0) {
icmp->icmp_pending_op = -1;
rw_exit(&icmp->icmp_rwlock);
- icmp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, error);
- freemsg(mp1);
- return;
+ freeb(ire_mp);
+ return (error);
}
}
rw_exit(&icmp->icmp_rwlock);
- /* Append the T_OK_ACK to the T_BIND_REQ for icmp_bind_result */
- linkb(mp1, mp);
- if (icmp->icmp_family == AF_INET6)
- mp1 = ip_bind_v6(q, mp1, connp, NULL);
- else
- mp1 = ip_bind_v4(q, mp1, connp);
+ /* Re-bind to the local address only, dropping the remote binding. */
+ if (icmp->icmp_family == AF_INET6) {
+ error = ip_proto_bind_laddr_v6(connp, &ire_mp, icmp->icmp_proto,
+ &icmp->icmp_bound_v6src, 0, B_TRUE);
+ } else {
- /* The above return NULL if the bind needs to be deferred */
- if (mp1 != NULL)
- icmp_bind_result(connp, mp1);
- else
- CONN_INC_REF(connp);
+ error = ip_proto_bind_laddr_v4(connp, &ire_mp, icmp->icmp_proto,
+ V4_PART_OF_V6(icmp->icmp_bound_v6src), 0, B_TRUE);
+ }
+
+ /* Clears icmp_pending_op, settles the state and frees ire_mp. */
+ rawip_post_ip_bind_connect(icmp, ire_mp, error);
+
+ return (error);
+}
+
+/*
+ * TPI handler for T_DISCON_REQ.
+ *
+ * q	- write queue the request arrived on
+ * mp	- the T_DISCON_REQ message; reused for the acknowledgement
+ *
+ * Performs the disconnect through icmp_do_disconnect() and replies with
+ * T_OK_ACK on success or T_ERROR_ACK on failure. A positive result is a
+ * UNIX errno and is reported as TSYSERR; a negative result is a negated
+ * TLI error code.
+ */
+static void
+icmp_tpi_disconnect(queue_t *q, mblk_t *mp)
+{
+	conn_t	*connp = Q_TO_CONN(q);
+	mblk_t	*ack_mp;
+	int	error;
+
+	/*
+	 * Allocate the largest primitive we need to send back
+	 * T_error_ack is > than T_ok_ack
+	 */
+	ack_mp = reallocb(mp, sizeof (struct T_error_ack), 1);
+	if (ack_mp == NULL) {
+		/*
+		 * Unable to reuse the T_DISCON_REQ for the ack. Keep the
+		 * original message and hand that, not the NULL result, to
+		 * the error path (same pattern as the bind handler).
+		 */
+		icmp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM);
+		return;
+	}
+	mp = ack_mp;
+
+	error = icmp_do_disconnect(connp);
+
+	if (error != 0) {
+		if (error > 0) {
+			/* UNIX errno: only meaningful alongside TSYSERR. */
+			icmp_err_ack(q, mp, TSYSERR, error);
+		} else {
+			icmp_err_ack(q, mp, -error, 0);
+		}
+	} else {
+		mp = mi_tpi_ok_ack_alloc(mp);
+		ASSERT(mp != NULL);
+		qreply(q, mp);
+	}
+
+}
+
+/*
+ * Disconnect entry point that reports failures as UNIX errnos.
+ *
+ * Clears icmp_dgram_errind, runs icmp_do_disconnect() and maps a negated
+ * TLI error from it through proto_tlitosyserr(); other results are
+ * returned unchanged.
+ */
+static int
+icmp_disconnect(conn_t *connp)
+{
+	icmp_t	*icmp = connp->conn_icmp;
+	int	rc;
+
+	icmp->icmp_dgram_errind = B_FALSE;
+
+	rc = icmp_do_disconnect(connp);
+	if (rc >= 0)
+		return (rc);
+
+	/* Negative values carry a TLI error code; translate it. */
+	return (proto_tlitosyserr(-rc));
+}
/* This routine creates a T_ERROR_ACK message and passes it upstream. */
@@ -905,22 +1020,20 @@ icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
/*
* icmp_icmp_error is called by icmp_input to process ICMP
* messages passed up by IP.
- * Generates the appropriate T_UDERROR_IND for permanent
- * (non-transient) errors.
+ * Reports the appropriate error for permanent (non-transient) failures.
* Assumes that IP has pulled up everything up to and including
* the ICMP header.
*/
static void
-icmp_icmp_error(queue_t *q, mblk_t *mp)
+icmp_icmp_error(conn_t *connp, mblk_t *mp)
{
icmph_t *icmph;
ipha_t *ipha;
int iph_hdr_length;
sin_t sin;
- sin6_t sin6;
mblk_t *mp1;
int error = 0;
- icmp_t *icmp = Q_TO_ICMP(q);
+ icmp_t *icmp = connp->conn_icmp;
ipha = (ipha_t *)mp->b_rptr;
@@ -928,10 +1041,19 @@ icmp_icmp_error(queue_t *q, mblk_t *mp)
if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) {
ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
- icmp_icmp_error_ipv6(q, mp);
+ icmp_icmp_error_ipv6(connp, mp);
return;
}
- ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION);
+
+ /*
+ * icmp does not support v4 mapped addresses
+ * so we can never be here for a V6 socket
+ * i.e. icmp_family == AF_INET6
+ */
+ ASSERT((IPH_HDR_VERSION(ipha) == IPV4_VERSION) &&
+ (icmp->icmp_family == AF_INET));
+
+ ASSERT(icmp->icmp_family == AF_INET);
/* Skip past the outer IP and ICMP headers */
iph_hdr_length = IPH_HDR_LENGTH(ipha);
@@ -974,25 +1096,32 @@ icmp_icmp_error(queue_t *q, mblk_t *mp)
return;
}
- switch (icmp->icmp_family) {
- case AF_INET:
- sin = sin_null;
- sin.sin_family = AF_INET;
- sin.sin_addr.s_addr = ipha->ipha_dst;
- mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL, 0,
- error);
- break;
- case AF_INET6:
- sin6 = sin6_null;
- sin6.sin6_family = AF_INET6;
- IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr);
+ sin = sin_null;
+ sin.sin_family = AF_INET;
+ sin.sin_addr.s_addr = ipha->ipha_dst;
+ if (IPCL_IS_NONSTR(connp)) {
+ rw_enter(&icmp->icmp_rwlock, RW_WRITER);
+ if (icmp->icmp_state == TS_DATA_XFER) {
+ if (sin.sin_addr.s_addr ==
+ V4_PART_OF_V6(icmp->icmp_v6dst.sin6_addr)) {
+ rw_exit(&icmp->icmp_rwlock);
+ (*connp->conn_upcalls->su_set_error)
+ (connp->conn_upper_handle, error);
+ goto done;
+ }
+ } else {
+ icmp->icmp_delayed_error = error;
+ *((sin_t *)&icmp->icmp_delayed_addr) = sin;
+ }
+ rw_exit(&icmp->icmp_rwlock);
+ } else {
- mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
- NULL, 0, error);
- break;
+ mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL,
+ 0, error);
+ if (mp1 != NULL)
+ putnext(connp->conn_rq, mp1);
}
- if (mp1)
- putnext(q, mp1);
+done:
freemsg(mp);
}
@@ -1004,7 +1133,7 @@ icmp_icmp_error(queue_t *q, mblk_t *mp)
* as the ICMPv6 header.
*/
static void
-icmp_icmp_error_ipv6(queue_t *q, mblk_t *mp)
+icmp_icmp_error_ipv6(conn_t *connp, mblk_t *mp)
{
icmp6_t *icmp6;
ip6_t *ip6h, *outer_ip6h;
@@ -1013,7 +1142,7 @@ icmp_icmp_error_ipv6(queue_t *q, mblk_t *mp)
sin6_t sin6;
mblk_t *mp1;
int error = 0;
- icmp_t *icmp = Q_TO_ICMP(q);
+ icmp_t *icmp = connp->conn_icmp;
outer_ip6h = (ip6_t *)mp->b_rptr;
if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6)
@@ -1085,7 +1214,7 @@ icmp_icmp_error_ipv6(queue_t *q, mblk_t *mp)
sin6 = (sin6_t *)&tudi[1];
bzero(sin6, sizeof (sin6_t));
sin6->sin6_family = AF_INET6;
- sin6->sin6_addr = icmp->icmp_v6dst;
+ sin6->sin6_addr = icmp->icmp_v6dst.sin6_addr;
toh = (struct T_opthdr *)&sin6[1];
toh->level = IPPROTO_IPV6;
@@ -1103,7 +1232,14 @@ icmp_icmp_error_ipv6(queue_t *q, mblk_t *mp)
* message. Free it, then send our empty message.
*/
freemsg(mp);
- putnext(q, newmp);
+ if (!IPCL_IS_NONSTR(connp)) {
+ putnext(connp->conn_rq, newmp);
+ } else {
+ (*connp->conn_upcalls->su_recv)
+ (connp->conn_upper_handle, newmp, 0, 0, &error,
+ NULL);
+ ASSERT(error == 0);
+ }
return;
}
case ICMP6_TIME_EXCEEDED:
@@ -1138,10 +1274,29 @@ icmp_icmp_error_ipv6(queue_t *q, mblk_t *mp)
sin6.sin6_addr = ip6h->ip6_dst;
sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK;
- mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), NULL, 0,
- error);
- if (mp1)
- putnext(q, mp1);
+ if (IPCL_IS_NONSTR(connp)) {
+ rw_enter(&icmp->icmp_rwlock, RW_WRITER);
+ if (icmp->icmp_state == TS_DATA_XFER) {
+ if (IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr,
+ &icmp->icmp_v6dst.sin6_addr)) {
+ rw_exit(&icmp->icmp_rwlock);
+ (*connp->conn_upcalls->su_set_error)
+ (connp->conn_upper_handle, error);
+ goto done;
+ }
+ } else {
+ icmp->icmp_delayed_error = error;
+ *((sin6_t *)&icmp->icmp_delayed_addr) = sin6;
+ }
+ rw_exit(&icmp->icmp_rwlock);
+ } else {
+
+ mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
+ NULL, 0, error);
+ if (mp1 != NULL)
+ putnext(connp->conn_rq, mp1);
+ }
+done:
freemsg(mp);
}
@@ -1249,6 +1404,18 @@ icmp_copy_info(struct T_info_ack *tap, icmp_t *icmp)
tap->OPT_size = icmp_max_optsize;
}
+/*
+ * Fill in the capability bits of a T_capability_ack. CAP_bits1 is reset
+ * and, when the requester asked for TC1_INFO, INFO_ack is populated via
+ * icmp_copy_info() and TC1_INFO is reported back.
+ */
+static void
+icmp_do_capability_ack(icmp_t *icmp, struct T_capability_ack *tcap,
+ t_uscalar_t cap_bits1)
+{
+ tcap->CAP_bits1 = 0;
+
+ /* Nothing else to report unless TC1_INFO was requested. */
+ if ((cap_bits1 & TC1_INFO) == 0)
+ return;
+
+ icmp_copy_info(&tcap->INFO_ack, icmp);
+ tcap->CAP_bits1 |= TC1_INFO;
+}
+
/*
* This routine responds to T_CAPABILITY_REQ messages. It is called by
* icmp_wput. Much of the T_CAPABILITY_ACK information is copied from
@@ -1270,12 +1437,8 @@ icmp_capability_req(queue_t *q, mblk_t *mp)
return;
tcap = (struct T_capability_ack *)mp->b_rptr;
- tcap->CAP_bits1 = 0;
- if (cap_bits1 & TC1_INFO) {
- icmp_copy_info(&tcap->INFO_ack, icmp);
- tcap->CAP_bits1 |= TC1_INFO;
- }
+ icmp_do_capability_ack(icmp, tcap, cap_bits1);
qreply(q, mp);
}
@@ -1298,182 +1461,131 @@ icmp_info_req(queue_t *q, mblk_t *mp)
qreply(q, mp);
}
-/*
- * IP recognizes seven kinds of bind requests:
- *
- * - A zero-length address binds only to the protocol number.
- *
- * - A 4-byte address is treated as a request to
- * validate that the address is a valid local IPv4
- * address, appropriate for an application to bind to.
- * IP does the verification, but does not make any note
- * of the address at this time.
- *
- * - A 16-byte address contains is treated as a request
- * to validate a local IPv6 address, as the 4-byte
- * address case above.
- *
- * - A 16-byte sockaddr_in to validate the local IPv4 address and also
- * use it for the inbound fanout of packets.
- *
- * - A 24-byte sockaddr_in6 to validate the local IPv6 address and also
- * use it for the inbound fanout of packets.
- *
- * - A 12-byte address (ipa_conn_t) containing complete IPv4 fanout
- * information consisting of local and remote addresses
- * and ports (unused for raw sockets). In this case, the addresses are both
- * validated as appropriate for this operation, and, if
- * so, the information is retained for use in the
- * inbound fanout.
- *
- * - A 36-byte address address (ipa6_conn_t) containing complete IPv6
- * fanout information, like the 12-byte case above.
- *
- * IP will also fill in the IRE request mblk with information
- * regarding our peer. In all cases, we notify IP of our protocol
- * type by appending a single protocol byte to the bind request.
- */
-static mblk_t *
-icmp_ip_bind_mp(icmp_t *icmp, t_scalar_t bind_prim, t_scalar_t addr_length,
- in_port_t fport)
+/*
+ * Common STREAMS open routine shared by icmp_openv4() and icmp_openv6();
+ * `family' selects AF_INET or AF_INET6.
+ *
+ * Allocates a minor number from ip_minor_arena_sa, creates the conn_t via
+ * icmp_open(), attaches it to both sides of the queue pair and enables the
+ * queue with qprocson(). When SO_FALLBACK is set in `flag' no conn_t is
+ * created; the queue is only set up with the fallback write-side qinit.
+ *
+ * Returns 0 on success (or if the stream is already open), EINVAL for a
+ * module open, EBUSY if no minor number is available, or the error
+ * reported by icmp_open() / icmp_build_hdrs().
+ */
+static int
+icmp_tpi_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp,
+ int family)
{
- char *cp;
- mblk_t *mp;
- struct T_bind_req *tbr;
- ipa_conn_t *ac;
- ipa6_conn_t *ac6;
- sin_t *sin;
- sin6_t *sin6;
+ conn_t *connp;
+ dev_t conn_dev;
+ icmp_stack_t *is;
+ int error;
- ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ);
- ASSERT(RW_LOCK_HELD(&icmp->icmp_rwlock));
- mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI);
- if (mp == NULL)
- return (NULL);
- mp->b_datap->db_type = M_PROTO;
- tbr = (struct T_bind_req *)mp->b_rptr;
- tbr->PRIM_type = bind_prim;
- tbr->ADDR_offset = sizeof (*tbr);
- tbr->CONIND_number = 0;
- tbr->ADDR_length = addr_length;
- cp = (char *)&tbr[1];
- switch (addr_length) {
- case sizeof (ipa_conn_t):
- ASSERT(icmp->icmp_family == AF_INET);
- /* Append a request for an IRE */
- mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
- if (mp->b_cont == NULL) {
- freemsg(mp);
- return (NULL);
- }
- mp->b_cont->b_wptr += sizeof (ire_t);
- mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;
+ /* dev_t is an integral type; 0 is also inet_minor_alloc()'s failure value */
+ conn_dev = 0;
- /* cp known to be 32 bit aligned */
- ac = (ipa_conn_t *)cp;
- ac->ac_laddr = V4_PART_OF_V6(icmp->icmp_v6src);
- ac->ac_faddr = V4_PART_OF_V6(icmp->icmp_v6dst);
- ac->ac_fport = fport;
- ac->ac_lport = 0;
- break;
+ /* If the stream is already open, return immediately. */
+ if (q->q_ptr != NULL)
+ return (0);
- case sizeof (ipa6_conn_t):
- ASSERT(icmp->icmp_family == AF_INET6);
- /* Append a request for an IRE */
- mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
- if (mp->b_cont == NULL) {
- freemsg(mp);
- return (NULL);
- }
- mp->b_cont->b_wptr += sizeof (ire_t);
- mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;
+ if (sflag == MODOPEN)
+ return (EINVAL);
- /* cp known to be 32 bit aligned */
- ac6 = (ipa6_conn_t *)cp;
- ac6->ac6_laddr = icmp->icmp_v6src;
- ac6->ac6_faddr = icmp->icmp_v6dst;
- ac6->ac6_fport = fport;
- ac6->ac6_lport = 0;
- break;
+ /*
+ * Since ICMP is not used so heavily, allocating from the small
+ * arena should be sufficient.
+ */
+ if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) {
+ return (EBUSY);
+ }
- case sizeof (sin_t):
- ASSERT(icmp->icmp_family == AF_INET);
- /* Append a request for an IRE */
- mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
- if (!mp->b_cont) {
- freemsg(mp);
- return (NULL);
- }
- mp->b_cont->b_wptr += sizeof (ire_t);
- mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;
+ if (flag & SO_FALLBACK) {
+ /*
+ * Non streams socket needs a stream to fallback to
+ */
+ RD(q)->q_ptr = (void *)conn_dev;
+ WR(q)->q_qinfo = &icmp_fallback_sock_winit;
+ WR(q)->q_ptr = (void *)ip_minor_arena_sa;
+ qprocson(q);
+ return (0);
+ }
- sin = (sin_t *)cp;
- *sin = sin_null;
- sin->sin_family = AF_INET;
- sin->sin_addr.s_addr = V4_PART_OF_V6(icmp->icmp_bound_v6src);
- break;
+ connp = icmp_open(family, credp, &error, KM_SLEEP);
+ if (connp == NULL) {
+ /*
+ * connp is NULL here, so the minor number must be released
+ * through the local conn_dev, never through connp->conn_dev.
+ */
+ ASSERT(error != 0);
+ inet_minor_free(ip_minor_arena_sa, conn_dev);
+ return (error);
+ }
- case sizeof (sin6_t):
- ASSERT(icmp->icmp_family == AF_INET6);
- /* Append a request for an IRE */
- mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
- if (!mp->b_cont) {
- freemsg(mp);
- return (NULL);
- }
- mp->b_cont->b_wptr += sizeof (ire_t);
- mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;
+ *devp = makedevice(getemajor(*devp), (minor_t)conn_dev);
+ connp->conn_dev = conn_dev;
+ connp->conn_minor_arena = ip_minor_arena_sa;
- sin6 = (sin6_t *)cp;
- *sin6 = sin6_null;
- sin6->sin6_family = AF_INET6;
- sin6->sin6_addr = icmp->icmp_bound_v6src;
- break;
+ is = connp->conn_icmp->icmp_is;
+
+ /*
+ * Link the conn_t and the queue pair to each other.
+ */
+ q->q_ptr = connp;
+ WR(q)->q_ptr = connp;
+ connp->conn_rq = q;
+ connp->conn_wq = WR(q);
+
+ if (connp->conn_icmp->icmp_family == AF_INET6) {
+ /* Build initial header template for transmit */
+ rw_enter(&connp->conn_icmp->icmp_rwlock, RW_WRITER);
+ if ((error = icmp_build_hdrs(connp->conn_icmp)) != 0) {
+ rw_exit(&connp->conn_icmp->icmp_rwlock);
+ inet_minor_free(ip_minor_arena_sa, connp->conn_dev);
+ ipcl_conn_destroy(connp);
+ return (error);
+ }
+ rw_exit(&connp->conn_icmp->icmp_rwlock);
}
- /* Add protocol number to end */
- cp[addr_length] = icmp->icmp_proto;
- mp->b_wptr = (uchar_t *)&cp[addr_length + 1];
- return (mp);
+
+ /* Seed the queue water marks from the per-stack tunables. */
+ q->q_hiwat = is->is_recv_hiwat;
+ WR(q)->q_hiwat = is->is_xmit_hiwat;
+ WR(q)->q_lowat = is->is_xmit_lowat;
+
+ qprocson(q);
+
+ /* Set the Stream head write offset. */
+ (void) proto_set_tx_wroff(q, connp,
+ connp->conn_icmp->icmp_max_hdr_len + is->is_wroff_extra);
+ (void) proto_set_rx_hiwat(connp->conn_rq, connp, q->q_hiwat);
+
+ /* Setup is complete: clear the incipient flag under conn_lock. */
+ mutex_enter(&connp->conn_lock);
+ connp->conn_state_flags &= ~CONN_INCIPIENT;
+ mutex_exit(&connp->conn_lock);
+
+ return (0);
}
-/* For /dev/icmp aka AF_INET open */
+/*
+ * For /dev/icmp4 aka AF_INET open.
+ * Thin wrapper: delegates to icmp_tpi_open() with family AF_INET and
+ * returns its result unchanged.
+ */
static int
icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
{
- return (icmp_open(q, devp, flag, sflag, credp, B_FALSE));
+ return (icmp_tpi_open(q, devp, flag, sflag, credp, AF_INET));
}
/* For /dev/icmp6 aka AF_INET6 open */
+/* Thin wrapper: delegates to icmp_tpi_open() with family AF_INET6. */
static int
icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
{
- return (icmp_open(q, devp, flag, sflag, credp, B_TRUE));
+ return (icmp_tpi_open(q, devp, flag, sflag, credp, AF_INET6));
}
/*
* This is the open routine for icmp. It allocates a icmp_t structure for
* the stream and, on the first open of the module, creates an ND table.
*/
-/*ARGSUSED2*/
-static int
-icmp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp,
- boolean_t isv6)
+/* ARGSUSED */
+static conn_t *
+icmp_open(int family, cred_t *credp, int *err, int flags)
{
- int err;
icmp_t *icmp;
conn_t *connp;
- dev_t conn_dev;
zoneid_t zoneid;
netstack_t *ns;
icmp_stack_t *is;
+ boolean_t isv6 = B_FALSE;
- /* If the stream is already open, return immediately. */
- if (q->q_ptr != NULL)
- return (0);
-
- if (sflag == MODOPEN)
- return (EINVAL);
+ *err = secpolicy_net_icmpaccess(credp);
+ if (*err != 0)
+ return (NULL);
+ if (family == AF_INET6)
+ isv6 = B_TRUE;
ns = netstack_find_by_cred(credp);
ASSERT(ns != NULL);
is = ns->netstack_icmp;
@@ -1488,20 +1600,11 @@ icmp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp,
else
zoneid = crgetzoneid(credp);
- /*
- * Since ICMP is not used so heavily, allocating from the small
- * arena should be sufficient.
- */
- if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) {
- netstack_rele(ns);
- return (EBUSY);
- }
- *devp = makedevice(getemajor(*devp), (minor_t)conn_dev);
+ ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP);
- connp = ipcl_conn_create(IPCL_RAWIPCONN, KM_SLEEP, ns);
- connp->conn_dev = conn_dev;
- connp->conn_minor_arena = ip_minor_arena_sa;
+ connp = ipcl_conn_create(IPCL_RAWIPCONN, flags, ns);
+ if (connp == NULL) {
+ /*
+ * The allocation is no longer unconditionally KM_SLEEP, so
+ * it may fail for KM_NOSLEEP callers. Drop the hold taken
+ * by netstack_find_by_cred() and report the failure instead
+ * of dereferencing a NULL conn_t below.
+ */
+ netstack_rele(ns);
+ *err = ENOMEM;
+ return (NULL);
+ }
icmp = connp->conn_icmp;
+ icmp->icmp_v6dst = sin6_null;
/*
* ipcl_conn_create did a netstack_hold. Undo the hold that was
@@ -1509,14 +1612,6 @@ icmp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp,
*/
netstack_rele(ns);
- /*
- * Initialize the icmp_t structure for this stream.
- */
- q->q_ptr = connp;
- WR(q)->q_ptr = connp;
- connp->conn_rq = q;
- connp->conn_wq = WR(q);
-
rw_enter(&icmp->icmp_rwlock, RW_WRITER);
ASSERT(connp->conn_ulp == IPPROTO_ICMP);
ASSERT(connp->conn_icmp == icmp);
@@ -1561,37 +1656,14 @@ icmp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp,
icmp->icmp_is = is;
- q->q_hiwat = is->is_recv_hiwat;
- WR(q)->q_hiwat = is->is_xmit_hiwat;
- WR(q)->q_lowat = is->is_xmit_lowat;
-
connp->conn_recv = icmp_input;
crhold(credp);
connp->conn_cred = credp;
- mutex_enter(&connp->conn_lock);
- connp->conn_state_flags &= ~CONN_INCIPIENT;
- mutex_exit(&connp->conn_lock);
-
- qprocson(q);
-
- if (icmp->icmp_family == AF_INET6) {
- /* Build initial header template for transmit */
- if ((err = icmp_build_hdrs(icmp)) != 0) {
- rw_exit(&icmp->icmp_rwlock);
- qprocsoff(q);
- ipcl_conn_destroy(connp);
- return (err);
- }
- }
rw_exit(&icmp->icmp_rwlock);
- /* Set the Stream head write offset. */
- (void) mi_set_sth_wroff(q,
- icmp->icmp_max_hdr_len + is->is_wroff_extra);
- (void) mi_set_sth_hiwat(q, q->q_hiwat);
-
- return (0);
+ connp->conn_flow_cntrld = B_FALSE;
+ return (connp);
}
/*
@@ -1657,14 +1729,15 @@ icmp_opt_default(queue_t *q, int level, int name, uchar_t *ptr)
* It returns the size of the option retrieved.
*/
int
-icmp_opt_get_locked(queue_t *q, int level, int name, uchar_t *ptr)
+icmp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr)
{
- conn_t *connp = Q_TO_CONN(q);
- icmp_t *icmp = connp->conn_icmp;
- icmp_stack_t *is = icmp->icmp_is;
- int *i1 = (int *)ptr;
+ icmp_t *icmp = connp->conn_icmp;
+ icmp_stack_t *is = icmp->icmp_is;
+ int *i1 = (int *)ptr;
ip6_pkt_t *ipp = &icmp->icmp_sticky_ipp;
+ int ret = 0;
+ ASSERT(RW_READ_HELD(&icmp->icmp_rwlock));
switch (level) {
case SOL_SOCKET:
switch (name) {
@@ -1696,12 +1769,12 @@ icmp_opt_get_locked(queue_t *q, int level, int name, uchar_t *ptr)
break;
case SO_SNDBUF:
- ASSERT(q->q_hiwat <= INT_MAX);
- *i1 = (int)q->q_hiwat;
+ ASSERT(icmp->icmp_xmit_hiwat <= INT_MAX);
+ *i1 = icmp->icmp_xmit_hiwat;
break;
case SO_RCVBUF:
- ASSERT(RD(q)->q_hiwat <= INT_MAX);
- *i1 = (int)RD(q)->q_hiwat;
+ ASSERT(icmp->icmp_recv_hiwat <= INT_MAX);
+ *i1 = icmp->icmp_recv_hiwat;
break;
case SO_DGRAM_ERRIND:
*i1 = icmp->icmp_dgram_errind;
@@ -1726,21 +1799,25 @@ icmp_opt_get_locked(queue_t *q, int level, int name, uchar_t *ptr)
* case SO_ALLZONES:
*/
default:
- return (-1);
+ ret = -1;
+ goto done;
}
break;
case IPPROTO_IP:
/*
* Only allow IPv4 option processing on IPv4 sockets.
*/
- if (icmp->icmp_family != AF_INET)
- return (-1);
+ if (icmp->icmp_family != AF_INET) {
+ ret = -1;
+ goto done;
+ }
switch (name) {
case IP_OPTIONS:
case T_IP_OPTIONS:
/* Options are passed up with each packet */
- return (0);
+ ret = 0;
+ goto done;
case IP_HDRINCL:
*i1 = (int)icmp->icmp_hdrincl;
break;
@@ -1754,13 +1831,16 @@ icmp_opt_get_locked(queue_t *q, int level, int name, uchar_t *ptr)
case IP_MULTICAST_IF:
/* 0 address if not set */
*(ipaddr_t *)ptr = icmp->icmp_multicast_if_addr;
- return (sizeof (ipaddr_t));
+ ret = sizeof (ipaddr_t);
+ goto done;
case IP_MULTICAST_TTL:
*(uchar_t *)ptr = icmp->icmp_multicast_ttl;
- return (sizeof (uchar_t));
+ ret = sizeof (uchar_t);
+ goto done;
case IP_MULTICAST_LOOP:
*ptr = connp->conn_multicast_loop;
- return (sizeof (uint8_t));
+ ret = sizeof (uint8_t);
+ goto done;
case IP_BOUND_IF:
/* Zero if not set */
*i1 = icmp->icmp_bound_if;
@@ -1768,12 +1848,12 @@ icmp_opt_get_locked(queue_t *q, int level, int name, uchar_t *ptr)
case IP_UNSPEC_SRC:
*ptr = icmp->icmp_unspec_source;
break; /* goto sizeof (int) option return */
- case IP_BROADCAST_TTL:
- *(uchar_t *)ptr = connp->conn_broadcast_ttl;
- return (sizeof (uchar_t));
case IP_RECVIF:
*ptr = icmp->icmp_recvif;
break; /* goto sizeof (int) option return */
+ case IP_BROADCAST_TTL:
+ *(uchar_t *)ptr = connp->conn_broadcast_ttl;
+ return (sizeof (uchar_t));
case IP_RECVPKTINFO:
/*
* This also handles IP_PKTINFO.
@@ -1784,7 +1864,8 @@ icmp_opt_get_locked(queue_t *q, int level, int name, uchar_t *ptr)
* error for IP_PKTINFO as it's not supported as a
* sticky option.
*/
- return (-EINVAL);
+ ret = -EINVAL;
+ goto done;
/*
* Cannot "get" the value of following options
* at this level. Action is same as "default" to
@@ -1815,15 +1896,18 @@ icmp_opt_get_locked(queue_t *q, int level, int name, uchar_t *ptr)
* case IP_NEXTHOP:
*/
default:
- return (-1);
+ ret = -1;
+ goto done;
}
break;
case IPPROTO_IPV6:
/*
* Only allow IPv6 option processing on native IPv6 sockets.
*/
- if (icmp->icmp_family != AF_INET6)
- return (-1);
+ if (icmp->icmp_family != AF_INET6) {
+ ret = -1;
+ goto done;
+ }
switch (name) {
case IPV6_UNICAST_HOPS:
*i1 = (unsigned int)icmp->icmp_ttl;
@@ -1850,8 +1934,10 @@ icmp_opt_get_locked(queue_t *q, int level, int name, uchar_t *ptr)
* Return offset or -1 if no checksum offset.
* Does not apply to IPPROTO_ICMPV6
*/
- if (icmp->icmp_proto == IPPROTO_ICMPV6)
- return (-1);
+ if (icmp->icmp_proto == IPPROTO_ICMPV6) {
+ ret = -1;
+ goto done;
+ }
if (icmp->icmp_raw_checksum) {
*i1 = icmp->icmp_checksum_off;
@@ -1868,7 +1954,8 @@ icmp_opt_get_locked(queue_t *q, int level, int name, uchar_t *ptr)
case MCAST_JOIN_SOURCE_GROUP:
case MCAST_LEAVE_SOURCE_GROUP:
/* cannot "get" the value for these */
- return (-1);
+ ret = -1;
+ goto done;
case IPV6_RECVPKTINFO:
*i1 = icmp->icmp_ip_recvpktinfo;
break;
@@ -1912,7 +1999,8 @@ icmp_opt_get_locked(queue_t *q, int level, int name, uchar_t *ptr)
pkti->ipi6_addr = ipp->ipp_addr;
else
pkti->ipi6_addr = ipv6_all_zeros;
- return (sizeof (struct in6_pktinfo));
+ ret = sizeof (struct in6_pktinfo);
+ goto done;
}
case IPV6_NEXTHOP: {
sin6_t *sin6 = (sin6_t *)ptr;
@@ -1922,7 +2010,8 @@ icmp_opt_get_locked(queue_t *q, int level, int name, uchar_t *ptr)
*sin6 = sin6_null;
sin6->sin6_family = AF_INET6;
sin6->sin6_addr = ipp->ipp_nexthop;
- return (sizeof (sin6_t));
+ ret = (sizeof (sin6_t));
+ goto done;
}
case IPV6_HOPOPTS:
if (!(ipp->ipp_fields & IPPF_HOPOPTS))
@@ -1937,28 +2026,38 @@ icmp_opt_get_locked(queue_t *q, int level, int name, uchar_t *ptr)
ptr[1] = (ipp->ipp_hopoptslen -
icmp->icmp_label_len_v6 + 7) / 8 - 1;
}
- return (ipp->ipp_hopoptslen - icmp->icmp_label_len_v6);
+ ret = (ipp->ipp_hopoptslen - icmp->icmp_label_len_v6);
+ goto done;
case IPV6_RTHDRDSTOPTS:
if (!(ipp->ipp_fields & IPPF_RTDSTOPTS))
return (0);
bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen);
- return (ipp->ipp_rtdstoptslen);
+ ret = ipp->ipp_rtdstoptslen;
+ goto done;
case IPV6_RTHDR:
if (!(ipp->ipp_fields & IPPF_RTHDR))
return (0);
bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen);
- return (ipp->ipp_rthdrlen);
+ ret = ipp->ipp_rthdrlen;
+ goto done;
case IPV6_DSTOPTS:
- if (!(ipp->ipp_fields & IPPF_DSTOPTS))
- return (0);
+ if (!(ipp->ipp_fields & IPPF_DSTOPTS)) {
+ ret = 0;
+ goto done;
+ }
bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen);
- return (ipp->ipp_dstoptslen);
+ ret = ipp->ipp_dstoptslen;
+ goto done;
case IPV6_PATHMTU:
- if (!(ipp->ipp_fields & IPPF_PATHMTU))
- return (0);
-
- return (ip_fill_mtuinfo(&icmp->icmp_v6dst, 0,
- (struct ip6_mtuinfo *)ptr, is->is_netstack));
+ if (!(ipp->ipp_fields & IPPF_PATHMTU)) {
+ ret = 0;
+ } else {
+ ret = ip_fill_mtuinfo(
+ &icmp->icmp_v6dst.sin6_addr, 0,
+ (struct ip6_mtuinfo *)ptr,
+ is->is_netstack);
+ }
+ goto done;
case IPV6_TCLASS:
if (ipp->ipp_fields & IPPF_TCLASS)
*i1 = ipp->ipp_tclass;
@@ -1967,18 +2066,21 @@ icmp_opt_get_locked(queue_t *q, int level, int name, uchar_t *ptr)
IPV6_DEFAULT_VERS_AND_FLOW);
break;
default:
- return (-1);
+ ret = -1;
+ goto done;
}
break;
case IPPROTO_ICMPV6:
/*
* Only allow IPv6 option processing on native IPv6 sockets.
*/
- if (icmp->icmp_family != AF_INET6)
- return (-1);
+ if (icmp->icmp_family != AF_INET6) {
+ /* Must leave now; falling into the switch would answer anyway */
+ ret = -1;
+ goto done;
+ }
- if (icmp->icmp_proto != IPPROTO_ICMPV6)
- return (-1);
+ if (icmp->icmp_proto != IPPROTO_ICMPV6) {
+ /* ICMP6 options are only valid on IPPROTO_ICMPV6 endpoints */
+ ret = -1;
+ goto done;
+ }
switch (name) {
case ICMP6_FILTER:
@@ -1989,14 +2091,19 @@ icmp_opt_get_locked(queue_t *q, int level, int name, uchar_t *ptr)
(void) bcopy(icmp->icmp_filter, ptr,
sizeof (icmp6_filter_t));
}
- return (sizeof (icmp6_filter_t));
+ ret = sizeof (icmp6_filter_t);
+ goto done;
default:
- return (-1);
+ ret = -1;
+ goto done;
}
default:
- return (-1);
+ ret = -1;
+ goto done;
}
- return (sizeof (int));
+ ret = sizeof (int);
+done:
+ return (ret);
}
/*
@@ -2004,84 +2111,36 @@ icmp_opt_get_locked(queue_t *q, int level, int name, uchar_t *ptr)
* It returns the size of the option retrieved.
*/
int
-icmp_opt_get(queue_t *q, int level, int name, uchar_t *ptr)
+icmp_tpi_opt_get(queue_t *q, int level, int name, uchar_t *ptr)
{
- icmp_t *icmp = Q_TO_ICMP(q);
+ conn_t *connp = Q_TO_CONN(q);
+ icmp_t *icmp = connp->conn_icmp;
+ /* Despite its name, err carries icmp_opt_get()'s result: a size, or a negative value */
int err;
+ /* icmp_opt_get() asserts that icmp_rwlock is read-held, so take it here. */
rw_enter(&icmp->icmp_rwlock, RW_READER);
- err = icmp_opt_get_locked(q, level, name, ptr);
+ err = icmp_opt_get(connp, level, name, ptr);
rw_exit(&icmp->icmp_rwlock);
return (err);
}
-
-/* This routine sets socket options. */
-/* ARGSUSED */
int
-icmp_opt_set_locked(queue_t *q, uint_t optset_context, int level, int name,
- uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
- void *thisdg_attrs, cred_t *cr, mblk_t *mblk)
+icmp_do_opt_set(conn_t *connp, int level, int name, uint_t inlen,
+ uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, cred_t *cr,
+ void *thisdg_attrs, boolean_t checkonly)
{
- conn_t *connp = Q_TO_CONN(q);
- icmp_t *icmp = connp->conn_icmp;
- icmp_stack_t *is = icmp->icmp_is;
+
int *i1 = (int *)invalp;
boolean_t onoff = (*i1 == 0) ? 0 : 1;
- boolean_t checkonly;
+ icmp_t *icmp = connp->conn_icmp;
+ icmp_stack_t *is = icmp->icmp_is;
int error;
- switch (optset_context) {
- case SETFN_OPTCOM_CHECKONLY:
- checkonly = B_TRUE;
- /*
- * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
- * inlen != 0 implies value supplied and
- * we have to "pretend" to set it.
- * inlen == 0 implies that there is no
- * value part in T_CHECK request and just validation
- * done elsewhere should be enough, we just return here.
- */
- if (inlen == 0) {
- *outlenp = 0;
- return (0);
- }
- break;
- case SETFN_OPTCOM_NEGOTIATE:
- checkonly = B_FALSE;
- break;
- case SETFN_UD_NEGOTIATE:
- case SETFN_CONN_NEGOTIATE:
- checkonly = B_FALSE;
- /*
- * Negotiating local and "association-related" options
- * through T_UNITDATA_REQ.
- *
- * Following routine can filter out ones we do not
- * want to be "set" this way.
- */
- if (!icmp_opt_allow_udr_set(level, name)) {
- *outlenp = 0;
- return (EINVAL);
- }
- break;
- default:
- /*
- * We should never get here
- */
- *outlenp = 0;
- return (EINVAL);
- }
-
- ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
- (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));
-
+ ASSERT(RW_WRITE_HELD(&icmp->icmp_rwlock));
/*
* For fixed length options, no sanity check
* of passed in length is done. It is assumed *_optcom_req()
* routines do the right thing.
*/
-
switch (level) {
case SOL_SOCKET:
switch (name) {
@@ -2161,12 +2220,14 @@ icmp_opt_set_locked(queue_t *q, uint_t optset_context, int level, int name,
/* Drop lock across the bind operation */
rw_exit(&icmp->icmp_rwlock);
- icmp_bind_proto(q);
+ (void) icmp_bind_proto(connp);
rw_enter(&icmp->icmp_rwlock, RW_WRITER);
return (0);
case SO_REUSEADDR:
- if (!checkonly)
+ if (!checkonly) {
icmp->icmp_reuseaddr = onoff;
+ PASS_OPT_TO_IP(connp);
+ }
break;
/*
@@ -2174,16 +2235,22 @@ icmp_opt_set_locked(queue_t *q, uint_t optset_context, int level, int name,
* but are only meaningful to IP.
*/
case SO_DONTROUTE:
- if (!checkonly)
+ if (!checkonly) {
icmp->icmp_dontroute = onoff;
+ PASS_OPT_TO_IP(connp);
+ }
break;
case SO_USELOOPBACK:
- if (!checkonly)
+ if (!checkonly) {
icmp->icmp_useloopback = onoff;
+ PASS_OPT_TO_IP(connp);
+ }
break;
case SO_BROADCAST:
- if (!checkonly)
+ if (!checkonly) {
icmp->icmp_broadcast = onoff;
+ PASS_OPT_TO_IP(connp);
+ }
break;
case SO_SNDBUF:
@@ -2192,7 +2259,10 @@ icmp_opt_set_locked(queue_t *q, uint_t optset_context, int level, int name,
return (ENOBUFS);
}
if (!checkonly) {
- q->q_hiwat = *i1;
+ if (!IPCL_IS_NONSTR(connp)) {
+ connp->conn_wq->q_hiwat = *i1;
+ }
+ icmp->icmp_xmit_hiwat = *i1;
}
break;
case SO_RCVBUF:
@@ -2201,9 +2271,10 @@ icmp_opt_set_locked(queue_t *q, uint_t optset_context, int level, int name,
return (ENOBUFS);
}
if (!checkonly) {
- RD(q)->q_hiwat = *i1;
+ icmp->icmp_recv_hiwat = *i1;
rw_exit(&icmp->icmp_rwlock);
- (void) mi_set_sth_hiwat(RD(q), *i1);
+ (void) proto_set_rx_hiwat(connp->conn_rq, connp,
+ *i1);
rw_enter(&icmp->icmp_rwlock, RW_WRITER);
}
break;
@@ -2273,8 +2344,9 @@ icmp_opt_set_locked(queue_t *q, uint_t optset_context, int level, int name,
icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
icmp->icmp_ip_snd_options_len;
rw_exit(&icmp->icmp_rwlock);
- (void) mi_set_sth_wroff(RD(q), icmp->icmp_max_hdr_len +
- is->is_wroff_extra);
+ (void) proto_set_tx_wroff(connp->conn_rq == NULL ? NULL:
+ RD(connp->conn_rq), connp,
+ icmp->icmp_max_hdr_len + is->is_wroff_extra);
rw_enter(&icmp->icmp_rwlock, RW_WRITER);
break;
case IP_HDRINCL:
@@ -2297,8 +2369,10 @@ icmp_opt_set_locked(queue_t *q, uint_t optset_context, int level, int name,
* TODO should check OPTMGMT reply and undo this if
* there is an error.
*/
- if (!checkonly)
+ if (!checkonly) {
icmp->icmp_multicast_if_addr = *i1;
+ PASS_OPT_TO_IP(connp);
+ }
break;
case IP_MULTICAST_TTL:
if (!checkonly)
@@ -2308,23 +2382,29 @@ icmp_opt_set_locked(queue_t *q, uint_t optset_context, int level, int name,
if (!checkonly) {
connp->conn_multicast_loop =
(*invalp == 0) ? 0 : 1;
+ PASS_OPT_TO_IP(connp);
}
break;
case IP_BOUND_IF:
- if (!checkonly)
+ if (!checkonly) {
icmp->icmp_bound_if = *i1;
+ PASS_OPT_TO_IP(connp);
+ }
break;
case IP_UNSPEC_SRC:
- if (!checkonly)
+ if (!checkonly) {
icmp->icmp_unspec_source = onoff;
+ PASS_OPT_TO_IP(connp);
+ }
break;
case IP_BROADCAST_TTL:
if (!checkonly)
connp->conn_broadcast_ttl = *invalp;
break;
case IP_RECVIF:
- if (!checkonly)
+ if (!checkonly) {
icmp->icmp_recvif = onoff;
+ }
/*
* pass to ip
*/
@@ -2354,8 +2434,9 @@ icmp_opt_set_locked(queue_t *q, uint_t optset_context, int level, int name,
}
- if (inlen != sizeof (struct in_pktinfo))
+ if (inlen != sizeof (struct in_pktinfo)) {
return (EINVAL);
+ }
if ((attr_pktinfop = (ip4_pkt_t *)thisdg_attrs)
== NULL) {
@@ -2436,8 +2517,10 @@ icmp_opt_set_locked(queue_t *q, uint_t optset_context, int level, int name,
switch (name) {
case IPV6_MULTICAST_IF:
- if (!checkonly)
+ if (!checkonly) {
icmp->icmp_multicast_if_index = *i1;
+ PASS_OPT_TO_IP(connp);
+ }
break;
case IPV6_UNICAST_HOPS:
/* -1 means use default */
@@ -2492,8 +2575,10 @@ icmp_opt_set_locked(queue_t *q, uint_t optset_context, int level, int name,
*outlenp = 0;
return (EINVAL);
}
- if (!checkonly)
+ if (!checkonly) {
connp->conn_multicast_loop = *i1;
+ PASS_OPT_TO_IP(connp);
+ }
break;
case IPV6_CHECKSUM:
/*
@@ -2544,51 +2629,71 @@ icmp_opt_set_locked(queue_t *q, uint_t optset_context, int level, int name,
*/
return (-EINVAL);
case IPV6_BOUND_IF:
- if (!checkonly)
+ if (!checkonly) {
icmp->icmp_bound_if = *i1;
+ PASS_OPT_TO_IP(connp);
+ }
break;
case IPV6_UNSPEC_SRC:
- if (!checkonly)
+ if (!checkonly) {
icmp->icmp_unspec_source = onoff;
+ PASS_OPT_TO_IP(connp);
+ }
break;
case IPV6_RECVTCLASS:
- if (!checkonly)
+ if (!checkonly) {
icmp->icmp_ipv6_recvtclass = onoff;
+ PASS_OPT_TO_IP(connp);
+ }
break;
/*
* Set boolean switches for ancillary data delivery
*/
case IPV6_RECVPKTINFO:
- if (!checkonly)
+ if (!checkonly) {
icmp->icmp_ip_recvpktinfo = onoff;
+ PASS_OPT_TO_IP(connp);
+ }
break;
case IPV6_RECVPATHMTU:
- if (!checkonly)
+ if (!checkonly) {
icmp->icmp_ipv6_recvpathmtu = onoff;
+ PASS_OPT_TO_IP(connp);
+ }
break;
case IPV6_RECVHOPLIMIT:
- if (!checkonly)
+ if (!checkonly) {
icmp->icmp_ipv6_recvhoplimit = onoff;
+ PASS_OPT_TO_IP(connp);
+ }
break;
case IPV6_RECVHOPOPTS:
- if (!checkonly)
+ if (!checkonly) {
icmp->icmp_ipv6_recvhopopts = onoff;
+ PASS_OPT_TO_IP(connp);
+ }
break;
case IPV6_RECVDSTOPTS:
- if (!checkonly)
+ if (!checkonly) {
icmp->icmp_ipv6_recvdstopts = onoff;
+ PASS_OPT_TO_IP(connp);
+ }
break;
case _OLD_IPV6_RECVDSTOPTS:
if (!checkonly)
icmp->icmp_old_ipv6_recvdstopts = onoff;
break;
case IPV6_RECVRTHDRDSTOPTS:
- if (!checkonly)
+ if (!checkonly) {
icmp->icmp_ipv6_recvrtdstopts = onoff;
+ PASS_OPT_TO_IP(connp);
+ }
break;
case IPV6_RECVRTHDR:
- if (!checkonly)
+ if (!checkonly) {
icmp->icmp_ipv6_recvrthdr = onoff;
+ PASS_OPT_TO_IP(connp);
+ }
break;
/*
* Set sticky options or ancillary data.
@@ -2601,8 +2706,10 @@ icmp_opt_set_locked(queue_t *q, uint_t optset_context, int level, int name,
* in ip_opt_set(). For ancillary data the
* source address is checked in ip_wput_v6.
*/
- if (inlen != 0 && inlen != sizeof (struct in6_pktinfo))
+ if (inlen != 0 && inlen !=
+ sizeof (struct in6_pktinfo)) {
return (EINVAL);
+ }
if (checkonly)
break;
@@ -2630,6 +2737,7 @@ icmp_opt_set_locked(queue_t *q, uint_t optset_context, int level, int name,
error = icmp_build_hdrs(icmp);
if (error != 0)
return (error);
+ PASS_OPT_TO_IP(connp);
}
break;
case IPV6_HOPLIMIT:
@@ -2660,8 +2768,9 @@ icmp_opt_set_locked(queue_t *q, uint_t optset_context, int level, int name,
* IPV6_RECVTCLASS accepts -1 as use kernel default
* and [0, 255] as the actualy traffic class.
*/
- if (inlen != 0 && inlen != sizeof (int))
+ if (inlen != 0 && inlen != sizeof (int)) {
return (EINVAL);
+ }
if (checkonly)
break;
@@ -2691,8 +2800,9 @@ icmp_opt_set_locked(queue_t *q, uint_t optset_context, int level, int name,
* IP will verify that the nexthop is reachable
* and fail for sticky options.
*/
- if (inlen != 0 && inlen != sizeof (sin6_t))
+ if (inlen != 0 && inlen != sizeof (sin6_t)) {
return (EINVAL);
+ }
if (checkonly)
break;
@@ -2702,10 +2812,12 @@ icmp_opt_set_locked(queue_t *q, uint_t optset_context, int level, int name,
} else {
sin6_t *sin6 = (sin6_t *)invalp;
- if (sin6->sin6_family != AF_INET6)
+ if (sin6->sin6_family != AF_INET6) {
return (EAFNOSUPPORT);
- if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr))
+ }
+ if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
return (EADDRNOTAVAIL);
+ }
ipp->ipp_nexthop = sin6->sin6_addr;
if (!IN6_IS_ADDR_UNSPECIFIED(
&ipp->ipp_nexthop))
@@ -2717,6 +2829,7 @@ icmp_opt_set_locked(queue_t *q, uint_t optset_context, int level, int name,
error = icmp_build_hdrs(icmp);
if (error != 0)
return (error);
+ PASS_OPT_TO_IP(connp);
}
break;
case IPV6_HOPOPTS: {
@@ -2726,8 +2839,9 @@ icmp_opt_set_locked(queue_t *q, uint_t optset_context, int level, int name,
* eight bytes, and matching size passed in.
*/
if (inlen != 0 &&
- inlen != (8 * (hopts->ip6h_len + 1)))
+ inlen != (8 * (hopts->ip6h_len + 1))) {
return (EINVAL);
+ }
if (checkonly)
break;
@@ -2974,23 +3088,89 @@ icmp_opt_set_locked(queue_t *q, uint_t optset_context, int level, int name,
*outlenp = inlen;
return (0);
}
+
/* This routine sets socket options. */
/* ARGSUSED */
int
-icmp_opt_set(queue_t *q, uint_t optset_context, int level, int name,
+icmp_opt_set(conn_t *connp, uint_t optset_context, int level, int name,
+ uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
+ void *thisdg_attrs, cred_t *cr)
+{
+ boolean_t checkonly;
+
+ /*
+ * Translate the option-management context into a check-only flag,
+ * returning early for requests that need no further work or are not
+ * permitted. Every early exit reports a zero-length result through
+ * outlenp.
+ */
+ switch (optset_context) {
+ case SETFN_OPTCOM_CHECKONLY:
+ /*
+ * T_CHECK semantics for T_OPTCOM_REQ. With no value part
+ * (inlen == 0) the validation done elsewhere is sufficient
+ * and there is nothing to do here; with a value part we
+ * have to "pretend" to set it.
+ */
+ if (inlen == 0) {
+ *outlenp = 0;
+ return (0);
+ }
+ checkonly = B_TRUE;
+ break;
+ case SETFN_OPTCOM_NEGOTIATE:
+ checkonly = B_FALSE;
+ break;
+ case SETFN_UD_NEGOTIATE:
+ case SETFN_CONN_NEGOTIATE:
+ /*
+ * Local and "association-related" options negotiated
+ * through T_UNITDATA_REQ; icmp_opt_allow_udr_set() filters
+ * out the ones we do not want to be "set" this way.
+ */
+ if (!icmp_opt_allow_udr_set(level, name)) {
+ *outlenp = 0;
+ return (EINVAL);
+ }
+ checkonly = B_FALSE;
+ break;
+ default:
+ /* We should never get here */
+ *outlenp = 0;
+ return (EINVAL);
+ }
+
+ /* A check-only request that reaches this point carries a value. */
+ ASSERT(optset_context != SETFN_OPTCOM_CHECKONLY || inlen != 0);
+
+ return (icmp_do_opt_set(connp, level, name, inlen, invalp, outlenp,
+ outvalp, cr, thisdg_attrs, checkonly));
+}
+
+/*
+ * This routine sets socket options for the TPI (STREAMS) path.
+ * It maps the queue to its conn_t, takes icmp_rwlock as writer and hands
+ * the request to icmp_opt_set(), returning that routine's result.
+ * The mblk argument is not used.
+ */
+/* ARGSUSED */
+int
+icmp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name,
uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
void *thisdg_attrs, cred_t *cr, mblk_t *mblk)
{
+ conn_t *connp = Q_TO_CONN(q);
icmp_t *icmp;
- int err;
-
- icmp = Q_TO_ICMP(q);
+ int error;
+ icmp = connp->conn_icmp;
+ /* icmp_do_opt_set() asserts that icmp_rwlock is write-held. */
rw_enter(&icmp->icmp_rwlock, RW_WRITER);
- err = icmp_opt_set_locked(q, optset_context, level, name, inlen, invalp,
- outlenp, outvalp, thisdg_attrs, cr, mblk);
+ error = icmp_opt_set(connp, optset_context, level, name, inlen, invalp,
+ outlenp, outvalp, thisdg_attrs, cr);
rw_exit(&icmp->icmp_rwlock);
- return (err);
+ return (error);
}
/*
@@ -3055,7 +3235,8 @@ icmp_build_hdrs(icmp_t *icmp)
if (hdrs_len > icmp->icmp_max_hdr_len) {
icmp->icmp_max_hdr_len = hdrs_len;
rw_exit(&icmp->icmp_rwlock);
- (void) mi_set_sth_wroff(icmp->icmp_connp->conn_rq,
+ (void) proto_set_tx_wroff(icmp->icmp_connp->conn_rq,
+ icmp->icmp_connp,
icmp->icmp_max_hdr_len + is->is_wroff_extra);
rw_enter(&icmp->icmp_rwlock, RW_WRITER);
}
@@ -3123,6 +3304,33 @@ icmp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr)
icmppa->icmp_param_value = new_value;
return (0);
}
+/*
+ * Hand mp to the stream, or park it on the fallback queue while the
+ * endpoint is still a non-STREAMS socket. Must be entered with
+ * icmp_recv_lock held; the lock is dropped on both paths before return.
+ */
+static void
+icmp_queue_fallback(icmp_t *icmp, mblk_t *mp)
+{
+ ASSERT(MUTEX_HELD(&icmp->icmp_recv_lock));
+
+ if (!IPCL_IS_NONSTR(icmp->icmp_connp)) {
+ /*
+ * no more fallbacks possible, ok to drop lock.
+ */
+ mutex_exit(&icmp->icmp_recv_lock);
+ putnext(icmp->icmp_connp->conn_rq, mp);
+ return;
+ }
+
+ /*
+ * fallback has started but messages have not been moved yet:
+ * append mp to the b_next-linked fallback queue.
+ */
+ if (icmp->icmp_fallback_queue_head != NULL) {
+ ASSERT(icmp->icmp_fallback_queue_tail != NULL);
+ icmp->icmp_fallback_queue_tail->b_next = mp;
+ } else {
+ ASSERT(icmp->icmp_fallback_queue_tail == NULL);
+ icmp->icmp_fallback_queue_head = mp;
+ }
+ icmp->icmp_fallback_queue_tail = mp;
+ mutex_exit(&icmp->icmp_recv_lock);
+}
+
/*ARGSUSED2*/
static void
icmp_input(void *arg1, mblk_t *mp, void *arg2)
@@ -3148,6 +3356,7 @@ icmp_input(void *arg1, mblk_t *mp, void *arg2)
uint_t icmp_opt = 0;
boolean_t icmp_ipv6_recvhoplimit = B_FALSE;
uint_t hopstrip;
+ int error;
ASSERT(connp->conn_flags & IPCL_RAWIPCONN);
@@ -3189,7 +3398,7 @@ icmp_input(void *arg1, mblk_t *mp, void *arg2)
/*
* ICMP messages.
*/
- icmp_icmp_error(connp->conn_rq, mp);
+ icmp_icmp_error(connp, mp);
return;
}
}
@@ -3388,8 +3597,7 @@ icmp_input(void *arg1, mblk_t *mp, void *arg2)
freeb(options_mp);
BUMP_MIB(&is->is_rawip_mib, rawipInDatagrams);
- putnext(connp->conn_rq, mp);
- return;
+ goto deliver;
}
/*
@@ -3707,7 +3915,7 @@ icmp_input(void *arg1, mblk_t *mp, void *arg2)
udi_size -= toh->len;
}
if (icmp->icmp_timestamp) {
- struct T_opthdr *toh;
+ struct T_opthdr *toh;
toh = (struct T_opthdr *)dstopt;
toh->level = SOL_SOCKET;
@@ -3723,6 +3931,7 @@ icmp_input(void *arg1, mblk_t *mp, void *arg2)
dstopt = (uchar_t *)toh + toh->len;
udi_size -= toh->len;
}
+
if (icmp_opt & IPPF_HOPOPTS) {
struct T_opthdr *toh;
@@ -3792,235 +4001,37 @@ icmp_input(void *arg1, mblk_t *mp, void *arg2)
ASSERT(udi_size == 0);
}
BUMP_MIB(&is->is_rawip_mib, rawipInDatagrams);
- putnext(connp->conn_rq, mp);
-}
-
-/*
- * Handle the results of a T_BIND_REQ whether deferred by IP or handled
- * immediately.
- */
-static void
-icmp_bind_result(conn_t *connp, mblk_t *mp)
-{
- struct T_error_ack *tea;
-
- switch (mp->b_datap->db_type) {
- case M_PROTO:
- case M_PCPROTO:
- /* M_PROTO messages contain some type of TPI message. */
- if ((mp->b_wptr - mp->b_rptr) < sizeof (t_scalar_t)) {
- freemsg(mp);
- return;
- }
- tea = (struct T_error_ack *)mp->b_rptr;
-
- switch (tea->PRIM_type) {
- case T_ERROR_ACK:
- switch (tea->ERROR_prim) {
- case O_T_BIND_REQ:
- case T_BIND_REQ:
- icmp_bind_error(connp, mp);
- return;
- default:
- break;
- }
- ASSERT(0);
- freemsg(mp);
- return;
-
- case T_BIND_ACK:
- icmp_bind_ack(connp, mp);
- return;
-
- default:
- break;
- }
- freemsg(mp);
- return;
- default:
- /* FIXME: other cases? */
- ASSERT(0);
- freemsg(mp);
- return;
- }
-}
-
-/*
- * Process a T_BIND_ACK
- */
-static void
-icmp_bind_ack(conn_t *connp, mblk_t *mp)
-{
- icmp_t *icmp = connp->conn_icmp;
- mblk_t *mp1;
- ire_t *ire;
- struct T_bind_ack *tba;
- uchar_t *addrp;
- ipa_conn_t *ac;
- ipa6_conn_t *ac6;
-
- rw_enter(&icmp->icmp_rwlock, RW_WRITER);
- /*
- * We know if headers are included or not so we can
- * safely do this.
- */
- if (icmp->icmp_state == TS_UNBND) {
- /*
- * TPI has not yet bound - bind sent by
- * icmp_bind_proto.
- */
- freemsg(mp);
- rw_exit(&icmp->icmp_rwlock);
- return;
- }
- ASSERT(icmp->icmp_pending_op != -1);
-
- /*
- * If a broadcast/multicast address was bound set
- * the source address to 0.
- * This ensures no datagrams with broadcast address
- * as source address are emitted (which would violate
- * RFC1122 - Hosts requirements)
- *
- * Note that when connecting the returned IRE is
- * for the destination address and we only perform
- * the broadcast check for the source address (it
- * is OK to connect to a broadcast/multicast address.)
- */
- mp1 = mp->b_cont;
- if (mp1 != NULL && mp1->b_datap->db_type == IRE_DB_TYPE) {
- ire = (ire_t *)mp1->b_rptr;
- /*
- * Note: we get IRE_BROADCAST for IPv6 to "mark" a multicast
- * local address.
- */
- if (ire->ire_type == IRE_BROADCAST &&
- icmp->icmp_state != TS_DATA_XFER) {
- ASSERT(icmp->icmp_pending_op == T_BIND_REQ ||
- icmp->icmp_pending_op == O_T_BIND_REQ);
- /* This was just a local bind to a MC/broadcast addr */
- V6_SET_ZERO(icmp->icmp_v6src);
- if (icmp->icmp_family == AF_INET6)
- (void) icmp_build_hdrs(icmp);
- } else if (V6_OR_V4_INADDR_ANY(icmp->icmp_v6src)) {
- /*
- * Local address not yet set - pick it from the
- * T_bind_ack
- */
- tba = (struct T_bind_ack *)mp->b_rptr;
- addrp = &mp->b_rptr[tba->ADDR_offset];
- switch (icmp->icmp_family) {
- case AF_INET:
- if (tba->ADDR_length == sizeof (ipa_conn_t)) {
- ac = (ipa_conn_t *)addrp;
- } else {
- ASSERT(tba->ADDR_length ==
- sizeof (ipa_conn_x_t));
- ac = &((ipa_conn_x_t *)addrp)->acx_conn;
- }
- IN6_IPADDR_TO_V4MAPPED(ac->ac_laddr,
- &icmp->icmp_v6src);
- break;
- case AF_INET6:
- if (tba->ADDR_length == sizeof (ipa6_conn_t)) {
- ac6 = (ipa6_conn_t *)addrp;
- } else {
- ASSERT(tba->ADDR_length ==
- sizeof (ipa6_conn_x_t));
- ac6 = &((ipa6_conn_x_t *)
- addrp)->ac6x_conn;
+deliver:
+ if (IPCL_IS_NONSTR(connp)) {
+ if ((*connp->conn_upcalls->su_recv)
+ (connp->conn_upper_handle, mp, msgdsize(mp), 0, &error,
+ NULL) < 0) {
+ mutex_enter(&icmp->icmp_recv_lock);
+ if (error == ENOSPC) {
+ /*
+ * let's confirm while holding the lock
+ */
+ if ((*connp->conn_upcalls->su_recv)
+ (connp->conn_upper_handle, NULL, 0, 0,
+ &error, NULL) < 0) {
+ if (error == ENOSPC) {
+ connp->conn_flow_cntrld =
+ B_TRUE;
+ } else {
+ ASSERT(error == EOPNOTSUPP);
+ }
}
- icmp->icmp_v6src = ac6->ac6_laddr;
- (void) icmp_build_hdrs(icmp);
+ mutex_exit(&icmp->icmp_recv_lock);
+ } else {
+ ASSERT(error == EOPNOTSUPP);
+ icmp_queue_fallback(icmp, mp);
}
}
- mp1 = mp1->b_cont;
- }
- icmp->icmp_pending_op = -1;
- rw_exit(&icmp->icmp_rwlock);
- /*
- * Look for one or more appended ACK message added by
- * icmp_connect or icmp_disconnect.
- * If none found just send up the T_BIND_ACK.
- * icmp_connect has appended a T_OK_ACK and a
- * T_CONN_CON.
- * icmp_disconnect has appended a T_OK_ACK.
- */
- if (mp1 != NULL) {
- if (mp->b_cont == mp1)
- mp->b_cont = NULL;
- else {
- ASSERT(mp->b_cont->b_cont == mp1);
- mp->b_cont->b_cont = NULL;
- }
- freemsg(mp);
- mp = mp1;
- while (mp != NULL) {
- mp1 = mp->b_cont;
- mp->b_cont = NULL;
- putnext(connp->conn_rq, mp);
- mp = mp1;
- }
- return;
- }
- freemsg(mp->b_cont);
- mp->b_cont = NULL;
- putnext(connp->conn_rq, mp);
-}
-
-static void
-icmp_bind_error(conn_t *connp, mblk_t *mp)
-{
- icmp_t *icmp = connp->conn_icmp;
- struct T_error_ack *tea;
-
- tea = (struct T_error_ack *)mp->b_rptr;
- /*
- * If our O_T_BIND_REQ/T_BIND_REQ fails,
- * clear out the source address before
- * passing the message upstream.
- * If this was caused by a T_CONN_REQ
- * revert back to bound state.
- */
- rw_enter(&icmp->icmp_rwlock, RW_WRITER);
- if (icmp->icmp_state == TS_UNBND) {
- /*
- * TPI has not yet bound - bind sent by icmp_bind_proto.
- */
- freemsg(mp);
- rw_exit(&icmp->icmp_rwlock);
- return;
- }
- ASSERT(icmp->icmp_pending_op != -1);
- tea->ERROR_prim = icmp->icmp_pending_op;
- icmp->icmp_pending_op = -1;
-
- switch (tea->ERROR_prim) {
- case T_CONN_REQ:
- ASSERT(icmp->icmp_state == TS_DATA_XFER);
- /* Connect failed */
- /* Revert back to the bound source */
- icmp->icmp_v6src = icmp->icmp_bound_v6src;
- icmp->icmp_state = TS_IDLE;
- if (icmp->icmp_family == AF_INET6)
- (void) icmp_build_hdrs(icmp);
- break;
-
- case T_DISCON_REQ:
- case T_BIND_REQ:
- case O_T_BIND_REQ:
- V6_SET_ZERO(icmp->icmp_v6src);
- V6_SET_ZERO(icmp->icmp_bound_v6src);
- icmp->icmp_state = TS_UNBND;
- if (icmp->icmp_family == AF_INET6)
- (void) icmp_build_hdrs(icmp);
- break;
- default:
- break;
+ } else {
+ putnext(connp->conn_rq, mp);
}
- rw_exit(&icmp->icmp_rwlock);
- putnext(connp->conn_rq, mp);
+ ASSERT(MUTEX_NOT_HELD(&icmp->icmp_recv_lock));
}
/*
@@ -4121,7 +4132,8 @@ icmp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
(void) mi_mpprintf(mp, MI_COL_PTRFMT_STR "%s %s %s",
(void *)icmp,
- inet_ntop(AF_INET6, &icmp->icmp_v6dst, faddrbuf,
+ inet_ntop(AF_INET6, &icmp->icmp_v6dst.sin6_addr,
+ faddrbuf,
sizeof (faddrbuf)),
inet_ntop(AF_INET6, &icmp->icmp_v6src, laddrbuf,
sizeof (laddrbuf)),
@@ -4152,32 +4164,26 @@ icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err)
freemsg(mp);
}
-/*
- * This routine is called by icmp_wput to handle T_UNBIND_REQ messages.
- * After some error checking, the message is passed downstream to ip.
- */
-static void
-icmp_unbind(queue_t *q, mblk_t *mp)
+
+static int
+rawip_do_unbind(conn_t *connp)
{
- icmp_t *icmp = Q_TO_ICMP(q);
+ icmp_t *icmp = connp->conn_icmp;
rw_enter(&icmp->icmp_rwlock, RW_WRITER);
/* If a bind has not been done, we can't unbind. */
if (icmp->icmp_state == TS_UNBND || icmp->icmp_pending_op != -1) {
rw_exit(&icmp->icmp_rwlock);
- icmp_err_ack(q, mp, TOUTSTATE, 0);
- return;
+ return (-TOUTSTATE);
}
icmp->icmp_pending_op = T_UNBIND_REQ;
rw_exit(&icmp->icmp_rwlock);
/*
- * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK
- * and therefore ip_unbind must never return NULL.
+ * Call ip to unbind
*/
- mp = ip_unbind(q, mp);
- ASSERT(mp != NULL);
- ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK);
+
+ ip_unbind(connp);
/*
* Once we're unbound from IP, the pending operation may be cleared
@@ -4191,17 +4197,54 @@ icmp_unbind(queue_t *q, mblk_t *mp)
if (icmp->icmp_family == AF_INET6)
(void) icmp_build_hdrs(icmp);
rw_exit(&icmp->icmp_rwlock);
+ return (0);
+}
+
+/*
+ * TPI handler for T_UNBIND_REQ messages (dispatched from icmp_wput_other).
+ *
+ * The unbind itself is done by rawip_do_unbind().  If that fails, an error
+ * ack is sent and mp is not touched further here: a negative return value
+ * is a negated TLI error and is passed (made positive) as the third
+ * argument of icmp_err_ack(); a positive return value is passed as the
+ * fourth argument (presumably the system errno slot -- confirm against
+ * icmp_err_ack()).
+ *
+ * On success mp is converted into a T_OK_ACK and returned to the sender
+ * with qreply().
+ */
+static void
+icmp_tpi_unbind(queue_t *q, mblk_t *mp)
+{
+ conn_t *connp = Q_TO_CONN(q);
+ int error;
+
+ ASSERT(mp->b_cont == NULL);
+ error = rawip_do_unbind(connp);
+ if (error) {
+ if (error < 0) {
+ icmp_err_ack(q, mp, -error, 0);
+ } else {
+ icmp_err_ack(q, mp, 0, error);
+ }
+ return;
+ }
+
+ /*
+ * Convert mp into a T_OK_ACK
+ */
+
+ mp = mi_tpi_ok_ack_alloc(mp);
+ /*
+ * should not happen in practice... T_OK_ACK is smaller than the
+ * original message.
+ */
+ ASSERT(mp != NULL);
+ ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK);
qreply(q, mp);
}
+
/*
* Process IPv4 packets that already include an IP header.
* Used when IP_HDRINCL has been set (implicit for IPPROTO_RAW and
* IPPROTO_IGMP).
*/
-static void
-icmp_wput_hdrincl(queue_t *q, mblk_t *mp, icmp_t *icmp, ip4_pkt_t *pktinfop)
+static int
+icmp_wput_hdrincl(queue_t *q, conn_t *connp, mblk_t *mp, icmp_t *icmp,
+ ip4_pkt_t *pktinfop)
{
icmp_stack_t *is = icmp->icmp_is;
ipha_t *ipha;
@@ -4210,7 +4253,6 @@ icmp_wput_hdrincl(queue_t *q, mblk_t *mp, icmp_t *icmp, ip4_pkt_t *pktinfop)
mblk_t *mp1;
uint_t pkt_len;
ip_opt_info_t optinfo;
- conn_t *connp = icmp->icmp_connp;
optinfo.ip_opt_flags = 0;
optinfo.ip_opt_ill_index = 0;
@@ -4221,7 +4263,7 @@ icmp_wput_hdrincl(queue_t *q, mblk_t *mp, icmp_t *icmp, ip4_pkt_t *pktinfop)
ASSERT(icmp != NULL);
BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
freemsg(mp);
- return;
+ return (0);
}
ipha = (ipha_t *)mp->b_rptr;
}
@@ -4266,7 +4308,7 @@ icmp_wput_hdrincl(queue_t *q, mblk_t *mp, icmp_t *icmp, ip4_pkt_t *pktinfop)
BUMP_MIB(&is->is_rawip_mib,
rawipOutErrors);
freemsg(mp);
- return;
+ return (0);
}
ipha = (ipha_t *)mp->b_rptr;
}
@@ -4278,13 +4320,11 @@ icmp_wput_hdrincl(queue_t *q, mblk_t *mp, icmp_t *icmp, ip4_pkt_t *pktinfop)
pkt_len = ntohs(ipha->ipha_length)
+ icmp->icmp_ip_snd_options_len;
if (pkt_len > IP_MAXPACKET) {
- icmp_ud_err(q, mp, EMSGSIZE);
- return;
+ return (EMSGSIZE);
}
if (!(mp1 = allocb(ip_hdr_length + is->is_wroff_extra +
tp_hdr_len, BPRI_LO))) {
- icmp_ud_err(q, mp, ENOMEM);
- return;
+ return (ENOMEM);
}
mp1->b_rptr += is->is_wroff_extra;
mp1->b_wptr = mp1->b_rptr + ip_hdr_length;
@@ -4329,10 +4369,11 @@ icmp_wput_hdrincl(queue_t *q, mblk_t *mp, icmp_t *icmp, ip4_pkt_t *pktinfop)
mblk_setcred(mp, connp->conn_cred);
ip_output_options(connp, mp, q, IP_WPUT, &optinfo);
+ return (0);
}
-static boolean_t
-icmp_update_label(queue_t *q, icmp_t *icmp, mblk_t *mp, ipaddr_t dst)
+static int
+icmp_update_label(icmp_t *icmp, mblk_t *mp, ipaddr_t dst)
{
int err;
uchar_t opt_storage[IP_MAX_OPT_LENGTH];
@@ -4351,13 +4392,12 @@ icmp_update_label(queue_t *q, icmp_t *icmp, mblk_t *mp, ipaddr_t dst)
BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
DTRACE_PROBE4(
tx__ip__log__drop__updatelabel__icmp,
- char *, "queue(1) failed to update options(2) on mp(3)",
- queue_t *, q, char *, opt_storage, mblk_t *, mp);
- icmp_ud_err(q, mp, err);
- return (B_FALSE);
+ char *, "icmp(1) failed to update options(2) on mp(3)",
+ icmp_t *, icmp, char *, opt_storage, mblk_t *, mp);
+ return (err);
}
IN6_IPADDR_TO_V4MAPPED(dst, &icmp->icmp_v6lastdst);
- return (B_TRUE);
+ return (0);
}
/*
@@ -4371,7 +4411,6 @@ icmp_wput(queue_t *q, mblk_t *mp)
uchar_t *rptr = mp->b_rptr;
ipha_t *ipha;
mblk_t *mp1;
- int ip_hdr_length;
#define tudr ((struct T_unitdata_req *)rptr)
size_t ip_len;
conn_t *connp = Q_TO_CONN(q);
@@ -4382,7 +4421,12 @@ icmp_wput(queue_t *q, mblk_t *mp)
ipaddr_t v4dst;
ip4_pkt_t pktinfo;
ip4_pkt_t *pktinfop = &pktinfo;
- ip_opt_info_t optinfo;
+ ip6_pkt_t ipp_s; /* For ancillary data options */
+ ip6_pkt_t *ipp = &ipp_s;
+ int error;
+
+ ipp->ipp_fields = 0;
+ ipp->ipp_sticky_ignored = 0;
switch (mp->b_datap->db_type) {
case M_DATA:
@@ -4406,11 +4450,17 @@ icmp_wput(queue_t *q, mblk_t *mp)
if (is_system_labeled() &&
(!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) ||
V4_PART_OF_V6(icmp->icmp_v6lastdst) !=
- ipha->ipha_dst) &&
- !icmp_update_label(q, icmp, mp, ipha->ipha_dst)) {
- return;
+ ipha->ipha_dst)) {
+ error = icmp_update_label(icmp, mp,
+ ipha->ipha_dst);
+ if (error != 0) {
+ icmp_ud_err(q, mp, error);
+ return;
+ }
}
- icmp_wput_hdrincl(q, mp, icmp, NULL);
+ error = icmp_wput_hdrincl(q, connp, mp, icmp, NULL);
+ if (error != 0)
+ icmp_ud_err(q, mp, error);
return;
}
freemsg(mp);
@@ -4432,14 +4482,6 @@ icmp_wput(queue_t *q, mblk_t *mp)
/* Handle T_UNITDATA_REQ messages here. */
-
-
- if (icmp->icmp_state == TS_UNBND) {
- /* If a port has not been bound to the stream, fail. */
- BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
- icmp_ud_err(q, mp, EPROTO);
- return;
- }
mp1 = mp->b_cont;
if (mp1 == NULL) {
BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
@@ -4475,8 +4517,22 @@ icmp_wput(queue_t *q, mblk_t *mp)
* Destination is a native IPv6 address.
* Send out an IPv6 format packet.
*/
- icmp_wput_ipv6(q, mp, sin6, tudr->OPT_length);
- return;
+ if (tudr->OPT_length != 0) {
+ int error;
+
+ error = 0;
+ if (icmp_unitdata_opt_process(q, mp, &error,
+ (void *)ipp) < 0) {
+ /* failure */
+ BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
+ icmp_ud_err(q, mp, error);
+ return;
+ }
+ ASSERT(error == 0);
+ }
+
+ error = raw_ip_send_data_v6(q, connp, mp1, sin6, ipp);
+ goto done;
case AF_INET:
sin = (sin_t *)&rptr[tudr->DEST_offset];
@@ -4497,9 +4553,6 @@ icmp_wput(queue_t *q, mblk_t *mp)
pktinfop->ip4_ill_index = 0;
pktinfop->ip4_addr = INADDR_ANY;
- optinfo.ip_opt_flags = 0;
- optinfo.ip_opt_ill_index = 0;
-
/*
* If options passed in, feed it for verification and handling
@@ -4522,7 +4575,48 @@ icmp_wput(queue_t *q, mblk_t *mp)
* OPT_length/offset now potentially modified
* and contain option setting results
*/
+ }
+ error = raw_ip_send_data_v4(q, connp, mp1, v4dst, pktinfop);
+done:
+ if (error != 0) {
+ icmp_ud_err(q, mp, error);
+ return;
+ } else {
+ mp->b_cont = NULL;
+ freeb(mp);
+ }
+}
+
+
+/*
+ * Write-side handler that discards the message it is given.  On DEBUG
+ * builds a console notice is logged first.  The queue argument is unused.
+ * NOTE(review): presumably installed as the write put procedure while a
+ * socket is falling back to STREAMS -- confirm where it is referenced.
+ */
+/* ARGSUSED */
+static void
+icmp_wput_fallback(queue_t *q, mblk_t *mp)
+{
+#ifdef DEBUG
+ cmn_err(CE_CONT, "icmp_wput_fallback: Message during fallback \n");
+#endif
+ freemsg(mp);
+}
+
+static int
+raw_ip_send_data_v4(queue_t *q, conn_t *connp, mblk_t *mp, ipaddr_t v4dst,
+ ip4_pkt_t *pktinfop)
+{
+ ipha_t *ipha;
+ size_t ip_len;
+ icmp_t *icmp = connp->conn_icmp;
+ icmp_stack_t *is = icmp->icmp_is;
+ int ip_hdr_length;
+ ip_opt_info_t optinfo;
+
+ optinfo.ip_opt_flags = 0;
+ optinfo.ip_opt_ill_index = 0;
+
+ if (icmp->icmp_state == TS_UNBND) {
+ /* If a port has not been bound to the stream, fail. */
+ BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
+ return (EPROTO);
}
if (v4dst == INADDR_ANY)
@@ -4531,35 +4625,34 @@ icmp_wput(queue_t *q, mblk_t *mp)
/* Check if our saved options are valid; update if not */
if (is_system_labeled() &&
(!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) ||
- V4_PART_OF_V6(icmp->icmp_v6lastdst) != v4dst) &&
- !icmp_update_label(q, icmp, mp, v4dst)) {
- return;
- }
+ V4_PART_OF_V6(icmp->icmp_v6lastdst) != v4dst)) {
+ int error = icmp_update_label(icmp, mp, v4dst);
- /* Protocol 255 contains full IP headers */
- if (icmp->icmp_hdrincl) {
- freeb(mp);
- icmp_wput_hdrincl(q, mp1, icmp, pktinfop);
- return;
+ if (error != 0)
+ return (error);
}
+ /* Protocol 255 contains full IP headers */
+ if (icmp->icmp_hdrincl)
+ return (icmp_wput_hdrincl(q, connp, mp, icmp, pktinfop));
/* Add an IP header */
ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len;
- ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length];
- if ((uchar_t *)ipha < mp1->b_datap->db_base ||
- mp1->b_datap->db_ref != 1 ||
+ ipha = (ipha_t *)&mp->b_rptr[-ip_hdr_length];
+ if ((uchar_t *)ipha < mp->b_datap->db_base ||
+ mp->b_datap->db_ref != 1 ||
!OK_32PTR(ipha)) {
+ mblk_t *mp1;
if (!(mp1 = allocb(ip_hdr_length + is->is_wroff_extra,
BPRI_LO))) {
BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
- icmp_ud_err(q, mp, ENOMEM);
- return;
+ return (ENOMEM);
}
- mp1->b_cont = mp->b_cont;
+ mp1->b_cont = mp;
ipha = (ipha_t *)mp1->b_datap->db_lim;
mp1->b_wptr = (uchar_t *)ipha;
ipha = (ipha_t *)((uchar_t *)ipha - ip_hdr_length);
+ mp = mp1;
}
#ifdef _BIG_ENDIAN
/* Set version, header length, and tos */
@@ -4604,11 +4697,11 @@ icmp_wput(queue_t *q, mblk_t *mp)
ipha->ipha_ident = IP_HDR_INCLUDED;
/* Finish common formatting of the packet. */
- mp1->b_rptr = (uchar_t *)ipha;
+ mp->b_rptr = (uchar_t *)ipha;
- ip_len = mp1->b_wptr - (uchar_t *)ipha;
- if (mp1->b_cont != NULL)
- ip_len += msgdsize(mp1->b_cont);
+ ip_len = mp->b_wptr - (uchar_t *)ipha;
+ if (mp->b_cont != NULL)
+ ip_len += msgdsize(mp->b_cont);
/*
* Set the length into the IP header.
@@ -4618,13 +4711,11 @@ icmp_wput(queue_t *q, mblk_t *mp)
*/
if (ip_len > IP_MAXPACKET) {
BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
- icmp_ud_err(q, mp, EMSGSIZE);
- return;
+ return (EMSGSIZE);
}
ipha->ipha_length = htons((uint16_t)ip_len);
/*
- * Copy in the destination address from the T_UNITDATA
- * request
+ * Copy in the destination address supplied by the caller
*/
ipha->ipha_dst = v4dst;
@@ -4645,16 +4736,14 @@ icmp_wput(queue_t *q, mblk_t *mp)
(void) ip_massage_options(ipha, is->is_netstack);
}
- freeb(mp);
BUMP_MIB(&is->is_rawip_mib, rawipOutDatagrams);
- mblk_setcred(mp1, connp->conn_cred);
- ip_output_options(Q_TO_CONN(q), mp1, q, IP_WPUT, &optinfo);
-#undef ipha
-#undef tudr
+ mblk_setcred(mp, connp->conn_cred);
+ ip_output_options(connp, mp, q, IP_WPUT, &optinfo);
+ return (0);
}
-static boolean_t
-icmp_update_label_v6(queue_t *wq, icmp_t *icmp, mblk_t *mp, in6_addr_t *dst)
+static int
+icmp_update_label_v6(icmp_t *icmp, mblk_t *mp, in6_addr_t *dst)
{
int err;
uchar_t opt_storage[TSOL_MAX_IPV6_OPTION];
@@ -4672,33 +4761,30 @@ icmp_update_label_v6(queue_t *wq, icmp_t *icmp, mblk_t *mp, in6_addr_t *dst)
BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
DTRACE_PROBE4(
tx__ip__log__drop__updatelabel__icmp6,
- char *, "queue(1) failed to update options(2) on mp(3)",
- queue_t *, wq, char *, opt_storage, mblk_t *, mp);
- icmp_ud_err(wq, mp, err);
- return (B_FALSE);
+ char *, "icmp(1) failed to update options(2) on mp(3)",
+ icmp_t *, icmp, char *, opt_storage, mblk_t *, mp);
+ return (err);
}
icmp->icmp_v6lastdst = *dst;
- return (B_TRUE);
+ return (0);
}
/*
- * icmp_wput_ipv6():
+ * raw_ip_send_data_v6():
* Assumes that icmp_wput did some sanity checking on the destination
* address, but that the label may not yet be correct.
*/
-void
-icmp_wput_ipv6(queue_t *q, mblk_t *mp, sin6_t *sin6, t_scalar_t tudr_optlen)
+static int
+raw_ip_send_data_v6(queue_t *q, conn_t *connp, mblk_t *mp, sin6_t *sin6,
+ ip6_pkt_t *ipp)
{
ip6_t *ip6h;
- ip6i_t *ip6i; /* mp1->b_rptr even if no ip6i_t */
- mblk_t *mp1;
+ ip6i_t *ip6i; /* mp->b_rptr even if no ip6i_t */
int ip_hdr_len = IPV6_HDR_LEN;
size_t ip_len;
- icmp_t *icmp = Q_TO_ICMP(q);
+ icmp_t *icmp = connp->conn_icmp;
icmp_stack_t *is = icmp->icmp_is;
- ip6_pkt_t ipp_s; /* For ancillary data options */
- ip6_pkt_t *ipp = &ipp_s;
ip6_pkt_t *tipp;
uint32_t csum = 0;
uint_t ignore = 0;
@@ -4716,30 +4802,10 @@ icmp_wput_ipv6(queue_t *q, mblk_t *mp, sin6_t *sin6, t_scalar_t tudr_optlen)
*/
if (IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6src)) {
BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
- icmp_ud_err(q, mp, EADDRNOTAVAIL);
- return;
- }
-
- ipp->ipp_fields = 0;
- ipp->ipp_sticky_ignored = 0;
-
- /*
- * If TPI options passed in, feed it for verification and handling
- */
- if (tudr_optlen != 0) {
- int error;
-
- if (icmp_unitdata_opt_process(q, mp, &error,
- (void *)ipp) < 0) {
- /* failure */
- BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
- icmp_ud_err(q, mp, error);
- return;
- }
- ignore = ipp->ipp_sticky_ignored;
- ASSERT(error == 0);
+ return (EADDRNOTAVAIL);
}
+ ignore = ipp->ipp_sticky_ignored;
if (sin6->sin6_scope_id != 0 &&
IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) {
/*
@@ -4763,9 +4829,12 @@ icmp_wput_ipv6(queue_t *q, mblk_t *mp, sin6_t *sin6, t_scalar_t tudr_optlen)
* avoid blowing up our stack here.
*/
if (is_system_labeled() &&
- !IN6_ARE_ADDR_EQUAL(&icmp->icmp_v6lastdst, &ip6_dst) &&
- !icmp_update_label_v6(q, icmp, mp, &ip6_dst)) {
- return;
+ !IN6_ARE_ADDR_EQUAL(&icmp->icmp_v6lastdst, &ip6_dst)) {
+ int error = 0;
+
+ error = icmp_update_label_v6(icmp, mp, &ip6_dst);
+ if (error != 0)
+ return (error);
}
/*
@@ -4933,28 +5002,30 @@ no_options:
ip_hdr_len += sizeof (ip6i_t);
/* check/fix buffer config, setup pointers into it */
- mp1 = mp->b_cont;
- ip6h = (ip6_t *)&mp1->b_rptr[-ip_hdr_len];
- if ((mp1->b_datap->db_ref != 1) ||
- ((unsigned char *)ip6h < mp1->b_datap->db_base) ||
+ ip6h = (ip6_t *)&mp->b_rptr[-ip_hdr_len];
+ if ((mp->b_datap->db_ref != 1) ||
+ ((unsigned char *)ip6h < mp->b_datap->db_base) ||
!OK_32PTR(ip6h)) {
+ mblk_t *mp1;
+
/* Try to get everything in a single mblk next time */
if (ip_hdr_len > icmp->icmp_max_hdr_len) {
icmp->icmp_max_hdr_len = ip_hdr_len;
- (void) mi_set_sth_wroff(RD(q),
+
+ (void) proto_set_tx_wroff(q == NULL ? NULL:RD(q), connp,
icmp->icmp_max_hdr_len + is->is_wroff_extra);
}
mp1 = allocb(ip_hdr_len + is->is_wroff_extra, BPRI_LO);
if (!mp1) {
BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
- icmp_ud_err(q, mp, ENOMEM);
- return;
+ return (ENOMEM);
}
- mp1->b_cont = mp->b_cont;
+ mp1->b_cont = mp;
mp1->b_wptr = mp1->b_datap->db_lim;
ip6h = (ip6_t *)(mp1->b_wptr - ip_hdr_len);
+ mp = mp1;
}
- mp1->b_rptr = (unsigned char *)ip6h;
+ mp->b_rptr = (unsigned char *)ip6h;
ip6i = (ip6i_t *)ip6h;
#define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? &icmp->icmp_sticky_ipp : ipp)
@@ -5140,27 +5211,25 @@ no_options:
* We know that all extension headers will be in the same mblk
* as the IPv6 header.
*/
- rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr);
+ rth = ip_find_rthdr_v6(ip6h, mp->b_wptr);
if (rth != NULL && rth->ip6r_segleft != 0) {
if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) {
/*
* Drop packet - only support Type 0 routing.
* Notify the application as well.
*/
- icmp_ud_err(q, mp, EPROTO);
BUMP_MIB(&is->is_rawip_mib,
rawipOutErrors);
- return;
+ return (EPROTO);
}
/*
* rth->ip6r_len is twice the number of
* addresses in the header
*/
if (rth->ip6r_len & 0x1) {
- icmp_ud_err(q, mp, EPROTO);
BUMP_MIB(&is->is_rawip_mib,
rawipOutErrors);
- return;
+ return (EPROTO);
}
/*
* Shuffle the routing header and ip6_dst
@@ -5176,17 +5245,16 @@ no_options:
* for subsequent hops.
*/
if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) {
- icmp_ud_err(q, mp, EADDRNOTAVAIL);
BUMP_MIB(&is->is_rawip_mib,
rawipOutErrors);
- return;
+ return (EADDRNOTAVAIL);
}
}
}
- ip_len = mp1->b_wptr - (uchar_t *)ip6h - IPV6_HDR_LEN;
- if (mp1->b_cont != NULL)
- ip_len += msgdsize(mp1->b_cont);
+ ip_len = mp->b_wptr - (uchar_t *)ip6h - IPV6_HDR_LEN;
+ if (mp->b_cont != NULL)
+ ip_len += msgdsize(mp->b_cont);
/*
* Set the length into the IP header.
@@ -5196,11 +5264,10 @@ no_options:
*/
if (ip_len > IP_MAXPACKET) {
BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
- icmp_ud_err(q, mp, EMSGSIZE);
- return;
+ return (EMSGSIZE);
}
if (icmp->icmp_proto == IPPROTO_ICMPV6 || icmp->icmp_raw_checksum) {
- uint_t cksum_off; /* From ip6i == mp1->b_rptr */
+ uint_t cksum_off; /* From ip6i == mp->b_rptr */
uint16_t *cksum_ptr;
uint_t ext_hdrs_len;
@@ -5216,14 +5283,14 @@ no_options:
* Note: ICMPv6 must always checksum the packet.
*/
cksum_off = ip_hdr_len + icmp->icmp_checksum_off;
- if (cksum_off + sizeof (uint16_t) > mp1->b_wptr - mp1->b_rptr) {
- if (!pullupmsg(mp1, cksum_off + sizeof (uint16_t))) {
+ if (cksum_off + sizeof (uint16_t) > mp->b_wptr - mp->b_rptr) {
+ if (!pullupmsg(mp, cksum_off + sizeof (uint16_t))) {
BUMP_MIB(&is->is_rawip_mib,
rawipOutErrors);
freemsg(mp);
- return;
+ return (0);
}
- ip6i = (ip6i_t *)mp1->b_rptr;
+ ip6i = (ip6i_t *)mp->b_rptr;
if (ip6i->ip6i_nxt == IPPROTO_RAW)
ip6h = (ip6_t *)&ip6i[1];
else
@@ -5244,11 +5311,10 @@ no_options:
#endif
ip6h->ip6_plen = (uint16_t)ip_len;
- freeb(mp);
-
/* We're done. Pass the packet to IP */
BUMP_MIB(&is->is_rawip_mib, rawipOutDatagrams);
- ip_output_v6(icmp->icmp_connp, mp1, q, IP_WPUT);
+ ip_output_v6(icmp->icmp_connp, mp, q, IP_WPUT);
+ return (0);
}
static void
@@ -5281,10 +5347,10 @@ icmp_wput_other(queue_t *q, mblk_t *mp)
return;
case O_T_BIND_REQ:
case T_BIND_REQ:
- icmp_bind(q, mp);
+ icmp_tpi_bind(q, mp);
return;
case T_CONN_REQ:
- icmp_connect(q, mp);
+ icmp_tpi_connect(q, mp);
return;
case T_CAPABILITY_REQ:
icmp_capability_req(q, mp);
@@ -5301,7 +5367,7 @@ icmp_wput_other(queue_t *q, mblk_t *mp)
icmp_ud_err(q, mp, EADDRNOTAVAIL);
return;
case T_UNBIND_REQ:
- icmp_unbind(q, mp);
+ icmp_tpi_unbind(q, mp);
return;
case T_SVR4_OPTMGMT_REQ:
@@ -5319,7 +5385,7 @@ icmp_wput_other(queue_t *q, mblk_t *mp)
return;
case T_DISCON_REQ:
- icmp_disconnect(q, mp);
+ icmp_tpi_disconnect(q, mp);
return;
/* The following TPI message is not supported by icmp. */
@@ -5375,6 +5441,15 @@ icmp_wput_other(queue_t *q, mblk_t *mp)
return;
}
break;
+ case _SIOCSOCKFALLBACK:
+ /*
+ * socket is falling back to be a
+ * streams socket. Nothing to do
+ */
+ iocp->ioc_count = 0;
+ iocp->ioc_rval = 0;
+ qreply(q, mp);
+ return;
default:
break;
}
@@ -5398,10 +5473,8 @@ icmp_wput_iocdata(queue_t *q, mblk_t *mp)
mblk_t *mp1;
STRUCT_HANDLE(strbuf, sb);
icmp_t *icmp;
- in6_addr_t v6addr;
- ipaddr_t v4addr;
- uint32_t flowinfo = 0;
- int addrlen;
+ uint_t addrlen;
+ uint_t error;
/* Make sure it is one of ours. */
switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
@@ -5458,81 +5531,34 @@ icmp_wput_iocdata(queue_t *q, mblk_t *mp)
mi_copy_done(q, mp, EINVAL);
return;
}
+
+ mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE);
+
+ if (mp1 == NULL)
+ return;
+
+ rw_enter(&icmp->icmp_rwlock, RW_READER);
switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
case TI_GETMYNAME:
- if (icmp->icmp_family == AF_INET) {
- ASSERT(icmp->icmp_ipversion == IPV4_VERSION);
- if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) &&
- !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) {
- v4addr = V4_PART_OF_V6(icmp->icmp_v6src);
- } else {
- /*
- * INADDR_ANY
- * icmp_v6src is not set, we might be bound to
- * broadcast/multicast. Use icmp_bound_v6src as
- * local address instead (that could
- * also still be INADDR_ANY)
- */
- v4addr = V4_PART_OF_V6(icmp->icmp_bound_v6src);
- }
- } else {
- /* icmp->icmp_family == AF_INET6 */
- if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) {
- v6addr = icmp->icmp_v6src;
- } else {
- /*
- * UNSPECIFIED
- * icmp_v6src is not set, we might be bound to
- * broadcast/multicast. Use icmp_bound_v6src as
- * local address instead (that could
- * also still be UNSPECIFIED)
- */
- v6addr = icmp->icmp_bound_v6src;
- }
- }
+ error = rawip_do_getsockname(icmp, (void *)mp1->b_rptr,
+ &addrlen);
break;
case TI_GETPEERNAME:
- if (icmp->icmp_family == AF_INET) {
- ASSERT(icmp->icmp_ipversion == IPV4_VERSION);
- v4addr = V4_PART_OF_V6(icmp->icmp_v6dst);
- } else {
- /* icmp->icmp_family == AF_INET6) */
- v6addr = icmp->icmp_v6dst;
- flowinfo = icmp->icmp_flowinfo;
- }
+ error = rawip_do_getpeername(icmp, (void *)mp1->b_rptr,
+ &addrlen);
break;
- default:
- mi_copy_done(q, mp, EPROTO);
- return;
}
- mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE);
- if (!mp1)
- return;
-
- if (icmp->icmp_family == AF_INET) {
- sin_t *sin;
+ rw_exit(&icmp->icmp_rwlock);
- STRUCT_FSET(sb, len, (int)sizeof (sin_t));
- sin = (sin_t *)mp1->b_rptr;
- mp1->b_wptr = (uchar_t *)&sin[1];
- *sin = sin_null;
- sin->sin_family = AF_INET;
- sin->sin_addr.s_addr = v4addr;
+ if (error != 0) {
+ mi_copy_done(q, mp, error);
} else {
- /* icmp->icmp_family == AF_INET6 */
- sin6_t *sin6;
+ mp1->b_wptr += addrlen;
+ STRUCT_FSET(sb, len, addrlen);
- ASSERT(icmp->icmp_family == AF_INET6);
- STRUCT_FSET(sb, len, (int)sizeof (sin6_t));
- sin6 = (sin6_t *)mp1->b_rptr;
- mp1->b_wptr = (uchar_t *)&sin6[1];
- *sin6 = sin6_null;
- sin6->sin6_family = AF_INET6;
- sin6->sin6_flowinfo = flowinfo;
- sin6->sin6_addr = v6addr;
+ /* Copy out the address */
+ mi_copyout(q, mp);
}
- /* Copy out the address */
- mi_copyout(q, mp);
}
static int
@@ -5565,7 +5591,7 @@ icmp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp,
}
void
-icmp_ddi_init(void)
+icmp_ddi_g_init(void)
{
icmp_max_optsize = optcom_max_optsize(icmp_opt_obj.odb_opt_des_arr,
icmp_opt_obj.odb_opt_arr_cnt);
@@ -5579,11 +5605,13 @@ icmp_ddi_init(void)
}
void
-icmp_ddi_destroy(void)
+icmp_ddi_g_destroy(void)
{
netstack_unregister(NS_ICMP);
}
+#define INET_NAME "ip"
+
/*
* Initialize the ICMP stack instance.
*/
@@ -5592,6 +5620,8 @@ rawip_stack_init(netstackid_t stackid, netstack_t *ns)
{
icmp_stack_t *is;
icmpparam_t *pa;
+ int error = 0;
+ major_t major;
is = (icmp_stack_t *)kmem_zalloc(sizeof (*is), KM_SLEEP);
is->is_netstack = ns;
@@ -5603,6 +5633,10 @@ rawip_stack_init(netstackid_t stackid, netstack_t *ns)
(void) icmp_param_register(&is->is_nd,
is->is_param_arr, A_CNT(icmp_param_arr));
is->is_ksp = rawip_kstat_init(stackid);
+
+ major = mod_name_to_major(INET_NAME);
+ error = ldi_ident_from_major(major, &is->is_ldi_ident);
+ ASSERT(error == 0);
return (is);
}
@@ -5620,6 +5654,7 @@ rawip_stack_fini(netstackid_t stackid, void *arg)
rawip_kstat_fini(stackid, is->is_ksp);
is->is_ksp = NULL;
+ ldi_ident_release(is->is_ldi_ident);
kmem_free(is, sizeof (*is));
}
@@ -5691,3 +5726,848 @@ rawip_kstat_update(kstat_t *ksp, int rw)
netstack_rele(ns);
return (0);
}
+
+/*
+ * accept() downcall for raw IP sockets.  The operation is not implemented
+ * for this socket type, so every call fails with EOPNOTSUPP and none of
+ * the arguments are examined.
+ */
+/* ARGSUSED */
+int
+rawip_accept(sock_lower_handle_t lproto_handle,
+    sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle,
+    cred_t *cr)
+{
+	return (EOPNOTSUPP);
+}
+
+/*
+ * bind() downcall for raw IP sockets.
+ *
+ * A NULL address is treated as a request to unbind; otherwise the endpoint
+ * is bound to sa.  The worker routines may return a negated TLI error:
+ * -TOUTSTATE is reported as EINVAL and every other negative value is
+ * translated with proto_tlitosyserr().  Non-negative values are returned
+ * unchanged.
+ */
+/* ARGSUSED */
+int
+rawip_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa,
+    socklen_t len, cred_t *cr)
+{
+	conn_t	*connp = (conn_t *)proto_handle;
+	int	rval;
+
+	/* Binding to a NULL address really means unbind */
+	rval = (sa == NULL) ? rawip_do_unbind(connp) :
+	    rawip_do_bind(connp, sa, len);
+
+	if (rval >= 0)
+		return (rval);
+	if (rval == -TOUTSTATE)
+		return (EINVAL);
+	return (proto_tlitosyserr(-rval));
+}
+
+/*
+ * Bind the endpoint to the wildcard address of its own address family
+ * (INADDR_ANY for AF_INET, the all-zero address for AF_INET6).  A negated
+ * TLI error from rawip_do_bind() is converted to an errno value.
+ */
+static int
+rawip_implicit_bind(conn_t *connp)
+{
+	sin6_t		addrbuf;	/* storage used for either family */
+	socklen_t	addrlen;
+	int		rval;
+
+	if (connp->conn_icmp->icmp_family != AF_INET) {
+		sin6_t	*v6 = &addrbuf;
+
+		ASSERT(connp->conn_icmp->icmp_family == AF_INET6);
+		addrlen = sizeof (sin6_t);
+		*v6 = sin6_null;
+		v6->sin6_family = AF_INET6;
+		V6_SET_ZERO(v6->sin6_addr);
+	} else {
+		sin_t	*v4 = (sin_t *)&addrbuf;
+
+		addrlen = sizeof (struct sockaddr_in);
+		*v4 = sin_null;
+		v4->sin_family = AF_INET;
+		v4->sin_addr.s_addr = INADDR_ANY;
+	}
+
+	rval = rawip_do_bind(connp, (struct sockaddr *)&addrbuf, addrlen);
+	if (rval < 0)
+		rval = proto_tlitosyserr(-rval);
+
+	return (rval);
+}
+
+/*
+ * Unbind the endpoint, converting a negated TLI error from
+ * rawip_do_unbind() into an errno value.
+ */
+static int
+rawip_unbind(conn_t *connp)
+{
+	int	rval = rawip_do_unbind(connp);
+
+	return ((rval < 0) ? proto_tlitosyserr(-rval) : rval);
+}
+
+/*
+ * listen() downcall for raw IP sockets.  Not implemented for this socket
+ * type; always fails with EOPNOTSUPP.
+ */
+/* ARGSUSED */
+int
+rawip_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr)
+{
+	return (EOPNOTSUPP);
+}
+
+/*
+ * connect() downcall for raw IP sockets.
+ *
+ * A NULL sa requests a disconnect, which is only accepted while the
+ * endpoint is in TS_DATA_XFER.  Otherwise the address is validated against
+ * the endpoint's family, the endpoint is implicitly bound if it is still
+ * TS_UNBND, and rawip_do_connect() is called.  On success *id is set to 0
+ * and the upper layer is told via the su_connected upcall.  A negative
+ * (TLI) result from rawip_do_connect() is converted to an errno value.
+ */
+/* ARGSUSED */
+int
+rawip_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa,
+ socklen_t len, sock_connid_t *id, cred_t *cr)
+{
+ conn_t *connp = (conn_t *)proto_handle;
+ icmp_t *icmp = connp->conn_icmp;
+ int error;
+ boolean_t did_bind = B_FALSE;
+
+ if (sa == NULL) {
+ /*
+ * Disconnect
+ * Make sure we are connected
+ */
+ if (icmp->icmp_state != TS_DATA_XFER)
+ return (EINVAL);
+
+ error = icmp_disconnect(connp);
+ return (error);
+ }
+
+ error = proto_verify_ip_addr(icmp->icmp_family, sa, len);
+ if (error != 0)
+ return (error);
+
+ /* do an implicit bind if necessary */
+ if (icmp->icmp_state == TS_UNBND) {
+ error = rawip_implicit_bind(connp);
+ /*
+ * We could be racing with an actual bind, in which case
+ * we would see EPROTO. We cross our fingers and try
+ * to connect.
+ */
+ if (!(error == 0 || error == EPROTO))
+ return (error);
+ /*
+ * NOTE(review): did_bind is also set when the implicit bind
+ * returned EPROTO, i.e. when this call did not perform the
+ * bind itself; a failed connect then unbinds anyway.  Confirm
+ * that this is intended.
+ */
+ did_bind = B_TRUE;
+ }
+
+ /*
+ * set SO_DGRAM_ERRIND
+ */
+ icmp->icmp_dgram_errind = B_TRUE;
+
+ error = rawip_do_connect(connp, sa, len);
+
+ /* Undo the implicit bind when the connect did not succeed. */
+ if (error != 0 && did_bind) {
+ int unbind_err;
+
+ unbind_err = rawip_unbind(connp);
+ ASSERT(unbind_err == 0);
+ }
+
+ if (error == 0) {
+ *id = 0;
+ (*connp->conn_upcalls->su_connected)
+ (connp->conn_upper_handle, 0, NULL, -1);
+ } else if (error < 0) {
+ error = proto_tlitosyserr(-error);
+ }
+ return (error);
+}
+
+/*
+ * Fallback downcall: convert a raw IP socket into a STREAMS-based endpoint
+ * on queue pair q.
+ *
+ * The sequence is: attach the conn to q, send an M_SETOPTS message up the
+ * read side, close the helper stream, collect the capability, address and
+ * option state, hand that state to quiesced_cb, and finally push any
+ * messages held on the icmp fallback queue up the read side before
+ * clearing IPCL_NONSTR.
+ */
+/* ARGSUSED */
+void
+rawip_fallback(sock_lower_handle_t proto_handle, queue_t *q,
+ boolean_t direct_sockfs, so_proto_quiesced_cb_t quiesced_cb)
+{
+ conn_t *connp = (conn_t *)proto_handle;
+ icmp_t *icmp;
+ struct T_capability_ack tca;
+ struct sockaddr_in6 laddr, faddr;
+ socklen_t laddrlen, faddrlen;
+ short opts;
+ struct stroptions *stropt;
+ mblk_t *stropt_mp;
+ int error;
+
+ icmp = connp->conn_icmp;
+
+ stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL);
+
+ /*
+ * setup the fallback stream that was allocated
+ */
+ /* The incoming q_ptr values are saved on the conn before being replaced. */
+ connp->conn_dev = (dev_t)RD(q)->q_ptr;
+ connp->conn_minor_arena = WR(q)->q_ptr;
+
+ RD(q)->q_ptr = WR(q)->q_ptr = connp;
+
+ WR(q)->q_qinfo = &icmpwinit;
+
+ connp->conn_rq = RD(q);
+ connp->conn_wq = WR(q);
+
+ /* Notify stream head about options before sending up data */
+ stropt_mp->b_datap->db_type = M_SETOPTS;
+ stropt_mp->b_wptr += sizeof (*stropt);
+ stropt = (struct stroptions *)stropt_mp->b_rptr;
+ stropt->so_flags = SO_WROFF | SO_HIWAT;
+ stropt->so_wroff =
+ (ushort_t)(icmp->icmp_max_hdr_len + icmp->icmp_is->is_wroff_extra);
+ stropt->so_hiwat = icmp->icmp_recv_hiwat;
+ putnext(RD(q), stropt_mp);
+
+ /*
+ * free helper stream
+ */
+ ip_close_helper_stream(connp);
+
+ /*
+ * Collect the information needed to sync with the sonode
+ */
+ icmp_do_capability_ack(icmp, &tca, TC1_INFO);
+
+ laddrlen = faddrlen = sizeof (sin6_t);
+ (void) rawip_getsockname((sock_lower_handle_t)connp,
+ (struct sockaddr *)&laddr, &laddrlen, NULL);
+ error = rawip_getpeername((sock_lower_handle_t)connp,
+ (struct sockaddr *)&faddr, &faddrlen, NULL);
+ /* A getpeername failure is reported as a zero-length peer address. */
+ if (error != 0)
+ faddrlen = 0;
+ opts = 0;
+ if (icmp->icmp_dgram_errind)
+ opts |= SO_DGRAM_ERRIND;
+ if (icmp->icmp_dontroute)
+ opts |= SO_DONTROUTE;
+
+ /*
+ * Once we grab the drain lock, no data will be sent up
+ * to the socket. So we notify the socket that the endpoint
+ * is quiescent and it's therefore safe to move data from
+ * the socket to the stream head.
+ *
+ * NOTE(review): no drain lock is taken in this function; presumably
+ * it is acquired inside quiesced_cb -- confirm.
+ */
+ (*quiesced_cb)(connp->conn_upper_handle, q, &tca,
+ (struct sockaddr *)&laddr, laddrlen,
+ (struct sockaddr *)&faddr, faddrlen, opts);
+
+ /*
+ * push up any packets that were queued in icmp_t
+ */
+
+ mutex_enter(&icmp->icmp_recv_lock);
+ while (icmp->icmp_fallback_queue_head != NULL) {
+ mblk_t *mp;
+
+ mp = icmp->icmp_fallback_queue_head;
+ icmp->icmp_fallback_queue_head = mp->b_next;
+ mp->b_next = NULL;
+ /* icmp_recv_lock is dropped around putnext() and retaken after. */
+ mutex_exit(&icmp->icmp_recv_lock);
+ putnext(RD(q), mp);
+ mutex_enter(&icmp->icmp_recv_lock);
+ }
+ /* The head is NULL here, so this also resets the tail to NULL. */
+ icmp->icmp_fallback_queue_tail = icmp->icmp_fallback_queue_head;
+ /*
+ * No longer a streams less socket
+ */
+ connp->conn_flags &= ~IPCL_NONSTR;
+ mutex_exit(&icmp->icmp_recv_lock);
+ ASSERT(icmp->icmp_fallback_queue_head == NULL &&
+ icmp->icmp_fallback_queue_tail == NULL);
+
+ ASSERT(connp->conn_ref >= 1);
+}
+
+/*
+ * Socket create entry point for raw IP.
+ *
+ * Only SOCK_RAW sockets in AF_INET or AF_INET6 are accepted; anything else
+ * fails with EPROTONOSUPPORT.  On success the new conn is returned as the
+ * lower handle, *sock_downcalls is pointed at sock_rawip_downcalls and
+ * *smodep is set to SM_ATOMIC.  On failure NULL is returned and *errorp
+ * holds the error.
+ */
+/* ARGSUSED */
+sock_lower_handle_t
+rawip_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls,
+ uint_t *smodep, int *errorp, int flags, cred_t *credp)
+{
+ conn_t *connp;
+
+ if (type != SOCK_RAW || (family != AF_INET && family != AF_INET6)) {
+ *errorp = EPROTONOSUPPORT;
+ return (NULL);
+ }
+
+ connp = icmp_open(family, credp, errorp, flags);
+ if (connp != NULL) {
+ icmp_stack_t *is;
+
+ is = connp->conn_icmp->icmp_is;
+ /* Mark the conn as a non-STREAMS socket. */
+ connp->conn_flags |= IPCL_NONSTR;
+
+ if (connp->conn_icmp->icmp_family == AF_INET6) {
+ /* Build initial header template for transmit */
+ rw_enter(&connp->conn_icmp->icmp_rwlock, RW_WRITER);
+ if ((*errorp =
+ icmp_build_hdrs(connp->conn_icmp)) != 0) {
+ rw_exit(&connp->conn_icmp->icmp_rwlock);
+ /*
+ * NOTE(review): this path tears down with
+ * ipcl_conn_destroy() while the helper-stream
+ * failure below uses rawip_do_close(); confirm
+ * the difference is intentional.
+ */
+ ipcl_conn_destroy(connp);
+ return (NULL);
+ }
+ rw_exit(&connp->conn_icmp->icmp_rwlock);
+ }
+
+ /* Seed the per-endpoint high-water marks from the stack instance. */
+ connp->conn_icmp->icmp_recv_hiwat = is->is_recv_hiwat;
+ connp->conn_icmp->icmp_xmit_hiwat = is->is_xmit_hiwat;
+
+ if ((*errorp = ip_create_helper_stream(connp,
+ is->is_ldi_ident)) != 0) {
+ cmn_err(CE_CONT, "create of IP helper stream failed\n");
+ (void) rawip_do_close(connp);
+ return (NULL);
+ }
+
+ /* Setup is complete: clear CONN_INCIPIENT under conn_lock. */
+ mutex_enter(&connp->conn_lock);
+ connp->conn_state_flags &= ~CONN_INCIPIENT;
+ mutex_exit(&connp->conn_lock);
+ *sock_downcalls = &sock_rawip_downcalls;
+ *smodep = SM_ATOMIC;
+ } else {
+ ASSERT(*errorp != 0);
+ }
+
+ return ((sock_lower_handle_t)connp);
+}
+
+/*
+ * Activate downcall: remember the upper-layer handle and upcall vector on
+ * the conn, then publish the protocol's write offset, receive water marks
+ * and packet size limits to the upper layer via su_set_proto_props.
+ */
+/* ARGSUSED */
+void
+rawip_activate(sock_lower_handle_t proto_handle,
+    sock_upper_handle_t sock_handle, sock_upcalls_t *sock_upcalls, int flags,
+    cred_t *cr)
+{
+	conn_t			*connp = (conn_t *)proto_handle;
+	icmp_t			*icmp = connp->conn_icmp;
+	icmp_stack_t		*is = icmp->icmp_is;
+	struct sock_proto_props	props;
+
+	connp->conn_upcalls = sock_upcalls;
+	connp->conn_upper_handle = sock_handle;
+
+	props.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT |
+	    SOCKOPT_RCVLOWAT | SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ |
+	    SOCKOPT_MINPSZ;
+	props.sopp_wroff = icmp->icmp_max_hdr_len + is->is_wroff_extra;
+	props.sopp_rxhiwat = is->is_recv_hiwat;
+	props.sopp_rxlowat = icmp_mod_info.mi_lowat;
+	props.sopp_maxblk = INFPSZ;
+	props.sopp_maxpsz = IP_MAXPACKET;
+
+	/* A module minimum packet size of 1 is advertised as 0. */
+	if (icmp_mod_info.mi_minpsz == 1)
+		props.sopp_minpsz = 0;
+	else
+		props.sopp_minpsz = icmp_mod_info.mi_minpsz;
+
+	(*connp->conn_upcalls->su_set_proto_props)
+	    (connp->conn_upper_handle, &props);
+}
+
+/*
+ * Fill sa with the local address of the endpoint.  The caller must hold
+ * icmp_rwlock.
+ *
+ * *salenp is the size of the caller's buffer on entry and is set to the
+ * size of the returned address on success.  EINVAL is returned when the
+ * buffer is too small for the endpoint's address family.  An unbound
+ * endpoint (TS_UNBND) yields a zeroed address with only the family set.
+ */
+static int
+rawip_do_getsockname(icmp_t *icmp, struct sockaddr *sa, uint_t *salenp)
+{
+ sin_t *sin = (sin_t *)sa;
+ sin6_t *sin6 = (sin6_t *)sa;
+
+ ASSERT(icmp != NULL);
+ ASSERT(RW_LOCK_HELD(&icmp->icmp_rwlock));
+
+ switch (icmp->icmp_family) {
+ case AF_INET:
+ ASSERT(icmp->icmp_ipversion == IPV4_VERSION);
+ if (*salenp < sizeof (sin_t))
+ return (EINVAL);
+
+ *salenp = sizeof (sin_t);
+ *sin = sin_null;
+ sin->sin_family = AF_INET;
+ if (icmp->icmp_state == TS_UNBND) {
+ break;
+ }
+
+ if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) &&
+ !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) {
+ sin->sin_addr.s_addr = V4_PART_OF_V6(icmp->icmp_v6src);
+ } else {
+ /*
+ * INADDR_ANY
+ * icmp_v6src is not set, we might be bound to
+ * broadcast/multicast. Use icmp_bound_v6src as
+ * local address instead (that could
+ * also still be INADDR_ANY)
+ */
+ sin->sin_addr.s_addr =
+ V4_PART_OF_V6(icmp->icmp_bound_v6src);
+ }
+ break;
+ case AF_INET6:
+
+ if (*salenp < sizeof (sin6_t))
+ return (EINVAL);
+
+ *salenp = sizeof (sin6_t);
+ *sin6 = sin6_null;
+ sin6->sin6_family = AF_INET6;
+ if (icmp->icmp_state == TS_UNBND) {
+ break;
+ }
+ if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) {
+ sin6->sin6_addr = icmp->icmp_v6src;
+ } else {
+ /*
+ * UNSPECIFIED
+ * icmp_v6src is not set, we might be bound to
+ * broadcast/multicast. Use icmp_bound_v6src as
+ * local address instead (that could
+ * also still be UNSPECIFIED)
+ */
+
+ sin6->sin6_addr = icmp->icmp_bound_v6src;
+ }
+ break;
+ }
+ /* Any other family falls through with sa and *salenp untouched. */
+ return (0);
+}
+
+/*
+ * Fill sa with the peer address of a connected endpoint.  The caller must
+ * hold icmp_rwlock.
+ *
+ * Returns ENOTCONN unless the endpoint is in TS_DATA_XFER, and EINVAL when
+ * the caller's buffer (*salenp bytes) is too small for the endpoint's
+ * address family.  On success *salenp is set to the size of the address
+ * written.  Nothing is stored through sa until the buffer length has been
+ * validated.
+ */
+static int
+rawip_do_getpeername(icmp_t *icmp, struct sockaddr *sa, uint_t *salenp)
+{
+	sin_t	*sin = (sin_t *)sa;
+	sin6_t	*sin6 = (sin6_t *)sa;
+
+	ASSERT(icmp != NULL);
+	ASSERT(RW_LOCK_HELD(&icmp->icmp_rwlock));
+
+	if (icmp->icmp_state != TS_DATA_XFER)
+		return (ENOTCONN);
+
+	switch (icmp->icmp_family) {
+	case AF_INET:
+		ASSERT(icmp->icmp_ipversion == IPV4_VERSION);
+
+		if (*salenp < sizeof (sin_t))
+			return (EINVAL);
+
+		*salenp = sizeof (sin_t);
+		*sin = sin_null;
+		sin->sin_family = AF_INET;
+		sin->sin_addr.s_addr =
+		    V4_PART_OF_V6(icmp->icmp_v6dst.sin6_addr);
+		break;
+	case AF_INET6:
+		if (*salenp < sizeof (sin6_t))
+			return (EINVAL);
+
+		*salenp = sizeof (sin6_t);
+		/* The whole stored destination, family included, is copied. */
+		*sin6 = icmp->icmp_v6dst;
+		break;
+	}
+	return (0);
+}
+
+/*
+ * getpeername() downcall: takes icmp_rwlock as reader around
+ * rawip_do_getpeername() and returns its result.
+ */
+/* ARGSUSED */
+int
+rawip_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *sa,
+    socklen_t *salenp, cred_t *cr)
+{
+	icmp_t	*icmp = ((conn_t *)proto_handle)->conn_icmp;
+	int	rval;
+
+	ASSERT(icmp != NULL);
+
+	rw_enter(&icmp->icmp_rwlock, RW_READER);
+	rval = rawip_do_getpeername(icmp, sa, salenp);
+	rw_exit(&icmp->icmp_rwlock);
+
+	return (rval);
+}
+
+/*
+ * getsockname() downcall: takes icmp_rwlock as reader around
+ * rawip_do_getsockname() and returns its result.
+ */
+/* ARGSUSED */
+int
+rawip_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa,
+    socklen_t *salenp, cred_t *cr)
+{
+	icmp_t	*icmp = ((conn_t *)proto_handle)->conn_icmp;
+	int	rval;
+
+	ASSERT(icmp != NULL);
+
+	rw_enter(&icmp->icmp_rwlock, RW_READER);
+	rval = rawip_do_getsockname(icmp, sa, salenp);
+	rw_exit(&icmp->icmp_rwlock);
+
+	return (rval);
+}
+
+/*
+ * setsockopt() downcall for raw IP.
+ *
+ * The option is first checked against the ICMP option table with
+ * proto_opt_check(); a failure there is returned (negative TLI values are
+ * converted to errno).  The option is then set with icmp_opt_set() under
+ * icmp_rwlock held as writer.  If icmp_opt_set() returns a negative value
+ * the request is handed to ip_set_options() instead.
+ */
+int
+rawip_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
+ const void *optvalp, socklen_t optlen, cred_t *cr)
+{
+ conn_t *connp = (conn_t *)proto_handle;
+ icmp_t *icmp = connp->conn_icmp;
+ int error;
+
+ error = proto_opt_check(level, option_name, optlen, NULL,
+ icmp_opt_obj.odb_opt_des_arr,
+ icmp_opt_obj.odb_opt_arr_cnt,
+ icmp_opt_obj.odb_topmost_tpiprovider,
+ B_TRUE, B_FALSE, cr);
+
+ if (error != 0) {
+ /*
+ * option not recognized
+ */
+ if (error < 0) {
+ error = proto_tlitosyserr(-error);
+ }
+ return (error);
+ }
+
+ /*
+ * optvalp doubles as the output value buffer and the local copy of
+ * optlen as the output length.
+ */
+ rw_enter(&icmp->icmp_rwlock, RW_WRITER);
+ error = icmp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level,
+ option_name, optlen, (uchar_t *)optvalp, (uint_t *)&optlen,
+ (uchar_t *)optvalp, NULL, cr);
+ rw_exit(&icmp->icmp_rwlock);
+
+ if (error < 0) {
+ /*
+ * Pass on to ip
+ */
+ error = ip_set_options(connp, level, option_name, optvalp,
+ optlen, cr);
+ }
+
+ /* Only errno-style (non-negative) values may reach the caller. */
+ ASSERT(error >= 0);
+
+ return (error);
+}
+
+/*
+ * getsockopt() downcall for raw IP.
+ *
+ * The option is checked against the ICMP option table, then fetched with
+ * icmp_opt_get() into a scratch buffer sized by proto_opt_check().  A
+ * negative length from icmp_opt_get() hands the request to
+ * ip_get_options(); otherwise at most *optlen bytes are copied to optvalp
+ * and *optlen is updated to the number of bytes copied.
+ */
+int
+rawip_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
+    void *optvalp, socklen_t *optlen, cred_t *cr)
+{
+	conn_t		*connp = (conn_t *)proto_handle;
+	icmp_t		*icmp = connp->conn_icmp;
+	t_uscalar_t	scratch_len;
+	t_uscalar_t	copylen;
+	void		*scratch;
+	int		optsize;
+	int		rval;
+
+	rval = proto_opt_check(level, option_name, *optlen, &scratch_len,
+	    icmp_opt_obj.odb_opt_des_arr,
+	    icmp_opt_obj.odb_opt_arr_cnt,
+	    icmp_opt_obj.odb_topmost_tpiprovider,
+	    B_FALSE, B_TRUE, cr);
+	if (rval < 0)
+		return (proto_tlitosyserr(-rval));
+	if (rval != 0)
+		return (rval);
+
+	scratch = kmem_alloc(scratch_len, KM_SLEEP);
+
+	rw_enter(&icmp->icmp_rwlock, RW_READER);
+	optsize = icmp_opt_get(connp, level, option_name, scratch);
+	rw_exit(&icmp->icmp_rwlock);
+
+	if (optsize < 0) {
+		/*
+		 * Pass on to IP
+		 */
+		kmem_free(scratch, scratch_len);
+		return (ip_get_options(connp, level, option_name, optvalp,
+		    optlen, cr));
+	}
+
+	/*
+	 * update optlen and copy option value
+	 */
+	copylen = MIN(optsize, *optlen);
+	bcopy(scratch, optvalp, copylen);
+	bcopy(&copylen, optlen, sizeof (copylen));
+
+	kmem_free(scratch, scratch_len);
+	return (0);
+}
+
+/*
+ * close() downcall: tears the endpoint down with rawip_do_close().  The
+ * result of rawip_do_close() is discarded and 0 is always returned.
+ */
+/* ARGSUSED */
+int
+rawip_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr)
+{
+	(void) rawip_do_close((conn_t *)proto_handle);
+
+	return (0);
+}
+
+/*
+ * shutdown() downcall.  No protocol state is changed here; the upper
+ * layer is told through su_opctl which directions were shut down.  Any
+ * value of how other than SHUT_RD shuts the send side, and any value
+ * other than SHUT_WR shuts the receive side.  Always returns 0.
+ */
+/* ARGSUSED */
+int
+rawip_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr)
+{
+	conn_t	*connp = (conn_t *)proto_handle;
+
+	if (how != SHUT_RD) {
+		/* shut down the send side */
+		(*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
+		    SOCK_OPCTL_SHUT_SEND, 0);
+	}
+
+	if (how != SHUT_WR) {
+		/* shut down the recv side */
+		(*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
+		    SOCK_OPCTL_SHUT_RECV, 0);
+	}
+
+	return (0);
+}
+
+/*
+ * Clear-flow-control downcall: resets conn_flow_cntrld to B_FALSE while
+ * holding icmp_recv_lock.
+ */
+void
+rawip_clr_flowctrl(sock_lower_handle_t proto_handle)
+{
+	conn_t		*connp = (conn_t *)proto_handle;
+	kmutex_t	*recv_lock = &connp->conn_icmp->icmp_recv_lock;
+
+	mutex_enter(recv_lock);
+	connp->conn_flow_cntrld = B_FALSE;
+	mutex_exit(recv_lock);
+}
+
+/*
+ * ioctl() downcall for raw IP.
+ *
+ * ND_SET, ND_GET, _SIOCSOCKFALLBACK, TI_GETPEERNAME and TI_GETMYNAME are
+ * rejected with EINVAL (and logged on DEBUG kernels).  Every other command
+ * is forwarded to IP through the conn's helper stream with ldi_ioctl(),
+ * whose result is returned.
+ */
+int
+rawip_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg,
+    int mode, int32_t *rvalp, cred_t *cr)
+{
+	conn_t	*connp = (conn_t *)proto_handle;
+	int	error;
+
+	switch (cmd) {
+	case ND_SET:
+	case ND_GET:
+	case _SIOCSOCKFALLBACK:
+	case TI_GETPEERNAME:
+	case TI_GETMYNAME:
+#ifdef DEBUG
+		/* CE_CONT messages are not newline-terminated automatically. */
+		cmn_err(CE_CONT, "rawip_ioctl cmd 0x%x on non streams"
+		    " socket\n", cmd);
+#endif
+		error = EINVAL;
+		break;
+	default:
+		/*
+		 * Pass on to IP using helper stream
+		 */
+		error = ldi_ioctl(
+		    connp->conn_helper_info->ip_helper_stream_handle,
+		    cmd, arg, mode, cr, rvalp);
+		break;
+	}
+	return (error);
+}
+
+/*
+ * send downcall for raw IP.
+ *
+ * mp is an M_DATA message holding the payload; msg supplies the optional
+ * destination (msg_name) and ancillary data (msg_control).  An unbound
+ * endpoint is implicitly bound first.  A destination may not be supplied
+ * on a connected endpoint (EISCONN), and one is required on an unconnected
+ * endpoint (EDESTADDRREQ).
+ *
+ * icmp_rwlock is held as writer while the destination and options are
+ * validated and is dropped before the packet is handed to
+ * raw_ip_send_data_v4()/raw_ip_send_data_v6().  On the validation error
+ * paths mp is freed here.
+ *
+ * Errors flagged with bypass_dgram_errind are always returned; all other
+ * errors are returned only when icmp_dgram_errind is set, otherwise 0 is
+ * returned.
+ */
+/* ARGSUSED */
+int
+rawip_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg,
+    cred_t *cr)
+{
+	conn_t		*connp = (conn_t *)proto_handle;
+	icmp_t		*icmp = connp->conn_icmp;
+	icmp_stack_t	*is = icmp->icmp_is;
+	int		error = 0;
+	boolean_t	bypass_dgram_errind = B_FALSE;
+
+	ASSERT(DB_TYPE(mp) == M_DATA);
+
+	if (is_system_labeled())
+		msg_setcredpid(mp, cr, curproc->p_pid);
+
+	/* do an implicit bind if necessary */
+	if (icmp->icmp_state == TS_UNBND) {
+		error = rawip_implicit_bind(connp);
+		/*
+		 * We could be racing with an actual bind, in which case
+		 * we would see EPROTO. We cross our fingers and try
+		 * to send.
+		 */
+		if (!(error == 0 || error == EPROTO)) {
+			freemsg(mp);
+			return (error);
+		}
+	}
+
+	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
+
+	if (msg->msg_name != NULL && icmp->icmp_state == TS_DATA_XFER) {
+		error = EISCONN;
+		goto done_lock;
+	}
+
+	switch (icmp->icmp_family) {
+	case AF_INET6: {
+		sin6_t		*sin6;
+		ip6_pkt_t	ipp_s;	/* For ancillary data options */
+		ip6_pkt_t	*ipp = &ipp_s;
+
+		sin6 = (sin6_t *)msg->msg_name;
+		if (sin6 != NULL) {
+			error = proto_verify_ip_addr(icmp->icmp_family,
+			    (struct sockaddr *)msg->msg_name,
+			    msg->msg_namelen);
+			if (error != 0) {
+				bypass_dgram_errind = B_TRUE;
+				goto done_lock;
+			}
+			if (icmp->icmp_delayed_error != 0) {
+				sin6_t	*sin1 = (sin6_t *)msg->msg_name;
+				sin6_t	*sin2 = (sin6_t *)
+				    &icmp->icmp_delayed_addr;
+
+				/* The delayed error is consumed here. */
+				error = icmp->icmp_delayed_error;
+				icmp->icmp_delayed_error = 0;
+
+				/* Compare IP address and port */
+
+				if (sin1->sin6_port == sin2->sin6_port &&
+				    IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr,
+				    &sin2->sin6_addr)) {
+					goto done_lock;
+				}
+			}
+		} else {
+			/*
+			 * Use connected address
+			 */
+			if (icmp->icmp_state != TS_DATA_XFER) {
+				BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
+				error = EDESTADDRREQ;
+				bypass_dgram_errind = B_TRUE;
+				goto done_lock;
+			}
+			sin6 = &icmp->icmp_v6dst;
+		}
+
+		/* No support for mapped addresses on raw sockets */
+		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
+			BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
+			error = EADDRNOTAVAIL;
+			goto done_lock;
+		}
+
+		ipp->ipp_fields = 0;
+		ipp->ipp_sticky_ignored = 0;
+
+		/*
+		 * If options passed in, feed it for verification and handling
+		 */
+		if (msg->msg_controllen != 0) {
+			error = process_auxiliary_options(connp,
+			    msg->msg_control, msg->msg_controllen,
+			    ipp, &icmp_opt_obj, icmp_opt_set);
+			if (error != 0) {
+				goto done_lock;
+			}
+		}
+
+		rw_exit(&icmp->icmp_rwlock);
+
+		/*
+		 * Destination is a native IPv6 address.
+		 * Send out an IPv6 format packet.
+		 */
+
+		error = raw_ip_send_data_v6(connp->conn_wq, connp, mp, sin6,
+		    ipp);
+	}
+		break;
+	case AF_INET: {
+		sin_t		*sin;
+		ip4_pkt_t	pktinfo;
+		ip4_pkt_t	*pktinfop = &pktinfo;
+		ipaddr_t	v4dst;
+
+		sin = (sin_t *)msg->msg_name;
+		if (sin != NULL) {
+			error = proto_verify_ip_addr(icmp->icmp_family,
+			    (struct sockaddr *)msg->msg_name,
+			    msg->msg_namelen);
+			if (error != 0) {
+				bypass_dgram_errind = B_TRUE;
+				goto done_lock;
+			}
+			v4dst = sin->sin_addr.s_addr;
+			if (icmp->icmp_delayed_error != 0) {
+				sin_t	*sin1 = (sin_t *)msg->msg_name;
+				sin_t	*sin2 =
+				    (sin_t *)&icmp->icmp_delayed_addr;
+
+				/* The delayed error is consumed here. */
+				error = icmp->icmp_delayed_error;
+				icmp->icmp_delayed_error = 0;
+
+				/* Compare IP address and port */
+				if (sin1->sin_port == sin2->sin_port &&
+				    sin1->sin_addr.s_addr ==
+				    sin2->sin_addr.s_addr) {
+					goto done_lock;
+				}
+
+			}
+		} else {
+			/*
+			 * Use connected address
+			 */
+			if (icmp->icmp_state != TS_DATA_XFER) {
+				BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
+				error = EDESTADDRREQ;
+				bypass_dgram_errind = B_TRUE;
+				goto done_lock;
+			}
+			v4dst = V4_PART_OF_V6(icmp->icmp_v6dst.sin6_addr);
+		}
+
+
+		pktinfop->ip4_ill_index = 0;
+		pktinfop->ip4_addr = INADDR_ANY;
+
+		/*
+		 * If options passed in, feed it for verification and handling
+		 */
+		if (msg->msg_controllen != 0) {
+			error = process_auxiliary_options(connp,
+			    msg->msg_control, msg->msg_controllen,
+			    pktinfop, &icmp_opt_obj, icmp_opt_set);
+			if (error != 0) {
+				goto done_lock;
+			}
+		}
+		rw_exit(&icmp->icmp_rwlock);
+
+		error = raw_ip_send_data_v4(connp->conn_wq, connp, mp,
+		    v4dst, pktinfop);
+		break;
+	}
+
+	default:
+		/*
+		 * Not expected: the endpoint's family is asserted to be
+		 * AF_INET or AF_INET6 elsewhere.  Should it ever happen on
+		 * a non-DEBUG kernel, fail the send through done_lock so
+		 * that icmp_rwlock is released and mp is freed instead of
+		 * returning with the writer lock still held.
+		 */
+		ASSERT(0);
+		error = EAFNOSUPPORT;
+		bypass_dgram_errind = B_TRUE;
+		goto done_lock;
+	}
+
+	goto done;
+
+done_lock:
+	rw_exit(&icmp->icmp_rwlock);
+	if (error != 0) {
+		ASSERT(mp != NULL);
+		freemsg(mp);
+	}
+done:
+	if (bypass_dgram_errind)
+		return (error);
+	return (icmp->icmp_dgram_errind ? error : 0);
+}
+
+/*
+ * Downcall vector for raw IP sockets; rawip_create() returns its address
+ * through *sock_downcalls.  Entries are positional.  Three slots are left
+ * NULL.
+ * NOTE(review): check the sock_downcalls_t definition to see which
+ * operations the NULL slots correspond to.
+ */
+sock_downcalls_t sock_rawip_downcalls = {
+ rawip_activate,
+ rawip_accept,
+ rawip_bind,
+ rawip_listen,
+ rawip_connect,
+ rawip_getpeername,
+ rawip_getsockname,
+ rawip_getsockopt,
+ rawip_setsockopt,
+ rawip_send,
+ NULL,
+ NULL,
+ NULL,
+ rawip_shutdown,
+ rawip_clr_flowctrl,
+ rawip_ioctl,
+ rawip_close
+};
diff --git a/usr/src/uts/common/inet/ip/icmp_opt_data.c b/usr/src/uts/common/inet/ip/icmp_opt_data.c
index 8769a7d3d4..4f15801dfb 100644
--- a/usr/src/uts/common/inet/ip/icmp_opt_data.c
+++ b/usr/src/uts/common/inet/ip/icmp_opt_data.c
@@ -19,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/types.h>
#include <sys/stream.h>
#define _SUN_TPI_VERSION 2
@@ -52,8 +50,8 @@
extern int icmp_opt_default(queue_t *, int, int, uchar_t *);
-extern int icmp_opt_get(queue_t *, int, int, uchar_t *);
-extern int icmp_opt_set(queue_t *, uint_t, int, int, uint_t, uchar_t *,
+extern int icmp_tpi_opt_get(queue_t *, int, int, uchar_t *);
+extern int icmp_tpi_opt_set(queue_t *, uint_t, int, int, uint_t, uchar_t *,
uint_t *, uchar_t *, void *, cred_t *, mblk_t *);
/*
@@ -96,10 +94,10 @@ opdes_t icmp_opt_arr[] = {
{ IP_OPTIONS, IPPROTO_IP, OA_RW, OA_RW, OP_NP,
(OP_PASSNEXT|OP_VARLEN|OP_NODEFAULT),
- 40, -1 /* not initialized */ },
+ IP_MAX_OPT_LENGTH + IP_ADDR_LEN, -1 /* not initialized */ },
{ T_IP_OPTIONS, IPPROTO_IP, OA_RW, OA_RW, OP_NP,
(OP_PASSNEXT|OP_VARLEN|OP_NODEFAULT),
- 40, -1 /* not initialized */ },
+ IP_MAX_OPT_LENGTH + IP_ADDR_LEN, -1 /* not initialized */ },
{ IP_HDRINCL, IPPROTO_IP, OA_R, OA_RW, OP_RAW, OP_PASSNEXT,
sizeof (int), 0 },
@@ -347,8 +345,8 @@ uint_t icmp_max_optsize; /* initialized when ICMP driver is loaded */
optdb_obj_t icmp_opt_obj = {
icmp_opt_default, /* ICMP default value function pointer */
- icmp_opt_get, /* ICMP get function pointer */
- icmp_opt_set, /* ICMP set function pointer */
+ icmp_tpi_opt_get, /* ICMP get function pointer */
+ icmp_tpi_opt_set, /* ICMP set function pointer */
B_TRUE, /* ICMP is tpi provider */
ICMP_OPT_ARR_CNT, /* ICMP option database count of entries */
icmp_opt_arr, /* ICMP option database */
diff --git a/usr/src/uts/common/inet/ip/icmpddi.c b/usr/src/uts/common/inet/ip/icmpddi.c
index a5861d9120..dd0023c0c8 100644
--- a/usr/src/uts/common/inet/ip/icmpddi.c
+++ b/usr/src/uts/common/inet/ip/icmpddi.c
@@ -29,6 +29,9 @@
#include <sys/modctl.h>
#include <inet/common.h>
#include <inet/ip.h>
+#include <inet/rawip_impl.h>
+#include <sys/strsubr.h>
+#include <sys/socketvar.h>
#define INET_NAME "icmp"
#define INET_MODDESC "ICMP dummy STREAMS module"
@@ -36,6 +39,9 @@
#define INET_DEVMINOR 0
#define INET_DEVSTRTAB icmpinfov4
#define INET_MODSTRTAB dummymodinfo
+#define INET_SOCKDESC "Rawip socket module"
+#define INET_SOCK_PROTO_CREATE_FUNC (*rawip_create)
+#define INET_SOCK_PROTO_FB_FUNC (*rawip_fallback)
#define INET_DEVMTFLAGS D_MP
#define INET_MODMTFLAGS D_MP
diff --git a/usr/src/uts/common/inet/ip/ip.c b/usr/src/uts/common/inet/ip/ip.c
index b0eaa51983..3141cd914e 100644
--- a/usr/src/uts/common/inet/ip/ip.c
+++ b/usr/src/uts/common/inet/ip/ip.c
@@ -38,7 +38,6 @@
#include <sys/tihdr.h>
#include <sys/xti_inet.h>
#include <sys/ddi.h>
-#include <sys/sunddi.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/kobj.h>
@@ -120,7 +119,6 @@
#include <inet/udp_impl.h>
#include <inet/rawip_impl.h>
#include <inet/rts_impl.h>
-#include <sys/sunddi.h>
#include <sys/tsol/label.h>
#include <sys/tsol/tnet.h>
@@ -625,7 +623,7 @@ uint_t ip_max_frag_dups = 10;
#define IS_SIMPLE_IPH(ipha) \
((ipha)->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION)
-/* RFC1122 Conformance */
+/* RFC 1122 Conformance */
#define IP_FORWARD_DEFAULT IP_FORWARD_NEVER
#define ILL_MAX_NAMELEN LIFNAMSIZ
@@ -658,8 +656,7 @@ static void icmp_send_redirect(queue_t *, mblk_t *, ipaddr_t,
ip_stack_t *);
static void ip_arp_news(queue_t *, mblk_t *);
-static boolean_t ip_bind_insert_ire(mblk_t *, ire_t *, iulp_t *,
- ip_stack_t *);
+static boolean_t ip_bind_get_ire_v4(mblk_t **, ire_t *, iulp_t *, ip_stack_t *);
mblk_t *ip_dlpi_alloc(size_t, t_uscalar_t);
char *ip_dot_addr(ipaddr_t, char *);
mblk_t *ip_carve_mp(mblk_t **, ssize_t);
@@ -770,6 +767,8 @@ static void ip_multirt_bad_mtu(ire_t *, uint32_t);
static int ip_cgtp_filter_get(queue_t *, mblk_t *, caddr_t, cred_t *);
static int ip_cgtp_filter_set(queue_t *, mblk_t *, char *,
caddr_t, cred_t *);
+extern int ip_helper_stream_setup(queue_t *, dev_t *, int, int,
+ cred_t *, boolean_t);
static int ip_input_proc_set(queue_t *q, mblk_t *mp, char *value,
caddr_t cp, cred_t *cr);
static int ip_int_set(queue_t *, mblk_t *, char *, caddr_t,
@@ -1318,6 +1317,7 @@ ip_ioctl_cmd_t ip_ndx_ioctl_table[] = {
ip_sioctl_set_ipmpfailback, NULL },
/* SIOCSENABLESDP is handled by SDP */
/* 183 */ { IPI_DONTCARE /* SIOCSENABLESDP */, 0, 0, 0, NULL, NULL },
+ /* 184 */ { IPI_DONTCARE /* SIOCSQPTR */, 0, 0, 0, NULL, NULL },
};
int ip_ndx_ioctl_count = sizeof (ip_ndx_ioctl_table) / sizeof (ip_ioctl_cmd_t);
@@ -1373,7 +1373,8 @@ static ipha_t icmp_ipha = {
};
struct module_info ip_mod_info = {
- IP_MOD_ID, IP_MOD_NAME, 1, INFPSZ, 65536, 1024
+ IP_MOD_ID, IP_MOD_NAME, IP_MOD_MINPSZ, IP_MOD_MAXPSZ, IP_MOD_HIWAT,
+ IP_MOD_LOWAT
};
/*
@@ -4334,6 +4335,23 @@ ip_bind_ipsec_policy_set(conn_t *connp, mblk_t *policy_mp)
return (B_TRUE);
}
+/*
+ * Post-bind fix-up of the IRE returned to the upper-level protocol.
+ *
+ * Pass the IPsec headers size in ire_ipsec_overhead.  We can't do this in
+ * ip_bind_get_ire because the policy may not have been inherited at that
+ * point in time and hence conn_out_enforce_policy may not be set.
+ *
+ * Nothing is done unless an IRE was requested, outbound policy is
+ * enforced on the conn, and mp is an IRE_DB_REQ_TYPE message.
+ */
+static void
+ip_bind_post_handling(conn_t *connp, mblk_t *mp, boolean_t ire_requested)
+{
+	ire_t	*ire;
+
+	if (!ire_requested || !connp->conn_out_enforce_policy)
+		return;
+	if (mp == NULL || DB_TYPE(mp) != IRE_DB_REQ_TYPE)
+		return;
+
+	ire = (ire_t *)mp->b_rptr;
+	ASSERT(MBLKL(mp) >= sizeof (ire_t));
+	ire->ire_ipsec_overhead = conn_ipsec_length(connp);
+}
+
/*
* Upper level protocols (ULP) pass through bind requests to IP for inspection
* and to arrange for power-fanout assist. The ULP is identified by
@@ -4374,7 +4392,6 @@ ip_bind_v4(queue_t *q, mblk_t *mp, conn_t *connp)
uchar_t *ucp;
mblk_t *mp1;
boolean_t ire_requested;
- boolean_t ipsec_policy_set = B_FALSE;
int error = 0;
int protocol;
ipa_conn_x_t *acx;
@@ -4453,7 +4470,6 @@ ip_bind_v4(queue_t *q, mblk_t *mp, conn_t *connp)
mp1 = mp->b_cont;
ire_requested = (mp1 != NULL && DB_TYPE(mp1) == IRE_DB_REQ_TYPE);
- ipsec_policy_set = (mp1 != NULL && DB_TYPE(mp1) == IPSEC_POLICY_SET);
switch (tbr->ADDR_length) {
default:
@@ -4463,14 +4479,14 @@ ip_bind_v4(queue_t *q, mblk_t *mp, conn_t *connp)
case IP_ADDR_LEN:
/* Verification of local address only */
- error = ip_bind_laddr(connp, mp, *(ipaddr_t *)ucp, 0,
- ire_requested, ipsec_policy_set, B_FALSE);
+ error = ip_bind_laddr_v4(connp, &mp1, protocol,
+ *(ipaddr_t *)ucp, 0, B_FALSE);
break;
case sizeof (sin_t):
sin = (sin_t *)ucp;
- error = ip_bind_laddr(connp, mp, sin->sin_addr.s_addr,
- sin->sin_port, ire_requested, ipsec_policy_set, B_TRUE);
+ error = ip_bind_laddr_v4(connp, &mp1, protocol,
+ sin->sin_addr.s_addr, sin->sin_port, B_TRUE);
break;
case sizeof (ipa_conn_t):
@@ -4479,9 +4495,9 @@ ip_bind_v4(queue_t *q, mblk_t *mp, conn_t *connp)
if (ac->ac_lport == 0)
ac->ac_lport = connp->conn_lport;
/* Always verify destination reachability. */
- error = ip_bind_connected(connp, mp, &ac->ac_laddr,
- ac->ac_lport, ac->ac_faddr, ac->ac_fport, ire_requested,
- ipsec_policy_set, B_TRUE, B_TRUE);
+ error = ip_bind_connected_v4(connp, &mp1, protocol,
+ &ac->ac_laddr, ac->ac_lport, ac->ac_faddr, ac->ac_fport,
+ B_TRUE, B_TRUE);
break;
case sizeof (ipa_conn_x_t):
@@ -4490,29 +4506,17 @@ ip_bind_v4(queue_t *q, mblk_t *mp, conn_t *connp)
* Whether or not to verify destination reachability depends
* on the setting of the ACX_VERIFY_DST flag in acx->acx_flags.
*/
- error = ip_bind_connected(connp, mp, &acx->acx_conn.ac_laddr,
- acx->acx_conn.ac_lport, acx->acx_conn.ac_faddr,
- acx->acx_conn.ac_fport, ire_requested, ipsec_policy_set,
+ error = ip_bind_connected_v4(connp, &mp1, protocol,
+ &acx->acx_conn.ac_laddr, acx->acx_conn.ac_lport,
+ acx->acx_conn.ac_faddr, acx->acx_conn.ac_fport,
B_TRUE, (acx->acx_flags & ACX_VERIFY_DST) != 0);
break;
}
- if (error == EINPROGRESS)
- return (NULL);
- else if (error != 0)
+ ASSERT(error != EINPROGRESS);
+ if (error != 0)
goto bad_addr;
- /*
- * Pass the IPsec headers size in ire_ipsec_overhead.
- * We can't do this in ip_bind_insert_ire because the policy
- * may not have been inherited at that point in time and hence
- * conn_out_enforce_policy may not be set.
- */
- mp1 = mp->b_cont;
- if (ire_requested && connp->conn_out_enforce_policy &&
- mp1 != NULL && DB_TYPE(mp1) == IRE_DB_REQ_TYPE) {
- ire_t *ire = (ire_t *)mp1->b_rptr;
- ASSERT(MBLKL(mp1) >= sizeof (ire_t));
- ire->ire_ipsec_overhead = conn_ipsec_length(connp);
- }
+
+ ip_bind_post_handling(connp, mp->b_cont, ire_requested);
/* Send it home. */
mp->b_datap->db_type = M_PCPROTO;
@@ -4539,7 +4543,7 @@ bad_addr:
* upper protocol is expected to reset the src address
* to 0 if it sees a IRE_BROADCAST type returned so that
* no packets are emitted with broadcast/multicast address as
- * source address (that violates hosts requirements RFC1122)
+ * source address (that violates hosts requirements RFC 1122)
* The addresses valid for bind are:
* (1) - INADDR_ANY (0)
* (2) - IP address of an UP interface
@@ -4561,21 +4565,26 @@ bad_addr:
* matching IREs so bind has to look up based on the zone.
*
* Note: lport is in network byte order.
+ *
*/
int
-ip_bind_laddr(conn_t *connp, mblk_t *mp, ipaddr_t src_addr, uint16_t lport,
- boolean_t ire_requested, boolean_t ipsec_policy_set,
- boolean_t fanout_insert)
+ip_bind_laddr_v4(conn_t *connp, mblk_t **mpp, uint8_t protocol,
+ ipaddr_t src_addr, uint16_t lport, boolean_t fanout_insert)
{
int error = 0;
ire_t *src_ire;
- mblk_t *policy_mp;
- ipif_t *ipif;
zoneid_t zoneid;
ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
+ mblk_t *mp = NULL;
+ boolean_t ire_requested = B_FALSE;
+ boolean_t ipsec_policy_set = B_FALSE;
- if (ipsec_policy_set) {
- policy_mp = mp->b_cont;
+ if (mpp)
+ mp = *mpp;
+
+ if (mp != NULL) {
+ ire_requested = (DB_TYPE(mp) == IRE_DB_REQ_TYPE);
+ ipsec_policy_set = (DB_TYPE(mp) == IPSEC_POLICY_SET);
}
/*
@@ -4585,7 +4594,6 @@ ip_bind_laddr(conn_t *connp, mblk_t *mp, ipaddr_t src_addr, uint16_t lport,
connp->conn_fully_bound = B_FALSE;
src_ire = NULL;
- ipif = NULL;
zoneid = IPCL_ZONEID(connp);
@@ -4598,7 +4606,7 @@ ip_bind_laddr(conn_t *connp, mblk_t *mp, ipaddr_t src_addr, uint16_t lport,
* Note: Following code is in if-else-if form for
* readability compared to a condition check.
*/
- /* LINTED - statement has no consequent */
+ /* LINTED - statement has no consequence */
if (IRE_IS_LOCAL(src_ire)) {
/*
* (2) Bind to address of local UP interface
@@ -4617,20 +4625,10 @@ ip_bind_laddr(conn_t *connp, mblk_t *mp, ipaddr_t src_addr, uint16_t lport,
* (ipif_lookup_addr() looks up all interfaces
* but we do not get here for UP interfaces
* - case (2) above)
- * We put the protocol byte back into the mblk
- * since we may come back via ip_wput_nondata()
- * later with this mblk if ipif_lookup_addr chooses
- * to defer processing.
- */
- *mp->b_wptr++ = (char)connp->conn_ulp;
- if ((ipif = ipif_lookup_addr(src_addr, NULL, zoneid,
- CONNP_TO_WQ(connp), mp, ip_wput_nondata,
- &error, ipst)) != NULL) {
- ipif_refrele(ipif);
- } else if (error == EINPROGRESS) {
- if (src_ire != NULL)
- ire_refrele(src_ire);
- return (EINPROGRESS);
+ */
+ /* LINTED - statement has no consequent */
+ if (ip_addr_exists(src_addr, zoneid, ipst)) {
+ /* The address exists */
} else if (CLASSD(src_addr)) {
error = 0;
if (src_ire != NULL)
@@ -4653,20 +4651,16 @@ ip_bind_laddr(conn_t *connp, mblk_t *mp, ipaddr_t src_addr, uint16_t lport,
*/
error = EADDRNOTAVAIL;
}
- /*
- * Just to keep it consistent with the processing in
- * ip_bind_v4()
- */
- mp->b_wptr--;
}
if (error) {
/* Red Alert! Attempting to be a bogon! */
- ip1dbg(("ip_bind: bad src address 0x%x\n",
+ ip1dbg(("ip_bind_laddr_v4: bad src address 0x%x\n",
ntohl(src_addr)));
goto bad_addr;
}
}
+
/*
* Allow setting new policies. For example, disconnects come
* down as ipa_t bind. As we would have set conn_policy_cached
@@ -4690,17 +4684,17 @@ ip_bind_laddr(conn_t *connp, mblk_t *mp, ipaddr_t src_addr, uint16_t lport,
/*
* Do we need to add a check to reject Multicast packets
*/
- error = ipcl_bind_insert(connp, *mp->b_wptr, src_addr, lport);
+ error = ipcl_bind_insert(connp, protocol, src_addr, lport);
}
if (error == 0) {
if (ire_requested) {
- if (!ip_bind_insert_ire(mp, src_ire, NULL, ipst)) {
+ if (!ip_bind_get_ire_v4(mpp, src_ire, NULL, ipst)) {
error = -1;
/* Falls through to bad_addr */
}
} else if (ipsec_policy_set) {
- if (!ip_bind_ipsec_policy_set(connp, policy_mp)) {
+ if (!ip_bind_ipsec_policy_set(connp, mp)) {
error = -1;
/* Falls through to bad_addr */
}
@@ -4717,15 +4711,32 @@ bad_addr:
}
if (src_ire != NULL)
IRE_REFRELE(src_ire);
- if (ipsec_policy_set) {
- ASSERT(policy_mp == mp->b_cont);
- ASSERT(policy_mp != NULL);
- freeb(policy_mp);
- /*
- * As of now assume that nothing else accompanies
- * IPSEC_POLICY_SET.
- */
- mp->b_cont = NULL;
+ return (error);
+}
+
+/*
+ * Bind an IPv4 conn to a local address on behalf of an upper-layer
+ * protocol.
+ *
+ * Marks the conn as sending IPv4 packets, records `protocol' in conn_ulp
+ * and hands the request to ip_bind_laddr_v4(). Whether *ire_mpp is an IRE
+ * request (IRE_DB_REQ_TYPE) is sampled up front, since the bind may retype
+ * or replace that mblk.
+ *
+ * Returns 0 on success (after running ip_bind_post_handling()), any
+ * positive error from ip_bind_laddr_v4() unchanged, or -TBADADDR for any
+ * negative error it reports.
+ */
+int
+ip_proto_bind_laddr_v4(conn_t *connp, mblk_t **ire_mpp, uint8_t protocol,
+ ipaddr_t src_addr, uint16_t lport, boolean_t fanout_insert)
+{
+ int error;
+ mblk_t *mp = NULL;
+ boolean_t ire_requested;
+
+ if (ire_mpp)
+ mp = *ire_mpp;
+ ire_requested = (mp != NULL && DB_TYPE(mp) == IRE_DB_REQ_TYPE);
+
+ ASSERT(!connp->conn_af_isv6);
+ connp->conn_pkt_isv6 = B_FALSE;
+ connp->conn_ulp = protocol;
+
+ error = ip_bind_laddr_v4(connp, ire_mpp, protocol, src_addr, lport,
+ fanout_insert);
+ if (error == 0) {
+ ip_bind_post_handling(connp, ire_mpp ? *ire_mpp : NULL,
+ ire_requested);
+ } else if (error < 0) {
+ error = -TBADADDR;
}
return (error);
}
@@ -4746,16 +4757,14 @@ bad_addr:
* Note: lport and fport are in network byte order.
*/
int
-ip_bind_connected(conn_t *connp, mblk_t *mp, ipaddr_t *src_addrp,
- uint16_t lport, ipaddr_t dst_addr, uint16_t fport,
- boolean_t ire_requested, boolean_t ipsec_policy_set,
+ip_bind_connected_v4(conn_t *connp, mblk_t **mpp, uint8_t protocol,
+ ipaddr_t *src_addrp, uint16_t lport, ipaddr_t dst_addr, uint16_t fport,
boolean_t fanout_insert, boolean_t verify_dst)
{
+
ire_t *src_ire;
ire_t *dst_ire;
int error = 0;
- int protocol;
- mblk_t *policy_mp;
ire_t *sire = NULL;
ire_t *md_dst_ire = NULL;
ire_t *lso_dst_ire = NULL;
@@ -4763,25 +4772,33 @@ ip_bind_connected(conn_t *connp, mblk_t *mp, ipaddr_t *src_addrp,
zoneid_t zoneid;
ipaddr_t src_addr = *src_addrp;
ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
+ mblk_t *mp = NULL;
+ boolean_t ire_requested = B_FALSE;
+ boolean_t ipsec_policy_set = B_FALSE;
+ ts_label_t *tsl = NULL;
+
+ if (mpp)
+ mp = *mpp;
+
+ if (mp != NULL) {
+ ire_requested = (DB_TYPE(mp) == IRE_DB_REQ_TYPE);
+ ipsec_policy_set = (DB_TYPE(mp) == IPSEC_POLICY_SET);
+ tsl = MBLK_GETLABEL(mp);
+ }
src_ire = dst_ire = NULL;
- protocol = *mp->b_wptr & 0xFF;
/*
* If we never got a disconnect before, clear it now.
*/
connp->conn_fully_bound = B_FALSE;
- if (ipsec_policy_set) {
- policy_mp = mp->b_cont;
- }
-
zoneid = IPCL_ZONEID(connp);
if (CLASSD(dst_addr)) {
/* Pick up an IRE_BROADCAST */
dst_ire = ire_route_lookup(ip_g_all_ones, 0, 0, 0, NULL,
- NULL, zoneid, MBLK_GETLABEL(mp),
+ NULL, zoneid, tsl,
(MATCH_IRE_RECURSIVE |
MATCH_IRE_DEFAULT | MATCH_IRE_RJ_BHOLE |
MATCH_IRE_SECATTR), ipst);
@@ -4804,11 +4821,11 @@ ip_bind_connected(conn_t *connp, mblk_t *mp, ipaddr_t *src_addrp,
if (connp->conn_nexthop_set) {
dst_ire = ire_route_lookup(connp->conn_nexthop_v4, 0,
- 0, 0, NULL, NULL, zoneid, MBLK_GETLABEL(mp),
+ 0, 0, NULL, NULL, zoneid, tsl,
MATCH_IRE_SECATTR, ipst);
} else {
dst_ire = ire_route_lookup(dst_addr, 0, 0, 0, NULL,
- &sire, zoneid, MBLK_GETLABEL(mp),
+ &sire, zoneid, tsl,
(MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT |
MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE |
MATCH_IRE_SECATTR), ipst);
@@ -4840,8 +4857,9 @@ ip_bind_connected(conn_t *connp, mblk_t *mp, ipaddr_t *src_addrp,
*/
if (verify_dst || (dst_ire != NULL)) {
if (ip_debug > 2) {
- pr_addr_dbg("ip_bind_connected: bad connected "
- "dst %s\n", AF_INET, &dst_addr);
+ pr_addr_dbg("ip_bind_connected_v4:"
+ " bad connected dst %s\n",
+ AF_INET, &dst_addr);
}
if (dst_ire == NULL || !(dst_ire->ire_type & IRE_HOST))
error = ENETUNREACH;
@@ -4872,7 +4890,8 @@ ip_bind_connected(conn_t *connp, mblk_t *mp, ipaddr_t *src_addrp,
connp->conn_mac_exempt, ipst) != 0) {
error = EHOSTUNREACH;
if (ip_debug > 2) {
- pr_addr_dbg("ip_bind_connected: no label for dst %s\n",
+ pr_addr_dbg("ip_bind_connected_v4:"
+ " no label for dst %s\n",
AF_INET, &dst_addr);
}
goto bad_addr;
@@ -5056,7 +5075,7 @@ ip_bind_connected(conn_t *connp, mblk_t *mp, ipaddr_t *src_addrp,
/* src_ire must be a local|loopback */
if (!IRE_IS_LOCAL(src_ire)) {
if (ip_debug > 2) {
- pr_addr_dbg("ip_bind_connected: bad connected "
+ pr_addr_dbg("ip_bind_connected_v4: bad connected "
"src %s\n", AF_INET, &src_addr);
}
error = EADDRNOTAVAIL;
@@ -5071,7 +5090,7 @@ ip_bind_connected(conn_t *connp, mblk_t *mp, ipaddr_t *src_addrp,
*/
if (src_ire->ire_type == IRE_LOOPBACK &&
!(IRE_IS_LOCAL(dst_ire) || CLASSD(dst_addr))) {
- ip1dbg(("ip_bind_connected: bad connected loopback\n"));
+ ip1dbg(("ip_bind_connected_v4: bad connected loopback\n"));
error = -1;
goto bad_addr;
}
@@ -5114,12 +5133,13 @@ ip_bind_connected(conn_t *connp, mblk_t *mp, ipaddr_t *src_addrp,
if (sire != NULL) {
ulp_info = &(sire->ire_uinfo);
}
- if (!ip_bind_insert_ire(mp, dst_ire, ulp_info, ipst)) {
+ if (!ip_bind_get_ire_v4(mpp, dst_ire, ulp_info, ipst)) {
error = -1;
goto bad_addr;
}
+ mp = *mpp;
} else if (ipsec_policy_set) {
- if (!ip_bind_ipsec_policy_set(connp, policy_mp)) {
+ if (!ip_bind_ipsec_policy_set(connp, mp)) {
error = -1;
goto bad_addr;
}
@@ -5171,27 +5191,36 @@ ip_bind_connected(conn_t *connp, mblk_t *mp, ipaddr_t *src_addrp,
ASSERT(ill->ill_lso_capab != NULL);
if ((lsoinfo_mp = ip_lsoinfo_return(lso_dst_ire, connp,
- ill->ill_name, ill->ill_lso_capab)) != NULL)
- linkb(mp, lsoinfo_mp);
+ ill->ill_name, ill->ill_lso_capab)) != NULL) {
+ if (mp == NULL) {
+ *mpp = lsoinfo_mp;
+ } else {
+ linkb(mp, lsoinfo_mp);
+ }
+ }
} else if (md_dst_ire != NULL) {
mblk_t *mdinfo_mp;
ASSERT(ill->ill_mdt_capab != NULL);
if ((mdinfo_mp = ip_mdinfo_return(md_dst_ire, connp,
- ill->ill_name, ill->ill_mdt_capab)) != NULL)
- linkb(mp, mdinfo_mp);
+ ill->ill_name, ill->ill_mdt_capab)) != NULL) {
+ if (mp == NULL) {
+ *mpp = mdinfo_mp;
+ } else {
+ linkb(mp, mdinfo_mp);
+ }
+ }
}
}
bad_addr:
if (ipsec_policy_set) {
- ASSERT(policy_mp == mp->b_cont);
- ASSERT(policy_mp != NULL);
- freeb(policy_mp);
+ ASSERT(mp != NULL);
+ freeb(mp);
/*
* As of now assume that nothing else accompanies
* IPSEC_POLICY_SET.
*/
- mp->b_cont = NULL;
+ *mpp = NULL;
}
if (src_ire != NULL)
IRE_REFRELE(src_ire);
@@ -5206,32 +5235,62 @@ bad_addr:
return (error);
}
+/*
+ * Bind an IPv4 conn to a local/remote address pair on behalf of an
+ * upper-layer protocol.
+ *
+ * Marks the conn as sending IPv4 packets and records `protocol' in
+ * conn_ulp. A zero lport is replaced by the conn's existing conn_lport
+ * before the request is handed to ip_bind_connected_v4(). Whether
+ * *ire_mpp is an IRE request (IRE_DB_REQ_TYPE) is sampled up front, since
+ * the bind may retype or replace that mblk.
+ *
+ * Returns 0 on success (after running ip_bind_post_handling()), any
+ * positive error from ip_bind_connected_v4() unchanged, or -TBADADDR for
+ * any negative error it reports.
+ */
+int
+ip_proto_bind_connected_v4(conn_t *connp, mblk_t **ire_mpp, uint8_t protocol,
+ ipaddr_t *src_addrp, uint16_t lport, ipaddr_t dst_addr, uint16_t fport,
+ boolean_t fanout_insert, boolean_t verify_dst)
+{
+ int error;
+ mblk_t *mp = NULL;
+ boolean_t ire_requested;
+
+ if (ire_mpp)
+ mp = *ire_mpp;
+ ire_requested = (mp != NULL && DB_TYPE(mp) == IRE_DB_REQ_TYPE);
+
+ ASSERT(!connp->conn_af_isv6);
+ connp->conn_pkt_isv6 = B_FALSE;
+ connp->conn_ulp = protocol;
+
+ /* For raw socket, the local port is not set. */
+ if (lport == 0)
+ lport = connp->conn_lport;
+ error = ip_bind_connected_v4(connp, ire_mpp, protocol,
+ src_addrp, lport, dst_addr, fport, fanout_insert, verify_dst);
+ if (error == 0) {
+ ip_bind_post_handling(connp, ire_mpp ? *ire_mpp : NULL,
+ ire_requested);
+ } else if (error < 0) {
+ error = -TBADADDR;
+ }
+ return (error);
+}
+
/*
- * Insert the ire in b_cont. Returns false if it fails (due to lack of space).
+ * Get the ire in *mpp. Returns false if it fails (due to lack of space).
* Prefers dst_ire over src_ire.
*/
static boolean_t
-ip_bind_insert_ire(mblk_t *mp, ire_t *ire, iulp_t *ulp_info, ip_stack_t *ipst)
+ip_bind_get_ire_v4(mblk_t **mpp, ire_t *ire, iulp_t *ulp_info, ip_stack_t *ipst)
{
- mblk_t *mp1;
- ire_t *ret_ire = NULL;
+ mblk_t *mp = *mpp;
+ ire_t *ret_ire;
- mp1 = mp->b_cont;
- ASSERT(mp1 != NULL);
+ ASSERT(mp != NULL);
if (ire != NULL) {
/*
- * mp1 initialized above to IRE_DB_REQ_TYPE
+ * mp initialized above to IRE_DB_REQ_TYPE
* appended mblk. Its <upper protocol>'s
* job to make sure there is room.
*/
- if ((mp1->b_datap->db_lim - mp1->b_rptr) < sizeof (ire_t))
- return (0);
+ if ((mp->b_datap->db_lim - mp->b_rptr) < sizeof (ire_t))
+ return (B_FALSE);
- mp1->b_datap->db_type = IRE_DB_TYPE;
- mp1->b_wptr = mp1->b_rptr + sizeof (ire_t);
- bcopy(ire, mp1->b_rptr, sizeof (ire_t));
- ret_ire = (ire_t *)mp1->b_rptr;
+ mp->b_datap->db_type = IRE_DB_TYPE;
+ mp->b_wptr = mp->b_rptr + sizeof (ire_t);
+ bcopy(ire, mp->b_rptr, sizeof (ire_t));
+ ret_ire = (ire_t *)mp->b_rptr;
/*
* Pass the latest setting of the ip_path_mtu_discovery and
* copy the ulp info if any.
@@ -5242,16 +5301,15 @@ ip_bind_insert_ire(mblk_t *mp, ire_t *ire, iulp_t *ulp_info, ip_stack_t *ipst)
bcopy(ulp_info, &(ret_ire->ire_uinfo),
sizeof (iulp_t));
}
- ret_ire->ire_mp = mp1;
+ ret_ire->ire_mp = mp;
} else {
/*
* No IRE was found. Remove IRE mblk.
*/
- mp->b_cont = mp1->b_cont;
- freeb(mp1);
+ *mpp = mp->b_cont;
+ freeb(mp);
}
-
- return (1);
+ return (B_TRUE);
}
/*
@@ -5645,9 +5703,9 @@ ip_ddi_destroy(void)
{
tnet_fini();
- icmp_ddi_destroy();
- rts_ddi_destroy();
- udp_ddi_destroy();
+ icmp_ddi_g_destroy();
+ rts_ddi_g_destroy();
+ udp_ddi_g_destroy();
sctp_ddi_g_destroy();
tcp_ddi_g_destroy();
ipsec_policy_g_destroy();
@@ -5814,6 +5872,7 @@ ip_stack_fini(netstackid_t stackid, void *arg)
kmem_free(ipst->ips_ill_g_heads, sizeof (ill_g_head_t) * MAX_G_HEADS);
ipst->ips_ill_g_heads = NULL;
+ ldi_ident_release(ipst->ips_ldi_ident);
kmem_free(ipst, sizeof (*ipst));
}
@@ -5898,9 +5957,9 @@ ip_ddi_init(void)
tnet_init();
- udp_ddi_init();
- rts_ddi_init();
- icmp_ddi_init();
+ udp_ddi_g_init();
+ rts_ddi_g_init();
+ icmp_ddi_g_init();
}
/*
@@ -5912,6 +5971,7 @@ ip_stack_init(netstackid_t stackid, netstack_t *ns)
ip_stack_t *ipst;
ipparam_t *pa;
ipndp_t *na;
+ major_t major;
#ifdef NS_DEBUG
printf("ip_stack_init(stack %d)\n", stackid);
@@ -6011,6 +6071,8 @@ ip_stack_init(netstackid_t stackid, netstack_t *ns)
list_create(&ipst->ips_capab_taskq_list, sizeof (mblk_t),
offsetof(mblk_t, b_next));
+ major = mod_name_to_major(INET_NAME);
+ (void) ldi_ident_from_major(major, &ipst->ips_ldi_ident);
return (ipst);
}
@@ -6353,7 +6415,7 @@ ip_fanout_proto(queue_t *q, mblk_t *mp, ill_t *ill, ipha_t *ipha, uint_t flags,
}
}
- if (connp == NULL || connp->conn_upq == NULL) {
+ if (connp == NULL) {
/*
* No one bound to these addresses. Is
* there a client that wants all
@@ -6392,6 +6454,9 @@ ip_fanout_proto(queue_t *q, mblk_t *mp, ill_t *ill, ipha_t *ipha, uint_t flags,
}
return;
}
+
+ ASSERT(IPCL_IS_NONSTR(connp) || connp->conn_upq != NULL);
+
CONN_INC_REF(connp);
first_connp = connp;
@@ -6415,7 +6480,7 @@ ip_fanout_proto(queue_t *q, mblk_t *mp, ill_t *ill, ipha_t *ipha, uint_t flags,
/*
* Copy the packet.
*/
- if (connp == NULL || connp->conn_upq == NULL ||
+ if (connp == NULL ||
(((first_mp1 = dupmsg(first_mp)) == NULL) &&
((first_mp1 = ip_copymsg(first_mp)) == NULL))) {
/*
@@ -6425,11 +6490,17 @@ ip_fanout_proto(queue_t *q, mblk_t *mp, ill_t *ill, ipha_t *ipha, uint_t flags,
connp = first_connp;
break;
}
+ ASSERT(IPCL_IS_NONSTR(connp) || connp->conn_rq != NULL);
mp1 = mctl_present ? first_mp1->b_cont : first_mp1;
CONN_INC_REF(connp);
mutex_exit(&connfp->connf_lock);
rq = connp->conn_rq;
- if (!canputnext(rq)) {
+
+ /*
+ * Check flow control
+ */
+ if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) ||
+ (!IPCL_IS_NONSTR(connp) && !canputnext(rq))) {
if (flags & IP_FF_RAWIP) {
BUMP_MIB(mibptr, rawipIfStatsInOverflows);
} else {
@@ -6527,7 +6598,11 @@ ip_fanout_proto(queue_t *q, mblk_t *mp, ill_t *ill, ipha_t *ipha, uint_t flags,
}
rq = connp->conn_rq;
- if (!canputnext(rq)) {
+ /*
+ * Check flow control
+ */
+ if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) ||
+ (!IPCL_IS_NONSTR(connp) && !canputnext(rq))) {
if (flags & IP_FF_RAWIP) {
BUMP_MIB(mibptr, rawipIfStatsInOverflows);
} else {
@@ -6975,7 +7050,8 @@ ip_fanout_udp_conn(conn_t *connp, mblk_t *first_mp, mblk_t *mp,
else
first_mp = mp;
- if (CONN_UDP_FLOWCTLD(connp)) {
+ if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) ||
+ (!IPCL_IS_NONSTR(connp) && CONN_UDP_FLOWCTLD(connp))) {
BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows);
freemsg(first_mp);
return;
@@ -7166,9 +7242,12 @@ ip_fanout_udp(queue_t *q, mblk_t *mp, ill_t *ill, ipha_t *ipha,
connp = connp->conn_next;
}
- if (connp == NULL || connp->conn_upq == NULL)
+ if (connp == NULL ||
+ (!IPCL_IS_NONSTR(connp) && connp->conn_upq == NULL))
goto notfound;
+ ASSERT(IPCL_IS_NONSTR(connp) || connp->conn_upq != NULL);
+
if (is_system_labeled() &&
!tsol_receive_local(mp, &dst, IPV4_VERSION, shared_addr,
connp))
@@ -7202,9 +7281,12 @@ ip_fanout_udp(queue_t *q, mblk_t *mp, ill_t *ill, ipha_t *ipha,
connp = connp->conn_next;
}
- if (connp == NULL || connp->conn_upq == NULL)
+ if (connp == NULL ||
+ (!IPCL_IS_NONSTR(connp) && connp->conn_upq == NULL))
goto notfound;
+ ASSERT(IPCL_IS_NONSTR(connp) || connp->conn_upq != NULL);
+
first_connp = connp;
/*
* When SO_REUSEADDR is not set, send the packet only to the first
@@ -7321,7 +7403,8 @@ notfound:
connp))
connp = NULL;
- if (connp == NULL || connp->conn_upq == NULL) {
+ if (connp == NULL ||
+ (!IPCL_IS_NONSTR(connp) && connp->conn_upq == NULL)) {
/*
* No one bound to this port. Is
* there a client that wants all
@@ -7349,6 +7432,7 @@ notfound:
}
return;
}
+ ASSERT(IPCL_IS_NONSTR(connp) || connp->conn_upq != NULL);
CONN_INC_REF(connp);
mutex_exit(&connfp->connf_lock);
@@ -7377,7 +7461,8 @@ notfound:
connp = connp->conn_next;
}
- if (connp == NULL || connp->conn_upq == NULL) {
+ if (connp == NULL ||
+ (!IPCL_IS_NONSTR(connp) && connp->conn_upq == NULL)) {
/*
* No one bound to this port. Is
* there a client that wants all
@@ -7406,6 +7491,7 @@ notfound:
}
return;
}
+ ASSERT(IPCL_IS_NONSTR(connp) || connp->conn_upq != NULL);
first_connp = connp;
@@ -9774,6 +9860,15 @@ ip_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp,
return (ip_modopen(q, devp, flag, sflag, credp));
}
+ if ((flag & ~(FKLYR)) == IP_HELPER_STR) {
+ /*
+ * Non streams based socket looking for a stream
+ * to access IP
+ */
+ return (ip_helper_stream_setup(q, devp, flag, sflag,
+ credp, isv6));
+ }
+
ns = netstack_find_by_cred(credp);
ASSERT(ns != NULL);
ipst = ns->netstack_ip;
@@ -10344,7 +10439,7 @@ ip_opt_set_ipif(conn_t *connp, ipaddr_t addr, boolean_t checkonly, int option,
if (ipif == NULL) {
if (error == EINPROGRESS)
return (error);
- else if ((option == IP_MULTICAST_IF) ||
+ if ((option == IP_MULTICAST_IF) ||
(option == IP_NEXTHOP))
return (EHOSTUNREACH);
else
@@ -11611,7 +11706,6 @@ ip_opt_get(queue_t *q, int level, int name, uchar_t *ptr)
}
return (-1);
}
-
/* Named Dispatch routine to get a current value out of our parameter table. */
/* ARGSUSED */
static int
@@ -12806,10 +12900,11 @@ ip_udp_input(queue_t *q, mblk_t *mp, ipha_t *ipha, ire_t *ire,
if ((connp = ipcl_classify_v4(mp, IPPROTO_UDP, IP_SIMPLE_HDR_LENGTH,
ire->ire_zoneid, ipst)) != NULL) {
- ASSERT(connp->conn_upq != NULL);
+ ASSERT(IPCL_IS_NONSTR(connp) || connp->conn_upq != NULL);
IP_STAT(ipst, ip_udp_fast_path);
- if (CONN_UDP_FLOWCTLD(connp)) {
+ if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) ||
+ (!IPCL_IS_NONSTR(connp) && CONN_UDP_FLOWCTLD(connp))) {
freemsg(mp);
BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows);
} else {
@@ -20373,11 +20468,9 @@ ip_trash_ire_reclaim_stack(ip_stack_t *ipst)
* upper level protocol. We remove this conn from any fanout hash list it is
* on, and zero out the bind information. No reply is expected up above.
*/
-mblk_t *
-ip_unbind(queue_t *q, mblk_t *mp)
+void
+ip_unbind(conn_t *connp)
{
- conn_t *connp = Q_TO_CONN(q);
-
ASSERT(!MUTEX_HELD(&connp->conn_lock));
if (is_system_labeled() && connp->conn_anon_port) {
@@ -20390,20 +20483,6 @@ ip_unbind(queue_t *q, mblk_t *mp)
ipcl_hash_remove(connp);
- ASSERT(mp->b_cont == NULL);
- /*
- * Convert mp into a T_OK_ACK
- */
- mp = mi_tpi_ok_ack_alloc(mp);
-
- /*
- * should not happen in practice... T_OK_ACK is smaller than the
- * original message.
- */
- if (mp == NULL)
- return (NULL);
-
- return (mp);
}
/*
@@ -20475,11 +20554,13 @@ ip_output_options(void *arg, mblk_t *mp, void *arg2, int caller,
ASSERT(connp != NULL);
zoneid = connp->conn_zoneid;
ipst = connp->conn_netstack->netstack_ip;
+ ASSERT(ipst != NULL);
/* is queue flow controlled? */
if ((q->q_first != NULL || connp->conn_draining) &&
(caller == IP_WPUT)) {
ASSERT(!need_decref);
+ ASSERT(!IP_FLOW_CONTROLLED_ULP(connp->conn_ulp));
(void) putq(q, mp);
return;
}
@@ -21514,7 +21595,6 @@ dontroute:
* connectivity.
*/
ipha->ipha_ttl = 1;
-
/* If suitable ipif not found, drop packet */
dst_ipif = ipif_lookup_onlink_addr(dst, zoneid, ipst);
if (dst_ipif == NULL) {
@@ -23244,6 +23324,7 @@ blocked:
* ip_wsrv will be scheduled or
* is already running.
*/
+
(void) putq(connp->conn_wq,
first_mp);
}
@@ -27522,26 +27603,6 @@ ip_ioctl_finish(queue_t *q, mblk_t *mp, int err, int mode, ipsq_t *ipsq)
ipsq_current_finish(ipsq);
}
-/*
- * This is called from ip_wput_nondata to resume a deferred TCP bind.
- */
-/* ARGSUSED */
-void
-ip_resume_tcp_bind(void *arg, mblk_t *mp, void *arg2)
-{
- conn_t *connp = arg;
- tcp_t *tcp;
-
- ASSERT(connp != NULL && IPCL_IS_TCP(connp) && connp->conn_tcp != NULL);
- tcp = connp->conn_tcp;
-
- if (connp->conn_tcp->tcp_state == TCPS_CLOSED)
- freemsg(mp);
- else
- tcp_rput_other(tcp, mp);
- CONN_OPER_PENDING_DONE(connp);
-}
-
/* Called from ip_wput for all non data messages */
/* ARGSUSED */
void
@@ -27782,8 +27843,9 @@ nak:
case M_PROTO:
case M_PCPROTO:
/*
- * The only PROTO messages we expect are ULP binds and
- * copies of option negotiation acknowledgements.
+ * The only PROTO messages we expect are copies of option
+ * negotiation acknowledgements, AH and ESP bind requests
+ * are also expected.
*/
switch (((union T_primitives *)mp->b_rptr)->type) {
case O_T_BIND_REQ:
@@ -27809,37 +27871,15 @@ nak:
mp = connp->conn_af_isv6 ? ip_bind_v6(q, mp,
connp, NULL) : ip_bind_v4(q, mp, connp);
- if (mp == NULL)
- return;
- if (IPCL_IS_TCP(connp)) {
- /*
- * In the case of TCP endpoint we
- * come here only for bind retries
- */
- ASSERT(ipsq != NULL);
- CONN_INC_REF(connp);
- SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
- ip_resume_tcp_bind, connp,
- SQ_FILL, SQTAG_BIND_RETRY);
- } else if (IPCL_IS_UDP(connp)) {
- /*
- * In the case of UDP endpoint we
- * come here only for bind retries
- */
- ASSERT(ipsq != NULL);
- udp_resume_bind(connp, mp);
- } else if (IPCL_IS_RAWIP(connp)) {
- /*
- * In the case of RAWIP endpoint we
- * come here only for bind retries
- */
- ASSERT(ipsq != NULL);
- rawip_resume_bind(connp, mp);
- } else {
- /* The case of AH and ESP */
- qreply(q, mp);
- CONN_OPER_PENDING_DONE(connp);
- }
+ ASSERT(mp != NULL);
+
+ ASSERT(!IPCL_IS_TCP(connp));
+ ASSERT(!IPCL_IS_UDP(connp));
+ ASSERT(!IPCL_IS_RAWIP(connp));
+
+ /* The case of AH and ESP */
+ qreply(q, mp);
+ CONN_OPER_PENDING_DONE(connp);
return;
}
case T_SVR4_OPTMGMT_REQ:
@@ -27908,7 +27948,8 @@ nak:
proto_str = "T_UNBIND_REQ";
goto protonak;
}
- mp = ip_unbind(q, mp);
+ ip_unbind(Q_TO_CONN(q));
+ mp = mi_tpi_ok_ack_alloc(mp);
qreply(q, mp);
return;
default:
@@ -28582,6 +28623,11 @@ conn_drain_insert(conn_t *connp)
head->conn_drain_prev->conn_drain_next = connp;
head->conn_drain_prev = connp;
}
+ /*
+ * For non streams based sockets assert flow control. Only make
+ * the upcall when the conn is a non-STREAMS socket, matching the
+ * IPCL_IS_NONSTR() guard conn_drain_tail() uses when it reopens
+ * flow control; presumably STREAMS based conns have no upcall
+ * vector to call through.
+ */
+ if (IPCL_IS_NONSTR(connp)) {
+ (*connp->conn_upcalls->su_txq_full)
+ (connp->conn_upper_handle, B_TRUE);
+ }
mutex_exit(CONN_DRAIN_LIST_LOCK(connp));
}
@@ -28695,7 +28741,16 @@ conn_drain_tail(conn_t *connp, boolean_t closing)
}
connp->conn_drain_next = NULL;
connp->conn_drain_prev = NULL;
+
+ /*
+ * For non streams based sockets open up flow control.
+ */
+ if (IPCL_IS_NONSTR(connp)) {
+ (*connp->conn_upcalls->su_txq_full)
+ (connp->conn_upper_handle, B_FALSE);
+ }
}
+
mutex_exit(CONN_DRAIN_LIST_LOCK(connp));
}
@@ -28779,6 +28834,7 @@ ip_wsrv(queue_t *q)
*/
connp->conn_draining = 0;
enableok(q);
+
}
/* Enable the next conn for draining */
@@ -28941,7 +28997,7 @@ ip_conn_report(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *ioc_cr)
"CONN " MI_COL_HDRPAD_STR
"rfq " MI_COL_HDRPAD_STR
"stq " MI_COL_HDRPAD_STR
- " zone local remote");
+ " zone local remote");
/*
* Because of the ndd constraint, at most we can have 64K buffer
@@ -29339,7 +29395,6 @@ ip_multirt_apply_membership(int (*fn)(conn_t *, boolean_t, ipaddr_t, ipaddr_t,
return (or->or_private == CGTP_MCAST_SUCCESS ? 0 : error);
}
-
/*
* Issue a warning regarding a route crossing an interface with an
* incorrect MTU. Only one message every 'ip_multirt_log_interval'
diff --git a/usr/src/uts/common/inet/ip/ip6.c b/usr/src/uts/common/inet/ip/ip6.c
index a1d97627b2..fe326778c2 100644
--- a/usr/src/uts/common/inet/ip/ip6.c
+++ b/usr/src/uts/common/inet/ip/ip6.c
@@ -191,13 +191,15 @@ static void icmp_inbound_too_big_v6(queue_t *, mblk_t *, ill_t *ill,
static void icmp_pkt_v6(queue_t *, mblk_t *, void *, size_t,
const in6_addr_t *, boolean_t, zoneid_t, ip_stack_t *);
static void icmp_redirect_v6(queue_t *, mblk_t *, ill_t *ill);
-static int ip_bind_connected_v6(conn_t *, mblk_t *, in6_addr_t *,
+static int ip_bind_connected_v6(conn_t *, mblk_t **, uint8_t, in6_addr_t *,
uint16_t, const in6_addr_t *, ip6_pkt_t *, uint16_t,
- boolean_t, boolean_t, boolean_t, boolean_t);
-static boolean_t ip_bind_insert_ire_v6(mblk_t *, ire_t *, const in6_addr_t *,
+ boolean_t, boolean_t);
+static boolean_t ip_bind_get_ire_v6(mblk_t **, ire_t *, const in6_addr_t *,
iulp_t *, ip_stack_t *);
-static int ip_bind_laddr_v6(conn_t *, mblk_t *, const in6_addr_t *,
- uint16_t, boolean_t, boolean_t, boolean_t);
+static void ip_bind_post_handling_v6(conn_t *, mblk_t *, boolean_t,
+ boolean_t, ip_stack_t *);
+static int ip_bind_laddr_v6(conn_t *, mblk_t **, uint8_t,
+ const in6_addr_t *, uint16_t, boolean_t);
static void ip_fanout_proto_v6(queue_t *, mblk_t *, ip6_t *, ill_t *,
ill_t *, uint8_t, uint_t, uint_t, boolean_t, zoneid_t);
static void ip_fanout_tcp_v6(queue_t *, mblk_t *, ip6_t *, ill_t *,
@@ -2071,12 +2073,8 @@ ip_bind_v6(queue_t *q, mblk_t *mp, conn_t *connp, ip6_pkt_t *ipp)
uint16_t lport;
uint16_t fport;
uchar_t *ucp;
- mblk_t *mp1;
- boolean_t ire_requested;
- boolean_t ipsec_policy_set;
int error = 0;
boolean_t local_bind;
- boolean_t orig_pkt_isv6 = connp->conn_pkt_isv6;
ipa6_conn_x_t *acx6;
boolean_t verify_dst;
ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
@@ -2145,9 +2143,6 @@ ip_bind_v6(queue_t *q, mblk_t *mp, conn_t *connp, ip6_pkt_t *ipp)
ip1dbg(("ip_bind_v6: unaligned address\n"));
goto bad_addr;
}
- mp1 = mp->b_cont; /* trailing mp if any */
- ire_requested = (mp1 && mp1->b_datap->db_type == IRE_DB_REQ_TYPE);
- ipsec_policy_set = (mp1 && mp1->b_datap->db_type == IPSEC_POLICY_SET);
switch (tbr->ADDR_length) {
default:
@@ -2173,9 +2168,6 @@ ip_bind_v6(queue_t *q, mblk_t *mp, conn_t *connp, ip6_pkt_t *ipp)
/*
* Verify that both the source and destination addresses
* are valid.
- * Note that we allow connect to broadcast and multicast
- * addresses when ire_requested is set. Thus the ULP
- * has to check for IRE_BROADCAST and multicast.
*/
ac6 = (ipa6_conn_t *)ucp;
v6srcp = &ac6->ac6_laddr;
@@ -2192,9 +2184,6 @@ ip_bind_v6(queue_t *q, mblk_t *mp, conn_t *connp, ip6_pkt_t *ipp)
case sizeof (ipa6_conn_x_t):
/*
* Verify that the source address is valid.
- * Note that we allow connect to broadcast and multicast
- * addresses when ire_requested is set. Thus the ULP
- * has to check for IRE_BROADCAST and multicast.
*/
acx6 = (ipa6_conn_x_t *)ucp;
ac6 = &acx6->ac6x_conn;
@@ -2211,80 +2200,35 @@ ip_bind_v6(queue_t *q, mblk_t *mp, conn_t *connp, ip6_pkt_t *ipp)
break;
}
if (local_bind) {
- if (IN6_IS_ADDR_V4MAPPED(v6srcp) && !connp->conn_ipv6_v6only) {
- /* Bind to IPv4 address */
- ipaddr_t v4src;
-
- IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src);
-
- error = ip_bind_laddr(connp, mp, v4src, lport,
- ire_requested, ipsec_policy_set,
- tbr->ADDR_length != IPV6_ADDR_LEN);
- if (error != 0)
- goto bad_addr;
- connp->conn_pkt_isv6 = B_FALSE;
- } else {
- if (IN6_IS_ADDR_V4MAPPED(v6srcp)) {
- error = 0;
- goto bad_addr;
- }
- error = ip_bind_laddr_v6(connp, mp, v6srcp, lport,
- ire_requested, ipsec_policy_set,
- (tbr->ADDR_length != IPV6_ADDR_LEN));
- if (error != 0)
- goto bad_addr;
- connp->conn_pkt_isv6 = B_TRUE;
- }
+ error = ip_proto_bind_laddr_v6(connp, &mp->b_cont, protocol,
+ v6srcp, lport, tbr->ADDR_length != IPV6_ADDR_LEN);
} else {
- /*
- * Bind to local and remote address. Local might be
- * unspecified in which case it will be extracted from
- * ire_src_addr_v6
- */
- if (IN6_IS_ADDR_V4MAPPED(v6dstp) && !connp->conn_ipv6_v6only) {
- /* Connect to IPv4 address */
- ipaddr_t v4src;
- ipaddr_t v4dst;
-
- /* Is the source unspecified or mapped? */
- if (!IN6_IS_ADDR_V4MAPPED(v6srcp) &&
- !IN6_IS_ADDR_UNSPECIFIED(v6srcp)) {
- ip1dbg(("ip_bind_v6: "
- "dst is mapped, but not the src\n"));
- goto bad_addr;
- }
- IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src);
- IN6_V4MAPPED_TO_IPADDR(v6dstp, v4dst);
-
- /*
- * XXX Fix needed. Need to pass ipsec_policy_set
- * instead of B_FALSE.
- */
+ error = ip_proto_bind_connected_v6(connp, &mp->b_cont, protocol,
+ v6srcp, lport, v6dstp, ipp, fport, B_TRUE, verify_dst);
+ }
- /* Always verify destination reachability. */
- error = ip_bind_connected(connp, mp, &v4src, lport,
- v4dst, fport, ire_requested, ipsec_policy_set,
- B_TRUE, B_TRUE);
- if (error != 0)
- goto bad_addr;
- IN6_IPADDR_TO_V4MAPPED(v4src, v6srcp);
- connp->conn_pkt_isv6 = B_FALSE;
- } else if (IN6_IS_ADDR_V4MAPPED(v6srcp)) {
- ip1dbg(("ip_bind_v6: "
- "src is mapped, but not the dst\n"));
- goto bad_addr;
- } else {
- error = ip_bind_connected_v6(connp, mp, v6srcp,
- lport, v6dstp, ipp, fport, ire_requested,
- ipsec_policy_set, B_TRUE, verify_dst);
- if (error != 0)
- goto bad_addr;
- connp->conn_pkt_isv6 = B_TRUE;
- }
+ if (error == 0) {
+ /* Send it home. */
+ mp->b_datap->db_type = M_PCPROTO;
+ tbr->PRIM_type = T_BIND_ACK;
+ return (mp);
}
+bad_addr:
+ ASSERT(error != EINPROGRESS);
+ if (error > 0)
+ mp = mi_tpi_err_ack_alloc(mp, TSYSERR, error);
+ else
+ mp = mi_tpi_err_ack_alloc(mp, TBADADDR, 0);
+ return (mp);
+}
+
+static void
+ip_bind_post_handling_v6(conn_t *connp, mblk_t *mp,
+ boolean_t version_changed, boolean_t ire_requested, ip_stack_t *ipst)
+{
/* Update conn_send and pktversion if v4/v6 changed */
- if (orig_pkt_isv6 != connp->conn_pkt_isv6) {
+ if (version_changed) {
ip_setpktversion(connp, connp->conn_pkt_isv6, B_TRUE, ipst);
}
/*
@@ -2293,27 +2237,12 @@ ip_bind_v6(queue_t *q, mblk_t *mp, conn_t *connp, ip6_pkt_t *ipp)
* may not have been inherited at that point in time and hence
* conn_out_enforce_policy may not be set.
*/
- mp1 = mp->b_cont;
if (ire_requested && connp->conn_out_enforce_policy &&
- mp1 != NULL && DB_TYPE(mp1) == IRE_DB_REQ_TYPE) {
- ire_t *ire = (ire_t *)mp1->b_rptr;
- ASSERT(MBLKL(mp1) >= sizeof (ire_t));
+ mp != NULL && DB_TYPE(mp) == IRE_DB_REQ_TYPE) {
+ ire_t *ire = (ire_t *)mp->b_rptr;
+ ASSERT(MBLKL(mp) >= sizeof (ire_t));
ire->ire_ipsec_overhead = (conn_ipsec_length(connp));
}
-
- /* Send it home. */
- mp->b_datap->db_type = M_PCPROTO;
- tbr->PRIM_type = T_BIND_ACK;
- return (mp);
-
-bad_addr:
- if (error == EINPROGRESS)
- return (NULL);
- if (error > 0)
- mp = mi_tpi_err_ack_alloc(mp, TSYSERR, error);
- else
- mp = mi_tpi_err_ack_alloc(mp, TBADADDR, 0);
- return (mp);
}
/*
@@ -2339,20 +2268,27 @@ bad_addr:
* When the address is loopback or multicast, there might be many matching IREs
* so bind has to look up based on the zone.
*/
+/*
+ * Verify the local IP address. Does not change the conn_t except
+ * conn_fully_bound and conn_policy_cached.
+ */
static int
-ip_bind_laddr_v6(conn_t *connp, mblk_t *mp, const in6_addr_t *v6src,
- uint16_t lport, boolean_t ire_requested, boolean_t ipsec_policy_set,
- boolean_t fanout_insert)
+ip_bind_laddr_v6(conn_t *connp, mblk_t **mpp, uint8_t protocol,
+ const in6_addr_t *v6src, uint16_t lport, boolean_t fanout_insert)
{
int error = 0;
ire_t *src_ire = NULL;
- ipif_t *ipif = NULL;
- mblk_t *policy_mp;
zoneid_t zoneid;
+ mblk_t *mp = NULL;
+ boolean_t ire_requested;
+ boolean_t ipsec_policy_set;
ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
- if (ipsec_policy_set)
- policy_mp = mp->b_cont;
+ if (mpp)
+ mp = *mpp;
+
+ ire_requested = (mp != NULL && DB_TYPE(mp) == IRE_DB_REQ_TYPE);
+ ipsec_policy_set = (mp != NULL && DB_TYPE(mp) == IPSEC_POLICY_SET);
/*
* If it was previously connected, conn_fully_bound would have
@@ -2372,11 +2308,11 @@ ip_bind_laddr_v6(conn_t *connp, mblk_t *mp, const in6_addr_t *v6src,
* readability compared to a condition check.
*/
ASSERT(src_ire == NULL || !(src_ire->ire_type & IRE_BROADCAST));
+ /* LINTED - statement has no consequent */
if (IRE_IS_LOCAL(src_ire)) {
/*
* (2) Bind to address of local UP interface
*/
- ipif = src_ire->ire_ipif;
} else if (IN6_IS_ADDR_MULTICAST(v6src)) {
ipif_t *multi_ipif = NULL;
ire_t *save_ire;
@@ -2418,28 +2354,12 @@ ip_bind_laddr_v6(conn_t *connp, mblk_t *mp, const in6_addr_t *v6src,
if (multi_ipif != NULL)
ipif_refrele(multi_ipif);
} else {
- *mp->b_wptr++ = (char)connp->conn_ulp;
- ipif = ipif_lookup_addr_v6(v6src, NULL, zoneid,
- CONNP_TO_WQ(connp), mp, ip_wput_nondata, &error,
- ipst);
- if (ipif == NULL) {
- if (error == EINPROGRESS) {
- if (src_ire != NULL)
- ire_refrele(src_ire);
- return (error);
- }
+ if (!ip_addr_exists_v6(v6src, zoneid, ipst)) {
/*
* Not a valid address for bind
*/
error = EADDRNOTAVAIL;
- } else {
- ipif_refrele(ipif);
}
- /*
- * Just to keep it consistent with the processing in
- * ip_bind_v6().
- */
- mp->b_wptr--;
}
if (error != 0) {
@@ -2471,17 +2391,18 @@ ip_bind_laddr_v6(conn_t *connp, mblk_t *mp, const in6_addr_t *v6src,
connp->conn_remv6 = ipv6_all_zeros;
connp->conn_lport = lport;
connp->conn_fport = 0;
- error = ipcl_bind_insert_v6(connp, *mp->b_wptr, v6src, lport);
+ error = ipcl_bind_insert_v6(connp, protocol, v6src, lport);
}
if (error == 0) {
if (ire_requested) {
- if (!ip_bind_insert_ire_v6(mp, src_ire, v6src, NULL,
+ if (!ip_bind_get_ire_v6(mpp, src_ire, v6src, NULL,
ipst)) {
error = -1;
goto bad_addr;
}
+ mp = *mpp;
} else if (ipsec_policy_set) {
- if (!ip_bind_ipsec_policy_set(connp, policy_mp)) {
+ if (!ip_bind_ipsec_policy_set(connp, mp)) {
error = -1;
goto bad_addr;
}
@@ -2501,54 +2422,70 @@ bad_addr:
ire_refrele(src_ire);
if (ipsec_policy_set) {
- ASSERT(policy_mp != NULL);
- freeb(policy_mp);
+ ASSERT(mp != NULL);
+ freeb(mp);
/*
* As of now assume that nothing else accompanies
* IPSEC_POLICY_SET.
*/
- mp->b_cont = NULL;
+ *mpp = NULL;
}
+
return (error);
}
-
-/* ARGSUSED */
-static void
-ip_bind_connected_resume_v6(ipsq_t *ipsq, queue_t *q, mblk_t *mp,
- void *dummy_arg)
+int
+ip_proto_bind_laddr_v6(conn_t *connp, mblk_t **mpp, uint8_t protocol,
+ const in6_addr_t *v6srcp, uint16_t lport, boolean_t fanout_insert)
{
- conn_t *connp = NULL;
- t_scalar_t prim;
+ int error;
+ boolean_t ire_requested;
+ mblk_t *mp = NULL;
+ boolean_t orig_pkt_isv6 = connp->conn_pkt_isv6;
+ ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
- ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO);
+ /*
+ * Note that we allow connect to broadcast and multicast
+ * address when ire_requested is set. Thus the ULP
+ * has to check for IRE_BROADCAST and multicast.
+ */
+ if (mpp)
+ mp = *mpp;
+ ire_requested = (mp && DB_TYPE(mp) == IRE_DB_REQ_TYPE);
- if (CONN_Q(q))
- connp = Q_TO_CONN(q);
- ASSERT(connp != NULL);
+ ASSERT(connp->conn_af_isv6);
+ connp->conn_ulp = protocol;
- prim = ((union T_primitives *)mp->b_rptr)->type;
- ASSERT(prim == O_T_BIND_REQ || prim == T_BIND_REQ);
+ if (IN6_IS_ADDR_V4MAPPED(v6srcp) && !connp->conn_ipv6_v6only) {
+ /* Bind to IPv4 address */
+ ipaddr_t v4src;
- if (IPCL_IS_TCP(connp)) {
- /* Pass sticky_ipp for scope_id and pktinfo */
- mp = ip_bind_v6(q, mp, connp, &connp->conn_tcp->tcp_sticky_ipp);
+ IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src);
+
+ error = ip_bind_laddr_v4(connp, mpp, protocol, v4src, lport,
+ fanout_insert);
+ if (error != 0)
+ goto bad_addr;
+ connp->conn_pkt_isv6 = B_FALSE;
} else {
- /* For UDP and ICMP */
- mp = ip_bind_v6(q, mp, connp, NULL);
- }
- if (mp != NULL) {
- if (IPCL_IS_TCP(connp)) {
- CONN_INC_REF(connp);
- SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
- ip_resume_tcp_bind, connp, SQ_FILL,
- SQTAG_TCP_RPUTOTHER);
- } else if (IPCL_IS_UDP(connp)) {
- udp_resume_bind(connp, mp);
- } else {
- ASSERT(IPCL_IS_RAWIP(connp));
- rawip_resume_bind(connp, mp);
+ if (IN6_IS_ADDR_V4MAPPED(v6srcp)) {
+ /*
+ * A v4-mapped source on a v6-only conn is not a
+ * valid bind address. Use a negative error so that
+ * bad_addr reports -TBADADDR instead of returning
+ * 0 (success) without having bound anything.
+ */
+ error = -1;
+ goto bad_addr;
}
+ error = ip_bind_laddr_v6(connp, mpp, protocol, v6srcp,
+ lport, fanout_insert);
+ if (error != 0)
+ goto bad_addr;
+ connp->conn_pkt_isv6 = B_TRUE;
}
+
+ ip_bind_post_handling_v6(connp, mpp ? *mpp : NULL,
+ orig_pkt_isv6 != connp->conn_pkt_isv6, ire_requested, ipst);
+ return (0);
+
+bad_addr:
+ if (error < 0)
+ error = -TBADADDR;
+ return (error);
}
/*
@@ -2562,42 +2499,43 @@ ip_bind_connected_resume_v6(ipsq_t *ipsq, queue_t *q, mblk_t *mp,
* non-TCP cases, it is NULL and for all other tcp cases it is not useful.
*
*/
-static int
-ip_bind_connected_v6(conn_t *connp, mblk_t *mp, in6_addr_t *v6src,
- uint16_t lport, const in6_addr_t *v6dst, ip6_pkt_t *ipp, uint16_t fport,
- boolean_t ire_requested, boolean_t ipsec_policy_set,
- boolean_t fanout_insert, boolean_t verify_dst)
+int
+ip_bind_connected_v6(conn_t *connp, mblk_t **mpp, uint8_t protocol,
+ in6_addr_t *v6src, uint16_t lport, const in6_addr_t *v6dst,
+ ip6_pkt_t *ipp, uint16_t fport, boolean_t fanout_insert,
+ boolean_t verify_dst)
{
ire_t *src_ire;
ire_t *dst_ire;
int error = 0;
- int protocol;
- mblk_t *policy_mp;
ire_t *sire = NULL;
ire_t *md_dst_ire = NULL;
ill_t *md_ill = NULL;
ill_t *dst_ill = NULL;
ipif_t *src_ipif = NULL;
zoneid_t zoneid;
- boolean_t ill_held = B_FALSE;
+ boolean_t ill_held = B_FALSE;
+ mblk_t *mp = NULL;
+ boolean_t ire_requested = B_FALSE;
+ boolean_t ipsec_policy_set = B_FALSE;
ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
+ ts_label_t *tsl = NULL;
- src_ire = dst_ire = NULL;
- /*
- * NOTE: The protocol is beyond the wptr because that's how
- * the undocumented transport<-->IP T_BIND_REQ behavior works.
- */
- protocol = *mp->b_wptr & 0xFF;
+ if (mpp)
+ mp = *mpp;
+
+ if (mp != NULL) {
+ ire_requested = (DB_TYPE(mp) == IRE_DB_REQ_TYPE);
+ ipsec_policy_set = (DB_TYPE(mp) == IPSEC_POLICY_SET);
+ tsl = MBLK_GETLABEL(mp);
+ }
+ src_ire = dst_ire = NULL;
/*
* If we never got a disconnect before, clear it now.
*/
connp->conn_fully_bound = B_FALSE;
- if (ipsec_policy_set) {
- policy_mp = mp->b_cont;
- }
-
zoneid = connp->conn_zoneid;
if (IN6_IS_ADDR_MULTICAST(v6dst)) {
@@ -2620,7 +2558,7 @@ ip_bind_connected_v6(conn_t *connp, mblk_t *mp, in6_addr_t *v6src,
ipif = ipif_lookup_group_v6(v6dst, zoneid, ipst);
}
mutex_exit(&connp->conn_lock);
- if (ipif == NULL || !ire_requested ||
+ if (ipif == NULL || ire_requested ||
(dst_ire = ipif_to_ire_v6(ipif)) == NULL) {
if (ipif != NULL)
ipif_refrele(ipif);
@@ -2637,7 +2575,7 @@ ip_bind_connected_v6(conn_t *connp, mblk_t *mp, in6_addr_t *v6src,
ipif_refrele(ipif);
} else {
dst_ire = ire_route_lookup_v6(v6dst, NULL, NULL, 0,
- NULL, &sire, zoneid, MBLK_GETLABEL(mp),
+ NULL, &sire, zoneid, tsl,
MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT |
MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR,
ipst);
@@ -2693,8 +2631,8 @@ ip_bind_connected_v6(conn_t *connp, mblk_t *mp, in6_addr_t *v6src,
*/
if (dst_ire != NULL && is_system_labeled() &&
!IPCL_IS_TCP(connp) &&
- tsol_compute_label_v6(DB_CREDDEF(mp, connp->conn_cred), v6dst, NULL,
- connp->conn_mac_exempt, ipst) != 0) {
+ tsol_compute_label_v6(DB_CREDDEF(mp, connp->conn_cred),
+ v6dst, NULL, connp->conn_mac_exempt, ipst) != 0) {
error = EHOSTUNREACH;
if (ip_debug > 2) {
pr_addr_dbg("ip_bind_connected: no label for dst %s\n",
@@ -2831,25 +2769,24 @@ ip_bind_connected_v6(conn_t *connp, mblk_t *mp, in6_addr_t *v6src,
/* No need to hold ill here */
dst_ill = dst_ire->ire_ipif->ipif_ill;
}
- if (!ip6_asp_can_lookup(ipst)) {
- *mp->b_wptr++ = (char)protocol;
- ip6_asp_pending_op(CONNP_TO_WQ(connp), mp,
- ip_bind_connected_resume_v6);
- error = EINPROGRESS;
- goto refrele_and_quit;
- }
- src_ipif = ipif_select_source_v6(dst_ill, v6dst,
- RESTRICT_TO_NONE, connp->conn_src_preferences,
- zoneid);
- ip6_asp_table_refrele(ipst);
- if (src_ipif == NULL) {
- pr_addr_dbg("ip_bind_connected_v6: "
- "no usable source address for "
- "connection to %s\n", AF_INET6, v6dst);
+ if (ip6_asp_can_lookup(ipst)) {
+ src_ipif = ipif_select_source_v6(dst_ill,
+ v6dst, RESTRICT_TO_NONE,
+ connp->conn_src_preferences, zoneid);
+ ip6_asp_table_refrele(ipst);
+ if (src_ipif == NULL) {
+ pr_addr_dbg("ip_bind_connected_v6: "
+ "no usable source address for "
+ "connection to %s\n",
+ AF_INET6, v6dst);
+ error = EADDRNOTAVAIL;
+ goto bad_addr;
+ }
+ *v6src = src_ipif->ipif_v6lcl_addr;
+ } else {
error = EADDRNOTAVAIL;
goto bad_addr;
}
- *v6src = src_ipif->ipif_v6lcl_addr;
}
}
@@ -2922,13 +2859,13 @@ ip_bind_connected_v6(conn_t *connp, mblk_t *mp, in6_addr_t *v6src,
if (sire != NULL)
ulp_info = &(sire->ire_uinfo);
- if (!ip_bind_insert_ire_v6(mp, dst_ire, v6dst, ulp_info,
+ if (!ip_bind_get_ire_v6(mpp, dst_ire, v6dst, ulp_info,
ipst)) {
error = -1;
goto bad_addr;
}
} else if (ipsec_policy_set) {
- if (!ip_bind_ipsec_policy_set(connp, policy_mp)) {
+ if (!ip_bind_ipsec_policy_set(connp, mp)) {
error = -1;
goto bad_addr;
}
@@ -2982,19 +2919,24 @@ ip_bind_connected_v6(conn_t *connp, mblk_t *mp, in6_addr_t *v6src,
ASSERT(md_ill != NULL);
ASSERT(md_ill->ill_mdt_capab != NULL);
if ((mdinfo_mp = ip_mdinfo_return(md_dst_ire, connp,
- md_ill->ill_name, md_ill->ill_mdt_capab)) != NULL)
- linkb(mp, mdinfo_mp);
+ md_ill->ill_name, md_ill->ill_mdt_capab)) != NULL) {
+ if (mp == NULL) {
+ *mpp = mdinfo_mp;
+ } else {
+ linkb(mp, mdinfo_mp);
+ }
+ }
}
}
bad_addr:
if (ipsec_policy_set) {
- ASSERT(policy_mp != NULL);
- freeb(policy_mp);
+ ASSERT(mp != NULL);
+ freeb(mp);
/*
* As of now assume that nothing else accompanies
* IPSEC_POLICY_SET.
*/
- mp->b_cont = NULL;
+ *mpp = NULL;
}
refrele_and_quit:
if (src_ire != NULL)
@@ -3012,34 +2954,110 @@ refrele_and_quit:
return (error);
}
+/*
+ * Connected bind for an AF_INET6 conn.
+ *
+ * A v4-mapped destination (on a conn that is not v6-only) is handed to
+ * ip_bind_connected_v4(); in that case the source must be v4-mapped or
+ * unspecified.  A v4-mapped source with a non-mapped destination is
+ * rejected.  Everything else goes to ip_bind_connected_v6().
+ *
+ * Returns 0 on success, the error from the underlying bind routine, or
+ * -TBADADDR when the source/destination address families do not agree.
+ */
+/* ARGSUSED */
+int
+ip_proto_bind_connected_v6(conn_t *connp, mblk_t **mpp, uint8_t protocol,
+    in6_addr_t *v6srcp, uint16_t lport, const in6_addr_t *v6dstp,
+    ip6_pkt_t *ipp, uint16_t fport, boolean_t fanout_insert,
+    boolean_t verify_dst)
+{
+	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
+	boolean_t	was_pkt_v6 = connp->conn_pkt_isv6;
+	boolean_t	ire_requested;
+	int		error;
+
+	/*
+	 * Connecting to a broadcast or multicast address is permitted
+	 * when an IRE was requested, so the ULP must check for
+	 * IRE_BROADCAST and multicast itself.
+	 */
+	ASSERT(mpp != NULL);
+	ire_requested = (*mpp != NULL && DB_TYPE(*mpp) == IRE_DB_REQ_TYPE);
+
+	ASSERT(connp->conn_af_isv6);
+	connp->conn_ulp = protocol;
+
+	/* Raw sockets pass no local port; fall back to the conn's. */
+	if (lport == 0)
+		lport = connp->conn_lport;
+
+	/*
+	 * Bind to the local and remote address.  The local address may
+	 * be unspecified, in which case the bind routine fills it in.
+	 */
+	if (IN6_IS_ADDR_V4MAPPED(v6dstp) && !connp->conn_ipv6_v6only) {
+		/* Destination is IPv4. */
+		ipaddr_t	v4src;
+		ipaddr_t	v4dst;
+
+		/* The source has to be unspecified or mapped as well. */
+		if (!IN6_IS_ADDR_V4MAPPED(v6srcp) &&
+		    !IN6_IS_ADDR_UNSPECIFIED(v6srcp)) {
+			ip1dbg(("ip_proto_bind_connected_v6: "
+			    "dst is mapped, but not the src\n"));
+			return (-TBADADDR);
+		}
+		IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src);
+		IN6_V4MAPPED_TO_IPADDR(v6dstp, v4dst);
+
+		/* Destination reachability is always verified here. */
+		error = ip_bind_connected_v4(connp, mpp, protocol, &v4src,
+		    lport, v4dst, fport, B_TRUE, B_TRUE);
+		if (error != 0)
+			return (error);
+
+		/* Hand the (possibly selected) source back as v4-mapped. */
+		IN6_IPADDR_TO_V4MAPPED(v4src, v6srcp);
+		connp->conn_pkt_isv6 = B_FALSE;
+	} else if (IN6_IS_ADDR_V4MAPPED(v6srcp)) {
+		ip1dbg(("ip_proto_bind_connected_v6: "
+		    "src is mapped, but not the dst\n"));
+		return (-TBADADDR);
+	} else {
+		error = ip_bind_connected_v6(connp, mpp, protocol, v6srcp,
+		    lport, v6dstp, ipp, fport, B_TRUE, verify_dst);
+		if (error != 0)
+			return (error);
+		connp->conn_pkt_isv6 = B_TRUE;
+	}
+
+	/* Third argument: did the packet address family change? */
+	ip_bind_post_handling_v6(connp, *mpp,
+	    was_pkt_v6 != connp->conn_pkt_isv6, ire_requested, ipst);
+
+	return (0);
+}
+
/*
- * Insert the ire in b_cont. Returns false if it fails (due to lack of space).
+ * Get the ire in *mpp. Returns false if it fails (due to lack of space).
* Makes the IRE be IRE_BROADCAST if dst is a multicast address.
*/
/* ARGSUSED4 */
static boolean_t
-ip_bind_insert_ire_v6(mblk_t *mp, ire_t *ire, const in6_addr_t *dst,
+ip_bind_get_ire_v6(mblk_t **mpp, ire_t *ire, const in6_addr_t *dst,
iulp_t *ulp_info, ip_stack_t *ipst)
{
- mblk_t *mp1;
+ mblk_t *mp = *mpp;
ire_t *ret_ire;
- mp1 = mp->b_cont;
- ASSERT(mp1 != NULL);
+ ASSERT(mp != NULL);
if (ire != NULL) {
/*
- * mp1 initialized above to IRE_DB_REQ_TYPE
+ * mp initialized above to IRE_DB_REQ_TYPE
* appended mblk. Its <upper protocol>'s
* job to make sure there is room.
*/
- if ((mp1->b_datap->db_lim - mp1->b_rptr) < sizeof (ire_t))
+ if ((mp->b_datap->db_lim - mp->b_rptr) < sizeof (ire_t))
return (B_FALSE);
- mp1->b_datap->db_type = IRE_DB_TYPE;
- mp1->b_wptr = mp1->b_rptr + sizeof (ire_t);
- bcopy(ire, mp1->b_rptr, sizeof (ire_t));
- ret_ire = (ire_t *)mp1->b_rptr;
+ mp->b_datap->db_type = IRE_DB_TYPE;
+ mp->b_wptr = mp->b_rptr + sizeof (ire_t);
+ bcopy(ire, mp->b_rptr, sizeof (ire_t));
+ ret_ire = (ire_t *)mp->b_rptr;
if (IN6_IS_ADDR_MULTICAST(dst) ||
IN6_IS_ADDR_V4MAPPED_CLASSD(dst)) {
ret_ire->ire_type = IRE_BROADCAST;
@@ -3049,13 +3067,13 @@ ip_bind_insert_ire_v6(mblk_t *mp, ire_t *ire, const in6_addr_t *dst,
bcopy(ulp_info, &(ret_ire->ire_uinfo),
sizeof (iulp_t));
}
- ret_ire->ire_mp = mp1;
+ ret_ire->ire_mp = mp;
} else {
/*
* No IRE was found. Remove IRE mblk.
*/
- mp->b_cont = mp1->b_cont;
- freeb(mp1);
+ *mpp = mp->b_cont;
+ freeb(mp);
}
return (B_TRUE);
}
@@ -3168,7 +3186,7 @@ ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill,
break;
}
- if (connp == NULL || connp->conn_upq == NULL) {
+ if (connp == NULL) {
/*
* No one bound to this port. Is
* there a client that wants all
@@ -3184,6 +3202,8 @@ ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill,
return;
}
+ ASSERT(IPCL_IS_NONSTR(connp) || connp->conn_upq != NULL);
+
CONN_INC_REF(connp);
first_connp = connp;
@@ -3217,7 +3237,7 @@ ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill,
* needed just for verifying policy and it is never
* sent up.
*/
- if (connp == NULL || connp->conn_upq == NULL ||
+ if (connp == NULL ||
(((first_mp1 = dupmsg(first_mp)) == NULL) &&
((first_mp1 = ip_copymsg(first_mp)) == NULL))) {
/*
@@ -3227,6 +3247,7 @@ ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill,
connp = first_connp;
break;
}
+ ASSERT(IPCL_IS_NONSTR(connp) || connp->conn_rq != NULL);
mp1 = mctl_present ? first_mp1->b_cont : first_mp1;
CONN_INC_REF(connp);
mutex_exit(&connfp->connf_lock);
@@ -3243,7 +3264,9 @@ ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill,
}
if (mp1 == NULL) {
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
- } else if (!canputnext(rq)) {
+ } else if (
+ (IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) ||
+ (!IPCL_IS_NONSTR(connp) && !canputnext(rq))) {
if (flags & IP_FF_RAWIP) {
BUMP_MIB(ill->ill_ip_mib,
rawipIfStatsInOverflows);
@@ -3320,7 +3343,9 @@ ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill,
}
rq = connp->conn_rq;
- if (!canputnext(rq)) {
+ if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) ||
+ (!IPCL_IS_NONSTR(connp) && !canputnext(rq))) {
+
if (flags & IP_FF_RAWIP) {
BUMP_MIB(ill->ill_ip_mib, rawipIfStatsInOverflows);
} else {
@@ -3740,7 +3765,8 @@ ip_fanout_udp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, uint32_t ports,
CONN_INC_REF(connp);
mutex_exit(&connfp->connf_lock);
- if (CONN_UDP_FLOWCTLD(connp)) {
+ if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) ||
+ (!IPCL_IS_NONSTR(connp) && CONN_UDP_FLOWCTLD(connp))) {
freemsg(first_mp);
CONN_DEC_REF(connp);
return;
@@ -3870,7 +3896,8 @@ ip_fanout_udp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, uint32_t ports,
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
goto next_one;
}
- if (CONN_UDP_FLOWCTLD(connp)) {
+ if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) ||
+ (!IPCL_IS_NONSTR(connp) && CONN_UDP_FLOWCTLD(connp))) {
BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows);
freemsg(first_mp1);
goto next_one;
@@ -3938,7 +3965,8 @@ next_one:
first_mp = mp;
}
}
- if (CONN_UDP_FLOWCTLD(connp)) {
+ if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) ||
+ (!IPCL_IS_NONSTR(connp) && CONN_UDP_FLOWCTLD(connp))) {
BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows);
freemsg(mp);
} else {
@@ -8397,7 +8425,8 @@ udp_fanout:
return;
}
- if (CONN_UDP_FLOWCTLD(connp)) {
+ if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) ||
+ (!IPCL_IS_NONSTR(connp) && CONN_UDP_FLOWCTLD(connp))) {
freemsg(first_mp);
BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows);
CONN_DEC_REF(connp);
@@ -9069,7 +9098,7 @@ done:
*
* case 1 : Routing header was processed by this node and
* ip_process_rthdr replaced ip6_dst with the next hop
- * and we are forwarding the packet to the next hop.
+ * and we are forwarding the packet to the next hop.
*
* case 2 : Routing header was not processed by this node and we
* are just forwarding the packet.
diff --git a/usr/src/uts/common/inet/ip/ip6_if.c b/usr/src/uts/common/inet/ip/ip6_if.c
index dc703f40c3..81447c2e30 100644
--- a/usr/src/uts/common/inet/ip/ip6_if.c
+++ b/usr/src/uts/common/inet/ip/ip6_if.c
@@ -284,6 +284,44 @@ repeat:
goto repeat;
}
+/*
+ * Report whether the IPv6 address 'addr' is configured on any ipif that is
+ * visible to 'zoneid' (ALL_ZONES on either side matches everything).
+ * An ipif matches when 'addr' is its local address and it is not
+ * unnumbered, or when it is point-to-point and 'addr' is its destination.
+ * The ipif is not required to be up.
+ */
+boolean_t
+ip_addr_exists_v6(const in6_addr_t *addr, zoneid_t zoneid,
+    ip_stack_t *ipst)
+{
+	ill_walk_context_t	ctx;
+	ill_t			*ill;
+	ipif_t			*ipif;
+	boolean_t		found = B_FALSE;
+
+	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
+
+	for (ill = ILL_START_WALK_V6(&ctx, ipst); ill != NULL;
+	    ill = ill_next(&ctx, ill)) {
+		mutex_enter(&ill->ill_lock);
+		for (ipif = ill->ill_ipif; ipif != NULL;
+		    ipif = ipif->ipif_next) {
+			/* Skip ipifs that belong to some other zone. */
+			if (zoneid != ALL_ZONES &&
+			    ipif->ipif_zoneid != zoneid &&
+			    ipif->ipif_zoneid != ALL_ZONES)
+				continue;
+
+			/* No IPIF_UP test: a down ipif still counts. */
+			if ((IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr,
+			    addr) &&
+			    (ipif->ipif_flags & IPIF_UNNUMBERED) == 0) ||
+			    ((ipif->ipif_flags & IPIF_POINTOPOINT) &&
+			    IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6pp_dst_addr,
+			    addr))) {
+				found = B_TRUE;
+				break;
+			}
+		}
+		mutex_exit(&ill->ill_lock);
+		if (found)
+			break;
+	}
+
+	rw_exit(&ipst->ips_ill_g_lock);
+	return (found);
+}
+
/*
* Look for an ipif with the specified address. For point-point links
* we look for matches on either the destination address and the local
@@ -2237,7 +2275,6 @@ ipif_select_source_v6(ill_t *dstill, const in6_addr_t *dst,
dstinfo.dst_scope = ip_addr_scope_v6(dst);
dstinfo.dst_label = ip6_asp_lookup(dst, NULL, ipst);
dstinfo.dst_prefer_src_tmp = ((src_prefs & IPV6_PREFER_SRC_TMP) != 0);
-
rw_enter(&ipst->ips_ill_g_lock, RW_READER);
/*
* Section three of the I-D states that for multicast and
diff --git a/usr/src/uts/common/inet/ip/ip_helper_stream.c b/usr/src/uts/common/inet/ip/ip_helper_stream.c
new file mode 100644
index 0000000000..7da64667d1
--- /dev/null
+++ b/usr/src/uts/common/inet/ip/ip_helper_stream.c
@@ -0,0 +1,482 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/types.h>
+#include <inet/ip.h>
+#include <inet/ip_impl.h>
+#include <inet/ipclassifier.h>
+#include <inet/proto_set.h>
+#include <sys/stream.h>
+#include <sys/strsubr.h>
+#include <sys/strsun.h>
+#include <sys/cmn_err.h>
+#include <sys/t_kuser.h>
+#include <sys/tihdr.h>
+#include <sys/pathname.h>
+#include <sys/sockio.h>
+#include <sys/vmem.h>
+#include <sys/disp.h>
+
+/* Write-side put procedure of the helper stream (defined below). */
+void ip_helper_wput(queue_t *q, mblk_t *mp);
+
+/* Close routine of the helper stream (defined below). */
+static int ip_helper_stream_close(queue_t *, int);
+
+/*
+ * STREAMS module information shared by both queues of the helper stream.
+ * NOTE(review): initializer order presumably follows struct module_info
+ * (id, name, min/max packet size, hi/lo water marks) - confirm against
+ * <sys/stream.h>.
+ */
+static struct module_info ip_helper_stream_info = {
+ 0, "iphelper", IP_MOD_MINPSZ, IP_MOD_MAXPSZ, IP_MOD_HIWAT, IP_MOD_LOWAT
+};
+
+/*
+ * Read-side qinit: the only routine supplied is ip_helper_stream_close.
+ * NOTE(review): assumes the fourth slot of struct qinit is the close
+ * routine - confirm against qinit(9S).
+ */
+static struct qinit ip_helper_stream_rinit = {
+ NULL, NULL, NULL, ip_helper_stream_close, NULL,
+ &ip_helper_stream_info, NULL
+};
+
+/*
+ * Write-side qinit: ip_helper_wput and ip_wsrv are installed in the first
+ * two slots (presumably put and service procedures - confirm against
+ * qinit(9S)).
+ */
+static struct qinit ip_helper_stream_winit = {
+ (pfi_t)ip_helper_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL,
+ &ip_helper_stream_info, NULL, NULL, NULL, STRUIOT_NONE
+};
+
+/* True when the ip_helper_stream_cache pointer is non-NULL. */
+#define IP_USE_HELPER_CACHE (ip_helper_stream_cache != NULL)
+
+/*
+ * Handle SIOCSQPTR: record this stream's current q_ptr and its queue pair
+ * in the helper-stream info named by the ioctl payload, then ack the ioctl.
+ *
+ * With the helper cache in use the payload is a pointer to the
+ * ip_helper_stream_info_t itself.  Otherwise the payload is a conn_t
+ * pointer: its conn_helper_info is filled in, both q_ptrs are redirected to
+ * the conn_t, and conn_rq/conn_wq are pointed at this stream.
+ */
+static void
+ip_helper_share_conn(queue_t *q, mblk_t *mp)
+{
+	ip_helper_stream_info_t	*info;
+	conn_t			*connp = NULL;
+	boolean_t		cached = IP_USE_HELPER_CACHE;
+
+	if (cached) {
+		info = *((ip_helper_stream_info_t **)mp->b_cont->b_rptr);
+	} else {
+		connp = *((conn_t **)mp->b_cont->b_rptr);
+		info = connp->conn_helper_info;
+	}
+
+	/* Capture q_ptr before it is (possibly) overwritten below. */
+	info->ip_helper_stream_minfo = q->q_ptr;
+	info->ip_helper_stream_rq = RD(q);
+	info->ip_helper_stream_wq = WR(q);
+
+	if (!cached) {
+		WR(q)->q_ptr = RD(q)->q_ptr = (void *)connp;
+		connp->conn_rq = RD(q);
+		connp->conn_wq = WR(q);
+	}
+
+	miocack(q, mp, 0, 0);
+}
+
+/*
+ * Write-side put procedure of the helper stream.  An M_IOCTL carrying
+ * SIOCSQPTR is consumed by ip_helper_share_conn(); every other message is
+ * passed to ip_wput_v6() or ip_wput() depending on the address family of
+ * the conn_t stored in q_ptr.
+ */
+void
+ip_helper_wput(queue_t *q, mblk_t *mp)
+{
+	conn_t	*connp;
+
+	if (DB_TYPE(mp) == M_IOCTL &&
+	    ((struct iocblk *)mp->b_rptr)->ioc_cmd == SIOCSQPTR) {
+		ip_helper_share_conn(q, mp);
+		return;
+	}
+
+	connp = (conn_t *)q->q_ptr;
+	if (connp->conn_af_isv6)
+		ip_wput_v6(q, mp);
+	else
+		ip_wput(q, mp);
+}
+
+/*
+ * Open-time setup of an IP helper stream.
+ *
+ * Allocates an ip_helper_minfo_t, reserves a minor number (large arena
+ * first, small arena as fallback), stores the minfo in both q_ptrs,
+ * rewrites *devp to the cloned device, installs the helper qinit tables
+ * and turns the queue procedures on.
+ *
+ * Returns 0 on success, ENOMEM if the minfo cannot be allocated, or EBUSY
+ * if no minor number is available.  Nothing is left allocated on failure.
+ */
+/* ARGSUSED */
+int
+ip_helper_stream_setup(queue_t *q, dev_t *devp, int flag, int sflag,
+    cred_t *credp, boolean_t isv6)
+{
+	major_t			maj;
+	ip_helper_minfo_t	*ip_minfop;
+
+	ASSERT((flag & ~(FKLYR)) == IP_HELPER_STR);
+
+	ASSERT(RD(q) == q);
+
+	ip_minfop = kmem_alloc(sizeof (ip_helper_minfo_t), KM_NOSLEEP);
+	if (ip_minfop == NULL) {
+		return (ENOMEM);
+	}
+
+	ip_minfop->ip_minfo_dev = 0;
+	ip_minfop->ip_minfo_arena = NULL;
+
+	/*
+	 * Clone the device, allocate minor device number
+	 */
+	if (ip_minor_arena_la != NULL)
+		ip_minfop->ip_minfo_dev = inet_minor_alloc(ip_minor_arena_la);
+
+	if (ip_minfop->ip_minfo_dev == 0) {
+		/*
+		 * Numbers in the large arena are exhausted; try the small
+		 * arena.  Or this is a 32 bit system: 32 bit systems do
+		 * not have ip_minor_arena_la.
+		 */
+		ip_minfop->ip_minfo_dev = inet_minor_alloc(ip_minor_arena_sa);
+		if (ip_minfop->ip_minfo_dev == 0) {
+			/* Don't leak the minfo when no minor is available. */
+			kmem_free(ip_minfop, sizeof (ip_helper_minfo_t));
+			return (EBUSY);
+		}
+		ip_minfop->ip_minfo_arena = ip_minor_arena_sa;
+	} else {
+		ip_minfop->ip_minfo_arena = ip_minor_arena_la;
+	}
+
+	ASSERT(ip_minfop->ip_minfo_dev != 0);
+	ASSERT(ip_minfop->ip_minfo_arena != NULL);
+
+	/* The close routine recovers the minfo from q_ptr. */
+	RD(q)->q_ptr = WR(q)->q_ptr = ip_minfop;
+
+	maj = getemajor(*devp);
+	*devp = makedevice(maj, (ulong_t)(ip_minfop->ip_minfo_dev));
+
+	q->q_qinfo = &ip_helper_stream_rinit;
+	WR(q)->q_qinfo = &ip_helper_stream_winit;
+	qprocson(q);
+	return (0);
+}
+
+/*
+ * Close routine of the helper stream: turn the queue procedures off, give
+ * the minor number back to the arena it came from, free the minfo found in
+ * q_ptr and clear q_ptr on both queues.  Always returns 0.
+ */
+/* ARGSUSED */
+static int
+ip_helper_stream_close(queue_t *q, int flag)
+{
+	ip_helper_minfo_t	*minfo;
+
+	qprocsoff(q);
+
+	minfo = q->q_ptr;
+	inet_minor_free(minfo->ip_minfo_arena, minfo->ip_minfo_dev);
+	kmem_free(minfo, sizeof (*minfo));
+
+	WR(q)->q_ptr = NULL;
+	RD(q)->q_ptr = NULL;
+	return (0);
+}
+
+/*
+ * Public interface for creating an IP stream that shares 'connp'.
+ *
+ * With the helper cache available, a pre-built helper is taken from the
+ * cache and wired to connp.  Otherwise the IP device is opened through LDI
+ * and connp is handed to it with SIOCSQPTR.
+ *
+ * Returns 0 on success or the error from ldi_open_by_name()/ldi_ioctl();
+ * on failure conn_helper_info is left NULL.
+ */
+/* ARGSUSED */
+int
+ip_create_helper_stream(conn_t *connp, ldi_ident_t li)
+{
+	ip_helper_stream_info_t	*info;
+	int			error;
+	int			rval;
+
+	ASSERT(!servicing_interrupt());
+
+	if (IP_USE_HELPER_CACHE) {
+		info = kmem_cache_alloc(ip_helper_stream_cache, KM_SLEEP);
+		connp->conn_helper_info = info;
+		ASSERT(info != NULL);
+
+		connp->conn_rq = info->ip_helper_stream_rq;
+		connp->conn_wq = info->ip_helper_stream_wq;
+		info->ip_helper_stream_rq->q_ptr = (void *)connp;
+		info->ip_helper_stream_wq->q_ptr = (void *)connp;
+		return (0);
+	}
+
+	ASSERT(connp->conn_helper_info == NULL);
+	info = kmem_alloc(sizeof (ip_helper_stream_info_t), KM_SLEEP);
+	connp->conn_helper_info = info;
+
+	/*
+	 * Open the ip device via the layered interface.  kcred is passed
+	 * because some threads lack the privilege to open /dev/ip and
+	 * would fail the check in secpolicy_spec_open().
+	 */
+	error = ldi_open_by_name(connp->conn_af_isv6 ? DEV_IP6 : DEV_IP,
+	    IP_HELPER_STR, kcred, &info->ip_helper_stream_handle, li);
+
+	if (error == 0) {
+		/* Share connp with the helper stream. */
+		error = ldi_ioctl(info->ip_helper_stream_handle, SIOCSQPTR,
+		    (intptr_t)connp, FKIOCTL, kcred, &rval);
+		if (error != 0) {
+			/*
+			 * A zero flag tells close that an error occurred
+			 * and the stream was never shared.
+			 */
+			(void) ldi_close(info->ip_helper_stream_handle, 0,
+			    kcred);
+		}
+	}
+
+	if (error != 0) {
+		kmem_free(info, sizeof (ip_helper_stream_info_t));
+		connp->conn_helper_info = NULL;
+	}
+	return (error);
+}
+
+/*
+ * Public interface for closing the shared IP stream
+ *
+ * Cached helper: quiesce the read-side service procedure, clean up pending
+ * ioctls, flush both queues, clear their q_ptrs and return the helper to
+ * ip_helper_stream_cache.
+ * LDI helper: put the saved minfo back into both q_ptrs, close the LDI
+ * handle and free the helper info.
+ * In both cases conn_helper_info is NULL on return.
+ */
+/* ARGSUSED */
+void
+ip_close_helper_stream(conn_t *connp)
+{
+ ASSERT(!servicing_interrupt());
+ if (IP_USE_HELPER_CACHE) {
+ ASSERT(connp->conn_helper_info->ip_helper_stream_rq != NULL);
+ ASSERT(connp->conn_helper_info->ip_helper_stream_wq != NULL);
+
+ /* Prevent service procedures from being called */
+ disable_svc(connp->conn_helper_info->ip_helper_stream_rq);
+
+ /* Wait until service procedure of each queue is run */
+ wait_svc(connp->conn_helper_info->ip_helper_stream_rq);
+
+ /* Cleanup any pending ioctls */
+ conn_ioctl_cleanup(connp);
+
+ /* Allow service procedures to be called again */
+ enable_svc(connp->conn_helper_info->ip_helper_stream_rq);
+
+ /* Flush the queues */
+ flushq(connp->conn_helper_info->ip_helper_stream_rq, FLUSHALL);
+ flushq(connp->conn_helper_info->ip_helper_stream_wq, FLUSHALL);
+
+ /* Detach the conn_t from both queues before recycling the helper */
+ connp->conn_helper_info->ip_helper_stream_rq->q_ptr = NULL;
+ connp->conn_helper_info->ip_helper_stream_wq->q_ptr = NULL;
+
+ kmem_cache_free(ip_helper_stream_cache,
+ connp->conn_helper_info);
+ } else {
+ ASSERT(
+ connp->conn_helper_info->ip_helper_stream_handle != NULL);
+
+ /*
+ * Restore the q_ptr value saved by ip_helper_share_conn().
+ * NOTE(review): presumably so that ip_helper_stream_close(),
+ * which reads an ip_helper_minfo_t from q_ptr, runs on the
+ * minfo rather than on connp - confirm that ldi_close() ends
+ * up in that routine.
+ */
+ connp->conn_helper_info->ip_helper_stream_rq->q_ptr =
+ connp->conn_helper_info->ip_helper_stream_wq->q_ptr =
+ connp->conn_helper_info->ip_helper_stream_minfo;
+ (void) ldi_close(
+ connp->conn_helper_info->ip_helper_stream_handle,
+ IP_HELPER_STR, kcred);
+ kmem_free(connp->conn_helper_info,
+ sizeof (ip_helper_stream_info_t));
+ }
+ connp->conn_helper_info = NULL;
+}
+
+/*
+ * Build a T_SVR4_OPTMGMT_REQ TPI message carrying a single option and send
+ * it down the helper stream.
+ *
+ * The message is laid out as T_optmgmt_req, then opthdr, then 'optlen'
+ * bytes of option value (copied from 'optval', or zero-filled when
+ * 'optval' is NULL).  'optset_context' becomes MGMT_flags.
+ *
+ * Returns ENOMEM if the mblk cannot be allocated, otherwise the result of
+ * ldi_putmsg().
+ */
+static int
+ip_send_option_request(conn_t *connp, uint_t optset_context, int level,
+    int option_name, const void *optval, t_uscalar_t optlen, cred_t *cr)
+{
+	struct T_optmgmt_req	*req;
+	struct opthdr		*hdr;
+	uchar_t			*value;
+	ssize_t			size;
+	mblk_t			*mp;
+
+	size = sizeof (struct T_optmgmt_req) + sizeof (struct opthdr) + optlen;
+	mp = allocb_cred(size, cr);
+	if (mp == NULL)
+		return (ENOMEM);
+	mp->b_datap->db_type = M_PROTO;
+
+	/* Locate the three consecutive pieces of the message. */
+	req = (struct T_optmgmt_req *)mp->b_wptr;
+	hdr = (struct opthdr *)(req + 1);
+	value = (uchar_t *)(hdr + 1);
+
+	req->PRIM_type = T_SVR4_OPTMGMT_REQ;
+	req->MGMT_flags = optset_context;
+	req->OPT_length = (t_scalar_t)sizeof (struct opthdr) + optlen;
+	req->OPT_offset = (t_scalar_t)sizeof (struct T_optmgmt_req);
+
+	hdr->level = level;
+	hdr->name = option_name;
+	hdr->len = optlen;
+
+	if (optval == NULL)
+		bzero(value, optlen);
+	else
+		bcopy(optval, value, optlen);
+
+	mp->b_wptr = value + optlen;
+
+	/* Send down the primitive. */
+	return (ldi_putmsg(connp->conn_helper_info->ip_helper_stream_handle,
+	    mp));
+}
+
+/*
+ * Wait for and process the response to a T_SVR4_OPTMGMT_REQ TPI message.
+ *
+ * connp		conn whose helper stream is read
+ * optset_context	T_CHECK (get) or T_NEGOTIATE (set)
+ * optval		T_CHECK only: buffer receiving the option value
+ * optlenp		T_CHECK only: in, size of optval; out, bytes copied
+ *
+ * Returns 0 on success; the ldi_getmsg() error; the error carried by a
+ * T_ERROR_ACK; or EPROTO if the reply is not a well-formed ack.
+ */
+static int
+ip_get_option_response(conn_t *connp, uint_t optset_context, void *optval,
+    t_uscalar_t *optlenp)
+{
+	union T_primitives	*tpr;
+	int			error;
+	mblk_t			*mp;
+
+	mp = NULL;
+
+	ASSERT(optset_context == T_CHECK || optset_context == T_NEGOTIATE);
+	error = ldi_getmsg(connp->conn_helper_info->ip_helper_stream_handle,
+	    &mp, NULL);
+	if (error != 0) {
+		return (error);
+	}
+
+	if (DB_TYPE(mp) != M_PCPROTO || MBLKL(mp) < sizeof (tpr->type)) {
+		error = EPROTO;
+		goto done;
+	}
+
+	tpr = (union T_primitives *)mp->b_rptr;
+
+	switch (tpr->type) {
+	case T_OPTMGMT_ACK:
+		if (MBLKL(mp) < TOPTMGMTACKSZ)
+			error = EPROTO;
+		break;
+	case T_ERROR_ACK:
+		if (MBLKL(mp) < TERRORACKSZ) {
+			error = EPROTO;
+			break;
+		}
+
+		if (tpr->error_ack.TLI_error == TSYSERR)
+			error = tpr->error_ack.UNIX_error;
+		else
+			error = proto_tlitosyserr(tpr->error_ack.TLI_error);
+		break;
+	default:
+		error = EPROTO;
+		break;
+	}
+
+	if ((optset_context == T_CHECK) && (error == 0)) {
+		struct T_optmgmt_ack	*optmgmt_ack;
+		struct opthdr		*opt_res;
+		void			*option;
+		t_uscalar_t		size;
+
+		optmgmt_ack = (struct T_optmgmt_ack *)mp->b_rptr;
+
+		/*
+		 * The option area must lie inside the mblk and be large
+		 * enough to hold an opthdr, otherwise reading
+		 * opt_res->len below would run past the message.
+		 */
+		if (optmgmt_ack->OPT_length <
+		    (t_scalar_t)sizeof (struct opthdr) ||
+		    !MBLKIN(mp, optmgmt_ack->OPT_offset,
+		    optmgmt_ack->OPT_length)) {
+			error = EPROTO;
+			goto done;
+		}
+
+		opt_res = (struct opthdr *)
+		    ((uintptr_t)mp->b_rptr + optmgmt_ack->OPT_offset);
+
+		/*
+		 * Check alignment
+		 */
+		if ((((uintptr_t)opt_res) & (__TPI_ALIGN_SIZE - 1)) != 0) {
+			error = EPROTO;
+			goto done;
+		}
+
+		option = &opt_res[1];
+
+		/*
+		 * The option value starts right after the opthdr, i.e. at
+		 * OPT_offset + sizeof (struct opthdr) from b_rptr; make
+		 * sure all opt_res->len bytes of it are inside the mblk.
+		 */
+		if ((((uintptr_t)option + opt_res->len) < (uintptr_t)option) ||
+		    !MBLKIN(mp,
+		    optmgmt_ack->OPT_offset + sizeof (struct opthdr),
+		    opt_res->len)) {
+			error = EPROTO;
+			goto done;
+		}
+
+		/* Never copy more than the caller's buffer can hold. */
+		size = MIN(opt_res->len, *optlenp);
+
+		/*
+		 * Copy data
+		 */
+		bcopy(option, optval, size);
+		*optlenp = size;
+	}
+
+done:
+	freemsg(mp);
+	return (error);
+}
+
+/*
+ * Public interface to get socket options via the ip helper stream.
+ * Sends a T_CHECK request for (level, option_name) and, if the send
+ * succeeds, returns the result of ip_get_option_response(), which fills in
+ * optval and *optlenp.
+ */
+int
+ip_get_options(conn_t *connp, int level, int option_name, void *optval,
+    t_uscalar_t *optlenp, cred_t *cr)
+{
+	int	error;
+
+	error = ip_send_option_request(connp, T_CHECK, level, option_name,
+	    NULL, *optlenp, cr);
+	if (!error) {
+		error = ip_get_option_response(connp, T_CHECK, optval,
+		    optlenp);
+	}
+	return (error);
+}
+
+/*
+ * Public interface to set socket options via the ip helper stream.
+ * Sends a T_NEGOTIATE request carrying optval/optlen and, if the send
+ * succeeds, returns the result of ip_get_option_response().
+ */
+int
+ip_set_options(conn_t *connp, int level, int option_name, const void *optval,
+    t_uscalar_t optlen, cred_t *cr)
+{
+	int	error;
+
+	error = ip_send_option_request(connp, T_NEGOTIATE, level, option_name,
+	    optval, optlen, cr);
+	if (!error) {
+		error = ip_get_option_response(connp, T_NEGOTIATE,
+		    (void *)optval, &optlen);
+	}
+	return (error);
+}
diff --git a/usr/src/uts/common/inet/ip/ip_if.c b/usr/src/uts/common/inet/ip/ip_if.c
index d767b25a76..0597245499 100644
--- a/usr/src/uts/common/inet/ip/ip_if.c
+++ b/usr/src/uts/common/inet/ip/ip_if.c
@@ -5845,6 +5845,55 @@ repeat:
}
/*
+ * Check if the address exists in the system.
+ * We don't hold the conn_lock because no deferred ipsq operation is
+ * performed here.
+ */
+boolean_t
+ip_addr_exists(ipaddr_t addr, zoneid_t zoneid, ip_stack_t *ipst)
+{
+	ill_walk_context_t	ctx;
+	ill_t			*ill;
+	ipif_t			*ipif;
+	boolean_t		found = B_FALSE;
+
+	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
+
+	for (ill = ILL_START_WALK_V4(&ctx, ipst); ill != NULL;
+	    ill = ill_next(&ctx, ill)) {
+		mutex_enter(&ill->ill_lock);
+		for (ipif = ill->ill_ipif; ipif != NULL;
+		    ipif = ipif->ipif_next) {
+			/* Skip ipifs that belong to some other zone. */
+			if (zoneid != ALL_ZONES &&
+			    zoneid != ipif->ipif_zoneid &&
+			    ipif->ipif_zoneid != ALL_ZONES)
+				continue;
+
+			/*
+			 * No IPIF_UP test: a down ipif still counts, and
+			 * so does one with the IPIF_CHANGING bit set.
+			 *
+			 * XXX Unlike ipif_lookup_addr() there is no second
+			 * lookup pass; for bind() the first match is
+			 * sufficient.
+			 */
+			if (((ipif->ipif_lcl_addr == addr) &&
+			    ((ipif->ipif_flags & IPIF_UNNUMBERED) == 0)) ||
+			    ((ipif->ipif_flags & IPIF_POINTOPOINT) &&
+			    (ipif->ipif_pp_dst_addr == addr))) {
+				found = B_TRUE;
+				break;
+			}
+		}
+		mutex_exit(&ill->ill_lock);
+		if (found)
+			break;
+	}
+
+	rw_exit(&ipst->ips_ill_g_lock);
+	return (found);
+}
+
+/*
* Look for an ipif with the specified address. For point-point links
* we look for matches on either the destination address and the local
* address, but we ignore the check on the local address if IPIF_UNNUMBERED
@@ -22145,7 +22194,6 @@ ip_sioctl_slifusesrc(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp,
usesrc_ill = ill_lookup_on_ifindex(ifindex, isv6, q, mp,
ip_process_ioctl, &err, ipst);
-
if (usesrc_ill == NULL) {
return (err);
}
diff --git a/usr/src/uts/common/inet/ip/ip_opt_data.c b/usr/src/uts/common/inet/ip/ip_opt_data.c
index 3df66ece60..bb6e98a99e 100644
--- a/usr/src/uts/common/inet/ip/ip_opt_data.c
+++ b/usr/src/uts/common/inet/ip/ip_opt_data.c
@@ -19,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/types.h>
#include <sys/stream.h>
#define _SUN_TPI_VERSION 2
@@ -45,7 +43,7 @@ extern int ip_opt_default(queue_t *q, int level, int name, uchar_t *ptr);
extern int ip_opt_get(queue_t *q, int level, int name, uchar_t *ptr);
extern int ip_opt_set(queue_t *q, uint_t optset_context, int level,
int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
- void *, cred_t *cr, mblk_t *);
+ void *dummy, cred_t *cr, mblk_t *first_mp);
/*
* Table of all known options handled on a IP protocol stack.
@@ -71,9 +69,11 @@ opdes_t ip_opt_arr[] = {
{ IP_OPTIONS, IPPROTO_IP, OA_RW, OA_RW, OP_NP,
- (OP_VARLEN|OP_NODEFAULT), 40, -1 /* not initialized */ },
+ (OP_VARLEN|OP_NODEFAULT),
+ IP_MAX_OPT_LENGTH + IP_ADDR_LEN, -1 /* not initialized */ },
{ T_IP_OPTIONS, IPPROTO_IP, OA_RW, OA_RW, OP_NP,
- (OP_VARLEN|OP_NODEFAULT), 40, -1 /* not initialized */ },
+ (OP_VARLEN|OP_NODEFAULT),
+ IP_MAX_OPT_LENGTH + IP_ADDR_LEN, -1 /* not initialized */ },
{ IP_TOS, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
{ T_IP_TOS, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
diff --git a/usr/src/uts/common/inet/ip/ip_rts.c b/usr/src/uts/common/inet/ip/ip_rts.c
index e232f6c04e..3324d1d833 100644
--- a/usr/src/uts/common/inet/ip/ip_rts.c
+++ b/usr/src/uts/common/inet/ip/ip_rts.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -38,8 +38,6 @@
* @(#)rtsock.c 8.6 (Berkeley) 2/11/95
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* This file contains routines that processes routing socket requests.
*/
@@ -104,10 +102,9 @@ static void ip_rts_request_retry(ipsq_t *, queue_t *q, mblk_t *mp, void *);
*
*/
void
-rts_queue_input(mblk_t *mp, queue_t *q, sa_family_t af, ip_stack_t *ipst)
+rts_queue_input(mblk_t *mp, conn_t *o_connp, sa_family_t af, ip_stack_t *ipst)
{
mblk_t *mp1;
- int checkqfull;
conn_t *connp, *next_connp;
mutex_enter(&ipst->ips_rts_clients->connf_lock);
@@ -130,24 +127,16 @@ rts_queue_input(mblk_t *mp, queue_t *q, sa_family_t af, ip_stack_t *ipst)
* socket, we check if there is room upstream for a copy of the
* message.
*/
- if ((q != NULL) && (CONNP_TO_RQ(connp) == RD(q))) {
- if (connp->conn_loopback == 0) {
+ if ((o_connp == connp) && connp->conn_loopback == 0) {
connp = connp->conn_next;
continue;
- }
- /*
- * Just because it is the same queue doesn't mean it
- * will promptly read its acks. Have to avoid using
- * all of kernel memory.
- */
- checkqfull = B_TRUE;
- } else {
- checkqfull = B_TRUE;
}
CONN_INC_REF(connp);
mutex_exit(&ipst->ips_rts_clients->connf_lock);
/* Pass to rts_input */
- if (!checkqfull || canputnext(CONNP_TO_RQ(connp))) {
+ if ((IPCL_IS_NONSTR(connp) && !PROTO_FLOW_CNTRLD(connp))||
+ (!IPCL_IS_NONSTR(connp) &&
+ canputnext(CONNP_TO_RQ(connp)))) {
mp1 = dupmsg(mp);
if (mp1 == NULL)
mp1 = copymsg(mp);
@@ -273,7 +262,7 @@ ip_rts_unregister(conn_t *connp)
* conn close occurs in conn_ioctl_cleanup.
*/
int
-ip_rts_request(queue_t *q, mblk_t *mp, cred_t *ioc_cr)
+ip_rts_request_common(queue_t *q, mblk_t *mp, conn_t *connp, cred_t *ioc_cr)
{
rt_msghdr_t *rtm = NULL;
in6_addr_t dst_addr_v6;
@@ -298,7 +287,6 @@ ip_rts_request(queue_t *q, mblk_t *mp, cred_t *ioc_cr)
ipif_t *ipif = NULL;
ipif_t *tmp_ipif = NULL;
IOCP iocp = (IOCP)mp->b_rptr;
- conn_t *connp;
boolean_t gcgrp_xtraref = B_FALSE;
tsol_gcgrp_addr_t ga;
tsol_rtsecattr_t rtsecattr;
@@ -311,8 +299,6 @@ ip_rts_request(queue_t *q, mblk_t *mp, cred_t *ioc_cr)
ip1dbg(("ip_rts_request: mp is %x\n", DB_TYPE(mp)));
- ASSERT(CONN_Q(q));
- connp = Q_TO_CONN(q);
zoneid = connp->conn_zoneid;
ipst = connp->conn_netstack->netstack_ip;
@@ -564,7 +550,7 @@ ip_rts_request(queue_t *q, mblk_t *mp, cred_t *ioc_cr)
error = ip_rt_add(dst_addr, net_mask, gw_addr, src_addr,
rtm->rtm_flags, ipif, &ire, B_FALSE,
- CONNP_TO_WQ(connp), ioc_mp, ip_rts_request_retry,
+ WR(q), ioc_mp, ip_rts_request_retry,
rtsap, ipst);
if (ipif != NULL)
ASSERT(!MUTEX_HELD(&ipif->ipif_ill->ill_lock));
@@ -602,7 +588,7 @@ ip_rts_request(queue_t *q, mblk_t *mp, cred_t *ioc_cr)
error = ip_rt_add_v6(&dst_addr_v6, &net_mask_v6,
&gw_addr_v6, &src_addr_v6, rtm->rtm_flags,
- ipif, &ire, CONNP_TO_WQ(connp), ioc_mp,
+ ipif, &ire, WR(q), ioc_mp,
ip_rts_request_retry, rtsap, ipst);
break;
}
@@ -616,7 +602,7 @@ ip_rts_request(queue_t *q, mblk_t *mp, cred_t *ioc_cr)
}
error = ip_rt_add_v6(&dst_addr_v6, &net_mask_v6,
&gw_addr_v6, NULL, rtm->rtm_flags,
- ipif, &ire, CONNP_TO_WQ(connp), ioc_mp,
+ ipif, &ire, WR(q), ioc_mp,
ip_rts_request_retry, rtsap, ipst);
if (ipif != NULL)
ASSERT(!MUTEX_HELD(&ipif->ipif_ill->ill_lock));
@@ -646,14 +632,12 @@ ip_rts_request(queue_t *q, mblk_t *mp, cred_t *ioc_cr)
case AF_INET:
error = ip_rt_delete(dst_addr, net_mask, gw_addr,
found_addrs, rtm->rtm_flags, ipif, B_FALSE,
- CONNP_TO_WQ(connp), ioc_mp, ip_rts_request_retry,
- ipst);
+ WR(q), ioc_mp, ip_rts_request_retry, ipst);
break;
case AF_INET6:
error = ip_rt_delete_v6(&dst_addr_v6, &net_mask_v6,
&gw_addr_v6, found_addrs, rtm->rtm_flags, ipif,
- CONNP_TO_WQ(connp), ioc_mp, ip_rts_request_retry,
- ipst);
+ WR(q), ioc_mp, ip_rts_request_retry, ipst);
break;
}
break;
@@ -867,7 +851,7 @@ ip_rts_request(queue_t *q, mblk_t *mp, cred_t *ioc_cr)
*/
tmp_ipif = ipif_lookup_addr(
src_addr, NULL, ALL_ZONES,
- CONNP_TO_WQ(connp), ioc_mp,
+ WR(q), ioc_mp,
ip_rts_request_retry,
&error, ipst);
if (tmp_ipif == NULL) {
@@ -1053,19 +1037,27 @@ done:
/* OK ACK already set up by caller except this */
ip2dbg(("ip_rts_request: OK ACK\n"));
}
- rts_queue_input(mp, q, af, ipst);
+ rts_queue_input(mp, connp, af, ipst);
}
+
iocp->ioc_error = error;
ioc_mp->b_datap->db_type = M_IOCACK;
if (iocp->ioc_error != 0)
iocp->ioc_count = 0;
(connp->conn_recv)(connp, ioc_mp, NULL);
+
/* conn was refheld in ip_wput_ioctl. */
CONN_OPER_PENDING_DONE(connp);
return (error);
}
+int
+ip_rts_request(queue_t *q, mblk_t *mp, cred_t *ioc_cr)
+{ /* queue-based wrapper: derives the conn_t from q with Q_TO_CONN() and delegates */
+ return (ip_rts_request_common(q, mp, Q_TO_CONN(q), ioc_cr));
+}
+
/*
* Build a reply to the RTM_GET request contained in the given message block
* using the retrieved IRE of the destination address, the parent IRE (if it
diff --git a/usr/src/uts/common/inet/ip/ipclassifier.c b/usr/src/uts/common/inet/ip/ipclassifier.c
index a19e729b41..50bd38c981 100644
--- a/usr/src/uts/common/inet/ip/ipclassifier.c
+++ b/usr/src/uts/common/inet/ip/ipclassifier.c
@@ -261,8 +261,8 @@
#include <inet/ip.h>
#include <inet/ip6.h>
-#include <inet/tcp.h>
#include <inet/ip_ndp.h>
+#include <inet/ip_impl.h>
#include <inet/udp_impl.h>
#include <inet/sctp_ip.h>
#include <inet/sctp/sctp_impl.h>
@@ -272,9 +272,11 @@
#include <sys/cpuvar.h>
#include <inet/ipclassifier.h>
+#include <inet/tcp.h>
#include <inet/ipsec_impl.h>
#include <sys/tsol/tnet.h>
+#include <sys/sockio.h>
#ifdef DEBUG
#define IPCL_DEBUG
@@ -325,6 +327,7 @@ typedef union itc_s {
struct kmem_cache *tcp_conn_cache;
struct kmem_cache *ip_conn_cache;
+struct kmem_cache *ip_helper_stream_cache;
extern struct kmem_cache *sctp_conn_cache;
extern struct kmem_cache *tcp_sack_info_cache;
extern struct kmem_cache *tcp_iphc_cache;
@@ -350,6 +353,11 @@ static void rawip_conn_destructor(void *, void *);
static int rts_conn_constructor(void *, void *, int);
static void rts_conn_destructor(void *, void *);
+static int ip_helper_stream_constructor(void *, void *, int);
+static void ip_helper_stream_destructor(void *, void *);
+
+boolean_t ip_use_helper_cache = B_TRUE;
+
#ifdef IPCL_DEBUG
#define INET_NTOA_BUFSIZE 18
@@ -394,6 +402,15 @@ ipcl_g_init(void)
sizeof (itc_t) + sizeof (rts_t), CACHE_ALIGN_SIZE,
rts_conn_constructor, rts_conn_destructor,
NULL, NULL, NULL, 0);
+
+ if (ip_use_helper_cache) {
+ ip_helper_stream_cache = kmem_cache_create
+ ("ip_helper_stream_cache", sizeof (ip_helper_stream_info_t),
+ CACHE_ALIGN_SIZE, ip_helper_stream_constructor,
+ ip_helper_stream_destructor, NULL, NULL, NULL, 0);
+ } else {
+ ip_helper_stream_cache = NULL;
+ }
}
/*
@@ -749,6 +766,7 @@ ipcl_conn_destroy(conn_t *connp)
connp->conn_netstack = NULL;
netstack_rele(ns);
}
+
ipcl_conn_cleanup(connp);
/* leave conn_priv aka conn_udp, conn_icmp, etc in place. */
@@ -756,6 +774,7 @@ ipcl_conn_destroy(conn_t *connp)
connp->conn_flags = IPCL_UDPCONN;
kmem_cache_free(udp_conn_cache, connp);
} else if (connp->conn_flags & IPCL_RAWIPCONN) {
+
connp->conn_flags = IPCL_RAWIPCONN;
connp->conn_ulp = IPPROTO_ICMP;
kmem_cache_free(rawip_conn_cache, connp);
@@ -2025,6 +2044,7 @@ tcp_conn_constructor(void *buf, void *cdrarg, int kmflags)
mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
+ cv_init(&connp->conn_sq_cv, NULL, CV_DEFAULT, NULL);
tcp->tcp_timercache = tcp_timermp_alloc(KM_NOSLEEP);
connp->conn_tcp = tcp;
connp->conn_flags = IPCL_TCPCONN;
@@ -2047,6 +2067,7 @@ tcp_conn_destructor(void *buf, void *cdrarg)
tcp_timermp_free(tcp);
mutex_destroy(&connp->conn_lock);
cv_destroy(&connp->conn_cv);
+ cv_destroy(&connp->conn_sq_cv);
}
/* ARGSUSED */
@@ -2181,15 +2202,56 @@ rts_conn_destructor(void *buf, void *cdrarg)
cv_destroy(&connp->conn_cv);
}
+/* ARGSUSED */
+static int
+ip_helper_stream_constructor(void *buf, void *cdrarg, int kmflags)
+{ /* cache constructor registered in ipcl_g_init(); cdrarg and kmflags are unused */
+ int error;
+ netstack_t *ns;
+ int ret;
+ tcp_stack_t *tcps;
+ ip_helper_stream_info_t *ip_helper_str;
+ ip_stack_t *ipst;
+
+ ns = netstack_find_by_cred(kcred); /* returns with a hold on the netstack */
+ ASSERT(ns != NULL);
+ tcps = ns->netstack_tcp; /* only consulted by the ASSERT below */
+ ipst = ns->netstack_ip;
+ ASSERT(tcps != NULL);
+ ip_helper_str = (ip_helper_stream_info_t *)buf;
+
+ error = ldi_open_by_name(DEV_IP, IP_HELPER_STR, kcred,
+ &ip_helper_str->ip_helper_stream_handle, ipst->ips_ldi_ident);
+ if (error != 0) {
+ goto done; /* nothing was opened, so there is nothing to close */
+ }
+ error = ldi_ioctl(ip_helper_str->ip_helper_stream_handle,
+ SIOCSQPTR, (intptr_t)buf, FKIOCTL, kcred, &ret); /* hands buf to the helper stream */
+ if (error != 0) {
+ (void) ldi_close(ip_helper_str->ip_helper_stream_handle, 0,
+ kcred); /* undo the open so a failed construction leaks no handle */
+ }
+done:
+ netstack_rele(ipst->ips_netstack); /* drops the hold taken above; presumably ips_netstack == ns */
+ return (error); /* 0 on success, otherwise the LDI error from open or ioctl */
+}
+
+/* ARGSUSED */
+static void
+ip_helper_stream_destructor(void *buf, void *cdrarg)
+{ /* cache destructor registered in ipcl_g_init(); cdrarg is unused */
+ ip_helper_stream_info_t *ip_helper_str = (ip_helper_stream_info_t *)buf;
+
+ ip_helper_str->ip_helper_stream_rq->q_ptr = /* both queues get the same q_ptr value */
+ ip_helper_str->ip_helper_stream_wq->q_ptr =
+ ip_helper_str->ip_helper_stream_minfo; /* NOTE(review): presumably restores the q_ptr the stream's close expects -- confirm */
+ (void) ldi_close(ip_helper_str->ip_helper_stream_handle, 0, kcred); /* close result is deliberately ignored */
+}
+
+
/*
* Called as part of ipcl_conn_destroy to assert and clear any pointers
* in the conn_t.
- *
- * Below we list all the pointers in the conn_t as a documentation aid.
- * The ones that we can not ASSERT to be NULL are #ifdef'ed out.
- * If you add any pointers to the conn_t please add an ASSERT here
- * and #ifdef it out if it can't be actually asserted to be NULL.
- * In any case, we bzero most of the conn_t at the end of the function.
*/
void
ipcl_conn_cleanup(conn_t *connp)
@@ -2197,7 +2259,6 @@ ipcl_conn_cleanup(conn_t *connp)
ASSERT(connp->conn_ire_cache == NULL);
ASSERT(connp->conn_latch == NULL);
#ifdef notdef
- /* These are not cleared */
ASSERT(connp->conn_rq == NULL);
ASSERT(connp->conn_wq == NULL);
#endif
@@ -2236,11 +2297,11 @@ ipcl_conn_cleanup(conn_t *connp)
ASSERT(connp->conn_peercred == NULL);
ASSERT(connp->conn_netstack == NULL);
+ ASSERT(connp->conn_helper_info == NULL);
/* Clear out the conn_t fields that are not preserved */
bzero(&connp->conn_start_clr,
sizeof (conn_t) -
((uchar_t *)&connp->conn_start_clr - (uchar_t *)connp));
-
}
/*
diff --git a/usr/src/uts/common/inet/ip/keysock.c b/usr/src/uts/common/inet/ip/keysock.c
index c982fb4c45..af0fd73d63 100644
--- a/usr/src/uts/common/inet/ip/keysock.c
+++ b/usr/src/uts/common/inet/ip/keysock.c
@@ -59,7 +59,7 @@
#include <inet/common.h>
#include <netinet/ip6.h>
#include <inet/ip.h>
-#include <inet/mi.h>
+#include <inet/proto_set.h>
#include <inet/nd.h>
#include <inet/optcom.h>
#include <inet/ipsec_info.h>
@@ -707,7 +707,8 @@ keysock_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
mutex_exit(&keystack->keystack_list_lock);
qprocson(q);
- (void) mi_set_sth_hiwat(q, keystack->keystack_recv_hiwat);
+ (void) proto_set_rx_hiwat(q, NULL,
+ keystack->keystack_recv_hiwat);
/*
* Wait outside the keysock module perimeter for IPsec
* plumbing to be completed. If it fails, keysock_close()
@@ -875,7 +876,7 @@ keysock_opt_set(queue_t *q, uint_t mgmt_flags, int level,
if (*i1 > keystack->keystack_max_buf)
return (ENOBUFS);
RD(q)->q_hiwat = *i1;
- (void) mi_set_sth_hiwat(RD(q), *i1);
+ (void) proto_set_rx_hiwat(RD(q), NULL, *i1);
break;
}
mutex_exit(&ks->keysock_lock);
diff --git a/usr/src/uts/common/inet/ip/rts.c b/usr/src/uts/common/inet/ip/rts.c
index 350a5fa887..7965d37483 100644
--- a/usr/src/uts/common/inet/ip/rts.c
+++ b/usr/src/uts/common/inet/ip/rts.c
@@ -23,8 +23,6 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/types.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
@@ -41,15 +39,17 @@
#include <sys/suntpi.h>
#include <sys/policy.h>
#include <sys/zone.h>
+#include <sys/disp.h>
#include <sys/socket.h>
+#include <sys/socketvar.h>
#include <netinet/in.h>
#include <inet/common.h>
#include <netinet/ip6.h>
#include <inet/ip.h>
#include <inet/ipclassifier.h>
-#include <inet/mi.h>
+#include <inet/proto_set.h>
#include <inet/nd.h>
#include <inet/optcom.h>
#include <netinet/ip_mroute.h>
@@ -111,20 +111,10 @@ static rtsparam_t lcl_param_arr[] = {
#define rtss_recv_hiwat rtss_params[2].rts_param_value
#define rtss_max_buf rtss_params[3].rts_param_value
-static int rts_close(queue_t *q);
static void rts_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error,
int sys_error);
static void rts_input(void *, mblk_t *, void *);
static mblk_t *rts_ioctl_alloc(mblk_t *data, cred_t *cr);
-static int rts_open(queue_t *q, dev_t *devp, int flag, int sflag,
- cred_t *credp);
-int rts_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name,
- uchar_t *ptr);
-int rts_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name,
- uchar_t *ptr);
-int rts_opt_set(queue_t *q, uint_t optset_context, int level,
- int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp,
- uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk);
static int rts_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr);
static boolean_t rts_param_register(IDP *ndp, rtsparam_t *rtspa, int cnt);
static int rts_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp,
@@ -137,12 +127,21 @@ static void rts_wput_iocdata(queue_t *q, mblk_t *mp);
static void rts_wput_other(queue_t *q, mblk_t *mp);
static int rts_wrw(queue_t *q, struiod_t *dp);
+static int rts_stream_open(queue_t *q, dev_t *devp, int flag, int sflag,
+ cred_t *credp);
+static conn_t *rts_open(int flag, cred_t *credp);
+
+static int rts_stream_close(queue_t *q);
+static int rts_close(sock_lower_handle_t proto_handle, int flags,
+ cred_t *cr);
+
static struct module_info rts_mod_info = {
129, "rts", 1, INFPSZ, 512, 128
};
static struct qinit rtsrinit = {
- NULL, (pfi_t)rts_rsrv, rts_open, rts_close, NULL, &rts_mod_info
+ NULL, (pfi_t)rts_rsrv, rts_stream_open, rts_stream_close, NULL,
+ &rts_mod_info
};
static struct qinit rtswinit = {
@@ -201,9 +200,8 @@ rts_ioctl_alloc(mblk_t *data, cred_t *cr)
* internal datastructure.
*/
static int
-rts_close(queue_t *q)
+rts_common_close(queue_t *q, conn_t *connp)
{
- conn_t *connp = Q_TO_CONN(q);
ASSERT(connp != NULL && IPCL_IS_RTS(connp));
@@ -211,25 +209,39 @@ rts_close(queue_t *q)
ip_quiesce_conn(connp);
- qprocsoff(q);
+ if (!IPCL_IS_NONSTR(connp)) {
+ qprocsoff(q);
- /*
- * Now we are truly single threaded on this stream, and can
- * delete the things hanging off the connp, and finally the connp.
- * We removed this connp from the fanout list, it cannot be
- * accessed thru the fanouts, and we already waited for the
- * conn_ref to drop to 0. We are already in close, so
- * there cannot be any other thread from the top. qprocsoff
- * has completed, and service has completed or won't run in
- * future.
- */
+ /*
+ * Now we are truly single threaded on this stream, and can
+ * delete the things hanging off the connp, and finally the
+ * connp.
+ * We removed this connp from the fanout list, it cannot be
+ * accessed thru the fanouts, and we already waited for the
+ * conn_ref to drop to 0. We are already in close, so
+ * there cannot be any other thread from the top. qprocsoff
+ * has completed, and service has completed or won't run in
+ * future.
+ */
+ inet_minor_free(connp->conn_minor_arena, connp->conn_dev);
+ } else {
+ ip_close_helper_stream(connp);
+ }
ASSERT(connp->conn_ref == 1);
- inet_minor_free(connp->conn_minor_arena, connp->conn_dev);
connp->conn_ref--;
ipcl_conn_destroy(connp);
+ return (0);
+}
+
+static int
+rts_stream_close(queue_t *q)
+{ /* STREAMS close entry point (installed in rtsrinit) */
+ conn_t *connp = Q_TO_CONN(q); /* fetched before q_ptr is cleared below */
+
+ (void) rts_common_close(q, connp); /* result ignored; the visible return path yields 0 */
q->q_ptr = WR(q)->q_ptr = NULL;
return (0);
}
@@ -240,14 +252,12 @@ rts_close(queue_t *q)
*/
/* ARGSUSED */
static int
-rts_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
+rts_stream_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
{
- rts_t *rts;
conn_t *connp;
dev_t conn_dev;
- zoneid_t zoneid;
- netstack_t *ns;
rts_stack_t *rtss;
+ rts_t *rts;
/* If the stream is already open, return immediately. */
if (q->q_ptr != NULL)
@@ -256,40 +266,26 @@ rts_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
if (sflag == MODOPEN)
return (EINVAL);
- ns = netstack_find_by_cred(credp);
- ASSERT(ns != NULL);
- rtss = ns->netstack_rts;
- ASSERT(rtss != NULL);
-
- /*
- * For exclusive stacks we set the zoneid to zero
- * to make RTS operate as if in the global zone.
- */
- if (ns->netstack_stackid != GLOBAL_NETSTACKID)
- zoneid = GLOBAL_ZONEID;
- else
- zoneid = crgetzoneid(credp);
/*
* Since RTS is not used so heavily, allocating from the small
* arena should be sufficient.
*/
if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) {
- netstack_rele(ns);
return (EBUSY);
}
+
+ connp = rts_open(flag, credp);
+ ASSERT(connp != NULL);
+
+
*devp = makedevice(getemajor(*devp), (minor_t)conn_dev);
- connp = ipcl_conn_create(IPCL_RTSCONN, KM_SLEEP, ns);
- connp->conn_dev = conn_dev;
- connp->conn_minor_arena = ip_minor_arena_sa;
rts = connp->conn_rts;
- /*
- * ipcl_conn_create did a netstack_hold. Undo the hold that was
- * done by netstack_find_by_cred()
- */
- netstack_rele(ns);
+ rw_enter(&rts->rts_rwlock, RW_WRITER);
+ connp->conn_dev = conn_dev;
+ connp->conn_minor_arena = ip_minor_arena_sa;
/*
* Initialize the rts_t structure for this stream.
@@ -299,25 +295,12 @@ rts_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
connp->conn_rq = q;
connp->conn_wq = WR(q);
- rw_enter(&rts->rts_rwlock, RW_WRITER);
- ASSERT(connp->conn_rts == rts);
- ASSERT(rts->rts_connp == connp);
-
- /* Set the initial state of the stream and the privilege status. */
- rts->rts_state = TS_UNBND;
- connp->conn_zoneid = zoneid;
-
- connp->conn_ulp_labeled = is_system_labeled();
-
- rts->rts_rtss = rtss;
-
+ rtss = rts->rts_rtss;
q->q_hiwat = rtss->rtss_recv_hiwat;
WR(q)->q_hiwat = rtss->rtss_xmit_hiwat;
WR(q)->q_lowat = rtss->rtss_xmit_lowat;
- connp->conn_recv = rts_input;
- crhold(credp);
- connp->conn_cred = credp;
+
mutex_enter(&connp->conn_lock);
connp->conn_state_flags &= ~CONN_INCIPIENT;
@@ -325,7 +308,6 @@ rts_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
qprocson(q);
rw_exit(&rts->rts_rwlock);
-
/*
* Indicate the down IP module that this is a routing socket
* client by sending an RTS IOCTL without any user data. Although
@@ -335,7 +317,67 @@ rts_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
ip_rts_register(connp);
return (0);
+}
+
+/* ARGSUSED */
+static conn_t *
+rts_open(int flag, cred_t *credp)
+{ /* creates and initializes an RTS conn_t; called from rts_stream_open(); flag is unused */
+ netstack_t *ns;
+ rts_stack_t *rtss;
+ rts_t *rts;
+ conn_t *connp;
+ zoneid_t zoneid;
+
+ ns = netstack_find_by_cred(credp);
+ ASSERT(ns != NULL);
+ rtss = ns->netstack_rts;
+ ASSERT(rtss != NULL);
+
+ /*
+ * For exclusive stacks we set the zoneid to zero
+ * to make RTS operate as if in the global zone.
+ */
+ if (ns->netstack_stackid != GLOBAL_NETSTACKID)
+ zoneid = GLOBAL_ZONEID;
+ else
+ zoneid = crgetzoneid(credp);
+
+ connp = ipcl_conn_create(IPCL_RTSCONN, KM_SLEEP, ns); /* result is used unchecked; KM_SLEEP presumably cannot fail -- confirm */
+ rts = connp->conn_rts;
+
+ /*
+ * ipcl_conn_create did a netstack_hold. Undo the hold that was
+ * done by netstack_find_by_cred()
+ */
+ netstack_rele(ns);
+
+
+ rw_enter(&rts->rts_rwlock, RW_WRITER);
+ ASSERT(connp->conn_rts == rts);
+ ASSERT(rts->rts_connp == connp);
+
+ connp->conn_zoneid = zoneid;
+ connp->conn_flow_cntrld = B_FALSE;
+ connp->conn_ulp_labeled = is_system_labeled();
+
+ rts->rts_rtss = rtss;
+ rts->rts_xmit_hiwat = rtss->rtss_xmit_hiwat; /* NOTE(review): rts_recv_hiwat is not seeded here although rts_opt_get() reports it for SO_RCVBUF -- confirm it is set elsewhere */
+
+ connp->conn_recv = rts_input;
+ crhold(credp); /* the conn keeps its own reference on the caller's credential */
+ connp->conn_cred = credp;
+
+ /*
+ * rts sockets start out as bound and connected
+ * For streams based sockets, socket state is set to
+ * SS_ISBOUND | SS_ISCONNECTED in so_strinit.
+ */
+ rts->rts_state = TS_DATA_XFER;
+ rw_exit(&rts->rts_rwlock);
+
+ return (connp); /* returned without touching conn_rq/conn_wq or CONN_INCIPIENT; rts_stream_open() handles those */
}
/*
@@ -362,7 +404,7 @@ rts_ok_ack(queue_t *q, mblk_t *mp)
* This routine is called by rts_wput to handle T_UNBIND_REQ messages.
*/
static void
-rts_unbind(queue_t *q, mblk_t *mp)
+rts_tpi_unbind(queue_t *q, mblk_t *mp)
{
conn_t *connp = Q_TO_CONN(q);
rts_t *rts = connp->conn_rts;
@@ -383,7 +425,7 @@ rts_unbind(queue_t *q, mblk_t *mp)
* O_T_BIND_REQ and T_BIND_REQ semantics.
*/
static void
-rts_bind(queue_t *q, mblk_t *mp)
+rts_tpi_bind(queue_t *q, mblk_t *mp)
{
conn_t *connp = Q_TO_CONN(q);
rts_t *rts = connp->conn_rts;
@@ -392,13 +434,13 @@ rts_bind(queue_t *q, mblk_t *mp)
if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) {
(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
- "rts_bind: bad data, %d", rts->rts_state);
+ "rts_tpi_bind: bad data, %d", rts->rts_state);
rts_err_ack(q, mp, TBADADDR, 0);
return;
}
if (rts->rts_state != TS_UNBND) {
(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
- "rts_bind: bad state, %d", rts->rts_state);
+ "rts_tpi_bind: bad state, %d", rts->rts_state);
rts_err_ack(q, mp, TOUTSTATE, 0);
return;
}
@@ -415,7 +457,7 @@ rts_bind(queue_t *q, mblk_t *mp)
tbr = (struct T_bind_req *)mp->b_rptr;
if (tbr->ADDR_length != 0) {
(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
- "rts_bind: bad ADDR_length %d", tbr->ADDR_length);
+ "rts_tpi_bind: bad ADDR_length %d", tbr->ADDR_length);
rts_err_ack(q, mp, TBADADDR, 0);
return;
}
@@ -498,16 +540,14 @@ rts_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
return (-1);
}
-/*
- * This routine retrieves the current status of socket options.
- * It returns the size of the option retrieved.
- */
-int
-rts_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
+
+static int
+rts_opt_get(conn_t *connp, int level, int name, uchar_t *ptr)
{
- int *i1 = (int *)ptr;
- conn_t *connp = Q_TO_CONN(q);
rts_t *rts = connp->conn_rts;
+ int *i1 = (int *)ptr;
+
+ ASSERT(RW_READ_HELD(&rts->rts_rwlock));
switch (level) {
case SOL_SOCKET:
@@ -543,12 +583,12 @@ rts_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
* but changing them should do nothing.
*/
case SO_SNDBUF:
- ASSERT(q->q_hiwat <= INT_MAX);
- *i1 = (int)(q->q_hiwat);
+ ASSERT(rts->rts_xmit_hiwat <= INT_MAX);
+ *i1 = (int)(rts->rts_xmit_hiwat);
break;
case SO_RCVBUF:
- ASSERT(q->q_hiwat <= INT_MAX);
- *i1 = (int)(RD(q)->q_hiwat);
+ ASSERT(rts->rts_recv_hiwat <= INT_MAX);
+ *i1 = (int)(rts->rts_recv_hiwat);
break;
case SO_DOMAIN:
*i1 = PF_ROUTE;
@@ -563,60 +603,17 @@ rts_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
return ((int)sizeof (int));
}
-
-/*
- * This routine sets socket options.
- */
-/*ARGSUSED*/
-int
-rts_opt_set(queue_t *q, uint_t optset_context, int level,
- int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp,
- uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk)
+/* ARGSUSED */
+static int
+rts_do_opt_set(conn_t *connp, int level, int name, uint_t inlen,
+ uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, cred_t *cr,
+ void *thisdg_attrs, boolean_t checkonly)
{
int *i1 = (int *)invalp;
- conn_t *connp = Q_TO_CONN(q);
rts_t *rts = connp->conn_rts;
- boolean_t checkonly;
rts_stack_t *rtss = rts->rts_rtss;
- switch (optset_context) {
- case SETFN_OPTCOM_CHECKONLY:
- checkonly = B_TRUE;
- /*
- * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
- * inlen != 0 implies value supplied and
- * we have to "pretend" to set it.
- * inlen == 0 implies that there is no
- * value part in T_CHECK request and just validation
- * done elsewhere should be enough, we just return here.
- */
- if (inlen == 0) {
- *outlenp = 0;
- return (0);
- }
- break;
- case SETFN_OPTCOM_NEGOTIATE:
- checkonly = B_FALSE;
- break;
- case SETFN_UD_NEGOTIATE:
- case SETFN_CONN_NEGOTIATE:
- checkonly = B_FALSE;
- /*
- * Negotiating local and "association-related" options
- * through T_UNITDATA_REQ or T_CONN_{REQ,CON}
- * Not allowed in this module.
- */
- return (EINVAL);
- default:
- /*
- * We should never get here
- */
- *outlenp = 0;
- return (EINVAL);
- }
-
- ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
- (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));
+ ASSERT(RW_WRITE_HELD(&rts->rts_rwlock));
/*
* For rts, we should have no ancillary data sent down
@@ -680,7 +677,9 @@ rts_opt_set(queue_t *q, uint_t optset_context, int level,
return (ENOBUFS);
}
if (!checkonly) {
- q->q_hiwat = *i1;
+ rts->rts_xmit_hiwat = *i1;
+ if (!IPCL_IS_NONSTR(connp))
+ connp->conn_wq->q_hiwat = *i1;
}
break; /* goto sizeof (int) option return */
case SO_RCVBUF:
@@ -689,9 +688,13 @@ rts_opt_set(queue_t *q, uint_t optset_context, int level,
return (ENOBUFS);
}
if (!checkonly) {
- RD(q)->q_hiwat = *i1;
- (void) mi_set_sth_hiwat(RD(q), *i1);
+ rts->rts_recv_hiwat = *i1;
+ rw_exit(&rts->rts_rwlock);
+ (void) proto_set_rx_hiwat(connp->conn_rq, connp,
+ *i1);
+ rw_enter(&rts->rts_rwlock, RW_WRITER);
}
+
break; /* goto sizeof (int) option return */
default:
*outlenp = 0;
@@ -705,11 +708,105 @@ rts_opt_set(queue_t *q, uint_t optset_context, int level,
/*
* Common case of return from an option that is sizeof (int)
*/
- *(int *)outvalp = *i1;
+ if (invalp != outvalp) {
+ /* don't trust bcopy for identical src/dst */
+ (void) bcopy(invalp, outvalp, inlen);
+ }
*outlenp = (t_uscalar_t)sizeof (int);
return (0);
}
+static int
+rts_opt_set(conn_t *connp, uint_t optset_context, int level, int name,
+ uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
+ void *thisdg_attrs, cred_t *cr)
+{ /* maps the TPI optset_context onto a checkonly flag, then calls rts_do_opt_set() */
+ boolean_t checkonly = B_FALSE;
+
+ if (optset_context) { /* a zero context skips this validation and leaves checkonly B_FALSE */
+ switch (optset_context) {
+ case SETFN_OPTCOM_CHECKONLY:
+ checkonly = B_TRUE;
+ /*
+ * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
+ * inlen != 0 implies value supplied and
+ * we have to "pretend" to set it.
+ * inlen == 0 implies that there is no value part
+ * in T_CHECK request and just validation
+ * done elsewhere should be enough, we just return here.
+ */
+ if (inlen == 0) {
+ *outlenp = 0;
+ return (0);
+ }
+ break;
+ case SETFN_OPTCOM_NEGOTIATE:
+ checkonly = B_FALSE;
+ break;
+ case SETFN_UD_NEGOTIATE:
+ case SETFN_CONN_NEGOTIATE:
+ checkonly = B_FALSE;
+ /*
+ * Negotiating local and "association-related" options
+ * through T_UNITDATA_REQ or T_CONN_{REQ,CON}
+ * Not allowed in this module.
+ */
+ return (EINVAL); /* note: *outlenp is left untouched on this path */
+ default:
+ /*
+ * We should never get here
+ */
+ *outlenp = 0;
+ return (EINVAL);
+ }
+
+ ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
+ (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); /* i.e. a CHECKONLY request reaching here carries a value */
+
+ }
+ return (rts_do_opt_set(connp, level, name, inlen, invalp, outlenp,
+ outvalp, cr, thisdg_attrs, checkonly)); /* rts_do_opt_set() asserts rts_rwlock is held as writer */
+
+}
+
+/*
+ * TPI entry point that retrieves the current value of a socket option for
+ * queue q; it returns rts_opt_get()'s result (the size of the option retrieved).
+ */
+int
+rts_tpi_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
+{
+ rts_t *rts;
+ int err; /* despite the name, holds rts_opt_get()'s return value, not an errno */
+
+ rts = Q_TO_RTS(q);
+ rw_enter(&rts->rts_rwlock, RW_READER); /* rts_opt_get() asserts the lock is read-held */
+ err = rts_opt_get(Q_TO_CONN(q), level, name, ptr);
+ rw_exit(&rts->rts_rwlock);
+ return (err);
+}
+
+/*
+ * TPI entry point that sets socket options: wraps rts_opt_set() with rts_rwlock held as writer.
+ */
+/*ARGSUSED*/
+int
+rts_tpi_opt_set(queue_t *q, uint_t optset_context, int level,
+ int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp,
+ uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk)
+{ /* mblk is accepted but not used in this function */
+ conn_t *connp = Q_TO_CONN(q);
+ int error;
+ rts_t *rts = connp->conn_rts;
+
+
+ rw_enter(&rts->rts_rwlock, RW_WRITER); /* rts_do_opt_set() asserts the lock is write-held */
+ error = rts_opt_set(connp, optset_context, level, name, inlen, invalp,
+ outlenp, outvalp, thisdg_attrs, cr);
+ rw_exit(&rts->rts_rwlock);
+ return (error); /* rts_opt_set()'s result is passed through unchanged */
+}
+
/*
* This routine retrieves the value of an ND variable in a rtsparam_t
* structure. It is called through nd_getset when a user reads the
@@ -803,7 +900,7 @@ rts_wrw(queue_t *q, struiod_t *dp)
rts->rts_error = EINTR;
goto err_ret;
}
- }
+ }
rts->rts_flag |= RTS_WRW_PENDING;
if (isuioq(q) && (error = struioget(q, mp, dp, 0))) {
@@ -954,10 +1051,10 @@ rts_wput_other(queue_t *q, mblk_t *mp)
switch (((union T_primitives *)rptr)->type) {
case T_BIND_REQ:
case O_T_BIND_REQ:
- rts_bind(q, mp);
+ rts_tpi_bind(q, mp);
return;
case T_UNBIND_REQ:
- rts_unbind(q, mp);
+ rts_tpi_unbind(q, mp);
return;
case T_CAPABILITY_REQ:
rts_capability_req(q, mp);
@@ -985,6 +1082,7 @@ rts_wput_other(queue_t *q, mblk_t *mp)
freemsg(mp);
(void) putnextctl1(RD(q), M_ERROR, EPROTO);
return;
+
default:
break;
}
@@ -1086,21 +1184,33 @@ rts_input(void *arg1, mblk_t *mp, void *arg2)
struct iocblk *iocp;
mblk_t *mp1;
struct T_data_ind *tdi;
+ int error;
switch (mp->b_datap->db_type) {
case M_IOCACK:
case M_IOCNAK:
iocp = (struct iocblk *)mp->b_rptr;
- if (rts->rts_flag & (RTS_WPUT_PENDING)) {
- rts->rts_flag &= ~RTS_WPUT_PENDING;
+ if (IPCL_IS_NONSTR(connp)) {
+ ASSERT(rts->rts_flag & (RTS_REQ_PENDING));
+ mutex_enter(&rts->rts_send_mutex);
+ rts->rts_flag &= ~RTS_REQ_INPROG;
rts->rts_error = iocp->ioc_error;
- /*
- * Tell rts_wvw/qwait that we are done.
- * Note: there is no qwait_wakeup() we can use.
- */
- qenable(connp->conn_rq);
+ cv_signal(&rts->rts_io_cv);
+ mutex_exit(&rts->rts_send_mutex);
freemsg(mp);
return;
+ } else {
+ if (rts->rts_flag & (RTS_WPUT_PENDING)) {
+ rts->rts_flag &= ~RTS_WPUT_PENDING;
+ rts->rts_error = iocp->ioc_error;
+ /*
+ * Tell rts_wrw/qwait that we are done.
+ * Note: there is no qwait_wakeup() we can use.
+ */
+ qenable(connp->conn_rq);
+ freemsg(mp);
+ return;
+ }
}
break;
case M_DATA:
@@ -1124,12 +1234,33 @@ rts_input(void *arg1, mblk_t *mp, void *arg2)
default:
break;
}
- putnext(connp->conn_rq, mp);
+
+ if (IPCL_IS_NONSTR(connp)) {
+ if ((*connp->conn_upcalls->su_recv)
+ (connp->conn_upper_handle, mp, msgdsize(mp), 0,
+ &error, NULL) < 0) {
+ ASSERT(error == ENOSPC);
+ /*
+ * Let's confirm, while holding the lock, that
+ * we are out of recv space.
+ */
+ mutex_enter(&rts->rts_recv_mutex);
+ if ((*connp->conn_upcalls->su_recv)
+ (connp->conn_upper_handle, NULL, 0, 0,
+ &error, NULL) < 0) {
+ ASSERT(error == ENOSPC);
+ connp->conn_flow_cntrld = B_TRUE;
+ }
+ mutex_exit(&rts->rts_recv_mutex);
+ }
+ } else {
+ putnext(connp->conn_rq, mp);
+ }
}
void
-rts_ddi_init(void)
+rts_ddi_g_init(void)
{
rts_max_optsize = optcom_max_optsize(rts_opt_obj.odb_opt_des_arr,
rts_opt_obj.odb_opt_arr_cnt);
@@ -1143,11 +1274,13 @@ rts_ddi_init(void)
}
void
-rts_ddi_destroy(void)
+rts_ddi_g_destroy(void)
{
netstack_unregister(NS_RTS);
}
+#define INET_NAME "ip"
+
/*
* Initialize the RTS stack instance.
*/
@@ -1157,6 +1290,8 @@ rts_stack_init(netstackid_t stackid, netstack_t *ns)
{
rts_stack_t *rtss;
rtsparam_t *pa;
+ int error = 0;
+ major_t major;
rtss = (rts_stack_t *)kmem_zalloc(sizeof (*rtss), KM_SLEEP);
rtss->rtss_netstack = ns;
@@ -1167,6 +1302,10 @@ rts_stack_init(netstackid_t stackid, netstack_t *ns)
(void) rts_param_register(&rtss->rtss_g_nd,
rtss->rtss_params, A_CNT(lcl_param_arr));
+
+ major = mod_name_to_major(INET_NAME);
+ error = ldi_ident_from_major(major, &rtss->rtss_ldi_ident);
+ ASSERT(error == 0);
return (rtss);
}
@@ -1182,5 +1321,411 @@ rts_stack_fini(netstackid_t stackid, void *arg)
nd_free(&rtss->rtss_g_nd);
kmem_free(rtss->rtss_params, sizeof (lcl_param_arr));
rtss->rtss_params = NULL;
+ ldi_ident_release(rtss->rtss_ldi_ident);
kmem_free(rtss, sizeof (*rtss));
}
+
+/*
+ * Accept downcall for routing sockets.  None of the arguments are
+ * examined; the call always fails with EINVAL.
+ */
+/* ARGSUSED */
+int
+rts_accept(sock_lower_handle_t lproto_handle,
+ sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle,
+ cred_t *cr)
+{
+ return (EINVAL);
+}
+
+/*
+ * Bind downcall for routing sockets.  None of the arguments are
+ * examined; the call always fails with EINVAL.
+ */
+/* ARGSUSED */
+static int
+rts_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa,
+ socklen_t len, cred_t *cr)
+{
+ /*
+ * Rebinding is not allowed.
+ */
+ return (EINVAL);
+}
+
+/*
+ * Listen downcall for routing sockets.  None of the arguments are
+ * examined; the call always fails with EINVAL.
+ */
+/* ARGSUSED */
+int
+rts_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr)
+{
+ return (EINVAL);
+}
+
+/*
+ * Connect downcall for routing sockets.  Stores 0 in *id and always
+ * returns EISCONN; the remaining arguments are not examined.
+ */
+/* ARGSUSED */
+int
+rts_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa,
+ socklen_t len, sock_connid_t *id, cred_t *cr)
+{
+ /*
+ * rts sockets start out as bound and connected
+ */
+ *id = 0;
+ return (EISCONN);
+}
+
+/*
+ * Getpeername downcall for routing sockets.
+ *
+ * Fills *addr with a zeroed struct sockaddr whose sa_family is AF_ROUTE
+ * and sets *addrlen to sizeof (struct sockaddr).  The incoming value of
+ * *addrlen is not consulted.  Always returns 0.
+ */
+/* ARGSUSED */
+int
+rts_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *addr,
+    socklen_t *addrlen, cred_t *cr)
+{
+	conn_t	*connp = (conn_t *)proto_handle;
+	rts_t	*rts = connp->conn_rts;
+
+	ASSERT(rts != NULL);
+
+	/* Publish the length first, then build the address itself. */
+	*addrlen = sizeof (struct sockaddr);
+
+	bzero(addr, sizeof (struct sockaddr));
+	addr->sa_family = AF_ROUTE;
+
+	return (0);
+}
+
+/*
+ * Getsockname downcall for routing sockets.  None of the arguments are
+ * examined; the call always fails with EOPNOTSUPP.
+ */
+/* ARGSUSED */
+int
+rts_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *addr,
+ socklen_t *addrlen, cred_t *cr)
+{
+ return (EOPNOTSUPP);
+}
+
+/*
+ * Getsockopt downcall for routing sockets.
+ *
+ * The request is first validated with proto_opt_check() against the RTS
+ * option table; a negative result is translated with
+ * proto_tlitosyserr() and any non-zero result is returned to the
+ * caller.  The option is then read with rts_opt_get() under
+ * rts_rwlock (reader) into a temporary buffer of the size reported by
+ * proto_opt_check().
+ *
+ * If rts_opt_get() returns a negative length the request is passed on
+ * to ip_get_options().  Otherwise at most *optlen bytes are copied to
+ * optvalp and *optlen is updated to the number of bytes copied.
+ *
+ * Returns 0 on success or an errno value.
+ */
+static int
+rts_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
+    void *optvalp, socklen_t *optlen, cred_t *cr)
+{
+	conn_t		*connp = (conn_t *)proto_handle;
+	rts_t		*rts = connp->conn_rts;
+	t_uscalar_t	bufsize;
+	void		*scratch;
+	int		optsize;
+	int		err;
+
+	err = proto_opt_check(level, option_name, *optlen, &bufsize,
+	    rts_opt_obj.odb_opt_des_arr,
+	    rts_opt_obj.odb_opt_arr_cnt,
+	    rts_opt_obj.odb_topmost_tpiprovider,
+	    B_FALSE, B_TRUE, cr);
+	if (err < 0)
+		return (proto_tlitosyserr(-err));
+	if (err > 0)
+		return (err);
+
+	scratch = kmem_alloc(bufsize, KM_SLEEP);
+
+	rw_enter(&rts->rts_rwlock, RW_READER);
+	optsize = rts_opt_get(connp, level, option_name, scratch);
+	rw_exit(&rts->rts_rwlock);
+
+	if (optsize >= 0) {
+		/*
+		 * Handled here: copy out the value and report its length.
+		 */
+		t_uscalar_t ncopy = MIN(optsize, *optlen);
+
+		bcopy(scratch, optvalp, ncopy);
+		bcopy(&ncopy, optlen, sizeof (ncopy));
+		err = 0;
+	} else {
+		/*
+		 * Not handled here: pass the request on to IP.
+		 */
+		err = ip_get_options(connp, level, option_name,
+		    optvalp, optlen, cr);
+	}
+
+	kmem_free(scratch, bufsize);
+	return (err);
+}
+
+/*
+ * Setsockopt downcall for routing sockets.
+ *
+ * The request is validated with proto_opt_check() against the RTS
+ * option table; a negative result is translated with
+ * proto_tlitosyserr() and any non-zero result is returned to the
+ * caller.  The option is then applied by rts_opt_set() in
+ * SETFN_OPTCOM_NEGOTIATE context while holding rts_rwlock as writer.
+ * The caller's value buffer is used for both the input and the output
+ * value, and the local copy of optlen receives the output length.
+ *
+ * Returns 0 on success or an errno value.
+ */
+static int
+rts_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
+    const void *optvalp, socklen_t optlen, cred_t *cr)
+{
+	conn_t	*connp = (conn_t *)proto_handle;
+	rts_t	*rts = connp->conn_rts;
+	uchar_t	*valp = (uchar_t *)optvalp;
+	int	err;
+
+	err = proto_opt_check(level, option_name, optlen, NULL,
+	    rts_opt_obj.odb_opt_des_arr,
+	    rts_opt_obj.odb_opt_arr_cnt,
+	    rts_opt_obj.odb_topmost_tpiprovider,
+	    B_TRUE, B_FALSE, cr);
+	if (err < 0)
+		return (proto_tlitosyserr(-err));
+	if (err > 0)
+		return (err);
+
+	rw_enter(&rts->rts_rwlock, RW_WRITER);
+	err = rts_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name,
+	    optlen, valp, (uint_t *)&optlen, valp, NULL, cr);
+	rw_exit(&rts->rts_rwlock);
+
+	ASSERT(err >= 0);
+
+	return (err);
+}
+
+/*
+ * Send downcall for routing sockets.
+ *
+ * mp is an M_DATA message holding a routing request.  The function
+ * stamps the caller's pid into the rt_msghdr_t, wraps the message with
+ * rts_ioctl_alloc(), serializes against other senders on this socket
+ * and hands the wrapped request to ip_rts_request_common().  If that
+ * returns EINPROGRESS the thread waits on rts_io_cv and then reports
+ * rts_error.
+ *
+ * Returns 0 or an errno value: EINVAL if the message is shorter than a
+ * routing message header, ENOMEM if the wrapper cannot be allocated,
+ * EINTR if the wait for the send slot is interrupted, otherwise the
+ * result of the request.
+ *
+ * The message is consumed on every path.
+ */
+/* ARGSUSED */
+static int
+rts_send(sock_lower_handle_t proto_handle, mblk_t *mp,
+    struct nmsghdr *msg, cred_t *cr)
+{
+	mblk_t		*mp1;
+	conn_t		*connp = (conn_t *)proto_handle;
+	rts_t		*rts = connp->conn_rts;
+	rt_msghdr_t	*rtm;
+	int		error;
+
+	/* Check rts before its first dereference below. */
+	ASSERT(rts != NULL);
+	ASSERT(DB_TYPE(mp) == M_DATA);
+	/*
+	 * The semantics of the routing socket is such that the rtm_pid
+	 * field is automatically filled in during requests with the
+	 * current process' pid.  We do this here (where we still have
+	 * user context) after checking we have at least a message the
+	 * size of a routing message header.
+	 */
+	if ((mp->b_wptr - mp->b_rptr) < sizeof (rt_msghdr_t)) {
+		if (!pullupmsg(mp, sizeof (rt_msghdr_t))) {
+			rts->rts_error = EINVAL;
+			freemsg(mp);
+			return (rts->rts_error);
+		}
+	}
+	rtm = (rt_msghdr_t *)mp->b_rptr;
+	rtm->rtm_pid = curproc->p_pid;
+
+	mp1 = rts_ioctl_alloc(mp, DB_CRED(mp));
+	if (mp1 == NULL) {
+		/* No wrapper was built, so mp is still ours to free. */
+		freemsg(mp);
+		return (ENOMEM);
+	}
+
+	/*
+	 * Allow only one outstanding request(ioctl) at any given time
+	 */
+	mutex_enter(&rts->rts_send_mutex);
+	while (rts->rts_flag & RTS_REQ_PENDING) {
+		int ret;
+
+		ret = cv_wait_sig(&rts->rts_send_cv, &rts->rts_send_mutex);
+		if (ret <= 0) {
+			mutex_exit(&rts->rts_send_mutex);
+			/*
+			 * Only mp1 is passed to ip_rts_request_common()
+			 * below, so mp1 is what carries the request data
+			 * from here on.  Free the wrapper as a whole;
+			 * freeing mp alone would leak the wrapper and
+			 * leave it referring to freed memory.
+			 */
+			freemsg(mp1);
+			return (EINTR);
+		}
+	}
+
+	rts->rts_flag |= RTS_REQ_PENDING;
+
+	rts->rts_flag |= RTS_REQ_INPROG;
+
+	mutex_exit(&rts->rts_send_mutex);
+
+	/*
+	 * NOTE(review): the matching release of this reference is not
+	 * visible in this function; presumably the request path drops
+	 * it -- confirm.
+	 */
+	CONN_INC_REF(connp);
+
+	error = ip_rts_request_common(rts->rts_connp->conn_wq, mp1, connp,
+	    DB_CREDDEF(mp, connp->conn_cred));
+
+	mutex_enter(&rts->rts_send_mutex);
+	if (error == EINPROGRESS) {
+		ASSERT(rts->rts_flag & RTS_REQ_INPROG);
+		if (rts->rts_flag & RTS_REQ_INPROG) {
+			/*
+			 * Once the request has been issued we wait for
+			 * completion.
+			 * NOTE(review): this is a single cv_wait() rather
+			 * than a loop on a predicate; it relies on the
+			 * completion side updating rts_error before
+			 * signalling rts_io_cv -- confirm.
+			 */
+			cv_wait(&rts->rts_io_cv, &rts->rts_send_mutex);
+			error = rts->rts_error;
+		}
+	}
+
+	ASSERT((error != 0) || !(rts->rts_flag & RTS_REQ_INPROG));
+	ASSERT(MUTEX_HELD(&rts->rts_send_mutex));
+
+	/* Release the send slot and wake one waiting sender, if any. */
+	rts->rts_flag &= ~(RTS_REQ_PENDING | RTS_REQ_INPROG);
+	cv_signal(&rts->rts_send_cv);
+	mutex_exit(&rts->rts_send_mutex);
+	return (error);
+}
+
+/*
+ * Create the protocol side of a routing socket.
+ *
+ * Accepts only family AF_ROUTE, type SOCK_RAW and a proto of 0, AF_INET
+ * or AF_INET6; anything else fails with *errorp = EPROTONOSUPPORT.
+ * On success the conn_t obtained from rts_open() is flagged
+ * IPCL_NONSTR, its watermarks are initialized, an IP helper stream is
+ * created for it, CONN_INCIPIENT is cleared, and the conn_t is returned
+ * as the lower handle with *errorp = 0, *smodep = SM_ATOMIC and
+ * *sock_downcalls pointing at sock_rts_downcalls.
+ *
+ * Returns NULL on failure with the reason in *errorp.
+ */
+/* ARGSUSED */
+sock_lower_handle_t
+rts_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls,
+ uint_t *smodep, int *errorp, int flags, cred_t *credp)
+{
+ conn_t *connp;
+ rts_t *rts;
+ rts_stack_t *rtss;
+
+ if (family != AF_ROUTE || type != SOCK_RAW ||
+ (proto != 0 && proto != AF_INET && proto != AF_INET6)) {
+ *errorp = EPROTONOSUPPORT;
+ return (NULL);
+ }
+
+ connp = rts_open(flags, credp);
+ /*
+ * NOTE(review): this check exists only in DEBUG builds; if
+ * rts_open() can return NULL the dereference below is unguarded
+ * in non-DEBUG builds -- confirm.
+ */
+ ASSERT(connp != NULL);
+ connp->conn_flags |= IPCL_NONSTR;
+
+ rts = connp->conn_rts;
+ rtss = rts->rts_rtss;
+
+ /* Watermarks come from the per-stack values and the module info. */
+ rts->rts_xmit_hiwat = rtss->rtss_xmit_hiwat;
+ rts->rts_xmit_lowat = rtss->rtss_xmit_lowat;
+ rts->rts_recv_hiwat = rtss->rtss_recv_hiwat;
+ rts->rts_recv_lowat = rts_mod_info.mi_lowat;
+
+ ASSERT(rtss->rtss_ldi_ident != NULL);
+
+ *errorp = ip_create_helper_stream(connp, rtss->rtss_ldi_ident);
+ if (*errorp != 0) {
+#ifdef DEBUG
+ cmn_err(CE_CONT, "rts_create: create of IP helper stream"
+ " failed\n");
+#endif
+ /* Undo rts_open(); *errorp keeps the helper stream error. */
+ (void) rts_close((sock_lower_handle_t)connp, 0, credp);
+ return (NULL);
+ }
+
+ /* Setup is complete; the conn is no longer incipient. */
+ mutex_enter(&connp->conn_lock);
+ connp->conn_state_flags &= ~CONN_INCIPIENT;
+ mutex_exit(&connp->conn_lock);
+
+ *errorp = 0;
+ *smodep = SM_ATOMIC;
+ *sock_downcalls = &sock_rts_downcalls;
+ return ((sock_lower_handle_t)connp);
+}
+
+/*
+ * Activate downcall for routing sockets.
+ *
+ * Records the upcall vector and upper handle in the conn_t, pushes the
+ * protocol properties (write offset, receive watermarks, maximum block
+ * size, packet size limits) to the upper layer through
+ * su_set_proto_props, reports the socket as connected through
+ * su_connected, and finally registers the conn with IP through
+ * ip_rts_register().
+ */
+/* ARGSUSED */
+void
+rts_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle,
+ sock_upcalls_t *sock_upcalls, int flags, cred_t *cr)
+{
+ conn_t *connp = (conn_t *)proto_handle;
+ rts_t *rts = connp->conn_rts;
+ rts_stack_t *rtss = rts->rts_rtss;
+ struct sock_proto_props sopp;
+
+ connp->conn_upcalls = sock_upcalls;
+ connp->conn_upper_handle = sock_handle;
+
+ /* sopp_flags names exactly the fields that are filled in below. */
+ sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT |
+ SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ;
+ sopp.sopp_wroff = 0;
+ sopp.sopp_rxhiwat = rtss->rtss_recv_hiwat;
+ sopp.sopp_rxlowat = rts_mod_info.mi_lowat;
+ sopp.sopp_maxblk = INFPSZ;
+ sopp.sopp_maxpsz = rts_mod_info.mi_maxpsz;
+ /* A module minimum packet size of 1 is reported upward as 0. */
+ sopp.sopp_minpsz = (rts_mod_info.mi_minpsz == 1) ? 0 :
+ rts_mod_info.mi_minpsz;
+
+ (*connp->conn_upcalls->su_set_proto_props)
+ (connp->conn_upper_handle, &sopp);
+
+ /*
+ * We treat it as already connected for routing socket.
+ */
+ (*connp->conn_upcalls->su_connected)
+ (connp->conn_upper_handle, 0, NULL, -1);
+
+ /*
+ * Tell IP that this conn is a routing socket client.
+ * NOTE(review): an earlier wording of this comment described
+ * sending an RTS ioctl with a credential; the code here makes a
+ * direct ip_rts_register() call and passes no credential.
+ */
+ ip_rts_register(connp);
+}
+
+/*
+ * Close downcall for routing sockets.  Hands the conn_t to
+ * rts_common_close() with a NULL first argument and returns its
+ * result; flags and cr are not used.
+ */
+/* ARGSUSED */
+int
+rts_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr)
+{
+ conn_t *connp = (conn_t *)proto_handle;
+
+ ASSERT(connp != NULL && IPCL_IS_RTS(connp));
+ return (rts_common_close(NULL, connp));
+}
+
+/*
+ * Shutdown downcall for routing sockets.
+ *
+ * Notifies the upper layer through su_opctl: the send side is shut
+ * down unless how is SHUT_RD, and the receive side is shut down unless
+ * how is SHUT_WR.  Any other value of how therefore shuts down both
+ * sides.  Always returns 0.
+ */
+/* ARGSUSED */
+int
+rts_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr)
+{
+	conn_t		*connp = (conn_t *)proto_handle;
+	boolean_t	shut_send = (how != SHUT_RD);
+	boolean_t	shut_recv = (how != SHUT_WR);
+
+	if (shut_send) {
+		(*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
+		    SOCK_OPCTL_SHUT_SEND, 0);
+	}
+
+	if (shut_recv) {
+		(*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
+		    SOCK_OPCTL_SHUT_RECV, 0);
+	}
+
+	return (0);
+}
+
+/*
+ * Clear-flow-control downcall for routing sockets.  Resets
+ * conn_flow_cntrld to B_FALSE while holding rts_recv_mutex.
+ */
+void
+rts_clr_flowctrl(sock_lower_handle_t proto_handle)
+{
+ conn_t *connp = (conn_t *)proto_handle;
+ rts_t *rts = connp->conn_rts;
+
+ mutex_enter(&rts->rts_recv_mutex);
+ connp->conn_flow_cntrld = B_FALSE;
+ mutex_exit(&rts->rts_recv_mutex);
+}
+
+/*
+ * Ioctl downcall for routing sockets.
+ *
+ * ND_SET, ND_GET, TI_GETPEERNAME and TI_GETMYNAME are rejected with
+ * EINVAL (with a console message in DEBUG builds).  Every other
+ * command is forwarded to IP with ldi_ioctl() on the conn's helper
+ * stream handle, and the result of that call is returned.
+ */
+int
+rts_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg,
+    int mode, int32_t *rvalp, cred_t *cr)
+{
+	conn_t	*connp = (conn_t *)proto_handle;
+	int	error;
+
+	switch (cmd) {
+	case ND_SET:
+	case ND_GET:
+	case TI_GETPEERNAME:
+	case TI_GETMYNAME:
+#ifdef DEBUG
+		cmn_err(CE_CONT, "rts_ioctl cmd 0x%x on non streams"
+		    " socket\n", cmd);
+#endif
+		error = EINVAL;
+		break;
+	default:
+		/*
+		 * Pass on to IP using helper stream
+		 */
+		error = ldi_ioctl(
+		    connp->conn_helper_info->ip_helper_stream_handle,
+		    cmd, arg, mode, cr, rvalp);
+		break;
+	}
+
+	return (error);
+}
+
+/*
+ * Downcall vector for routing sockets; rts_create() hands its address
+ * back through *sock_downcalls.  The initializer is positional.
+ * NOTE(review): the three NULL slots leave those downcalls unset; the
+ * member names are not visible here -- check them against the
+ * sock_downcalls_t definition.
+ */
+sock_downcalls_t sock_rts_downcalls = {
+ rts_activate,
+ rts_accept,
+ rts_bind,
+ rts_listen,
+ rts_connect,
+ rts_getpeername,
+ rts_getsockname,
+ rts_getsockopt,
+ rts_setsockopt,
+ rts_send,
+ NULL,
+ NULL,
+ NULL,
+ rts_shutdown,
+ rts_clr_flowctrl,
+ rts_ioctl,
+ rts_close
+};
diff --git a/usr/src/uts/common/inet/ip/rts_opt_data.c b/usr/src/uts/common/inet/ip/rts_opt_data.c
index f815cf086c..bac0eabdc4 100644
--- a/usr/src/uts/common/inet/ip/rts_opt_data.c
+++ b/usr/src/uts/common/inet/ip/rts_opt_data.c
@@ -19,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/types.h>
#include <sys/stream.h>
#define _SUN_TPI_VERSION 2
@@ -40,14 +38,7 @@
#include <netinet/tcp.h>
#include <netinet/ip_mroute.h>
#include <inet/optcom.h>
-
-extern int rts_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name,
- uchar_t *ptr);
-extern int rts_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name,
- uchar_t *ptr);
-extern int rts_opt_set(queue_t *q, uint_t optset_context, int level,
- int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp,
- uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk);
+#include <inet/rts_impl.h>
/*
* Table of all known options handled on a RTS protocol stack.
@@ -102,8 +93,8 @@ uint_t rts_max_optsize; /* initialized in _init() */
optdb_obj_t rts_opt_obj = {
rts_opt_default, /* RTS default value function pointer */
- rts_opt_get, /* RTS get function pointer */
- rts_opt_set, /* RTS set function pointer */
+ rts_tpi_opt_get, /* RTS get function pointer */
+ rts_tpi_opt_set, /* RTS set function pointer */
B_TRUE, /* RTS is tpi provider */
RTS_OPT_ARR_CNT, /* RTS option database count of entries */
rts_opt_arr, /* RTS option database */
diff --git a/usr/src/uts/common/inet/ip/rtsddi.c b/usr/src/uts/common/inet/ip/rtsddi.c
index 27704da503..482c53ab5c 100644
--- a/usr/src/uts/common/inet/ip/rtsddi.c
+++ b/usr/src/uts/common/inet/ip/rtsddi.c
@@ -28,10 +28,22 @@
#include <sys/modctl.h>
#include <inet/common.h>
#include <inet/ip.h>
+#include <inet/rts_impl.h>
+#include <sys/strsubr.h>
+#include <sys/socketvar.h>
+
+#include <netinet/in.h>
+#include <netinet/ip6.h>
+
+#include <inet/common.h>
+#include <inet/ip.h>
+
#define INET_NAME "rts"
#define INET_DEVSTRTAB rtsinfo
#define INET_DEVDESC "PF_ROUTE socket STREAMS driver"
+#define INET_SOCKDESC "PF_ROUTE socket module"
+#define INET_SOCK_PROTO_CREATE_FUNC (*rts_create)
#define INET_DEVMINOR 0
#define INET_DEVMTFLAGS (D_MP|D_MTQPAIR|D_SYNCSTR)
diff --git a/usr/src/uts/common/inet/ip/spdsock.c b/usr/src/uts/common/inet/ip/spdsock.c
index dc2e113505..749db40ee6 100644
--- a/usr/src/uts/common/inet/ip/spdsock.c
+++ b/usr/src/uts/common/inet/ip/spdsock.c
@@ -23,8 +23,6 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/param.h>
#include <sys/types.h>
#include <sys/stream.h>
@@ -55,6 +53,7 @@
#include <inet/ip.h>
#include <inet/ip6.h>
#include <inet/mi.h>
+#include <inet/proto_set.h>
#include <inet/nd.h>
#include <inet/ip_if.h>
#include <inet/tun.h>
@@ -3199,7 +3198,7 @@ spdsock_opt_set(queue_t *q, uint_t mgmt_flags, int level, int name,
if (*i1 > spds->spds_max_buf)
return (ENOBUFS);
RD(q)->q_hiwat = *i1;
- (void) mi_set_sth_hiwat(RD(q), *i1);
+ (void) proto_set_rx_hiwat(RD(q), NULL, *i1);
break;
}
break;
@@ -3407,7 +3406,7 @@ spdsock_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
oq->q_lowat = spds->spds_xmit_lowat;
qprocson(q);
- (void) mi_set_sth_hiwat(q, spds->spds_recv_hiwat);
+ (void) proto_set_rx_hiwat(q, NULL, spds->spds_recv_hiwat);
*devp = makedevice(getmajor(*devp), ss->spdsock_minor);
return (0);
diff --git a/usr/src/uts/common/inet/ip6.h b/usr/src/uts/common/inet/ip6.h
index 1dbe8c3dd1..d463c3f6ee 100644
--- a/usr/src/uts/common/inet/ip6.h
+++ b/usr/src/uts/common/inet/ip6.h
@@ -378,9 +378,9 @@ extern void mld_timeout_handler(void *);
extern void pr_addr_dbg(char *, int, const void *);
extern int ip_multirt_apply_membership_v6(int (*fn)(conn_t *, boolean_t,
- const in6_addr_t *, int, mcast_record_t, const in6_addr_t *,
- mblk_t *), ire_t *, conn_t *, boolean_t, const in6_addr_t *,
- mcast_record_t, const in6_addr_t *, mblk_t *);
+ const in6_addr_t *, int, mcast_record_t, const in6_addr_t *, mblk_t *),
+ ire_t *, conn_t *, boolean_t, const in6_addr_t *, mcast_record_t,
+ const in6_addr_t *, mblk_t *);
extern void ip_newroute_ipif_v6(queue_t *, mblk_t *, ipif_t *,
in6_addr_t, int, zoneid_t);
extern void ip_newroute_v6(queue_t *, mblk_t *, const in6_addr_t *,
@@ -391,6 +391,11 @@ extern size_t ip6_get_src_preferences(conn_t *, uint32_t *);
extern int ip6_set_src_preferences(conn_t *, uint32_t);
extern int ip6_set_pktinfo(cred_t *, conn_t *, struct in6_pktinfo *,
mblk_t *);
+extern int ip_proto_bind_laddr_v6(conn_t *, mblk_t **, uint8_t,
+ const in6_addr_t *, uint16_t, boolean_t);
+extern int ip_proto_bind_connected_v6(conn_t *, mblk_t **,
+ uint8_t, in6_addr_t *, uint16_t, const in6_addr_t *, ip6_pkt_t *,
+ uint16_t, boolean_t, boolean_t);
#endif /* _KERNEL */
diff --git a/usr/src/uts/common/inet/ip_if.h b/usr/src/uts/common/inet/ip_if.h
index c0a6c51696..c5982de059 100644
--- a/usr/src/uts/common/inet/ip_if.h
+++ b/usr/src/uts/common/inet/ip_if.h
@@ -234,8 +234,11 @@ extern ipif_t *ipif_getby_indexes(uint_t, uint_t, boolean_t, ip_stack_t *);
extern void ipif_init(ip_stack_t *);
extern ipif_t *ipif_lookup_addr(ipaddr_t, ill_t *, zoneid_t, queue_t *,
mblk_t *, ipsq_func_t, int *, ip_stack_t *);
+extern boolean_t ip_addr_exists(ipaddr_t, zoneid_t, ip_stack_t *);
extern ipif_t *ipif_lookup_addr_v6(const in6_addr_t *, ill_t *, zoneid_t,
queue_t *, mblk_t *, ipsq_func_t, int *, ip_stack_t *);
+extern boolean_t ip_addr_exists_v6(const in6_addr_t *, zoneid_t,
+ ip_stack_t *);
extern zoneid_t ipif_lookup_addr_zoneid(ipaddr_t, ill_t *, ip_stack_t *);
extern zoneid_t ipif_lookup_addr_zoneid_v6(const in6_addr_t *, ill_t *,
ip_stack_t *);
diff --git a/usr/src/uts/common/inet/ip_impl.h b/usr/src/uts/common/inet/ip_impl.h
index f7a9b8ff58..dae62ab499 100644
--- a/usr/src/uts/common/inet/ip_impl.h
+++ b/usr/src/uts/common/inet/ip_impl.h
@@ -44,6 +44,8 @@ extern "C" {
#define IP_MOD_ID 5701
+#define INET_NAME "ip"
+
#ifdef _BIG_ENDIAN
#define IP_HDR_CSUM_TTL_ADJUST 256
#define IP_TCP_CSUM_COMP IPPROTO_TCP
@@ -546,6 +548,22 @@ extern zoneid_t ip_get_zoneid_v4(ipaddr_t, mblk_t *, ip_stack_t *, zoneid_t);
extern zoneid_t ip_get_zoneid_v6(in6_addr_t *, mblk_t *, const ill_t *,
ip_stack_t *, zoneid_t);
+/*
+ * flag passed in by IP based protocols to get a private ip stream with
+ * no conn_t. Note this flag has the same value as SO_FALLBACK
+ */
+#define IP_HELPER_STR SO_FALLBACK
+
+#define IP_MOD_MINPSZ 1
+#define IP_MOD_MAXPSZ INFPSZ
+#define IP_MOD_HIWAT 65536
+#define IP_MOD_LOWAT 1024
+
+#define DEV_IP "/devices/pseudo/ip@0:ip"
+#define DEV_IP6 "/devices/pseudo/ip6@0:ip6"
+
+extern struct kmem_cache *ip_helper_stream_cache;
+
#endif /* _KERNEL */
#ifdef __cplusplus
diff --git a/usr/src/uts/common/inet/ip_rts.h b/usr/src/uts/common/inet/ip_rts.h
index a8d3971192..70b33e0278 100644
--- a/usr/src/uts/common/inet/ip_rts.h
+++ b/usr/src/uts/common/inet/ip_rts.h
@@ -19,15 +19,13 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _INET_IP_RTS_H
#define _INET_IP_RTS_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -72,8 +70,9 @@ extern void rts_fill_msg_v6(int, int, const in6_addr_t *,
extern size_t rts_header_msg_size(int);
-extern void rts_queue_input(mblk_t *, queue_t *, sa_family_t,
- ip_stack_t *);
+extern void rts_queue_input(mblk_t *, conn_t *, sa_family_t, ip_stack_t *);
+
+extern int ip_rts_request_common(queue_t *q, mblk_t *mp, conn_t *, cred_t *);
#endif /* _KERNEL */
#ifdef __cplusplus
diff --git a/usr/src/uts/common/inet/ip_stack.h b/usr/src/uts/common/inet/ip_stack.h
index d0c3953374..3c53e1a3d3 100644
--- a/usr/src/uts/common/inet/ip_stack.h
+++ b/usr/src/uts/common/inet/ip_stack.h
@@ -425,6 +425,8 @@ struct ip_stack {
kmutex_t ips_ipobs_cb_lock;
uint_t ips_ipobs_cb_nwalkers;
kcondvar_t ips_ipobs_cb_cv;
+
+ struct __ldi_ident *ips_ldi_ident;
};
typedef struct ip_stack ip_stack_t;
diff --git a/usr/src/uts/common/inet/ipclassifier.h b/usr/src/uts/common/inet/ipclassifier.h
index 4665549c69..39cdddb7c4 100644
--- a/usr/src/uts/common/inet/ipclassifier.h
+++ b/usr/src/uts/common/inet/ipclassifier.h
@@ -37,6 +37,9 @@ extern "C" {
#include <inet/ip6.h>
#include <netinet/in.h> /* for IPPROTO_* constants */
#include <sys/sdt.h>
+#include <sys/socket_proto.h>
+#include <sys/sunddi.h>
+#include <sys/sunldi.h>
typedef void (*edesc_spf)(void *, mblk_t *, void *, int);
typedef void (*edesc_rpf)(void *, mblk_t *, void *);
@@ -80,6 +83,8 @@ typedef void (*edesc_rpf)(void *, mblk_t *, void *);
#define IPCL_RTSCONN 0x00000020 /* From rts_conn_cache */
#define IPCL_ISV6 0x00000040 /* AF_INET6 */
#define IPCL_IPTUN 0x00000080 /* Has "tun" plumbed above it */
+#define IPCL_NONSTR 0x00001000 /* A non-STREAMS socket */
+#define IPCL_IN_SQUEUE 0x10000000 /* Waiting squeue to finish */
/* Conn Masks */
#define IPCL_TCP (IPCL_TCP4|IPCL_TCP6)
@@ -136,6 +141,8 @@ typedef void (*edesc_rpf)(void *, mblk_t *, void *);
(connp)->conn_ulp == IPPROTO_IPV6) && \
((connp)->conn_flags & IPCL_IPTUN))
+#define IPCL_IS_NONSTR(connp) ((connp)->conn_flags & IPCL_NONSTR)
+
typedef struct connf_s connf_t;
typedef struct
@@ -145,6 +152,21 @@ typedef struct
pc_t ctb_stack[CONN_STACK_DEPTH];
} conn_trace_t;
+/*
+ * Minor-number information for an IP helper stream: a device number
+ * paired with a vmem arena.
+ * NOTE(review): presumably the arena is the one the minor number was
+ * allocated from -- confirm against the allocation code.
+ */
+typedef struct ip_helper_minor_info_s {
+ dev_t ip_minfo_dev; /* Device */
+ vmem_t *ip_minfo_arena; /* Arena */
+} ip_helper_minfo_t;
+
+/*
+ * ip helper stream info: the LDI handle of the helper stream, two
+ * queue pointers and the associated minor-number information.
+ * NOTE(review): the queue pointers are presumably the helper stream's
+ * read and write queues -- confirm where they are assigned.
+ */
+typedef struct ip_helper_stream_info_s {
+ ldi_handle_t ip_helper_stream_handle;
+ queue_t *ip_helper_stream_rq;
+ queue_t *ip_helper_stream_wq;
+ ip_helper_minfo_t *ip_helper_stream_minfo;
+} ip_helper_stream_info_t;
+
/*
* The initial fields in the conn_t are setup by the kmem_cache constructor,
* and are preserved when it is freed. Fields after that are bzero'ed when
@@ -236,6 +258,7 @@ struct conn_s {
queue_t *conn_wq; /* Write queue */
dev_t conn_dev; /* Minor number */
vmem_t *conn_minor_arena; /* Minor arena */
+ ip_helper_stream_info_t *conn_helper_info;
cred_t *conn_cred; /* Credentials */
connf_t *conn_g_fanout; /* Global Hash bucket head */
@@ -300,6 +323,11 @@ struct conn_s {
#define conn_nexthop_v4 V4_PART_OF_V6(conn_nexthop_v6)
cred_t *conn_peercred; /* Peer credentials, if any */
+ kcondvar_t conn_sq_cv; /* For non-STREAMS socket IO */
+ kthread_t *conn_sq_caller; /* Caller of squeue sync ops */
+ sock_upcalls_t *conn_upcalls; /* Upcalls to sockfs */
+ sock_upper_handle_t conn_upper_handle; /* Upper handle: sonode * */
+
unsigned int
conn_ulp_labeled : 1, /* ULP label is synced */
conn_mlp_type : 2, /* mlp_type_t; tsol/tndb.h */
@@ -308,6 +336,8 @@ struct conn_s {
conn_anon_port : 1, /* user bound anonymously */
conn_mac_exempt : 1, /* unlabeled with loose MAC */
conn_spare : 26;
+
+ boolean_t conn_flow_cntrld;
netstack_t *conn_netstack; /* Corresponds to a netstack_hold */
#ifdef CONN_DEBUG
#define CONN_TRACE_MAX 10
@@ -582,6 +612,14 @@ conn_t *ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *, ipha_t *, tcph_t *,
ip_stack_t *);
conn_t *ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *, ip6_t *, tcph_t *,
ip_stack_t *);
+
+extern int ip_create_helper_stream(conn_t *connp, ldi_ident_t li);
+extern void ip_close_helper_stream(conn_t *connp);
+
+extern int ip_get_options(conn_t *, int, int, void *, t_uscalar_t *, cred_t *);
+extern int ip_set_options(conn_t *, int, int, const void *, t_uscalar_t,
+ cred_t *);
+
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/uts/common/inet/mi.c b/usr/src/uts/common/inet/mi.c
index a8848a3499..f88fe3709b 100644
--- a/usr/src/uts/common/inet/mi.c
+++ b/usr/src/uts/common/inet/mi.c
@@ -24,8 +24,6 @@
*/
/* Copyright (c) 1990 Mentat Inc. */
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/types.h>
#include <inet/common.h> /* for various inet/mi.h and inet/nd.h needs */
#include <sys/stream.h>
@@ -46,6 +44,9 @@
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/kobj.h>
+#include <sys/stropts.h>
+#include <sys/strsubr.h>
+#include <inet/proto_set.h>
#define ISDIGIT(ch) ((ch) >= '0' && (ch) <= '9')
#define ISUPPER(ch) ((ch) >= 'A' && (ch) <= 'Z')
@@ -64,7 +65,7 @@
* allocation strategy is changed.
*/
-typedef struct stroptions *STROPTP;
+typedef struct stroptions *STROPTP;
typedef union T_primitives *TPRIMP;
/* Timer block states. */
@@ -903,93 +904,6 @@ mi_offset_paramc(mblk_t *mp, size_t offset, size_t len)
return (NULL);
}
-
-boolean_t
-mi_set_sth_hiwat(queue_t *q, size_t size)
-{
- MBLKP mp;
- STROPTP stropt;
-
- if (!(mp = allocb(sizeof (*stropt), BPRI_LO)))
- return (B_FALSE);
- mp->b_datap->db_type = M_SETOPTS;
- mp->b_wptr += sizeof (*stropt);
- stropt = (STROPTP)mp->b_rptr;
- stropt->so_flags = SO_HIWAT;
- stropt->so_hiwat = size;
- putnext(q, mp);
- return (B_TRUE);
-}
-
-boolean_t
-mi_set_sth_lowat(queue_t *q, size_t size)
-{
- MBLKP mp;
- STROPTP stropt;
-
- if (!(mp = allocb(sizeof (*stropt), BPRI_LO)))
- return (B_FALSE);
- mp->b_datap->db_type = M_SETOPTS;
- mp->b_wptr += sizeof (*stropt);
- stropt = (STROPTP)mp->b_rptr;
- stropt->so_flags = SO_LOWAT;
- stropt->so_lowat = size;
- putnext(q, mp);
- return (B_TRUE);
-}
-
-/* ARGSUSED */
-boolean_t
-mi_set_sth_maxblk(queue_t *q, ssize_t size)
-{
- MBLKP mp;
- STROPTP stropt;
-
- if (!(mp = allocb(sizeof (*stropt), BPRI_LO)))
- return (B_FALSE);
- mp->b_datap->db_type = M_SETOPTS;
- mp->b_wptr += sizeof (*stropt);
- stropt = (STROPTP)mp->b_rptr;
- stropt->so_flags = SO_MAXBLK;
- stropt->so_maxblk = size;
- putnext(q, mp);
- return (B_TRUE);
-}
-
-boolean_t
-mi_set_sth_copyopt(queue_t *q, int copyopt)
-{
- MBLKP mp;
- STROPTP stropt;
-
- if (!(mp = allocb(sizeof (*stropt), BPRI_LO)))
- return (B_FALSE);
- mp->b_datap->db_type = M_SETOPTS;
- mp->b_wptr += sizeof (*stropt);
- stropt = (STROPTP)mp->b_rptr;
- stropt->so_flags = SO_COPYOPT;
- stropt->so_copyopt = (ushort_t)copyopt;
- putnext(q, mp);
- return (B_TRUE);
-}
-
-boolean_t
-mi_set_sth_wroff(queue_t *q, size_t size)
-{
- MBLKP mp;
- STROPTP stropt;
-
- if (!(mp = allocb(sizeof (*stropt), BPRI_LO)))
- return (B_FALSE);
- mp->b_datap->db_type = M_SETOPTS;
- mp->b_wptr += sizeof (*stropt);
- stropt = (STROPTP)mp->b_rptr;
- stropt->so_flags = SO_WROFF;
- stropt->so_wroff = (ushort_t)size;
- putnext(q, mp);
- return (B_TRUE);
-}
-
int
mi_sprintf(char *buf, char *fmt, ...)
{
diff --git a/usr/src/uts/common/inet/mi.h b/usr/src/uts/common/inet/mi.h
index 6cae6a1acf..53608ca316 100644
--- a/usr/src/uts/common/inet/mi.h
+++ b/usr/src/uts/common/inet/mi.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/* Copyright (c) 1990 Mentat Inc. */
@@ -28,8 +27,6 @@
#ifndef _INET_MI_H
#define _INET_MI_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -39,6 +36,7 @@ extern "C" {
#include <sys/types.h>
#include <sys/vmem.h>
#include <sys/varargs.h>
+#include <netinet/in.h>
#define MI_MIN_DEV INET_MIN_DEV /* minimum minor device number */
#define MI_COPY_IN 1
@@ -137,13 +135,6 @@ extern int mi_open_link(void **mi_head, IDP ptr, dev_t *devp, int flag,
extern uint8_t *mi_offset_param(mblk_t *mp, size_t offset, size_t len);
extern uint8_t *mi_offset_paramc(mblk_t *mp, size_t offset, size_t len);
-
-extern boolean_t mi_set_sth_hiwat(queue_t *q, size_t size);
-extern boolean_t mi_set_sth_lowat(queue_t *q, size_t size);
-extern boolean_t mi_set_sth_maxblk(queue_t *q, ssize_t size);
-extern boolean_t mi_set_sth_copyopt(queue_t *q, int copyopt);
-extern boolean_t mi_set_sth_wroff(queue_t *q, size_t size);
-
/*PRINTFLIKE2*/
extern int mi_sprintf(char *buf, char *fmt, ...)
__KPRINTFLIKE(2);
diff --git a/usr/src/uts/common/inet/optcom.c b/usr/src/uts/common/inet/optcom.c
index 3de4044e58..f241599426 100644
--- a/usr/src/uts/common/inet/optcom.c
+++ b/usr/src/uts/common/inet/optcom.c
@@ -19,13 +19,11 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/* Copyright (c) 1990 Mentat Inc. */
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* This file contains common code for handling Options Management requests.
*/
@@ -38,6 +36,7 @@
#define _SUN_TPI_VERSION 2
#include <sys/tihdr.h>
#include <sys/socket.h>
+#include <sys/socketvar.h>
#include <sys/ddi.h>
#include <sys/debug.h> /* for ASSERT */
#include <sys/policy.h>
@@ -52,6 +51,8 @@
#include "optcom.h"
#include <inet/optcom.h>
+#include <inet/ipclassifier.h>
+#include <inet/proto_set.h>
/*
* Function prototypes
@@ -69,7 +70,6 @@ static void do_opt_current(queue_t *, struct T_opthdr *, uchar_t **,
static int do_opt_check_or_negotiate(queue_t *q, struct T_opthdr *reqopt,
uint_t optset_context, uchar_t **resptrp, t_uscalar_t *worst_statusp,
cred_t *, optdb_obj_t *dbobjp, mblk_t *first_mp);
-static opdes_t *opt_chk_lookup(t_uscalar_t, t_uscalar_t, opdes_t *, uint_t);
static boolean_t opt_level_valid(t_uscalar_t, optlevel_t *, uint_t);
static size_t opt_level_allopts_lengths(t_uscalar_t, opdes_t *, uint_t);
static boolean_t opt_length_ok(opdes_t *, struct T_opthdr *);
@@ -186,6 +186,9 @@ optcom_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error)
* the sq framework arranges to restart this operation and passes control to
* the restart function ip_restart_optmgmt() which in turn calls
* svr4_optcom_req() or tpi_optcom_req() to restart the option processing.
+ *
+ * XXX Remove the asynchronous behavior of svr4_optcom_req() and
+ * tpi_optcom_req().
*/
int
svr4_optcom_req(queue_t *q, mblk_t *mp, cred_t *cr, optdb_obj_t *dbobjp,
@@ -214,6 +217,7 @@ svr4_optcom_req(queue_t *q, mblk_t *mp, cred_t *cr, optdb_obj_t *dbobjp,
boolean_t pass_to_next = B_FALSE;
struct T_optmgmt_ack *toa;
struct T_optmgmt_req *tor;
+ int error;
/*
* Allocate M_CTL and prepend to the packet for restarting this
@@ -409,85 +413,17 @@ no_mem:;
if (opt->name == T_ALLOPT)
goto bad_opt;
- /* Find the option in the opt_arr. */
- if ((optd = opt_chk_lookup(opt->level, opt->name,
- opt_arr, opt_arr_cnt)) == NULL) {
- /*
- * Not found, that is a bad thing if
- * the caller is a tpi provider
- */
- if (topmost_tpiprovider)
- goto bad_opt;
- else
- continue; /* skip unmodified */
- }
-
- /* Additional checks dependent on operation. */
- switch (tor->MGMT_flags) {
- case T_NEGOTIATE:
- if (!OA_WRITE_OR_EXECUTE(optd, cr)) {
- /* can't negotiate option */
- if (!(OA_MATCHED_PRIV(optd, cr)) &&
- OA_WX_ANYPRIV(optd)) {
- /*
- * not privileged but privilege
- * will help negotiate option.
- */
- optcom_err_ack(q, mp, TACCES, 0);
- return (0);
- } else
- goto bad_opt;
- }
- /*
- * Verify size for options
- * Note: For retaining compatibility with historical
- * behavior, variable lengths options will have their
- * length verified in the setfn() processing.
- * In order to be compatible with SunOS 4.X we return
- * EINVAL errors for bad lengths.
- */
- if (!(optd->opdes_props & OP_VARLEN)) {
- /* fixed length - size must match */
- if (opt->len != optd->opdes_size) {
- optcom_err_ack(q, mp, TSYSERR, EINVAL);
- return (0);
- }
- }
- break;
-
- case T_CHECK:
- if (!OA_RWX_ANYPRIV(optd))
- /* any of "rwx" permission but not not none */
- goto bad_opt;
- /*
- * XXX Since T_CURRENT was not there in TLI and the
- * official TLI inspired TPI standard, getsockopt()
- * API uses T_CHECK (for T_CURRENT semantics)
- * The following fallthru makes sense because of its
- * historical use as semantic equivalent to T_CURRENT.
- */
- /* FALLTHRU */
- case T_CURRENT:
- if (!OA_READ_PERMISSION(optd, cr)) {
- /* can't read option value */
- if (!(OA_MATCHED_PRIV(optd, cr)) &&
- OA_R_ANYPRIV(optd)) {
- /*
- * not privileged but privilege
- * will help in reading option value.
- */
- optcom_err_ack(q, mp, TACCES, 0);
- return (0);
- } else
- goto bad_opt;
- }
- break;
-
- default:
- optcom_err_ack(q, mp, TBADFLAG, 0);
+ error = proto_opt_check(opt->level, opt->name, opt->len, NULL,
+ opt_arr, opt_arr_cnt, topmost_tpiprovider,
+ tor->MGMT_flags == T_NEGOTIATE, tor->MGMT_flags == T_CHECK,
+ cr);
+ if (error < 0) {
+ optcom_err_ack(q, mp, -error, 0);
+ return (0);
+ } else if (error > 0) {
+ optcom_err_ack(q, mp, TSYSERR, error);
return (0);
}
- /* We liked it. Keep going. */
} /* end for loop scanning option buffer */
/* Now complete the operation as required. */
@@ -609,7 +545,7 @@ restart:
* non-fatal by svr4_optcom_req() and are
* returned by setfn() when it is passed an
* option it does not handle. Since the option
- * passed opt_chk_lookup(), it is implied that
+ * passed proto_opt_lookup(), it is implied that
* it is valid but was either handled upstream
* or will be handled downstream.
*/
@@ -892,7 +828,7 @@ process_topthdrs_first_pass(mblk_t *mp, cred_t *cr, optdb_obj_t *dbobjp,
/* Find the option in the opt_arr. */
if (opt->name != T_ALLOPT) {
- optd = opt_chk_lookup(opt->level, opt->name,
+ optd = proto_opt_lookup(opt->level, opt->name,
opt_arr, opt_arr_cnt);
if (optd == NULL) {
/*
@@ -972,7 +908,7 @@ process_topthdrs_first_pass(mblk_t *mp, cred_t *cr, optdb_obj_t *dbobjp,
case T_CURRENT:
/*
- * The opt_chk_lookup() routine call above approved of
+ * The proto_opt_lookup() routine call above approved of
* this option so we can work on the status for it
* based on the permissions for the operation. (This
* can override any status for it set at higher levels)
@@ -1044,7 +980,7 @@ process_topthdrs_first_pass(mblk_t *mp, cred_t *cr, optdb_obj_t *dbobjp,
}
}
/*
- * The opt_chk_lookup() routine above() approved of
+ * The proto_opt_lookup() routine above approved of
* this option so we can work on the status for it based
* on the permissions for the operation. (This can
* override anything set at a higher level).
@@ -1309,7 +1245,7 @@ do_opt_default(queue_t *q, struct T_opthdr *reqopt, uchar_t **resptrp,
/*
* lookup the option in the table and fill default value
*/
- optd = opt_chk_lookup(reqopt->level, reqopt->name,
+ optd = proto_opt_lookup(reqopt->level, reqopt->name,
opt_arr, opt_arr_cnt);
if (optd == NULL) {
@@ -1609,8 +1545,7 @@ do_opt_current(queue_t *q, struct T_opthdr *reqopt, uchar_t **resptrp,
}
}
-
-
+/* ARGSUSED */
static int
do_opt_check_or_negotiate(queue_t *q, struct T_opthdr *reqopt,
uint_t optset_context, uchar_t **resptrp, t_uscalar_t *worst_statusp,
@@ -1819,7 +1754,6 @@ do_opt_check_or_negotiate(queue_t *q, struct T_opthdr *reqopt,
* Then delete "ignored" options from option buffer and return success.
*
*/
-
int
tpi_optcom_buf(queue_t *q, mblk_t *mp, t_scalar_t *opt_lenp,
t_scalar_t opt_offset, cred_t *cr, optdb_obj_t *dbobjp,
@@ -1890,7 +1824,7 @@ tpi_optcom_buf(queue_t *q, mblk_t *mp, t_scalar_t *opt_lenp,
}
/* Find the option in the opt_arr. */
- optd = opt_chk_lookup(opt->level, opt->name,
+ optd = proto_opt_lookup(opt->level, opt->name,
opt_arr, opt_arr_cnt);
if (optd == NULL) {
@@ -2043,21 +1977,6 @@ error_ret:
return (error);
}
-static opdes_t *
-opt_chk_lookup(t_uscalar_t level, t_uscalar_t name, opdes_t *opt_arr,
- uint_t opt_arr_cnt)
-{
- opdes_t *optd;
-
- for (optd = opt_arr; optd < &opt_arr[opt_arr_cnt];
- optd++) {
- if (level == (uint_t)optd->opdes_level &&
- name == (uint_t)optd->opdes_name)
- return (optd);
- }
- return (NULL);
-}
-
static boolean_t
opt_level_valid(t_uscalar_t level, optlevel_t *valid_level_arr,
uint_t valid_level_arr_cnt)
@@ -2287,3 +2206,68 @@ optcom_pkt_set(uchar_t *invalp, uint_t inlen, boolean_t sticky,
*optlenp = inlen + reservelen;
return (0);
}
+
+/*
+ * Hand each cmsg option in `control' to opt_set_fn() after a table lookup and
+ * a permission check against conn_cred.  Returns 0 on success, EOPNOTSUPP if
+ * no cmsg was processed, otherwise EINVAL, EACCES or the setfn's errno.
+ */
+int
+process_auxiliary_options(conn_t *connp, void *control, t_uscalar_t controllen,
+    void *optbuf, optdb_obj_t *dbobjp, int (*opt_set_fn)(conn_t *, uint_t, int,
+    int, uint_t, uchar_t *, uint_t *, uchar_t *, void *, cred_t *))
+{
+	struct cmsghdr *cmsg;
+	opdes_t *optd;
+	t_uscalar_t outlen;
+	int error = EOPNOTSUPP;
+	t_uscalar_t len;
+	uint_t opt_arr_cnt = dbobjp->odb_opt_arr_cnt;
+	opdes_t *opt_arr = dbobjp->odb_opt_des_arr;
+
+	for (cmsg = (struct cmsghdr *)control;
+	    CMSG_VALID(cmsg, control, (uintptr_t)control + controllen);
+	    cmsg = CMSG_NEXT(cmsg)) {
+		len = (t_uscalar_t)CMSG_CONTENTLEN(cmsg);
+		/* Find the option in the opt_arr. */
+		optd = proto_opt_lookup(cmsg->cmsg_level, cmsg->cmsg_type,
+		    opt_arr, opt_arr_cnt);
+		if (optd == NULL)
+			return (EINVAL);
+		if (OA_READONLY_PERMISSION(optd, connp->conn_cred))
+			return (EACCES);
+		if (OA_MATCHED_PRIV(optd, connp->conn_cred)) {
+			/*
+			 * For privileged options, we DO perform
+			 * access checks as is common sense
+			 */
+			if (!OA_WX_ANYPRIV(optd))
+				return (EACCES);
+		} else {
+			/*
+			 * For non privileged, we fail instead of following
+			 * "ignore" semantics dictated by XTI spec for
+			 * permissions problems.
+			 */
+			if (!OA_WX_NOPRIV(optd)) /* nopriv */
+				return (EACCES);
+		}
+		error = opt_set_fn(connp, SETFN_UD_NEGOTIATE, optd->opdes_level,
+		    optd->opdes_name, len, (uchar_t *)CMSG_CONTENT(cmsg),
+		    &outlen, (uchar_t *)CMSG_CONTENT(cmsg), (void *)optbuf,
+		    connp->conn_cred);
+		if (error > 0) {
+			return (error);
+		} else if (error == 0 && outlen > len) {
+			return (EINVAL);
+		} else {
+			/*
+			 * error can be -ve if the protocol wants to pass the
+			 * option to IP (outlen is not checked in that case).
+			 * We do not pass auxiliary options to IP.
+			 */
+			error = 0;
+		}
+	}
+	return (error);
+}
diff --git a/usr/src/uts/common/inet/optcom.h b/usr/src/uts/common/inet/optcom.h
index 1d2d1cb09d..07cb7cf946 100644
--- a/usr/src/uts/common/inet/optcom.h
+++ b/usr/src/uts/common/inet/optcom.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/* Copyright (c) 1990 Mentat Inc. */
@@ -27,14 +27,13 @@
#ifndef _INET_OPTCOM_H
#define _INET_OPTCOM_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#ifdef __cplusplus
extern "C" {
#endif
#if defined(_KERNEL) && defined(__STDC__)
+#include <inet/ipclassifier.h>
/* Options Description Structure */
typedef struct opdes_s {
t_uscalar_t opdes_name; /* option name */
@@ -139,6 +138,10 @@ typedef struct opdes_s {
#define OA_NO_PERMISSION(x, c) (OA_MATCHED_PRIV((x), (c)) ? \
((x)->opdes_access_priv == 0) : ((x)->opdes_access_nopriv == 0))
+#define PASS_OPT_TO_IP(connp) \
+ if (IPCL_IS_NONSTR(connp)) \
+ return (-EINVAL)
+
/*
* Other properties set in opdes_props field.
*/
@@ -217,6 +220,10 @@ extern t_uscalar_t optcom_max_optsize(opdes_t *, uint_t);
extern int optcom_pkt_set(uchar_t *, uint_t, boolean_t, uchar_t **, uint_t *,
uint_t);
+extern int process_auxiliary_options(conn_t *, void *, t_uscalar_t,
+ void *, optdb_obj_t *, int (*)(conn_t *, uint_t, int, int, uint_t,
+ uchar_t *, uint_t *, uchar_t *, void *, cred_t *));
+
#endif /* defined(_KERNEL) && defined(__STDC__) */
#ifdef __cplusplus
diff --git a/usr/src/uts/common/inet/proto_set.c b/usr/src/uts/common/inet/proto_set.c
new file mode 100644
index 0000000000..45f07d2ed3
--- /dev/null
+++ b/usr/src/uts/common/inet/proto_set.c
@@ -0,0 +1,440 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/types.h>
+#include <inet/common.h>
+#include <sys/stream.h>
+#include <sys/stropts.h>
+#include <sys/strsun.h>
+#include <sys/sysmacros.h>
+#include <sys/stropts.h>
+#include <sys/strsubr.h>
+#include <sys/tpicommon.h>
+#include <sys/socket_proto.h>
+#include <sys/policy.h>
+#include <inet/optcom.h>
+#include <inet/ipclassifier.h>
+
+boolean_t
+proto_set_rx_hiwat(queue_t *q, conn_t *connp, size_t size)
+{
+
+ if (connp != NULL && IPCL_IS_NONSTR(connp)) {
+ struct sock_proto_props sopp;
+
+ sopp.sopp_flags = SOCKOPT_RCVHIWAT;
+ sopp.sopp_rxhiwat = size;
+ (*connp->conn_upcalls->su_set_proto_props)
+ (connp->conn_upper_handle, &sopp);
+ } else {
+ MBLKP mp;
+ struct stroptions *stropt;
+
+ if (!(mp = allocb(sizeof (*stropt), BPRI_LO)))
+ return (B_FALSE);
+ mp->b_datap->db_type = M_SETOPTS;
+ mp->b_wptr += sizeof (*stropt);
+ stropt = (struct stroptions *)mp->b_rptr;
+ stropt->so_flags = SO_HIWAT;
+ stropt->so_hiwat = size;
+ putnext(q, mp);
+ }
+ return (B_TRUE);
+}
+
+boolean_t
+proto_set_rx_lowat(queue_t *q, conn_t *connp, size_t size)
+{
+
+ if (connp != NULL && IPCL_IS_NONSTR(connp)) {
+ struct sock_proto_props sopp;
+
+ sopp.sopp_flags = SOCKOPT_RCVLOWAT;
+ sopp.sopp_rxlowat = size;
+ (*connp->conn_upcalls->su_set_proto_props)
+ (connp->conn_upper_handle, &sopp);
+ } else {
+ MBLKP mp;
+ struct stroptions *stropt;
+
+ if (!(mp = allocb(sizeof (*stropt), BPRI_LO)))
+ return (B_FALSE);
+ mp->b_datap->db_type = M_SETOPTS;
+ mp->b_wptr += sizeof (*stropt);
+ stropt = (struct stroptions *)mp->b_rptr;
+ stropt->so_flags = SO_LOWAT;
+ stropt->so_lowat = size;
+ putnext(q, mp);
+ }
+ return (B_TRUE);
+}
+
+/*
+ * Set maximum packet size. This is the maximum amount of data the protocol
+ * wants to be given at any time. Larger data needs to be broken in multiples
+ * of maximum packet size and given to the protocol one at a time.
+ */
+boolean_t
+proto_set_maxpsz(queue_t *q, conn_t *connp, size_t size)
+{
+ if (connp != NULL && IPCL_IS_NONSTR(connp)) {
+ struct sock_proto_props sopp;
+
+ sopp.sopp_flags = SOCKOPT_MAXPSZ;
+ sopp.sopp_maxpsz = size;
+ (*connp->conn_upcalls->su_set_proto_props)
+ (connp->conn_upper_handle, &sopp);
+ return (B_TRUE);
+ } else {
+ struct stdata *stp;
+ queue_t *wq;
+ stp = STREAM(q);
+
+ /*
+ * At this point change of a queue parameter is not allowed
+ * when a multiplexor is sitting on top.
+ */
+ if (stp == NULL || stp->sd_flag & STPLEX)
+ return (B_FALSE);
+
+ claimstr(stp->sd_wrq);
+ wq = stp->sd_wrq->q_next;
+ ASSERT(wq != NULL);
+ (void) strqset(wq, QMAXPSZ, 0, size);
+ releasestr(stp->sd_wrq);
+ return (B_TRUE);
+ }
+}
+
+/* ARGSUSED */
+boolean_t
+proto_set_tx_maxblk(queue_t *q, conn_t *connp, ssize_t size)
+{
+ if (connp != NULL && IPCL_IS_NONSTR(connp)) {
+ struct sock_proto_props sopp;
+
+ sopp.sopp_flags = SOCKOPT_MAXBLK;
+ sopp.sopp_maxblk = size;
+ (*connp->conn_upcalls->su_set_proto_props)
+ (connp->conn_upper_handle, &sopp);
+ } else {
+ MBLKP mp;
+ struct stroptions *stropt;
+
+ if (!(mp = allocb(sizeof (*stropt), BPRI_LO)))
+ return (B_FALSE);
+ mp->b_datap->db_type = M_SETOPTS;
+ mp->b_wptr += sizeof (*stropt);
+ stropt = (struct stroptions *)mp->b_rptr;
+ stropt->so_flags = SO_MAXBLK;
+ stropt->so_maxblk = size;
+ putnext(q, mp);
+ }
+ return (B_TRUE);
+}
+
+boolean_t
+proto_set_tx_copyopt(queue_t *q, conn_t *connp, int copyopt)
+{
+ if (connp != NULL && IPCL_IS_NONSTR(connp)) {
+ struct sock_proto_props sopp;
+
+ sopp.sopp_flags = SOCKOPT_ZCOPY;
+ sopp.sopp_zcopyflag = (ushort_t)copyopt;
+ (*connp->conn_upcalls->su_set_proto_props)
+ (connp->conn_upper_handle, &sopp);
+ } else {
+ MBLKP mp;
+ struct stroptions *stropt;
+
+ if (!(mp = allocb(sizeof (*stropt), BPRI_LO)))
+ return (B_FALSE);
+ mp->b_datap->db_type = M_SETOPTS;
+ mp->b_wptr += sizeof (*stropt);
+ stropt = (struct stroptions *)mp->b_rptr;
+ stropt->so_flags = SO_COPYOPT;
+ stropt->so_copyopt = (ushort_t)copyopt;
+ putnext(q, mp);
+ }
+ return (B_TRUE);
+}
+
+boolean_t
+proto_set_tx_wroff(queue_t *q, conn_t *connp, size_t size)
+{
+ if (connp != NULL && IPCL_IS_NONSTR(connp)) {
+ struct sock_proto_props sopp;
+
+ sopp.sopp_flags = SOCKOPT_WROFF;
+ sopp.sopp_wroff = size;
+
+ /* XXX workaround for CR6757374 */
+ if (connp->conn_upper_handle != NULL)
+ (*connp->conn_upcalls->su_set_proto_props)
+ (connp->conn_upper_handle, &sopp);
+ } else {
+
+ MBLKP mp;
+ struct stroptions *stropt;
+ if (!(mp = allocb(sizeof (*stropt), BPRI_LO)))
+ return (B_FALSE);
+ mp->b_datap->db_type = M_SETOPTS;
+ mp->b_wptr += sizeof (*stropt);
+ stropt = (struct stroptions *)mp->b_rptr;
+ stropt->so_flags = SO_WROFF;
+ stropt->so_wroff = (ushort_t)size;
+ putnext(q, mp);
+ }
+ return (B_TRUE);
+}
+
+/*
+ * set OOBINLINE processing on the socket
+ */
+void
+proto_set_rx_oob_opt(conn_t *connp, boolean_t onoff)
+{
+ struct sock_proto_props sopp;
+
+ ASSERT(IPCL_IS_NONSTR(connp));
+
+ sopp.sopp_flags = SOCKOPT_OOBINLINE;
+ sopp.sopp_oobinline = onoff;
+ (*connp->conn_upcalls->su_set_proto_props)
+ (connp->conn_upper_handle, &sopp);
+}
+
+/*
+ * Translate a TLI(/XTI) error into a system error as best we can.
+ */
+static const int tli_errs[] = {
+ 0, /* no error */
+ EADDRNOTAVAIL, /* TBADADDR */
+ ENOPROTOOPT, /* TBADOPT */
+ EACCES, /* TACCES */
+ EBADF, /* TBADF */
+ EADDRNOTAVAIL, /* TNOADDR */
+ EPROTO, /* TOUTSTATE */
+ ECONNABORTED, /* TBADSEQ */
+ 0, /* TSYSERR - will never get */
+ EPROTO, /* TLOOK - should never be sent by transport */
+ EMSGSIZE, /* TBADDATA */
+ EMSGSIZE, /* TBUFOVFLW */
+ EPROTO, /* TFLOW */
+ EWOULDBLOCK, /* TNODATA */
+ EPROTO, /* TNODIS */
+ EPROTO, /* TNOUDERR */
+ EINVAL, /* TBADFLAG */
+ EPROTO, /* TNOREL */
+ EOPNOTSUPP, /* TNOTSUPPORT */
+ EPROTO, /* TSTATECHNG */
+ /* following represent error namespace expansion with XTI */
+ EPROTO, /* TNOSTRUCTYPE - never sent by transport */
+ EPROTO, /* TBADNAME - never sent by transport */
+ EPROTO, /* TBADQLEN - never sent by transport */
+ EADDRINUSE, /* TADDRBUSY */
+ EBADF, /* TINDOUT */
+ EBADF, /* TPROVMISMATCH */
+ EBADF, /* TRESQLEN */
+ EBADF, /* TRESADDR */
+ EPROTO, /* TQFULL - never sent by transport */
+ EPROTO, /* TPROTO */
+};
+
+int
+proto_tlitosyserr(int terr)
+{
+ ASSERT(terr != TSYSERR);
+ if (terr >= (sizeof (tli_errs) / sizeof (tli_errs[0])))
+ return (EPROTO);
+ else
+ return (tli_errs[terr]);
+}
+
+/*
+ * Verify that address is suitable for connect/sendmsg and is aligned properly
+ * Since this is a generic function we do not test for port being zero
+ * as some protocols like icmp do not require a port
+ */
+int
+proto_verify_ip_addr(int family, const struct sockaddr *name, socklen_t namelen)
+{
+
+ if (name == NULL || !OK_32PTR((char *)name))
+ return (EINVAL);
+
+ switch (family) {
+ case AF_INET:
+ if (name->sa_family != AF_INET) {
+ return (EAFNOSUPPORT);
+ }
+
+ if (namelen != (socklen_t)sizeof (struct sockaddr_in)) {
+ return (EINVAL);
+ }
+ break;
+ case AF_INET6: {
+#ifdef DEBUG
+ struct sockaddr_in6 *sin6;
+#endif /* DEBUG */
+
+ if (name->sa_family != AF_INET6) {
+ return (EAFNOSUPPORT);
+ }
+ if (namelen != (socklen_t)sizeof (struct sockaddr_in6)) {
+ return (EINVAL);
+ }
+#ifdef DEBUG
+ /* Verify that apps don't forget to clear sin6_scope_id etc */
+ sin6 = (struct sockaddr_in6 *)name;
+ if (sin6->sin6_scope_id != 0 &&
+ !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) {
+ zcmn_err(getzoneid(), CE_WARN,
+ "connect/send* with uninitialized sin6_scope_id "
+ "(%d) on socket. Pid = %d\n",
+ (int)sin6->sin6_scope_id, (int)curproc->p_pid);
+ }
+#endif /* DEBUG */
+ break;
+ }
+ default:
+ return (EINVAL);
+ }
+
+ return (0);
+}
+
+/*
+ * Do a lookup of the options in the array.
+ * Return NULL if there isn't a match.
+ */
+opdes_t *
+proto_opt_lookup(t_uscalar_t level, t_uscalar_t name, opdes_t *opt_arr,
+ uint_t opt_arr_cnt)
+{
+ opdes_t *optd;
+
+ for (optd = opt_arr; optd < &opt_arr[opt_arr_cnt];
+ optd++) {
+ if (level == (uint_t)optd->opdes_level &&
+ name == (uint_t)optd->opdes_name)
+ return (optd);
+ }
+ return (NULL);
+}
+
+/*
+ * Do a lookup of the options in the array and do permission and length checking
+ * Returns zero if there is no error (note: for non-tpi-providers not being able
+ * to find the option is not an error). TPI errors are returned as -ve.
+ */
+int
+proto_opt_check(int level, int name, int len, t_uscalar_t *max_len,
+ opdes_t *opt_arr, uint_t opt_arr_cnt, boolean_t topmost_tpiprovider,
+ boolean_t negotiate, boolean_t check, cred_t *cr)
+{
+ opdes_t *optd;
+
+ /* Find the option in the opt_arr. */
+ if ((optd = proto_opt_lookup(level, name, opt_arr, opt_arr_cnt)) ==
+ NULL) {
+ /*
+ * Not found, that is a bad thing if
+ * the caller is a tpi provider
+ */
+ if (topmost_tpiprovider)
+ return (-TBADOPT);
+ else
+ return (0); /* skip unmodified */
+ }
+
+ /* Additional checks dependent on operation. */
+ if (negotiate) {
+ /* Cannot be true at the same time */
+ ASSERT(check == B_FALSE);
+
+ if (!OA_WRITE_OR_EXECUTE(optd, cr)) {
+ /* can't negotiate option */
+ if (!(OA_MATCHED_PRIV(optd, cr)) &&
+ OA_WX_ANYPRIV(optd)) {
+ /*
+ * not privileged but privilege
+ * will help negotiate option.
+ */
+ return (-TACCES);
+ } else {
+ return (-TBADOPT);
+ }
+ }
+ /*
+ * Verify size for options
+ * Note: For retaining compatibility with historical
+ * behavior, variable lengths options will have their
+ * length verified in the setfn() processing.
+ * In order to be compatible with SunOS 4.X we return
+ * EINVAL errors for bad lengths.
+ */
+ if (!(optd->opdes_props & OP_VARLEN)) {
+ /* fixed length - size must match */
+ if (len != optd->opdes_size) {
+ return (EINVAL);
+ }
+ }
+ } else {
+ if (check) {
+ if (!OA_RWX_ANYPRIV(optd))
+ /* any of "rwx" permission but not none */
+ return (-TBADOPT);
+ }
+ /*
+ * Not a negotiate request, so the option value will be read.
+ *
+ * Historical note: T_CURRENT was not there in TLI and the
+ * official TLI inspired TPI standard, so the getsockopt()
+ * API uses T_CHECK (for T_CURRENT semantics). For that
+ * reason a T_CHECK request is also subject to the read
+ * permission test below, as semantic equivalent to T_CURRENT.
+ */
+ /* T_CHECK requests continue here after their own test above. */
+ if (!OA_READ_PERMISSION(optd, cr)) {
+ /* can't read option value */
+ if (!(OA_MATCHED_PRIV(optd, cr)) &&
+ OA_R_ANYPRIV(optd)) {
+ /*
+ * not privileged but privilege
+ * will help in reading option value.
+ */
+ return (-TACCES);
+ } else {
+ return (-TBADOPT);
+ }
+ }
+ }
+ if (max_len != NULL)
+ *max_len = optd->opdes_size;
+
+ /* We liked it. Keep going. */
+ return (0);
+}
diff --git a/usr/src/uts/common/inet/proto_set.h b/usr/src/uts/common/inet/proto_set.h
new file mode 100644
index 0000000000..8e714c7c05
--- /dev/null
+++ b/usr/src/uts/common/inet/proto_set.h
@@ -0,0 +1,58 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _INET_PROTO_SET_H
+#define _INET_PROTO_SET_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/types.h>
+#include <sys/socket_proto.h>
+#include <inet/optcom.h>
+#include <inet/ipclassifier.h>
+
+extern boolean_t proto_set_rx_hiwat(queue_t *, struct conn_s *, size_t);
+extern boolean_t proto_set_rx_lowat(queue_t *, struct conn_s *, size_t);
+extern boolean_t proto_set_maxpsz(queue_t *, struct conn_s *, size_t);
+extern boolean_t proto_set_tx_maxblk(queue_t *, struct conn_s *,
+ ssize_t);
+extern boolean_t proto_set_tx_copyopt(queue_t *, struct conn_s *, int);
+extern boolean_t proto_set_tx_wroff(queue_t *, struct conn_s *, size_t);
+extern void proto_set_rx_oob_opt(struct conn_s *, boolean_t);
+
+extern int proto_tlitosyserr(int);
+extern int proto_verify_ip_addr(int, const struct sockaddr *, socklen_t);
+
+extern int proto_opt_check(int, int, int, t_uscalar_t *, opdes_t *,
+ uint_t, boolean_t, boolean_t, boolean_t, cred_t *);
+extern opdes_t *proto_opt_lookup(t_uscalar_t, t_uscalar_t, opdes_t *, uint_t);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _INET_PROTO_SET_H */
diff --git a/usr/src/uts/common/inet/rawip_impl.h b/usr/src/uts/common/inet/rawip_impl.h
index 638cea6c70..f818247b67 100644
--- a/usr/src/uts/common/inet/rawip_impl.h
+++ b/usr/src/uts/common/inet/rawip_impl.h
@@ -27,8 +27,6 @@
#ifndef _RAWIP_IMPL_H
#define _RAWIP_IMPL_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -44,6 +42,7 @@ extern "C" {
#include <inet/common.h>
#include <inet/ip.h>
+#include <inet/optcom.h>
/* Named Dispatch Parameter Management Structure */
typedef struct icmpparam_s {
@@ -63,7 +62,9 @@ struct icmp_stack {
icmpparam_t *is_param_arr; /* ndd variable table */
kstat_t *is_ksp; /* kstats */
mib2_rawip_t is_rawip_mib; /* SNMP fixed size info */
+ ldi_ident_t is_ldi_ident;
};
+
typedef struct icmp_stack icmp_stack_t;
/* Internal icmp control structure, one per open stream */
@@ -76,7 +77,7 @@ typedef struct icmp_s {
uint_t icmp_state; /* TPI state */
in6_addr_t icmp_v6src; /* Source address of this stream */
in6_addr_t icmp_bound_v6src; /* Explicitely bound to address */
- in6_addr_t icmp_v6dst; /* Connected destination */
+ sin6_t icmp_v6dst; /* Connected destination */
/*
* IP format that packets transmitted from this struct should use.
* Value can be IP4_VERSION or IPV6_VERSION.
@@ -87,7 +88,6 @@ typedef struct icmp_s {
sa_family_t icmp_family; /* Family from socket() call */
/* Following protected by icmp_rwlock */
- uint32_t icmp_flowinfo; /* Connected flow id and tclass */
uint32_t icmp_max_hdr_len; /* For write offset in stream head */
uint_t icmp_proto;
uint_t icmp_ip_snd_options_len; /* Len of IPv4 options */
@@ -144,6 +144,15 @@ typedef struct icmp_s {
uint_t icmp_label_len_v6; /* sec. part of sticky opt */
in6_addr_t icmp_v6lastdst; /* most recent destination */
icmp_stack_t *icmp_is; /* Stack instance */
+ size_t icmp_xmit_hiwat;
+ size_t icmp_xmit_lowat;
+ size_t icmp_recv_hiwat;
+ size_t icmp_recv_lowat;
+ int icmp_delayed_error;
+ kmutex_t icmp_recv_lock;
+ mblk_t *icmp_fallback_queue_head;
+ mblk_t *icmp_fallback_queue_tail;
+ struct sockaddr_storage icmp_delayed_addr;
} icmp_t;
/*
@@ -155,10 +164,16 @@ extern optdb_obj_t icmp_opt_obj;
extern uint_t icmp_max_optsize;
extern mblk_t *icmp_snmp_get(queue_t *q, mblk_t *mpctl);
-extern void rawip_resume_bind(conn_t *, mblk_t *);
-extern void icmp_ddi_init(void);
-extern void icmp_ddi_destroy(void);
+extern void icmp_ddi_g_init(void);
+extern void icmp_ddi_g_destroy(void);
+
+extern sock_lower_handle_t rawip_create(int, int, int, sock_downcalls_t **,
+ uint_t *, int *, int, cred_t *);
+extern void rawip_fallback(sock_lower_handle_t, queue_t *, boolean_t,
+ so_proto_quiesced_cb_t);
+
+extern sock_downcalls_t sock_rawip_downcalls;
#endif /* _KERNEL */
diff --git a/usr/src/uts/common/inet/rts_impl.h b/usr/src/uts/common/inet/rts_impl.h
index f89d1ec82c..de7cd8970b 100644
--- a/usr/src/uts/common/inet/rts_impl.h
+++ b/usr/src/uts/common/inet/rts_impl.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/* Copyright (c) 1990 Mentat Inc. */
@@ -27,8 +27,6 @@
#ifndef _RTS_IMPL_H
#define _RTS_IMPL_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -44,6 +42,7 @@ extern "C" {
#include <inet/common.h>
#include <inet/ip.h>
+#include <inet/optcom.h>
/* Named Dispatch Parameter Management Structure */
typedef struct rtsparam_s {
@@ -61,6 +60,8 @@ struct rts_stack {
caddr_t rtss_g_nd;
rtsparam_t *rtss_params;
+
+ ldi_ident_t rtss_ldi_ident;
};
typedef struct rts_stack rts_stack_t;
@@ -84,10 +85,25 @@ typedef struct rts_s {
/* Written to only once at the time of opening the endpoint */
conn_t *rts_connp;
+
+ /* Outbound flow control */
+ size_t rts_xmit_hiwat;
+ size_t rts_xmit_lowat;
+
+ /* Inbound flow control */
+ size_t rts_recv_hiwat;
+ size_t rts_recv_lowat;
+
+ kmutex_t rts_send_mutex;
+ kmutex_t rts_recv_mutex;
+ kcondvar_t rts_send_cv;
+ kcondvar_t rts_io_cv;
} rts_t;
#define RTS_WPUT_PENDING 0x1 /* Waiting for write-side to complete */
+#define RTS_REQ_PENDING 0x1 /* For direct sockets */
#define RTS_WRW_PENDING 0x2 /* Routing socket write in progress */
+#define RTS_REQ_INPROG 0x2 /* For direct sockets */
/*
* Object to represent database of options to search passed to
@@ -98,8 +114,19 @@ typedef struct rts_s {
extern optdb_obj_t rts_opt_obj;
extern uint_t rts_max_optsize;
-extern void rts_ddi_init(void);
-extern void rts_ddi_destroy(void);
+extern void rts_ddi_g_init(void);
+extern void rts_ddi_g_destroy(void);
+
+extern int rts_tpi_opt_get(queue_t *, t_scalar_t, t_scalar_t, uchar_t *);
+extern int rts_tpi_opt_set(queue_t *, uint_t, int, int, uint_t, uchar_t *,
+ uint_t *, uchar_t *, void *, cred_t *, mblk_t *);
+extern int rts_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name,
+ uchar_t *ptr);
+
+extern sock_lower_handle_t rts_create(int, int, int, sock_downcalls_t **,
+ uint_t *, int *, int, cred_t *);
+
+extern sock_downcalls_t sock_rts_downcalls;
#endif /* _KERNEL */
diff --git a/usr/src/uts/common/inet/sctp/sctp.c b/usr/src/uts/common/inet/sctp/sctp.c
index f76612f04f..1dc96a687b 100644
--- a/usr/src/uts/common/inet/sctp/sctp.c
+++ b/usr/src/uts/common/inet/sctp/sctp.c
@@ -279,13 +279,13 @@ sctp_clean_death(sctp_t *sctp, int err)
if (sctp->sctp_xmit_head || sctp->sctp_xmit_unsent) {
sctp_regift_xmitlist(sctp);
}
- if (sctp->sctp_ulp_disconnected(sctp->sctp_ulpd, err)) {
+ if (sctp->sctp_ulp_disconnected(sctp->sctp_ulpd, 0, err)) {
/*
* Socket is gone, detach.
*/
sctp->sctp_detached = B_TRUE;
sctp->sctp_ulpd = NULL;
- bzero(&sctp->sctp_upcalls, sizeof (sctp_upcalls_t));
+ sctp->sctp_upcalls = NULL;
}
}
@@ -447,7 +447,7 @@ sctp_close(sctp_t *sctp)
RUN_SCTP(sctp);
sctp->sctp_detached = 1;
sctp->sctp_ulpd = NULL;
- bzero(&sctp->sctp_upcalls, sizeof (sctp_upcalls_t));
+ sctp->sctp_upcalls = NULL;
bzero(&sctp->sctp_events, sizeof (sctp->sctp_events));
/* If the graceful shutdown has not been completed, just return. */
@@ -1341,8 +1341,8 @@ sctp_icmp_error_ipv6(sctp_t *sctp, mblk_t *mp)
* If parent pointer is passed in, inherit settings from it.
*/
sctp_t *
-sctp_create(void *sctp_ulpd, sctp_t *parent, int family, int flags,
- const sctp_upcalls_t *sctp_upcalls, sctp_sockbuf_limits_t *sbl,
+sctp_create(void *ulpd, sctp_t *parent, int family, int flags,
+ sock_upcalls_t *upcalls, sctp_sockbuf_limits_t *sbl,
cred_t *credp)
{
sctp_t *sctp, *psctp;
@@ -1507,12 +1507,11 @@ sctp_create(void *sctp_ulpd, sctp_t *parent, int family, int flags,
sctp->sctp_adv_pap = sctp->sctp_lastack_rxd;
/* Information required by upper layer */
- if (sctp_ulpd != NULL) {
- sctp->sctp_ulpd = sctp_ulpd;
+ if (ulpd != NULL) {
+ sctp->sctp_ulpd = ulpd;
- ASSERT(sctp_upcalls != NULL);
- bcopy(sctp_upcalls, &sctp->sctp_upcalls,
- sizeof (sctp_upcalls_t));
+ ASSERT(upcalls != NULL);
+ sctp->sctp_upcalls = upcalls;
ASSERT(sbl != NULL);
/* Fill in the socket buffer limits for sctpsockfs */
sbl->sbl_txlowat = sctp->sctp_xmit_lowater;
@@ -1520,8 +1519,8 @@ sctp_create(void *sctp_ulpd, sctp_t *parent, int family, int flags,
sbl->sbl_rxbuf = sctp->sctp_rwnd;
sbl->sbl_rxlowat = SCTP_RECV_LOWATER;
}
- /* If no sctp_ulpd, must be creating the default sctp */
- ASSERT(sctp_ulpd != NULL || sctps->sctps_gsctp == NULL);
+ /* If no ulpd, must be creating the default sctp */
+ ASSERT(ulpd != NULL || sctps->sctps_gsctp == NULL);
/* Insert this in the global list. */
SCTP_LINK(sctp, sctps);
diff --git a/usr/src/uts/common/inet/sctp/sctp_bind.c b/usr/src/uts/common/inet/sctp/sctp_bind.c
index 2091d91ab5..dfb70fc202 100644
--- a/usr/src/uts/common/inet/sctp/sctp_bind.c
+++ b/usr/src/uts/common/inet/sctp/sctp_bind.c
@@ -24,8 +24,6 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/stream.h>
@@ -174,12 +172,16 @@ sctp_bind(sctp_t *sctp, struct sockaddr *sa, socklen_t len)
int err = 0;
ASSERT(sctp != NULL);
- ASSERT(sa);
RUN_SCTP(sctp);
- if (sctp->sctp_state > SCTPS_BOUND ||
- (sctp->sctp_connp->conn_state_flags & CONN_CLOSING)) {
+ if ((sctp->sctp_state >= SCTPS_BOUND) ||
+ (sctp->sctp_connp->conn_state_flags & CONN_CLOSING) ||
+ (sa == NULL || len == 0)) {
+ /*
+ * Multiple binds not allowed for any SCTP socket
+ * Also binding with null address is not supported.
+ */
err = EINVAL;
goto done;
}
diff --git a/usr/src/uts/common/inet/sctp/sctp_common.c b/usr/src/uts/common/inet/sctp/sctp_common.c
index 548a326806..10aff2af34 100644
--- a/usr/src/uts/common/inet/sctp/sctp_common.c
+++ b/usr/src/uts/common/inet/sctp/sctp_common.c
@@ -398,6 +398,8 @@ void
sctp_set_ulp_prop(sctp_t *sctp)
{
int hdrlen;
+ struct sock_proto_props sopp;
+
sctp_stack_t *sctps = sctp->sctp_sctps;
if (sctp->sctp_current->isv4) {
@@ -408,9 +410,12 @@ sctp_set_ulp_prop(sctp_t *sctp)
ASSERT(sctp->sctp_ulpd);
ASSERT(sctp->sctp_current->sfa_pmss == sctp->sctp_mss);
- sctp->sctp_ulp_prop(sctp->sctp_ulpd,
- sctps->sctps_wroff_xtra + hdrlen + sizeof (sctp_data_hdr_t),
- sctp->sctp_mss - sizeof (sctp_data_hdr_t));
+ bzero(&sopp, sizeof (sopp));
+ sopp.sopp_flags = SOCKOPT_MAXBLK|SOCKOPT_WROFF;
+ sopp.sopp_wroff = sctps->sctps_wroff_xtra + hdrlen +
+ sizeof (sctp_data_hdr_t);
+ sopp.sopp_maxblk = sctp->sctp_mss - sizeof (sctp_data_hdr_t);
+ sctp->sctp_ulp_prop(sctp->sctp_ulpd, &sopp);
}
void
diff --git a/usr/src/uts/common/inet/sctp/sctp_conn.c b/usr/src/uts/common/inet/sctp/sctp_conn.c
index 716abc13bc..b4a9b56fdd 100644
--- a/usr/src/uts/common/inet/sctp/sctp_conn.c
+++ b/usr/src/uts/common/inet/sctp/sctp_conn.c
@@ -20,12 +20,10 @@
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/stream.h>
@@ -152,8 +150,11 @@ sctp_accept_comm(sctp_t *listener, sctp_t *acceptor, mblk_t *cr_pkt,
acceptor->sctp_rwnd = listener->sctp_rwnd;
acceptor->sctp_irwnd = acceptor->sctp_rwnd;
acceptor->sctp_pd_point = acceptor->sctp_rwnd;
+ acceptor->sctp_upcalls = listener->sctp_upcalls;
+#if 0
bcopy(&listener->sctp_upcalls, &acceptor->sctp_upcalls,
sizeof (sctp_upcalls_t));
+#endif
return (0);
}
@@ -169,6 +170,7 @@ sctp_conn_request(sctp_t *sctp, mblk_t *mp, uint_t ifindex, uint_t ip_hdr_len,
int err;
conn_t *connp, *econnp;
sctp_stack_t *sctps;
+ struct sock_proto_props sopp;
/*
* No need to check for duplicate as this is the listener
@@ -292,22 +294,25 @@ sctp_conn_request(sctp_t *sctp, mblk_t *mp, uint_t ifindex, uint_t ip_hdr_len,
/* Connection established, so send up the conn_ind */
if ((eager->sctp_ulpd = sctp->sctp_ulp_newconn(sctp->sctp_ulpd,
- eager)) == NULL) {
+ (sock_lower_handle_t)eager, NULL, NULL, 0,
+ &eager->sctp_upcalls)) == NULL) {
sctp_close_eager(eager);
BUMP_MIB(&sctps->sctps_mib, sctpListenDrop);
return (NULL);
}
ASSERT(SCTP_IS_DETACHED(eager));
eager->sctp_detached = B_FALSE;
+ bzero(&sopp, sizeof (sopp));
+ sopp.sopp_flags = SOCKOPT_MAXBLK|SOCKOPT_WROFF;
+ sopp.sopp_maxblk = strmsgsz;
if (eager->sctp_family == AF_INET) {
- eager->sctp_ulp_prop(eager->sctp_ulpd,
- sctps->sctps_wroff_xtra + sizeof (sctp_data_hdr_t) +
- sctp->sctp_hdr_len, strmsgsz);
+ sopp.sopp_wroff = sctps->sctps_wroff_xtra +
+ sizeof (sctp_data_hdr_t) + sctp->sctp_hdr_len;
} else {
- eager->sctp_ulp_prop(eager->sctp_ulpd,
- sctps->sctps_wroff_xtra + sizeof (sctp_data_hdr_t) +
- sctp->sctp_hdr6_len, strmsgsz);
+ sopp.sopp_wroff = sctps->sctps_wroff_xtra +
+ sizeof (sctp_data_hdr_t) + sctp->sctp_hdr6_len;
}
+ eager->sctp_ulp_prop(eager->sctp_ulpd, &sopp);
return (eager);
}
@@ -333,6 +338,7 @@ sctp_connect(sctp_t *sctp, const struct sockaddr *dst, uint32_t addrlen)
int err;
sctp_faddr_t *cur_fp;
sctp_stack_t *sctps = sctp->sctp_sctps;
+ struct sock_proto_props sopp;
/*
* Determine packet type based on type of address passed in
@@ -599,9 +605,11 @@ sctp_connect(sctp_t *sctp, const struct sockaddr *dst, uint32_t addrlen)
BUMP_LOCAL(sctp->sctp_opkts);
notify_ulp:
- sctp->sctp_ulp_prop(sctp->sctp_ulpd,
- sctps->sctps_wroff_xtra + hdrlen + sizeof (sctp_data_hdr_t),
- 0);
+ bzero(&sopp, sizeof (sopp));
+ sopp.sopp_flags = SOCKOPT_WROFF;
+ sopp.sopp_wroff = sctps->sctps_wroff_xtra + hdrlen +
+ sizeof (sctp_data_hdr_t);
+ sctp->sctp_ulp_prop(sctp->sctp_ulpd, &sopp);
return (0);
default:
diff --git a/usr/src/uts/common/inet/sctp/sctp_cookie.c b/usr/src/uts/common/inet/sctp/sctp_cookie.c
index 93184bcd27..e089a901d3 100644
--- a/usr/src/uts/common/inet/sctp/sctp_cookie.c
+++ b/usr/src/uts/common/inet/sctp/sctp_cookie.c
@@ -1049,10 +1049,8 @@ sctp_send_cookie_echo(sctp_t *sctp, sctp_chunk_hdr_t *iackch, mblk_t *iackmp)
* unsent, since there won't be any sent-unack'ed
* here.
*/
- if (!SCTP_IS_DETACHED(sctp)) {
- sctp->sctp_ulp_xmitted(sctp->sctp_ulpd,
- sctp->sctp_unsent);
- }
+ if (!SCTP_IS_DETACHED(sctp))
+ SCTP_TXQ_UPDATE(sctp);
}
if (sctp->sctp_xmit_unsent == NULL)
sctp->sctp_xmit_unsent_tail = NULL;
diff --git a/usr/src/uts/common/inet/sctp/sctp_impl.h b/usr/src/uts/common/inet/sctp/sctp_impl.h
index 5f41226bf3..089edc3835 100644
--- a/usr/src/uts/common/inet/sctp/sctp_impl.h
+++ b/usr/src/uts/common/inet/sctp/sctp_impl.h
@@ -608,16 +608,16 @@ typedef struct sctp_s {
kcondvar_t sctp_cv;
boolean_t sctp_running;
- void *sctp_ulpd; /* SCTP upper layer desc. */
+#define sctp_ulpd sctp_connp->conn_upper_handle
+#define sctp_upcalls sctp_connp->conn_upcalls
- struct sctp_upcalls_s sctp_upcalls; /* upcalls for sctp_ulpd */
-#define sctp_ulp_newconn sctp_upcalls.su_newconn
-#define sctp_ulp_connected sctp_upcalls.su_connected
-#define sctp_ulp_disconnected sctp_upcalls.su_disconnected
-#define sctp_ulp_disconnecting sctp_upcalls.su_disconnecting
-#define sctp_ulp_recv sctp_upcalls.su_recv
-#define sctp_ulp_xmitted sctp_upcalls.su_xmitted
-#define sctp_ulp_prop sctp_upcalls.su_properties
+#define sctp_ulp_newconn sctp_upcalls->su_newconn
+#define sctp_ulp_connected sctp_upcalls->su_connected
+#define sctp_ulp_disconnected sctp_upcalls->su_disconnected
+#define sctp_ulp_opctl sctp_upcalls->su_opctl
+#define sctp_ulp_recv sctp_upcalls->su_recv
+#define sctp_ulp_xmitted sctp_upcalls->su_txq_full
+#define sctp_ulp_prop sctp_upcalls->su_set_proto_props
int32_t sctp_state;
@@ -768,8 +768,9 @@ typedef struct sctp_s {
sctp_rexmitting : 1, /* SCTP is retransmitting */
sctp_zero_win_probe : 1, /* doing zero win probe */
+ sctp_txq_full : 1, /* the tx queue is full */
sctp_ulp_discon_done : 1, /* ulp_disconnecting done */
- sctp_dummy : 7;
+ sctp_dummy : 6;
} sctp_bits;
struct {
uint32_t
@@ -809,6 +810,7 @@ typedef struct sctp_s {
#define sctp_linklocal sctp_bits.sctp_linklocal
#define sctp_rexmitting sctp_bits.sctp_rexmitting
#define sctp_zero_win_probe sctp_bits.sctp_zero_win_probe
+#define sctp_txq_full sctp_bits.sctp_txq_full
#define sctp_ulp_discon_done sctp_bits.sctp_ulp_discon_done
#define sctp_recvsndrcvinfo sctp_events.sctp_recvsndrcvinfo
@@ -935,6 +937,15 @@ typedef struct sctp_s {
uint32_t sctp_err_len; /* Total error chunks length */
} sctp_t;
+#define SCTP_TXQ_LEN(sctp) ((sctp)->sctp_unsent + (sctp)->sctp_unacked)
+#define SCTP_TXQ_UPDATE(sctp) \
+ if ((sctp)->sctp_txq_full && SCTP_TXQ_LEN(sctp) <= \
+ (sctp)->sctp_xmit_lowater) { \
+ (sctp)->sctp_txq_full = 0; \
+ (sctp)->sctp_ulp_xmitted((sctp)->sctp_ulpd, \
+ B_FALSE); \
+ }
+
#endif /* (defined(_KERNEL) || defined(_KMEMUSER)) */
extern void sctp_ack_timer(sctp_t *);
diff --git a/usr/src/uts/common/inet/sctp/sctp_input.c b/usr/src/uts/common/inet/sctp/sctp_input.c
index 71a85ad04e..87c79eedff 100644
--- a/usr/src/uts/common/inet/sctp/sctp_input.c
+++ b/usr/src/uts/common/inet/sctp/sctp_input.c
@@ -24,8 +24,6 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/stream.h>
@@ -1192,6 +1190,7 @@ sctp_data_chunk(sctp_t *sctp, sctp_chunk_hdr_t *ch, mblk_t *mp, mblk_t **dups,
boolean_t tpfinished = B_TRUE;
int32_t new_rwnd;
sctp_stack_t *sctps = sctp->sctp_sctps;
+ int error;
/* The following are used multiple times, so we inline them */
#define SCTP_ACK_IT(sctp, tsn) \
@@ -1292,8 +1291,8 @@ sctp_data_chunk(sctp_t *sctp, sctp_chunk_hdr_t *ch, mblk_t *mp, mblk_t **dups,
oftsn = sctp->sctp_ftsn;
if (isfrag) {
- int error = 0;
+ error = 0;
/* fragmented data chunk */
dmp->b_rptr = (uchar_t *)dc;
if (ubit) {
@@ -1408,13 +1407,18 @@ sctp_data_chunk(sctp_t *sctp, sctp_chunk_hdr_t *ch, mblk_t *mp, mblk_t **dups,
sctp->sctp_rxqueued -= dlen;
if (can_deliver) {
+
dmp->b_rptr = (uchar_t *)(dc + 1);
if (sctp_input_add_ancillary(sctp, &dmp, dc, fp, ipp) == 0) {
dprint(1, ("sctp_data_chunk: delivering %lu bytes\n",
msgdsize(dmp)));
sctp->sctp_rwnd -= dlen;
+ /*
+ * Override b_flag for SCTP sockfs internal use
+ */
+ dmp->b_flag = tpfinished ? 0 : SCTP_PARTIAL_DATA;
new_rwnd = sctp->sctp_ulp_recv(sctp->sctp_ulpd, dmp,
- tpfinished ? 0 : SCTP_PARTIAL_DATA);
+ msgdsize(dmp), 0, &error, NULL);
if (new_rwnd > sctp->sctp_rwnd) {
sctp->sctp_rwnd = new_rwnd;
}
@@ -1492,8 +1496,13 @@ sctp_data_chunk(sctp_t *sctp, sctp_chunk_hdr_t *ch, mblk_t *mp, mblk_t **dups,
dprint(1, ("sctp_data_chunk: delivering %lu "
"bytes\n", msgdsize(dmp)));
sctp->sctp_rwnd -= dlen;
+ /*
+ * Override b_flag for SCTP sockfs internal use
+ */
+ dmp->b_flag = tpfinished ?
+ 0 : SCTP_PARTIAL_DATA;
new_rwnd = sctp->sctp_ulp_recv(sctp->sctp_ulpd,
- dmp, tpfinished ? 0 : SCTP_PARTIAL_DATA);
+ dmp, msgdsize(dmp), 0, &error, NULL);
if (new_rwnd > sctp->sctp_rwnd) {
sctp->sctp_rwnd = new_rwnd;
}
@@ -1806,10 +1815,8 @@ sctp_check_abandoned_msg(sctp_t *sctp, mblk_t *meta)
* Update ULP the amount of queued data, which is
* sent-unack'ed + unsent.
*/
- if (!SCTP_IS_DETACHED(sctp)) {
- sctp->sctp_ulp_xmitted(sctp->sctp_ulpd,
- sctp->sctp_unacked + sctp->sctp_unsent);
- }
+ if (!SCTP_IS_DETACHED(sctp))
+ SCTP_TXQ_UPDATE(sctp);
return (0);
}
return (-1);
@@ -1922,10 +1929,8 @@ cum_ack_done:
* Update ULP the amount of queued data, which is
* sent-unack'ed + unsent.
*/
- if (!SCTP_IS_DETACHED(sctp)) {
- sctp->sctp_ulp_xmitted(sctp->sctp_ulpd,
- sctp->sctp_unacked + sctp->sctp_unsent);
- }
+ if (!SCTP_IS_DETACHED(sctp))
+ SCTP_TXQ_UPDATE(sctp);
/* Time to send a shutdown? */
if (sctp->sctp_state == SCTPS_SHUTDOWN_PENDING) {
@@ -2141,6 +2146,7 @@ sctp_process_forward_tsn(sctp_t *sctp, sctp_chunk_hdr_t *ch, sctp_faddr_t *fp,
}
if (can_deliver) {
int32_t nrwnd;
+ int error;
dmp->b_rptr = (uchar_t *)(dc + 1);
dmp->b_next = NULL;
@@ -2149,8 +2155,15 @@ sctp_process_forward_tsn(sctp_t *sctp, sctp_chunk_hdr_t *ch, sctp_faddr_t *fp,
&dmp, dc, fp, ipp) == 0) {
sctp->sctp_rxqueued -= dlen;
sctp->sctp_rwnd -= dlen;
+ /*
+ * Override b_flag for SCTP sockfs
+ * internal use
+ */
+
+ dmp->b_flag = 0;
nrwnd = sctp->sctp_ulp_recv(
- sctp->sctp_ulpd, dmp, 0);
+ sctp->sctp_ulpd, dmp, msgdsize(dmp),
+ 0, &error, NULL);
if (nrwnd > sctp->sctp_rwnd)
sctp->sctp_rwnd = nrwnd;
} else {
@@ -3947,7 +3960,7 @@ sctp_input_data(sctp_t *sctp, mblk_t *mp, mblk_t *ipsec_mp)
sctp_stop_faddr_timers(sctp);
if (!SCTP_IS_DETACHED(sctp)) {
sctp->sctp_ulp_connected(
- sctp->sctp_ulpd);
+ sctp->sctp_ulpd, 0, NULL, -1);
sctp_set_ulp_prop(sctp);
}
sctp->sctp_state = SCTPS_ESTABLISHED;
@@ -3983,7 +3996,7 @@ sctp_input_data(sctp_t *sctp, mblk_t *mp, mblk_t *ipsec_mp)
case CHUNK_COOKIE_ACK:
if (!SCTP_IS_DETACHED(sctp)) {
sctp->sctp_ulp_connected(
- sctp->sctp_ulpd);
+ sctp->sctp_ulpd, 0, NULL, -1);
sctp_set_ulp_prop(sctp);
}
if (sctp->sctp_unacked == 0)
@@ -4020,7 +4033,7 @@ sctp_input_data(sctp_t *sctp, mblk_t *mp, mblk_t *ipsec_mp)
if (!SCTP_IS_DETACHED(sctp)) {
sctp->sctp_ulp_connected(
- sctp->sctp_ulpd);
+ sctp->sctp_ulpd, 0, NULL, -1);
sctp_set_ulp_prop(sctp);
}
if (sctp->sctp_unacked == 0)
diff --git a/usr/src/uts/common/inet/sctp/sctp_notify.c b/usr/src/uts/common/inet/sctp/sctp_notify.c
index f516154ce6..3ede878954 100644
--- a/usr/src/uts/common/inet/sctp/sctp_notify.c
+++ b/usr/src/uts/common/inet/sctp/sctp_notify.c
@@ -19,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/stream.h>
@@ -39,9 +37,12 @@
#include <netinet/sctp.h>
#include <inet/common.h>
+#include <inet/ipclassifier.h>
#include <inet/ip.h>
+
#include "sctp_impl.h"
+/* ARGSUSED */
static void
sctp_notify(sctp_t *sctp, mblk_t *emp, size_t len)
{
@@ -49,6 +50,7 @@ sctp_notify(sctp_t *sctp, mblk_t *emp, size_t len)
mblk_t *mp;
sctp_faddr_t *fp;
int32_t rwnd = 0;
+ int error;
if ((mp = allocb(sizeof (*tudi) + sizeof (void *) +
sizeof (struct sockaddr_in6), BPRI_HI)) == NULL) {
@@ -108,7 +110,13 @@ sctp_notify(sctp_t *sctp, mblk_t *emp, size_t len)
ASSERT(len == rwnd);
#endif
- rwnd = sctp->sctp_ulp_recv(sctp->sctp_ulpd, mp, SCTP_NOTIFICATION);
+ /*
+ * Override b_flag for SCTP sockfs internal use
+ */
+ mp->b_flag = (short)SCTP_NOTIFICATION;
+
+ rwnd = sctp->sctp_ulp_recv(sctp->sctp_ulpd, mp, msgdsize(mp), 0,
+ &error, NULL);
if (rwnd > sctp->sctp_rwnd) {
sctp->sctp_rwnd = rwnd;
}
diff --git a/usr/src/uts/common/inet/sctp/sctp_opt_data.c b/usr/src/uts/common/inet/sctp/sctp_opt_data.c
index c24c81c01f..b3921cf6ad 100644
--- a/usr/src/uts/common/inet/sctp/sctp_opt_data.c
+++ b/usr/src/uts/common/inet/sctp/sctp_opt_data.c
@@ -20,12 +20,10 @@
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/types.h>
#include <sys/stream.h>
#define _SUN_TPI_VERSION 2
@@ -1386,8 +1384,11 @@ sctp_set_opt(sctp_t *sctp, int level, int name, const void *invalp,
}
us = (struct sctp_uc_swap *)invalp;
sctp->sctp_ulpd = us->sus_handle;
+ sctp->sctp_upcalls = us->sus_upcalls;
+#if 0
bcopy(us->sus_upcalls, &sctp->sctp_upcalls,
sizeof (sctp_upcalls_t));
+#endif
break;
}
case SCTP_PRSCTP:
diff --git a/usr/src/uts/common/inet/sctp/sctp_output.c b/usr/src/uts/common/inet/sctp/sctp_output.c
index 8065f1dcf1..938573b1be 100644
--- a/usr/src/uts/common/inet/sctp/sctp_output.c
+++ b/usr/src/uts/common/inet/sctp/sctp_output.c
@@ -288,6 +288,13 @@ sctp_sendmsg(sctp_t *sctp, mblk_t *mp, int flags)
}
sctp->sctp_unsent += msg_len;
BUMP_LOCAL(sctp->sctp_msgcount);
+ /*
+ * Notify sockfs if the tx queue is full.
+ */
+ if (SCTP_TXQ_LEN(sctp) >= sctp->sctp_xmit_hiwater) {
+ sctp->sctp_txq_full = 1;
+ sctp->sctp_ulp_xmitted(sctp->sctp_ulpd, B_TRUE);
+ }
if (sctp->sctp_state == SCTPS_ESTABLISHED)
sctp_output(sctp, UINT_MAX);
process_sendq:
@@ -366,10 +373,8 @@ nextmsg:
* Update ULP the amount of queued data, which is
* sent-unack'ed + unsent.
*/
- if (!SCTP_IS_DETACHED(sctp)) {
- sctp->sctp_ulp_xmitted(sctp->sctp_ulpd,
- sctp->sctp_unacked + sctp->sctp_unsent);
- }
+ if (!SCTP_IS_DETACHED(sctp))
+ SCTP_TXQ_UPDATE(sctp);
sctp_sendfail_event(sctp, mdblk, 0, B_FALSE);
goto try_next;
}
@@ -875,10 +880,8 @@ chunkified:
* Update ULP the amount of queued data, which is
* sent-unack'ed + unsent.
*/
- if (!SCTP_IS_DETACHED(sctp)) {
- sctp->sctp_ulp_xmitted(sctp->sctp_ulpd,
- sctp->sctp_unacked + sctp->sctp_unsent);
- }
+ if (!SCTP_IS_DETACHED(sctp))
+ SCTP_TXQ_UPDATE(sctp);
sctp_sendfail_event(sctp, meta, 0, B_TRUE);
next_msg:
meta = tmp_meta;
@@ -1541,10 +1544,8 @@ ftsn_done:
* Update ULP the amount of queued data, which is
* sent-unack'ed + unsent.
*/
- if (!SCTP_IS_DETACHED(sctp)) {
- sctp->sctp_ulp_xmitted(sctp->sctp_ulpd,
- sctp->sctp_unacked + sctp->sctp_unsent);
- }
+ if (!SCTP_IS_DETACHED(sctp))
+ SCTP_TXQ_UPDATE(sctp);
}
}
diff --git a/usr/src/uts/common/inet/sctp/sctp_shutdown.c b/usr/src/uts/common/inet/sctp/sctp_shutdown.c
index e8311a018f..b58016eb15 100644
--- a/usr/src/uts/common/inet/sctp/sctp_shutdown.c
+++ b/usr/src/uts/common/inet/sctp/sctp_shutdown.c
@@ -20,12 +20,10 @@
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/stream.h>
@@ -165,7 +163,7 @@ sctp_shutdown_received(sctp_t *sctp, sctp_chunk_hdr_t *sch, boolean_t crwsd,
/* Don't allow sending new data */
if (!SCTP_IS_DETACHED(sctp) && !sctp->sctp_ulp_discon_done) {
- sctp->sctp_ulp_disconnecting(sctp->sctp_ulpd);
+ sctp->sctp_ulp_opctl(sctp->sctp_ulpd, SOCK_OPCTL_SHUT_SEND, 0);
sctp->sctp_ulp_discon_done = B_TRUE;
}
diff --git a/usr/src/uts/common/inet/sctp_itf.h b/usr/src/uts/common/inet/sctp_itf.h
index 4a94cab233..eb7597ac0a 100644
--- a/usr/src/uts/common/inet/sctp_itf.h
+++ b/usr/src/uts/common/inet/sctp_itf.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,15 +19,13 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _INET_SCTP_ITF_H
#define _INET_SCTP_ITF_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -54,21 +51,6 @@ extern "C" {
#define SCTP_ITF_VER 1
/*
- * This struct holds all the upcalls the SCTP kernel module will
- * invoke for different events. When calling sctp_create() to create
- * a SCTP handle, the caller must provide this information.
- */
-typedef struct sctp_upcalls_s {
- void * (*su_newconn)(void *parenthandle, void *connind);
- void (*su_connected)(void *handle);
- int (*su_disconnected)(void *handle, int error);
- void (*su_disconnecting)(void *handle);
- int (*su_recv)(void *handle, mblk_t *mp, int flags);
- void (*su_xmitted)(void *handle, int txqueued);
- void (*su_properties)(void *handle, int wroff, size_t maxblk);
-} sctp_upcalls_t;
-
-/*
* This struct holds various flow control limits the caller of
* sctp_create() should observe when interacting with SCTP.
*/
@@ -82,9 +64,10 @@ typedef struct sctp_sockbuf_limits_s {
/*
* Parameter to SCTP_UC_SWAP setsockopt
*/
+struct sock_upcalls_s;
struct sctp_uc_swap {
- void *sus_handle;
- sctp_upcalls_t *sus_upcalls;
+ void *sus_handle;
+ struct sock_upcalls_s *sus_upcalls;
};
struct sctp_s;
@@ -102,7 +85,7 @@ extern void sctp_close(struct sctp_s *conn);
extern int sctp_connect(struct sctp_s *conn, const struct sockaddr *dst,
socklen_t addrlen);
extern struct sctp_s *sctp_create(void *newhandle, struct sctp_s *parent,
- int family, int flags, const sctp_upcalls_t *su,
+ int family, int flags, struct sock_upcalls_s *su,
sctp_sockbuf_limits_t *sbl, cred_t *cr);
extern int sctp_disconnect(struct sctp_s *conn);
extern int sctp_get_opt(struct sctp_s *conn, int level, int opt, void *opts,
diff --git a/usr/src/uts/common/inet/sockmods/sockmod_sctp.c b/usr/src/uts/common/inet/sockmods/sockmod_sctp.c
new file mode 100644
index 0000000000..2600cfa181
--- /dev/null
+++ b/usr/src/uts/common/inet/sockmods/sockmod_sctp.c
@@ -0,0 +1,221 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/sysmacros.h>
+#include <sys/strsubr.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/modctl.h>
+#include <sys/cmn_err.h>
+#include <netinet/sctp.h>
+#include <fs/sockfs/sockcommon.h>
+#include "socksctp.h"
+
+struct sonode *socksctp_create(struct sockparams *, int, int, int,
+ int, int, int *, cred_t *);
+void socksctp_destroy(struct sonode *);
+
+static int socksctp_constructor(void *, void *, int);
+static void socksctp_destructor(void *, void *);
+
+static __smod_priv_t sosctp_priv = { /* private hooks, referenced from sinfo below */
+ socksctp_create, /* builds a new SCTP sonode */
+ socksctp_destroy, /* frees an SCTP sonode */
+ NULL /* NOTE(review): meaning of this slot is not visible in this file */
+};
+
+static smod_reg_t sinfo = { /* registration record, referenced from modlsockmod */
+ SOCKMOD_VERSION,
+ "socksctp", /* presumably the socket module name -- TODO confirm */
+ SOCK_UC_VERSION,
+ SOCK_DC_VERSION,
+ NULL, /* NOTE(review): meaning of this slot is not visible in this file */
+ &sosctp_priv /* the create/destroy hooks above */
+};
+
+kmem_cache_t *sosctp_assoccache;
+static kmem_cache_t *sosctp_sockcache;
+
+/*
+ * Module linkage information for the kernel.
+ *
+ * modlsockmod bundles the mod_sockmodops vector, a description string and
+ * the sinfo registration record.  modlinkage wraps modlsockmod and is the
+ * object passed to mod_install(), mod_remove() and mod_info() by _init(),
+ * _fini() and _info() in this file.
+ */
+static struct modlsockmod modlsockmod = {
+ &mod_sockmodops, "SCTP socket module", &sinfo
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1,
+ &modlsockmod,
+ NULL
+};
+
+/*
+ * Create the two kmem caches used by this module: "sctpsock" holds
+ * struct sctp_sonode objects and uses socksctp_constructor() and
+ * socksctp_destructor(); "sctp_assoc" holds struct sctp_soassoc objects and
+ * has no constructor or destructor.  Always returns 0.
+ */
+static int
+socksctp_init(void)
+{
+	sosctp_sockcache = kmem_cache_create("sctpsock",
+	    sizeof (struct sctp_sonode), 0,
+	    socksctp_constructor, socksctp_destructor,
+	    NULL, NULL, NULL, 0);
+
+	sosctp_assoccache = kmem_cache_create("sctp_assoc",
+	    sizeof (struct sctp_soassoc), 0,
+	    NULL, NULL, NULL, NULL, NULL, 0);
+
+	return (0);
+}
+
+/*
+ * Destroy both kmem caches created by socksctp_init(): the sonode cache
+ * first, then the association cache.
+ */
+static void
+socksctp_fini(void)
+{
+	kmem_cache_destroy(sosctp_sockcache);
+	kmem_cache_destroy(sosctp_assoccache);
+}
+
+/*
+ * kmem cache constructor for sosctp_sockcache.  Marks the buffer as an SCTP
+ * socket (ss_type = SOSCTP_SOCKET) and then runs the generic
+ * sonode_constructor() on the embedded sonode, returning its result.
+ */
+/*ARGSUSED*/
+static int
+socksctp_constructor(void *buf, void *cdrarg, int kmflags)
+{
+	struct sctp_sonode *ss = buf;
+
+	ss->ss_type = SOSCTP_SOCKET;
+
+	return (sonode_constructor((void *)&ss->ss_so, cdrarg, kmflags));
+}
+
+/*
+ * kmem cache destructor for sosctp_sockcache.  Hands the sonode embedded in
+ * the sctp_sonode buffer to the generic sonode_destructor().
+ */
+/*ARGSUSED*/
+static void
+socksctp_destructor(void *buf, void *cdrarg)
+{
+	struct sctp_sonode *ss = buf;
+
+	sonode_destructor((void *)&ss->ss_so, cdrarg);
+}
+
+/*
+ * Creates a sctp socket data structure.
+ *
+ * Allocates a sctp_sonode from sosctp_sockcache and initialises the embedded
+ * sonode with sosctp_sonodeops (SOCK_STREAM) or sosctp_seq_sonodeops
+ * (SOCK_SEQPACKET).  For SOCK_SEQPACKET the association id table is also
+ * grown by one entry.
+ *
+ *	sp, protocol	passed through to sonode_init()
+ *	family		must be AF_INET or AF_INET6
+ *	type		must be SOCK_STREAM or SOCK_SEQPACKET
+ *	version		SOV_STREAM is rejected; SOV_DEFAULT is replaced by
+ *			so_default_version
+ *	sflags		SOCKET_NOSLEEP selects KM_NOSLEEP, otherwise KM_SLEEP
+ *	errorp		set to 0 on success, EINVAL for an unsupported
+ *			version/family/type, ENOMEM if the allocation fails
+ *	cr		unused
+ *
+ * Returns the new sonode, or NULL with *errorp describing the failure.
+ */
+/* ARGSUSED */
+struct sonode *
+socksctp_create(struct sockparams *sp, int family, int type, int protocol,
+    int version, int sflags, int *errorp, cred_t *cr)
+{
+	struct sctp_sonode *ss;
+	struct sonode *so;
+	int kmflags = (sflags & SOCKET_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
+
+	/*
+	 * Report success explicitly, as socksdp_create() does, so callers
+	 * never read a stale *errorp after a successful create.
+	 */
+	*errorp = 0;
+
+	if (version == SOV_STREAM) {
+		*errorp = EINVAL;
+		return (NULL);
+	}
+
+	/*
+	 * We only support two types of SCTP socket; anything else is
+	 * rejected with EINVAL.
+	 * NOTE(review): the previous comment said sotpi_create() handles the
+	 * other cases (such as raw sockets); presumably the caller falls
+	 * back to it -- confirm.
+	 */
+	if (!(family == AF_INET || family == AF_INET6) ||
+	    !(type == SOCK_STREAM || type == SOCK_SEQPACKET)) {
+		*errorp = EINVAL;
+		return (NULL);
+	}
+
+	ss = kmem_cache_alloc(sosctp_sockcache, kmflags);
+	if (ss == NULL) {
+		*errorp = ENOMEM;
+		return (NULL);
+	}
+
+	so = &ss->ss_so;
+
+	/* Start with an empty association table. */
+	ss->ss_maxassoc = 0;
+	ss->ss_assoccnt = 0;
+	ss->ss_assocs = NULL;
+
+	if (type == SOCK_STREAM) {
+		sonode_init(so, sp, family, type, protocol,
+		    &sosctp_sonodeops);
+	} else {
+		sonode_init(so, sp, family, type, protocol,
+		    &sosctp_seq_sonodeops);
+		ASSERT(type == SOCK_SEQPACKET);
+		/*
+		 * NOTE(review): the result of the initial grow is ignored;
+		 * with KM_NOSLEEP it can presumably fail.  Confirm the table
+		 * is grown again on demand when an association is added.
+		 */
+		mutex_enter(&so->so_lock);
+		(void) sosctp_aid_grow(ss, 1, kmflags);
+		mutex_exit(&so->so_lock);
+	}
+
+	if (version == SOV_DEFAULT) {
+		version = so_default_version;
+	}
+	so->so_version = (short)version;
+
+	dprint(2, ("sosctp_create: %p domain %d type %d\n", (void *)so, family,
+	    type));
+
+	return (so);
+}
+
+/*
+ * Free SCTP socket data structure.
+ *
+ * Runs sosctp_fini() on the sonode with the current credentials and then
+ * returns the enclosing sctp_sonode to sosctp_sockcache.
+ */
+void
+socksctp_destroy(struct sonode *so)
+{
+	ASSERT((so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET) &&
+	    so->so_protocol == IPPROTO_SCTP);
+
+	sosctp_fini(so, CRED());
+
+	kmem_cache_free(sosctp_sockcache, SOTOSSO(so));
+}
+
+/*
+ * Module load entry point: create the kmem caches, then install the module.
+ * If mod_install() fails the caches are destroyed again.  Returns the
+ * mod_install() result.
+ */
+int
+_init(void)
+{
+	int rv;
+
+	(void) socksctp_init();
+
+	rv = mod_install(&modlinkage);
+	if (rv != 0) {
+		socksctp_fini();
+	}
+
+	return (rv);
+}
+
+/*
+ * Module unload entry point: the kmem caches are destroyed only after
+ * mod_remove() has succeeded.  Returns the mod_remove() result.
+ */
+int
+_fini(void)
+{
+	int rv = mod_remove(&modlinkage);
+
+	if (rv == 0) {
+		socksctp_fini();
+	}
+
+	return (rv);
+}
+
+/*
+ * Module information entry point: forwards to mod_info().
+ */
+int
+_info(struct modinfo *modinfop)
+{
+	int rv = mod_info(&modlinkage, modinfop);
+
+	return (rv);
+}
diff --git a/usr/src/uts/common/inet/sockmods/sockmod_sdp.c b/usr/src/uts/common/inet/sockmods/sockmod_sdp.c
new file mode 100644
index 0000000000..f609cbe069
--- /dev/null
+++ b/usr/src/uts/common/inet/sockmods/sockmod_sdp.c
@@ -0,0 +1,154 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/sysmacros.h>
+#include <sys/strsubr.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/modctl.h>
+#include <sys/cmn_err.h>
+#include <sys/tihdr.h>
+#include <sys/vfs.h>
+#include <fs/sockfs/nl7c.h>
+#include <inet/kssl/ksslapi.h>
+#include <inet/sdp_itf.h>
+#include <fs/sockfs/sockcommon.h>
+#include "socksdp.h"
+
+struct sonode *socksdp_create(struct sockparams *, int, int, int,
+ int, int, int *, cred_t *);
+static void socksdp_destroy(struct sonode *);
+
+static __smod_priv_t sosdp_priv = { /* private hooks, referenced from sinfo below */
+ socksdp_create, /* builds a new SDP sonode */
+ socksdp_destroy, /* frees an SDP sonode */
+ NULL /* NOTE(review): meaning of this slot is not visible in this file */
+};
+
+static smod_reg_t sinfo = { /* registration record, referenced from modlsockmod */
+ SOCKMOD_VERSION,
+ "socksdp", /* presumably the socket module name -- TODO confirm */
+ SOCK_UC_VERSION,
+ SOCK_DC_VERSION,
+ NULL, /* NOTE(review): meaning of this slot is not visible in this file */
+ &sosdp_priv /* the create/destroy hooks above */
+};
+
+/*
+ * Module linkage information for the kernel
+ *
+ * modlsockmod bundles the mod_sockmodops vector, a description string and
+ * the sinfo registration record.  modlinkage wraps modlsockmod and is the
+ * object passed to mod_install(), mod_remove() and mod_info() by _init(),
+ * _fini() and _info() in this file.
+ */
+static struct modlsockmod modlsockmod = {
+ &mod_sockmodops, "SDP socket module", &sinfo
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1,
+ &modlsockmod,
+ NULL
+};
+
+/*
+ * Creates a sdp socket data structure.
+ *
+ * Allocates a sonode from socket_cache, initialises it with sosdp_sonodeops
+ * and sets SO_POLLEV_ALWAYS in so_pollev.
+ *
+ *	sp, protocol	passed through to sonode_init()
+ *	family		must be AF_INET or AF_INET6
+ *	type		must be SOCK_STREAM
+ *	version		SOV_STREAM is rejected; SOV_DEFAULT is replaced by
+ *			so_default_version
+ *	sflags		SOCKET_NOSLEEP selects KM_NOSLEEP, otherwise KM_SLEEP
+ *	errorp		0 on success; EOPNOTSUPP when is_system_labeled() is
+ *			true; EINVAL for an unsupported version/family/type;
+ *			ENOMEM if the allocation fails
+ *	cr		unused
+ *
+ * Returns the new sonode, or NULL with *errorp describing the failure.
+ */
+/* ARGSUSED */
+struct sonode *
+socksdp_create(struct sockparams *sp, int family, int type, int protocol,
+    int version, int sflags, int *errorp, cred_t *cr)
+{
+	struct sonode *so;
+	int kmflags = KM_SLEEP;
+
+	if (sflags & SOCKET_NOSLEEP)
+		kmflags = KM_NOSLEEP;
+
+	dprint(4, ("Inside sosdp_create: domain:%d proto:%d type:%d",
+	    family, protocol, type));
+
+	*errorp = 0;
+
+	if (is_system_labeled()) {
+		*errorp = EOPNOTSUPP;
+		return (NULL);
+	}
+
+	/*
+	 * Only SOCK_STREAM over AF_INET/AF_INET6 with a non-STREAMS socket
+	 * version is supported; every other combination yields EINVAL.
+	 */
+	if (version == SOV_STREAM ||
+	    (family != AF_INET && family != AF_INET6) ||
+	    type != SOCK_STREAM) {
+		*errorp = EINVAL;
+		return (NULL);
+	}
+
+	so = kmem_cache_alloc(socket_cache, kmflags);
+	if (so == NULL) {
+		*errorp = ENOMEM;
+		return (NULL);
+	}
+
+	sonode_init(so, sp, family, type, protocol, &sosdp_sonodeops);
+	so->so_pollev |= SO_POLLEV_ALWAYS;
+
+	dprint(2, ("sosdp_create: %p domain %d type %d\n", (void *)so, family,
+	    type));
+
+	so->so_version = (short)((version == SOV_DEFAULT) ?
+	    so_default_version : version);
+
+	return (so);
+}
+
+/*
+ * Free an SDP socket: run sosdp_fini() on the sonode with the current
+ * credentials, then return it to socket_cache.  The sonode must be using
+ * sosdp_sonodeops.
+ */
+static void
+socksdp_destroy(struct sonode *so)
+{
+	ASSERT(so->so_ops == &sosdp_sonodeops);
+
+	sosdp_fini(so, CRED());
+	kmem_cache_free(socket_cache, so);
+}
+
+/*
+ * Module load entry point: installs the module and returns the
+ * mod_install() result.
+ */
+int
+_init(void)
+{
+	int rv = mod_install(&modlinkage);
+
+	return (rv);
+}
+
+/*
+ * Module unload entry point: returns the mod_remove() result.
+ */
+int
+_fini(void)
+{
+	int rv = mod_remove(&modlinkage);
+
+	return (rv);
+}
+
+/*
+ * Module information entry point: forwards to mod_info().
+ */
+int
+_info(struct modinfo *modinfop)
+{
+	int rv = mod_info(&modlinkage, modinfop);
+
+	return (rv);
+}
diff --git a/usr/src/uts/common/inet/sockmods/socksctp.c b/usr/src/uts/common/inet/sockmods/socksctp.c
new file mode 100644
index 0000000000..e013940703
--- /dev/null
+++ b/usr/src/uts/common/inet/sockmods/socksctp.c
@@ -0,0 +1,2105 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/types.h>
+#include <sys/t_lock.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/buf.h>
+#include <sys/vfs.h>
+#include <sys/vnode.h>
+#include <sys/debug.h>
+#include <sys/errno.h>
+#include <sys/stropts.h>
+#include <sys/cmn_err.h>
+#include <sys/sysmacros.h>
+#include <sys/filio.h>
+
+#include <sys/project.h>
+#include <sys/tihdr.h>
+#include <sys/strsubr.h>
+#include <sys/esunddi.h>
+#include <sys/ddi.h>
+
+#include <sys/sockio.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/strsun.h>
+
+#include <netinet/sctp.h>
+#include <inet/sctp_itf.h>
+#include <fs/sockfs/sockcommon.h>
+#include "socksctp.h"
+
+/*
+ * SCTP sockfs sonode operations, 1-1 socket
+ */
+static int sosctp_init(struct sonode *, struct sonode *, struct cred *, int);
+static int sosctp_accept(struct sonode *, int, struct cred *, struct sonode **);
+static int sosctp_bind(struct sonode *, struct sockaddr *, socklen_t, int,
+ struct cred *);
+static int sosctp_listen(struct sonode *, int, struct cred *);
+static int sosctp_connect(struct sonode *, const struct sockaddr *, socklen_t,
+ int, int, struct cred *);
+static int sosctp_recvmsg(struct sonode *, struct nmsghdr *, struct uio *,
+ struct cred *);
+static int sosctp_sendmsg(struct sonode *, struct nmsghdr *, struct uio *,
+ struct cred *);
+static int sosctp_getpeername(struct sonode *, struct sockaddr *, socklen_t *,
+ boolean_t, struct cred *);
+static int sosctp_getsockname(struct sonode *, struct sockaddr *, socklen_t *,
+ struct cred *);
+static int sosctp_shutdown(struct sonode *, int, struct cred *);
+static int sosctp_getsockopt(struct sonode *, int, int, void *, socklen_t *,
+ int, struct cred *);
+static int sosctp_setsockopt(struct sonode *, int, int, const void *,
+ socklen_t, struct cred *);
+static int sosctp_ioctl(struct sonode *, int, intptr_t, int, struct cred *,
+ int32_t *);
+static int sosctp_close(struct sonode *, int, struct cred *);
+void sosctp_fini(struct sonode *, struct cred *);
+
+/*
+ * SCTP sockfs sonode operations, 1-N socket
+ */
+static int sosctp_seq_connect(struct sonode *, const struct sockaddr *,
+ socklen_t, int, int, struct cred *);
+static int sosctp_seq_sendmsg(struct sonode *, struct nmsghdr *, struct uio *,
+ struct cred *);
+
+/*
+ * Socket association upcalls, 1-N socket connection
+ */
+sock_upper_handle_t sctp_assoc_newconn(sock_upper_handle_t,
+ sock_lower_handle_t, sock_downcalls_t *, struct cred *, pid_t,
+ sock_upcalls_t **);
+static void sctp_assoc_connected(sock_upper_handle_t, sock_connid_t,
+ struct cred *, pid_t);
+static int sctp_assoc_disconnected(sock_upper_handle_t, sock_connid_t, int);
+static void sctp_assoc_disconnecting(sock_upper_handle_t, sock_opctl_action_t,
+ uintptr_t arg);
+static ssize_t sctp_assoc_recv(sock_upper_handle_t, mblk_t *, size_t, int,
+ int *, boolean_t *);
+static void sctp_assoc_xmitted(sock_upper_handle_t, boolean_t);
+static void sctp_assoc_properties(sock_upper_handle_t,
+ struct sock_proto_props *);
+
+sonodeops_t sosctp_sonodeops = { /* ops for 1-1 SCTP sockets (installed for SOCK_STREAM) */
+ sosctp_init, /* sop_init */
+ sosctp_accept, /* sop_accept */
+ sosctp_bind, /* sop_bind */
+ sosctp_listen, /* sop_listen */
+ sosctp_connect, /* sop_connect */
+ sosctp_recvmsg, /* sop_recvmsg */
+ sosctp_sendmsg, /* sop_sendmsg */
+ so_sendmblk_notsupp, /* sop_sendmblk: generic "not supported" handler */
+ sosctp_getpeername, /* sop_getpeername */
+ sosctp_getsockname, /* sop_getsockname */
+ sosctp_shutdown, /* sop_shutdown */
+ sosctp_getsockopt, /* sop_getsockopt */
+ sosctp_setsockopt, /* sop_setsockopt */
+ sosctp_ioctl, /* sop_ioctl */
+ so_poll, /* sop_poll: generic so_ handler, not SCTP specific */
+ sosctp_close, /* sop_close */
+};
+
+sonodeops_t sosctp_seq_sonodeops = { /* ops for 1-N SCTP sockets (installed for SOCK_SEQPACKET) */
+ sosctp_init, /* sop_init */
+ so_accept_notsupp, /* sop_accept: generic "not supported" handler */
+ sosctp_bind, /* sop_bind */
+ sosctp_listen, /* sop_listen */
+ sosctp_seq_connect, /* sop_connect: 1-N specific variant */
+ sosctp_recvmsg, /* sop_recvmsg */
+ sosctp_seq_sendmsg, /* sop_sendmsg: 1-N specific variant */
+ so_sendmblk_notsupp, /* sop_sendmblk: generic "not supported" handler */
+ so_getpeername_notsupp, /* sop_getpeername: generic "not supported" handler */
+ sosctp_getsockname, /* sop_getsockname */
+ so_shutdown_notsupp, /* sop_shutdown: generic "not supported" handler */
+ sosctp_getsockopt, /* sop_getsockopt */
+ sosctp_setsockopt, /* sop_setsockopt */
+ sosctp_ioctl, /* sop_ioctl */
+ so_poll, /* sop_poll: generic so_ handler, not SCTP specific */
+ sosctp_close, /* sop_close */
+};
+
+sock_upcalls_t sosctp_sock_upcalls = { /* upcalls sosctp_init() passes to sctp_create() for SOCK_STREAM */
+ so_newconn,
+ so_connected,
+ so_disconnected,
+ so_opctl,
+ so_queue_msg, /* presumably the receive upcall slot -- TODO confirm */
+ so_set_prop,
+ so_txq_full,
+ NULL, /* su_signal_oob */
+};
+
+sock_upcalls_t sosctp_assoc_upcalls = { /* upcalls sosctp_init() passes to sctp_create() for SOCK_SEQPACKET */
+ sctp_assoc_newconn,
+ sctp_assoc_connected,
+ sctp_assoc_disconnected,
+ sctp_assoc_disconnecting,
+ sctp_assoc_recv,
+ sctp_assoc_properties,
+ sctp_assoc_xmitted,
+ NULL, /*
+ * su_recv_space
+ * NOTE(review): this table has nine initializers while
+ * sosctp_sock_upcalls has eight and labels its eighth slot
+ * su_signal_oob, so one of the two sets of slot labels looks
+ * stale.  Both slots are NULL either way -- confirm against
+ * the sock_upcalls_t definition.
+ */
+ NULL, /* su_signal_oob */
+};
+
+/*
+ * sop_init handler shared by 1-1 and 1-N SCTP sockets.
+ *
+ * With a parent sonode (passive open) the new socket is marked bound and
+ * connected, picks up SS_ASYNC from the parent, and copies the parent's
+ * SCTP settings, protocol properties and mode under the parent's so_lock;
+ * no protocol handle is created here.
+ *
+ * Without a parent a new SCTP endpoint is created through sctp_create(),
+ * using the generic socket upcalls for SOCK_STREAM and the association
+ * upcalls for SOCK_SEQPACKET, and the buffer limits it reports are stored
+ * in the sonode.
+ *
+ * Returns 0 on success or ENOMEM if sctp_create() returns NULL.
+ */
+/* ARGSUSED */
+static int
+sosctp_init(struct sonode *so, struct sonode *pso, struct cred *cr, int flags)
+{
+	struct sctp_sonode *child = SOTOSSO(so);
+	struct sctp_sonode *parent;
+	sctp_sockbuf_limits_t limits;
+	sock_upcalls_t *uc;
+
+	if (pso != NULL) {
+		/*
+		 * Passive open, just inherit settings from parent. We should
+		 * not end up here for SOCK_SEQPACKET type sockets, since no
+		 * new sonode is created in that case.
+		 */
+		ASSERT(so->so_type == SOCK_STREAM);
+		parent = SOTOSSO(pso);
+
+		mutex_enter(&pso->so_lock);
+		so->so_state |= (SS_ISBOUND | SS_ISCONNECTED |
+		    (pso->so_state & SS_ASYNC));
+		sosctp_so_inherit(parent, child);
+		so->so_proto_props = pso->so_proto_props;
+		so->so_mode = pso->so_mode;
+		mutex_exit(&pso->so_lock);
+
+		return (0);
+	}
+
+	if (so->so_type != SOCK_STREAM) {
+		ASSERT(so->so_type == SOCK_SEQPACKET);
+		uc = &sosctp_assoc_upcalls;
+	} else {
+		uc = &sosctp_sock_upcalls;
+		so->so_mode = SM_CONNREQUIRED;
+	}
+
+	so->so_proto_handle = (sock_lower_handle_t)sctp_create(so, NULL,
+	    so->so_family, SCTP_CAN_BLOCK, uc, &limits, cr);
+	if (so->so_proto_handle == NULL)
+		return (ENOMEM);
+
+	/* Adopt the flow control limits reported by sctp_create(). */
+	so->so_rcvbuf = limits.sbl_rxbuf;
+	so->so_rcvlowat = limits.sbl_rxlowat;
+	so->so_sndbuf = limits.sbl_txbuf;
+	so->so_sndlowat = limits.sbl_txlowat;
+
+	return (0);
+}
+
+/*
+ * Accept incoming connection.
+ *
+ * Fails with EINVAL unless the socket is in the SS_ACCEPTCONN state;
+ * otherwise dequeues a connection from the accept queue into *nsop, passing
+ * along the FNONBLOCK/FNDELAY bits of fflag, and returns the dequeue result.
+ */
+/*ARGSUSED*/
+static int
+sosctp_accept(struct sonode *so, int fflag, struct cred *cr,
+    struct sonode **nsop)
+{
+	if (!(so->so_state & SS_ACCEPTCONN))
+		return (EINVAL);
+
+	return (so_acceptq_dequeue(so, (fflag & (FNONBLOCK|FNDELAY)), nsop));
+}
+
+/*
+ * Bind local endpoint.
+ *
+ * Unless the caller passes _SOBIND_LOCK_HELD, so_lock is taken and the
+ * socket is single-threaded (SOLOCKED) for the duration of the call and both
+ * are released before returning; with _SOBIND_LOCK_HELD the caller must
+ * already hold them and still holds them on return.  so_lock is dropped
+ * around the sctp_bind() call itself.
+ *
+ * Returns 0 and sets SS_ISBOUND on success, EINVAL if the socket can no
+ * longer send, or the error returned by sctp_bind().
+ */
+/*ARGSUSED*/
+static int
+sosctp_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen,
+    int flags, struct cred *cr)
+{
+	const int caller_locked = (flags & _SOBIND_LOCK_HELD);
+	int rc;
+
+	if (caller_locked) {
+		ASSERT(MUTEX_HELD(&so->so_lock));
+	} else {
+		mutex_enter(&so->so_lock);
+		so_lock_single(so);	/* Set SOLOCKED */
+	}
+
+	if (so->so_state & SS_CANTSENDMORE) {
+		/*
+		 * X/Open requires this check
+		 */
+		rc = EINVAL;
+	} else {
+		/*
+		 * Protocol module does address family checks.
+		 */
+		mutex_exit(&so->so_lock);
+
+		rc = sctp_bind((struct sctp_s *)so->so_proto_handle, name,
+		    namelen);
+
+		mutex_enter(&so->so_lock);
+		if (rc != 0) {
+			eprintsoline(so, rc);
+		} else {
+			so->so_state |= SS_ISBOUND;
+		}
+	}
+
+	if (caller_locked) {
+		/* If the caller held the lock don't release it here */
+		ASSERT(MUTEX_HELD(&so->so_lock));
+		ASSERT(so->so_flag & SOLOCKED);
+	} else {
+		so_unlock_single(so, SOLOCKED);
+		mutex_exit(&so->so_lock);
+	}
+
+	return (rc);
+}
+
+/*
+ * Turn socket into a listen socket.
+ *
+ * Runs with so_lock held and the socket marked SOLOCKED, except that
+ * so_lock is dropped around the sctp_listen() call.  A negative backlog
+ * is treated as 0.  On success SS_ACCEPTCONN and SS_ISBOUND are set and
+ * so_backlog is recorded.
+ *
+ * Returns 0, EINVAL if the socket is connecting, connected,
+ * disconnecting or shut down in either direction, or the error returned
+ * by sctp_listen().
+ */
+/* ARGSUSED */
+static int
+sosctp_listen(struct sonode *so, int backlog, struct cred *cr)
+{
+ int error = 0;
+
+ mutex_enter(&so->so_lock);
+ so_lock_single(so);
+
+ /*
+ * If this socket is trying to do connect, or if it has
+ * been connected, disallow.
+ */
+ if (so->so_state & (SS_ISCONNECTING | SS_ISCONNECTED |
+ SS_ISDISCONNECTING | SS_CANTRCVMORE | SS_CANTSENDMORE)) {
+ error = EINVAL;
+ eprintsoline(so, error);
+ goto done;
+ }
+
+ if (backlog < 0) {
+ backlog = 0;
+ }
+
+ /*
+ * If listen() is only called to change backlog, we don't
+ * need to notify protocol module.
+ */
+ if (so->so_state & SS_ACCEPTCONN) {
+ so->so_backlog = backlog;
+ goto done;
+ }
+
+ mutex_exit(&so->so_lock); /* not held across the protocol call */
+ error = sctp_listen((struct sctp_s *)so->so_proto_handle);
+ mutex_enter(&so->so_lock);
+ if (error == 0) {
+ so->so_state |= (SS_ACCEPTCONN|SS_ISBOUND);
+ so->so_backlog = backlog;
+ } else {
+ eprintsoline(so, error);
+ }
+done:
+ so_unlock_single(so, SOLOCKED);
+ mutex_exit(&so->so_lock);
+
+ return (error);
+}
+
+/*
+ * Active open.
+ *
+ * One-to-one (SOCK_STREAM) connect.  Runs with so_lock held and the
+ * socket marked SOLOCKED, except that so_lock is dropped around the
+ * sctp_connect() call.  After a successful sctp_connect() the caller
+ * waits in sowaitconnected() according to fflag.
+ *
+ * Returns:
+ *	EISCONN		socket is already connected
+ *	EALREADY	a connect is already in progress
+ *	EOPNOTSUPP	socket is a listener
+ *	EINVAL		socket is disconnecting or shut down, or no peer
+ *			address was supplied
+ *	otherwise a pending so_error, or the result of sctp_connect() /
+ *	sowaitconnected().
+ */
+/*ARGSUSED*/
+static int
+sosctp_connect(struct sonode *so, const struct sockaddr *name,
+    socklen_t namelen, int fflag, int flags, struct cred *cr)
+{
+	int error = 0;
+
+	ASSERT(so->so_type == SOCK_STREAM);
+
+	mutex_enter(&so->so_lock);
+	so_lock_single(so);
+
+	/*
+	 * Can't connect() after listen(), or if the socket is already
+	 * connected.
+	 */
+	if (so->so_state & (SS_ACCEPTCONN|SS_ISCONNECTED|SS_ISCONNECTING)) {
+		if (so->so_state & SS_ISCONNECTED) {
+			error = EISCONN;
+		} else if (so->so_state & SS_ISCONNECTING) {
+			error = EALREADY;
+		} else {
+			error = EOPNOTSUPP;
+		}
+		eprintsoline(so, error);
+		goto done;
+	}
+
+	/*
+	 * Check for failure of an earlier call
+	 */
+	if (so->so_error != 0) {
+		error = sogeterr(so, B_TRUE);
+		eprintsoline(so, error);
+		goto done;
+	}
+
+	/*
+	 * Connection is closing, or closed, don't allow reconnect.
+	 * TCP allows this to proceed, but the socket remains unwriteable.
+	 * BSD returns EINVAL.
+	 */
+	if (so->so_state & (SS_ISDISCONNECTING|SS_CANTRCVMORE|
+	    SS_CANTSENDMORE)) {
+		error = EINVAL;
+		eprintsoline(so, error);
+		goto done;
+	}
+
+	if (name == NULL || namelen == 0) {
+		/*
+		 * so_lock must remain held here: the "done" path calls
+		 * so_unlock_single() and then releases so_lock itself.
+		 * Dropping the lock before the jump would release it twice.
+		 */
+		error = EINVAL;
+		eprintsoline(so, error);
+		goto done;
+	}
+
+	soisconnecting(so);
+	/* so_lock is not held across the call into the protocol. */
+	mutex_exit(&so->so_lock);
+
+	error = sctp_connect((struct sctp_s *)so->so_proto_handle,
+	    name, namelen);
+
+	mutex_enter(&so->so_lock);
+	if (error == 0) {
+		/*
+		 * Allow other threads to access the socket
+		 */
+		error = sowaitconnected(so, fflag, 0);
+	}
+done:
+	so_unlock_single(so, SOLOCKED);
+	mutex_exit(&so->so_lock);
+	return (error);
+}
+
+/*
+ * Active open for 1-N sockets, create a new association and
+ * call connect on that.
+ * If the parent hasn't been bound yet (this is the first association),
+ * make it so.
+ *
+ * sosctp_assoc_createconn() is called with so_lock held and the socket
+ * marked SOLOCKED.  Any association reference it hands back is released
+ * again before returning.
+ *
+ * Returns EINVAL when no peer address is supplied; otherwise the result
+ * of sosctp_assoc_createconn(), with EHOSTUNREACH mapped to ENETUNREACH
+ * when _SOCONNECT_XPG4_2 is set in flags.
+ *
+ * NOTE(review): ssa is not initialized here and is read after
+ * sosctp_assoc_createconn() even when that call fails, so this relies on
+ * the callee always storing through its last argument (NULL when no
+ * association exists) - confirm against sosctp_assoc_createconn().
+ */
+static int
+sosctp_seq_connect(struct sonode *so, const struct sockaddr *name,
+ socklen_t namelen, int fflag, int flags, struct cred *cr)
+{
+ struct sctp_soassoc *ssa;
+ struct sctp_sonode *ss;
+ int error;
+
+ ASSERT(so->so_type == SOCK_SEQPACKET);
+
+ mutex_enter(&so->so_lock);
+ so_lock_single(so);
+
+ if (name == NULL || namelen == 0) {
+ error = EINVAL;
+ eprintsoline(so, error);
+ goto done;
+ }
+
+ ss = SOTOSSO(so);
+
+ error = sosctp_assoc_createconn(ss, name, namelen, NULL, 0, fflag,
+ cr, &ssa);
+ if (error != 0) {
+ if ((error == EHOSTUNREACH) && (flags & _SOCONNECT_XPG4_2)) {
+ error = ENETUNREACH;
+ }
+ }
+ if (ssa != NULL) {
+ SSA_REFRELE(ss, ssa); /* drop the reference handed back via &ssa */
+ }
+
+done:
+ so_unlock_single(so, SOLOCKED);
+ mutex_exit(&so->so_lock);
+ return (error);
+}
+
+/*
+ * Receive data.
+ *
+ * Calls so_dequeue_msg() and, when it hands back an mblk, translates the
+ * T_unitdata_ind at its start into the caller's nmsghdr:
+ *  - the source address is copied into a newly kmem_alloc()ed msg_name,
+ *    but only when the caller passed a non-zero msg_namelen;
+ *  - options are packed into control data by sosctp_pack_cmsg(), or
+ *    MSG_CTRUNC is reported when the caller passed msg_controllen == 0;
+ *  - MSG_NOTIFICATION and MSG_EOR are reported through msg_flags.
+ * Only one reader is admitted at a time (so_lock_read_intr() /
+ * so_unlock_read()).
+ *
+ * Returns ENOTCONN or EOPNOTSUPP from the state/flag checks at the top,
+ * the so_lock_read_intr() error, or the so_dequeue_msg() result.
+ *
+ * NOTE(review): readcnt is initialized to 0 and never assigned in this
+ * function, so the "readcnt > 0" tests guarding sctp_recvd() below cannot
+ * be true here; presumably the receive window update is done elsewhere
+ * (e.g. inside so_dequeue_msg()) - confirm.  The "again" and "done"
+ * labels have no matching goto in this function.
+ */
+/* ARGSUSED */
+static int
+sosctp_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
+ struct cred *cr)
+{
+ struct sctp_sonode *ss = SOTOSSO(so);
+ struct sctp_soassoc *ssa = NULL;
+ int flags, error = 0;
+ struct T_unitdata_ind *tind;
+ int len, count, readcnt = 0, rxqueued;
+ socklen_t controllen, namelen;
+ void *opt;
+ mblk_t *mp;
+ rval_t rval;
+
+ /* Remember the caller's sizes/flags, then reset the output fields. */
+ controllen = msg->msg_controllen;
+ namelen = msg->msg_namelen;
+ flags = msg->msg_flags;
+ msg->msg_flags = 0;
+ msg->msg_controllen = 0;
+ msg->msg_namelen = 0;
+
+ if (so->so_type == SOCK_STREAM) {
+ if (!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|
+ SS_CANTRCVMORE))) {
+ return (ENOTCONN);
+ }
+ } else {
+ /* NOTE: Will come here from vop_read() as well */
+ /* For 1-N socket, recv() cannot be used. */
+ if (namelen == 0)
+ return (EOPNOTSUPP);
+ /*
+ * If there are no associations, and no new connections are
+ * coming in, there's not going to be new messages coming
+ * in either.
+ */
+ if (so->so_rcv_q_head == NULL && ss->ss_assoccnt == 0 &&
+ !(so->so_state & SS_ACCEPTCONN)) {
+ return (ENOTCONN);
+ }
+ }
+
+ /*
+ * out-of-band data not supported.
+ */
+ if (flags & MSG_OOB) {
+ return (EOPNOTSUPP);
+ }
+
+ /*
+ * flag possibilities:
+ *
+ * MSG_PEEK Don't consume data
+ * MSG_WAITALL Wait for full quantity of data (ignored if MSG_PEEK)
+ * MSG_DONTWAIT Non-blocking (same as FNDELAY | FNONBLOCK)
+ *
+ * MSG_WAITALL can return less than the full buffer if either
+ *
+ * 1. we would block and we are non-blocking
+ * 2. a full message cannot be delivered
+ *
+ * Given that we always get a full message from proto below,
+ * MSG_WAITALL is not meaningful.
+ */
+
+ mutex_enter(&so->so_lock);
+
+ /*
+ * Allow just one reader at a time.
+ */
+ error = so_lock_read_intr(so,
+ uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? FNONBLOCK : 0));
+ if (error) {
+ mutex_exit(&so->so_lock);
+ return (error);
+ }
+ mutex_exit(&so->so_lock);
+again:
+ error = so_dequeue_msg(so, &mp, uiop, &rval, flags | MSG_DUPCTRL);
+ if (mp != NULL) {
+ if (so->so_type == SOCK_SEQPACKET) {
+ ssa = *(struct sctp_soassoc **)DB_BASE(mp); /* assoc pointer read from the data block base */
+ }
+
+ tind = (struct T_unitdata_ind *)mp->b_rptr;
+
+ len = tind->SRC_length;
+
+ if (namelen > 0 && len > 0) {
+
+ opt = sogetoff(mp, tind->SRC_offset, len, 1);
+
+ ASSERT(opt != NULL);
+
+ msg->msg_name = kmem_alloc(len, KM_SLEEP); /* not freed here; presumably the caller owns it - confirm */
+ msg->msg_namelen = len;
+
+ bcopy(opt, msg->msg_name, len);
+ }
+
+ len = tind->OPT_length;
+ if (controllen == 0) {
+ if (len > 0) {
+ msg->msg_flags |= MSG_CTRUNC;
+ }
+ } else if (len > 0) {
+ opt = sogetoff(mp, tind->OPT_offset, len,
+ __TPI_ALIGN_SIZE);
+
+ ASSERT(opt != NULL);
+ sosctp_pack_cmsg(opt, msg, len);
+ }
+
+ if (mp->b_flag & SCTP_NOTIFICATION) {
+ msg->msg_flags |= MSG_NOTIFICATION;
+ }
+
+ if (!(mp->b_flag & SCTP_PARTIAL_DATA))
+ msg->msg_flags |= MSG_EOR;
+ freemsg(mp);
+ }
+done:
+ /*
+ * Determine if we need to update SCTP about the buffer
+ * space. For performance reason, we cannot update SCTP
+ * every time a message is read. The socket buffer low
+ * watermark is used as the threshold.
+ */
+ if (ssa == NULL) {
+ /* No association in hand: account against the socket itself. */
+ mutex_enter(&so->so_lock);
+ rxqueued = so->so_rcv_queued;
+
+ so->so_rcv_queued = rxqueued - readcnt;
+ count = so->so_rcvbuf - so->so_rcv_queued;
+
+ ASSERT(so->so_rcv_q_head != NULL ||
+ so->so_rcv_head != NULL ||
+ so->so_rcv_queued == 0);
+
+ so_unlock_read(so);
+ mutex_exit(&so->so_lock);
+
+ if (readcnt > 0 && (((count > 0) &&
+ (rxqueued >= so->so_rcvlowat)) ||
+ (so->so_rcv_queued == 0))) {
+ /*
+ * If amount of queued data is higher than watermark,
+ * update SCTP's idea of available buffer space.
+ */
+ sctp_recvd((struct sctp_s *)so->so_proto_handle, count);
+ }
+ } else {
+ /* 1-N socket: account against the association's counters. */
+ mutex_enter(&so->so_lock);
+ rxqueued = ssa->ssa_rcv_queued;
+
+ ssa->ssa_rcv_queued = rxqueued - readcnt;
+ count = so->so_rcvbuf - ssa->ssa_rcv_queued;
+
+ so_unlock_read(so);
+
+ if (readcnt > 0 &&
+ (((count > 0) && (rxqueued >= so->so_rcvlowat)) ||
+ (ssa->ssa_rcv_queued == 0))) {
+ /*
+ * If amount of queued data is higher than watermark,
+ * update SCTP's idea of available buffer space.
+ * so_lock is dropped around the call into SCTP.
+ */
+ mutex_exit(&so->so_lock);
+
+ sctp_recvd((struct sctp_s *)ssa->ssa_conn, count);
+
+ mutex_enter(&so->so_lock);
+ }
+ /*
+ * MOREDATA flag is set if all data could not be copied
+ * The association reference is kept when peeking or when
+ * part of the message is still to be read.
+ */
+ if (!(flags & MSG_PEEK) && !(rval.r_val1 & MOREDATA)) {
+ SSA_REFRELE(ss, ssa);
+ }
+ mutex_exit(&so->so_lock);
+ }
+
+ return (error);
+}
+
+/*
+ * Copy count bytes of user data from uiop into a chain of newly
+ * allocated mblks, linked through b_cont starting at hdr_mp.  Each mblk
+ * carries at most blk_size bytes of data and has wroff bytes reserved in
+ * front of the data for SCTP's own use.
+ *
+ * When an allocation fails the call returns EAGAIN for non-blocking
+ * callers (FNDELAY/FNONBLOCK in uio_fmode, or MSG_DONTWAIT in flags);
+ * otherwise it waits in strwaitbuf() and retries.  Returns 0 on success
+ * or the strwaitbuf()/uiomove() error.  On a uiomove() failure only the
+ * mblk being filled is freed; blocks already linked stay on the chain.
+ */
+int
+sosctp_uiomove(mblk_t *hdr_mp, ssize_t count, ssize_t blk_size, int wroff,
+    struct uio *uiop, int flags, cred_t *cr)
+{
+	mblk_t *tail = hdr_mp;
+	mblk_t *nmp;
+	ssize_t chunk;
+	int error;
+
+	/* One iteration per data mblk until everything has been copied. */
+	for (; count > 0; count -= chunk) {
+		chunk = MIN(count, blk_size);
+
+		/*
+		 * A message may be split over several packets, so every
+		 * mblk gets the leading room SCTP asks for.
+		 */
+		for (;;) {
+			nmp = allocb_cred(chunk + wroff, cr);
+			if (nmp != NULL)
+				break;
+			if ((uiop->uio_fmode & (FNDELAY|FNONBLOCK)) ||
+			    (flags & MSG_DONTWAIT))
+				return (EAGAIN);
+			error = strwaitbuf(chunk + wroff, BPRI_MED);
+			if (error)
+				return (error);
+		}
+
+		nmp->b_datap->db_cpid = curproc->p_pid;
+		ASSERT(wroff <= nmp->b_datap->db_lim - nmp->b_wptr);
+		nmp->b_rptr += wroff;
+
+		error = uiomove(nmp->b_rptr, chunk, UIO_WRITE, uiop);
+		if (error != 0) {
+			freeb(nmp);
+			return (error);
+		}
+		nmp->b_wptr = nmp->b_rptr + chunk;
+
+		/* Append to the chain and advance the tail. */
+		tail->b_cont = nmp;
+		tail = nmp;
+	}
+	return (0);
+}
+
+/*
+ * Send message.
+ *
+ * One-to-one (SOCK_STREAM) send path.  An SCTP_SNDRCV control message
+ * with MSG_EOF in sinfo_flags starts a graceful shutdown instead of
+ * sending data.  Otherwise the caller waits (unless non-blocking) while
+ * so_snd_qfull is set, connects implicitly to msg_name when the socket
+ * is neither connecting nor connected, then copies the user data into
+ * mblks behind a header from sctp_alloc_hdr() and passes the chain to
+ * sctp_sendmsg().
+ *
+ * Errors produced here: EOPNOTSUPP (MSG_OOB), EINVAL (short SCTP_SNDRCV
+ * cmsg, MSG_EOF together with MSG_ABORT, or data with MSG_EOF), EPIPE,
+ * EINTR, EAGAIN, EMSGSIZE, ENOTCONN and a pending so_error; errors from
+ * sosctp_connect(), sosctp_uiomove(), sctp_disconnect() and
+ * sctp_sendmsg() are passed through.
+ */
+static int
+sosctp_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
+ struct cred *cr)
+{
+ struct sctp_sonode *ss = SOTOSSO(so);
+ mblk_t *mctl;
+ struct cmsghdr *cmsg;
+ struct sctp_sndrcvinfo *sinfo;
+ int optlen, flags, fflag;
+ ssize_t count, msglen;
+ int error;
+
+ ASSERT(so->so_type == SOCK_STREAM);
+
+ flags = msg->msg_flags;
+ if (flags & MSG_OOB) {
+ /*
+ * No out-of-band data support.
+ */
+ return (EOPNOTSUPP);
+ }
+
+ if (msg->msg_controllen != 0) {
+ optlen = msg->msg_controllen;
+ cmsg = sosctp_find_cmsg(msg->msg_control, optlen, SCTP_SNDRCV);
+ if (cmsg != NULL) {
+ if (cmsg->cmsg_len <
+ (sizeof (*sinfo) + sizeof (*cmsg))) {
+ eprintsoline(so, EINVAL);
+ return (EINVAL);
+ }
+ sinfo = (struct sctp_sndrcvinfo *)(cmsg + 1); /* payload follows the cmsghdr */
+
+ /* Both flags should not be set together. */
+ if ((sinfo->sinfo_flags & MSG_EOF) &&
+ (sinfo->sinfo_flags & MSG_ABORT)) {
+ eprintsoline(so, EINVAL);
+ return (EINVAL);
+ }
+
+ /* Initiate a graceful shutdown. */
+ if (sinfo->sinfo_flags & MSG_EOF) {
+ /* Can't include data in MSG_EOF message. */
+ if (uiop->uio_resid != 0) {
+ eprintsoline(so, EINVAL);
+ return (EINVAL);
+ }
+
+ /*
+ * This is the same sequence as done in
+ * shutdown(SHUT_WR).
+ * SOLOCKED stays set while so_lock is dropped
+ * for the calls into SCTP below.
+ */
+ mutex_enter(&so->so_lock);
+ so_lock_single(so);
+ socantsendmore(so);
+ cv_broadcast(&so->so_snd_cv);
+ so->so_state |= SS_ISDISCONNECTING;
+ mutex_exit(&so->so_lock);
+
+ pollwakeup(&so->so_poll_list, POLLOUT);
+ sctp_recvd((struct sctp_s *)so->so_proto_handle,
+ so->so_rcvbuf);
+ error = sctp_disconnect(
+ (struct sctp_s *)so->so_proto_handle);
+
+ mutex_enter(&so->so_lock);
+ so_unlock_single(so, SOLOCKED);
+ mutex_exit(&so->so_lock);
+ return (error);
+ }
+ }
+ } else {
+ optlen = 0;
+ }
+
+ mutex_enter(&so->so_lock);
+ for (;;) { /* loop until the send queue is no longer marked full */
+ if (so->so_state & SS_CANTSENDMORE) {
+ mutex_exit(&so->so_lock);
+ return (EPIPE);
+ }
+
+ if (so->so_error != 0) {
+ error = sogeterr(so, B_TRUE);
+ mutex_exit(&so->so_lock);
+ return (error);
+ }
+
+ if (!so->so_snd_qfull)
+ break;
+
+ if (so->so_state & SS_CLOSING) {
+ mutex_exit(&so->so_lock);
+ return (EINTR);
+ }
+ /*
+ * Xmit window full in a blocking socket.
+ */
+ if ((uiop->uio_fmode & (FNDELAY|FNONBLOCK)) ||
+ (flags & MSG_DONTWAIT)) {
+ mutex_exit(&so->so_lock);
+ return (EAGAIN);
+ } else {
+ /*
+ * Wait for space to become available and try again.
+ */
+ error = cv_wait_sig(&so->so_snd_cv, &so->so_lock);
+ if (!error) { /* signal */
+ mutex_exit(&so->so_lock);
+ return (EINTR);
+ }
+ }
+ }
+ msglen = count = uiop->uio_resid;
+
+ /* Don't allow sending a message larger than the send buffer size. */
+ /* XXX Transport module need to enforce this */
+ if (msglen > so->so_sndbuf) {
+ mutex_exit(&so->so_lock);
+ return (EMSGSIZE);
+ }
+
+ /*
+ * Allow piggybacking data on handshake messages (SS_ISCONNECTING).
+ */
+ if (!(so->so_state & (SS_ISCONNECTING | SS_ISCONNECTED))) {
+ /*
+ * We need to check here for listener so that the
+ * same error will be returned as with a TCP socket.
+ * In this case, sosctp_connect() returns EOPNOTSUPP
+ * while a TCP socket returns ENOTCONN instead. Catch it
+ * here to have the same behavior as a TCP socket.
+ *
+ * We also need to make sure that the peer address is
+ * provided before we attempt to do the connect.
+ */
+ if ((so->so_state & SS_ACCEPTCONN) ||
+ msg->msg_name == NULL) {
+ mutex_exit(&so->so_lock);
+ error = ENOTCONN;
+ goto error_nofree;
+ }
+ mutex_exit(&so->so_lock); /* sosctp_connect() takes so_lock itself */
+ fflag = uiop->uio_fmode;
+ if (flags & MSG_DONTWAIT) {
+ fflag |= FNDELAY;
+ }
+ error = sosctp_connect(so, msg->msg_name, msg->msg_namelen,
+ fflag, (so->so_version == SOV_XPG4_2) * _SOCONNECT_XPG4_2,
+ cr);
+ if (error) {
+ /*
+ * Check for non-fatal errors, socket connected
+ * while the lock had been lifted.
+ */
+ if (error != EISCONN && error != EALREADY) {
+ goto error_nofree;
+ }
+ error = 0;
+ }
+ } else {
+ mutex_exit(&so->so_lock);
+ }
+
+ /* so_lock is not held from here until the error labels. */
+ mctl = sctp_alloc_hdr(msg->msg_name, msg->msg_namelen,
+ msg->msg_control, optlen, SCTP_CAN_BLOCK);
+ if (mctl == NULL) {
+ error = EINTR; /* allocation failure is reported as EINTR */
+ goto error_nofree;
+ }
+
+ /* Copy in the message. */
+ if ((error = sosctp_uiomove(mctl, count, ss->ss_wrsize, ss->ss_wroff,
+ uiop, flags, cr)) != 0) {
+ goto error_ret;
+ }
+ error = sctp_sendmsg((struct sctp_s *)so->so_proto_handle, mctl, 0);
+ if (error == 0)
+ return (0);
+
+error_ret:
+ freemsg(mctl); /* frees the header and any data mblks linked behind it */
+error_nofree:
+ mutex_enter(&so->so_lock);
+ if ((error == EPIPE) && (so->so_state & SS_CANTSENDMORE)) {
+ /*
+ * We received shutdown between the time lock was
+ * lifted and call to sctp_sendmsg().
+ */
+ mutex_exit(&so->so_lock);
+ return (EPIPE);
+ }
+ mutex_exit(&so->so_lock);
+ return (error);
+}
+
+/*
+ * Send message on 1-N socket. Connects automatically if there is
+ * no association.
+ *
+ * The target association is chosen from the SCTP_SNDRCV control message:
+ * a non-zero sinfo_assoc_id is looked up with sosctp_assoc(); otherwise
+ * a new association to msg_name is created with
+ * sosctp_assoc_createconn().  MSG_EOF in sinfo_flags starts a graceful
+ * shutdown of the association instead of sending data.  The association
+ * reference obtained here is released on every path that obtained one.
+ *
+ * Errors produced here: EINVAL (short cmsg, missing address, MSG_EOF
+ * without an association id, or data with MSG_EOF), EPIPE, EINTR,
+ * EAGAIN, EMSGSIZE and a pending ssa_error; errors from sosctp_assoc(),
+ * sosctp_assoc_createconn() (fatal ones only), sosctp_uiomove(),
+ * sctp_disconnect() and sctp_sendmsg() are passed through.
+ *
+ * NOTE(review): in the MSG_EOF path ssa_state is modified after so_lock
+ * has been dropped, while every other ssa_state access in this function
+ * is made with so_lock held - confirm this is intended.
+ */
+static int
+sosctp_seq_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
+ struct cred *cr)
+{
+ struct sctp_sonode *ss;
+ struct sctp_soassoc *ssa;
+ struct cmsghdr *cmsg;
+ struct sctp_sndrcvinfo *sinfo;
+ int aid = 0;
+ mblk_t *mctl;
+ int namelen, optlen, flags;
+ ssize_t count, msglen;
+ int error;
+ uint16_t s_flags = 0;
+
+ ASSERT(so->so_type == SOCK_SEQPACKET);
+
+ /*
+ * There shouldn't be problems with alignment, as the memory for
+ * msg_control was alloced with kmem_alloc.
+ */
+ cmsg = sosctp_find_cmsg(msg->msg_control, msg->msg_controllen,
+ SCTP_SNDRCV);
+ if (cmsg != NULL) {
+ if (cmsg->cmsg_len < (sizeof (*sinfo) + sizeof (*cmsg))) {
+ eprintsoline(so, EINVAL);
+ return (EINVAL);
+ }
+ sinfo = (struct sctp_sndrcvinfo *)(cmsg + 1); /* payload follows the cmsghdr */
+ s_flags = sinfo->sinfo_flags;
+ aid = sinfo->sinfo_assoc_id;
+ }
+
+ ss = SOTOSSO(so);
+ namelen = msg->msg_namelen;
+
+ if (msg->msg_controllen > 0) {
+ optlen = msg->msg_controllen;
+ } else {
+ optlen = 0;
+ }
+
+ mutex_enter(&so->so_lock);
+
+ /*
+ * If there is no association id, connect to address specified
+ * in msg_name. Otherwise look up the association using the id.
+ */
+ if (aid == 0) {
+ /*
+ * Connect and shutdown cannot be done together, so check for
+ * MSG_EOF.
+ */
+ if (msg->msg_name == NULL || namelen == 0 ||
+ (s_flags & MSG_EOF)) {
+ error = EINVAL;
+ eprintsoline(so, error);
+ goto done;
+ }
+ flags = uiop->uio_fmode; /* here flags holds file-mode bits for the connect */
+ if (msg->msg_flags & MSG_DONTWAIT) {
+ flags |= FNDELAY;
+ }
+ so_lock_single(so);
+ error = sosctp_assoc_createconn(ss, msg->msg_name, namelen,
+ msg->msg_control, optlen, flags, cr, &ssa);
+ if (error) {
+ if ((so->so_version == SOV_XPG4_2) &&
+ (error == EHOSTUNREACH)) {
+ error = ENETUNREACH;
+ }
+ if (ssa == NULL) {
+ /*
+ * Fatal error during connect(). Bail out.
+ * If ssa exists, it means that the handshake
+ * is in progress.
+ */
+ eprintsoline(so, error);
+ so_unlock_single(so, SOLOCKED);
+ goto done;
+ }
+ /*
+ * All the errors are non-fatal ones, don't return
+ * e.g. EINPROGRESS from sendmsg().
+ */
+ error = 0;
+ }
+ so_unlock_single(so, SOLOCKED);
+ } else {
+ if ((error = sosctp_assoc(ss, aid, &ssa)) != 0) {
+ eprintsoline(so, error);
+ goto done;
+ }
+ }
+
+ /*
+ * Now we have an association.
+ */
+ flags = msg->msg_flags; /* from here on flags holds the MSG_* flags */
+
+ /*
+ * MSG_EOF initiates graceful shutdown.
+ */
+ if (s_flags & MSG_EOF) {
+ if (uiop->uio_resid) {
+ /*
+ * Can't include data in MSG_EOF message.
+ */
+ error = EINVAL;
+ } else {
+ mutex_exit(&so->so_lock);
+ ssa->ssa_state |= SS_ISDISCONNECTING;
+ sctp_recvd((struct sctp_s *)ssa->ssa_conn,
+ so->so_rcvbuf);
+ error = sctp_disconnect((struct sctp_s *)ssa->ssa_conn);
+ mutex_enter(&so->so_lock);
+ }
+ goto refrele;
+ }
+
+ for (;;) { /* loop until the association's send queue is not marked full */
+ if (ssa->ssa_state & SS_CANTSENDMORE) {
+ SSA_REFRELE(ss, ssa);
+ mutex_exit(&so->so_lock);
+ return (EPIPE);
+ }
+ if (ssa->ssa_error != 0) {
+ error = ssa->ssa_error;
+ ssa->ssa_error = 0; /* pending error is reported once, then cleared */
+ goto refrele;
+ }
+
+ if (!ssa->ssa_snd_qfull)
+ break;
+
+ if (so->so_state & SS_CLOSING) {
+ error = EINTR;
+ goto refrele;
+ }
+ if ((uiop->uio_fmode & (FNDELAY|FNONBLOCK)) ||
+ (flags & MSG_DONTWAIT)) {
+ error = EAGAIN;
+ goto refrele;
+ } else {
+ /*
+ * Wait for space to become available and try again.
+ */
+ error = cv_wait_sig(&so->so_snd_cv, &so->so_lock);
+ if (!error) { /* signal */
+ error = EINTR;
+ goto refrele;
+ }
+ }
+ }
+
+ msglen = count = uiop->uio_resid;
+
+ /* Don't allow sending a message larger than the send buffer size. */
+ if (msglen > so->so_sndbuf) {
+ error = EMSGSIZE;
+ goto refrele;
+ }
+
+ /*
+ * Update TX buffer usage here so that we can lift the socket lock.
+ * NOTE(review): no buffer usage update is visible at this point;
+ * only the lock is dropped - this comment may be stale.
+ */
+ mutex_exit(&so->so_lock);
+
+ mctl = sctp_alloc_hdr(msg->msg_name, namelen, msg->msg_control,
+ optlen, SCTP_CAN_BLOCK);
+ if (mctl == NULL) {
+ error = EINTR; /* allocation failure is reported as EINTR */
+ goto lock_rele;
+ }
+
+ /* Copy in the message. */
+ if ((error = sosctp_uiomove(mctl, count, ssa->ssa_wrsize,
+ ssa->ssa_wroff, uiop, flags, cr)) != 0) {
+ goto lock_rele;
+ }
+ error = sctp_sendmsg((struct sctp_s *)ssa->ssa_conn, mctl, 0);
+lock_rele:
+ mutex_enter(&so->so_lock);
+ if (error != 0) {
+ freemsg(mctl); /* mctl is NULL here when sctp_alloc_hdr() failed */
+ if ((error == EPIPE) && (ssa->ssa_state & SS_CANTSENDMORE)) {
+ /*
+ * We received shutdown between the time lock was
+ * lifted and call to sctp_sendmsg().
+ */
+ SSA_REFRELE(ss, ssa);
+ mutex_exit(&so->so_lock);
+ return (EPIPE);
+ }
+ }
+
+refrele:
+ SSA_REFRELE(ss, ssa);
+done:
+ mutex_exit(&so->so_lock);
+ return (error);
+}
+
+/*
+ * Get address of remote node.
+ *
+ * Hands the request straight to sctp_getpeername() on the socket's
+ * protocol handle and returns its result; accept and cr are not used.
+ */
+/* ARGSUSED */
+static int
+sosctp_getpeername(struct sonode *so, struct sockaddr *addr, socklen_t *addrlen,
+    boolean_t accept, struct cred *cr)
+{
+	struct sctp_s *sctp = (struct sctp_s *)so->so_proto_handle;
+
+	return (sctp_getpeername(sctp, addr, addrlen));
+}
+
+/*
+ * Get local address.
+ *
+ * Hands the request straight to sctp_getsockname() on the socket's
+ * protocol handle and returns its result; cr is not used.
+ */
+/* ARGSUSED */
+static int
+sosctp_getsockname(struct sonode *so, struct sockaddr *addr, socklen_t *addrlen,
+    struct cred *cr)
+{
+	struct sctp_s *sctp = (struct sctp_s *)so->so_proto_handle;
+
+	return (sctp_getsockname(sctp, addr, addrlen));
+}
+
+/*
+ * Called from shutdown().
+ *
+ * Applies the requested direction(s) to the socket state under so_lock,
+ * then acts only on the state bits that this call newly set: readers
+ * and/or writers are woken, pollers are notified, and when the send side
+ * was newly shut the protocol is told to disconnect.
+ *
+ * Returns EINVAL for an unknown "how", otherwise 0 or the error from
+ * sctp_disconnect() (with EWOULDBLOCK mapped to 0).
+ */
+/* ARGSUSED */
+static int
+sosctp_shutdown(struct sonode *so, int how, struct cred *cr)
+{
+	uint_t before, gained;
+	int events = 0;
+	int error = 0;
+
+	mutex_enter(&so->so_lock);
+
+	/* Snapshot the state so the bits set by this call can be found. */
+	before = so->so_state;
+
+	if (how == SHUT_RD) {
+		socantrcvmore(so);
+	} else if (how == SHUT_WR) {
+		socantsendmore(so);
+	} else if (how == SHUT_RDWR) {
+		socantsendmore(so);
+		socantrcvmore(so);
+	} else {
+		mutex_exit(&so->so_lock);
+		return (EINVAL);
+	}
+
+	gained = so->so_state & ~before;
+
+	if (gained & SS_CANTRCVMORE) {
+		if (so->so_rcv_q_head == NULL) {
+			cv_signal(&so->so_rcv_cv);
+		}
+		events = POLLIN|POLLRDNORM;
+
+		socket_sendsig(so, SOCKETSIG_READ);
+	}
+	if (gained & SS_CANTSENDMORE) {
+		cv_broadcast(&so->so_snd_cv);
+		events |= POLLOUT;
+
+		so->so_state |= SS_ISDISCONNECTING;
+	}
+	mutex_exit(&so->so_lock);
+
+	pollwakeup(&so->so_poll_list, events);
+
+	if (gained & SS_CANTSENDMORE) {
+		sctp_recvd((struct sctp_s *)so->so_proto_handle, so->so_rcvbuf);
+		error = sctp_disconnect((struct sctp_s *)so->so_proto_handle);
+
+		/*
+		 * HACK: sctp_disconnect() may return EWOULDBLOCK. But this
+		 * error is not documented in standard socket API. Catch it
+		 * here.
+		 */
+		if (error == EWOULDBLOCK)
+			error = 0;
+	}
+
+	return (error);
+}
+
+/*
+ * Get socket options.
+ *
+ * IPPROTO_SCTP level options are rejected with EINVAL here (they are
+ * expected to go through ioctl()); every other level is handed to
+ * sctp_get_opt() on the socket's protocol handle.
+ */
+/*ARGSUSED5*/
+static int
+sosctp_getsockopt(struct sonode *so, int level, int option_name,
+    void *optval, socklen_t *optlenp, int flags, struct cred *cr)
+{
+	struct sctp_s *sctp;
+
+	if (level != IPPROTO_SCTP) {
+		sctp = (struct sctp_s *)so->so_proto_handle;
+		return (sctp_get_opt(sctp, level, option_name, optval,
+		    optlenp));
+	}
+
+	/*
+	 * Should go through ioctl().
+	 */
+	return (EINVAL);
+}
+
+/*
+ * Set socket options
+ *
+ * For a SOCK_STREAM socket the option is applied to the socket's own
+ * protocol handle.  For a SOCK_SEQPACKET (1-N) socket the target depends
+ * on the option:
+ *  - IPPROTO_SCTP options that carry an association id apply to that
+ *    association when the id is non-zero;
+ *  - SCTP_INITMSG and SOL_SOCKET options other than SO_RCVBUF/SO_SNDBUF
+ *    apply to the socket's own handle only;
+ *  - everything else is applied to the socket's handle and then to every
+ *    association that sosctp_assoc() can look up.
+ * so_lock is dropped around each sctp_set_opt() call.
+ *
+ * Returns EINVAL for an optlen too small to hold the expected argument,
+ * EOPNOTSUPP for SO_LINGER on a 1-N socket, the sosctp_assoc() error for
+ * an unknown association id, or the (first) sctp_set_opt() error.
+ */
+/* ARGSUSED */
+static int
+sosctp_setsockopt(struct sonode *so, int level, int option_name,
+    const void *optval, t_uscalar_t optlen, struct cred *cr)
+{
+	struct sctp_sonode *ss = SOTOSSO(so);
+	struct sctp_soassoc *ssa = NULL;
+	/*
+	 * Initialized at declaration: the dprint() below reads id for
+	 * SOCK_STREAM sockets too, which never assign it otherwise.
+	 */
+	sctp_assoc_t id = 0;
+	int error, rc;
+	void *conn = NULL;
+
+	mutex_enter(&so->so_lock);
+
+	/*
+	 * For some SCTP level options, one can select the association this
+	 * applies to.
+	 */
+	if (so->so_type == SOCK_STREAM) {
+		conn = so->so_proto_handle;
+	} else {
+		/*
+		 * SOCK_SEQPACKET only
+		 */
+		if (level == IPPROTO_SCTP) {
+			switch (option_name) {
+			case SCTP_RTOINFO:
+			case SCTP_ASSOCINFO:
+			case SCTP_SET_PEER_PRIMARY_ADDR:
+			case SCTP_PRIMARY_ADDR:
+			case SCTP_PEER_ADDR_PARAMS:
+				/*
+				 * Association ID is the first element
+				 * params struct
+				 */
+				if (optlen < sizeof (sctp_assoc_t)) {
+					error = EINVAL;
+					eprintsoline(so, error);
+					goto done;
+				}
+				id = *(sctp_assoc_t *)optval;
+				break;
+			case SCTP_DEFAULT_SEND_PARAM:
+				if (optlen != sizeof (struct sctp_sndrcvinfo)) {
+					error = EINVAL;
+					eprintsoline(so, error);
+					goto done;
+				}
+				id = ((struct sctp_sndrcvinfo *)
+				    optval)->sinfo_assoc_id;
+				break;
+			case SCTP_INITMSG:
+				/*
+				 * Only applies to future associations
+				 */
+				conn = so->so_proto_handle;
+				break;
+			default:
+				break;
+			}
+		} else if (level == SOL_SOCKET) {
+			if (option_name == SO_LINGER) {
+				error = EOPNOTSUPP;
+				eprintsoline(so, error);
+				goto done;
+			}
+			/*
+			 * These 2 options are applied to all associations.
+			 * The other socket level options are only applied
+			 * to the socket (not associations).
+			 */
+			if ((option_name != SO_RCVBUF) &&
+			    (option_name != SO_SNDBUF)) {
+				conn = so->so_proto_handle;
+			}
+		} else {
+			conn = NULL;
+		}
+
+		/*
+		 * If association ID was specified, do op on that assoc.
+		 * Otherwise set the default setting of a socket.
+		 */
+		if (id != 0) {
+			if ((error = sosctp_assoc(ss, id, &ssa)) != 0) {
+				eprintsoline(so, error);
+				goto done;
+			}
+			conn = ssa->ssa_conn;
+		}
+	}
+	dprint(2, ("sosctp_setsockopt %p (%d) - conn %p %d %d id:%d\n",
+	    (void *)ss, so->so_type, (void *)conn, level, option_name, id));
+
+	ASSERT(ssa == NULL || (ssa != NULL && conn != NULL));
+	if (conn != NULL) {
+		/* Single target: the socket handle or one association. */
+		mutex_exit(&so->so_lock);
+		error = sctp_set_opt((struct sctp_s *)conn, level, option_name,
+		    optval, optlen);
+		mutex_enter(&so->so_lock);
+		if (ssa != NULL)
+			SSA_REFRELE(ss, ssa);
+	} else {
+		/*
+		 * 1-N socket, and we have to apply the operation to ALL
+		 * associations. Like with anything of this sort, the
+		 * problem is what to do if the operation fails.
+		 * Just try to apply the setting to everyone, but store
+		 * error number if someone returns such. And since we are
+		 * looping through all possible aids, some of them can be
+		 * invalid. We just ignore this kind (sosctp_assoc()) of
+		 * errors.
+		 */
+		sctp_assoc_t aid;
+
+		mutex_exit(&so->so_lock);
+		error = sctp_set_opt((struct sctp_s *)so->so_proto_handle,
+		    level, option_name, optval, optlen);
+		mutex_enter(&so->so_lock);
+		for (aid = 1; aid < ss->ss_maxassoc; aid++) {
+			if (sosctp_assoc(ss, aid, &ssa) != 0)
+				continue;
+			mutex_exit(&so->so_lock);
+			rc = sctp_set_opt((struct sctp_s *)ssa->ssa_conn, level,
+			    option_name, optval, optlen);
+			mutex_enter(&so->so_lock);
+			SSA_REFRELE(ss, ssa);
+			/* Keep only the first failure seen. */
+			if (error == 0) {
+				error = rc;
+			}
+		}
+	}
+done:
+	mutex_exit(&so->so_lock);
+	return (error);
+}
+
+/*
+ * ioctl handler for SCTP sockets.
+ *
+ * Handles the generic socket ioctls (FIONBIO, FIOASYNC, SIOCSPGRP /
+ * FIOSETOWN, SIOCGPGRP / FIOGETOWN, FIONREAD, SIOCATMARK) and the SCTP
+ * specific ones:
+ *	SIOCSCTPGOPT	get an IPPROTO_SCTP option, optionally on one
+ *			association of a 1-N socket
+ *	SIOCSCTPSOPT	set an IPPROTO_SCTP option, optionally on one
+ *			association
+ *	SIOCSCTPPEELOFF	move one association of a 1-N socket onto a new
+ *			SOCK_STREAM socket and return its fd through arg
+ *
+ * User data is transferred with so_copyin()/so_copyout(), which are told
+ * through (mode & FKIOCTL) whether the request is an in-kernel one.
+ *
+ * Returns 0 on success, EFAULT on a failed copy, EINVAL for an unknown
+ * cmd or bad argument, or the error of the underlying operation.
+ */
+/*ARGSUSED*/
+static int
+sosctp_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode,
+    struct cred *cr, int32_t *rvalp)
+{
+	struct sctp_sonode *ss;
+	int32_t value;
+	int error;
+	int intval;
+	pid_t pid;
+	struct sctp_soassoc *ssa;
+	void *conn;
+	void *buf;
+	STRUCT_DECL(sctpopt, opt);
+	uint32_t optlen;
+	int buflen;
+
+	ss = SOTOSSO(so);
+
+	/* handle socket specific ioctls */
+	switch (cmd) {
+	case FIONBIO:
+		if (so_copyin((void *)arg, &value, sizeof (int32_t),
+		    (mode & (int)FKIOCTL))) {
+			return (EFAULT);
+		}
+		mutex_enter(&so->so_lock);
+		if (value) {
+			so->so_state |= SS_NDELAY;
+		} else {
+			so->so_state &= ~SS_NDELAY;
+		}
+		mutex_exit(&so->so_lock);
+		return (0);
+
+	case FIOASYNC:
+		if (so_copyin((void *)arg, &value, sizeof (int32_t),
+		    (mode & (int)FKIOCTL))) {
+			return (EFAULT);
+		}
+		mutex_enter(&so->so_lock);
+
+		if (value) {
+			/* Turn on SIGIO */
+			so->so_state |= SS_ASYNC;
+		} else {
+			/* Turn off SIGIO */
+			so->so_state &= ~SS_ASYNC;
+		}
+		mutex_exit(&so->so_lock);
+		return (0);
+
+	case SIOCSPGRP:
+	case FIOSETOWN:
+		if (so_copyin((void *)arg, &pid, sizeof (pid_t),
+		    (mode & (int)FKIOCTL))) {
+			return (EFAULT);
+		}
+		mutex_enter(&so->so_lock);
+
+		/* Nothing to do when the owner is unchanged. */
+		error = (pid != so->so_pgrp) ? socket_chgpgrp(so, pid) : 0;
+		mutex_exit(&so->so_lock);
+		return (error);
+
+	case SIOCGPGRP:
+	case FIOGETOWN:
+		if (so_copyout(&so->so_pgrp, (void *)arg,
+		    sizeof (pid_t), (mode & (int)FKIOCTL)))
+			return (EFAULT);
+		return (0);
+
+	case FIONREAD:
+		/* XXX: Cannot be used unless standard buffer is used */
+		/*
+		 * Return number of bytes of data in all data messages
+		 * in queue in "arg".
+		 * For stream socket, amount of available data.
+		 * For sock_dgram, # of available bytes + addresses.
+		 */
+		intval = (so->so_state & SS_ACCEPTCONN) ? 0 :
+		    MIN(so->so_rcv_queued, INT_MAX);
+		if (so_copyout(&intval, (void *)arg, sizeof (intval),
+		    (mode & (int)FKIOCTL)))
+			return (EFAULT);
+		return (0);
+	case SIOCATMARK:
+		/*
+		 * No support for urgent data.
+		 */
+		intval = 0;
+
+		if (so_copyout(&intval, (void *)arg, sizeof (int),
+		    (mode & (int)FKIOCTL)))
+			return (EFAULT);
+		return (0);
+	case SIOCSCTPGOPT:
+		STRUCT_INIT(opt, mode);
+
+		if (so_copyin((void *)arg, STRUCT_BUF(opt), STRUCT_SIZE(opt),
+		    (mode & (int)FKIOCTL))) {
+			return (EFAULT);
+		}
+		if ((optlen = STRUCT_FGET(opt, sopt_len)) > SO_MAXARGSIZE)
+			return (EINVAL);
+
+		/*
+		 * Find the correct sctp_t based on whether it is 1-N socket
+		 * or not.
+		 */
+		intval = STRUCT_FGET(opt, sopt_aid);
+		mutex_enter(&so->so_lock);
+		if ((so->so_type == SOCK_SEQPACKET) && intval) {
+			if ((error = sosctp_assoc(ss, intval, &ssa)) != 0) {
+				mutex_exit(&so->so_lock);
+				return (error);
+			}
+			conn = ssa->ssa_conn;
+			ASSERT(conn != NULL);
+		} else {
+			conn = so->so_proto_handle;
+			ssa = NULL;
+		}
+		mutex_exit(&so->so_lock);
+
+		/* Copyin the option buffer and then call sctp_get_opt(). */
+		buflen = optlen;
+		/* Let's allocate a buffer enough to hold an int */
+		if (buflen < sizeof (uint32_t))
+			buflen = sizeof (uint32_t);
+		buf = kmem_alloc(buflen, KM_SLEEP);
+		if (so_copyin(STRUCT_FGETP(opt, sopt_val), buf, optlen,
+		    (mode & (int)FKIOCTL))) {
+			if (ssa != NULL) {
+				mutex_enter(&so->so_lock);
+				SSA_REFRELE(ss, ssa);
+				mutex_exit(&so->so_lock);
+			}
+			kmem_free(buf, buflen);
+			return (EFAULT);
+		}
+		/* The option level has to be IPPROTO_SCTP */
+		error = sctp_get_opt((struct sctp_s *)conn, IPPROTO_SCTP,
+		    STRUCT_FGET(opt, sopt_name), buf, &optlen);
+		if (ssa != NULL) {
+			mutex_enter(&so->so_lock);
+			SSA_REFRELE(ss, ssa);
+			mutex_exit(&so->so_lock);
+		}
+		/* Never copy out more than the buffer holds. */
+		optlen = MIN(buflen, optlen);
+		/* No error, copyout the result with the correct buf len. */
+		if (error == 0) {
+			STRUCT_FSET(opt, sopt_len, optlen);
+			if (so_copyout(STRUCT_BUF(opt), (void *)arg,
+			    STRUCT_SIZE(opt), (mode & (int)FKIOCTL))) {
+				error = EFAULT;
+			} else if (so_copyout(buf, STRUCT_FGETP(opt, sopt_val),
+			    optlen, (mode & (int)FKIOCTL))) {
+				error = EFAULT;
+			}
+		}
+		kmem_free(buf, buflen);
+		return (error);
+
+	case SIOCSCTPSOPT:
+		STRUCT_INIT(opt, mode);
+
+		if (so_copyin((void *)arg, STRUCT_BUF(opt), STRUCT_SIZE(opt),
+		    (mode & (int)FKIOCTL))) {
+			return (EFAULT);
+		}
+		if ((optlen = STRUCT_FGET(opt, sopt_len)) > SO_MAXARGSIZE)
+			return (EINVAL);
+
+		/*
+		 * Find the correct sctp_t based on whether it is 1-N socket
+		 * or not.
+		 *
+		 * NOTE(review): unlike SIOCSCTPGOPT, the socket type is not
+		 * checked here before the association lookup - confirm that
+		 * sosctp_assoc() fails cleanly on a SOCK_STREAM socket.
+		 */
+		intval = STRUCT_FGET(opt, sopt_aid);
+		mutex_enter(&so->so_lock);
+		if (intval != 0) {
+			if ((error = sosctp_assoc(ss, intval, &ssa)) != 0) {
+				mutex_exit(&so->so_lock);
+				return (error);
+			}
+			conn = ssa->ssa_conn;
+			ASSERT(conn != NULL);
+		} else {
+			conn = so->so_proto_handle;
+			ssa = NULL;
+		}
+		mutex_exit(&so->so_lock);
+
+		/* Copyin the option buffer and then call sctp_set_opt(). */
+		buf = kmem_alloc(optlen, KM_SLEEP);
+		if (so_copyin(STRUCT_FGETP(opt, sopt_val), buf, optlen,
+		    (mode & (int)FKIOCTL))) {
+			if (ssa != NULL) {
+				mutex_enter(&so->so_lock);
+				SSA_REFRELE(ss, ssa);
+				mutex_exit(&so->so_lock);
+			}
+			/*
+			 * Free with the size that was allocated (optlen),
+			 * not with the association id held in intval.
+			 */
+			kmem_free(buf, optlen);
+			return (EFAULT);
+		}
+		/* The option level has to be IPPROTO_SCTP */
+		error = sctp_set_opt((struct sctp_s *)conn, IPPROTO_SCTP,
+		    STRUCT_FGET(opt, sopt_name), buf, optlen);
+		if (ssa) {
+			mutex_enter(&so->so_lock);
+			SSA_REFRELE(ss, ssa);
+			mutex_exit(&so->so_lock);
+		}
+		kmem_free(buf, optlen);
+		return (error);
+
+	case SIOCSCTPPEELOFF: {
+		struct sonode *nso;
+		struct sctp_uc_swap us;
+		int nfd;
+		struct file *nfp;
+		struct vnode *nvp = NULL;
+		struct sockparams *sp;
+
+		dprint(2, ("sctppeeloff %p\n", (void *)ss));
+
+		if (so->so_type != SOCK_SEQPACKET) {
+			return (EOPNOTSUPP);
+		}
+		if (so_copyin((void *)arg, &intval, sizeof (intval),
+		    (mode & (int)FKIOCTL))) {
+			return (EFAULT);
+		}
+		if (intval == 0) {
+			return (EINVAL);
+		}
+
+		/*
+		 * Find sockparams. This is different from parent's entry,
+		 * as the socket type is different.
+		 * Bail out before allocating an fd if the lookup fails, so
+		 * that sp is never used without having been set.
+		 */
+		error = solookup(so->so_family, SOCK_STREAM, so->so_protocol,
+		    &sp);
+		if (error != 0) {
+			eprintsoline(so, error);
+			return (error);
+		}
+
+		/*
+		 * Allocate the user fd.
+		 */
+		if ((nfd = ufalloc(0)) == -1) {
+			eprintsoline(so, EMFILE);
+			return (EMFILE);
+		}
+
+		/*
+		 * Copy the fd out.
+		 */
+		if (so_copyout(&nfd, (void *)arg, sizeof (nfd),
+		    (mode & (int)FKIOCTL))) {
+			error = EFAULT;
+			goto err;
+		}
+		mutex_enter(&so->so_lock);
+
+		/*
+		 * Don't use sosctp_assoc() in order to peel off disconnected
+		 * associations.
+		 */
+		ssa = ((uint32_t)intval >= ss->ss_maxassoc) ? NULL :
+		    ss->ss_assocs[intval].ssi_assoc;
+		if (ssa == NULL) {
+			mutex_exit(&so->so_lock);
+			error = EINVAL;
+			goto err;
+		}
+		SSA_REFHOLD(ssa);
+
+		nso = socksctp_create(sp, so->so_family, SOCK_STREAM,
+		    so->so_protocol, so->so_version, SOCKET_NOSLEEP,
+		    &error, cr);
+		if (nso == NULL) {
+			SSA_REFRELE(ss, ssa);
+			mutex_exit(&so->so_lock);
+			goto err;
+		}
+		/* cannot fail, only inheriting properties */
+		(void) sosctp_init(nso, so, CRED(), 0);
+		nvp = SOTOV(nso);
+		so_lock_single(so);
+		mutex_exit(&so->so_lock);
+		us.sus_handle = SOTOSSO(nso);
+		us.sus_upcalls = &sosctp_sock_upcalls;
+
+		/*
+		 * Upcalls to new socket are blocked for the duration of
+		 * downcall.
+		 */
+		mutex_enter(&nso->so_lock);
+
+		error = sctp_set_opt((struct sctp_s *)ssa->ssa_conn,
+		    IPPROTO_SCTP, SCTP_UC_SWAP, &us, sizeof (us));
+		if (error) {
+			goto peelerr;
+		}
+		error = falloc(nvp, FWRITE|FREAD, &nfp, NULL);
+		if (error) {
+			goto peelerr;
+		}
+
+		/*
+		 * fill in the entries that falloc reserved
+		 */
+		nfp->f_vnode = nvp;
+		mutex_exit(&nfp->f_tlock);
+		setf(nfd, nfp);
+
+		mutex_enter(&so->so_lock);
+
+		sosctp_assoc_move(ss, SOTOSSO(nso), ssa);
+
+		mutex_exit(&nso->so_lock);
+
+		/* The connection now belongs to the new socket. */
+		ssa->ssa_conn = NULL;
+		sosctp_assoc_free(ss, ssa);
+
+		so_unlock_single(so, SOLOCKED);
+		mutex_exit(&so->so_lock);
+
+		return (0);
+
+err:
+		/* Failure before the new socket was set up: release the fd. */
+		setf(nfd, NULL);
+		eprintsoline(so, error);
+		return (error);
+
+peelerr:
+		/* Failure after the new socket was created: tear it down. */
+		mutex_exit(&nso->so_lock);
+		mutex_enter(&so->so_lock);
+		ASSERT(nso->so_count == 1);
+		nso->so_count = 0;
+		so_unlock_single(so, SOLOCKED);
+		SSA_REFRELE(ss, ssa);
+		mutex_exit(&so->so_lock);
+
+		setf(nfd, NULL);
+		ASSERT(nvp->v_count == 1);
+		socket_destroy(nso);
+		eprintsoline(so, error);
+		return (error);
+	}
+	default:
+		return (EINVAL);
+	}
+}
+
+/*
+ * Close handler for an SCTP socket.
+ *
+ * Starts disconnecting the endpoint behind so->so_proto_handle and then
+ * every association still present in the ss_assocs[] array. Nothing is
+ * freed here; sosctp_fini() in this file is what closes the connections
+ * and frees the association array.
+ *
+ * flag and cr are not used. Always returns 0: the results of
+ * sctp_disconnect() are deliberately discarded.
+ */
+/*ARGSUSED*/
+static int
+sosctp_close(struct sonode *so, int flag, struct cred *cr)
+{
+ struct sctp_sonode *ss;
+ struct sctp_sa_id *ssi;
+ struct sctp_soassoc *ssa;
+ int32_t i;
+
+ ss = SOTOSSO(so);
+
+ /*
+ * Initiate connection shutdown. Update SCTP's receive
+ * window.
+ *
+ * The value handed to sctp_recvd() is the part of the receive
+ * buffer not occupied by queued data (so_rcvbuf - so_rcv_queued).
+ */
+ sctp_recvd((struct sctp_s *)so->so_proto_handle,
+ so->so_rcvbuf - so->so_rcv_queued);
+ (void) sctp_disconnect((struct sctp_s *)so->so_proto_handle);
+
+ /*
+ * New associations can't come in, but old ones might get
+ * closed in upcall. Protect against that by taking a reference
+ * on the association.
+ *
+ * Each association is first marked disconnected (error 0) while
+ * so_lock is held. so_lock is then dropped around the calls into
+ * SCTP and retaken before the reference is released; presumably
+ * this is because SCTP may upcall into handlers such as
+ * sctp_assoc_disconnected(), which take so_lock themselves --
+ * TODO confirm.
+ */
+ mutex_enter(&so->so_lock);
+ ssi = ss->ss_assocs;
+ for (i = 0; i < ss->ss_maxassoc; i++, ssi++) {
+ if ((ssa = ssi->ssi_assoc) != NULL) {
+ SSA_REFHOLD(ssa);
+ sosctp_assoc_isdisconnected(ssa, 0);
+ mutex_exit(&so->so_lock);
+
+ sctp_recvd((struct sctp_s *)ssa->ssa_conn,
+ so->so_rcvbuf - ssa->ssa_rcv_queued);
+ (void) sctp_disconnect((struct sctp_s *)ssa->ssa_conn);
+
+ mutex_enter(&so->so_lock);
+ SSA_REFRELE(ss, ssa);
+ }
+ }
+ mutex_exit(&so->so_lock);
+
+ return (0);
+}
+
+/*
+ * Closes incoming connections which were never accepted, frees
+ * resources.
+ *
+ * Sequence, as implemented below:
+ *   1. With so_lock held, flush the receive queue and the accept queue.
+ *   2. For every association left in ss_assocs[], call sctp_close() on
+ *      its connection (so_lock is dropped for that call), clear
+ *      ssa_conn and free the association.
+ *   3. Free the ss_assocs[] array itself.
+ *   4. After dropping so_lock, close the socket's own protocol handle
+ *      if it is set, clear it, and call sonode_fini().
+ *
+ * cr is not used.
+ */
+/* ARGSUSED */
+void
+sosctp_fini(struct sonode *so, struct cred *cr)
+{
+ struct sctp_sonode *ss;
+ struct sctp_sa_id *ssi;
+ struct sctp_soassoc *ssa;
+ int32_t i;
+
+ ss = SOTOSSO(so);
+
+ ASSERT(so->so_ops == &sosctp_sonodeops ||
+ so->so_ops == &sosctp_seq_sonodeops);
+
+ /* We are the sole owner of so now */
+ mutex_enter(&so->so_lock);
+
+ so_rcv_flush(so);
+
+ /* Free all pending connections */
+ so_acceptq_flush(so);
+
+ ssi = ss->ss_assocs;
+ for (i = 0; i < ss->ss_maxassoc; i++, ssi++) {
+ if ((ssa = ssi->ssi_assoc) != NULL) {
+ /*
+ * This hold has no matching SSA_REFRELE in this
+ * function; presumably sosctp_assoc_free() below
+ * accounts for it -- TODO confirm.
+ */
+ SSA_REFHOLD(ssa);
+ mutex_exit(&so->so_lock);
+
+ sctp_close((struct sctp_s *)ssa->ssa_conn);
+
+ mutex_enter(&so->so_lock);
+ ssa->ssa_conn = NULL;
+ sosctp_assoc_free(ss, ssa);
+ }
+ }
+ if (ss->ss_assocs != NULL) {
+ ASSERT(ss->ss_assoccnt == 0);
+ /*
+ * The size passed is ss_maxassoc entries of struct sctp_sa_id.
+ * NOTE(review): ss_assocs is not reset to NULL after the free.
+ */
+ kmem_free(ss->ss_assocs,
+ ss->ss_maxassoc * sizeof (struct sctp_sa_id));
+ }
+ mutex_exit(&so->so_lock);
+
+ if (so->so_proto_handle)
+ sctp_close((struct sctp_s *)so->so_proto_handle);
+ so->so_proto_handle = NULL;
+
+ sonode_fini(so);
+}
+
+/*
+ * Upcalls from SCTP
+ */
+
+/*
+ * This is the upcall function for 1-N (SOCK_SEQPACKET) socket when a new
+ * association is created. Note that the first argument (handle) is of type
+ * sctp_sonode *, which is the one changed to a listener for new
+ * associations. All the other upcalls for 1-N socket take sctp_soassoc *
+ * as handle. The only exception is the su_properties upcall, which
+ * can take both types as handle.
+ *
+ * On success the new sctp_soassoc is entered into the listener's
+ * ss_assocs[] array under a freshly obtained association id, *ucp is set
+ * to the per-association upcall table (sosctp_assoc_upcalls) and the
+ * soassoc is returned as the upper handle. connind is stored as the
+ * association's ssa_conn.
+ *
+ * Returns NULL if the id array is full and cannot be grown, or if the
+ * soassoc cannot be allocated; both allocations are requested with
+ * KM_NOSLEEP. dc, peer_cred and peer_cpid are not used.
+ */
+/* ARGSUSED */
+sock_upper_handle_t
+sctp_assoc_newconn(sock_upper_handle_t parenthandle,
+ sock_lower_handle_t connind, sock_downcalls_t *dc,
+ struct cred *peer_cred, pid_t peer_cpid, sock_upcalls_t **ucp)
+{
+ struct sonode *lso = (struct sonode *)parenthandle;
+ struct sctp_sonode *lss = SOTOSSO(lso);
+ struct sctp_soassoc *ssa;
+ sctp_assoc_t id;
+
+ ASSERT(lss->ss_type == SOSCTP_SOCKET);
+ ASSERT(lso->so_state & SS_ACCEPTCONN);
+ ASSERT(lso->so_proto_handle != NULL); /* closed conn */
+ ASSERT(lso->so_type == SOCK_SEQPACKET);
+
+ mutex_enter(&lso->so_lock);
+
+ if ((id = sosctp_aid_get(lss)) == -1) {
+ /*
+ * Array not large enough; increase size.
+ * A negative return from sosctp_aid_grow() is treated as
+ * failure.
+ */
+ if (sosctp_aid_grow(lss, lss->ss_maxassoc, KM_NOSLEEP) < 0) {
+ mutex_exit(&lso->so_lock);
+ return (NULL);
+ }
+ id = sosctp_aid_get(lss);
+ ASSERT(id != -1);
+ }
+
+ /*
+ * Create soassoc for this connection
+ *
+ * The id is only reserved (sosctp_aid_reserve) after this
+ * allocation succeeds, so the failure path has no id to give back.
+ */
+ ssa = sosctp_assoc_create(lss, KM_NOSLEEP);
+ if (ssa == NULL) {
+ mutex_exit(&lso->so_lock);
+ return (NULL);
+ }
+ sosctp_aid_reserve(lss, id, 1);
+ lss->ss_assocs[id].ssi_assoc = ssa;
+ ++lss->ss_assoccnt;
+ ssa->ssa_id = id;
+ ssa->ssa_conn = (struct sctp_s *)connind;
+ ssa->ssa_state = (SS_ISBOUND | SS_ISCONNECTED);
+ ssa->ssa_wroff = lss->ss_wroff;
+ ssa->ssa_wrsize = lss->ss_wrsize;
+
+ mutex_exit(&lso->so_lock);
+
+ *ucp = &sosctp_assoc_upcalls;
+
+ return ((sock_upper_handle_t)ssa);
+}
+
+/*
+ * Upcall for a 1-N socket association: marks the association connected.
+ *
+ * handle is the sctp_soassoc. The work is done by
+ * sosctp_assoc_isconnected(), called with the owning socket's so_lock
+ * held. id, peer_cred and peer_cpid are not used.
+ */
+/* ARGSUSED */
+static void
+sctp_assoc_connected(sock_upper_handle_t handle, sock_connid_t id,
+ struct cred *peer_cred, pid_t peer_cpid)
+{
+ struct sctp_soassoc *ssa = (struct sctp_soassoc *)handle;
+ struct sonode *so = &ssa->ssa_sonode->ss_so;
+
+ ASSERT(so->so_type == SOCK_SEQPACKET);
+ ASSERT(ssa->ssa_conn);
+
+ mutex_enter(&so->so_lock);
+ sosctp_assoc_isconnected(ssa);
+ mutex_exit(&so->so_lock);
+}
+
+/*
+ * Upcall for a 1-N socket association: the association has been
+ * disconnected, with the given error.
+ *
+ * With the owning socket's so_lock held, the association is marked
+ * disconnected, one reference on it is released, and threads waiting on
+ * so_snd_cv are woken.
+ *
+ * Returns 1 if the reference released here was the only one left
+ * (ssa_refcnt == 1 before the release); ssa_conn is cleared first in that
+ * case. Returns 0 otherwise. How SCTP acts on the return value is not
+ * visible here -- TODO confirm. id is not used.
+ */
+/* ARGSUSED */
+static int
+sctp_assoc_disconnected(sock_upper_handle_t handle, sock_connid_t id, int error)
+{
+ struct sctp_soassoc *ssa = (struct sctp_soassoc *)handle;
+ struct sonode *so = &ssa->ssa_sonode->ss_so;
+ int ret;
+
+ ASSERT(so->so_type == SOCK_SEQPACKET);
+ ASSERT(ssa->ssa_conn != NULL);
+
+ mutex_enter(&so->so_lock);
+ sosctp_assoc_isdisconnected(ssa, error);
+ /* The count must be examined before SSA_REFRELE() below drops it. */
+ if (ssa->ssa_refcnt == 1) {
+ ret = 1;
+ ssa->ssa_conn = NULL;
+ } else {
+ ret = 0;
+ }
+ SSA_REFRELE(SOTOSSO(so), ssa);
+
+ cv_broadcast(&so->so_snd_cv);
+
+ mutex_exit(&so->so_lock);
+
+ return (ret);
+}
+
+/*
+ * Upcall for a 1-N socket association: marks the association as
+ * disconnecting.
+ *
+ * The only action expected is SOCK_OPCTL_SHUT_SEND (asserted below). The
+ * work is done by sosctp_assoc_isdisconnecting(), called with the owning
+ * socket's so_lock held. arg is not used.
+ */
+/* ARGSUSED */
+static void
+sctp_assoc_disconnecting(sock_upper_handle_t handle, sock_opctl_action_t action,
+ uintptr_t arg)
+{
+ struct sctp_soassoc *ssa = (struct sctp_soassoc *)handle;
+ struct sonode *so = &ssa->ssa_sonode->ss_so;
+
+ ASSERT(so->so_type == SOCK_SEQPACKET);
+ ASSERT(ssa->ssa_conn != NULL);
+ ASSERT(action == SOCK_OPCTL_SHUT_SEND);
+
+ mutex_enter(&so->so_lock);
+ sosctp_assoc_isdisconnecting(ssa);
+ mutex_exit(&so->so_lock);
+}
+
+/*
+ * Upcall for a 1-N socket association: SCTP delivers a received message
+ * (data or notification) for the association given by handle.
+ *
+ * The message is stamped with the association id so that the user can
+ * tell associations apart: in the notification body for notifications,
+ * or in the SCTP_SNDRCV ancillary data (if present) for data. A pointer
+ * to the association is then stored at the base of the message's data
+ * buffer, and the message is queued on the owning socket with
+ * so_queue_msg(), whose result is returned.
+ *
+ * mp must be an M_PROTO T_UNITDATA_IND carrying a source address
+ * (asserted). flags is saved in mp->b_flag. *errorp is cleared on entry
+ * and is also passed on to so_queue_msg(). forcepush is not used; NULL is
+ * passed to so_queue_msg() in its place.
+ */
+/* ARGSUSED */
+static ssize_t
+sctp_assoc_recv(sock_upper_handle_t handle, mblk_t *mp, size_t len, int flags,
+ int *errorp, boolean_t *forcepush)
+{
+ struct sctp_soassoc *ssa = (struct sctp_soassoc *)handle;
+ struct sctp_sonode *ss = ssa->ssa_sonode;
+ struct sonode *so = &ss->ss_so;
+ struct T_unitdata_ind *tind;
+ mblk_t *mp2;
+ union sctp_notification *sn;
+ struct sctp_sndrcvinfo *sinfo;
+
+ ASSERT(ssa->ssa_type == SOSCTP_ASSOC);
+ ASSERT(so->so_type == SOCK_SEQPACKET);
+ ASSERT(ssa->ssa_conn != NULL); /* closed conn */
+ ASSERT(mp != NULL);
+
+ ASSERT(errorp != NULL);
+ *errorp = 0;
+
+ /*
+ * Should be getting T_unitdata_ind's only.
+ * Must have address as part of packet.
+ */
+ tind = (struct T_unitdata_ind *)mp->b_rptr;
+ ASSERT((DB_TYPE(mp) == M_PROTO) &&
+ (tind->PRIM_type == T_UNITDATA_IND));
+ ASSERT(tind->SRC_length);
+
+ mutex_enter(&so->so_lock);
+
+ /*
+ * Override b_flag for SCTP sockfs internal use
+ */
+ mp->b_flag = (short)flags;
+
+ /*
+ * For notify messages, need to fill in association id.
+ * For data messages, sndrcvinfo could be in ancillary data.
+ */
+ if (flags & SCTP_NOTIFICATION) {
+ /* The notification body is read from the continuation block. */
+ mp2 = mp->b_cont;
+ sn = (union sctp_notification *)mp2->b_rptr;
+ switch (sn->sn_header.sn_type) {
+ case SCTP_ASSOC_CHANGE:
+ sn->sn_assoc_change.sac_assoc_id = ssa->ssa_id;
+ break;
+ case SCTP_PEER_ADDR_CHANGE:
+ sn->sn_paddr_change.spc_assoc_id = ssa->ssa_id;
+ break;
+ case SCTP_REMOTE_ERROR:
+ sn->sn_remote_error.sre_assoc_id = ssa->ssa_id;
+ break;
+ case SCTP_SEND_FAILED:
+ sn->sn_send_failed.ssf_assoc_id = ssa->ssa_id;
+ break;
+ case SCTP_SHUTDOWN_EVENT:
+ sn->sn_shutdown_event.sse_assoc_id = ssa->ssa_id;
+ break;
+ case SCTP_ADAPTATION_INDICATION:
+ sn->sn_adaptation_event.sai_assoc_id = ssa->ssa_id;
+ break;
+ case SCTP_PARTIAL_DELIVERY_EVENT:
+ sn->sn_pdapi_event.pdapi_assoc_id = ssa->ssa_id;
+ break;
+ default:
+ ASSERT(0);
+ break;
+ }
+ } else {
+ if (tind->OPT_length > 0) {
+ struct cmsghdr *cmsg;
+ char *cend;
+
+ /*
+ * Walk the control messages in the options area looking
+ * for the first SCTP_SNDRCV entry. The walk stops when a
+ * header or its claimed length would run past the end of
+ * the options, or when a zero cmsg_len is seen.
+ * NOTE(review): the step is the raw cmsg_len, with no
+ * alignment rounding -- confirm this matches how SCTP
+ * lays the options out.
+ */
+ cmsg = (struct cmsghdr *)
+ ((uchar_t *)mp->b_rptr + tind->OPT_offset);
+ cend = (char *)cmsg + tind->OPT_length;
+ for (;;) {
+ if ((char *)(cmsg + 1) > cend ||
+ ((char *)cmsg + cmsg->cmsg_len) > cend) {
+ break;
+ }
+ if ((cmsg->cmsg_level == IPPROTO_SCTP) &&
+ (cmsg->cmsg_type == SCTP_SNDRCV)) {
+ sinfo = (struct sctp_sndrcvinfo *)
+ (cmsg + 1);
+ sinfo->sinfo_assoc_id = ssa->ssa_id;
+ break;
+ }
+ if (cmsg->cmsg_len > 0) {
+ cmsg = (struct cmsghdr *)
+ ((uchar_t *)cmsg + cmsg->cmsg_len);
+ } else {
+ break;
+ }
+ }
+ }
+ }
+
+ /*
+ * SCTP has reserved space in the header for storing a pointer.
+ * Put the pointer to association there, and queue the data.
+ *
+ * A reference is taken on behalf of the queued message; where it
+ * is released is not visible in this function -- presumably when
+ * the message is consumed or flushed, TODO confirm.
+ */
+ SSA_REFHOLD(ssa);
+ ASSERT((mp->b_rptr - DB_BASE(mp)) >= sizeof (ssa));
+ *(struct sctp_soassoc **)DB_BASE(mp) = ssa;
+
+ mutex_exit(&so->so_lock);
+
+ return (so_queue_msg((sock_upper_handle_t)so, mp, len, 0, errorp,
+ NULL));
+}
+
+/*
+ * Upcall for a 1-N socket association: records the new send-queue-full
+ * state reported by SCTP.
+ *
+ * With the owning socket's so_lock held, qfull is stored in
+ * ssa_snd_qfull and every thread waiting on the socket's so_snd_cv is
+ * woken, whichever value qfull has.
+ */
+static void
+sctp_assoc_xmitted(sock_upper_handle_t handle, boolean_t qfull)
+{
+ struct sctp_soassoc *ssa = (struct sctp_soassoc *)handle;
+ struct sctp_sonode *ss = ssa->ssa_sonode;
+
+ ASSERT(ssa->ssa_type == SOSCTP_ASSOC);
+ ASSERT(ss->ss_so.so_type == SOCK_SEQPACKET);
+ ASSERT(ssa->ssa_conn != NULL);
+
+ mutex_enter(&ss->ss_so.so_lock);
+
+ ssa->ssa_snd_qfull = qfull;
+
+ /*
+ * Wake blocked writers.
+ */
+ cv_broadcast(&ss->ss_so.so_snd_cv);
+
+ mutex_exit(&ss->ss_so.so_lock);
+}
+
+/*
+ * Properties upcall: records the write offset and maximum block size
+ * supplied by SCTP in soppp.
+ *
+ * handle may be either a sctp_soassoc or a sctp_sonode. It is first read
+ * as a sctp_soassoc and its ssa_type compared with SOSCTP_ASSOC to decide
+ * which it is; this assumes both structures keep their type field at the
+ * same offset -- TODO confirm against socksctp.h.
+ *
+ * For an association the values go to ssa_wroff / ssa_wrsize; otherwise
+ * they go to the socket's ss_wroff / ss_wrsize. Values of 0 in soppp
+ * leave the current setting untouched. The update is made with the owning
+ * socket's so_lock held.
+ */
+static void
+sctp_assoc_properties(sock_upper_handle_t handle,
+ struct sock_proto_props *soppp)
+{
+ struct sctp_soassoc *ssa = (struct sctp_soassoc *)handle;
+ struct sctp_sonode *ss;
+
+ if (ssa->ssa_type == SOSCTP_ASSOC) {
+ ss = ssa->ssa_sonode;
+ mutex_enter(&ss->ss_so.so_lock);
+
+ /*
+ * Only change them if they're set.
+ */
+ if (soppp->sopp_wroff != 0) {
+ ssa->ssa_wroff = soppp->sopp_wroff;
+ }
+ if (soppp->sopp_maxblk != 0) {
+ ssa->ssa_wrsize = soppp->sopp_maxblk;
+ }
+ } else {
+ /* Not an association: the handle is the socket itself. */
+ ss = (struct sctp_sonode *)handle;
+ mutex_enter(&ss->ss_so.so_lock);
+
+ if (soppp->sopp_wroff != 0) {
+ ss->ss_wroff = soppp->sopp_wroff;
+ }
+ if (soppp->sopp_maxblk != 0) {
+ ss->ss_wrsize = soppp->sopp_maxblk;
+ }
+ }
+
+ /* ss was set in whichever branch ran, so this matches the enter. */
+ mutex_exit(&ss->ss_so.so_lock);
+}
diff --git a/usr/src/uts/common/fs/sockfs/socksctp.h b/usr/src/uts/common/inet/sockmods/socksctp.h
index dfbd818e40..55d56df7ae 100644
--- a/usr/src/uts/common/fs/sockfs/socksctp.h
+++ b/usr/src/uts/common/inet/sockmods/socksctp.h
@@ -26,8 +26,6 @@
#ifndef _SOCKSCTP_H_
#define _SOCKSCTP_H_
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -47,15 +45,8 @@ struct sctp_sonode {
sctp_assoc_t ss_maxassoc; /* assoc array size for 1-N */
sctp_assoc_t ss_assoccnt; /* current # of assocs */
struct sctp_sa_id *ss_assocs; /* assoc array for 1-N */
- kcondvar_t ss_txdata_cv; /* wait TX window to open */
- int ss_wroff;
- size_t ss_wrsize;
- int ss_txqueued; /* queued tx bytes */
- kcondvar_t ss_rxdata_cv; /* for waiting RX data */
- mblk_t *ss_rxdata; /* queued rx data */
- mblk_t **ss_rxtail; /* ptr to last message */
- int ss_rxqueued; /* queued rx bytes/# of conn */
- struct pollhead ss_poll_list;
+#define ss_wroff ss_so.so_proto_props.sopp_wroff
+#define ss_wrsize ss_so.so_proto_props.sopp_maxblk
};
/*
@@ -69,14 +60,13 @@ struct sctp_soassoc {
struct sctp_s *ssa_conn; /* opaque ptr passed to SCTP */
uint_t ssa_state; /* same as so_state */
int ssa_error; /* same as so_error */
- int ssa_txqueued; /* queued tx bytes */
+ boolean_t ssa_snd_qfull;
int ssa_wroff;
size_t ssa_wrsize;
- int ssa_rxqueued; /* queued rx bytes/# of conn */
+ int ssa_rcv_queued; /* queued rx bytes/# of conn */
};
/* 1-N socket association cache defined in socksctp.c */
-extern kmem_cache_t *sosctp_assoccache;
/*
* Association array element.
@@ -91,18 +81,14 @@ struct sctp_sa_id {
struct sctp_soassoc *ssi_assoc;
};
-extern sctp_upcalls_t sosctp_sock_upcalls;
-extern sctp_upcalls_t sosctp_assoc_upcalls;
-extern struct vnodeops *socksctp_vnodeops;
-extern const fs_operation_def_t socksctp_vnodeops_template[];
-
-extern void sosctp_free(struct sonode *so);
-extern int sosctp_chgpgrp(struct sctp_sonode *ss, pid_t pid);
-extern void sosctp_sendsig(struct sctp_sonode *ss, int event);
+extern sonodeops_t sosctp_sonodeops;
+extern sonodeops_t sosctp_seq_sonodeops;
+extern sock_upcalls_t sosctp_sock_upcalls;
+extern sock_upcalls_t sosctp_assoc_upcalls;
-extern int sosctp_bind(struct sonode *so, struct sockaddr *name,
- socklen_t namelen, int flags);
-extern int sosctp_recvmsg(struct sonode *, struct nmsghdr *, struct uio *);
+extern struct sonode *socksctp_create(struct sockparams *, int, int,
+ int, int, int, int *, cred_t *);
+extern void sosctp_fini(struct sonode *, struct cred *);
extern int sosctp_aid_grow(struct sctp_sonode *ss, sctp_assoc_t maxid,
int kmflags);
extern sctp_assoc_t sosctp_aid_get(struct sctp_sonode *ss);
@@ -119,7 +105,7 @@ extern struct sctp_soassoc *sosctp_assoc_create(struct sctp_sonode *ss,
extern void sosctp_assoc_free(struct sctp_sonode *ss, struct sctp_soassoc *ssa);
extern int sosctp_assoc_createconn(struct sctp_sonode *ss,
const struct sockaddr *name, socklen_t namelen,
- const uchar_t *control, socklen_t controllen, int fflag,
+ const uchar_t *control, socklen_t controllen, int fflag, struct cred *,
struct sctp_soassoc **ssap);
extern void sosctp_assoc_move(struct sctp_sonode *ss, struct sctp_sonode *nss,
struct sctp_soassoc *ssa);
@@ -165,12 +151,6 @@ extern int sosctp_uiomove(mblk_t *hdr_mp, ssize_t count, ssize_t blk_size,
} \
}
-/*
- * Event flags to sosctp_sendsig().
- */
-#define SCTPSIG_WRITE 0x1
-#define SCTPSIG_READ 0x2
-
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/uts/common/fs/sockfs/socksctpsubr.c b/usr/src/uts/common/inet/sockmods/socksctpsubr.c
index e741bd29f7..fab1a4534d 100644
--- a/usr/src/uts/common/fs/sockfs/socksctpsubr.c
+++ b/usr/src/uts/common/inet/sockmods/socksctpsubr.c
@@ -24,8 +24,6 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
@@ -36,9 +34,6 @@
#include <sys/cmn_err.h>
#include <sys/sysmacros.h>
-#include <sys/vfs.h>
-#include <sys/vfs_opreg.h>
-
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/strsun.h>
@@ -46,8 +41,10 @@
#include <netinet/sctp.h>
#include <inet/sctp_itf.h>
+#include <fs/sockfs/sockcommon.h>
#include "socksctp.h"
+extern kmem_cache_t *sosctp_assoccache;
/*
* Find a free association id. See os/fio.c file descriptor allocator
* for description of the algorithm.
@@ -178,8 +175,10 @@ sosctp_assoc_create(struct sctp_sonode *ss, int kmflag)
ssa->ssa_sonode = ss;
ssa->ssa_state = 0;
ssa->ssa_error = 0;
+#if 0
ssa->ssa_txqueued = 0;
- ssa->ssa_rxqueued = 0;
+#endif
+ ssa->ssa_snd_qfull = 0;
}
dprint(2, ("sosctp_assoc_create %p %p\n", (void *)ss, (void *)ssa));
return (ssa);
@@ -305,55 +304,6 @@ sosctp_find_cmsg(const uchar_t *control, socklen_t clen, int type)
}
/*
- * Wait until the socket is connected or there is an error.
- * fmode should contain any nonblocking flags.
- */
-int
-sosctp_waitconnected(struct sonode *so, int fmode)
-{
- int error = 0;
-
- ASSERT(MUTEX_HELD(&so->so_lock));
- ASSERT((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) ||
- so->so_error != 0);
-
- while ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) ==
- SS_ISCONNECTING && so->so_error == 0) {
-
- dprint(3, ("waiting for SS_ISCONNECTED on %p\n", (void *)so));
- if (fmode & (FNDELAY|FNONBLOCK))
- return (EINPROGRESS);
-
- if (!cv_wait_sig_swap(&so->so_state_cv, &so->so_lock)) {
- /*
- * Return EINTR and let the application use
- * nonblocking techniques for detecting when
- * the connection has been established.
- */
- return (EINTR);
- }
- dprint(3, ("awoken on %p\n", (void *)so));
- }
-
- if (so->so_error != 0) {
- error = sogeterr(so);
- ASSERT(error != 0);
- dprint(3, ("sosctp_waitconnected: error %d\n", error));
- return (error);
- }
- if (!(so->so_state & SS_ISCONNECTED)) {
- /*
- * Another thread could have consumed so_error
- * e.g. by calling read. - take from sowaitconnected()
- */
- error = ECONNREFUSED;
- dprint(3, ("sosctp_waitconnected: error %d\n", error));
- return (error);
- }
- return (0);
-}
-
-/*
* Wait until the association is connected or there is an error.
* fmode should contain any nonblocking flags.
*/
@@ -373,6 +323,8 @@ sosctp_assoc_waitconnected(struct sctp_soassoc *ssa, int fmode)
if (fmode & (FNDELAY|FNONBLOCK))
return (EINPROGRESS);
+ if (so->so_state & SS_CLOSING)
+ return (EINTR);
if (!cv_wait_sig_swap(&so->so_state_cv, &so->so_lock)) {
/*
* Return EINTR and let the application use
@@ -408,7 +360,7 @@ sosctp_assoc_waitconnected(struct sctp_soassoc *ssa, int fmode)
int
sosctp_assoc_createconn(struct sctp_sonode *ss, const struct sockaddr *name,
socklen_t namelen, const uchar_t *control, socklen_t controllen, int fflag,
- struct sctp_soassoc **ssap)
+ struct cred *cr, struct sctp_soassoc **ssap)
{
struct sonode *so = &ss->ss_so;
struct sctp_soassoc *ssa;
@@ -427,8 +379,8 @@ sosctp_assoc_createconn(struct sctp_sonode *ss, const struct sockaddr *name,
bzero(&laddr, sizeof (laddr));
laddr.ss_family = so->so_family;
- error = sosctp_bind(so, (struct sockaddr *)&laddr,
- sizeof (laddr), _SOBIND_LOCK_HELD);
+ error = SOP_BIND(so, (struct sockaddr *)&laddr,
+ sizeof (laddr), _SOBIND_LOCK_HELD, cr);
if (error) {
*ssap = NULL;
return (error);
@@ -456,8 +408,8 @@ sosctp_assoc_createconn(struct sctp_sonode *ss, const struct sockaddr *name,
ssa = sosctp_assoc_create(ss, KM_SLEEP);
ssa->ssa_wroff = ss->ss_wroff;
ssa->ssa_wrsize = ss->ss_wrsize;
- ssa->ssa_conn = sctp_create(ssa, so->so_priv, so->so_family,
- SCTP_CAN_BLOCK, &sosctp_assoc_upcalls, &sbl, CRED());
+ ssa->ssa_conn = sctp_create(ssa, (struct sctp_s *)so->so_proto_handle,
+ so->so_family, SCTP_CAN_BLOCK, &sosctp_assoc_upcalls, &sbl, cr);
mutex_enter(&so->so_lock);
ss->ss_assocs[id].ssi_assoc = ssa;
@@ -561,7 +513,7 @@ void
sosctp_assoc_move(struct sctp_sonode *ss, struct sctp_sonode *nss,
struct sctp_soassoc *ssa)
{
- mblk_t *mp, **nmp;
+ mblk_t *mp, **nmp, *last_mp;
struct sctp_soassoc *tmp;
sosctp_so_inherit(ss, nss);
@@ -571,26 +523,39 @@ sosctp_assoc_move(struct sctp_sonode *ss, struct sctp_sonode *nss,
(ssa->ssa_state & (SS_ISCONNECTED|SS_ISCONNECTING|
SS_ISDISCONNECTING|SS_CANTSENDMORE|SS_CANTRCVMORE|SS_ISBOUND));
nss->ss_so.so_error = ssa->ssa_error;
- nss->ss_txqueued = ssa->ssa_txqueued;
+#if 0
+ nss->ss_so.so_txqueued = ssa->ssa_txqueued;
+#endif
+ nss->ss_so.so_snd_qfull = ssa->ssa_snd_qfull;
nss->ss_wroff = ssa->ssa_wroff;
nss->ss_wrsize = ssa->ssa_wrsize;
- nss->ss_rxqueued = ssa->ssa_rxqueued;
- nss->ss_so.so_priv = ssa->ssa_conn;
+ nss->ss_so.so_rcv_queued = ssa->ssa_rcv_queued;
+ nss->ss_so.so_proto_handle = (sock_lower_handle_t)ssa->ssa_conn;
- if (nss->ss_rxqueued > 0) {
- nmp = &ss->ss_rxdata;
+ if (nss->ss_so.so_rcv_queued > 0) {
+ nmp = &ss->ss_so.so_rcv_q_head;
+ last_mp = NULL;
while ((mp = *nmp) != NULL) {
tmp = *(struct sctp_soassoc **)DB_BASE(mp);
if (tmp == ssa) {
*nmp = mp->b_next;
- *nss->ss_rxtail = mp;
- nss->ss_rxtail = &mp->b_next;
+ ASSERT(DB_TYPE(mp) != M_DATA);
+ if (nss->ss_so.so_rcv_q_last_head == NULL) {
+ nss->ss_so.so_rcv_q_head = mp;
+ } else {
+ nss->ss_so.so_rcv_q_last_head->b_next =
+ mp;
+ }
+ nss->ss_so.so_rcv_q_last_head = mp;
+ nss->ss_so.so_rcv_q_last_head->b_prev = last_mp;
+ mp->b_next = NULL;
} else {
nmp = &mp->b_next;
+ last_mp = mp;
}
}
- ss->ss_rxtail = nmp;
- *nss->ss_rxtail = NULL;
+ ss->ss_so.so_rcv_q_last_head = last_mp;
+ ss->ss_so.so_rcv_q_last_head->b_prev = last_mp;
}
}
@@ -643,97 +608,3 @@ sosctp_assoc_isdisconnected(struct sctp_soassoc *ssa, int error)
ssa->ssa_error = (ushort_t)error;
cv_broadcast(&so->so_state_cv);
}
-
-/*
- * Change the process/process group to which SIGIO is sent.
- */
-int
-sosctp_chgpgrp(struct sctp_sonode *ss, pid_t pid)
-{
- int error;
-
- ASSERT(MUTEX_HELD(&ss->ss_so.so_lock));
- if (pid != 0) {
- /*
- * Permissions check by sending signal 0.
- * Note that when kill fails it does a
- * set_errno causing the system call to fail.
- */
- error = kill(pid, 0);
- if (error != 0) {
- return (error);
- }
- }
- ss->ss_so.so_pgrp = pid;
- return (0);
-}
-
-/*
- * Generate a SIGIO, for 'writable' events include siginfo structure,
- * for read events just send the signal.
- */
-static void
-sosctp_sigproc(proc_t *proc, int event)
-{
- k_siginfo_t info;
-
- if (event & SCTPSIG_WRITE) {
- info.si_signo = SIGPOLL;
- info.si_code = POLL_OUT;
- info.si_errno = 0;
- info.si_fd = 0; /* not set with TCP either */
- info.si_band = 0;
- sigaddq(proc, NULL, &info, KM_NOSLEEP);
- }
- if (event & SCTPSIG_READ) {
- sigtoproc(proc, NULL, SIGPOLL);
- }
-}
-
-void
-sosctp_sendsig(struct sctp_sonode *ss, int event)
-{
- proc_t *proc;
- struct sonode *so = &ss->ss_so;
-
- ASSERT(MUTEX_HELD(&ss->ss_so.so_lock));
-
- if (so->so_pgrp == 0 || !(so->so_state & SS_ASYNC)) {
- return;
- }
- dprint(3, ("sending sig to %d\n", so->so_pgrp));
-
- if (so->so_pgrp > 0) {
- /*
- * XXX This unfortunately still generates
- * a signal when a fd is closed but
- * the proc is active.
- */
- mutex_enter(&pidlock);
- proc = prfind(so->so_pgrp);
- if (proc == NULL) {
- mutex_exit(&pidlock);
- return;
- }
- mutex_enter(&proc->p_lock);
- mutex_exit(&pidlock);
- sosctp_sigproc(proc, event);
- mutex_exit(&proc->p_lock);
- } else {
- /*
- * Send to process group. Hold pidlock across
- * calls to sosctp_sigproc().
- */
- pid_t pgrp = -so->so_pgrp;
-
- mutex_enter(&pidlock);
- proc = pgfind(pgrp);
- while (proc != NULL) {
- mutex_enter(&proc->p_lock);
- sosctp_sigproc(proc, event);
- proc = proc->p_pglink;
- mutex_exit(&proc->p_lock);
- }
- mutex_exit(&pidlock);
- }
-}
diff --git a/usr/src/uts/common/fs/sockfs/socksdp.c b/usr/src/uts/common/inet/sockmods/socksdp.c
index 7376783fc0..fdbdca5cb3 100644
--- a/usr/src/uts/common/fs/sockfs/socksdp.c
+++ b/usr/src/uts/common/inet/sockmods/socksdp.c
@@ -30,7 +30,6 @@
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/vfs.h>
-#include <sys/vfs_opreg.h>
#include <sys/vnode.h>
#include <sys/debug.h>
#include <sys/errno.h>
@@ -38,6 +37,9 @@
#include <sys/cmn_err.h>
#include <sys/sysmacros.h>
+#include <sys/filio.h>
+#include <sys/sockio.h>
+
#include <sys/project.h>
#include <sys/tihdr.h>
#include <sys/strsubr.h>
@@ -50,22 +52,37 @@
#include <inet/sdp_itf.h>
#include "socksdp.h"
+#include <fs/sockfs/sockcommon.h>
/*
* SDP sockfs sonode operations
*/
-static int sosdp_accept(struct sonode *, int, struct sonode **);
-static int sosdp_listen(struct sonode *, int);
+static int sosdp_init(struct sonode *, struct sonode *, struct cred *, int);
+static int sosdp_accept(struct sonode *, int, struct cred *, struct sonode **);
+static int sosdp_bind(struct sonode *, struct sockaddr *, socklen_t, int,
+ struct cred *);
+static int sosdp_listen(struct sonode *, int, struct cred *);
static int sosdp_connect(struct sonode *, const struct sockaddr *, socklen_t,
- int, int);
-static int sosdp_sendmsg(struct sonode *, struct nmsghdr *, struct uio *);
-static int sosdp_getpeername(struct sonode *);
-static int sosdp_getsockname(struct sonode *);
-static int sosdp_shutdown(struct sonode *, int);
+ int, int, struct cred *);
+static int sosdp_recvmsg(struct sonode *, struct nmsghdr *, struct uio *,
+ struct cred *);
+static int sosdp_sendmsg(struct sonode *, struct nmsghdr *, struct uio *,
+ struct cred *);
+static int sosdp_getpeername(struct sonode *, struct sockaddr *, socklen_t *,
+ boolean_t, struct cred *);
+static int sosdp_getsockname(struct sonode *, struct sockaddr *, socklen_t *,
+ struct cred *);
+static int sosdp_shutdown(struct sonode *, int, struct cred *);
static int sosdp_getsockopt(struct sonode *, int, int, void *, socklen_t *,
- int);
+ int, struct cred *);
static int sosdp_setsockopt(struct sonode *, int, int, const void *,
- socklen_t);
+ socklen_t, struct cred *);
+static int sosdp_ioctl(struct sonode *, int, intptr_t, int, struct cred *,
+ int32_t *);
+static int sosdp_poll(struct sonode *, short, int, short *,
+ struct pollhead **);
+static int sosdp_close(struct sonode *, int, struct cred *);
+void sosdp_fini(struct sonode *, struct cred *);
/*
@@ -80,20 +97,23 @@ static void sdp_sock_xmitted(void *handle, int txqueued);
static void sdp_sock_urgdata(void *handle);
static void sdp_sock_ordrel(void *handle);
-static kmem_cache_t *sosdp_sockcache;
-
sonodeops_t sosdp_sonodeops = {
- sosdp_accept, /* sop_accept */
- sosdp_bind, /* sop_bind */
- sosdp_listen, /* sop_listen */
- sosdp_connect, /* sop_connect */
- sosdp_recvmsg, /* sop_recvmsg */
- sosdp_sendmsg, /* sop_sendmsg */
- sosdp_getpeername, /* sop_getpeername */
- sosdp_getsockname, /* sop_getsockname */
- sosdp_shutdown, /* sop_shutdown */
- sosdp_getsockopt, /* sop_getsockopt */
- sosdp_setsockopt /* sop_setsockopt */
+ sosdp_init, /* sop_init */
+ sosdp_accept, /* sop_accept */
+ sosdp_bind, /* sop_bind */
+ sosdp_listen, /* sop_listen */
+ sosdp_connect, /* sop_connect */
+ sosdp_recvmsg, /* sop_recvmsg */
+ sosdp_sendmsg, /* sop_sendmsg */
+ so_sendmblk_notsupp, /* sop_sendmblk */
+ sosdp_getpeername, /* sop_getpeername */
+ sosdp_getsockname, /* sop_getsockname */
+ sosdp_shutdown, /* sop_shutdown */
+ sosdp_getsockopt, /* sop_getsockopt */
+ sosdp_setsockopt, /* sop_setsockopt */
+ sosdp_ioctl, /* sop_ioctl */
+ sosdp_poll, /* sop_poll */
+ sosdp_close, /* sop_close */
};
sdp_upcalls_t sosdp_sock_upcalls = {
@@ -107,320 +127,57 @@ sdp_upcalls_t sosdp_sock_upcalls = {
sdp_sock_ordrel,
};
-
-/*ARGSUSED*/
+/* ARGSUSED */
static int
-sosdp_sock_constructor(void *buf, void *cdrarg, int kmflags)
-{
- struct sdp_sonode *ss = buf;
- struct sonode *so = &ss->ss_so;
- struct vnode *vp;
-
- ss->ss_type = SOSDP_SOCKET;
- so->so_oobmsg = NULL;
- so->so_ack_mp = NULL;
- so->so_conn_ind_head = NULL;
- so->so_conn_ind_tail = NULL;
- so->so_discon_ind_mp = NULL;
- so->so_ux_bound_vp = NULL;
- so->so_unbind_mp = NULL;
- so->so_ops = NULL;
- so->so_accessvp = NULL;
- so->so_priv = NULL;
-
- so->so_nl7c_flags = 0;
- so->so_nl7c_uri = NULL;
- so->so_nl7c_rcv_mp = NULL;
-
- so->so_direct = NULL;
-
- vp = vn_alloc(kmflags);
- if (vp == NULL) {
- return (-1);
- }
- so->so_vnode = vp;
-
- vn_setops(vp, socksdp_vnodeops);
- vp->v_data = (caddr_t)so;
-
- mutex_init(&so->so_lock, NULL, MUTEX_DEFAULT, NULL);
- mutex_init(&so->so_plumb_lock, NULL, MUTEX_DEFAULT, NULL);
- cv_init(&so->so_state_cv, NULL, CV_DEFAULT, NULL);
- cv_init(&so->so_ack_cv, NULL, CV_DEFAULT, NULL);
- cv_init(&so->so_connind_cv, NULL, CV_DEFAULT, NULL);
- cv_init(&so->so_want_cv, NULL, CV_DEFAULT, NULL);
- return (0);
-}
-
-/*ARGSUSED*/
-static void
-sosdp_sock_destructor(void *buf, void *cdrarg)
-{
- struct sdp_sonode *ss = buf;
- struct sonode *so = &ss->ss_so;
- struct vnode *vp = SOTOV(so);
-
- ASSERT(so->so_direct == NULL);
-
- ASSERT(so->so_nl7c_flags == 0);
- ASSERT(so->so_nl7c_uri == NULL);
- ASSERT(so->so_nl7c_rcv_mp == NULL);
-
- ASSERT(so->so_oobmsg == NULL);
- ASSERT(so->so_ack_mp == NULL);
- ASSERT(so->so_conn_ind_head == NULL);
- ASSERT(so->so_conn_ind_tail == NULL);
- ASSERT(so->so_discon_ind_mp == NULL);
- ASSERT(so->so_ux_bound_vp == NULL);
- ASSERT(so->so_unbind_mp == NULL);
- ASSERT(so->so_ops == NULL || so->so_ops == &sosdp_sonodeops);
-
- ASSERT(vn_matchops(vp, socksdp_vnodeops));
- ASSERT(vp->v_data == (caddr_t)so);
-
- vn_free(vp);
-
- mutex_destroy(&so->so_lock);
- mutex_destroy(&so->so_plumb_lock);
- cv_destroy(&so->so_state_cv);
- cv_destroy(&so->so_ack_cv);
- cv_destroy(&so->so_connind_cv);
- cv_destroy(&so->so_want_cv);
-}
-
-
-int
-sosdp_init(void)
-{
- int error;
-
- error = vn_make_ops("socksdp", socksdp_vnodeops_template,
- &socksdp_vnodeops);
- if (error != 0) {
- cmn_err(CE_WARN, "sosdp_init: bad vnode ops template");
- return (error);
- }
-
- sosdp_sockcache = kmem_cache_create("sdpsock",
- sizeof (struct sdp_sonode), 0, sosdp_sock_constructor,
- sosdp_sock_destructor, NULL, NULL, NULL, 0);
- return (0);
-}
-
-static struct vnode *
-sosdp_makevp(struct vnode *accessvp, int domain, int type, int protocol,
- int kmflags)
+sosdp_init(struct sonode *so, struct sonode *pso, struct cred *cr, int flags)
{
- struct sdp_sonode *ss;
- struct sonode *so;
- struct vnode *vp;
- time_t now;
-
- ss = kmem_cache_alloc(sosdp_sockcache, kmflags);
- if (ss == NULL) {
- return (NULL);
- }
- so = &ss->ss_so;
- so->so_cache = sosdp_sockcache;
- so->so_obj = ss;
- vp = SOTOV(so);
- now = gethrestime_sec();
-
- so->so_flag = 0;
- so->so_accessvp = accessvp;
- so->so_dev = accessvp->v_rdev;
-
- so->so_state = 0;
- so->so_mode = 0;
-
- so->so_fsid = sockdev;
- so->so_atime = now;
- so->so_mtime = now;
- so->so_ctime = now;
- so->so_count = 0;
-
- so->so_family = domain;
- so->so_type = type;
- so->so_protocol = protocol;
- so->so_pushcnt = 0;
-
- so->so_options = 0;
- so->so_linger.l_onoff = 0;
- so->so_linger.l_linger = 0;
- so->so_sndbuf = 0;
- so->so_rcvbuf = 0;
- so->so_error = 0;
- so->so_delayed_error = 0;
-
- ASSERT(so->so_oobmsg == NULL);
- so->so_oobcnt = 0;
- so->so_oobsigcnt = 0;
- so->so_pgrp = 0;
- so->so_provinfo = NULL;
-
- so->so_laddr_sa = (struct sockaddr *)&ss->ss_laddr;
- so->so_faddr_sa = (struct sockaddr *)&ss->ss_faddr;
- so->so_laddr_maxlen = so->so_faddr_maxlen = sizeof (ss->ss_laddr);
- so->so_laddr_len = so->so_faddr_len = 0;
- so->so_eaddr_mp = NULL;
- so->so_delayed_error = 0;
-
- so->so_peercred = NULL;
-
- ASSERT(so->so_ack_mp == NULL);
- ASSERT(so->so_conn_ind_head == NULL);
- ASSERT(so->so_conn_ind_tail == NULL);
- ASSERT(so->so_ux_bound_vp == NULL);
- ASSERT(so->so_unbind_mp == NULL);
-
- vn_reinit(vp);
- vp->v_vfsp = rootvfs;
- vp->v_type = VSOCK;
- vp->v_rdev = so->so_dev;
-
- so->so_ops = &sosdp_sonodeops;
-
- ss->ss_rxqueued = 0;
- bzero(&ss->ss_poll_list, sizeof (ss->ss_poll_list));
-
- vn_exists(vp);
- return (vp);
-}
-
-/*
- * Creates a sdp socket data structure.
- * tso is non-NULL if it's passive open.
- */
-struct sonode *
-sosdp_create(vnode_t *accessvp, int domain, int type, int protocol,
- int version, struct sonode *tso, int *errorp)
-{
- struct sonode *so;
- vnode_t *vp;
- int error;
- int soflags;
- cred_t *cr;
-
- dprint(4, ("Inside sosdp_create: domain:%d proto:%d type:%d",
- domain, protocol, type));
-
- if (is_system_labeled()) {
- *errorp = EOPNOTSUPP;
- return (NULL);
- }
-
- if (version == SOV_STREAM) {
- *errorp = EINVAL;
- return (NULL);
- }
- ASSERT(accessvp != NULL);
+ int error = 0;
+ sdp_sockbuf_limits_t sbl;
+ sdp_upcalls_t *upcalls;
- /*
- * We only support one type of SDP socket. Let sotpi_create()
- * handle all other cases, such as raw socket.
- */
- if (!(domain == AF_INET || domain == AF_INET6) ||
- !(type == SOCK_STREAM)) {
- return (sotpi_create(accessvp, domain, type, protocol, version,
- NULL, errorp));
- }
+ if (pso != NULL) {
+ /* passive open, just inherit settings from parent */
- if (tso == NULL) {
- vp = sosdp_makevp(accessvp, domain, type, protocol, KM_SLEEP);
- ASSERT(vp != NULL);
+ mutex_enter(&so->so_lock);
- soflags = FREAD | FWRITE;
- } else {
- vp = sosdp_makevp(accessvp, domain, type, protocol,
- KM_NOSLEEP);
- if (vp == NULL) {
- /*
- * sosdp_makevp() only fails when there is no memory.
- */
- *errorp = ENOMEM;
- return (NULL);
- }
- soflags = FREAD | FWRITE | SO_ACCEPTOR;
- }
- /*
- * This function may be called in interrupt context, and CRED()
- * will be NULL. In this case, pass in kcred to VOP_OPEN().
- */
- if ((cr = CRED()) == NULL)
- cr = kcred;
- if ((error = VOP_OPEN(&vp, soflags, cr, NULL)) != 0) {
- VN_RELE(vp);
- *errorp = error;
- return (NULL);
- }
- so = VTOSO(vp);
+ so->so_state |= (SS_ISBOUND | SS_ISCONNECTED |
+ (pso->so_state & SS_ASYNC));
+ sosdp_so_inherit(pso, so);
+ so->so_proto_props = pso->so_proto_props;
- dprint(2, ("sosdp_create: %p domain %d type %d\n", (void *)so,
- domain, type));
+ mutex_exit(&so->so_lock);
- if (version == SOV_DEFAULT) {
- version = so_default_version;
+ return (0);
}
- so->so_version = (short)version;
- return (so);
-}
+ upcalls = &sosdp_sock_upcalls;
-/*
- * Free SDP socket data structure.
- * Closes incoming connections which were never accepted, frees
- * resources.
- */
-void
-sosdp_free(struct sonode *so)
-{
- struct sonode *nso;
- mblk_t *mp;
+ so->so_proto_handle = (sock_lower_handle_t)sdp_create(so, NULL,
+ so->so_family, SDP_CAN_BLOCK, upcalls, &sbl, cr, &error);
+ if (so->so_proto_handle == NULL)
+ return (ENOMEM);
- dprint(3, ("sosdp_free: so:%p priv:%p", (void *)so, so->so_priv));
+ so->so_rcvbuf = sbl.sbl_rxbuf;
+ so->so_rcvlowat = sbl.sbl_rxlowat;
+ so->so_sndbuf = sbl.sbl_txbuf;
+ so->so_sndlowat = sbl.sbl_txlowat;
- mutex_enter(&so->so_lock);
-
- /*
- * Need to clear these out so that sockfree() doesn't think that
- * there's memory in need of free'ing.
- */
- so->so_laddr_sa = so->so_faddr_sa = NULL;
- so->so_laddr_len = so->so_laddr_maxlen = 0;
- so->so_faddr_len = so->so_faddr_maxlen = 0;
-
- while ((mp = so->so_conn_ind_head) != NULL) {
- so->so_conn_ind_head = mp->b_next;
- mutex_exit(&so->so_lock);
- mp->b_next = NULL;
- nso = *(struct sonode **)mp->b_rptr;
-
- (void) VOP_CLOSE(SOTOV(nso), 0, 1, 0, CRED(), NULL);
- vn_invalid(SOTOV(nso));
- VN_RELE(SOTOV(nso));
-
- freeb(mp);
- mutex_enter(&so->so_lock);
- }
- so->so_conn_ind_tail = NULL;
- so->so_state &= ~SS_HASCONNIND;
- mutex_exit(&so->so_lock);
-
- sockfree(so);
+ return (error);
}
/*
* Accept incoming connection.
*/
+/* ARGSUSED */
static int
-sosdp_accept(struct sonode *lso, int fflag, struct sonode **nsop)
+sosdp_accept(struct sonode *lso, int fflag, struct cred *cr,
+ struct sonode **nsop)
{
int error = 0;
- mblk_t *mp;
struct sonode *nso;
- dprint(3, ("sosdp_accept: so:%p priv:%p", (void *)lso,
- lso->so_priv));
+ dprint(3, ("sosdp_accept: so:%p so_proto_handle:%p", (void *)lso,
+ (void *)lso->so_proto_handle));
if (!(lso->so_state & SS_ACCEPTCONN)) {
/*
@@ -429,50 +186,36 @@ sosdp_accept(struct sonode *lso, int fflag, struct sonode **nsop)
eprintsoline(lso, EINVAL);
return (EINVAL);
}
-
/*
* Returns right away if socket is nonblocking.
*/
- error = sowaitconnind(lso, fflag, &mp);
+ error = so_acceptq_dequeue(lso, (fflag & (FNONBLOCK|FNDELAY)), &nso);
if (error != 0) {
eprintsoline(lso, error);
- dprint(4, ("sosdp_accept: failed <%d>:lso:%p prv:%p",
- error, (void *)lso, lso->so_priv));
+ dprint(4, ("sosdp_accept: failed %d:lso:%p so_proto_handle:%p",
+ error, (void *)lso, (void *)lso->so_proto_handle));
return (error);
}
- nso = *(struct sonode **)mp->b_rptr;
- freeb(mp);
-
- mutex_enter(&lso->so_lock);
- ASSERT(SOTOSDO(lso)->ss_rxqueued > 0);
- --SOTOSDO(lso)->ss_rxqueued;
- mutex_exit(&lso->so_lock);
-
-
- /*
- * accept() needs remote address right away.
- */
- (void) sosdp_getpeername(nso);
dprint(2, ("sosdp_accept: new %p\n", (void *)nso));
-
*nsop = nso;
+
return (0);
}
/*
* Bind local endpoint.
*/
+/* ARGSUSED */
int
sosdp_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen,
- int flags)
+ int flags, struct cred *cr)
{
- int error = 0;
+ int error = 0;
if (!(flags & _SOBIND_LOCK_HELD)) {
mutex_enter(&so->so_lock);
so_lock_single(so); /* Set SOLOCKED */
- /* LINTED - statement has no conseq */
} else {
ASSERT(MUTEX_HELD(&so->so_lock));
ASSERT(so->so_flag & SOLOCKED);
@@ -487,6 +230,7 @@ sosdp_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen,
eprintsoline(so, error);
goto done;
}
+
/*
* X/Open requires this check
*/
@@ -496,16 +240,17 @@ sosdp_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen,
}
/*
- * Protocol module does address family checks.
+ * Protocol module does address family checks
*/
mutex_exit(&so->so_lock);
- error = sdp_bind(so->so_priv, name, namelen);
+ error = sdp_bind((struct sdp_conn_struct_t *)so->so_proto_handle,
+ name, namelen);
mutex_enter(&so->so_lock);
+
if (error == 0) {
so->so_state |= SS_ISBOUND;
- /* LINTED - statement has no conseq */
} else {
eprintsoline(so, error);
}
@@ -513,7 +258,6 @@ done:
if (!(flags & _SOBIND_LOCK_HELD)) {
so_unlock_single(so, SOLOCKED);
mutex_exit(&so->so_lock);
- /* LINTED - statement has no conseq */
} else {
/* If the caller held the lock don't release it here */
ASSERT(MUTEX_HELD(&so->so_lock));
@@ -525,12 +269,12 @@ done:
/*
* Turn socket into a listen socket.
*/
+/* ARGSUSED */
static int
-sosdp_listen(struct sonode *so, int backlog)
+sosdp_listen(struct sonode *so, int backlog, struct cred *cr)
{
int error = 0;
-
mutex_enter(&so->so_lock);
so_lock_single(so);
@@ -541,30 +285,9 @@ sosdp_listen(struct sonode *so, int backlog)
if (so->so_state & (SS_ISCONNECTING | SS_ISCONNECTED |
SS_ISDISCONNECTING | SS_CANTRCVMORE | SS_CANTSENDMORE)) {
error = EINVAL;
- eprintsoline(so, error);
+ eprintsoline(so, EINVAL);
goto done;
}
-
- if (backlog < 0) {
- backlog = 0;
- }
-
- /*
- * Use the same qlimit as in BSD. BSD checks the qlimit
- * before queuing the next connection implying that a
- * listen(sock, 0) allows one connection to be queued.
- * BSD also uses 1.5 times the requested backlog.
- *
- * XNS Issue 4 required a strict interpretation of the backlog.
- * This has been waived subsequently for Issue 4 and the change
- * incorporated in XNS Issue 5. So we aren't required to do
- * anything special for XPG apps.
- */
- if (backlog >= (INT_MAX - 1) / 3)
- backlog = INT_MAX;
- else
- backlog = backlog * 3 / 2 + 1;
-
/*
* If listen() is only called to change backlog, we don't
* need to notify protocol module.
@@ -576,13 +299,13 @@ sosdp_listen(struct sonode *so, int backlog)
mutex_exit(&so->so_lock);
- error = sdp_listen(so->so_priv, backlog);
+ error = sdp_listen((struct sdp_conn_struct_t *)so->so_proto_handle,
+ backlog);
mutex_enter(&so->so_lock);
if (error == 0) {
- so->so_state |= (SS_ACCEPTCONN|SS_ISBOUND);
+ so->so_state |= (SS_ACCEPTCONN | SS_ISBOUND);
so->so_backlog = backlog;
- /* LINTED - statement has no conseq */
} else {
eprintsoline(so, error);
}
@@ -599,13 +322,9 @@ done:
/*ARGSUSED*/
static int
sosdp_connect(struct sonode *so, const struct sockaddr *name,
- socklen_t namelen, int fflag, int flags)
+ socklen_t namelen, int fflag, int flags, struct cred *cr)
{
- int error;
-
- ASSERT(so->so_type == SOCK_STREAM);
- dprint(3, ("sosdp_connect: so:%p priv:%p", (void *)so,
- so->so_priv));
+ int error = 0;
mutex_enter(&so->so_lock);
so_lock_single(so);
@@ -627,10 +346,10 @@ sosdp_connect(struct sonode *so, const struct sockaddr *name,
}
/*
- * Check for failure of an earlier call
+ * check for failure of an earlier call
*/
if (so->so_error != 0) {
- error = sogeterr(so);
+ error = sogeterr(so, B_TRUE);
eprintsoline(so, error);
goto done;
}
@@ -647,24 +366,27 @@ sosdp_connect(struct sonode *so, const struct sockaddr *name,
goto done;
}
if (name == NULL || namelen == 0) {
- error = EINVAL;
- eprintsoline(so, error);
+ eprintsoline(so, EINVAL);
goto done;
}
soisconnecting(so);
-
mutex_exit(&so->so_lock);
- error = sdp_connect(so->so_priv, name, namelen);
+ error = sdp_connect((struct sdp_conn_struct_t *)so->so_proto_handle,
+ name, namelen);
+
mutex_enter(&so->so_lock);
if (error == 0) {
/*
* Allow other threads to access the socket
*/
- error = sosdp_waitconnected(so, fflag);
- dprint(4, ("sosdp_connect: wait on so:%p priv:%p failed:%d",
- (void *)so, so->so_priv, error));
+ error = sowaitconnected(so, fflag, 0);
+ dprint(4,
+ ("sosdp_connect: wait on so:%p "
+ "so_proto_handle:%p failed:%d",
+ (void *)so, (void *)so->so_proto_handle, error));
}
+
switch (error) {
case 0:
case EINPROGRESS:
@@ -684,12 +406,13 @@ done:
return (error);
}
-
/*
* Receive data.
*/
+/* ARGSUSED */
int
-sosdp_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop)
+sosdp_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
+ struct cred *cr)
{
int flags, error = 0;
int size;
@@ -735,7 +458,9 @@ sosdp_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop)
if (uiop->uio_fmode & (FNDELAY|FNONBLOCK)) {
flags |= MSG_DONTWAIT;
}
- error = sdp_recv(so->so_priv, msg, size, flags, uiop);
+ error = sdp_recv(
+ (struct sdp_conn_struct_t *)so->so_proto_handle, msg,
+ size, flags, uiop);
} else {
msg->msg_controllen = 0;
msg->msg_namelen = 0;
@@ -750,8 +475,10 @@ done:
/*
* Send message.
*/
+/* ARGSUSED */
static int
-sosdp_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop)
+sosdp_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
+ struct cred *cr)
{
int flags;
ssize_t count;
@@ -759,8 +486,8 @@ sosdp_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop)
ASSERT(so->so_type == SOCK_STREAM);
- dprint(4, ("sosdp_sendmsg: so:%p priv:%p",
- (void *)so, so->so_priv));
+ dprint(4, ("sosdp_sendmsg: so:%p so_proto_handle:%p",
+ (void *)so, (void *)so->so_proto_handle));
flags = msg->msg_flags;
@@ -771,12 +498,11 @@ sosdp_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop)
mutex_enter(&so->so_lock);
if (so->so_state & SS_CANTSENDMORE) {
mutex_exit(&so->so_lock);
- tsignal(curthread, SIGPIPE);
return (EPIPE);
}
if (so->so_error != 0) {
- error = sogeterr(so);
+ error = sogeterr(so, B_TRUE);
mutex_exit(&so->so_lock);
return (error);
}
@@ -794,93 +520,83 @@ sosdp_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop)
}
mutex_exit(&so->so_lock);
- error = sdp_send(so->so_priv, msg, count, flags, uiop);
- if (error == 0)
- return (0);
+ error = sdp_send((struct sdp_conn_struct_t *)so->so_proto_handle,
+ msg, count, flags, uiop);
- mutex_enter(&so->so_lock);
- if ((error == EPIPE) && (so->so_state & SS_CANTSENDMORE)) {
- /*
- * We received shutdown between the time lock was
- * lifted and call to sdp_sendmsg().
- */
- mutex_exit(&so->so_lock);
- tsignal(curthread, SIGPIPE);
- return (EPIPE);
- }
- mutex_exit(&so->so_lock);
return (error);
}
-
/*
* Get address of remote node.
+ *
+ * The peer address is written to the caller-supplied addr/addrlen by
+ * sdp_getpeername(), whose return value is passed straight back.
+ * ENOTCONN is returned without calling the protocol when the socket is
+ * not SS_ISCONNECTED, unless `accept' is set, in which case the
+ * connected-state check is bypassed.
*/
+/* ARGSUSED */
static int
-sosdp_getpeername(struct sonode *so)
+sosdp_getpeername(struct sonode *so, struct sockaddr *addr, socklen_t *addrlen,
+ boolean_t accept, struct cred *cr)
{
- int error;
-
- if (!(so->so_state & SS_ISCONNECTED)) {
- error = ENOTCONN;
+ if (!accept && !(so->so_state & SS_ISCONNECTED)) {
+ return (ENOTCONN);
} else {
- error = sdp_getpeername(so->so_priv, so->so_faddr_sa,
- &so->so_faddr_len);
+ return (sdp_getpeername(
+ (struct sdp_conn_struct_t *)so->so_proto_handle,
+ addr, addrlen));
}
- return (error);
}
/*
* Get local address.
+ *
+ * so_state is sampled under so_lock.  For a socket that is not yet
+ * SS_ISBOUND no protocol call is made and 0 is returned; if the family
+ * is AF_INET or AF_INET6 the caller's buffer is zeroed over *addrlen
+ * bytes, *addrlen is set to the size of the matching sockaddr_in or
+ * sockaddr_in6 and only sa_family is filled in.  For any other family
+ * addr and *addrlen are left untouched.  For a bound socket so_lock is
+ * dropped and the result of sdp_getsockname() is returned.
*/
+/* ARGSUSED */
static int
-sosdp_getsockname(struct sonode *so)
+sosdp_getsockname(struct sonode *so, struct sockaddr *addr, socklen_t *addrlen,
+ struct cred *cr)
{
- int error;
-
mutex_enter(&so->so_lock);
+
if (!(so->so_state & SS_ISBOUND)) {
/*
* Zero address, except for address family
*/
- bzero(so->so_laddr_sa, so->so_laddr_maxlen);
-
- so->so_laddr_len = (so->so_family == AF_INET6) ?
- sizeof (struct sockaddr_in6) : sizeof (struct sockaddr_in);
- so->so_laddr_sa->sa_family = so->so_family;
- error = 0;
+ if (so->so_family == AF_INET || so->so_family == AF_INET6) {
+ bzero(addr, *addrlen);
+ *addrlen = (so->so_family == AF_INET6) ?
+ sizeof (struct sockaddr_in6) :
+ sizeof (struct sockaddr_in);
+ addr->sa_family = so->so_family;
+ }
mutex_exit(&so->so_lock);
+ return (0);
} else {
mutex_exit(&so->so_lock);
-
- error = sdp_getsockname(so->so_priv, so->so_laddr_sa,
- &so->so_laddr_len);
+ return (sdp_getsockname(
+ (struct sdp_conn_struct_t *)so->so_proto_handle,
+ addr, addrlen));
}
-
- return (error);
}
/*
* Called from shutdown().
*/
+/* ARGSUSED */
static int
-sosdp_shutdown(struct sonode *so, int how)
+sosdp_shutdown(struct sonode *so, int how, struct cred *cr)
{
- struct sdp_sonode *ss = SOTOSDO(so);
uint_t state_change;
int error = 0;
- short wakesig = 0;
mutex_enter(&so->so_lock);
so_lock_single(so);
-
/*
* Record the current state and then perform any state changes.
* Then use the difference between the old and new states to
* determine which needs to be done.
*/
state_change = so->so_state;
+ if (!(state_change & SS_ISCONNECTED)) {
+ error = ENOTCONN;
+ goto done;
+ }
switch (how) {
case SHUT_RD:
@@ -900,21 +616,16 @@ sosdp_shutdown(struct sonode *so, int how)
state_change = so->so_state & ~state_change;
- if (state_change & SS_CANTRCVMORE) {
- wakesig = POLLIN|POLLRDNORM;
- sosdp_sendsig(ss, SDPSIG_READ);
- }
if (state_change & SS_CANTSENDMORE) {
- wakesig |= POLLOUT;
so->so_state |= SS_ISDISCONNECTING;
}
- mutex_exit(&so->so_lock);
-
- pollwakeup(&ss->ss_poll_list, wakesig);
+ so_notify_shutdown(so);
if (state_change & SS_CANTSENDMORE) {
- error = sdp_shutdown(so->so_priv, how);
+ error = sdp_shutdown(
+ (struct sdp_conn_struct_t *)so->so_proto_handle, how);
}
+
mutex_enter(&so->so_lock);
done:
so_unlock_single(so, SOLOCKED);
@@ -935,7 +646,7 @@ done:
/*ARGSUSED*/
static int
sosdp_getsockopt(struct sonode *so, int level, int option_name,
- void *optval, socklen_t *optlenp, int flags)
+ void *optval, socklen_t *optlenp, int flags, struct cred *cr)
{
int error = 0;
void *option = NULL;
@@ -987,7 +698,7 @@ sosdp_getsockopt(struct sonode *so, int level, int option_name,
goto copyout;
case SO_ERROR:
- value = sogeterr(so);
+ value = sogeterr(so, B_TRUE);
goto copyout;
case SO_ACCEPTCONN:
@@ -1045,7 +756,8 @@ sosdp_getsockopt(struct sonode *so, int level, int option_name,
}
optlen = maxlen;
mutex_exit(&so->so_lock);
- error = sdp_get_opt(so->so_priv, level, option_name, optbuf, &optlen);
+ error = sdp_get_opt((struct sdp_conn_struct_t *)so->so_proto_handle,
+ level, option_name, optbuf, &optlen);
mutex_enter(&so->so_lock);
ASSERT(optlen <= maxlen);
if (error != 0) {
@@ -1078,43 +790,35 @@ done:
/*
* Set socket options
*/
+/* ARGSUSED */
static int
sosdp_setsockopt(struct sonode *so, int level, int option_name,
- const void *optval, t_uscalar_t optlen)
+ const void *optval, t_uscalar_t optlen, struct cred *cr)
{
- int error;
void *conn = NULL;
+ int error = 0;
-
- /* X/Open requires this check */
if (so->so_state & SS_CANTSENDMORE) {
return (EINVAL);
}
- /* Caller allocates aligned optval, or passes null */
- ASSERT(((uintptr_t)optval & (sizeof (t_scalar_t) - 1)) == 0);
-
- /* No SDP options should be zero-length */
- if (optlen == 0) {
- error = EINVAL;
- eprintsoline(so, error);
- return (error);
- }
-
mutex_enter(&so->so_lock);
so_lock_single(so);
if (so->so_type == SOCK_STREAM) {
- conn = so->so_priv;
+ conn = (void *)so->so_proto_handle;
}
dprint(2, ("sosdp_setsockopt (%d) - conn %p %d %d \n",
so->so_type, conn, level, option_name));
+
if (conn != NULL) {
mutex_exit(&so->so_lock);
- error = sdp_set_opt(conn, level, option_name, optval, optlen);
+ error = sdp_set_opt((struct sdp_conn_struct_t *)conn, level,
+ option_name, optval, optlen);
mutex_enter(&so->so_lock);
}
+
/*
* Check for SOL_SOCKET options and record their values.
* If we know about a SOL_SOCKET parameter and the transport
@@ -1244,6 +948,239 @@ done:
return (error);
}
+/*
+ * Socket-level ioctl handler for SDP sockets.
+ *
+ * FIONBIO, FIOASYNC, SIOCSPGRP/FIOSETOWN, SIOCGPGRP/FIOGETOWN and
+ * FIONREAD are handled here; SIOCATMARK and SIOCSENABLESDP are forwarded
+ * to sdp_ioctl().  Argument data is moved with so_copyin()/so_copyout(),
+ * which are told via (mode & FKIOCTL) whether the request is in-kernel.
+ *
+ * Returns 0 on success, EFAULT when a copy fails, the error reported by
+ * socket_chgpgrp() or sdp_ioctl(), or EINVAL for an unrecognized cmd.
+ */
+/* ARGSUSED */
+static int
+sosdp_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode,
+    struct cred *cr, int32_t *rvalp)
+{
+	int32_t value;
+	int error, intval;
+	pid_t pid;
+
+	/* handle socket specific ioctls */
+	switch (cmd) {
+	case FIONBIO:
+		if (so_copyin((void *)arg, &value, sizeof (int32_t),
+		    (mode & (int)FKIOCTL))) {
+			return (EFAULT);
+		}
+		mutex_enter(&so->so_lock);
+		if (value != 0) {
+			so->so_state |= SS_NDELAY;
+		} else {
+			so->so_state &= ~SS_NDELAY;
+		}
+		mutex_exit(&so->so_lock);
+		return (0);
+
+	case FIOASYNC:
+		if (so_copyin((void *)arg, &value, sizeof (int32_t),
+		    (mode & (int)FKIOCTL))) {
+			return (EFAULT);
+		}
+		mutex_enter(&so->so_lock);
+
+		if (value != 0) {
+			/* Turn on SIGIO */
+			so->so_state |= SS_ASYNC;
+		} else {
+			/* Turn off SIGIO */
+			so->so_state &= ~SS_ASYNC;
+		}
+		mutex_exit(&so->so_lock);
+		return (0);
+
+	case SIOCSPGRP:
+	case FIOSETOWN:
+		if (so_copyin((void *)arg, &pid, sizeof (pid_t),
+		    (mode & (int)FKIOCTL))) {
+			return (EFAULT);
+		}
+		mutex_enter(&so->so_lock);
+
+		/* Only call socket_chgpgrp() when the owner really changes */
+		error = (pid != so->so_pgrp) ? socket_chgpgrp(so, pid) : 0;
+		mutex_exit(&so->so_lock);
+		return (error);
+
+	case SIOCGPGRP:
+	case FIOGETOWN:
+		if (so_copyout(&so->so_pgrp, (void *)arg,
+		    sizeof (pid_t), (mode & (int)FKIOCTL)))
+			return (EFAULT);
+		return (0);
+
+	case SIOCATMARK:
+		intval = 0;
+		error = sdp_ioctl(
+		    (struct sdp_conn_struct_t *)so->so_proto_handle, cmd,
+		    &intval, cr);
+		/*
+		 * Report a protocol failure to the caller instead of
+		 * silently returning success with a zeroed result.
+		 */
+		if (error != 0)
+			return (error);
+		if (so_copyout(&intval, (void *)arg, sizeof (int),
+		    (mode & (int)FKIOCTL)))
+			return (EFAULT);
+		return (0);
+
+	case SIOCSENABLESDP: {
+		int32_t enable;
+
+		/*
+		 * System wide enable SDP
+		 */
+
+		if (so_copyin((void *)arg, &enable, sizeof (int32_t),
+		    mode & (int)FKIOCTL))
+			return (EFAULT);
+
+		error = sdp_ioctl(
+		    (struct sdp_conn_struct_t *)so->so_proto_handle, cmd,
+		    &enable, cr);
+		/*
+		 * A failed enable/disable request must not look like a
+		 * success to the caller.
+		 */
+		if (error != 0)
+			return (error);
+		if (so_copyout(&enable, (void *)arg,
+		    sizeof (int32_t), (mode & (int)FKIOCTL)))
+			return (EFAULT);
+		return (0);
+	}
+	/* from strioctl */
+	case FIONREAD:
+		/*
+		 * Return number of bytes of data in all data messages
+		 * in queue in "arg".
+		 * For stream socket, amount of available data.
+		 * A listening socket always reports zero.
+		 */
+		if (so->so_state & SS_ACCEPTCONN) {
+			intval = 0;
+		} else {
+			mutex_enter(&so->so_lock);
+			intval = sdp_polldata(
+			    (struct sdp_conn_struct_t *)so->so_proto_handle,
+			    SDP_READ);
+			mutex_exit(&so->so_lock);
+		}
+		if (so_copyout(&intval, (void *)arg, sizeof (intval),
+		    (mode & (int)FKIOCTL)))
+			return (EFAULT);
+		return (0);
+	default:
+		return (EINVAL);
+	}
+}
+
+/*
+ * Poll entry point for SDP sockets.  so_lock is not taken here; the
+ * reasoning is given with socktpi_poll().
+ *
+ * On return *reventsp holds the subset of `events' that is ready.  When
+ * nothing is ready and no earlier descriptor has fired (!anyyet), *phpp
+ * is pointed at the socket's pollhead.  Always returns 0.
+ */
+static int
+sosdp_poll(struct sonode *so, short events, int anyyet, short *reventsp,
+    struct pollhead **phpp)
+{
+	const short requested = events;		/* caller's unfiltered mask */
+	const int state = so->so_state;		/* single snapshot */
+	short ready;
+
+	ASSERT(so->so_version != SOV_STREAM);
+
+	/* An unconnected stream socket can never be writable. */
+	if (so->so_type == SOCK_STREAM && !(state & SS_ISCONNECTED))
+		events &= ~(POLLOUT|POLLWRBAND);
+
+	/*
+	 * A pending error makes every requested read/write event fire,
+	 * judged against the mask as the caller originally passed it.
+	 */
+	if (so->so_error != 0 &&
+	    (requested & (POLLIN|POLLRDNORM|POLLOUT)) != 0) {
+		*reventsp = requested & (POLLIN|POLLRDNORM|POLLOUT);
+		return (0);
+	}
+
+	ready = 0;
+
+	/* Only stream sockets can report readiness. */
+	if (so->so_type == SOCK_STREAM) {
+		/*
+		 * Writable only once the protocol says queued TX data
+		 * has dropped below the watermark.
+		 */
+		if (sdp_polldata(
+		    (struct sdp_conn_struct_t *)so->so_proto_handle,
+		    SDP_XMIT))
+			ready |= events & POLLOUT;
+
+		if (sdp_polldata(
+		    (struct sdp_conn_struct_t *)so->so_proto_handle,
+		    SDP_READ))
+			ready |= events & (POLLIN|POLLRDNORM);
+
+		/* EOF and queued incoming connections count as readable. */
+		if ((state & SS_CANTRCVMORE) != 0 ||
+		    so->so_acceptq_head != NULL)
+			ready |= events & (POLLIN|POLLRDNORM);
+	}
+
+	*reventsp = ready;
+	if (ready == 0 && !anyyet)
+		*phpp = &so->so_poll_list;
+
+	return (0);
+}
+
+/*
+ * Close an SDP socket: mark it disconnected, ask the protocol to start
+ * tearing the connection down, then issue the disconnect notification.
+ * Returns whatever sdp_disconnect() reported.
+ */
+/* ARGSUSED */
+static int
+sosdp_close(struct sonode *so, int flag, struct cred *cr)
+{
+	struct sdp_conn_struct_t *conn;
+	int rval;
+
+	/*
+	 * Other operations may still be running on this socket, so the
+	 * state flags are updated first, with the socket single-threaded.
+	 */
+	mutex_enter(&so->so_lock);
+	so_lock_single(so);
+	soisdisconnected(so, 0);
+	mutex_exit(&so->so_lock);
+
+	/* Begin the connection shutdown with so_lock dropped. */
+	conn = (struct sdp_conn_struct_t *)so->so_proto_handle;
+	rval = sdp_disconnect(conn, flag);
+
+	/*
+	 * NOTE(review): so_lock is not released in this function after
+	 * this point; presumably so_notify_disconnected() drops it --
+	 * confirm against its definition.
+	 */
+	mutex_enter(&so->so_lock);
+	so_unlock_single(so, SOLOCKED);
+	so_notify_disconnected(so, rval);
+
+	return (rval);
+}
+
+/*
+ * Final teardown of an SDP sonode: close the protocol handle if one is
+ * still attached, flush the accept queue under so_lock, then hand the
+ * sonode to sonode_fini().
+ */
+/* ARGSUSED */
+void
+sosdp_fini(struct sonode *so, struct cred *cr)
+{
+	struct sdp_conn_struct_t *conn =
+	    (struct sdp_conn_struct_t *)so->so_proto_handle;
+
+	dprint(3, ("sosdp_fini: so:%p so_proto_handle:%p", (void *)so,
+	    (void *)conn));
+
+	ASSERT(so->so_ops == &sosdp_sonodeops);
+
+	/* Detach from the protocol; the handle is cleared either way. */
+	if (conn != NULL)
+		sdp_close(conn);
+	so->so_proto_handle = NULL;
+
+	mutex_enter(&so->so_lock);
+	so_acceptq_flush(so);
+	mutex_exit(&so->so_lock);
+
+	sonode_fini(so);
+}
+
/*
* Upcalls from SDP
*/
@@ -1254,83 +1191,37 @@ done:
static void *
sdp_sock_newconn(void *parenthandle, void *connind)
{
- struct sdp_sonode *lss = parenthandle;
- struct sonode *lso = &lss->ss_so;
+ struct sonode *lso = parenthandle;
struct sonode *nso;
- struct sdp_sonode *nss;
- mblk_t *mp;
int error;
ASSERT(lso->so_state & SS_ACCEPTCONN);
- ASSERT(lso->so_priv != NULL); /* closed conn */
+ ASSERT(lso->so_proto_handle != NULL); /* closed conn */
ASSERT(lso->so_type == SOCK_STREAM);
- dprint(3, ("sosdp_newconn A: so:%p priv:%p", (void *)lso,
- lso->so_priv));
+ dprint(3, ("sosdp_newconn A: so:%p so_proto_handle:%p", (void *)lso,
+ (void *)lso->so_proto_handle));
/*
* Check current # of queued conns against backlog
*/
- if (lss->ss_rxqueued >= lso->so_backlog) {
- return (NULL);
- }
-
- /*
- * Need to create a new socket.
- */
- mp = allocb(sizeof (connind), BPRI_MED);
- if (mp == NULL) {
- eprintsoline(lso, ENOMEM);
+ if (lso->so_rcv_queued >= lso->so_backlog) {
return (NULL);
}
- DB_TYPE(mp) = M_PROTO;
- VN_HOLD(lso->so_accessvp);
- nso = sosdp_create(lso->so_accessvp, lso->so_family, lso->so_type,
- lso->so_protocol, lso->so_version, lso, &error);
+ nso = socket_newconn(lso, connind, NULL, SOCKET_NOSLEEP, &error);
if (nso == NULL) {
- VN_RELE(lso->so_accessvp);
- freeb(mp);
eprintsoline(lso, error);
return (NULL);
}
dprint(2, ("sdp_stream_newconn: new %p\n", (void *)nso));
- nss = SOTOSDO(nso);
-
- /*
- * Inherit socket properties
- */
- mutex_enter(&lso->so_lock);
- mutex_enter(&nso->so_lock);
- nso->so_state |= (SS_ISBOUND | SS_ISCONNECTED |
- (lso->so_state & SS_ASYNC));
- sosdp_so_inherit(lss, nss);
- nso->so_priv = connind;
-
- mutex_exit(&nso->so_lock);
-
- ++lss->ss_rxqueued;
- mutex_exit(&lso->so_lock);
-
- /*
- * Copy pointer to new socket to connind queue message
- */
- *(struct sonode **)mp->b_wptr = nso;
- mp->b_wptr += sizeof (nso);
-
- /*
- * Wake people who're waiting incoming conns. Note that
- * soqueueconnind gets so_lock.
- */
- soqueueconnind(lso, mp);
- pollwakeup(&lss->ss_poll_list, POLLIN|POLLRDNORM);
+ (void) so_acceptq_enqueue(lso, nso);
mutex_enter(&lso->so_lock);
- sosdp_sendsig(lss, SDPSIG_READ);
- mutex_exit(&lso->so_lock);
- return (nss);
+ so_notify_newconn(lso);
+ return (nso);
}
/*
@@ -1339,26 +1230,19 @@ sdp_sock_newconn(void *parenthandle, void *connind)
static void
sdp_sock_connected(void *handle)
{
- struct sdp_sonode *ss = handle;
- struct sonode *so = &ss->ss_so;
+ struct sonode *so = handle;
ASSERT(so->so_type == SOCK_STREAM);
- dprint(3, ("sosdp_connected C: so:%p priv:%p", (void *)so,
- so->so_priv));
+ dprint(3, ("sosdp_connected C: so:%p so_proto_handle:%p", (void *)so,
+ (void *)so->so_proto_handle));
mutex_enter(&so->so_lock);
- ASSERT(so->so_priv); /* closed conn */
+ ASSERT(so->so_proto_handle); /* closed conn */
ASSERT(!(so->so_state & SS_ACCEPTCONN));
soisconnected(so);
- sosdp_sendsig(ss, SDPSIG_WRITE);
- mutex_exit(&so->so_lock);
-
- /*
- * Wake ones who're waiting for conn to become established.
- */
- pollwakeup(&ss->ss_poll_list, POLLOUT);
+ so_notify_connected(so);
}
/*
@@ -1368,32 +1252,17 @@ sdp_sock_connected(void *handle)
static void
sdp_sock_disconnected(void *handle, int error)
{
- int event = 0;
- struct sdp_sonode *ss = handle;
- struct sonode *so = &ss->ss_so;
+ struct sonode *so = handle;
ASSERT(so->so_type == SOCK_STREAM);
- dprint(2, ("sosdp_disconnected C: so:%p priv:%p error:%d",
- (void *)so, so->so_priv, error));
+ dprint(2, ("sosdp_disconnected C: so:%p so_proto_handle:%p error:%d",
+ (void *)so, (void *)so->so_proto_handle, error));
mutex_enter(&so->so_lock);
- ASSERT(so->so_priv != NULL); /* closed conn */
-
- /*
- * If socket is already disconnected/disconnecting,
- * don't (re)send signal.
- */
- if (!(so->so_state & SS_CANTRCVMORE))
- event |= SDPSIG_READ;
- if (!(so->so_state & SS_CANTSENDMORE))
- event |= SDPSIG_WRITE;
- if (event != 0)
- sosdp_sendsig(ss, event);
+ ASSERT(so->so_proto_handle != NULL); /* closed conn */
soisdisconnected(so, error);
- mutex_exit(&so->so_lock);
-
- pollwakeup(&ss->ss_poll_list, POLLIN|POLLRDNORM|POLLOUT);
+ so_notify_disconnected(so, error);
}
/*
@@ -1403,15 +1272,12 @@ sdp_sock_disconnected(void *handle, int error)
static int
sdp_sock_recv(void *handle, mblk_t *mp, int flags)
{
- struct sdp_sonode *ss = handle;
- struct sonode *so = &ss->ss_so;
+ struct sonode *so = handle;
ASSERT(so->so_type == SOCK_STREAM);
mutex_enter(&so->so_lock);
- sosdp_sendsig(ss, SDPSIG_READ);
- mutex_exit(&so->so_lock);
- pollwakeup(&ss->ss_poll_list, POLLIN|POLLRDNORM);
+ so_notify_data(so, 0);
return (so->so_rcvbuf);
}
@@ -1422,13 +1288,12 @@ sdp_sock_recv(void *handle, mblk_t *mp, int flags)
static void
sdp_sock_xmitted(void *handle, int writeable)
{
- struct sdp_sonode *ss = handle;
- struct sonode *so = &ss->ss_so;
+ struct sonode *so = handle;
- dprint(4, ("sosdp_sock_xmitted: so:%p priv:%p txq:%d",
- (void *)so, so->so_priv, writeable));
+ dprint(4, ("sosdp_sock_xmitted: so:%p so_proto_handle:%p txq:%d",
+ (void *)so, (void *)so->so_proto_handle, writeable));
mutex_enter(&so->so_lock);
- ASSERT(so->so_priv != NULL); /* closed conn */
+ ASSERT(so->so_proto_handle != NULL); /* closed conn */
/*
@@ -1436,9 +1301,7 @@ sdp_sock_xmitted(void *handle, int writeable)
* watermark.
*/
if (!writeable) {
- sosdp_sendsig(ss, SDPSIG_WRITE);
- mutex_exit(&so->so_lock);
- pollwakeup(&ss->ss_poll_list, POLLOUT);
+ so_notify_writable(so);
} else {
mutex_exit(&so->so_lock);
}
@@ -1451,16 +1314,14 @@ sdp_sock_xmitted(void *handle, int writeable)
static void
sdp_sock_urgdata(void *handle)
{
- struct sdp_sonode *ss = handle;
-
- ASSERT(ss->ss_so.so_type == SOCK_STREAM);
+ struct sonode *so = handle;
- mutex_enter(&ss->ss_so.so_lock);
+ ASSERT(so->so_type == SOCK_STREAM);
- ASSERT(ss->ss_so.so_priv != NULL); /* closed conn */
- sosdp_sendsig(ss, SDPSIG_URG);
+ mutex_enter(&so->so_lock);
- mutex_exit(&ss->ss_so.so_lock);
+ ASSERT(so->so_proto_handle != NULL); /* closed conn */
+ so_notify_oobsig(so);
}
/*
@@ -1469,31 +1330,26 @@ sdp_sock_urgdata(void *handle)
static void
sdp_sock_ordrel(void *handle)
{
- struct sdp_sonode *ss = handle;
- /* LINTED */
- struct sonode *so = &ss->ss_so;
-
- ASSERT(ss->ss_so.so_type == SOCK_STREAM);
-
- dprint(4, ("sdp_sock_ordrel : so:%p, priv:%p",
- (void *)so, so->so_priv));
- mutex_enter(&ss->ss_so.so_lock);
- socantrcvmore(&ss->ss_so);
- mutex_exit(&ss->ss_so.so_lock);
- pollwakeup(&ss->ss_poll_list, POLLIN|POLLRDNORM);
+ struct sonode *so = handle;
+
+ ASSERT(so->so_type == SOCK_STREAM);
+
+ dprint(4, ("sdp_sock_ordrel : so:%p, so_proto_handle:%p",
+ (void *)so, (void *)so->so_proto_handle));
+ mutex_enter(&so->so_lock);
+ socantrcvmore(so);
+ so_notify_eof(so);
}
static void
sdp_sock_connfail(void *handle, int error)
{
+ struct sonode *so = handle;
- struct sdp_sonode *ss = handle;
- struct sonode *so = &ss->ss_so;
-
- dprint(3, ("sosdp_conn Failed: so:%p priv:%p", (void *)so,
- so->so_priv));
+ dprint(3, ("sosdp_conn Failed: so:%p so_proto_handle:%p", (void *)so,
+ (void *)so->so_proto_handle));
mutex_enter(&so->so_lock);
- ASSERT(so->so_priv != NULL); /* closed conn */
+ ASSERT(so->so_proto_handle != NULL); /* closed conn */
so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
so->so_error = (ushort_t)error;
mutex_exit(&so->so_lock);
diff --git a/usr/src/uts/common/inet/sockmods/socksdp.h b/usr/src/uts/common/inet/sockmods/socksdp.h
new file mode 100644
index 0000000000..ba6bd109e8
--- /dev/null
+++ b/usr/src/uts/common/inet/sockmods/socksdp.h
@@ -0,0 +1,44 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SOCKSDP_H_
+#define _SOCKSDP_H_
+
+/*
+ * Declarations shared by the SDP socket module sources.
+ *
+ * NOTE(review): this header includes nothing itself; socksdpsubr.c pulls
+ * in <sys/socketvar.h> and <inet/sdp_itf.h> before including it, which
+ * presumably supply struct sonode, sonodeops_t and sdp_upcalls_t.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* so_ops value of an SDP sonode; sosdp_fini() asserts so_ops points here. */
+extern sonodeops_t sosdp_sonodeops;
+/* Upcall table handed to sdp_create() when a connection is created. */
+extern sdp_upcalls_t sosdp_sock_upcalls;
+
+/* Closes the protocol handle, flushes the accept queue, finalizes the sonode. */
+extern void sosdp_fini(struct sonode *, struct cred *);
+/* Copies inheritable options, buffer sizes, low-water marks and pgrp. */
+extern void sosdp_so_inherit(struct sonode *, struct sonode *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SOCKSDP_H_ */
diff --git a/usr/src/uts/common/inet/sockmods/socksdpsubr.c b/usr/src/uts/common/inet/sockmods/socksdpsubr.c
new file mode 100644
index 0000000000..8917878ec5
--- /dev/null
+++ b/usr/src/uts/common/inet/sockmods/socksdpsubr.c
@@ -0,0 +1,60 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/types.h>
+#include <sys/t_lock.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/debug.h>
+#include <sys/errno.h>
+#include <sys/strsubr.h>
+#include <sys/cmn_err.h>
+#include <sys/sysmacros.h>
+
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/strsun.h>
+#include <sys/signal.h>
+
+#include <inet/sdp_itf.h>
+#include "socksdp.h"
+
+/*
+ * Seed a new socket (nso) with the settings of the socket it derives
+ * from (lso): the inheritable option bits, the send/receive buffer
+ * sizes and low-water marks, and the owning process group.
+ */
+void
+sosdp_so_inherit(struct sonode *lso, struct sonode *nso)
+{
+	/* Option bits that carry over; all other bits start out clear. */
+	const int carried = SO_DEBUG | SO_REUSEADDR | SO_KEEPALIVE |
+	    SO_DONTROUTE | SO_BROADCAST | SO_USELOOPBACK | SO_OOBINLINE |
+	    SO_DGRAM_ERRIND | SO_LINGER;
+
+	nso->so_options = lso->so_options & carried;
+
+	/* Send side */
+	nso->so_sndbuf = lso->so_sndbuf;
+	nso->so_sndlowat = lso->so_sndlowat;
+
+	/* Receive side */
+	nso->so_rcvbuf = lso->so_rcvbuf;
+	nso->so_rcvlowat = lso->so_rcvlowat;
+
+	nso->so_pgrp = lso->so_pgrp;
+}
diff --git a/usr/src/uts/common/inet/spdsock.h b/usr/src/uts/common/inet/spdsock.h
index a5f18bd1c4..7622e56a45 100644
--- a/usr/src/uts/common/inet/spdsock.h
+++ b/usr/src/uts/common/inet/spdsock.h
@@ -26,8 +26,6 @@
#ifndef _INET_SPDSOCK_H
#define _INET_SPDSOCK_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/netstack.h>
#ifdef __cplusplus
@@ -112,8 +110,7 @@ extern uint_t spdsock_max_optsize;
extern int spdsock_opt_get(queue_t *, int, int, uchar_t *);
extern int spdsock_opt_set(queue_t *, uint_t, int, int, uint_t, uchar_t *,
- uint_t *, uchar_t *, void *, cred_t *,
- mblk_t *);
+ uint_t *, uchar_t *, void *, cred_t *, mblk_t *);
#ifdef __cplusplus
}
diff --git a/usr/src/uts/common/inet/squeue.c b/usr/src/uts/common/inet/squeue.c
index 559abd9178..396068a2d9 100644
--- a/usr/src/uts/common/inet/squeue.c
+++ b/usr/src/uts/common/inet/squeue.c
@@ -1240,3 +1240,142 @@ squeue_getprivate(squeue_t *sqp, sqprivate_t p)
return (&sqp->sq_private[p]);
}
+
+/*
+ * Squeue callback queued by squeue_synch_enter().  It releases the
+ * thread waiting there and then parks the squeue until that thread
+ * calls squeue_synch_exit().
+ */
+/* ARGSUSED */
+void
+squeue_wakeup_conn(void *arg, mblk_t *mp, void *arg2)
+{
+	conn_t *conn = arg;
+	squeue_t *sq = conn->conn_sqp;
+
+	mutex_enter(&sq->sq_lock);
+
+	/* The squeue must read as paused before the waiter is released. */
+	sq->sq_state |= SQS_PAUSE;
+
+	/*
+	 * Clearing MSGWAITSYNC is the go-ahead for the thread blocked in
+	 * squeue_synch_enter(); that thread frees the mblk.
+	 */
+	ASSERT(mp->b_flag & MSGWAITSYNC);
+	mp->b_flag &= ~MSGWAITSYNC;
+	cv_broadcast(&conn->conn_sq_cv);
+
+	/*
+	 * The work is being done by the other thread on our behalf, so
+	 * stay here until SQS_PAUSE is cleared.
+	 */
+	while (sq->sq_state & SQS_PAUSE)
+		cv_wait(&sq->sq_synch_cv, &sq->sq_lock);
+
+	mutex_exit(&sq->sq_lock);
+}
+
+/*
+ * Obtain exclusive, synchronous access to a squeue on behalf of the
+ * connection passed in `arg'.
+ *
+ * Returns 0 once access has been granted, or ENOMEM when the mblk
+ * needed to queue the request cannot be allocated.
+ */
+/* ARGSUSED */
+int
+squeue_synch_enter(squeue_t *sqp, void *arg, uint8_t tag)
+{
+	conn_t *connp = (conn_t *)arg;
+	mblk_t *waitmp;
+
+	mutex_enter(&sqp->sq_lock);
+
+	if (sqp->sq_first == NULL && !(sqp->sq_state & SQS_PROC)) {
+		/*
+		 * Fast path: nothing is queued and nobody owns the
+		 * squeue, so take it over directly.  The caller cannot
+		 * already be the owner because it comes from the
+		 * application.
+		 */
+		ASSERT(sqp->sq_run == NULL);
+
+		sqp->sq_state |= SQS_PROC;
+		sqp->sq_run = curthread;
+		mutex_exit(&sqp->sq_lock);
+
+#if SQUEUE_DEBUG
+		sqp->sq_curmp = NULL;
+		sqp->sq_curproc = NULL;
+		sqp->sq_connp = connp;
+#endif
+		connp->conn_on_sqp = B_TRUE;
+		return (0);
+	}
+
+	/* Slow path: queue a marker mblk and wait for it to be processed. */
+	waitmp = allocb(0, BPRI_MED);
+	if (waitmp == NULL) {
+		mutex_exit(&sqp->sq_lock);
+		return (ENOMEM);
+	}
+
+	/*
+	 * MSGWAITSYNC tags the mblk as a request for synchronous squeue
+	 * access.  squeue_wakeup_conn() clears the flag when it runs,
+	 * and from then on this thread has exclusive access.
+	 */
+	waitmp->b_flag |= MSGWAITSYNC;
+
+	CONN_INC_REF(connp);
+	SET_SQUEUE(waitmp, squeue_wakeup_conn, connp);
+	ENQUEUE_CHAIN(sqp, waitmp, waitmp, 1);
+
+	ASSERT(sqp->sq_run != curthread);
+
+	/* Sleep until the queued mblk has been processed. */
+	while (waitmp->b_flag & MSGWAITSYNC)
+		cv_wait(&connp->conn_sq_cv, &sqp->sq_lock);
+	mutex_exit(&sqp->sq_lock);
+
+	freeb(waitmp);
+
+	return (0);
+}
+
+/*
+ * Give up the synchronous squeue access obtained through
+ * squeue_synch_enter().
+ */
+/* ARGSUSED */
+void
+squeue_synch_exit(squeue_t *sqp, void *arg)
+{
+	conn_t *connp = (conn_t *)arg;
+
+	mutex_enter(&sqp->sq_lock);
+
+	if (sqp->sq_run != curthread) {
+		/*
+		 * The squeue belongs to another thread that is parked
+		 * waiting for us.  Clear SQS_PAUSE and wake it so it can
+		 * carry on processing.
+		 */
+		ASSERT(sqp->sq_state & SQS_PAUSE);
+		sqp->sq_state &= ~SQS_PAUSE;
+
+		/* Only one thread is expected to wait on sq_synch_cv. */
+		cv_signal(&sqp->sq_synch_cv);
+		mutex_exit(&sqp->sq_lock);
+		return;
+	}
+
+	/* This thread owns the squeue: release ownership. */
+	ASSERT(sqp->sq_state & SQS_PROC);
+
+	sqp->sq_state &= ~SQS_PROC;
+	sqp->sq_run = NULL;
+	connp->conn_on_sqp = B_FALSE;
+
+	if (sqp->sq_first != NULL) {
+		/*
+		 * An ordinary squeue thread would most likely go on to
+		 * drain the pending requests.  This operation ran
+		 * synchronously and its thread should not be held up,
+		 * so the worker thread is woken immediately to deal
+		 * with the outstanding work.
+		 */
+		sqp->sq_awaken = lbolt;
+		cv_signal(&sqp->sq_worker_cv);
+	}
+	mutex_exit(&sqp->sq_lock);
+}
diff --git a/usr/src/uts/common/inet/tcp.h b/usr/src/uts/common/inet/tcp.h
index 3a557048d6..76d1864d62 100644
--- a/usr/src/uts/common/inet/tcp.h
+++ b/usr/src/uts/common/inet/tcp.h
@@ -35,6 +35,7 @@ extern "C" {
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <sys/socket.h>
+#include <sys/socket_proto.h>
#include <sys/sodirect.h>
#include <sys/multidata.h>
#include <sys/md5.h>
@@ -201,7 +202,6 @@ typedef struct tcp_s {
#define TCP_OFO_FIN_VALID 0x8 /* Has TCP received an out of order FIN? */
- int32_t tcp_xmit_hiwater; /* Send buffer high water mark. */
timeout_id_t tcp_timer_tid; /* Control block for timer service */
uchar_t tcp_timer_backoff; /* Backoff shift count. */
@@ -340,7 +340,10 @@ typedef struct tcp_s {
struct tcp_s *tcp_listener; /* Our listener */
- int32_t tcp_xmit_lowater; /* Send buffer low water mark. */
+ size_t tcp_xmit_hiwater; /* Send buffer high water mark. */
+ size_t tcp_xmit_lowater; /* Send buffer low water mark. */
+ size_t tcp_recv_hiwater; /* Recv high water mark */
+ size_t tcp_recv_lowater; /* Recv low water mark */
uint32_t tcp_irs; /* Initial recv seq num */
uint32_t tcp_fss; /* Final/fin send seq num */
@@ -491,6 +494,7 @@ typedef struct tcp_s {
struct tcp_s *tcp_acceptor_hash; /* Acceptor hash chain */
struct tcp_s **tcp_ptpahn; /* Pointer to previous accept hash next. */
struct tcp_s *tcp_bind_hash; /* Bind hash chain */
+ struct tcp_s *tcp_bind_hash_port; /* tcp_t's bound to the same lport */
struct tcp_s **tcp_ptpbhn;
boolean_t tcp_ire_ill_check_done;
@@ -599,6 +603,15 @@ typedef struct tcp_s {
boolean_t tcp_flow_stopped;
/*
+ * The socket generation number is bumped when an outgoing connection
+ * attempt is made, and it is sent up to the socket when the
+ * connection is successfully established or an error occurs. The
+ * generation is used to ensure that the socket does not miss the
+ * asynchronous notification.
+ */
+ sock_connid_t tcp_connid;
+
+ /*
* tcp_sodirect is used by tcp on the receive side to push mblk_t(s)
* directly to sockfs. Also, to schedule asynchronous copyout directly
* to a pending user-land uio buffer.
diff --git a/usr/src/uts/common/inet/tcp/tcp.c b/usr/src/uts/common/inet/tcp/tcp.c
index 4bb50d2344..ce7d9fb395 100644
--- a/usr/src/uts/common/inet/tcp/tcp.c
+++ b/usr/src/uts/common/inet/tcp/tcp.c
@@ -58,6 +58,7 @@
#include <sys/errno.h>
#include <sys/signal.h>
#include <sys/socket.h>
+#include <sys/socketvar.h>
#include <sys/sockio.h>
#include <sys/isa_defs.h>
#include <sys/md5.h>
@@ -78,7 +79,7 @@
#include <inet/ip_impl.h>
#include <inet/ip6.h>
#include <inet/ip_ndp.h>
-#include <inet/mi.h>
+#include <inet/proto_set.h>
#include <inet/mib2.h>
#include <inet/nd.h>
#include <inet/optcom.h>
@@ -386,11 +387,8 @@ kstat_t *tcp_g_kstat;
* tcp write side.
*/
#define CALL_IP_WPUT(connp, q, mp) { \
- tcp_stack_t *tcps; \
- \
- tcps = connp->conn_netstack->netstack_tcp; \
ASSERT(((q)->q_flag & QREADR) == 0); \
- TCP_DBGSTAT(tcps, tcp_ip_output); \
+ TCP_DBGSTAT(connp->conn_netstack->netstack_tcp, tcp_ip_output); \
connp->conn_send(connp, (mp), (q), IP_WPUT); \
}
@@ -650,6 +648,19 @@ typedef struct tcp_opt_s {
} tcp_opt_t;
/*
+ * TCP option struct passing information between listener and eager.
+ */
+struct tcp_options {
+ uint_t to_flags;
+ ssize_t to_boundif; /* IPV6_BOUND_IF */
+ sock_upper_handle_t to_handle;
+};
+
+#define TCPOPT_BOUNDIF 0x00000001 /* set IPV6_BOUND_IF */
+#define TCPOPT_RECVPKTINFO 0x00000002 /* set IPV6_RECVPKTINFO */
+#define TCPOPT_UPPERHANDLE 0x00000004 /* set upper handle */
+
+/*
* RFC1323-recommended phrasing of TSTAMP option, for easier parsing
*/
@@ -742,6 +753,7 @@ void tcp_input(void *arg, mblk_t *mp, void *arg2);
void tcp_rput_data(void *arg, mblk_t *mp, void *arg2);
static void tcp_close_output(void *arg, mblk_t *mp, void *arg2);
void tcp_output(void *arg, mblk_t *mp, void *arg2);
+void tcp_output_urgent(void *arg, mblk_t *mp, void *arg2);
static void tcp_rsrv_input(void *arg, mblk_t *mp, void *arg2);
static void tcp_timer_handler(void *arg, mblk_t *mp, void *arg2);
static void tcp_linger_interrupted(void *arg, mblk_t *mp, void *arg2);
@@ -750,7 +762,7 @@ static void tcp_linger_interrupted(void *arg, mblk_t *mp, void *arg2);
/* Prototype for TCP functions */
static void tcp_random_init(void);
int tcp_random(void);
-static void tcp_accept(tcp_t *tcp, mblk_t *mp);
+static void tcp_tli_accept(tcp_t *tcp, mblk_t *mp);
static void tcp_accept_swap(tcp_t *listener, tcp_t *acceptor,
tcp_t *eager);
static int tcp_adapt_ire(tcp_t *tcp, mblk_t *ire_mp);
@@ -761,12 +773,12 @@ static void tcp_closei_local(tcp_t *tcp);
static void tcp_close_detached(tcp_t *tcp);
static boolean_t tcp_conn_con(tcp_t *tcp, uchar_t *iphdr, tcph_t *tcph,
mblk_t *idmp, mblk_t **defermp);
-static void tcp_connect(tcp_t *tcp, mblk_t *mp);
-static void tcp_connect_ipv4(tcp_t *tcp, mblk_t *mp, ipaddr_t *dstaddrp,
- in_port_t dstport, uint_t srcid);
-static void tcp_connect_ipv6(tcp_t *tcp, mblk_t *mp, in6_addr_t *dstaddrp,
+static void tcp_tpi_connect(tcp_t *tcp, mblk_t *mp);
+static int tcp_connect_ipv4(tcp_t *tcp, ipaddr_t *dstaddrp,
+ in_port_t dstport, uint_t srcid, cred_t *cr, pid_t pid);
+static int tcp_connect_ipv6(tcp_t *tcp, in6_addr_t *dstaddrp,
in_port_t dstport, uint32_t flowinfo, uint_t srcid,
- uint32_t scope_id);
+ uint32_t scope_id, cred_t *cr, pid_t pid);
static int tcp_clean_death(tcp_t *tcp, int err, uint8_t tag);
static void tcp_def_q_set(tcp_t *tcp, mblk_t *mp);
static void tcp_disconnect(tcp_t *tcp, mblk_t *mp);
@@ -803,11 +815,9 @@ static int tcp_header_init_ipv6(tcp_t *tcp);
int tcp_init(tcp_t *tcp, queue_t *q);
static int tcp_init_values(tcp_t *tcp);
static mblk_t *tcp_ip_advise_mblk(void *addr, int addr_len, ipic_t **ipic);
-static mblk_t *tcp_ip_bind_mp(tcp_t *tcp, t_scalar_t bind_prim,
- t_scalar_t addr_length);
static void tcp_ip_ire_mark_advice(tcp_t *tcp);
static void tcp_ip_notify(tcp_t *tcp);
-static mblk_t *tcp_ire_mp(mblk_t *mp);
+static mblk_t *tcp_ire_mp(mblk_t **mpp);
static void tcp_iss_init(tcp_t *tcp);
static void tcp_keepalive_killer(void *arg);
static int tcp_parse_options(tcph_t *tcph, tcp_opt_t *tcpopt);
@@ -816,8 +826,8 @@ static int tcp_conprim_opt_process(tcp_t *tcp, mblk_t *mp,
int *do_disconnectp, int *t_errorp, int *sys_errorp);
static boolean_t tcp_allow_connopt_set(int level, int name);
int tcp_opt_default(queue_t *q, int level, int name, uchar_t *ptr);
-int tcp_opt_get(queue_t *q, int level, int name, uchar_t *ptr);
-int tcp_opt_set(queue_t *q, uint_t optset_context, int level,
+int tcp_tpi_opt_get(queue_t *q, int level, int name, uchar_t *ptr);
+int tcp_tpi_opt_set(queue_t *q, uint_t optset_context, int level,
int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp,
uchar_t *outvalp, void *thisdg_attrs, cred_t *cr,
mblk_t *mblk);
@@ -842,7 +852,8 @@ static void tcp_reinit_values(tcp_t *tcp);
static void tcp_report_item(mblk_t *mp, tcp_t *tcp, int hashval,
tcp_t *thisstream, cred_t *cr);
-static uint_t tcp_rcv_drain(queue_t *q, tcp_t *tcp);
+static uint_t tcp_rwnd_reopen(tcp_t *tcp);
+static uint_t tcp_rcv_drain(tcp_t *tcp);
static void tcp_sack_rxmit(tcp_t *tcp, uint_t *flags);
static boolean_t tcp_send_rst_chk(tcp_stack_t *);
static void tcp_ss_rexmit(tcp_t *tcp);
@@ -868,7 +879,8 @@ static in_port_t tcp_update_next_port(in_port_t port, const tcp_t *tcp,
boolean_t random);
static in_port_t tcp_get_next_priv_port(const tcp_t *);
static void tcp_wput_sock(queue_t *q, mblk_t *mp);
-void tcp_wput_accept(queue_t *q, mblk_t *mp);
+static void tcp_wput_fallback(queue_t *q, mblk_t *mp);
+void tcp_tpi_accept(queue_t *q, mblk_t *mp);
static void tcp_wput_data(tcp_t *tcp, mblk_t *mp, boolean_t urgent);
static void tcp_wput_flush(tcp_t *tcp, mblk_t *mp);
static void tcp_wput_iocdata(tcp_t *tcp, mblk_t *mp);
@@ -901,9 +913,7 @@ static boolean_t tcp_check_policy(tcp_t *, mblk_t *, ipha_t *, ip6_t *,
boolean_t, boolean_t);
static void tcp_icmp_error_ipv6(tcp_t *tcp, mblk_t *mp,
boolean_t ipsec_mctl);
-static mblk_t *tcp_setsockopt_mp(int level, int cmd,
- char *opt, int optlen);
-static int tcp_build_hdrs(queue_t *, tcp_t *);
+static int tcp_build_hdrs(tcp_t *);
static void tcp_time_wait_processing(tcp_t *tcp, mblk_t *mp,
uint32_t seg_seq, uint32_t seg_ack, int seg_len,
tcph_t *tcph);
@@ -943,7 +953,7 @@ static int tcp_squeue_switch(int);
static int tcp_open(queue_t *, dev_t *, int, int, cred_t *, boolean_t);
static int tcp_openv4(queue_t *, dev_t *, int, int, cred_t *);
static int tcp_openv6(queue_t *, dev_t *, int, int, cred_t *);
-static int tcp_close(queue_t *, int);
+static int tcp_tpi_close(queue_t *, int);
static int tcpclose_accept(queue_t *);
static void tcp_squeue_add(squeue_t *);
@@ -958,6 +968,19 @@ extern void tcp_kssl_input(tcp_t *, mblk_t *);
void tcp_eager_kill(void *arg, mblk_t *mp, void *arg2);
void tcp_clean_death_wrapper(void *arg, mblk_t *mp, void *arg2);
+static int tcp_accept(sock_lower_handle_t, sock_lower_handle_t,
+ sock_upper_handle_t, cred_t *);
+static int tcp_listen(sock_lower_handle_t, int, cred_t *);
+static int tcp_post_ip_bind(tcp_t *, mblk_t *, int);
+static int tcp_do_listen(conn_t *, int, cred_t *);
+static int tcp_do_connect(conn_t *, const struct sockaddr *, socklen_t,
+ cred_t *, pid_t);
+static int tcp_do_bind(conn_t *, struct sockaddr *, socklen_t, cred_t *,
+ boolean_t);
+static int tcp_do_unbind(conn_t *);
+static int tcp_bind_check(conn_t *, struct sockaddr *, socklen_t, cred_t *,
+ boolean_t);
+
/*
* Routines related to the TCP_IOC_ABORT_CONN ioctl command.
*
@@ -1001,11 +1024,11 @@ static struct module_info tcp_winfo = {
* We have separate open functions for the /dev/tcp and /dev/tcp6 devices.
*/
struct qinit tcp_rinitv4 = {
- NULL, (pfi_t)tcp_rsrv, tcp_openv4, tcp_close, NULL, &tcp_rinfo
+ NULL, (pfi_t)tcp_rsrv, tcp_openv4, tcp_tpi_close, NULL, &tcp_rinfo
};
struct qinit tcp_rinitv6 = {
- NULL, (pfi_t)tcp_rsrv, tcp_openv6, tcp_close, NULL, &tcp_rinfo
+ NULL, (pfi_t)tcp_rsrv, tcp_openv6, tcp_tpi_close, NULL, &tcp_rinfo
};
struct qinit tcp_winit = {
@@ -1017,6 +1040,11 @@ struct qinit tcp_sock_winit = {
(pfi_t)tcp_wput_sock, (pfi_t)tcp_wsrv, NULL, NULL, NULL, &tcp_winfo
};
+/* TCP entry point during fallback */
+struct qinit tcp_fallback_sock_winit = {
+ (pfi_t)tcp_wput_fallback, NULL, NULL, NULL, NULL, &tcp_winfo
+};
+
/*
* Entry points for TCP as a acceptor STREAM opened by sockfs when doing
* an accept. Avoid allocating data structures since eager has already
@@ -1027,7 +1055,7 @@ struct qinit tcp_acceptor_rinit = {
};
struct qinit tcp_acceptor_winit = {
- (pfi_t)tcp_wput_accept, NULL, NULL, NULL, NULL, &tcp_winfo
+ (pfi_t)tcp_tpi_accept, NULL, NULL, NULL, NULL, &tcp_winfo
};
/*
@@ -1036,7 +1064,7 @@ struct qinit tcp_acceptor_winit = {
* have a separate one for tcp_openv6.
*/
struct qinit tcp_loopback_rinit = {
- (pfi_t)0, (pfi_t)tcp_rsrv, tcp_openv4, tcp_close, (pfi_t)0,
+ (pfi_t)0, (pfi_t)tcp_rsrv, tcp_openv4, tcp_tpi_close, (pfi_t)0,
&tcp_rinfo, NULL, tcp_fuse_rrw, tcp_fuse_rinfop, STRUIOT_STANDARD
};
@@ -1050,6 +1078,8 @@ struct streamtab tcpinfov6 = {
&tcp_rinitv6, &tcp_winit
};
+sock_downcalls_t sock_tcp_downcalls;
+
/*
* Have to ensure that tcp_g_q_close is not done by an
* interrupt thread.
@@ -1907,6 +1937,7 @@ tcp_time_wait_collector(void *arg)
CALLOUT_FLAG_ROUNDUP);
mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
}
+
/*
* Reply to a clients T_CONN_RES TPI message. This function
* is used only for TLI/XTI listener. Sockfs sends T_CONN_RES
@@ -1914,7 +1945,7 @@ tcp_time_wait_collector(void *arg)
* Read the block comment on top of tcp_conn_request().
*/
static void
-tcp_accept(tcp_t *listener, mblk_t *mp)
+tcp_tli_accept(tcp_t *listener, mblk_t *mp)
{
tcp_t *acceptor;
tcp_t *eager;
@@ -1923,6 +1954,7 @@ tcp_accept(tcp_t *listener, mblk_t *mp)
t_uscalar_t acceptor_id;
t_scalar_t seqnum;
mblk_t *opt_mp = NULL; /* T_OPTMGMT_REQ messages */
+ struct tcp_options *tcpopt;
mblk_t *ok_mp;
mblk_t *mp1;
tcp_stack_t *tcps = listener->tcp_tcps;
@@ -2070,7 +2102,8 @@ tcp_accept(tcp_t *listener, mblk_t *mp)
ASSERT(eager->tcp_connp->conn_ref >= 1);
/* Pre allocate the stroptions mblk also */
- opt_mp = allocb(sizeof (struct stroptions), BPRI_HI);
+ opt_mp = allocb(MAX(sizeof (struct tcp_options),
+ sizeof (struct T_conn_res)), BPRI_HI);
if (opt_mp == NULL) {
CONN_DEC_REF(acceptor->tcp_connp);
CONN_DEC_REF(eager->tcp_connp);
@@ -2078,29 +2111,20 @@ tcp_accept(tcp_t *listener, mblk_t *mp)
return;
}
DB_TYPE(opt_mp) = M_SETOPTS;
- opt_mp->b_wptr += sizeof (struct stroptions);
+ opt_mp->b_wptr += sizeof (struct tcp_options);
+ tcpopt = (struct tcp_options *)opt_mp->b_rptr;
+ tcpopt->to_flags = 0;
/*
* Prepare for inheriting IPV6_BOUND_IF and IPV6_RECVPKTINFO
- * from listener to acceptor. The message is chained on opt_mp
- * which will be sent onto eager's squeue.
+ * from listener to acceptor.
*/
if (listener->tcp_bound_if != 0) {
- /* allocate optmgmt req */
- mp1 = tcp_setsockopt_mp(IPPROTO_IPV6,
- IPV6_BOUND_IF, (char *)&listener->tcp_bound_if,
- sizeof (int));
- if (mp1 != NULL)
- linkb(opt_mp, mp1);
+ tcpopt->to_flags |= TCPOPT_BOUNDIF;
+ tcpopt->to_boundif = listener->tcp_bound_if;
}
if (listener->tcp_ipv6_recvancillary & TCP_IPV6_RECVPKTINFO) {
- uint_t on = 1;
-
- /* allocate optmgmt req */
- mp1 = tcp_setsockopt_mp(IPPROTO_IPV6,
- IPV6_RECVPKTINFO, (char *)&on, sizeof (on));
- if (mp1 != NULL)
- linkb(opt_mp, mp1);
+ tcpopt->to_flags |= TCPOPT_RECVPKTINFO;
}
/* Re-use mp1 to hold a copy of mp, in case reallocb fails */
@@ -2341,6 +2365,7 @@ tcp_accept(tcp_t *listener, mblk_t *mp)
finish:
ASSERT(acceptor->tcp_detached);
ASSERT(tcps->tcps_g_q != NULL);
+ ASSERT(!IPCL_IS_NONSTR(acceptor->tcp_connp));
acceptor->tcp_rq = tcps->tcps_g_q;
acceptor->tcp_wq = WR(tcps->tcps_g_q);
(void) tcp_clean_death(acceptor, 0, 2);
@@ -2995,39 +3020,24 @@ error:
return (0);
}
-/*
- * tcp_bind is called (holding the writer lock) by tcp_wput_proto to process a
- * O_T_BIND_REQ/T_BIND_REQ message.
- */
static void
-tcp_bind(tcp_t *tcp, mblk_t *mp)
+tcp_tpi_bind(tcp_t *tcp, mblk_t *mp)
{
+ int error;
+ conn_t *connp = tcp->tcp_connp;
+ struct sockaddr *sa;
+ mblk_t *mp1;
+ struct T_bind_req *tbr;
+ int backlog;
+ socklen_t len;
sin_t *sin;
sin6_t *sin6;
- mblk_t *mp1;
- in_port_t requested_port;
- in_port_t allocated_port;
- struct T_bind_req *tbr;
- boolean_t bind_to_req_port_only;
- boolean_t backlog_update = B_FALSE;
- boolean_t user_specified;
- in6_addr_t v6addr;
- ipaddr_t v4addr;
- uint_t origipversion;
- int err;
- queue_t *q = tcp->tcp_wq;
- conn_t *connp = tcp->tcp_connp;
- mlp_type_t addrtype, mlptype;
- zone_t *zone;
- cred_t *cr;
- in_port_t mlp_port;
- tcp_stack_t *tcps = tcp->tcp_tcps;
ASSERT((uintptr_t)(mp->b_wptr - mp->b_rptr) <= (uintptr_t)INT_MAX);
if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) {
if (tcp->tcp_debug) {
(void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
- "tcp_bind: bad req, len %u",
+ "tcp_tpi_bind: bad req, len %u",
(uint_t)(mp->b_wptr - mp->b_rptr));
}
tcp_err_ack(tcp, mp, TPROTO, 0);
@@ -3041,442 +3051,80 @@ tcp_bind(tcp_t *tcp, mblk_t *mp)
}
mp = mp1;
tbr = (struct T_bind_req *)mp->b_rptr;
- if (tcp->tcp_state >= TCPS_BOUND) {
- if ((tcp->tcp_state == TCPS_BOUND ||
- tcp->tcp_state == TCPS_LISTEN) &&
- tcp->tcp_conn_req_max != tbr->CONIND_number &&
- tbr->CONIND_number > 0) {
- /*
- * Handle listen() increasing CONIND_number.
- * This is more "liberal" then what the TPI spec
- * requires but is needed to avoid a t_unbind
- * when handling listen() since the port number
- * might be "stolen" between the unbind and bind.
- */
- backlog_update = B_TRUE;
- goto do_bind;
- }
- if (tcp->tcp_debug) {
- (void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
- "tcp_bind: bad state, %d", tcp->tcp_state);
- }
- tcp_err_ack(tcp, mp, TOUTSTATE, 0);
- return;
- }
- origipversion = tcp->tcp_ipversion;
- switch (tbr->ADDR_length) {
- case 0: /* request for a generic port */
+ backlog = tbr->CONIND_number;
+ len = tbr->ADDR_length;
+
+ switch (len) {
+ case 0: /* request for a generic port */
tbr->ADDR_offset = sizeof (struct T_bind_req);
if (tcp->tcp_family == AF_INET) {
tbr->ADDR_length = sizeof (sin_t);
sin = (sin_t *)&tbr[1];
*sin = sin_null;
sin->sin_family = AF_INET;
+ sa = (struct sockaddr *)sin;
+ len = sizeof (sin_t);
mp->b_wptr = (uchar_t *)&sin[1];
- tcp->tcp_ipversion = IPV4_VERSION;
- IN6_IPADDR_TO_V4MAPPED(INADDR_ANY, &v6addr);
} else {
ASSERT(tcp->tcp_family == AF_INET6);
tbr->ADDR_length = sizeof (sin6_t);
sin6 = (sin6_t *)&tbr[1];
*sin6 = sin6_null;
sin6->sin6_family = AF_INET6;
+ sa = (struct sockaddr *)sin6;
+ len = sizeof (sin6_t);
mp->b_wptr = (uchar_t *)&sin6[1];
- tcp->tcp_ipversion = IPV6_VERSION;
- V6_SET_ZERO(v6addr);
}
- requested_port = 0;
break;
- case sizeof (sin_t): /* Complete IPv4 address */
- sin = (sin_t *)mi_offset_param(mp, tbr->ADDR_offset,
+ case sizeof (sin_t): /* Complete IPv4 address */
+ sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset,
sizeof (sin_t));
- if (sin == NULL || !OK_32PTR((char *)sin)) {
- if (tcp->tcp_debug) {
- (void) strlog(TCP_MOD_ID, 0, 1,
- SL_ERROR|SL_TRACE,
- "tcp_bind: bad address parameter, "
- "offset %d, len %d",
- tbr->ADDR_offset, tbr->ADDR_length);
- }
- tcp_err_ack(tcp, mp, TPROTO, 0);
- return;
- }
- /*
- * With sockets sockfs will accept bogus sin_family in
- * bind() and replace it with the family used in the socket
- * call.
- */
- if (sin->sin_family != AF_INET ||
- tcp->tcp_family != AF_INET) {
- tcp_err_ack(tcp, mp, TSYSERR, EAFNOSUPPORT);
- return;
- }
- requested_port = ntohs(sin->sin_port);
- tcp->tcp_ipversion = IPV4_VERSION;
- v4addr = sin->sin_addr.s_addr;
- IN6_IPADDR_TO_V4MAPPED(v4addr, &v6addr);
break;
case sizeof (sin6_t): /* Complete IPv6 address */
- sin6 = (sin6_t *)mi_offset_param(mp,
+ sa = (struct sockaddr *)mi_offset_param(mp,
tbr->ADDR_offset, sizeof (sin6_t));
- if (sin6 == NULL || !OK_32PTR((char *)sin6)) {
- if (tcp->tcp_debug) {
- (void) strlog(TCP_MOD_ID, 0, 1,
- SL_ERROR|SL_TRACE,
- "tcp_bind: bad IPv6 address parameter, "
- "offset %d, len %d", tbr->ADDR_offset,
- tbr->ADDR_length);
- }
- tcp_err_ack(tcp, mp, TSYSERR, EINVAL);
- return;
- }
- if (sin6->sin6_family != AF_INET6 ||
- tcp->tcp_family != AF_INET6) {
- tcp_err_ack(tcp, mp, TSYSERR, EAFNOSUPPORT);
- return;
- }
- requested_port = ntohs(sin6->sin6_port);
- tcp->tcp_ipversion = IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr) ?
- IPV4_VERSION : IPV6_VERSION;
- v6addr = sin6->sin6_addr;
break;
default:
if (tcp->tcp_debug) {
(void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
- "tcp_bind: bad address length, %d",
+ "tcp_tpi_bind: bad address length, %d",
tbr->ADDR_length);
}
tcp_err_ack(tcp, mp, TBADADDR, 0);
return;
}
- tcp->tcp_bound_source_v6 = v6addr;
-
- /* Check for change in ipversion */
- if (origipversion != tcp->tcp_ipversion) {
- ASSERT(tcp->tcp_family == AF_INET6);
- err = tcp->tcp_ipversion == IPV6_VERSION ?
- tcp_header_init_ipv6(tcp) : tcp_header_init_ipv4(tcp);
- if (err) {
- tcp_err_ack(tcp, mp, TSYSERR, ENOMEM);
- return;
- }
- }
-
- /*
- * Initialize family specific fields. Copy of the src addr.
- * in tcp_t is needed for the lookup funcs.
- */
- if (tcp->tcp_ipversion == IPV6_VERSION) {
- tcp->tcp_ip6h->ip6_src = v6addr;
- } else {
- IN6_V4MAPPED_TO_IPADDR(&v6addr, tcp->tcp_ipha->ipha_src);
- }
- tcp->tcp_ip_src_v6 = v6addr;
- /*
- * For O_T_BIND_REQ:
- * Verify that the target port/addr is available, or choose
- * another.
- * For T_BIND_REQ:
- * Verify that the target port/addr is available or fail.
- * In both cases when it succeeds the tcp is inserted in the
- * bind hash table. This ensures that the operation is atomic
- * under the lock on the hash bucket.
- */
- bind_to_req_port_only = requested_port != 0 &&
- tbr->PRIM_type != O_T_BIND_REQ;
- /*
- * Get a valid port (within the anonymous range and should not
- * be a privileged one) to use if the user has not given a port.
- * If multiple threads are here, they may all start with
- * with the same initial port. But, it should be fine as long as
- * tcp_bindi will ensure that no two threads will be assigned
- * the same port.
- *
- * NOTE: XXX If a privileged process asks for an anonymous port, we
- * still check for ports only in the range > tcp_smallest_non_priv_port,
- * unless TCP_ANONPRIVBIND option is set.
- */
- mlptype = mlptSingle;
- mlp_port = requested_port;
- if (requested_port == 0) {
- requested_port = tcp->tcp_anon_priv_bind ?
- tcp_get_next_priv_port(tcp) :
- tcp_update_next_port(tcps->tcps_next_port_to_try,
- tcp, B_TRUE);
- if (requested_port == 0) {
- tcp_err_ack(tcp, mp, TNOADDR, 0);
- return;
- }
- user_specified = B_FALSE;
-
- /*
- * If the user went through one of the RPC interfaces to create
- * this socket and RPC is MLP in this zone, then give him an
- * anonymous MLP.
- */
- cr = DB_CREDDEF(mp, tcp->tcp_cred);
- if (connp->conn_anon_mlp && is_system_labeled()) {
- zone = crgetzone(cr);
- addrtype = tsol_mlp_addr_type(zone->zone_id,
- IPV6_VERSION, &v6addr,
- tcps->tcps_netstack->netstack_ip);
- if (addrtype == mlptSingle) {
- tcp_err_ack(tcp, mp, TNOADDR, 0);
- return;
- }
- mlptype = tsol_mlp_port_type(zone, IPPROTO_TCP,
- PMAPPORT, addrtype);
- mlp_port = PMAPPORT;
- }
- } else {
- int i;
- boolean_t priv = B_FALSE;
-
- /*
- * If the requested_port is in the well-known privileged range,
- * verify that the stream was opened by a privileged user.
- * Note: No locks are held when inspecting tcp_g_*epriv_ports
- * but instead the code relies on:
- * - the fact that the address of the array and its size never
- * changes
- * - the atomic assignment of the elements of the array
- */
- cr = DB_CREDDEF(mp, tcp->tcp_cred);
- if (requested_port < tcps->tcps_smallest_nonpriv_port) {
- priv = B_TRUE;
- } else {
- for (i = 0; i < tcps->tcps_g_num_epriv_ports; i++) {
- if (requested_port ==
- tcps->tcps_g_epriv_ports[i]) {
- priv = B_TRUE;
- break;
- }
- }
- }
- if (priv) {
- if (secpolicy_net_privaddr(cr, requested_port,
- IPPROTO_TCP) != 0) {
- if (tcp->tcp_debug) {
- (void) strlog(TCP_MOD_ID, 0, 1,
- SL_ERROR|SL_TRACE,
- "tcp_bind: no priv for port %d",
- requested_port);
- }
- tcp_err_ack(tcp, mp, TACCES, 0);
- return;
- }
- }
- user_specified = B_TRUE;
-
- if (is_system_labeled()) {
- zone = crgetzone(cr);
- addrtype = tsol_mlp_addr_type(zone->zone_id,
- IPV6_VERSION, &v6addr,
- tcps->tcps_netstack->netstack_ip);
- if (addrtype == mlptSingle) {
- tcp_err_ack(tcp, mp, TNOADDR, 0);
- return;
- }
- mlptype = tsol_mlp_port_type(zone, IPPROTO_TCP,
- requested_port, addrtype);
- }
- }
-
- if (mlptype != mlptSingle) {
- if (secpolicy_net_bindmlp(cr) != 0) {
- if (tcp->tcp_debug) {
- (void) strlog(TCP_MOD_ID, 0, 1,
- SL_ERROR|SL_TRACE,
- "tcp_bind: no priv for multilevel port %d",
- requested_port);
- }
- tcp_err_ack(tcp, mp, TACCES, 0);
- return;
- }
-
- /*
- * If we're specifically binding a shared IP address and the
- * port is MLP on shared addresses, then check to see if this
- * zone actually owns the MLP. Reject if not.
- */
- if (mlptype == mlptShared && addrtype == mlptShared) {
- /*
- * No need to handle exclusive-stack zones since
- * ALL_ZONES only applies to the shared stack.
- */
- zoneid_t mlpzone;
-
- mlpzone = tsol_mlp_findzone(IPPROTO_TCP,
- htons(mlp_port));
- if (connp->conn_zoneid != mlpzone) {
- if (tcp->tcp_debug) {
- (void) strlog(TCP_MOD_ID, 0, 1,
- SL_ERROR|SL_TRACE,
- "tcp_bind: attempt to bind port "
- "%d on shared addr in zone %d "
- "(should be %d)",
- mlp_port, connp->conn_zoneid,
- mlpzone);
- }
- tcp_err_ack(tcp, mp, TACCES, 0);
- return;
- }
- }
-
- if (!user_specified) {
- err = tsol_mlp_anon(zone, mlptype, connp->conn_ulp,
- requested_port, B_TRUE);
- if (err != 0) {
- if (tcp->tcp_debug) {
- (void) strlog(TCP_MOD_ID, 0, 1,
- SL_ERROR|SL_TRACE,
- "tcp_bind: cannot establish anon "
- "MLP for port %d",
- requested_port);
- }
- tcp_err_ack(tcp, mp, TSYSERR, err);
- return;
- }
- connp->conn_anon_port = B_TRUE;
- }
- connp->conn_mlp_type = mlptype;
- }
-
- allocated_port = tcp_bindi(tcp, requested_port, &v6addr,
- tcp->tcp_reuseaddr, B_FALSE, bind_to_req_port_only, user_specified);
-
- if (allocated_port == 0) {
- connp->conn_mlp_type = mlptSingle;
- if (connp->conn_anon_port) {
- connp->conn_anon_port = B_FALSE;
- (void) tsol_mlp_anon(zone, mlptype, connp->conn_ulp,
- requested_port, B_FALSE);
- }
- if (bind_to_req_port_only) {
- if (tcp->tcp_debug) {
- (void) strlog(TCP_MOD_ID, 0, 1,
- SL_ERROR|SL_TRACE,
- "tcp_bind: requested addr busy");
- }
- tcp_err_ack(tcp, mp, TADDRBUSY, 0);
- } else {
- /* If we are out of ports, fail the bind. */
- if (tcp->tcp_debug) {
- (void) strlog(TCP_MOD_ID, 0, 1,
- SL_ERROR|SL_TRACE,
- "tcp_bind: out of ports?");
- }
- tcp_err_ack(tcp, mp, TNOADDR, 0);
- }
- return;
- }
- ASSERT(tcp->tcp_state == TCPS_BOUND);
-do_bind:
- if (!backlog_update) {
- if (tcp->tcp_family == AF_INET)
- sin->sin_port = htons(allocated_port);
- else
- sin6->sin6_port = htons(allocated_port);
- }
- if (tcp->tcp_family == AF_INET) {
- if (tbr->CONIND_number != 0) {
- mp1 = tcp_ip_bind_mp(tcp, tbr->PRIM_type,
- sizeof (sin_t));
- } else {
- /* Just verify the local IP address */
- mp1 = tcp_ip_bind_mp(tcp, tbr->PRIM_type, IP_ADDR_LEN);
- }
- } else {
- if (tbr->CONIND_number != 0) {
- mp1 = tcp_ip_bind_mp(tcp, tbr->PRIM_type,
- sizeof (sin6_t));
+ error = tcp_bind_check(connp, sa, len, DB_CRED(mp),
+ tbr->PRIM_type != O_T_BIND_REQ);
+ if (error == 0) {
+ if (tcp->tcp_family == AF_INET) {
+ sin = (sin_t *)sa;
+ sin->sin_port = tcp->tcp_lport;
} else {
- /* Just verify the local IP address */
- mp1 = tcp_ip_bind_mp(tcp, tbr->PRIM_type,
- IPV6_ADDR_LEN);
- }
- }
- if (mp1 == NULL) {
- if (connp->conn_anon_port) {
- connp->conn_anon_port = B_FALSE;
- (void) tsol_mlp_anon(zone, mlptype, connp->conn_ulp,
- requested_port, B_FALSE);
+ sin6 = (sin6_t *)sa;
+ sin6->sin6_port = tcp->tcp_lport;
}
- connp->conn_mlp_type = mlptSingle;
- tcp_err_ack(tcp, mp, TSYSERR, ENOMEM);
- return;
- }
-
- tbr->PRIM_type = T_BIND_ACK;
- mp->b_datap->db_type = M_PCPROTO;
- /* Chain in the reply mp for tcp_rput() */
- mp1->b_cont = mp;
- mp = mp1;
-
- tcp->tcp_conn_req_max = tbr->CONIND_number;
- if (tcp->tcp_conn_req_max) {
- if (tcp->tcp_conn_req_max < tcps->tcps_conn_req_min)
- tcp->tcp_conn_req_max = tcps->tcps_conn_req_min;
- if (tcp->tcp_conn_req_max > tcps->tcps_conn_req_max_q)
- tcp->tcp_conn_req_max = tcps->tcps_conn_req_max_q;
- /*
- * If this is a listener, do not reset the eager list
- * and other stuffs. Note that we don't check if the
- * existing eager list meets the new tcp_conn_req_max
- * requirement.
- */
- if (tcp->tcp_state != TCPS_LISTEN) {
- tcp->tcp_state = TCPS_LISTEN;
- /* Initialize the chain. Don't need the eager_lock */
- tcp->tcp_eager_next_q0 = tcp->tcp_eager_prev_q0 = tcp;
- tcp->tcp_eager_next_drop_q0 = tcp;
- tcp->tcp_eager_prev_drop_q0 = tcp;
- tcp->tcp_second_ctimer_threshold =
- tcps->tcps_ip_abort_linterval;
+ if (backlog > 0) {
+ error = tcp_do_listen(connp, backlog, DB_CRED(mp));
}
}
-
- /*
- * We can call ip_bind directly which returns a T_BIND_ACK mp. The
- * processing continues in tcp_rput_other().
- *
- * We need to make sure that the conn_recv is set to a non-null
- * value before we insert the conn into the classifier table.
- * This is to avoid a race with an incoming packet which does an
- * ipcl_classify().
- */
- connp->conn_recv = tcp_conn_request;
- if (tcp->tcp_family == AF_INET6) {
- ASSERT(tcp->tcp_connp->conn_af_isv6);
- mp = ip_bind_v6(q, mp, tcp->tcp_connp, &tcp->tcp_sticky_ipp);
- } else {
- ASSERT(!tcp->tcp_connp->conn_af_isv6);
- mp = ip_bind_v4(q, mp, tcp->tcp_connp);
- }
- /*
- * If the bind cannot complete immediately
- * IP will arrange to call tcp_rput_other
- * when the bind completes.
- */
- if (mp != NULL) {
- tcp_rput_other(tcp, mp);
+done:
+ if (error > 0) {
+ tcp_err_ack(tcp, mp, TSYSERR, error);
+ } else if (error < 0) {
+ tcp_err_ack(tcp, mp, -error, 0);
} else {
- /*
- * Bind will be resumed later. Need to ensure
- * that conn doesn't disappear when that happens.
- * This will be decremented in ip_resume_tcp_bind().
- */
- CONN_INC_REF(tcp->tcp_connp);
+ mp->b_datap->db_type = M_PCPROTO;
+ tbr->PRIM_type = T_BIND_ACK;
+ putnext(tcp->tcp_rq, mp);
}
}
-
/*
* If the "bind_to_req_port_only" parameter is set, if the requested port
* number is available, return it, If not return 0
@@ -3560,12 +3208,14 @@ tcp_bindi(tcp_t *tcp, in_port_t port, const in6_addr_t *laddr,
mutex_enter(&tbf->tf_lock);
for (ltcp = tbf->tf_tcp; ltcp != NULL;
ltcp = ltcp->tcp_bind_hash) {
+ if (lport == ltcp->tcp_lport)
+ break;
+ }
+
+ for (; ltcp != NULL; ltcp = ltcp->tcp_bind_hash_port) {
boolean_t not_socket;
boolean_t exclbind;
- if (lport != ltcp->tcp_lport)
- continue;
-
lconnp = ltcp->tcp_connp;
/*
@@ -3817,6 +3467,7 @@ tcp_clean_death(tcp_t *tcp, int err, uint8_t tag)
{
mblk_t *mp;
queue_t *q;
+ conn_t *connp = tcp->tcp_connp;
tcp_stack_t *tcps = tcp->tcp_tcps;
sodirect_t *sodp;
@@ -3857,7 +3508,7 @@ tcp_clean_death(tcp_t *tcp, int err, uint8_t tag)
*/
tcp_closei_local(tcp);
if (!tcp->tcp_tconnind_started) {
- CONN_DEC_REF(tcp->tcp_connp);
+ CONN_DEC_REF(connp);
} else {
tcp->tcp_state = TCPS_BOUND;
}
@@ -3879,7 +3530,10 @@ tcp_clean_death(tcp_t *tcp, int err, uint8_t tag)
q = tcp->tcp_rq;
/* Trash all inbound data */
- flushq(q, FLUSHALL);
+ if (!IPCL_IS_NONSTR(connp)) {
+ ASSERT(q != NULL);
+ flushq(q, FLUSHALL);
+ }
/*
* If we are at least part way open and there is error
@@ -3900,16 +3554,22 @@ tcp_clean_death(tcp_t *tcp, int err, uint8_t tag)
(void) strlog(TCP_MOD_ID, 0, 1, SL_TRACE|SL_ERROR,
"tcp_clean_death: discon err %d", err);
}
- mp = mi_tpi_discon_ind(NULL, err, 0);
- if (mp != NULL) {
- putnext(q, mp);
+ if (IPCL_IS_NONSTR(connp)) {
+ /* Direct socket, use upcall */
+ (*connp->conn_upcalls->su_disconnected)(
+ connp->conn_upper_handle, tcp->tcp_connid, err);
} else {
- if (tcp->tcp_debug) {
- (void) strlog(TCP_MOD_ID, 0, 1,
- SL_ERROR|SL_TRACE,
- "tcp_clean_death, sending M_ERROR");
+ mp = mi_tpi_discon_ind(NULL, err, 0);
+ if (mp != NULL) {
+ putnext(q, mp);
+ } else {
+ if (tcp->tcp_debug) {
+ (void) strlog(TCP_MOD_ID, 0, 1,
+ SL_ERROR|SL_TRACE,
+ "tcp_clean_death, sending M_ERROR");
+ }
+ (void) putnextctl1(q, M_ERROR, EPROTO);
}
- (void) putnextctl1(q, M_ERROR, EPROTO);
}
if (tcp->tcp_state <= TCPS_SYN_RCVD) {
/* SYN_SENT or SYN_RCVD */
@@ -3921,6 +3581,9 @@ tcp_clean_death(tcp_t *tcp, int err, uint8_t tag)
}
tcp_reinit(tcp);
+ if (IPCL_IS_NONSTR(connp))
+ (void) tcp_do_unbind(connp);
+
return (-1);
}
@@ -3954,7 +3617,6 @@ tcp_stop_lingering(tcp_t *tcp)
*/
tcp_timers_stop(tcp);
-
tcp->tcp_detached = B_TRUE;
ASSERT(tcps->tcps_g_q != NULL);
tcp->tcp_rq = tcps->tcps_g_q;
@@ -3984,8 +3646,10 @@ finish:
mutex_enter(&tcp->tcp_closelock);
tcp->tcp_detached = B_TRUE;
ASSERT(tcps->tcps_g_q != NULL);
+
tcp->tcp_rq = tcps->tcps_g_q;
tcp->tcp_wq = WR(tcps->tcps_g_q);
+
tcp->tcp_closed = 1;
cv_signal(&tcp->tcp_closecv);
mutex_exit(&tcp->tcp_closelock);
@@ -4005,21 +3669,17 @@ tcp_close_linger_timeout(void *arg)
tcp_stop_lingering(tcp);
}
-static int
-tcp_close(queue_t *q, int flags)
+static void
+tcp_close_common(conn_t *connp, int flags)
{
- conn_t *connp = Q_TO_CONN(q);
tcp_t *tcp = connp->conn_tcp;
mblk_t *mp = &tcp->tcp_closemp;
boolean_t conn_ioctl_cleanup_reqd = B_FALSE;
mblk_t *bp;
- ASSERT(WR(q)->q_next == NULL);
ASSERT(connp->conn_ref >= 2);
/*
- * We are being closed as /dev/tcp or /dev/tcp6.
- *
* Mark the conn as closing. ill_pending_mp_add will not
* add any mp to the pending mp list, after this conn has
* started closing. Same for sq_pending_mp_add
@@ -4106,11 +3766,35 @@ tcp_close(queue_t *q, int flags)
if (conn_ioctl_cleanup_reqd)
conn_ioctl_cleanup(connp);
+ tcp->tcp_cpid = -1;
+}
+
+static int
+tcp_tpi_close(queue_t *q, int flags)
+{
+ conn_t *connp;
+
+ ASSERT(WR(q)->q_next == NULL);
+
+ if (flags & SO_FALLBACK) {
+ /*
+ * The stream is being closed while in fallback;
+ * simply free the resources that were allocated.
+ */
+ inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr));
+ qprocsoff(q);
+ goto done;
+ }
+
+ connp = Q_TO_CONN(q);
+ /*
+ * We are being closed as /dev/tcp or /dev/tcp6.
+ */
+ tcp_close_common(connp, flags);
+
qprocsoff(q);
inet_minor_free(connp->conn_minor_arena, connp->conn_dev);
- tcp->tcp_cpid = -1;
-
/*
* Drop IP's reference on the conn. This is the last reference
* on the connp if the state was less than established. If the
@@ -4124,6 +3808,7 @@ tcp_close(queue_t *q, int flags)
* packets in squeue for the timewait state.
*/
CONN_DEC_REF(connp);
+done:
q->q_ptr = WR(q)->q_ptr = NULL;
return (0);
}
@@ -4615,11 +4300,13 @@ tcp_free(tcp_t *tcp)
}
if (tcp->tcp_fused_sigurg_mp != NULL) {
+ ASSERT(!IPCL_IS_NONSTR(tcp->tcp_connp));
freeb(tcp->tcp_fused_sigurg_mp);
tcp->tcp_fused_sigurg_mp = NULL;
}
if (tcp->tcp_ordrel_mp != NULL) {
+ ASSERT(!IPCL_IS_NONSTR(tcp->tcp_connp));
freeb(tcp->tcp_ordrel_mp);
tcp->tcp_ordrel_mp = NULL;
}
@@ -4761,10 +4448,19 @@ tcp_conn_con(tcp_t *tcp, uchar_t *iphdr, tcph_t *tcph, mblk_t *idmp,
DB_CPID(mp) = DB_CPID(idmp);
}
- if (defermp == NULL)
- putnext(tcp->tcp_rq, mp);
- else
+ if (defermp == NULL) {
+ conn_t *connp = tcp->tcp_connp;
+ if (IPCL_IS_NONSTR(connp)) {
+ (*connp->conn_upcalls->su_connected)
+ (connp->conn_upper_handle, tcp->tcp_connid, cr,
+ DB_CPID(mp));
+ freemsg(mp);
+ } else {
+ putnext(tcp->tcp_rq, mp);
+ }
+ } else {
*defermp = mp;
+ }
if (tcp->tcp_conn.tcp_opts_conn_req != NULL)
tcp_close_mpp(&tcp->tcp_conn.tcp_opts_conn_req);
@@ -4946,10 +4642,13 @@ tcp_conn_create_v6(conn_t *lconnp, conn_t *connp, mblk_t *mp,
/* Inherit information from the "parent" */
tcp->tcp_ipversion = ltcp->tcp_ipversion;
tcp->tcp_family = ltcp->tcp_family;
+
tcp->tcp_wq = ltcp->tcp_wq;
tcp->tcp_rq = ltcp->tcp_rq;
+
tcp->tcp_mss = tcps->tcps_mss_def_ipv6;
tcp->tcp_detached = B_TRUE;
+ SOCK_CONNID_INIT(tcp->tcp_connid);
if ((err = tcp_init_values(tcp)) != 0) {
freemsg(tpi_mp);
return (err);
@@ -5100,6 +4799,12 @@ tcp_conn_create_v6(conn_t *lconnp, conn_t *connp, mblk_t *mp,
tcp->tcp_kssl_pending = B_TRUE;
}
+ /* Inherit the listener's non-STREAMS flag and upcalls */
+ if (IPCL_IS_NONSTR(lconnp)) {
+ connp->conn_flags |= IPCL_NONSTR;
+ connp->conn_upcalls = lconnp->conn_upcalls;
+ }
+
return (0);
}
@@ -5159,6 +4864,7 @@ tcp_conn_create_v4(conn_t *lconnp, conn_t *connp, ipha_t *ipha,
tcp->tcp_rq = ltcp->tcp_rq;
tcp->tcp_mss = tcps->tcps_mss_def_ipv4;
tcp->tcp_detached = B_TRUE;
+ SOCK_CONNID_INIT(tcp->tcp_connid);
if ((err = tcp_init_values(tcp)) != 0) {
freemsg(tpi_mp);
return (err);
@@ -5219,6 +4925,12 @@ tcp_conn_create_v4(conn_t *lconnp, conn_t *connp, ipha_t *ipha,
tcp->tcp_kssl_pending = B_TRUE;
}
+ /* Inherit the listener's non-STREAMS flag and upcalls */
+ if (IPCL_IS_NONSTR(lconnp)) {
+ connp->conn_flags |= IPCL_NONSTR;
+ connp->conn_upcalls = lconnp->conn_upcalls;
+ }
+
return (0);
}
@@ -5474,7 +5186,7 @@ tcp_update_label(tcp_t *tcp, const cred_t *cr)
if (tsol_update_sticky(&tcp->tcp_sticky_ipp,
&tcp->tcp_label_len, optbuf) != 0)
return (B_FALSE);
- if (tcp_build_hdrs(tcp->tcp_rq, tcp) != 0)
+ if (tcp_build_hdrs(tcp) != 0)
return (B_FALSE);
}
@@ -5732,12 +5444,13 @@ tcp_conn_request(void *arg, mblk_t *mp, void *arg2)
eager = econnp->conn_tcp;
/*
- * Pre-allocate the T_ordrel_ind mblk so that at close time, we
- * will always have that to send up. Otherwise, we need to do
+ * Pre-allocate the T_ordrel_ind mblk for TPI socket so that at close
+ * time, we will always have that to send up. Otherwise, we need to do
* special handling in case the allocation fails at that time.
*/
ASSERT(eager->tcp_ordrel_mp == NULL);
- if ((eager->tcp_ordrel_mp = mi_tpi_ordrel_ind()) == NULL)
+ if (!IPCL_IS_NONSTR(econnp) &&
+ (eager->tcp_ordrel_mp = mi_tpi_ordrel_ind()) == NULL)
goto error3;
/* Inherit various TCP parameters from the listener */
@@ -5839,7 +5552,7 @@ tcp_conn_request(void *arg, mblk_t *mp, void *arg2)
* There should be no ire in the mp as we are being called after
* receiving the SYN.
*/
- ASSERT(tcp_ire_mp(mp) == NULL);
+ ASSERT(tcp_ire_mp(&mp) == NULL);
/*
* Adapt our mss, ttl, ... according to information provided in IRE.
@@ -5871,7 +5584,7 @@ tcp_conn_request(void *arg, mblk_t *mp, void *arg2)
* we should not inherit receive window size from listener.
*/
eager->tcp_rwnd = MSS_ROUNDUP(
- (eager->tcp_rwnd == 0 ? tcp->tcp_rq->q_hiwat :
+ (eager->tcp_rwnd == 0 ? tcp->tcp_recv_hiwater:
eager->tcp_rwnd), eager->tcp_mss);
if (eager->tcp_snd_ws_ok)
tcp_set_ws_value(eager);
@@ -5899,6 +5612,8 @@ tcp_conn_request(void *arg, mblk_t *mp, void *arg2)
*
*/
/* Set the TCP options */
+ eager->tcp_recv_hiwater = tcp->tcp_recv_hiwater;
+ eager->tcp_recv_lowater = tcp->tcp_recv_lowater;
eager->tcp_xmit_hiwater = tcp->tcp_xmit_hiwater;
eager->tcp_dgram_errind = tcp->tcp_dgram_errind;
eager->tcp_oobinline = tcp->tcp_oobinline;
@@ -5906,6 +5621,7 @@ tcp_conn_request(void *arg, mblk_t *mp, void *arg2)
eager->tcp_broadcast = tcp->tcp_broadcast;
eager->tcp_useloopback = tcp->tcp_useloopback;
eager->tcp_dontroute = tcp->tcp_dontroute;
+ eager->tcp_debug = tcp->tcp_debug;
eager->tcp_linger = tcp->tcp_linger;
eager->tcp_lingertime = tcp->tcp_lingertime;
if (tcp->tcp_ka_enabled)
@@ -5979,6 +5695,7 @@ tcp_conn_request(void *arg, mblk_t *mp, void *arg2)
goto error;
}
DB_CPID(mp1) = tcp->tcp_cpid;
+ mblk_setcred(mp1, tcp->tcp_cred);
eager->tcp_cpid = tcp->tcp_cpid;
eager->tcp_open_time = lbolt64;
@@ -6168,9 +5885,9 @@ done:
* Successful connect request processing begins when our client passes
* a T_CONN_REQ message into tcp_wput() and ends when tcp_rput() passes
* our T_OK_ACK reply message upstream. The control flow looks like this:
- * upstream -> tcp_wput() -> tcp_wput_proto() -> tcp_connect() -> IP
- * upstream <- tcp_rput() <- IP
- * After various error checks are completed, tcp_connect() lays
+ * upstream -> tcp_wput() -> tcp_wput_proto() -> tcp_tpi_connect() -> IP
+ * upstream <- tcp_rput() <- IP
+ * After various error checks are completed, tcp_tpi_connect() lays
* the target address and port into the composite header template,
* preallocates the T_OK_ACK reply message, constructs a full 12 byte bind
* request followed by an IRE request, and passes the three mblk message
@@ -6185,15 +5902,14 @@ done:
* above.
*/
static void
-tcp_connect(tcp_t *tcp, mblk_t *mp)
+tcp_tpi_connect(tcp_t *tcp, mblk_t *mp)
{
sin_t *sin;
- sin6_t *sin6;
queue_t *q = tcp->tcp_wq;
struct T_conn_req *tcr;
- ipaddr_t *dstaddrp;
- in_port_t dstport;
- uint_t srcid;
+ struct sockaddr *sa;
+ socklen_t len;
+ int error;
tcr = (struct T_conn_req *)mp->b_rptr;
@@ -6287,46 +6003,24 @@ tcp_connect(tcp_t *tcp, mblk_t *mp)
/* FALLTHRU */
case sizeof (sin_t):
- sin = (sin_t *)mi_offset_param(mp, tcr->DEST_offset,
+ sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
sizeof (sin_t));
- if (sin == NULL || !OK_32PTR((char *)sin)) {
- tcp_err_ack(tcp, mp, TSYSERR, EINVAL);
- return;
- }
- if (tcp->tcp_family != AF_INET ||
- sin->sin_family != AF_INET) {
- tcp_err_ack(tcp, mp, TSYSERR, EAFNOSUPPORT);
- return;
- }
- if (sin->sin_port == 0) {
- tcp_err_ack(tcp, mp, TBADADDR, 0);
- return;
- }
- if (tcp->tcp_connp && tcp->tcp_connp->conn_ipv6_v6only) {
- tcp_err_ack(tcp, mp, TSYSERR, EAFNOSUPPORT);
- return;
- }
-
+ len = sizeof (sin_t);
break;
case sizeof (sin6_t):
- sin6 = (sin6_t *)mi_offset_param(mp, tcr->DEST_offset,
+ sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
sizeof (sin6_t));
- if (sin6 == NULL || !OK_32PTR((char *)sin6)) {
- tcp_err_ack(tcp, mp, TSYSERR, EINVAL);
- return;
- }
- if (tcp->tcp_family != AF_INET6 ||
- sin6->sin6_family != AF_INET6) {
- tcp_err_ack(tcp, mp, TSYSERR, EAFNOSUPPORT);
- return;
- }
- if (sin6->sin6_port == 0) {
- tcp_err_ack(tcp, mp, TBADADDR, 0);
- return;
- }
+ len = sizeof (sin6_t);
break;
}
+
+ error = proto_verify_ip_addr(tcp->tcp_family, sa, len);
+ if (error != 0) {
+ tcp_err_ack(tcp, mp, TSYSERR, error);
+ return;
+ }
+
/*
* TODO: If someone in TCPS_TIME_WAIT has this dst/port we
* should key on their sequence number and cut them loose.
@@ -6394,80 +6088,17 @@ tcp_connect(tcp_t *tcp, mblk_t *mp)
}
}
- /*
- * If we're connecting to an IPv4-mapped IPv6 address, we need to
- * make sure that the template IP header in the tcp structure is an
- * IPv4 header, and that the tcp_ipversion is IPV4_VERSION. We
- * need to this before we call tcp_bindi() so that the port lookup
- * code will look for ports in the correct port space (IPv4 and
- * IPv6 have separate port spaces).
- */
- if (tcp->tcp_family == AF_INET6 && tcp->tcp_ipversion == IPV6_VERSION &&
- IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
- int err = 0;
-
- err = tcp_header_init_ipv4(tcp);
- if (err != 0) {
- mp = mi_tpi_err_ack_alloc(mp, TSYSERR, ENOMEM);
- goto connect_failed;
- }
- if (tcp->tcp_lport != 0)
- *(uint16_t *)tcp->tcp_tcph->th_lport = tcp->tcp_lport;
- }
-
- if (tcp->tcp_issocket) {
- /*
- * TCP is _D_SODIRECT and sockfs is directly above so save
- * the shared sonode sodirect_t pointer (if any) to enable
- * TCP sodirect.
- */
- tcp->tcp_sodirect = SOD_QTOSODP(tcp->tcp_rq);
+ /* call the non-TPI version */
+ error = tcp_do_connect(tcp->tcp_connp, sa, len, DB_CRED(mp),
+ DB_CPID(mp));
+ if (error < 0) {
+ mp = mi_tpi_err_ack_alloc(mp, -error, 0);
+ } else if (error > 0) {
+ mp = mi_tpi_err_ack_alloc(mp, TSYSERR, error);
+ } else {
+ mp = mi_tpi_ok_ack_alloc(mp);
}
- switch (tcp->tcp_state) {
- case TCPS_IDLE:
- /*
- * We support quick connect, refer to comments in
- * tcp_connect_*()
- */
- /* FALLTHRU */
- case TCPS_BOUND:
- case TCPS_LISTEN:
- if (tcp->tcp_family == AF_INET6) {
- if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
- tcp_connect_ipv6(tcp, mp,
- &sin6->sin6_addr,
- sin6->sin6_port, sin6->sin6_flowinfo,
- sin6->__sin6_src_id, sin6->sin6_scope_id);
- return;
- }
- /*
- * Destination adress is mapped IPv6 address.
- * Source bound address should be unspecified or
- * IPv6 mapped address as well.
- */
- if (!IN6_IS_ADDR_UNSPECIFIED(
- &tcp->tcp_bound_source_v6) &&
- !IN6_IS_ADDR_V4MAPPED(&tcp->tcp_bound_source_v6)) {
- mp = mi_tpi_err_ack_alloc(mp, TSYSERR,
- EADDRNOTAVAIL);
- break;
- }
- dstaddrp = &V4_PART_OF_V6((sin6->sin6_addr));
- dstport = sin6->sin6_port;
- srcid = sin6->__sin6_src_id;
- } else {
- dstaddrp = &sin->sin_addr.s_addr;
- dstport = sin->sin_port;
- srcid = 0;
- }
-
- tcp_connect_ipv4(tcp, mp, dstaddrp, dstport, srcid);
- return;
- default:
- mp = mi_tpi_err_ack_alloc(mp, TOUTSTATE, 0);
- break;
- }
/*
* Note: Code below is the "failure" case
*/
@@ -6479,23 +6110,22 @@ connect_failed:
tcp_err_ack_prim(tcp, NULL, T_CONN_REQ,
TSYSERR, ENOMEM);
}
- if (tcp->tcp_conn.tcp_opts_conn_req != NULL)
- tcp_close_mpp(&tcp->tcp_conn.tcp_opts_conn_req);
}
/*
* Handle connect to IPv4 destinations, including connections for AF_INET6
* sockets connecting to IPv4 mapped IPv6 destinations.
*/
-static void
-tcp_connect_ipv4(tcp_t *tcp, mblk_t *mp, ipaddr_t *dstaddrp, in_port_t dstport,
- uint_t srcid)
+static int
+tcp_connect_ipv4(tcp_t *tcp, ipaddr_t *dstaddrp, in_port_t dstport,
+ uint_t srcid, cred_t *cr, pid_t pid)
{
tcph_t *tcph;
- mblk_t *mp1;
+ mblk_t *mp;
ipaddr_t dstaddr = *dstaddrp;
int32_t oldstate;
uint16_t lport;
+ int error = 0;
tcp_stack_t *tcps = tcp->tcp_tcps;
ASSERT(tcp->tcp_ipversion == IPV4_VERSION);
@@ -6538,7 +6168,7 @@ tcp_connect_ipv4(tcp_t *tcp, mblk_t *mp, ipaddr_t *dstaddrp, in_port_t dstport,
*/
if (dstaddr == tcp->tcp_ipha->ipha_src &&
dstport == tcp->tcp_lport) {
- mp = mi_tpi_err_ack_alloc(mp, TBADADDR, 0);
+ error = -TBADADDR;
goto failed;
}
@@ -6583,91 +6213,77 @@ tcp_connect_ipv4(tcp_t *tcp, mblk_t *mp, ipaddr_t *dstaddrp, in_port_t dstport,
lport = tcp_bindi(tcp, lport, &tcp->tcp_ip_src_v6, 0, B_TRUE,
B_FALSE, B_FALSE);
if (lport == 0) {
- mp = mi_tpi_err_ack_alloc(mp, TNOADDR, 0);
+ error = -TNOADDR;
goto failed;
}
}
tcp->tcp_state = TCPS_SYN_SENT;
- /*
- * TODO: allow data with connect requests
- * by unlinking M_DATA trailers here and
- * linking them in behind the T_OK_ACK mblk.
- * The tcp_rput() bind ack handler would then
- * feed them to tcp_wput_data() rather than call
- * tcp_timer().
- */
- mp = mi_tpi_ok_ack_alloc(mp);
- if (!mp) {
+ mp = allocb(sizeof (ire_t), BPRI_HI);
+ if (mp == NULL) {
tcp->tcp_state = oldstate;
+ error = ENOMEM;
goto failed;
}
- if (tcp->tcp_family == AF_INET) {
- mp1 = tcp_ip_bind_mp(tcp, O_T_BIND_REQ,
- sizeof (ipa_conn_t));
- } else {
- mp1 = tcp_ip_bind_mp(tcp, O_T_BIND_REQ,
- sizeof (ipa6_conn_t));
+ mp->b_wptr += sizeof (ire_t);
+ mp->b_datap->db_type = IRE_DB_REQ_TYPE;
+ tcp->tcp_hard_binding = 1;
+ if (cr == NULL) {
+ cr = tcp->tcp_cred;
+ pid = tcp->tcp_cpid;
}
- if (mp1) {
- /*
- * We need to make sure that the conn_recv is set to a non-null
- * value before we insert the conn_t into the classifier table.
- * This is to avoid a race with an incoming packet which does
- * an ipcl_classify().
- */
- tcp->tcp_connp->conn_recv = tcp_input;
+ mblk_setcred(mp, cr);
+ DB_CPID(mp) = pid;
- /* Hang onto the T_OK_ACK for later. */
- linkb(mp1, mp);
- mblk_setcred(mp1, tcp->tcp_cred);
- if (tcp->tcp_family == AF_INET)
- mp1 = ip_bind_v4(tcp->tcp_wq, mp1, tcp->tcp_connp);
- else {
- mp1 = ip_bind_v6(tcp->tcp_wq, mp1, tcp->tcp_connp,
- &tcp->tcp_sticky_ipp);
+ /*
+ * We need to make sure that the conn_recv is set to a non-null
+ * value before we insert the conn_t into the classifier table.
+ * This is to avoid a race with an incoming packet which does
+ * an ipcl_classify().
+ */
+ tcp->tcp_connp->conn_recv = tcp_input;
+
+ if (tcp->tcp_family == AF_INET) {
+ error = ip_proto_bind_connected_v4(tcp->tcp_connp, &mp,
+ IPPROTO_TCP, &tcp->tcp_ipha->ipha_src, tcp->tcp_lport,
+ tcp->tcp_remote, tcp->tcp_fport, B_TRUE, B_TRUE);
+ } else {
+ in6_addr_t v6src;
+ if (tcp->tcp_ipversion == IPV4_VERSION) {
+ IN6_IPADDR_TO_V4MAPPED(tcp->tcp_ipha->ipha_src, &v6src);
+ } else {
+ v6src = tcp->tcp_ip6h->ip6_src;
}
- BUMP_MIB(&tcps->tcps_mib, tcpActiveOpens);
- tcp->tcp_active_open = 1;
- /*
- * If the bind cannot complete immediately
- * IP will arrange to call tcp_rput_other
- * when the bind completes.
- */
- if (mp1 != NULL)
- tcp_rput_other(tcp, mp1);
- return;
+ error = ip_proto_bind_connected_v6(tcp->tcp_connp, &mp,
+ IPPROTO_TCP, &v6src, tcp->tcp_lport, &tcp->tcp_remote_v6,
+ &tcp->tcp_sticky_ipp, tcp->tcp_fport, B_TRUE, B_TRUE);
}
- /* Error case */
- tcp->tcp_state = oldstate;
- mp = mi_tpi_err_ack_alloc(mp, TSYSERR, ENOMEM);
+ BUMP_MIB(&tcps->tcps_mib, tcpActiveOpens);
+ tcp->tcp_active_open = 1;
+ return (tcp_post_ip_bind(tcp, mp, error));
failed:
/* return error ack and blow away saved option results if any */
- if (mp != NULL)
- putnext(tcp->tcp_rq, mp);
- else {
- tcp_err_ack_prim(tcp, NULL, T_CONN_REQ,
- TSYSERR, ENOMEM);
- }
if (tcp->tcp_conn.tcp_opts_conn_req != NULL)
tcp_close_mpp(&tcp->tcp_conn.tcp_opts_conn_req);
-
+ return (error);
}
/*
* Handle connect to IPv6 destinations.
*/
-static void
-tcp_connect_ipv6(tcp_t *tcp, mblk_t *mp, in6_addr_t *dstaddrp,
- in_port_t dstport, uint32_t flowinfo, uint_t srcid, uint32_t scope_id)
+static int
+tcp_connect_ipv6(tcp_t *tcp, in6_addr_t *dstaddrp, in_port_t dstport,
+ uint32_t flowinfo, uint_t srcid, uint32_t scope_id, cred_t *cr, pid_t pid)
{
tcph_t *tcph;
- mblk_t *mp1;
+ mblk_t *mp;
ip6_rthdr_t *rth;
int32_t oldstate;
uint16_t lport;
tcp_stack_t *tcps = tcp->tcp_tcps;
+ int error = 0;
+ conn_t *connp = tcp->tcp_connp;
ASSERT(tcp->tcp_family == AF_INET6);
@@ -6678,8 +6294,7 @@ tcp_connect_ipv6(tcp_t *tcp, mblk_t *mp, in6_addr_t *dstaddrp,
* IPv4-mapped IPv6 address.
*/
if (tcp->tcp_ipversion != IPV6_VERSION) {
- mp = mi_tpi_err_ack_alloc(mp, TBADADDR, 0);
- goto failed;
+ return (-TBADADDR);
}
/*
@@ -6694,7 +6309,7 @@ tcp_connect_ipv6(tcp_t *tcp, mblk_t *mp, in6_addr_t *dstaddrp,
/* Handle __sin6_src_id if socket not bound to an IP address */
if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&tcp->tcp_ip6h->ip6_src)) {
ip_srcid_find_id(srcid, &tcp->tcp_ip6h->ip6_src,
- tcp->tcp_connp->conn_zoneid, tcps->tcps_netstack);
+ connp->conn_zoneid, tcps->tcps_netstack);
tcp->tcp_ip_src_v6 = tcp->tcp_ip6h->ip6_src;
}
@@ -6724,7 +6339,7 @@ tcp_connect_ipv6(tcp_t *tcp, mblk_t *mp, in6_addr_t *dstaddrp,
ipp->ipp_fields |= IPPF_SCOPE_ID;
if (ipp->ipp_fields & IPPF_HAS_IP6I)
ip2dbg(("tcp_connect_v6: SCOPE_ID set\n"));
- reterr = tcp_build_hdrs(tcp->tcp_rq, tcp);
+ reterr = tcp_build_hdrs(tcp);
if (reterr != 0)
goto failed;
ip1dbg(("tcp_connect_ipv6: tcp_bld_hdrs returned\n"));
@@ -6741,7 +6356,7 @@ tcp_connect_ipv6(tcp_t *tcp, mblk_t *mp, in6_addr_t *dstaddrp,
*/
if (IN6_ARE_ADDR_EQUAL(dstaddrp, &tcp->tcp_ip6h->ip6_src) &&
(dstport == tcp->tcp_lport)) {
- mp = mi_tpi_err_ack_alloc(mp, TBADADDR, 0);
+ error = -TBADADDR;
goto failed;
}
@@ -6751,7 +6366,6 @@ tcp_connect_ipv6(tcp_t *tcp, mblk_t *mp, in6_addr_t *dstaddrp,
(IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) |
(flowinfo & ~IPV6_VERS_AND_FLOW_MASK);
-
/*
* Massage a routing header (if present) putting the first hop
* in ip6_dst. Compute a starting value for the checksum which
@@ -6791,26 +6405,26 @@ tcp_connect_ipv6(tcp_t *tcp, mblk_t *mp, in6_addr_t *dstaddrp,
lport = tcp_bindi(tcp, lport, &tcp->tcp_ip_src_v6, 0, B_TRUE,
B_FALSE, B_FALSE);
if (lport == 0) {
- mp = mi_tpi_err_ack_alloc(mp, TNOADDR, 0);
+ error = -TNOADDR;
goto failed;
}
}
tcp->tcp_state = TCPS_SYN_SENT;
- /*
- * TODO: allow data with connect requests
- * by unlinking M_DATA trailers here and
- * linking them in behind the T_OK_ACK mblk.
- * The tcp_rput() bind ack handler would then
- * feed them to tcp_wput_data() rather than call
- * tcp_timer().
- */
- mp = mi_tpi_ok_ack_alloc(mp);
- if (!mp) {
- tcp->tcp_state = oldstate;
- goto failed;
- }
- mp1 = tcp_ip_bind_mp(tcp, O_T_BIND_REQ, sizeof (ipa6_conn_t));
- if (mp1) {
+
+ mp = allocb(sizeof (ire_t), BPRI_HI);
+ if (mp != NULL) {
+ in6_addr_t v6src;
+
+ mp->b_wptr += sizeof (ire_t);
+ mp->b_datap->db_type = IRE_DB_REQ_TYPE;
+ if (cr == NULL) {
+ cr = tcp->tcp_cred;
+ pid = tcp->tcp_cpid;
+ }
+ mblk_setcred(mp, cr);
+ DB_CPID(mp) = pid;
+ tcp->tcp_hard_binding = 1;
+
/*
* We need to make sure that the conn_recv is set to a non-null
* value before we insert the conn_t into the classifier table.
@@ -6819,32 +6433,28 @@ tcp_connect_ipv6(tcp_t *tcp, mblk_t *mp, in6_addr_t *dstaddrp,
*/
tcp->tcp_connp->conn_recv = tcp_input;
- /* Hang onto the T_OK_ACK for later. */
- linkb(mp1, mp);
- mblk_setcred(mp1, tcp->tcp_cred);
- mp1 = ip_bind_v6(tcp->tcp_wq, mp1, tcp->tcp_connp,
- &tcp->tcp_sticky_ipp);
+ if (tcp->tcp_ipversion == IPV4_VERSION) {
+ IN6_IPADDR_TO_V4MAPPED(tcp->tcp_ipha->ipha_src, &v6src);
+ } else {
+ v6src = tcp->tcp_ip6h->ip6_src;
+ }
+ error = ip_proto_bind_connected_v6(connp, &mp, IPPROTO_TCP,
+ &v6src, tcp->tcp_lport, &tcp->tcp_remote_v6,
+ &tcp->tcp_sticky_ipp, tcp->tcp_fport, B_TRUE, B_TRUE);
BUMP_MIB(&tcps->tcps_mib, tcpActiveOpens);
tcp->tcp_active_open = 1;
- /* ip_bind_v6() may return ACK or ERROR */
- if (mp1 != NULL)
- tcp_rput_other(tcp, mp1);
- return;
+
+ return (tcp_post_ip_bind(tcp, mp, error));
}
/* Error case */
tcp->tcp_state = oldstate;
- mp = mi_tpi_err_ack_alloc(mp, TSYSERR, ENOMEM);
+ error = ENOMEM;
failed:
/* return error ack and blow away saved option results if any */
- if (mp != NULL)
- putnext(tcp->tcp_rq, mp);
- else {
- tcp_err_ack_prim(tcp, NULL, T_CONN_REQ,
- TSYSERR, ENOMEM);
- }
if (tcp->tcp_conn.tcp_opts_conn_req != NULL)
tcp_close_mpp(&tcp->tcp_conn.tcp_opts_conn_req);
+ return (error);
}
/*
@@ -6870,72 +6480,61 @@ tcp_def_q_set(tcp_t *tcp, mblk_t *mp)
mutex_exit(&tcps->tcps_g_q_lock);
iocp->ioc_error = EALREADY;
} else {
- mblk_t *mp1;
+ int error = 0;
+ conn_t *connp = tcp->tcp_connp;
+ ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
- mp1 = tcp_ip_bind_mp(tcp, O_T_BIND_REQ, 0);
- if (mp1 == NULL) {
- mutex_exit(&tcps->tcps_g_q_lock);
- iocp->ioc_error = ENOMEM;
- } else {
- tcps->tcps_g_q = tcp->tcp_rq;
- mutex_exit(&tcps->tcps_g_q_lock);
- iocp->ioc_error = 0;
- iocp->ioc_rval = 0;
- /*
- * We are passing tcp_sticky_ipp as NULL
- * as it is not useful for tcp_default queue
- *
- * Set conn_recv just in case.
- */
- tcp->tcp_connp->conn_recv = tcp_conn_request;
+ tcps->tcps_g_q = tcp->tcp_rq;
+ mutex_exit(&tcps->tcps_g_q_lock);
+ iocp->ioc_error = 0;
+ iocp->ioc_rval = 0;
+ /*
+ * We are passing tcp_sticky_ipp as NULL
+ * as it is not useful for tcp_default queue
+ *
+ * Set conn_recv just in case.
+ */
+ tcp->tcp_connp->conn_recv = tcp_conn_request;
- mp1 = ip_bind_v6(q, mp1, tcp->tcp_connp, NULL);
- if (mp1 != NULL)
- tcp_rput_other(tcp, mp1);
+ ASSERT(connp->conn_af_isv6);
+ connp->conn_ulp = IPPROTO_TCP;
+
+ if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_TCP].connf_head !=
+ NULL || connp->conn_mac_exempt) {
+ error = -TBADADDR;
+ } else {
+ connp->conn_srcv6 = ipv6_all_zeros;
+ ipcl_proto_insert_v6(connp, IPPROTO_TCP);
}
+
+ (void) tcp_post_ip_bind(tcp, NULL, error);
}
qreply(q, mp);
}
-/*
- * Our client hereby directs us to reject the connection request
- * that tcp_conn_request() marked with 'seqnum'. Rejection consists
- * of sending the appropriate RST, not an ICMP error.
- */
-static void
-tcp_disconnect(tcp_t *tcp, mblk_t *mp)
+static int
+tcp_disconnect_common(tcp_t *tcp, t_scalar_t seqnum)
{
tcp_t *ltcp = NULL;
- t_scalar_t seqnum;
conn_t *connp;
tcp_stack_t *tcps = tcp->tcp_tcps;
- ASSERT((uintptr_t)(mp->b_wptr - mp->b_rptr) <= (uintptr_t)INT_MAX);
- if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_discon_req)) {
- tcp_err_ack(tcp, mp, TPROTO, 0);
- return;
- }
-
/*
* Right now, upper modules pass down a T_DISCON_REQ to TCP,
* when the stream is in BOUND state. Do not send a reset,
* since the destination IP address is not valid, and it can
* be the initialized value of all zeros (broadcast address).
*
- * If TCP has sent down a bind request to IP and has not
- * received the reply, reject the request. Otherwise, TCP
- * will be confused.
+ * XXX There won't be any pending bind request to IP.
*/
- if (tcp->tcp_state <= TCPS_BOUND || tcp->tcp_hard_binding) {
+ if (tcp->tcp_state <= TCPS_BOUND) {
if (tcp->tcp_debug) {
(void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
"tcp_disconnect: bad state, %d", tcp->tcp_state);
}
- tcp_err_ack(tcp, mp, TOUTSTATE, 0);
- return;
+ return (TOUTSTATE);
}
- seqnum = ((struct T_discon_req *)mp->b_rptr)->SEQ_number;
if (seqnum == -1 || tcp->tcp_conn_req_max == 0) {
@@ -7009,25 +6608,42 @@ tcp_disconnect(tcp_t *tcp, mblk_t *mp)
tcp_reinit(tcp);
- if (old_state >= TCPS_ESTABLISHED) {
+ return (0);
+ } else if (!tcp_eager_blowoff(tcp, seqnum)) {
+ return (TBADSEQ);
+ }
+ return (0);
+}
+
+/*
+ * Our client hereby directs us to reject the connection request
+ * that tcp_conn_request() marked with 'seqnum'. Rejection consists
+ * of sending the appropriate RST, not an ICMP error.
+ */
+static void
+tcp_disconnect(tcp_t *tcp, mblk_t *mp)
+{
+ t_scalar_t seqnum;
+ int error;
+ /*
+ * A message too short to hold a struct T_discon_req is answered
+ * with a TPROTO error ack and not processed any further.
+ */
+ ASSERT((uintptr_t)(mp->b_wptr - mp->b_rptr) <= (uintptr_t)INT_MAX);
+ if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_discon_req)) {
+ tcp_err_ack(tcp, mp, TPROTO, 0);
+ return;
+ }
+ seqnum = ((struct T_discon_req *)mp->b_rptr)->SEQ_number;
+ error = tcp_disconnect_common(tcp, seqnum); /* nonzero is acked as the TPI error */
+ if (error != 0)
+ tcp_err_ack(tcp, mp, error, 0);
+ else {
+ if (tcp->tcp_state >= TCPS_ESTABLISHED) {
/*
 * Send M_FLUSH according to TPI.
 * NOTE(review): the code this replaces tested old_state after
 * calling tcp_reinit(); this tests tcp_state as it is once
 * tcp_disconnect_common() has returned -- confirm the flush is
 * still sent in the cases where it used to be.
 */
(void) putnextctl1(tcp->tcp_rq, M_FLUSH, FLUSHRW);
}
mp = mi_tpi_ok_ack_alloc(mp);
if (mp)
putnext(tcp->tcp_rq, mp);
- return;
- } else if (!tcp_eager_blowoff(tcp, seqnum)) {
- tcp_err_ack(tcp, mp, TBADSEQ, 0);
- return;
}
- if (tcp->tcp_state >= TCPS_ESTABLISHED) {
- /* Send M_FLUSH according to TPI */
- (void) putnextctl1(tcp->tcp_rq, M_FLUSH, FLUSHRW);
- }
- mp = mi_tpi_ok_ack_alloc(mp);
- if (mp)
- putnext(tcp->tcp_rq, mp);
}
/*
@@ -7566,6 +7182,24 @@ tcp_copy_info(struct T_info_ack *tia, tcp_t *tcp)
/* TODO: Default ETSDU is 1. Is that correct for tcp? */
}
+static void
+tcp_do_capability_ack(tcp_t *tcp, struct T_capability_ack *tcap,
+ t_uscalar_t cap_bits1)
+{
+ tcap->CAP_bits1 = 0; /* nothing reported until filled in below */
+ /*
+ * Fill in the T_capability_ack pointed to by tcap for this tcp.
+ * Only the capabilities requested in cap_bits1 are supplied, and
+ * the matching bit is set in CAP_bits1 for each one reported.
+ */
+ if (cap_bits1 & TC1_INFO) {
+ tcp_copy_info(&tcap->INFO_ack, tcp);
+ tcap->CAP_bits1 |= TC1_INFO;
+ }
+
+ if (cap_bits1 & TC1_ACCEPTOR_ID) {
+ tcap->ACCEPTOR_id = tcp->tcp_acceptor_id;
+ tcap->CAP_bits1 |= TC1_ACCEPTOR_ID;
+ }
+
+}
+
/*
* This routine responds to T_CAPABILITY_REQ messages. It is called by
* tcp_wput. Much of the T_CAPABILITY_ACK information is copied from
@@ -7591,17 +7225,7 @@ tcp_capability_req(tcp_t *tcp, mblk_t *mp)
return;
tcap = (struct T_capability_ack *)mp->b_rptr;
- tcap->CAP_bits1 = 0;
-
- if (cap_bits1 & TC1_INFO) {
- tcp_copy_info(&tcap->INFO_ack, tcp);
- tcap->CAP_bits1 |= TC1_INFO;
- }
-
- if (cap_bits1 & TC1_ACCEPTOR_ID) {
- tcap->ACCEPTOR_id = tcp->tcp_acceptor_id;
- tcap->CAP_bits1 |= TC1_ACCEPTOR_ID;
- }
+ tcp_do_capability_ack(tcp, tcap, cap_bits1);
putnext(tcp->tcp_rq, mp);
}
@@ -7822,10 +7446,12 @@ tcp_reinit(tcp_t *tcp)
tcp->tcp_urp_mark_mp = NULL;
}
if (tcp->tcp_fused_sigurg_mp != NULL) {
+ ASSERT(!IPCL_IS_NONSTR(tcp->tcp_connp));
freeb(tcp->tcp_fused_sigurg_mp);
tcp->tcp_fused_sigurg_mp = NULL;
}
if (tcp->tcp_ordrel_mp != NULL) {
+ ASSERT(!IPCL_IS_NONSTR(tcp->tcp_connp));
freeb(tcp->tcp_ordrel_mp);
tcp->tcp_ordrel_mp = NULL;
}
@@ -7925,7 +7551,10 @@ tcp_reinit(tcp_t *tcp)
tcp->tcp_ip_src_v6 = tcp->tcp_bound_source_v6;
ASSERT(tcp->tcp_ptpbhn != NULL);
- tcp->tcp_rq->q_hiwat = tcps->tcps_recv_hiwat;
+ if (!IPCL_IS_NONSTR(tcp->tcp_connp))
+ tcp->tcp_rq->q_hiwat = tcps->tcps_recv_hiwat;
+ tcp->tcp_recv_hiwater = tcps->tcps_recv_hiwat;
+ tcp->tcp_recv_lowater = tcp_rinfo.mi_lowat;
tcp->tcp_rwnd = tcps->tcps_recv_hiwat;
tcp->tcp_mss = tcp->tcp_ipversion != IPV4_VERSION ?
tcps->tcps_mss_def_ipv6 : tcps->tcps_mss_def_ipv4;
@@ -7952,6 +7581,7 @@ tcp_reinit_values(tcp)
#define PRESERVE(x) ((x) = (x))
#endif /* lint */
+ PRESERVE(tcp->tcp_bind_hash_port);
PRESERVE(tcp->tcp_bind_hash);
PRESERVE(tcp->tcp_ptpbhn);
PRESERVE(tcp->tcp_acceptor_hash);
@@ -8239,6 +7869,8 @@ tcp_reinit_values(tcp)
DONTCARE(tcp->tcmp_stk[0]);
#endif
+ PRESERVE(tcp->tcp_connid);
+
#undef DONTCARE
#undef PRESERVE
@@ -9072,156 +8704,6 @@ noticmpv6:
}
/*
- * IP recognizes seven kinds of bind requests:
- *
- * - A zero-length address binds only to the protocol number.
- *
- * - A 4-byte address is treated as a request to
- * validate that the address is a valid local IPv4
- * address, appropriate for an application to bind to.
- * IP does the verification, but does not make any note
- * of the address at this time.
- *
- * - A 16-byte address contains is treated as a request
- * to validate a local IPv6 address, as the 4-byte
- * address case above.
- *
- * - A 16-byte sockaddr_in to validate the local IPv4 address and also
- * use it for the inbound fanout of packets.
- *
- * - A 24-byte sockaddr_in6 to validate the local IPv6 address and also
- * use it for the inbound fanout of packets.
- *
- * - A 12-byte address (ipa_conn_t) containing complete IPv4 fanout
- * information consisting of local and remote addresses
- * and ports. In this case, the addresses are both
- * validated as appropriate for this operation, and, if
- * so, the information is retained for use in the
- * inbound fanout.
- *
- * - A 36-byte address address (ipa6_conn_t) containing complete IPv6
- * fanout information, like the 12-byte case above.
- *
- * IP will also fill in the IRE request mblk with information
- * regarding our peer. In all cases, we notify IP of our protocol
- * type by appending a single protocol byte to the bind request.
- */
-static mblk_t *
-tcp_ip_bind_mp(tcp_t *tcp, t_scalar_t bind_prim, t_scalar_t addr_length)
-{
- char *cp;
- mblk_t *mp;
- struct T_bind_req *tbr;
- ipa_conn_t *ac;
- ipa6_conn_t *ac6;
- sin_t *sin;
- sin6_t *sin6;
-
- ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ);
- ASSERT((tcp->tcp_family == AF_INET &&
- tcp->tcp_ipversion == IPV4_VERSION) ||
- (tcp->tcp_family == AF_INET6 &&
- (tcp->tcp_ipversion == IPV4_VERSION ||
- tcp->tcp_ipversion == IPV6_VERSION)));
-
- mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI);
- if (!mp)
- return (mp);
- mp->b_datap->db_type = M_PROTO;
- tbr = (struct T_bind_req *)mp->b_rptr;
- tbr->PRIM_type = bind_prim;
- tbr->ADDR_offset = sizeof (*tbr);
- tbr->CONIND_number = 0;
- tbr->ADDR_length = addr_length;
- cp = (char *)&tbr[1];
- switch (addr_length) {
- case sizeof (ipa_conn_t):
- ASSERT(tcp->tcp_family == AF_INET);
- ASSERT(tcp->tcp_ipversion == IPV4_VERSION);
-
- mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
- if (mp->b_cont == NULL) {
- freemsg(mp);
- return (NULL);
- }
- mp->b_cont->b_wptr += sizeof (ire_t);
- mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;
-
- /* cp known to be 32 bit aligned */
- ac = (ipa_conn_t *)cp;
- ac->ac_laddr = tcp->tcp_ipha->ipha_src;
- ac->ac_faddr = tcp->tcp_remote;
- ac->ac_fport = tcp->tcp_fport;
- ac->ac_lport = tcp->tcp_lport;
- tcp->tcp_hard_binding = 1;
- break;
-
- case sizeof (ipa6_conn_t):
- ASSERT(tcp->tcp_family == AF_INET6);
-
- mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
- if (mp->b_cont == NULL) {
- freemsg(mp);
- return (NULL);
- }
- mp->b_cont->b_wptr += sizeof (ire_t);
- mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;
-
- /* cp known to be 32 bit aligned */
- ac6 = (ipa6_conn_t *)cp;
- if (tcp->tcp_ipversion == IPV4_VERSION) {
- IN6_IPADDR_TO_V4MAPPED(tcp->tcp_ipha->ipha_src,
- &ac6->ac6_laddr);
- } else {
- ac6->ac6_laddr = tcp->tcp_ip6h->ip6_src;
- }
- ac6->ac6_faddr = tcp->tcp_remote_v6;
- ac6->ac6_fport = tcp->tcp_fport;
- ac6->ac6_lport = tcp->tcp_lport;
- tcp->tcp_hard_binding = 1;
- break;
-
- case sizeof (sin_t):
- /*
- * NOTE: IPV6_ADDR_LEN also has same size.
- * Use family to discriminate.
- */
- if (tcp->tcp_family == AF_INET) {
- sin = (sin_t *)cp;
-
- *sin = sin_null;
- sin->sin_family = AF_INET;
- sin->sin_addr.s_addr = tcp->tcp_bound_source;
- sin->sin_port = tcp->tcp_lport;
- break;
- } else {
- *(in6_addr_t *)cp = tcp->tcp_bound_source_v6;
- }
- break;
-
- case sizeof (sin6_t):
- ASSERT(tcp->tcp_family == AF_INET6);
- sin6 = (sin6_t *)cp;
-
- *sin6 = sin6_null;
- sin6->sin6_family = AF_INET6;
- sin6->sin6_addr = tcp->tcp_bound_source_v6;
- sin6->sin6_port = tcp->tcp_lport;
- break;
-
- case IP_ADDR_LEN:
- ASSERT(tcp->tcp_ipversion == IPV4_VERSION);
- *(uint32_t *)cp = tcp->tcp_ipha->ipha_src;
- break;
-
- }
- /* Add protocol number to end */
- cp[addr_length] = (char)IPPROTO_TCP;
- mp->b_wptr = (uchar_t *)&cp[addr_length + 1];
- return (mp);
-}
-
-/*
* Notify IP that we are having trouble with this connection. IP should
* blow the IRE away and start over.
*/
@@ -9268,25 +8750,29 @@ tcp_ip_notify(tcp_t *tcp)
/* Unlink and return any mblk that looks like it contains an ire */
static mblk_t *
-tcp_ire_mp(mblk_t *mp)
+tcp_ire_mp(mblk_t **mpp)
{
- mblk_t *prev_mp;
+ mblk_t *mp = *mpp; /* NOTE(review): examined below before any NULL test -- confirm callers never pass an empty chain */
+ mblk_t *prev_mp = NULL; /* block preceding mp; NULL while mp is the head */
for (;;) {
- prev_mp = mp;
- mp = mp->b_cont;
- if (mp == NULL)
- break;
switch (DB_TYPE(mp)) {
case IRE_DB_TYPE:
case IRE_DB_REQ_TYPE:
- if (prev_mp != NULL)
+ if (mp == *mpp) { /* match is the head: advance the caller's pointer */
+ *mpp = mp->b_cont;
+ } else { /* match is further down: splice it out after prev_mp */
prev_mp->b_cont = mp->b_cont;
+ }
mp->b_cont = NULL;
return (mp);
default:
break;
}
+ prev_mp = mp; /* no match here; step to the next block in the chain */
+ mp = mp->b_cont;
+ if (mp == NULL)
+ break;
}
return (mp); /* chain exhausted without a match: mp is NULL here */
}
@@ -9408,10 +8894,10 @@ tcp_maxpsz_set(tcp_t *tcp, boolean_t set_maxblk)
queue_t *q = tcp->tcp_rq;
int32_t mss = tcp->tcp_mss;
int maxpsz;
+ conn_t *connp = tcp->tcp_connp;
if (TCP_IS_DETACHED(tcp))
return (mss);
-
if (tcp->tcp_fused) {
maxpsz = tcp_fuse_maxpsz_set(tcp);
mss = INFPSZ;
@@ -9435,6 +8921,7 @@ tcp_maxpsz_set(tcp_t *tcp, boolean_t set_maxblk)
* head to break down larger than SMSS writes into SMSS-
* size mblks, up to tcp_maxpsz_multiplier mblks at a time.
*/
+ /* XXX tune this with ndd tcp_maxpsz_multiplier */
maxpsz = tcp->tcp_maxpsz * mss;
if (maxpsz > tcp->tcp_xmit_hiwater/2) {
maxpsz = tcp->tcp_xmit_hiwater/2;
@@ -9442,12 +8929,15 @@ tcp_maxpsz_set(tcp_t *tcp, boolean_t set_maxblk)
maxpsz = MSS_ROUNDUP(maxpsz, mss);
}
}
- (void) setmaxps(q, maxpsz);
- tcp->tcp_wq->q_maxpsz = maxpsz;
- if (set_maxblk)
- (void) mi_set_sth_maxblk(q, mss);
+ (void) proto_set_maxpsz(q, connp, maxpsz);
+ if (!(IPCL_IS_NONSTR(connp))) {
+ /* XXX do it in set_maxpsz()? */
+ tcp->tcp_wq->q_maxpsz = maxpsz;
+ }
+ if (set_maxblk)
+ (void) proto_set_tx_maxblk(q, connp, mss);
return (mss);
}
@@ -9687,116 +9177,74 @@ tcp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
return (tcp_open(q, devp, flag, sflag, credp, B_TRUE));
}
-static int
-tcp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp,
- boolean_t isv6)
+static conn_t *
+tcp_create_common(queue_t *q, cred_t *credp, boolean_t isv6,
+ boolean_t issocket, int *errorp)
{
tcp_t *tcp = NULL;
conn_t *connp;
int err;
- vmem_t *minor_arena = NULL;
- dev_t conn_dev;
zoneid_t zoneid;
- tcp_stack_t *tcps = NULL;
+ tcp_stack_t *tcps;
+ squeue_t *sqp;
- if (q->q_ptr != NULL)
- return (0);
+ ASSERT(errorp != NULL);
+ /*
+ * Find the proper zoneid and netstack.
+ */
+ /*
+ * Special case for install: miniroot needs to be able to
+ * access files via NFS as though it were always in the
+ * global zone.
+ */
+ if (credp == kcred && nfs_global_client_only != 0) {
+ zoneid = GLOBAL_ZONEID;
+ tcps = netstack_find_by_stackid(GLOBAL_NETSTACKID)->
+ netstack_tcp;
+ ASSERT(tcps != NULL);
+ } else {
+ netstack_t *ns;
- if (sflag == MODOPEN)
- return (EINVAL);
+ ns = netstack_find_by_cred(credp);
+ ASSERT(ns != NULL);
+ tcps = ns->netstack_tcp;
+ ASSERT(tcps != NULL);
- if (!(flag & SO_ACCEPTOR)) {
/*
- * Special case for install: miniroot needs to be able to
- * access files via NFS as though it were always in the
- * global zone.
+ * For exclusive stacks we set the zoneid to zero
+ * to make TCP operate as if in the global zone.
*/
- if (credp == kcred && nfs_global_client_only != 0) {
+ if (tcps->tcps_netstack->netstack_stackid !=
+ GLOBAL_NETSTACKID)
zoneid = GLOBAL_ZONEID;
- tcps = netstack_find_by_stackid(GLOBAL_NETSTACKID)->
- netstack_tcp;
- ASSERT(tcps != NULL);
- } else {
- netstack_t *ns;
-
- ns = netstack_find_by_cred(credp);
- ASSERT(ns != NULL);
- tcps = ns->netstack_tcp;
- ASSERT(tcps != NULL);
-
- /*
- * For exclusive stacks we set the zoneid to zero
- * to make TCP operate as if in the global zone.
- */
- if (tcps->tcps_netstack->netstack_stackid !=
- GLOBAL_NETSTACKID)
- zoneid = GLOBAL_ZONEID;
- else
- zoneid = crgetzoneid(credp);
- }
- /*
- * For stackid zero this is done from strplumb.c, but
- * non-zero stackids are handled here.
- */
- if (tcps->tcps_g_q == NULL &&
- tcps->tcps_netstack->netstack_stackid !=
- GLOBAL_NETSTACKID) {
- tcp_g_q_setup(tcps);
- }
- }
-
- if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) &&
- ((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) {
- minor_arena = ip_minor_arena_la;
- } else {
- /*
- * Either minor numbers in the large arena were exhausted
- * or a non socket application is doing the open.
- * Try to allocate from the small arena.
- */
- if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) {
- if (tcps != NULL)
- netstack_rele(tcps->tcps_netstack);
- return (EBUSY);
- }
- minor_arena = ip_minor_arena_sa;
+ else
+ zoneid = crgetzoneid(credp);
}
- ASSERT(minor_arena != NULL);
-
- *devp = makedevice(getemajor(*devp), (minor_t)conn_dev);
-
- if (flag & SO_ACCEPTOR) {
- /* No netstack_find_by_cred, hence no netstack_rele needed */
- ASSERT(tcps == NULL);
- q->q_qinfo = &tcp_acceptor_rinit;
- /*
- * the conn_dev and minor_arena will be subsequently used by
- * tcp_wput_accept() and tcpclose_accept() to figure out the
- * minor device number for this connection from the q_ptr.
- */
- RD(q)->q_ptr = (void *)conn_dev;
- WR(q)->q_qinfo = &tcp_acceptor_winit;
- WR(q)->q_ptr = (void *)minor_arena;
- qprocson(q);
- return (0);
+ /*
+ * For stackid zero this is done from strplumb.c, but
+ * non-zero stackids are handled here.
+ */
+ if (tcps->tcps_g_q == NULL &&
+ tcps->tcps_netstack->netstack_stackid !=
+ GLOBAL_NETSTACKID) {
+ tcp_g_q_setup(tcps);
}
- connp = (conn_t *)tcp_get_conn(IP_SQUEUE_GET(lbolt), tcps);
+ sqp = IP_SQUEUE_GET((uint_t)gethrtime());
+ connp = (conn_t *)tcp_get_conn(sqp, tcps);
/*
* Both tcp_get_conn and netstack_find_by_cred incremented refcnt,
* so we drop it by one.
*/
netstack_rele(tcps->tcps_netstack);
if (connp == NULL) {
- inet_minor_free(minor_arena, conn_dev);
- q->q_ptr = NULL;
- return (ENOSR);
+ *errorp = ENOSR;
+ return (NULL);
}
- connp->conn_sqp = IP_SQUEUE_GET(lbolt);
+ connp->conn_sqp = sqp;
connp->conn_initial_sqp = connp->conn_sqp;
tcp = connp->conn_tcp;
- q->q_ptr = WR(q)->q_ptr = connp;
if (isv6) {
connp->conn_flags |= (IPCL_TCP6|IPCL_ISV6);
connp->conn_send = ip_output_v6;
@@ -9838,45 +9286,135 @@ tcp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp,
if (getpflags(NET_MAC_AWARE, credp) != 0)
connp->conn_mac_exempt = B_TRUE;
- connp->conn_dev = conn_dev;
- connp->conn_minor_arena = minor_arena;
+ connp->conn_dev = NULL;
+ if (issocket) {
+ connp->conn_flags |= IPCL_SOCKET;
+ tcp->tcp_issocket = 1;
+ }
- ASSERT(q->q_qinfo == &tcp_rinitv4 || q->q_qinfo == &tcp_rinitv6);
- ASSERT(WR(q)->q_qinfo == &tcp_winit);
+ tcp->tcp_recv_hiwater = tcps->tcps_recv_hiwat;
+ tcp->tcp_rwnd = tcps->tcps_recv_hiwat;
+ tcp->tcp_recv_lowater = tcp_rinfo.mi_lowat;
- if (flag & SO_SOCKSTR) {
+ /* Non-zero default values */
+ connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
+
+ if (q == NULL) {
/*
- * No need to insert a socket in tcp acceptor hash.
- * If it was a socket acceptor stream, we dealt with
- * it above. A socket listener can never accept a
- * connection and doesn't need acceptor_id.
+ * Create a helper stream for non-STREAMS socket.
*/
- connp->conn_flags |= IPCL_SOCKET;
- tcp->tcp_issocket = 1;
- WR(q)->q_qinfo = &tcp_sock_winit;
+ err = ip_create_helper_stream(connp, tcps->tcps_ldi_ident);
+ if (err != 0) {
+ ip1dbg(("tcp_create: create of IP helper stream "
+ "failed\n"));
+ CONN_DEC_REF(connp);
+ *errorp = err;
+ return (NULL);
+ }
+ q = connp->conn_rq;
} else {
-#ifdef _ILP32
- tcp->tcp_acceptor_id = (t_uscalar_t)RD(q);
-#else
- tcp->tcp_acceptor_id = conn_dev;
-#endif /* _ILP32 */
- tcp_acceptor_hash_insert(tcp->tcp_acceptor_id, tcp);
+ RD(q)->q_hiwat = tcps->tcps_recv_hiwat;
}
+ SOCK_CONNID_INIT(tcp->tcp_connid);
err = tcp_init(tcp, q);
if (err != 0) {
- inet_minor_free(connp->conn_minor_arena, connp->conn_dev);
- tcp_acceptor_hash_remove(tcp);
CONN_DEC_REF(connp);
+ *errorp = err;
+ return (NULL);
+ }
+
+ return (connp);
+}
+
+static int
+tcp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp,
+ boolean_t isv6)
+{
+ tcp_t *tcp = NULL;
+ conn_t *connp = NULL;
+ int err;
+ vmem_t *minor_arena = NULL;
+ dev_t conn_dev;
+ boolean_t issocket;
+
+ if (q->q_ptr != NULL)
+ return (0);
+
+ if (sflag == MODOPEN)
+ return (EINVAL);
+
+ if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) &&
+ ((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) {
+ minor_arena = ip_minor_arena_la;
+ } else {
+ /*
+ * Either minor numbers in the large arena were exhausted
+ * or a non socket application is doing the open.
+ * Try to allocate from the small arena.
+ */
+ if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) {
+ return (EBUSY);
+ }
+ minor_arena = ip_minor_arena_sa;
+ }
+
+ ASSERT(minor_arena != NULL);
+
+ *devp = makedevice(getmajor(*devp), (minor_t)conn_dev);
+
+ if (flag & SO_FALLBACK) {
+ /*
+ * Non streams socket needs a stream to fallback to
+ */
+ RD(q)->q_ptr = (void *)conn_dev;
+ WR(q)->q_qinfo = &tcp_fallback_sock_winit;
+ WR(q)->q_ptr = (void *)minor_arena;
+ qprocson(q);
+ return (0);
+ } else if (flag & SO_ACCEPTOR) {
+ q->q_qinfo = &tcp_acceptor_rinit;
+ /*
+ * the conn_dev and minor_arena will be subsequently used by
+ * tcp_wput_accept() and tcpclose_accept() to figure out the
+ * minor device number for this connection from the q_ptr.
+ */
+ RD(q)->q_ptr = (void *)conn_dev;
+ WR(q)->q_qinfo = &tcp_acceptor_winit;
+ WR(q)->q_ptr = (void *)minor_arena;
+ qprocson(q);
+ return (0);
+ }
+
+ issocket = flag & SO_SOCKSTR;
+ connp = tcp_create_common(q, credp, isv6, issocket, &err);
+
+ if (connp == NULL) {
+ inet_minor_free(minor_arena, conn_dev);
q->q_ptr = WR(q)->q_ptr = NULL;
return (err);
}
- RD(q)->q_hiwat = tcps->tcps_recv_hiwat;
- tcp->tcp_rwnd = tcps->tcps_recv_hiwat;
+ q->q_ptr = WR(q)->q_ptr = connp;
+
+ connp->conn_dev = conn_dev;
+ connp->conn_minor_arena = minor_arena;
+
+ ASSERT(q->q_qinfo == &tcp_rinitv4 || q->q_qinfo == &tcp_rinitv6);
+ ASSERT(WR(q)->q_qinfo == &tcp_winit);
+
+ if (issocket) {
+ WR(q)->q_qinfo = &tcp_sock_winit;
+ } else {
+ tcp = connp->conn_tcp;
+#ifdef _ILP32
+ tcp->tcp_acceptor_id = (t_uscalar_t)RD(q);
+#else
+ tcp->tcp_acceptor_id = conn_dev;
+#endif /* _ILP32 */
+ tcp_acceptor_hash_insert(tcp->tcp_acceptor_id, tcp);
+ }
- /* Non-zero default values */
- connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
/*
* Put the ref for TCP. Ref for IP was already put
* by ipcl_conn_create. Also Make the conn_t globally
@@ -9922,7 +9460,7 @@ tcp_allow_connopt_set(int level, int name)
}
/*
- * This routine gets default values of certain options whose default
+ * this routine gets default values of certain options whose default
* values are maintained by protocol specific code
*/
/* ARGSUSED */
@@ -9975,15 +9513,10 @@ tcp_opt_default(queue_t *q, int level, int name, uchar_t *ptr)
return (sizeof (int));
}
-
-/*
- * TCP routine to get the values of options.
- */
-int
-tcp_opt_get(queue_t *q, int level, int name, uchar_t *ptr)
+static int
+tcp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr)
{
int *i1 = (int *)ptr;
- conn_t *connp = Q_TO_CONN(q);
tcp_t *tcp = connp->conn_tcp;
ip6_pkt_t *ipp = &tcp->tcp_sticky_ipp;
@@ -10028,7 +9561,7 @@ tcp_opt_get(queue_t *q, int level, int name, uchar_t *ptr)
*i1 = tcp->tcp_xmit_hiwater;
break;
case SO_RCVBUF:
- *i1 = RD(q)->q_hiwat;
+ *i1 = tcp->tcp_recv_hiwater;
break;
case SO_SND_COPYAVOID:
*i1 = tcp->tcp_snd_zcopy_on ?
@@ -10052,6 +9585,8 @@ tcp_opt_get(queue_t *q, int level, int name, uchar_t *ptr)
case SO_DOMAIN:
*i1 = tcp->tcp_family;
break;
+ case SO_ACCEPTCONN:
+ /* Report 1 when the endpoint is in TCPS_LISTEN, 0 otherwise. */
+ *i1 = (tcp->tcp_state == TCPS_LISTEN);
+ /*
+ * Leave the switch like the other handled options do;
+ * falling into the default arm would discard the value
+ * just stored and return -1.
+ */
+ break;
default:
return (-1);
}
@@ -10293,22 +9828,84 @@ tcp_opt_get(queue_t *q, int level, int name, uchar_t *ptr)
}
/*
+ * Queue-based variant of tcp_opt_get(): look up the conn_t behind the
+ * queue and return whatever tcp_opt_get() returns for it.
+ */
+int
+tcp_tpi_opt_get(queue_t *q, int level, int name, uchar_t *ptr)
+{
+	conn_t	*connp = Q_TO_CONN(q);
+
+	return (tcp_opt_get(connp, level, name, ptr));
+}
+
+/*
+ * Socket downcall to read an option from a TCP endpoint.
+ *
+ * Returns a UNIX error (0 on success).  optlen is a value-result
+ * argument: on entry it holds the size of the caller's buffer at optvalp,
+ * on successful return the number of bytes stored there.
+ *
+ * The request is validated by proto_opt_check(), which also supplies the
+ * size of the scratch buffer.  tcp_opt_get() then runs between
+ * squeue_synch_enter() and squeue_synch_exit().  A negative length from
+ * tcp_opt_get() hands the request on to ip_get_options().
+ */
+int
+tcp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
+    void *optvalp, socklen_t *optlen, cred_t *cr)
+{
+	conn_t		*connp = (conn_t *)proto_handle;
+	squeue_t	*sqp = connp->conn_sqp;
+	int		error;
+	t_uscalar_t	max_optbuf_len;
+	void		*optvalp_buf;
+	int		len;
+
+	error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len,
+	    tcp_opt_obj.odb_opt_des_arr,
+	    tcp_opt_obj.odb_opt_arr_cnt,
+	    tcp_opt_obj.odb_topmost_tpiprovider,
+	    B_FALSE, B_TRUE, cr);
+	if (error != 0) {
+		/* Negative results are converted by proto_tlitosyserr(). */
+		if (error < 0) {
+			error = proto_tlitosyserr(-error);
+		}
+		return (error);
+	}
+
+	optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP);
+
+	error = squeue_synch_enter(sqp, connp, 0);
+	if (error == ENOMEM) {
+		/* Release the scratch buffer; this exit used to leak it. */
+		kmem_free(optvalp_buf, max_optbuf_len);
+		return (ENOMEM);
+	}
+
+	len = tcp_opt_get(connp, level, option_name, optvalp_buf);
+	squeue_synch_exit(sqp, connp);
+
+	if (len < 0) {
+		/*
+		 * Pass on to IP
+		 */
+		kmem_free(optvalp_buf, max_optbuf_len);
+		return (ip_get_options(connp, level, option_name,
+		    optvalp, optlen, cr));
+	} else {
+		/*
+		 * update optlen and copy option value
+		 */
+		t_uscalar_t size = MIN(len, *optlen);
+		bcopy(optvalp_buf, optvalp, size);
+		bcopy(&size, optlen, sizeof (size));
+
+		kmem_free(optvalp_buf, max_optbuf_len);
+		return (0);
+	}
+}
+
+/*
* We declare as 'int' rather than 'void' to satisfy pfi_t arg requirements.
* Parameters are assumed to be verified by the caller.
*/
/* ARGSUSED */
int
-tcp_opt_set(queue_t *q, uint_t optset_context, int level, int name,
+tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name,
uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
- void *thisdg_attrs, cred_t *cr, mblk_t *mblk)
+ void *thisdg_attrs, cred_t *cr)
{
- conn_t *connp = Q_TO_CONN(q);
tcp_t *tcp = connp->conn_tcp;
int *i1 = (int *)invalp;
boolean_t onoff = (*i1 == 0) ? 0 : 1;
boolean_t checkonly;
int reterr;
- tcp_stack_t *tcps = Q_TO_TCP(q)->tcp_tcps;
+ tcp_stack_t *tcps = tcp->tcp_tcps;
switch (optset_context) {
case SETFN_OPTCOM_CHECKONLY:
@@ -10371,7 +9968,6 @@ tcp_opt_set(queue_t *q, uint_t optset_context, int level, int name,
* of passed in length is done. It is assumed *_optcom_req()
* routines do the right thing.
*/
-
switch (level) {
case SOL_SOCKET:
switch (name) {
@@ -10408,7 +10004,7 @@ tcp_opt_set(queue_t *q, uint_t optset_context, int level, int name,
break;
case SO_KEEPALIVE:
if (checkonly) {
- /* T_CHECK case */
+ /* check only case */
break;
}
@@ -10462,8 +10058,11 @@ tcp_opt_set(queue_t *q, uint_t optset_context, int level, int name,
}
break;
case SO_OOBINLINE:
- if (!checkonly)
+ if (!checkonly) {
tcp->tcp_oobinline = onoff;
+ if (IPCL_IS_NONSTR(tcp->tcp_connp))
+ proto_set_rx_oob_opt(connp, onoff);
+ }
break;
case SO_DGRAM_ERRIND:
if (!checkonly)
@@ -10740,7 +10339,6 @@ tcp_opt_set(queue_t *q, uint_t optset_context, int level, int name,
/*
* Only sticky options; no ancillary data
*/
- ASSERT(thisdg_attrs == NULL);
ipp = &tcp->tcp_sticky_ipp;
switch (name) {
@@ -10764,22 +10362,15 @@ tcp_opt_set(queue_t *q, uint_t optset_context, int level, int name,
(uint8_t)*i1;
ipp->ipp_fields |= IPPF_UNICAST_HOPS;
}
- reterr = tcp_build_hdrs(q, tcp);
+ reterr = tcp_build_hdrs(tcp);
if (reterr != 0)
return (reterr);
}
break;
case IPV6_BOUND_IF:
if (!checkonly) {
- int error = 0;
-
tcp->tcp_bound_if = *i1;
- error = ip_opt_set_ill(tcp->tcp_connp, *i1,
- B_TRUE, checkonly, level, name, mblk);
- if (error != 0) {
- *outlenp = 0;
- return (error);
- }
+ PASS_OPT_TO_IP(connp);
}
break;
/*
@@ -10795,6 +10386,7 @@ tcp_opt_set(queue_t *q, uint_t optset_context, int level, int name,
~TCP_IPV6_RECVPKTINFO;
/* Force it to be sent up with the next msg */
tcp->tcp_recvifindex = 0;
+ PASS_OPT_TO_IP(connp);
}
break;
case IPV6_RECVTCLASS:
@@ -10805,6 +10397,7 @@ tcp_opt_set(queue_t *q, uint_t optset_context, int level, int name,
else
tcp->tcp_ipv6_recvancillary &=
~TCP_IPV6_RECVTCLASS;
+ PASS_OPT_TO_IP(connp);
}
break;
case IPV6_RECVHOPLIMIT:
@@ -10817,6 +10410,7 @@ tcp_opt_set(queue_t *q, uint_t optset_context, int level, int name,
~TCP_IPV6_RECVHOPLIMIT;
/* Force it to be sent up with the next msg */
tcp->tcp_recvhops = 0xffffffffU;
+ PASS_OPT_TO_IP(connp);
}
break;
case IPV6_RECVHOPOPTS:
@@ -10827,6 +10421,7 @@ tcp_opt_set(queue_t *q, uint_t optset_context, int level, int name,
else
tcp->tcp_ipv6_recvancillary &=
~TCP_IPV6_RECVHOPOPTS;
+ PASS_OPT_TO_IP(connp);
}
break;
case IPV6_RECVDSTOPTS:
@@ -10837,6 +10432,7 @@ tcp_opt_set(queue_t *q, uint_t optset_context, int level, int name,
else
tcp->tcp_ipv6_recvancillary &=
~TCP_IPV6_RECVDSTOPTS;
+ PASS_OPT_TO_IP(connp);
}
break;
case _OLD_IPV6_RECVDSTOPTS:
@@ -10857,6 +10453,7 @@ tcp_opt_set(queue_t *q, uint_t optset_context, int level, int name,
else
tcp->tcp_ipv6_recvancillary &=
~TCP_IPV6_RECVRTHDR;
+ PASS_OPT_TO_IP(connp);
}
break;
case IPV6_RECVRTHDRDSTOPTS:
@@ -10867,6 +10464,7 @@ tcp_opt_set(queue_t *q, uint_t optset_context, int level, int name,
else
tcp->tcp_ipv6_recvancillary &=
~TCP_IPV6_RECVRTDSTOPTS;
+ PASS_OPT_TO_IP(connp);
}
break;
case IPV6_PKTINFO:
@@ -10890,11 +10488,11 @@ tcp_opt_set(queue_t *q, uint_t optset_context, int level, int name,
if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr))
return (EINVAL);
/*
- * ip6_set_pktinfo() validates the source
- * address and interface index.
+ * IP will validate the source address and
+ * interface index.
*/
- reterr = ip6_set_pktinfo(cr, tcp->tcp_connp,
- pkti, mblk);
+ reterr = ip_set_options(tcp->tcp_connp, level,
+ name, invalp, inlen, cr);
if (reterr != 0)
return (reterr);
ipp->ipp_ifindex = pkti->ipi6_ifindex;
@@ -10908,9 +10506,10 @@ tcp_opt_set(queue_t *q, uint_t optset_context, int level, int name,
else
ipp->ipp_fields &= ~IPPF_ADDR;
}
- reterr = tcp_build_hdrs(q, tcp);
+ reterr = tcp_build_hdrs(tcp);
if (reterr != 0)
return (reterr);
+ PASS_OPT_TO_IP(connp);
break;
case IPV6_TCLASS:
if (inlen != 0 && inlen != sizeof (int))
@@ -10931,7 +10530,7 @@ tcp_opt_set(queue_t *q, uint_t optset_context, int level, int name,
}
ipp->ipp_fields |= IPPF_TCLASS;
}
- reterr = tcp_build_hdrs(q, tcp);
+ reterr = tcp_build_hdrs(tcp);
if (reterr != 0)
return (reterr);
break;
@@ -10962,9 +10561,10 @@ tcp_opt_set(queue_t *q, uint_t optset_context, int level, int name,
else
ipp->ipp_fields &= ~IPPF_NEXTHOP;
}
- reterr = tcp_build_hdrs(q, tcp);
+ reterr = tcp_build_hdrs(tcp);
if (reterr != 0)
return (reterr);
+ PASS_OPT_TO_IP(connp);
break;
case IPV6_HOPOPTS: {
ip6_hbh_t *hopts = (ip6_hbh_t *)invalp;
@@ -10989,7 +10589,7 @@ tcp_opt_set(queue_t *q, uint_t optset_context, int level, int name,
ipp->ipp_fields &= ~IPPF_HOPOPTS;
else
ipp->ipp_fields |= IPPF_HOPOPTS;
- reterr = tcp_build_hdrs(q, tcp);
+ reterr = tcp_build_hdrs(tcp);
if (reterr != 0)
return (reterr);
break;
@@ -11017,7 +10617,7 @@ tcp_opt_set(queue_t *q, uint_t optset_context, int level, int name,
ipp->ipp_fields &= ~IPPF_RTDSTOPTS;
else
ipp->ipp_fields |= IPPF_RTDSTOPTS;
- reterr = tcp_build_hdrs(q, tcp);
+ reterr = tcp_build_hdrs(tcp);
if (reterr != 0)
return (reterr);
break;
@@ -11045,7 +10645,7 @@ tcp_opt_set(queue_t *q, uint_t optset_context, int level, int name,
ipp->ipp_fields &= ~IPPF_DSTOPTS;
else
ipp->ipp_fields |= IPPF_DSTOPTS;
- reterr = tcp_build_hdrs(q, tcp);
+ reterr = tcp_build_hdrs(tcp);
if (reterr != 0)
return (reterr);
break;
@@ -11073,14 +10673,15 @@ tcp_opt_set(queue_t *q, uint_t optset_context, int level, int name,
ipp->ipp_fields &= ~IPPF_RTHDR;
else
ipp->ipp_fields |= IPPF_RTHDR;
- reterr = tcp_build_hdrs(q, tcp);
+ reterr = tcp_build_hdrs(tcp);
if (reterr != 0)
return (reterr);
break;
}
case IPV6_V6ONLY:
- if (!checkonly)
+ if (!checkonly) {
tcp->tcp_connp->conn_ipv6_v6only = onoff;
+ }
break;
case IPV6_USE_MIN_MTU:
if (inlen != sizeof (int))
@@ -11140,6 +10741,80 @@ tcp_opt_set(queue_t *q, uint_t optset_context, int level, int name,
return (0);
}
+/*
+ * Queue-based variant of tcp_opt_set(): maps the queue to its conn_t and
+ * forwards every argument except mblk, which tcp_opt_set() does not take.
+ */
+/* ARGSUSED */
+int
+tcp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name,
+    uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
+    void *thisdg_attrs, cred_t *cr, mblk_t *mblk)
+{
+	return (tcp_opt_set(Q_TO_CONN(q), optset_context, level, name, inlen,
+	    invalp, outlenp, outvalp, thisdg_attrs, cr));
+}
+
+/*
+ * Socket downcall to set an option on a TCP endpoint; returns a UNIX error.
+ *
+ * TCP_NODELAY is applied directly under tcp_non_sq_lock.  Every other
+ * option is checked by proto_opt_check() and applied by tcp_opt_set()
+ * between squeue_synch_enter() and squeue_synch_exit(); a negative result
+ * from tcp_opt_set() hands the request on to ip_set_options().
+ */
+int
+tcp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
+    const void *optvalp, socklen_t optlen, cred_t *cr)
+{
+	conn_t		*connp = (conn_t *)proto_handle;
+	squeue_t	*sqp = connp->conn_sqp;
+	int		error;
+
+	/*
+	 * Entering the squeue synchronously can result in a context switch,
+	 * which can cause a rather severe performance degradation. So we try
+	 * to handle whatever options we can without entering the squeue.
+	 */
+	if (level == IPPROTO_TCP && option_name == TCP_NODELAY) {
+		tcp_t	*tcp = connp->conn_tcp;
+
+		if (optlen != sizeof (int32_t))
+			return (EINVAL);
+
+		mutex_enter(&tcp->tcp_non_sq_lock);
+		/* Non-zero sets the limit to 1, zero sets it to tcp_mss. */
+		tcp->tcp_naglim = *(int *)optvalp ? 1 : tcp->tcp_mss;
+		mutex_exit(&tcp->tcp_non_sq_lock);
+		return (0);
+	}
+
+	error = squeue_synch_enter(sqp, connp, 0);
+	if (error == ENOMEM) {
+		return (ENOMEM);
+	}
+
+	error = proto_opt_check(level, option_name, optlen, NULL,
+	    tcp_opt_obj.odb_opt_des_arr,
+	    tcp_opt_obj.odb_opt_arr_cnt,
+	    tcp_opt_obj.odb_topmost_tpiprovider,
+	    B_TRUE, B_FALSE, cr);
+	if (error != 0) {
+		/* Negative results are converted by proto_tlitosyserr(). */
+		if (error < 0)
+			error = proto_tlitosyserr(-error);
+		squeue_synch_exit(sqp, connp);
+		return (error);
+	}
+
+	error = tcp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name,
+	    optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp,
+	    NULL, cr);
+	squeue_synch_exit(sqp, connp);
+
+	/* A negative result means the option is passed on to ip. */
+	if (error < 0) {
+		error = ip_set_options(connp, level, option_name, optvalp,
+		    optlen, cr);
+	}
+	return (error);
+}
+
/*
* Update tcp_sticky_hdrs based on tcp_sticky_ipp.
* The headers include ip6i_t (if needed), ip6_t, any sticky extension
@@ -11148,7 +10823,7 @@ tcp_opt_set(queue_t *q, uint_t optset_context, int level, int name,
* Returns failure if can't allocate memory.
*/
static int
-tcp_build_hdrs(queue_t *q, tcp_t *tcp)
+tcp_build_hdrs(tcp_t *tcp)
{
char *hdrs;
uint_t hdrs_len;
@@ -11157,6 +10832,7 @@ tcp_build_hdrs(queue_t *q, tcp_t *tcp)
ip6_pkt_t *ipp = &tcp->tcp_sticky_ipp;
in6_addr_t src, dst;
tcp_stack_t *tcps = tcp->tcp_tcps;
+ conn_t *connp = tcp->tcp_connp;
/*
* save the existing tcp header and source/dest IP addresses
@@ -11241,7 +10917,8 @@ tcp_build_hdrs(queue_t *q, tcp_t *tcp)
}
/* Try to get everything in a single mblk */
- (void) mi_set_sth_wroff(RD(q), hdrs_len + tcps->tcps_wroff_xtra);
+ (void) proto_set_tx_wroff(tcp->tcp_rq, connp,
+ hdrs_len + tcps->tcps_wroff_xtra);
return (0);
}
@@ -11368,6 +11045,7 @@ tcp_opt_set_header(tcp_t *tcp, boolean_t checkonly, uchar_t *ptr, uint_t len)
uint8_t *ip_optp;
tcph_t *new_tcph;
tcp_stack_t *tcps = tcp->tcp_tcps;
+ conn_t *connp = tcp->tcp_connp;
if ((len > TCP_MAX_IP_OPTIONS_LENGTH) || (len & 0x3))
return (EINVAL);
@@ -11408,7 +11086,7 @@ tcp_opt_set_header(tcp_t *tcp, boolean_t checkonly, uchar_t *ptr, uint_t len)
tcp->tcp_hdr_len = len + tcph_len;
if (!TCP_IS_DETACHED(tcp)) {
/* Always allocate room for all options. */
- (void) mi_set_sth_wroff(tcp->tcp_rq,
+ (void) proto_set_tx_wroff(tcp->tcp_rq, connp,
TCP_MAX_COMBINED_HEADER_LENGTH + tcps->tcps_wroff_xtra);
}
return (0);
@@ -11721,26 +11399,55 @@ tcp_reass_elim_overlap(tcp_t *tcp, mblk_t *mp)
tcp->tcp_reass_tail = mp;
}
+/*
+ * Raise tcp_rwnd to tcp_recv_hiwater when that grows the window the peer
+ * currently knows about by at least one MSS.
+ *
+ * Returns TH_ACK_NEEDED when a window update should be sent immediately,
+ * otherwise 0.
+ */
+static uint_t
+tcp_rwnd_reopen(tcp_t *tcp)
+{
+	uint_t	peer_win;
+
+	/*
+	 * Learn the latest rwnd information that we sent to the other side,
+	 * then reduce it by tcp_rnxt - tcp_rack.  The result is the peer's
+	 * calculated send window (our receive window).
+	 */
+	peer_win = ((uint_t)BE16_TO_U16(tcp->tcp_tcph->th_win))
+	    << tcp->tcp_rcv_ws;
+	peer_win -= tcp->tcp_rnxt - tcp->tcp_rack;
+
+	/*
+	 * Receiver SWS avoidance: only increase the receive window to max
+	 * when the increase is at least 1 MSS.
+	 */
+	if (tcp->tcp_recv_hiwater - peer_win < tcp->tcp_mss)
+		return (0);
+
+	tcp->tcp_rwnd = tcp->tcp_recv_hiwater;
+
+	/*
+	 * If the window that the other side knows is less than max
+	 * deferred acks segments, send an update immediately.
+	 */
+	if (peer_win < tcp->tcp_rack_cur_max * tcp->tcp_mss) {
+		BUMP_MIB(&tcp->tcp_tcps->tcps_mib, tcpOutWinUpdate);
+		return (TH_ACK_NEEDED);
+	}
+	return (0);
+}
+
/*
* Send up all messages queued on tcp_rcv_list.
*/
static uint_t
-tcp_rcv_drain(queue_t *q, tcp_t *tcp)
+tcp_rcv_drain(tcp_t *tcp)
{
mblk_t *mp;
uint_t ret = 0;
- uint_t thwin;
#ifdef DEBUG
uint_t cnt = 0;
#endif
- tcp_stack_t *tcps = tcp->tcp_tcps;
+ queue_t *q = tcp->tcp_rq;
/* Can't drain on an eager connection */
if (tcp->tcp_listener != NULL)
return (ret);
- /* Can't be sodirect enabled */
- ASSERT(SOD_NOT_ENABLED(tcp));
+ /* Can't be a non-STREAMS connection or sodirect enabled */
+ ASSERT((!IPCL_IS_NONSTR(tcp->tcp_connp)) && SOD_NOT_ENABLED(tcp));
/* No need for the push timer now. */
if (tcp->tcp_push_tid != 0) {
@@ -11758,7 +11465,8 @@ tcp_rcv_drain(queue_t *q, tcp_t *tcp)
* some work.
*/
if ((tcp->tcp_fused || tcp->tcp_fused_sigurg)) {
- ASSERT(tcp->tcp_fused_sigurg_mp != NULL);
+ ASSERT(IPCL_IS_NONSTR(tcp->tcp_connp) ||
+ tcp->tcp_fused_sigurg_mp != NULL);
if (tcp_fuse_rcv_drain(q, tcp, tcp->tcp_fused ? NULL :
&tcp->tcp_fused_sigurg_mp))
return (ret);
@@ -11779,32 +11487,16 @@ tcp_rcv_drain(queue_t *q, tcp_t *tcp)
}
putnext(q, mp);
}
+#ifdef DEBUG
ASSERT(cnt == tcp->tcp_rcv_cnt);
+#endif
tcp->tcp_rcv_last_head = NULL;
tcp->tcp_rcv_last_tail = NULL;
tcp->tcp_rcv_cnt = 0;
- /* Learn the latest rwnd information that we sent to the other side. */
- thwin = ((uint_t)BE16_TO_U16(tcp->tcp_tcph->th_win))
- << tcp->tcp_rcv_ws;
- /* This is peer's calculated send window (our receive window). */
- thwin -= tcp->tcp_rnxt - tcp->tcp_rack;
- /*
- * Increase the receive window to max. But we need to do receiver
- * SWS avoidance. This means that we need to check the increase of
- * of receive window is at least 1 MSS.
- */
- if (canputnext(q) && (q->q_hiwat - thwin >= tcp->tcp_mss)) {
- /*
- * If the window that the other side knows is less than max
- * deferred acks segments, send an update immediately.
- */
- if (thwin < tcp->tcp_rack_cur_max * tcp->tcp_mss) {
- BUMP_MIB(&tcps->tcps_mib, tcpOutWinUpdate);
- ret = TH_ACK_NEEDED;
- }
- tcp->tcp_rwnd = q->q_hiwat;
- }
+ if (canputnext(q))
+ return (tcp_rwnd_reopen(tcp));
+
return (ret);
}
@@ -12993,8 +12685,27 @@ tcp_send_conn_ind(void *arg, mblk_t *mp, void *arg2)
tcp->tcp_remote)] = tcp->tcp_remote;
}
mutex_exit(&listener->tcp_eager_lock);
- if (need_send_conn_ind)
- putnext(listener->tcp_rq, mp);
+ if (need_send_conn_ind) {
+ if (IPCL_IS_NONSTR(lconnp)) {
+ ASSERT(tcp->tcp_listener == listener);
+ ASSERT(tcp->tcp_saved_listener == listener);
+ if ((*lconnp->conn_upcalls->su_newconn)
+ (lconnp->conn_upper_handle,
+ (sock_lower_handle_t)tcp->tcp_connp,
+ &sock_tcp_downcalls, DB_CRED(mp), DB_CPID(mp),
+ &tcp->tcp_connp->conn_upcalls) != NULL) {
+ /*
+ * Keep the message around
+ * in case of fallback
+ */
+ tcp->tcp_conn.tcp_eager_conn_ind = mp;
+ } else {
+ freemsg(mp);
+ }
+ } else {
+ putnext(listener->tcp_rq, mp);
+ }
+ }
}
mblk_t *
@@ -13223,6 +12934,7 @@ tcp_rput_data(void *arg, mblk_t *mp, void *arg2)
rptr = mp->b_rptr;
}
ASSERT(DB_TYPE(mp) == M_DATA);
+ ASSERT(mp->b_next == NULL);
tcph = (tcph_t *)&rptr[ip_hdr_len];
seg_seq = ABE32_TO_U32(tcph->th_seq);
@@ -13339,8 +13051,8 @@ tcp_rput_data(void *arg, mblk_t *mp, void *arg2)
* The following changes our rwnd to be a multiple of the
* MIN(peer MSS, our MSS) for performance reason.
*/
- (void) tcp_rwnd_set(tcp, MSS_ROUNDUP(tcp->tcp_rq->q_hiwat,
- tcp->tcp_mss));
+ (void) tcp_rwnd_set(tcp,
+ MSS_ROUNDUP(tcp->tcp_recv_hiwater, tcp->tcp_mss));
/* Is the other end ECN capable? */
if (tcp->tcp_ecn_ok) {
@@ -13361,12 +13073,13 @@ tcp_rput_data(void *arg, mblk_t *mp, void *arg2)
if (!TCP_IS_DETACHED(tcp)) {
/* Allocate room for SACK options if needed. */
if (tcp->tcp_snd_sack_ok) {
- (void) mi_set_sth_wroff(tcp->tcp_rq,
- tcp->tcp_hdr_len + TCPOPT_MAX_SACK_LEN +
+ (void) proto_set_tx_wroff(tcp->tcp_rq, connp,
+ tcp->tcp_hdr_len +
+ TCPOPT_MAX_SACK_LEN +
(tcp->tcp_loopback ? 0 :
tcps->tcps_wroff_xtra));
} else {
- (void) mi_set_sth_wroff(tcp->tcp_rq,
+ (void) proto_set_tx_wroff(tcp->tcp_rq, connp,
tcp->tcp_hdr_len +
(tcp->tcp_loopback ? 0 :
tcps->tcps_wroff_xtra));
@@ -13466,8 +13179,18 @@ tcp_rput_data(void *arg, mblk_t *mp, void *arg2)
BUMP_LOCAL(tcp->tcp_obsegs);
BUMP_MIB(&tcps->tcps_mib, tcpOutAck);
- /* Send up T_CONN_CON */
- putnext(tcp->tcp_rq, mp1);
+ if (!IPCL_IS_NONSTR(connp)) {
+ /* Send up T_CONN_CON */
+ putnext(tcp->tcp_rq, mp1);
+ } else {
+ (*connp->conn_upcalls->
+ su_connected)
+ (connp->conn_upper_handle,
+ tcp->tcp_connid,
+ DB_CRED(mp1),
+ DB_CPID(mp1));
+ freemsg(mp1);
+ }
freemsg(mp);
return;
@@ -13481,7 +13204,15 @@ tcp_rput_data(void *arg, mblk_t *mp, void *arg2)
*/
TCP_STAT(tcps, tcp_fusion_unfusable);
tcp->tcp_unfusable = B_TRUE;
- putnext(tcp->tcp_rq, mp1);
+ if (!IPCL_IS_NONSTR(connp)) {
+ putnext(tcp->tcp_rq, mp1);
+ } else {
+ (*connp->conn_upcalls->su_connected)
+ (connp->conn_upper_handle,
+ tcp->tcp_connid, DB_CRED(mp1),
+ DB_CPID(mp1));
+ freemsg(mp1);
+ }
}
/*
@@ -13835,31 +13566,40 @@ try_again:;
if ((flags & TH_URG) &&
(!tcp->tcp_urp_last_valid || SEQ_GT(urp + seg_seq,
tcp->tcp_urp_last))) {
- mp1 = allocb(0, BPRI_MED);
- if (mp1 == NULL) {
- freemsg(mp);
- return;
- }
- if (!TCP_IS_DETACHED(tcp) &&
- !putnextctl1(tcp->tcp_rq, M_PCSIG,
- SIGURG)) {
- /* Try again on the rexmit. */
- freemsg(mp1);
- freemsg(mp);
- return;
+ if (IPCL_IS_NONSTR(connp)) {
+ if (!TCP_IS_DETACHED(tcp)) {
+ (*connp->conn_upcalls->
+ su_signal_oob)
+ (connp->conn_upper_handle,
+ urp);
+ }
+ } else {
+ mp1 = allocb(0, BPRI_MED);
+ if (mp1 == NULL) {
+ freemsg(mp);
+ return;
+ }
+ if (!TCP_IS_DETACHED(tcp) &&
+ !putnextctl1(tcp->tcp_rq,
+ M_PCSIG, SIGURG)) {
+ /* Try again on the rexmit. */
+ freemsg(mp1);
+ freemsg(mp);
+ return;
+ }
+ /*
+ * If the next byte would be the mark
+ * then mark with MARKNEXT else mark
+ * with NOTMARKNEXT.
+ */
+ if (gap == 0 && urp == 0)
+ mp1->b_flag |= MSGMARKNEXT;
+ else
+ mp1->b_flag |= MSGNOTMARKNEXT;
+ freemsg(tcp->tcp_urp_mark_mp);
+ tcp->tcp_urp_mark_mp = mp1;
+ flags |= TH_SEND_URP_MARK;
}
- /*
- * If the next byte would be the mark
- * then mark with MARKNEXT else mark
- * with NOTMARKNEXT.
- */
- if (gap == 0 && urp == 0)
- mp1->b_flag |= MSGMARKNEXT;
- else
- mp1->b_flag |= MSGNOTMARKNEXT;
- freemsg(tcp->tcp_urp_mark_mp);
- tcp->tcp_urp_mark_mp = mp1;
- flags |= TH_SEND_URP_MARK;
tcp->tcp_urp_last_valid = B_TRUE;
tcp->tcp_urp_last = urp + seg_seq;
}
@@ -14070,50 +13810,60 @@ ok:;
if (flags & TH_URG && urp >= 0) {
if (!tcp->tcp_urp_last_valid ||
SEQ_GT(urp + seg_seq, tcp->tcp_urp_last)) {
- /*
- * If we haven't generated the signal yet for this
- * urgent pointer value, do it now. Also, send up a
- * zero-length M_DATA indicating whether or not this is
- * the mark. The latter is not needed when a
- * T_EXDATA_IND is sent up. However, if there are
- * allocation failures this code relies on the sender
- * retransmitting and the socket code for determining
- * the mark should not block waiting for the peer to
- * transmit. Thus, for simplicity we always send up the
- * mark indication.
- */
- mp1 = allocb(0, BPRI_MED);
- if (mp1 == NULL) {
- freemsg(mp);
- return;
- }
- if (!TCP_IS_DETACHED(tcp) &&
- !putnextctl1(tcp->tcp_rq, M_PCSIG, SIGURG)) {
- /* Try again on the rexmit. */
- freemsg(mp1);
- freemsg(mp);
- return;
- }
- /*
- * Mark with NOTMARKNEXT for now.
- * The code below will change this to MARKNEXT
- * if we are at the mark.
- *
- * If there are allocation failures (e.g. in dupmsg
- * below) the next time tcp_rput_data sees the urgent
- * segment it will send up the MSG*MARKNEXT message.
- */
- mp1->b_flag |= MSGNOTMARKNEXT;
- freemsg(tcp->tcp_urp_mark_mp);
- tcp->tcp_urp_mark_mp = mp1;
- flags |= TH_SEND_URP_MARK;
+ if (IPCL_IS_NONSTR(connp)) {
+ if (!TCP_IS_DETACHED(tcp)) {
+ (*connp->conn_upcalls->su_signal_oob)
+ (connp->conn_upper_handle, urp);
+ }
+ } else {
+ /*
+ * If we haven't generated the signal yet for
+ * this urgent pointer value, do it now. Also,
+ * send up a zero-length M_DATA indicating
+ * whether or not this is the mark. The latter
+ * is not needed when a T_EXDATA_IND is sent up.
+ * However, if there are allocation failures
+ * this code relies on the sender retransmitting
+ * and the socket code for determining the mark
+ * should not block waiting for the peer to
+ * transmit. Thus, for simplicity we always
+ * send up the mark indication.
+ */
+ mp1 = allocb(0, BPRI_MED);
+ if (mp1 == NULL) {
+ freemsg(mp);
+ return;
+ }
+ if (!TCP_IS_DETACHED(tcp) &&
+ !putnextctl1(tcp->tcp_rq, M_PCSIG,
+ SIGURG)) {
+ /* Try again on the rexmit. */
+ freemsg(mp1);
+ freemsg(mp);
+ return;
+ }
+ /*
+ * Mark with NOTMARKNEXT for now.
+ * The code below will change this to MARKNEXT
+ * if we are at the mark.
+ *
+ * If there are allocation failures (e.g. in
+ * dupmsg below) the next time tcp_rput_data
+ * sees the urgent segment it will send up the
+ * MSGMARKNEXT message.
+ */
+ mp1->b_flag |= MSGNOTMARKNEXT;
+ freemsg(tcp->tcp_urp_mark_mp);
+ tcp->tcp_urp_mark_mp = mp1;
+ flags |= TH_SEND_URP_MARK;
#ifdef DEBUG
- (void) strlog(TCP_MOD_ID, 0, 1, SL_TRACE,
- "tcp_rput: sent M_PCSIG 2 seq %x urp %x "
- "last %x, %s",
- seg_seq, urp, tcp->tcp_urp_last,
- tcp_display(tcp, NULL, DISP_PORT_ONLY));
+ (void) strlog(TCP_MOD_ID, 0, 1, SL_TRACE,
+ "tcp_rput: sent M_PCSIG 2 seq %x urp %x "
+ "last %x, %s",
+ seg_seq, urp, tcp->tcp_urp_last,
+ tcp_display(tcp, NULL, DISP_PORT_ONLY));
#endif /* DEBUG */
+ }
tcp->tcp_urp_last_valid = B_TRUE;
tcp->tcp_urp_last = urp + seg_seq;
} else if (tcp->tcp_urp_mark_mp != NULL) {
@@ -14218,7 +13968,15 @@ ok:;
* This segment contains only the urgent byte. We
* have to allocate the T_exdata_ind, if we can.
*/
- if (!tcp->tcp_urp_mp) {
+ if (IPCL_IS_NONSTR(connp)) {
+ int error;
+
+ (*connp->conn_upcalls->su_recv)
+ (connp->conn_upper_handle, mp, seg_len,
+ MSG_OOB, &error, NULL);
+ mp = NULL;
+ goto update_ack;
+ } else if (!tcp->tcp_urp_mp) {
struct T_exdata_ind *tei;
mp1 = allocb(sizeof (struct T_exdata_ind),
BPRI_MED);
@@ -14299,15 +14057,16 @@ ok:;
seg_len, flags,
tcp_display(tcp, NULL, DISP_PORT_ONLY));
#endif /* DEBUG */
- } else {
- /* Data left until we hit mark */
+ }
#ifdef DEBUG
+ else {
+ /* Data left until we hit mark */
(void) strlog(TCP_MOD_ID, 0, 1, SL_TRACE,
"tcp_rput: URP %d bytes left, %s",
urp - seg_len, tcp_display(tcp, NULL,
DISP_PORT_ONLY));
-#endif /* DEBUG */
}
+#endif /* DEBUG */
}
process_ack:
@@ -15194,6 +14953,7 @@ est:
mp = mp->b_cont;
freeb(mp1);
}
+update_ack:
tcph = tcp->tcp_tcph;
tcp->tcp_rack_cnt++;
{
@@ -15239,6 +14999,9 @@ est:
tcp->tcp_rnxt += seg_len;
U32_TO_ABE32(tcp->tcp_rnxt, tcph->th_ack);
+ if (mp == NULL)
+ goto xmit_check;
+
/* Update SACK list */
if (tcp->tcp_snd_sack_ok && tcp->tcp_num_sack_blk > 0) {
tcp_sack_remove(tcp->tcp_sack_list, tcp->tcp_rnxt,
@@ -15297,17 +15060,28 @@ est:
if (!(sodp->sod_state & SOD_ENABLED) ||
(tcp->tcp_kssl_ctx != NULL &&
DB_TYPE(mp) == M_DATA)) {
- mutex_exit(sodp->sod_lockp);
sodp = NULL;
}
+ mutex_exit(sodp->sod_lockp);
}
if (mp->b_datap->db_type != M_DATA ||
(flags & TH_MARKNEXT_NEEDED)) {
- if (sodp != NULL) {
- if (sodp->sod_uioa.uioa_state & UIOA_ENABLED) {
- sodp->sod_uioa.uioa_state &= UIOA_CLR;
- sodp->sod_uioa.uioa_state |= UIOA_FINI;
+ if (IPCL_IS_NONSTR(connp)) {
+ int error;
+
+ if ((*connp->conn_upcalls->su_recv)
+ (connp->conn_upper_handle, mp,
+ seg_len, 0, &error, NULL) <= 0) {
+ if (error == ENOSPC) {
+ tcp->tcp_rwnd -= seg_len;
+ } else if (error == EOPNOTSUPP) {
+ tcp_rcv_enqueue(tcp, mp,
+ seg_len);
+ }
}
+ } else if (sodp != NULL) {
+ mutex_enter(sodp->sod_lockp);
+ SOD_UIOAFINI(sodp);
if (!SOD_QEMPTY(sodp) &&
(sodp->sod_state & SOD_WAKE_NOT)) {
flags |= tcp_rcv_sod_wakeup(tcp, sodp);
@@ -15316,7 +15090,7 @@ est:
mutex_exit(sodp->sod_lockp);
}
} else if (tcp->tcp_rcv_list != NULL) {
- flags |= tcp_rcv_drain(tcp->tcp_rq, tcp);
+ flags |= tcp_rcv_drain(tcp);
}
ASSERT(tcp->tcp_rcv_list == NULL ||
tcp->tcp_fused_sigurg);
@@ -15338,23 +15112,44 @@ est:
DTRACE_PROBE1(kssl_mblk__ksslinput_data1,
mblk_t *, mp);
tcp_kssl_input(tcp, mp);
- } else {
+ } else if (!IPCL_IS_NONSTR(connp)) {
+ /* Already handled non-STREAMS case. */
putnext(tcp->tcp_rq, mp);
if (!canputnext(tcp->tcp_rq))
tcp->tcp_rwnd -= seg_len;
}
} else if ((tcp->tcp_kssl_ctx != NULL) &&
(DB_TYPE(mp) == M_DATA)) {
- /* Do SSL processing first */
- DTRACE_PROBE1(kssl_mblk__ksslinput_data2,
- mblk_t *, mp);
+ /* Does this need SSL processing first? */
+ DTRACE_PROBE1(kssl_mblk__ksslinput_data2, mblk_t *, mp);
tcp_kssl_input(tcp, mp);
+ } else if (IPCL_IS_NONSTR(connp)) {
+ /* Non-STREAMS socket */
+ boolean_t push = flags & (TH_PUSH|TH_FIN);
+ int error;
+
+ if ((*connp->conn_upcalls->su_recv)(
+ connp->conn_upper_handle,
+ mp, seg_len, 0, &error, &push) <= 0) {
+ if (error == ENOSPC) {
+ tcp->tcp_rwnd -= seg_len;
+ } else if (error == EOPNOTSUPP) {
+ tcp_rcv_enqueue(tcp, mp, seg_len);
+ }
+ } else if (push) {
+ /*
+ * PUSH bit set and sockfs is not
+ * flow controlled
+ */
+ flags |= tcp_rwnd_reopen(tcp);
+ }
} else if (sodp != NULL) {
/*
* Sodirect so all mblk_t's are queued on the
* socket directly, check for wakeup of blocked
* reader (if any), and last if flow-controled.
*/
+ mutex_enter(sodp->sod_lockp);
flags |= tcp_rcv_sod_enqueue(tcp, sodp, mp, seg_len);
if ((sodp->sod_state & SOD_WAKE_NEED) ||
(flags & (TH_PUSH|TH_FIN))) {
@@ -15368,7 +15163,7 @@ est:
mutex_exit(sodp->sod_lockp);
}
} else if ((flags & (TH_PUSH|TH_FIN)) ||
- tcp->tcp_rcv_cnt + seg_len >= tcp->tcp_rq->q_hiwat >> 3) {
+ tcp->tcp_rcv_cnt + seg_len >= tcp->tcp_recv_hiwater >> 3) {
if (tcp->tcp_rcv_list != NULL) {
/*
* Enqueue the new segment first and then
@@ -15379,12 +15174,12 @@ est:
* This way can remove the else part later
* on.
*
- * We don't this to avoid one more call to
+ * We don't do this to avoid one more call to
* canputnext() as tcp_rcv_drain() needs to
* call canputnext().
*/
tcp_rcv_enqueue(tcp, mp, seg_len);
- flags |= tcp_rcv_drain(tcp->tcp_rq, tcp);
+ flags |= tcp_rcv_drain(tcp);
} else {
putnext(tcp->tcp_rq, mp);
if (!canputnext(tcp->tcp_rq))
@@ -15394,6 +15189,8 @@ est:
/*
* Enqueue all packets when processing an mblk
* from the co queue and also enqueue normal packets.
+ * For packets which belong to SSL stream do SSL
+ * processing first.
*/
tcp_rcv_enqueue(tcp, mp, seg_len);
}
@@ -15409,7 +15206,8 @@ est:
* such that the Q is empty now even though data was added
* above.
*/
- if (((sodp != NULL && !SOD_QEMPTY(sodp) &&
+ if (!IPCL_IS_NONSTR(connp) &&
+ ((sodp != NULL && !SOD_QEMPTY(sodp) &&
(sodp->sod_state & SOD_WAKE_NOT)) ||
(sodp == NULL && tcp->tcp_rcv_list != NULL)) &&
tcp->tcp_push_tid == 0) {
@@ -15495,6 +15293,7 @@ xmit_check:
ack_check:
if (flags & TH_SEND_URP_MARK) {
ASSERT(tcp->tcp_urp_mark_mp);
+ ASSERT(!IPCL_IS_NONSTR(connp));
/*
* Send up any queued data and then send the mark message
*/
@@ -15514,7 +15313,7 @@ ack_check:
flags |= tcp_rcv_sod_wakeup(tcp, sodp);
/* sod_wakeup() does the mutex_exit() */
} else if (tcp->tcp_rcv_list != NULL) {
- flags |= tcp_rcv_drain(tcp->tcp_rq, tcp);
+ flags |= tcp_rcv_drain(tcp);
ASSERT(tcp->tcp_rcv_list == NULL ||
tcp->tcp_fused_sigurg);
@@ -15568,6 +15367,14 @@ ack_check:
ASSERT(tcp->tcp_listener == NULL);
+ if (IPCL_IS_NONSTR(connp)) {
+ ASSERT(tcp->tcp_ordrel_mp == NULL);
+ tcp->tcp_ordrel_done = B_TRUE;
+ (*connp->conn_upcalls->su_opctl)
+ (connp->conn_upper_handle, SOCK_OPCTL_SHUT_RECV, 0);
+ goto done;
+ }
+
SOD_PTR_ENTER(tcp, sodp);
if (sodp != NULL) {
if (sodp->sod_uioa.uioa_state & UIOA_ENABLED) {
@@ -15588,7 +15395,7 @@ ack_check:
/*
* Push any mblk(s) enqueued from co processing.
*/
- flags |= tcp_rcv_drain(tcp->tcp_rq, tcp);
+ flags |= tcp_rcv_drain(tcp);
ASSERT(tcp->tcp_rcv_list == NULL ||
tcp->tcp_fused_sigurg);
@@ -15934,7 +15741,7 @@ tcp_rput_add_ancillary(tcp_t *tcp, mblk_t *mp, ip6_pkt_t *ipp)
* thus we clear out all addresses and ports.
*/
static void
-tcp_bind_failed(tcp_t *tcp, mblk_t *mp, int error)
+tcp_tpi_bind_failed(tcp_t *tcp, mblk_t *mp, int error)
{
queue_t *q = tcp->tcp_rq;
tcph_t *tcph;
@@ -15980,7 +15787,7 @@ tcp_bind_failed(tcp_t *tcp, mblk_t *mp, int error)
tea->ERROR_prim = T_CONN_REQ;
break;
default:
- panic("tcp_bind_failed: unexpected TPI type");
+ panic("tcp_tpi_bind_failed: unexpected TPI type");
/*NOTREACHED*/
}
@@ -16015,17 +15822,9 @@ tcp_bind_failed(tcp_t *tcp, mblk_t *mp, int error)
void
tcp_rput_other(tcp_t *tcp, mblk_t *mp)
{
- mblk_t *mp1;
uchar_t *rptr = mp->b_rptr;
queue_t *q = tcp->tcp_rq;
struct T_error_ack *tea;
- uint32_t mss;
- mblk_t *syn_mp;
- mblk_t *mdti;
- mblk_t *lsoi;
- int retval;
- mblk_t *ire_mp;
- tcp_stack_t *tcps = tcp->tcp_tcps;
switch (mp->b_datap->db_type) {
case M_PROTO:
@@ -16037,190 +15836,11 @@ tcp_rput_other(tcp_t *tcp, mblk_t *mp)
switch (tea->PRIM_type) {
case T_BIND_ACK:
/*
- * Adapt Multidata information, if any. The
- * following tcp_mdt_update routine will free
- * the message.
- */
- if ((mdti = tcp_mdt_info_mp(mp)) != NULL) {
- tcp_mdt_update(tcp, &((ip_mdt_info_t *)mdti->
- b_rptr)->mdt_capab, B_TRUE);
- freemsg(mdti);
- }
-
- /*
- * Check to update LSO information with tcp, and
- * tcp_lso_update routine will free the message.
- */
- if ((lsoi = tcp_lso_info_mp(mp)) != NULL) {
- tcp_lso_update(tcp, &((ip_lso_info_t *)lsoi->
- b_rptr)->lso_capab);
- freemsg(lsoi);
- }
-
- /* Get the IRE, if we had requested for it */
- ire_mp = tcp_ire_mp(mp);
-
- if (tcp->tcp_hard_binding) {
- tcp->tcp_hard_binding = B_FALSE;
- tcp->tcp_hard_bound = B_TRUE;
- CL_INET_CONNECT(tcp);
- } else {
- if (ire_mp != NULL)
- freeb(ire_mp);
- goto after_syn_sent;
- }
-
- retval = tcp_adapt_ire(tcp, ire_mp);
- if (ire_mp != NULL)
- freeb(ire_mp);
- if (retval == 0) {
- tcp_bind_failed(tcp, mp,
- (int)((tcp->tcp_state >= TCPS_SYN_SENT) ?
- ENETUNREACH : EADDRNOTAVAIL));
- return;
- }
- /*
- * Don't let an endpoint connect to itself.
- * Also checked in tcp_connect() but that
- * check can't handle the case when the
- * local IP address is INADDR_ANY.
- */
- if (tcp->tcp_ipversion == IPV4_VERSION) {
- if ((tcp->tcp_ipha->ipha_dst ==
- tcp->tcp_ipha->ipha_src) &&
- (BE16_EQL(tcp->tcp_tcph->th_lport,
- tcp->tcp_tcph->th_fport))) {
- tcp_bind_failed(tcp, mp, EADDRNOTAVAIL);
- return;
- }
- } else {
- if (IN6_ARE_ADDR_EQUAL(
- &tcp->tcp_ip6h->ip6_dst,
- &tcp->tcp_ip6h->ip6_src) &&
- (BE16_EQL(tcp->tcp_tcph->th_lport,
- tcp->tcp_tcph->th_fport))) {
- tcp_bind_failed(tcp, mp, EADDRNOTAVAIL);
- return;
- }
- }
- ASSERT(tcp->tcp_state == TCPS_SYN_SENT);
- /*
- * This should not be possible! Just for
- * defensive coding...
- */
- if (tcp->tcp_state != TCPS_SYN_SENT)
- goto after_syn_sent;
-
- if (is_system_labeled() &&
- !tcp_update_label(tcp, CONN_CRED(tcp->tcp_connp))) {
- tcp_bind_failed(tcp, mp, EHOSTUNREACH);
- return;
- }
-
- ASSERT(q == tcp->tcp_rq);
- /*
- * tcp_adapt_ire() does not adjust
- * for TCP/IP header length.
- */
- mss = tcp->tcp_mss - tcp->tcp_hdr_len;
-
- /*
- * Just make sure our rwnd is at
- * least tcp_recv_hiwat_mss * MSS
- * large, and round up to the nearest
- * MSS.
- *
- * We do the round up here because
- * we need to get the interface
- * MTU first before we can do the
- * round up.
- */
- tcp->tcp_rwnd = MAX(MSS_ROUNDUP(tcp->tcp_rwnd, mss),
- tcps->tcps_recv_hiwat_minmss * mss);
- q->q_hiwat = tcp->tcp_rwnd;
- tcp_set_ws_value(tcp);
- U32_TO_ABE16((tcp->tcp_rwnd >> tcp->tcp_rcv_ws),
- tcp->tcp_tcph->th_win);
- if (tcp->tcp_rcv_ws > 0 || tcps->tcps_wscale_always)
- tcp->tcp_snd_ws_ok = B_TRUE;
-
- /*
- * Set tcp_snd_ts_ok to true
- * so that tcp_xmit_mp will
- * include the timestamp
- * option in the SYN segment.
- */
- if (tcps->tcps_tstamp_always ||
- (tcp->tcp_rcv_ws && tcps->tcps_tstamp_if_wscale)) {
- tcp->tcp_snd_ts_ok = B_TRUE;
- }
-
- /*
- * tcp_snd_sack_ok can be set in
- * tcp_adapt_ire() if the sack metric
- * is set. So check it here also.
- */
- if (tcps->tcps_sack_permitted == 2 ||
- tcp->tcp_snd_sack_ok) {
- if (tcp->tcp_sack_info == NULL) {
- tcp->tcp_sack_info =
- kmem_cache_alloc(
- tcp_sack_info_cache,
- KM_SLEEP);
- }
- tcp->tcp_snd_sack_ok = B_TRUE;
- }
-
- /*
- * Should we use ECN? Note that the current
- * default value (SunOS 5.9) of tcp_ecn_permitted
- * is 1. The reason for doing this is that there
- * are equipments out there that will drop ECN
- * enabled IP packets. Setting it to 1 avoids
- * compatibility problems.
- */
- if (tcps->tcps_ecn_permitted == 2)
- tcp->tcp_ecn_ok = B_TRUE;
-
- TCP_TIMER_RESTART(tcp, tcp->tcp_rto);
- syn_mp = tcp_xmit_mp(tcp, NULL, 0, NULL, NULL,
- tcp->tcp_iss, B_FALSE, NULL, B_FALSE);
- if (syn_mp) {
- cred_t *cr;
- pid_t pid;
-
- /*
- * Obtain the credential from the
- * thread calling connect(); the credential
- * lives on in the second mblk which
- * originated from T_CONN_REQ and is echoed
- * with the T_BIND_ACK from ip. If none
- * can be found, default to the creator
- * of the socket.
- */
- if (mp->b_cont == NULL ||
- (cr = DB_CRED(mp->b_cont)) == NULL) {
- cr = tcp->tcp_cred;
- pid = tcp->tcp_cpid;
- } else {
- pid = DB_CPID(mp->b_cont);
- }
- mblk_setcred(syn_mp, cr);
- DB_CPID(syn_mp) = pid;
- tcp_send_data(tcp, tcp->tcp_wq, syn_mp);
- }
- after_syn_sent:
- /*
- * A trailer mblk indicates a waiting client upstream.
- * We complete here the processing begun in
- * either tcp_bind() or tcp_connect() by passing
- * upstream the reply message they supplied.
+ * AF_INET and AF_INET6 sockets should not be here.
*/
- mp1 = mp;
- mp = mp->b_cont;
- freeb(mp1);
- if (mp)
- break;
+ ASSERT(tcp->tcp_family != AF_INET &&
+ tcp->tcp_family != AF_INET6);
+ (void) tcp_post_ip_bind(tcp, mp->b_cont, 0);
return;
case T_ERROR_ACK:
if (tcp->tcp_debug) {
@@ -16233,25 +15853,11 @@ tcp_rput_other(tcp_t *tcp, mblk_t *mp)
switch (tea->ERROR_prim) {
case O_T_BIND_REQ:
case T_BIND_REQ:
- tcp_bind_failed(tcp, mp,
+ ASSERT(tcp->tcp_family != AF_INET);
+ tcp_tpi_bind_failed(tcp, mp,
(int)((tcp->tcp_state >= TCPS_SYN_SENT) ?
ENETUNREACH : EADDRNOTAVAIL));
return;
- case T_UNBIND_REQ:
- tcp->tcp_hard_binding = B_FALSE;
- tcp->tcp_hard_bound = B_FALSE;
- if (mp->b_cont) {
- freemsg(mp->b_cont);
- mp->b_cont = NULL;
- }
- if (tcp->tcp_unbind_pending)
- tcp->tcp_unbind_pending = 0;
- else {
- /* From tcp_ip_unbind() - free */
- freemsg(mp);
- return;
- }
- break;
case T_SVR4_OPTMGMT_REQ:
if (tcp->tcp_drop_opt_ack_cnt > 0) {
/* T_OPTMGMT_REQ generated by TCP */
@@ -16285,6 +15891,7 @@ tcp_rput_other(tcp_t *tcp, mblk_t *mp)
}
break;
default:
+ ASSERT(tea->ERROR_prim != T_UNBIND_REQ);
break;
}
break;
@@ -16302,6 +15909,7 @@ tcp_rput_other(tcp_t *tcp, mblk_t *mp)
* bind. Otherwise accept could possibly run and free
* this tcp struct.
*/
+ ASSERT(q != NULL);
putnext(q, mp);
}
@@ -16345,7 +15953,7 @@ tcp_rsrv_input(void *arg, mblk_t *mp, void *arg2)
*/
TCP_FUSE_SYNCSTR_PLUG_DRAIN(tcp);
if (tcp->tcp_rcv_list != NULL)
- (void) tcp_rcv_drain(tcp->tcp_rq, tcp);
+ (void) tcp_rcv_drain(tcp);
if (peer_tcp > tcp) {
mutex_enter(&peer_tcp->tcp_non_sq_lock);
@@ -16487,8 +16095,20 @@ tcp_rwnd_set(tcp_t *tcp, uint32_t rwnd)
* purposes in tcp_fuse_output().
*/
sth_hiwat = tcp_fuse_set_rcv_hiwat(tcp, rwnd);
- if (!tcp_detached)
- (void) mi_set_sth_hiwat(tcp->tcp_rq, sth_hiwat);
+ if (!tcp_detached) {
+ (void) proto_set_rx_hiwat(tcp->tcp_rq, tcp->tcp_connp,
+ sth_hiwat);
+ if (IPCL_IS_NONSTR(tcp->tcp_connp)) {
+ conn_t *connp = tcp->tcp_connp;
+ struct sock_proto_props sopp;
+
+ sopp.sopp_flags = SOCKOPT_RCVTHRESH;
+ sopp.sopp_rcvthresh = sth_hiwat >> 3;
+
+ (*connp->conn_upcalls->su_set_proto_props)
+ (connp->conn_upper_handle, &sopp);
+ }
+ }
/*
* In the fusion case, the maxpsz stream head value of
@@ -16500,10 +16120,11 @@ tcp_rwnd_set(tcp_t *tcp, uint32_t rwnd)
return (rwnd);
}
- if (tcp_detached)
+ if (tcp_detached) {
old_max_rwnd = tcp->tcp_rwnd;
- else
- old_max_rwnd = tcp->tcp_rq->q_hiwat;
+ } else {
+ old_max_rwnd = tcp->tcp_recv_hiwater;
+ }
/*
* Insist on a receive window that is at least
@@ -16570,17 +16191,20 @@ tcp_rwnd_set(tcp_t *tcp, uint32_t rwnd)
if (tcp_detached)
return (rwnd);
/*
- * We set the maximum receive window into rq->q_hiwat.
+ * We set the maximum receive window into rq->q_hiwat if it is
+ * a STREAMS socket.
* This is not actually used for flow control.
*/
- tcp->tcp_rq->q_hiwat = rwnd;
+ if (!IPCL_IS_NONSTR(tcp->tcp_connp))
+ tcp->tcp_rq->q_hiwat = rwnd;
+ tcp->tcp_recv_hiwater = rwnd;
/*
- * Set the Stream head high water mark. This doesn't have to be
+ * Set the STREAM head high water mark. This doesn't have to be
* here, since we are simply using default values, but we would
* prefer to choose these values algorithmically, with a likely
* relationship to rwnd.
*/
- (void) mi_set_sth_hiwat(tcp->tcp_rq,
+ (void) proto_set_rx_hiwat(tcp->tcp_rq, tcp->tcp_connp,
MAX(rwnd, tcps->tcps_sth_rcv_hiwat));
return (rwnd);
}
@@ -16939,7 +16563,7 @@ tcp_snmp_state(tcp_t *tcp)
static char tcp_report_header[] =
"TCP " MI_COL_HDRPAD_STR
- "zone dest snxt suna "
+ "zone dest snxt suna "
"swnd rnxt rack rwnd rto mss w sw rw t "
"recent [lport,fport] state";
@@ -17127,7 +16751,7 @@ static int
tcp_bind_hash_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
{
tf_t *tbf;
- tcp_t *tcp;
+ tcp_t *tcp, *ltcp;
int i;
zoneid_t zoneid;
tcp_stack_t *tcps = Q_TO_TCP(q)->tcp_tcps;
@@ -17153,15 +16777,18 @@ tcp_bind_hash_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
for (i = 0; i < TCP_BIND_FANOUT_SIZE; i++) {
tbf = &tcps->tcps_bind_fanout[i];
mutex_enter(&tbf->tf_lock);
- for (tcp = tbf->tf_tcp; tcp != NULL;
- tcp = tcp->tcp_bind_hash) {
- if (zoneid != GLOBAL_ZONEID &&
- zoneid != tcp->tcp_connp->conn_zoneid)
- continue;
- CONN_INC_REF(tcp->tcp_connp);
- tcp_report_item(mp->b_cont, tcp, i,
- Q_TO_TCP(q), cr);
- CONN_DEC_REF(tcp->tcp_connp);
+ for (ltcp = tbf->tf_tcp; ltcp != NULL;
+ ltcp = ltcp->tcp_bind_hash) {
+ for (tcp = ltcp; tcp != NULL;
+ tcp = tcp->tcp_bind_hash_port) {
+ if (zoneid != GLOBAL_ZONEID &&
+ zoneid != tcp->tcp_connp->conn_zoneid)
+ continue;
+ CONN_INC_REF(tcp->tcp_connp);
+ tcp_report_item(mp->b_cont, tcp, i,
+ Q_TO_TCP(q), cr);
+ CONN_DEC_REF(tcp->tcp_connp);
+ }
}
mutex_exit(&tbf->tf_lock);
}
@@ -17201,7 +16828,7 @@ tcp_listen_hash_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
(void) mi_mpprintf(mp,
" TCP " MI_COL_HDRPAD_STR
- "zone IP addr port seqnum backlog (q0/q/max)");
+ "zone IP addr port seqnum backlog (q0/q/max)");
ipst = tcps->tcps_netstack->netstack_ip;
@@ -17717,19 +17344,18 @@ tcp_timer(void *arg)
}
-/* tcp_unbind is called by tcp_wput_proto to handle T_UNBIND_REQ messages. */
-static void
-tcp_unbind(tcp_t *tcp, mblk_t *mp)
+static int
+tcp_do_unbind(conn_t *connp)
{
- conn_t *connp;
+ tcp_t *tcp = connp->conn_tcp;
+ int error = 0;
switch (tcp->tcp_state) {
case TCPS_BOUND:
case TCPS_LISTEN:
break;
default:
- tcp_err_ack(tcp, mp, TOUTSTATE, 0);
- return;
+ return (-TOUTSTATE);
}
/*
@@ -17752,14 +17378,32 @@ tcp_unbind(tcp_t *tcp, mblk_t *mp)
tcp_bind_hash_remove(tcp);
tcp->tcp_state = TCPS_IDLE;
tcp->tcp_mdt = B_FALSE;
- /* Send M_FLUSH according to TPI */
- (void) putnextctl1(tcp->tcp_rq, M_FLUSH, FLUSHRW);
+
connp = tcp->tcp_connp;
connp->conn_mdt_ok = B_FALSE;
ipcl_hash_remove(connp);
bzero(&connp->conn_ports, sizeof (connp->conn_ports));
- mp = mi_tpi_ok_ack_alloc(mp);
- putnext(tcp->tcp_rq, mp);
+
+ return (error);
+}
+
+/*
+ * tcp_tpi_unbind is called by tcp_wput_proto to handle T_UNBIND_REQ messages.
+ *
+ * The unbind itself is delegated to tcp_do_unbind(); this wrapper only turns
+ * its return value into a TPI reply sent up tcp->tcp_rq:
+ *   > 0  the value is reported through tcp_err_ack() as TSYSERR with that
+ *        value as the accompanying error
+ *   < 0  the value is negated and reported through tcp_err_ack() as the
+ *        TPI error (e.g. -TOUTSTATE becomes TOUTSTATE)
+ *   == 0 an M_FLUSH (FLUSHRW) is sent up, followed by the ok ack built
+ *        from the request mblk
+ */
+static void
+tcp_tpi_unbind(tcp_t *tcp, mblk_t *mp)
+{
+ int error = tcp_do_unbind(tcp->tcp_connp);
+
+ if (error > 0) {
+ tcp_err_ack(tcp, mp, TSYSERR, error);
+ } else if (error < 0) {
+ tcp_err_ack(tcp, mp, -error, 0);
+ } else {
+ /* Send M_FLUSH according to TPI */
+ (void) putnextctl1(tcp->tcp_rq, M_FLUSH, FLUSHRW);
+
+ mp = mi_tpi_ok_ack_alloc(mp);
+ putnext(tcp->tcp_rq, mp);
+ }
}
/*
@@ -18025,9 +17669,9 @@ tcp_output(void *arg, mblk_t *mp, void *arg2)
/* find out how much we can send */
/* BEGIN CSTYLED */
/*
- * un-acked usable
+ * un-acked usable
* |--------------|-----------------|
- * tcp_suna tcp_snxt tcp_suna+tcp_swnd
+ * tcp_suna tcp_snxt tcp_suna+tcp_swnd
*/
/* END CSTYLED */
@@ -18229,10 +17873,6 @@ slow:
tcp_wput_data(tcp, NULL, B_FALSE);
}
-/*
- * The function called through squeue to get behind eager's perimeter to
- * finish the accept processing.
- */
/* ARGSUSED */
void
tcp_accept_finish(void *arg, mblk_t *mp, void *arg2)
@@ -18240,17 +17880,33 @@ tcp_accept_finish(void *arg, mblk_t *mp, void *arg2)
conn_t *connp = (conn_t *)arg;
tcp_t *tcp = connp->conn_tcp;
queue_t *q = tcp->tcp_rq;
- mblk_t *mp1;
- mblk_t *stropt_mp = mp;
- struct stroptions *stropt;
- uint_t thwin;
- tcp_stack_t *tcps = tcp->tcp_tcps;
+ struct tcp_options *tcpopt;
+ tcp_stack_t *tcps = tcp->tcp_tcps;
+
+ /* socket options */
+ uint_t sopp_flags;
+ ssize_t sopp_rxhiwat;
+ ssize_t sopp_maxblk;
+ ushort_t sopp_wroff;
+ ushort_t sopp_tail;
+ ushort_t sopp_copyopt;
+
+ tcpopt = (struct tcp_options *)mp->b_rptr;
/*
* Drop the eager's ref on the listener, that was placed when
* this eager began life in tcp_conn_request.
*/
CONN_DEC_REF(tcp->tcp_saved_listener->tcp_connp);
+ if (IPCL_IS_NONSTR(connp)) {
+ /* Safe to free conn_ind message */
+ freemsg(tcp->tcp_conn.tcp_eager_conn_ind);
+ tcp->tcp_conn.tcp_eager_conn_ind = NULL;
+
+ /* The listener tells us which upper handle to use */
+ ASSERT(tcpopt->to_flags & TCPOPT_UPPERHANDLE);
+ connp->conn_upper_handle = tcpopt->to_handle;
+ }
tcp->tcp_detached = B_FALSE;
@@ -18267,37 +17923,47 @@ tcp_accept_finish(void *arg, mblk_t *mp, void *arg2)
*/
ASSERT(tcp->tcp_listener == NULL);
if (tcp->tcp_issocket || tcp->tcp_send_discon_ind) {
- struct T_discon_ind *tdi;
-
- (void) putnextctl1(q, M_FLUSH, FLUSHRW);
- /*
- * Let us reuse the incoming mblk to avoid memory
- * allocation failure problems. We know that the
- * size of the incoming mblk i.e. stroptions is greater
- * than sizeof T_discon_ind. So the reallocb below
- * can't fail.
- */
- freemsg(mp->b_cont);
- mp->b_cont = NULL;
- ASSERT(DB_REF(mp) == 1);
- mp = reallocb(mp, sizeof (struct T_discon_ind),
- B_FALSE);
- ASSERT(mp != NULL);
- DB_TYPE(mp) = M_PROTO;
- ((union T_primitives *)mp->b_rptr)->type = T_DISCON_IND;
- tdi = (struct T_discon_ind *)mp->b_rptr;
- if (tcp->tcp_issocket) {
- tdi->DISCON_reason = ECONNREFUSED;
- tdi->SEQ_number = 0;
+ if (IPCL_IS_NONSTR(connp)) {
+ ASSERT(tcp->tcp_issocket);
+ (*connp->conn_upcalls->su_disconnected)(
+ connp->conn_upper_handle, tcp->tcp_connid,
+ ECONNREFUSED);
+ freemsg(mp);
} else {
- tdi->DISCON_reason = ENOPROTOOPT;
- tdi->SEQ_number =
- tcp->tcp_conn_req_seqnum;
+ struct T_discon_ind *tdi;
+
+ (void) putnextctl1(q, M_FLUSH, FLUSHRW);
+ /*
+ * Let us reuse the incoming mblk to avoid
+ * memory allocation failure problems. The
+ * incoming mblk now carries a struct tcp_options
+ * (not stroptions) and is assumed to be larger
+ * than sizeof T_discon_ind, so the reallocb
+ * below is not expected to fail.
+ */
+ freemsg(mp->b_cont);
+ mp->b_cont = NULL;
+ ASSERT(DB_REF(mp) == 1);
+ mp = reallocb(mp, sizeof (struct T_discon_ind),
+ B_FALSE);
+ ASSERT(mp != NULL);
+ DB_TYPE(mp) = M_PROTO;
+ ((union T_primitives *)mp->b_rptr)->type =
+ T_DISCON_IND;
+ tdi = (struct T_discon_ind *)mp->b_rptr;
+ if (tcp->tcp_issocket) {
+ tdi->DISCON_reason = ECONNREFUSED;
+ tdi->SEQ_number = 0;
+ } else {
+ tdi->DISCON_reason = ENOPROTOOPT;
+ tdi->SEQ_number =
+ tcp->tcp_conn_req_seqnum;
+ }
+ mp->b_wptr = mp->b_rptr +
+ sizeof (struct T_discon_ind);
+ putnext(q, mp);
+ return;
}
- mp->b_wptr = mp->b_rptr + sizeof (struct T_discon_ind);
- putnext(q, mp);
- } else {
- freemsg(mp);
}
if (tcp->tcp_hard_binding) {
tcp->tcp_hard_binding = B_FALSE;
@@ -18306,19 +17972,21 @@ tcp_accept_finish(void *arg, mblk_t *mp, void *arg2)
return;
}
- mp1 = stropt_mp->b_cont;
- stropt_mp->b_cont = NULL;
- ASSERT(DB_TYPE(stropt_mp) == M_SETOPTS);
- stropt = (struct stroptions *)stropt_mp->b_rptr;
+ if (tcpopt->to_flags & TCPOPT_BOUNDIF) {
+ int boundif = tcpopt->to_boundif;
+ uint_t len = sizeof (int);
- while (mp1 != NULL) {
- mp = mp1;
- mp1 = mp1->b_cont;
- mp->b_cont = NULL;
- tcp->tcp_drop_opt_ack_cnt++;
- CALL_IP_WPUT(connp, tcp->tcp_wq, mp);
+ (void) tcp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, IPPROTO_IPV6,
+ IPV6_BOUND_IF, len, (uchar_t *)&boundif, &len,
+ (uchar_t *)&boundif, NULL, tcp->tcp_cred);
+ }
+ if (tcpopt->to_flags & TCPOPT_RECVPKTINFO) {
+ uint_t on = 1;
+ uint_t len = sizeof (uint_t);
+ (void) tcp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, IPPROTO_IPV6,
+ IPV6_RECVPKTINFO, len, (uchar_t *)&on, &len,
+ (uchar_t *)&on, NULL, tcp->tcp_cred);
}
- mp = NULL;
/*
* For a loopback connection with tcp_direct_sockfs on, note that
@@ -18331,42 +17999,50 @@ tcp_accept_finish(void *arg, mblk_t *mp, void *arg2)
* Set the max window size (tcp_rq->q_hiwat) of the acceptor
* properly. This is the first time we know of the acceptor'
* queue. So we do it here.
+ *
+ * XXX
*/
if (tcp->tcp_rcv_list == NULL) {
/*
* Recv queue is empty, tcp_rwnd should not have changed.
* That means it should be equal to the listener's tcp_rwnd.
*/
- tcp->tcp_rq->q_hiwat = tcp->tcp_rwnd;
+ if (!IPCL_IS_NONSTR(connp))
+ tcp->tcp_rq->q_hiwat = tcp->tcp_rwnd;
+ tcp->tcp_recv_hiwater = tcp->tcp_rwnd;
} else {
#ifdef DEBUG
- uint_t cnt = 0;
+ mblk_t *tmp;
+ mblk_t *mp1;
+ uint_t cnt = 0;
mp1 = tcp->tcp_rcv_list;
- while ((mp = mp1) != NULL) {
- mp1 = mp->b_next;
- cnt += msgdsize(mp);
+ while ((tmp = mp1) != NULL) {
+ mp1 = tmp->b_next;
+ cnt += msgdsize(tmp);
}
ASSERT(cnt != 0 && tcp->tcp_rcv_cnt == cnt);
#endif
/* There is some data, add them back to get the max. */
- tcp->tcp_rq->q_hiwat = tcp->tcp_rwnd + tcp->tcp_rcv_cnt;
+ if (!IPCL_IS_NONSTR(connp))
+ tcp->tcp_rq->q_hiwat = tcp->tcp_rwnd + tcp->tcp_rcv_cnt;
+ tcp->tcp_recv_hiwater = tcp->tcp_rwnd + tcp->tcp_rcv_cnt;
}
/*
* This is the first time we run on the correct
* queue after tcp_accept. So fix all the q parameters
* here.
*/
- stropt->so_flags = SO_HIWAT | SO_MAXBLK | SO_WROFF;
- stropt->so_maxblk = tcp_maxpsz_set(tcp, B_FALSE);
+ sopp_flags = SOCKOPT_RCVHIWAT | SOCKOPT_MAXBLK | SOCKOPT_WROFF;
+ sopp_maxblk = tcp_maxpsz_set(tcp, B_FALSE);
/*
* Record the stream head's high water mark for this endpoint;
* this is used for flow-control purposes.
*/
- stropt->so_hiwat = tcp->tcp_fused ?
- tcp_fuse_set_rcv_hiwat(tcp, q->q_hiwat) :
- MAX(q->q_hiwat, tcps->tcps_sth_rcv_hiwat);
+ sopp_rxhiwat = tcp->tcp_fused ?
+ tcp_fuse_set_rcv_hiwat(tcp, tcp->tcp_recv_hiwater) :
+ MAX(tcp->tcp_recv_hiwater, tcps->tcps_sth_rcv_hiwat);
/*
* Determine what write offset value to use depending on SACK and
@@ -18382,17 +18058,17 @@ tcp_accept_finish(void *arg, mblk_t *mp, void *arg2)
* since it would reduce the amount of work done by kmem.
* Non-fused tcp loopback case is handled separately below.
*/
- stropt->so_wroff = 0;
+ sopp_wroff = 0;
/*
* Update the peer's transmit parameters according to
* our recently calculated high water mark value.
*/
(void) tcp_maxpsz_set(tcp->tcp_loopback_peer, B_TRUE);
} else if (tcp->tcp_snd_sack_ok) {
- stropt->so_wroff = tcp->tcp_hdr_len + TCPOPT_MAX_SACK_LEN +
+ sopp_wroff = tcp->tcp_hdr_len + TCPOPT_MAX_SACK_LEN +
(tcp->tcp_loopback ? 0 : tcps->tcps_wroff_xtra);
} else {
- stropt->so_wroff = tcp->tcp_hdr_len + (tcp->tcp_loopback ? 0 :
+ sopp_wroff = tcp->tcp_hdr_len + (tcp->tcp_loopback ? 0 :
tcps->tcps_wroff_xtra);
}
@@ -18408,20 +18084,62 @@ tcp_accept_finish(void *arg, mblk_t *mp, void *arg2)
* costs.
*/
if (tcp->tcp_kssl_ctx != NULL) {
- stropt->so_wroff += SSL3_WROFFSET;
+ sopp_wroff += SSL3_WROFFSET;
- stropt->so_flags |= SO_TAIL;
- stropt->so_tail = SSL3_MAX_TAIL_LEN;
+ sopp_flags |= SOCKOPT_TAIL;
+ sopp_tail = SSL3_MAX_TAIL_LEN;
- stropt->so_flags |= SO_COPYOPT;
- stropt->so_copyopt = ZCVMUNSAFE;
+ sopp_flags |= SOCKOPT_ZCOPY;
+ sopp_copyopt = ZCVMUNSAFE;
- stropt->so_maxblk = SSL3_MAX_RECORD_LEN;
+ sopp_maxblk = SSL3_MAX_RECORD_LEN;
}
/* Send the options up */
- putnext(q, stropt_mp);
+ if (IPCL_IS_NONSTR(connp)) {
+ /* Non-STREAMS socket: pass the properties via the upcall. */
+ struct sock_proto_props sopp;
+
+ sopp.sopp_flags = sopp_flags;
+ sopp.sopp_wroff = sopp_wroff;
+ sopp.sopp_maxblk = sopp_maxblk;
+ sopp.sopp_rxhiwat = sopp_rxhiwat;
+ if (sopp_flags & SOCKOPT_TAIL) {
+ ASSERT(tcp->tcp_kssl_ctx != NULL);
+ ASSERT(sopp_flags & SOCKOPT_ZCOPY);
+ sopp.sopp_tail = sopp_tail;
+ sopp.sopp_zcopyflag = sopp_copyopt;
+ }
+ (*connp->conn_upcalls->su_set_proto_props)
+ (connp->conn_upper_handle, &sopp);
+ } else {
+ /* STREAMS socket: build an M_SETOPTS message and send it up. */
+ struct stroptions *stropt;
+ mblk_t *stropt_mp = allocb(sizeof (struct stroptions), BPRI_HI);
+ if (stropt_mp == NULL) {
+ tcp_err_ack(tcp, mp, TSYSERR, ENOMEM);
+ return;
+ }
+ DB_TYPE(stropt_mp) = M_SETOPTS;
+ stropt = (struct stroptions *)stropt_mp->b_rptr;
+ stropt_mp->b_wptr += sizeof (struct stroptions);
+ /*
+ * allocb() does not zero the data buffer, so the flags must
+ * be assigned rather than OR-ed into whatever was there.
+ */
+ stropt->so_flags = SO_HIWAT | SO_WROFF | SO_MAXBLK;
+ stropt->so_hiwat = sopp_rxhiwat;
+ stropt->so_wroff = sopp_wroff;
+ stropt->so_maxblk = sopp_maxblk;
+
+ if (sopp_flags & SOCKOPT_TAIL) {
+ ASSERT(tcp->tcp_kssl_ctx != NULL);
+
+ stropt->so_flags |= SO_TAIL | SO_COPYOPT;
+ stropt->so_tail = sopp_tail;
+ stropt->so_copyopt = sopp_copyopt;
+ }
+
+ /* Send the options up */
+ putnext(q, stropt_mp);
+ }
+ freemsg(mp);
/*
* Pass up any data and/or a fin that has been received.
*
@@ -18432,43 +18150,77 @@ tcp_accept_finish(void *arg, mblk_t *mp, void *arg2)
* code, the rwnd may never open up again!
*/
if (tcp->tcp_rcv_list != NULL) {
- /* We drain directly in case of fused tcp loopback */
- sodirect_t *sodp;
-
- if (!tcp->tcp_fused && canputnext(q)) {
- tcp->tcp_rwnd = q->q_hiwat;
- thwin = ((uint_t)BE16_TO_U16(tcp->tcp_tcph->th_win))
- << tcp->tcp_rcv_ws;
- thwin -= tcp->tcp_rnxt - tcp->tcp_rack;
- if (tcp->tcp_state >= TCPS_ESTABLISHED &&
- (q->q_hiwat - thwin >= tcp->tcp_mss)) {
- tcp_xmit_ctl(NULL,
- tcp, (tcp->tcp_swnd == 0) ?
- tcp->tcp_suna : tcp->tcp_snxt,
- tcp->tcp_rnxt, TH_ACK);
- BUMP_MIB(&tcps->tcps_mib, tcpOutWinUpdate);
+ if (IPCL_IS_NONSTR(connp)) {
+ mblk_t *mp;
+ int space_left;
+ int error;
+ boolean_t push = B_TRUE;
+
+ if (!tcp->tcp_fused && (*connp->conn_upcalls->su_recv)
+ (connp->conn_upper_handle, NULL, 0, 0, &error,
+ &push) >= 0) {
+ tcp->tcp_rwnd = tcp->tcp_recv_hiwater;
+ if (tcp->tcp_state >= TCPS_ESTABLISHED &&
+ tcp_rwnd_reopen(tcp) == TH_ACK_NEEDED) {
+ tcp_xmit_ctl(NULL,
+ tcp, (tcp->tcp_swnd == 0) ?
+ tcp->tcp_suna : tcp->tcp_snxt,
+ tcp->tcp_rnxt, TH_ACK);
+ }
}
-
- }
-
- SOD_PTR_ENTER(tcp, sodp);
- if (sodp != NULL) {
- /* Sodirect, move from rcv_list */
- ASSERT(!tcp->tcp_fused);
while ((mp = tcp->tcp_rcv_list) != NULL) {
+ push = B_TRUE;
tcp->tcp_rcv_list = mp->b_next;
mp->b_next = NULL;
- (void) tcp_rcv_sod_enqueue(tcp, sodp, mp,
- msgdsize(mp));
+ space_left = (*connp->conn_upcalls->su_recv)
+ (connp->conn_upper_handle, mp, msgdsize(mp),
+ 0, &error, &push);
+ if (space_left < 0) {
+ /*
+ * At this point the eager is not
+ * visible to anyone, so fallback
+ * can not happen.
+ */
+ ASSERT(error != EOPNOTSUPP);
+ }
}
tcp->tcp_rcv_last_head = NULL;
tcp->tcp_rcv_last_tail = NULL;
tcp->tcp_rcv_cnt = 0;
- (void) tcp_rcv_sod_wakeup(tcp, sodp);
- /* sod_wakeup() did the mutex_exit() */
} else {
- /* Not sodirect, drain */
- (void) tcp_rcv_drain(q, tcp);
+ /* We drain directly in case of fused tcp loopback */
+ sodirect_t *sodp;
+
+ if (!tcp->tcp_fused && canputnext(q)) {
+ tcp->tcp_rwnd = q->q_hiwat;
+ if (tcp->tcp_state >= TCPS_ESTABLISHED &&
+ tcp_rwnd_reopen(tcp) == TH_ACK_NEEDED) {
+ tcp_xmit_ctl(NULL,
+ tcp, (tcp->tcp_swnd == 0) ?
+ tcp->tcp_suna : tcp->tcp_snxt,
+ tcp->tcp_rnxt, TH_ACK);
+ }
+ }
+
+ SOD_PTR_ENTER(tcp, sodp);
+ if (sodp != NULL) {
+ /* Sodirect, move from rcv_list */
+ ASSERT(!tcp->tcp_fused);
+ while ((mp = tcp->tcp_rcv_list) != NULL) {
+ tcp->tcp_rcv_list = mp->b_next;
+ mp->b_next = NULL;
+ (void) tcp_rcv_sod_enqueue(tcp, sodp,
+ mp, msgdsize(mp));
+ }
+ tcp->tcp_rcv_last_head = NULL;
+ tcp->tcp_rcv_last_tail = NULL;
+ tcp->tcp_rcv_cnt = 0;
+ (void) tcp_rcv_sod_wakeup(tcp, sodp);
+ /* sod_wakeup() did the mutex_exit() */
+ } else {
+ /* Not sodirect, drain */
+ (void) tcp_rcv_drain(tcp);
+ }
}
/*
@@ -18502,18 +18254,27 @@ tcp_accept_finish(void *arg, mblk_t *mp, void *arg2)
}
ASSERT(tcp->tcp_rcv_list == NULL || tcp->tcp_fused_sigurg);
if (tcp->tcp_fin_rcvd && !tcp->tcp_ordrel_done) {
- mp = tcp->tcp_ordrel_mp;
- tcp->tcp_ordrel_mp = NULL;
tcp->tcp_ordrel_done = B_TRUE;
- putnext(q, mp);
+ if (IPCL_IS_NONSTR(connp)) {
+ ASSERT(tcp->tcp_ordrel_mp == NULL);
+ (*connp->conn_upcalls->su_opctl)(
+ connp->conn_upper_handle,
+ SOCK_OPCTL_SHUT_RECV, 0);
+ } else {
+ mp = tcp->tcp_ordrel_mp;
+ tcp->tcp_ordrel_mp = NULL;
+ putnext(q, mp);
+ }
}
if (tcp->tcp_hard_binding) {
tcp->tcp_hard_binding = B_FALSE;
tcp->tcp_hard_bound = B_TRUE;
}
- /* We can enable synchronous streams now */
- if (tcp->tcp_fused) {
+ /* We can enable synchronous streams for STREAMS tcp endpoint now */
+ if (tcp->tcp_fused && !IPCL_IS_NONSTR(connp) &&
+ tcp->tcp_loopback_peer != NULL &&
+ !IPCL_IS_NONSTR(tcp->tcp_loopback_peer->tcp_connp)) {
tcp_fuse_syncstr_enable_pair(tcp);
}
@@ -18547,6 +18308,8 @@ tcp_send_pending(void *arg, mblk_t *mp, void *arg2)
{
conn_t *connp = (conn_t *)arg;
tcp_t *listener = connp->conn_tcp;
+ struct T_conn_ind *conn_ind;
+ tcp_t *tcp;
if (listener->tcp_state == TCPS_CLOSED ||
TCP_IS_DETACHED(listener)) {
@@ -18554,8 +18317,6 @@ tcp_send_pending(void *arg, mblk_t *mp, void *arg2)
* If listener has closed, it would have caused a
* a cleanup/blowoff to happen for the eager.
*/
- tcp_t *tcp;
- struct T_conn_ind *conn_ind;
conn_ind = (struct T_conn_ind *)mp->b_rptr;
bcopy(mp->b_rptr + conn_ind->OPT_offset, &tcp,
@@ -18571,7 +18332,218 @@ tcp_send_pending(void *arg, mblk_t *mp, void *arg2)
freemsg(mp);
return;
}
- putnext(listener->tcp_rq, mp);
+ if (IPCL_IS_NONSTR(connp)) {
+ conn_ind = (struct T_conn_ind *)mp->b_rptr;
+ bcopy(mp->b_rptr + conn_ind->OPT_offset, &tcp,
+ conn_ind->OPT_length);
+
+ if ((*connp->conn_upcalls->su_newconn)
+ (connp->conn_upper_handle,
+ (sock_lower_handle_t)tcp->tcp_connp,
+ &sock_tcp_downcalls, DB_CRED(mp), DB_CPID(mp),
+ &tcp->tcp_connp->conn_upcalls) != NULL) {
+ /* Keep the message around in case of fallback */
+ tcp->tcp_conn.tcp_eager_conn_ind = mp;
+ } else {
+ freemsg(mp);
+ }
+ } else {
+ putnext(listener->tcp_rq, mp);
+ }
+}
+
+/* ARGSUSED */
+static int
+tcp_accept_common(conn_t *lconnp, conn_t *econnp,
+ sock_upper_handle_t sock_handle, cred_t *cr)
+{
+ tcp_t *listener, *eager;
+ mblk_t *opt_mp;
+ struct tcp_options *tcpopt;
+
+ listener = lconnp->conn_tcp;
+ ASSERT(listener->tcp_state == TCPS_LISTEN);
+ eager = econnp->conn_tcp;
+ ASSERT(eager->tcp_listener != NULL);
+
+ ASSERT(eager->tcp_rq != NULL);
+
+ /*
+ * Accept processing shared by tcp_accept() and tcp_tpi_accept().
+ * It builds an M_SETOPTS message (struct tcp_options) describing
+ * what the eager inherits from the listener, moves at most one
+ * deferred eager from q0 to q, unlinks this eager from the
+ * listener and queues tcp_accept_finish() on the eager's squeue.
+ *
+ * lconnp is the listener's conn, econnp the eager's conn.
+ * sock_handle is only stored when econnp is a non-STREAMS conn;
+ * cr is not referenced in the body (hence ARGSUSED).
+ *
+ * Returns 0 on success, or -TPROTO if the options mblk cannot be
+ * allocated.
+ * NOTE(review): on that failure path sodirect has already been
+ * disabled by the code just below and is not restored.
+ *
+ * If the eager is fused and sodirect is enabled, disable sodirect.
+ */
+ if (eager->tcp_fused && eager->tcp_sodirect != NULL) {
+ /* Fused, disable sodirect */
+ mutex_enter(eager->tcp_sodirect->sod_lockp);
+ SOD_DISABLE(eager->tcp_sodirect);
+ mutex_exit(eager->tcp_sodirect->sod_lockp);
+ eager->tcp_sodirect = NULL;
+ }
+
+ opt_mp = allocb(sizeof (struct tcp_options), BPRI_HI);
+ if (opt_mp == NULL) {
+ return (-TPROTO); /* negated TLI error, not an errno */
+ }
+ bzero((char *)opt_mp->b_rptr, sizeof (struct tcp_options));
+ eager->tcp_issocket = B_TRUE;
+
+ econnp->conn_upcalls = lconnp->conn_upcalls;
+ econnp->conn_zoneid = listener->tcp_connp->conn_zoneid;
+ econnp->conn_allzones = listener->tcp_connp->conn_allzones;
+ ASSERT(econnp->conn_netstack ==
+ listener->tcp_connp->conn_netstack);
+ ASSERT(eager->tcp_tcps == listener->tcp_tcps);
+
+ /* Put the ref for IP */
+ CONN_INC_REF(econnp);
+
+ /*
+ * We should have minimum of 3 references on the conn
+ * at this point. One each for TCP and IP and one for
+ * the T_conn_ind that was sent up when the 3-way handshake
+ * completed. In the normal case we would also have another
+ * reference (making a total of 4) for the conn being in the
+ * classifier hash list. However the eager could have received
+ * an RST subsequently and tcp_closei_local could have removed
+ * the eager from the classifier hash list, hence we can't
+ * assert that reference.
+ */
+ ASSERT(econnp->conn_ref >= 3);
+
+ opt_mp->b_datap->db_type = M_SETOPTS;
+ opt_mp->b_wptr += sizeof (struct tcp_options);
+
+ /*
+ * Prepare for inheriting IPV6_BOUND_IF and IPV6_RECVPKTINFO
+ * from listener to acceptor. In case of non-STREAMS sockets,
+ * we also need to pass the upper handle along.
+ */
+ tcpopt = (struct tcp_options *)opt_mp->b_rptr;
+ tcpopt->to_flags = 0;
+
+ if (IPCL_IS_NONSTR(econnp)) {
+ ASSERT(sock_handle != NULL);
+ tcpopt->to_flags |= TCPOPT_UPPERHANDLE;
+ tcpopt->to_handle = sock_handle;
+ }
+ if (listener->tcp_bound_if != 0) {
+ tcpopt->to_flags |= TCPOPT_BOUNDIF;
+ tcpopt->to_boundif = listener->tcp_bound_if;
+ }
+ if (listener->tcp_ipv6_recvancillary & TCP_IPV6_RECVPKTINFO) {
+ tcpopt->to_flags |= TCPOPT_RECVPKTINFO;
+ }
+
+ mutex_enter(&listener->tcp_eager_lock);
+ if (listener->tcp_eager_prev_q0->tcp_conn_def_q0) {
+
+ tcp_t *tail;
+ tcp_t *tcp;
+ mblk_t *mp1;
+
+ tcp = listener->tcp_eager_prev_q0;
+ /*
+ * listener->tcp_eager_prev_q0 points to the TAIL of the
+ * deferred T_conn_ind queue. We need to get to the head
+ * of the queue in order to send up T_conn_ind in the same
+ * order as the 3WHS completed.
+ */
+ while (tcp != listener) {
+ if (!tcp->tcp_eager_prev_q0->tcp_conn_def_q0 &&
+ !tcp->tcp_kssl_pending)
+ break;
+ else
+ tcp = tcp->tcp_eager_prev_q0;
+ }
+ /* None of the pending eagers can be sent up now */
+ if (tcp == listener)
+ goto no_more_eagers;
+
+ mp1 = tcp->tcp_conn.tcp_eager_conn_ind;
+ tcp->tcp_conn.tcp_eager_conn_ind = NULL;
+ /* Move from q0 to q */
+ ASSERT(listener->tcp_conn_req_cnt_q0 > 0);
+ listener->tcp_conn_req_cnt_q0--;
+ listener->tcp_conn_req_cnt_q++;
+ tcp->tcp_eager_next_q0->tcp_eager_prev_q0 =
+ tcp->tcp_eager_prev_q0;
+ tcp->tcp_eager_prev_q0->tcp_eager_next_q0 =
+ tcp->tcp_eager_next_q0;
+ tcp->tcp_eager_prev_q0 = NULL;
+ tcp->tcp_eager_next_q0 = NULL;
+ tcp->tcp_conn_def_q0 = B_FALSE;
+
+ /* Make sure the tcp isn't in the list of droppables */
+ ASSERT(tcp->tcp_eager_next_drop_q0 == NULL &&
+ tcp->tcp_eager_prev_drop_q0 == NULL);
+
+ /*
+ * Insert at end of the queue because sockfs sends
+ * down T_CONN_RES in chronological order. Leaving
+ * the older conn indications at front of the queue
+ * helps reducing search time.
+ */
+ tail = listener->tcp_eager_last_q;
+ if (tail != NULL) {
+ tail->tcp_eager_next_q = tcp;
+ } else {
+ listener->tcp_eager_next_q = tcp;
+ }
+ listener->tcp_eager_last_q = tcp;
+ tcp->tcp_eager_next_q = NULL;
+
+ /* Need to get inside the listener perimeter */
+ CONN_INC_REF(listener->tcp_connp);
+ SQUEUE_ENTER_ONE(listener->tcp_connp->conn_sqp, mp1,
+ tcp_send_pending, listener->tcp_connp, SQ_FILL,
+ SQTAG_TCP_SEND_PENDING);
+ }
+no_more_eagers:
+ tcp_eager_unlink(eager);
+ mutex_exit(&listener->tcp_eager_lock);
+
+ /*
+ * At this point, the eager is detached from the listener
+ * but we still have an extra ref on eager (apart from the
+ * usual tcp references). The ref was placed in tcp_rput_data
+ * before sending the conn_ind in tcp_send_conn_ind.
+ * The ref will be dropped in tcp_accept_finish().
+ *
+ * opt_mp, the M_SETOPTS message built above, is handed to
+ * tcp_accept_finish() via the eager's squeue.
+ */
+ SQUEUE_ENTER_ONE(econnp->conn_sqp, opt_mp, tcp_accept_finish,
+ econnp, SQ_NODRAIN, SQTAG_TCP_ACCEPT_FINISH_Q0);
+ return (0);
+}
+
+int
+tcp_accept(sock_lower_handle_t lproto_handle,
+ sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle,
+ cred_t *cr)
+{
+ conn_t *lconnp, *econnp;
+ tcp_t *listener, *eager;
+ tcp_stack_t *tcps;
+
+ lconnp = (conn_t *)lproto_handle; /* lower handles are conn_t pointers */
+ listener = lconnp->conn_tcp;
+ ASSERT(listener->tcp_state == TCPS_LISTEN);
+ econnp = (conn_t *)eproto_handle;
+ eager = econnp->conn_tcp;
+ ASSERT(eager->tcp_listener != NULL);
+ tcps = eager->tcp_tcps;
+
+ ASSERT(IPCL_IS_NONSTR(econnp));
+ /*
+ * Accept entry point taking socket handles rather than a queue
+ * and a TPI message: lproto_handle is the listener's conn,
+ * eproto_handle the eager's conn (which must be non-STREAMS, see
+ * the ASSERT above), and sock_handle and cr are passed through to
+ * tcp_accept_common().
+ *
+ * Create helper stream if it is a non-TPI TCP connection.
+ * If that fails, EPROTO is returned. Otherwise the eager's
+ * tcp_rq/tcp_wq are taken from the conn and the return value is
+ * whatever tcp_accept_common() returns.
+ * NOTE(review): EPROTO here is a positive errno, whereas
+ * tcp_accept_common() can return the negated TLI error -TPROTO;
+ * confirm the caller copes with both conventions.
+ */
+ if (ip_create_helper_stream(econnp, tcps->tcps_ldi_ident)) {
+ ip1dbg(("tcp_accept: create of IP helper stream"
+ " failed\n"));
+ return (EPROTO);
+ }
+ eager->tcp_rq = econnp->conn_rq;
+ eager->tcp_wq = econnp->conn_wq;
+
+ ASSERT(eager->tcp_rq != NULL);
+
+ eager->tcp_sodirect = SOD_SOTOSODP(sock_handle); /* presumably the socket's sodirect_t, possibly NULL -- TODO confirm */
+ return (tcp_accept_common(lconnp, econnp, sock_handle, cr));
}
@@ -18581,7 +18553,7 @@ tcp_send_pending(void *arg, mblk_t *mp, void *arg2)
* Read the block comment on top of tcp_conn_request().
*/
void
-tcp_wput_accept(queue_t *q, mblk_t *mp)
+tcp_tpi_accept(queue_t *q, mblk_t *mp)
{
queue_t *rq = RD(q);
struct T_conn_res *conn_res;
@@ -18589,7 +18561,6 @@ tcp_wput_accept(queue_t *q, mblk_t *mp)
tcp_t *listener;
struct T_ok_ack *ok;
t_scalar_t PRIM_type;
- mblk_t *opt_mp;
conn_t *econnp;
ASSERT(DB_TYPE(mp) == M_PROTO);
@@ -18615,14 +18586,6 @@ tcp_wput_accept(queue_t *q, mblk_t *mp)
* correct function (tcpclose_accept) in case allocb
* fails.
*/
- opt_mp = allocb(sizeof (struct stroptions), BPRI_HI);
- if (opt_mp == NULL) {
- mp = mi_tpi_err_ack_alloc(mp, TPROTO, 0);
- if (mp != NULL)
- putnext(rq, mp);
- return;
- }
-
bcopy(mp->b_rptr + conn_res->OPT_offset,
&eager, conn_res->OPT_length);
PRIM_type = conn_res->PRIM_type;
@@ -18641,45 +18604,20 @@ tcp_wput_accept(queue_t *q, mblk_t *mp)
q->q_ptr = econnp;
q->q_qinfo = &tcp_winit;
listener = eager->tcp_listener;
- eager->tcp_issocket = B_TRUE;
/*
* TCP is _D_SODIRECT and sockfs is directly above so
* save shared sodirect_t pointer (if any).
- *
- * If tcp_fused and sodirect enabled disable it.
*/
eager->tcp_sodirect = SOD_QTOSODP(eager->tcp_rq);
- if (eager->tcp_fused && eager->tcp_sodirect != NULL) {
- /* Fused, disable sodirect */
- mutex_enter(eager->tcp_sodirect->sod_lockp);
- SOD_DISABLE(eager->tcp_sodirect);
- mutex_exit(eager->tcp_sodirect->sod_lockp);
- eager->tcp_sodirect = NULL;
+ if (tcp_accept_common(listener->tcp_connp,
+ econnp, NULL, CRED()) < 0) {
+ mp = mi_tpi_err_ack_alloc(mp, TPROTO, 0);
+ if (mp != NULL)
+ putnext(rq, mp);
+ return;
}
- econnp->conn_zoneid = listener->tcp_connp->conn_zoneid;
- econnp->conn_allzones = listener->tcp_connp->conn_allzones;
- ASSERT(econnp->conn_netstack ==
- listener->tcp_connp->conn_netstack);
- ASSERT(eager->tcp_tcps == listener->tcp_tcps);
-
- /* Put the ref for IP */
- CONN_INC_REF(econnp);
-
- /*
- * We should have minimum of 3 references on the conn
- * at this point. One each for TCP and IP and one for
- * the T_conn_ind that was sent up when the 3-way handshake
- * completed. In the normal case we would also have another
- * reference (making a total of 4) for the conn being in the
- * classifier hash list. However the eager could have received
- * an RST subsequently and tcp_closei_local could have removed
- * the eager from the classifier hash list, hence we can't
- * assert that reference.
- */
- ASSERT(econnp->conn_ref >= 3);
-
/*
* Send the new local address also up to sockfs. There
* should already be enough space in the mp that came
@@ -18721,115 +18659,6 @@ tcp_wput_accept(queue_t *q, mblk_t *mp)
}
putnext(rq, mp);
-
- opt_mp->b_datap->db_type = M_SETOPTS;
- opt_mp->b_wptr += sizeof (struct stroptions);
-
- /*
- * Prepare for inheriting IPV6_BOUND_IF and IPV6_RECVPKTINFO
- * from listener to acceptor. The message is chained on the
- * bind_mp which tcp_rput_other will send down to IP.
- */
- if (listener->tcp_bound_if != 0) {
- /* allocate optmgmt req */
- mp = tcp_setsockopt_mp(IPPROTO_IPV6,
- IPV6_BOUND_IF, (char *)&listener->tcp_bound_if,
- sizeof (int));
- if (mp != NULL)
- linkb(opt_mp, mp);
- }
- if (listener->tcp_ipv6_recvancillary & TCP_IPV6_RECVPKTINFO) {
- uint_t on = 1;
-
- /* allocate optmgmt req */
- mp = tcp_setsockopt_mp(IPPROTO_IPV6,
- IPV6_RECVPKTINFO, (char *)&on, sizeof (on));
- if (mp != NULL)
- linkb(opt_mp, mp);
- }
-
-
- mutex_enter(&listener->tcp_eager_lock);
-
- if (listener->tcp_eager_prev_q0->tcp_conn_def_q0) {
-
- tcp_t *tail;
- tcp_t *tcp;
- mblk_t *mp1;
-
- tcp = listener->tcp_eager_prev_q0;
- /*
- * listener->tcp_eager_prev_q0 points to the TAIL of the
- * deferred T_conn_ind queue. We need to get to the head
- * of the queue in order to send up T_conn_ind the same
- * order as how the 3WHS is completed.
- */
- while (tcp != listener) {
- if (!tcp->tcp_eager_prev_q0->tcp_conn_def_q0 &&
- !tcp->tcp_kssl_pending)
- break;
- else
- tcp = tcp->tcp_eager_prev_q0;
- }
- /* None of the pending eagers can be sent up now */
- if (tcp == listener)
- goto no_more_eagers;
-
- mp1 = tcp->tcp_conn.tcp_eager_conn_ind;
- tcp->tcp_conn.tcp_eager_conn_ind = NULL;
- /* Move from q0 to q */
- ASSERT(listener->tcp_conn_req_cnt_q0 > 0);
- listener->tcp_conn_req_cnt_q0--;
- listener->tcp_conn_req_cnt_q++;
- tcp->tcp_eager_next_q0->tcp_eager_prev_q0 =
- tcp->tcp_eager_prev_q0;
- tcp->tcp_eager_prev_q0->tcp_eager_next_q0 =
- tcp->tcp_eager_next_q0;
- tcp->tcp_eager_prev_q0 = NULL;
- tcp->tcp_eager_next_q0 = NULL;
- tcp->tcp_conn_def_q0 = B_FALSE;
-
- /* Make sure the tcp isn't in the list of droppables */
- ASSERT(tcp->tcp_eager_next_drop_q0 == NULL &&
- tcp->tcp_eager_prev_drop_q0 == NULL);
-
- /*
- * Insert at end of the queue because sockfs sends
- * down T_CONN_RES in chronological order. Leaving
- * the older conn indications at front of the queue
- * helps reducing search time.
- */
- tail = listener->tcp_eager_last_q;
- if (tail != NULL) {
- tail->tcp_eager_next_q = tcp;
- } else {
- listener->tcp_eager_next_q = tcp;
- }
- listener->tcp_eager_last_q = tcp;
- tcp->tcp_eager_next_q = NULL;
-
- /* Need to get inside the listener perimeter */
- CONN_INC_REF(listener->tcp_connp);
- SQUEUE_ENTER_ONE(listener->tcp_connp->conn_sqp, mp1,
- tcp_send_pending, listener->tcp_connp,
- SQ_FILL, SQTAG_TCP_SEND_PENDING);
- }
-no_more_eagers:
- tcp_eager_unlink(eager);
- mutex_exit(&listener->tcp_eager_lock);
-
- /*
- * At this point, the eager is detached from the listener
- * but we still have an extra refs on eager (apart from the
- * usual tcp references). The ref was placed in tcp_rput_data
- * before sending the conn_ind in tcp_send_conn_ind.
- * The ref will be dropped in tcp_accept_finish(). As sockfs
- * has already established this tcp with it's own stream,
- * it's OK to set tcp_detached to B_FALSE.
- */
- econnp->conn_tcp->tcp_detached = B_FALSE;
- SQUEUE_ENTER_ONE(econnp->conn_sqp, opt_mp, tcp_accept_finish,
- econnp, SQ_NODRAIN, SQTAG_TCP_ACCEPT_FINISH_Q0);
return;
default:
mp = mi_tpi_err_ack_alloc(mp, TNOTSUPPORT, 0);
@@ -18878,7 +18707,7 @@ tcp_getmyname(tcp_t *tcp, struct sockaddr *sa, uint_t *salenp)
}
static int
-tcp_getpeername(tcp_t *tcp, struct sockaddr *sa, uint_t *salenp)
+i_tcp_getpeername(tcp_t *tcp, struct sockaddr *sa, uint_t *salenp)
{
sin_t *sin = (sin_t *)sa;
sin6_t *sin6 = (sin6_t *)sa;
@@ -18898,6 +18727,7 @@ tcp_getpeername(tcp_t *tcp, struct sockaddr *sa, uint_t *salenp)
sin->sin_port = tcp->tcp_fport;
IN6_V4MAPPED_TO_IPADDR(&tcp->tcp_remote_v6,
sin->sin_addr.s_addr);
+ *salenp = sizeof (sin_t);
break;
case AF_INET6:
@@ -18912,6 +18742,7 @@ tcp_getpeername(tcp_t *tcp, struct sockaddr *sa, uint_t *salenp)
sin6->sin6_flowinfo = tcp->tcp_ip6h->ip6_vcf &
~IPV6_VERS_AND_FLOW_MASK;
}
+ *salenp = sizeof (sin6_t);
break;
}
@@ -18939,7 +18770,7 @@ tcp_wput_cmdblk(queue_t *q, mblk_t *mp)
switch (cmdp->cb_cmd) {
case TI_GETPEERNAME:
- cmdp->cb_error = tcp_getpeername(tcp, data, &cmdp->cb_len);
+ cmdp->cb_error = i_tcp_getpeername(tcp, data, &cmdp->cb_len);
break;
case TI_GETMYNAME:
cmdp->cb_error = tcp_getmyname(tcp, data, &cmdp->cb_len);
@@ -18961,6 +18792,7 @@ tcp_wput(queue_t *q, mblk_t *mp)
t_scalar_t type;
uchar_t *rptr;
struct iocblk *iocp;
+ size_t size;
tcp_stack_t *tcps = Q_TO_TCP(q)->tcp_tcps;
ASSERT(connp->conn_ref >= 2);
@@ -18970,13 +18802,18 @@ tcp_wput(queue_t *q, mblk_t *mp)
tcp = connp->conn_tcp;
ASSERT(tcp != NULL);
+ size = msgdsize(mp);
+
mutex_enter(&tcp->tcp_non_sq_lock);
- tcp->tcp_squeue_bytes += msgdsize(mp);
+ tcp->tcp_squeue_bytes += size;
if (TCP_UNSENT_BYTES(tcp) > tcp->tcp_xmit_hiwater) {
tcp_setqfull(tcp);
}
mutex_exit(&tcp->tcp_non_sq_lock);
+ if (DB_CRED(mp) == NULL && is_system_labeled())
+ msg_setcredpid(mp, CONN_CRED(connp), curproc->p_pid);
+
CONN_INC_REF(connp);
SQUEUE_ENTER_ONE(connp->conn_sqp, mp, tcp_output, connp,
tcp_squeue_flag, SQTAG_TCP_OUTPUT);
@@ -19108,6 +18945,16 @@ tcp_wput_sock(queue_t *wq, mblk_t *mp)
tcp_wput(wq, mp);
}
+/* ARGSUSED */
+static void
+tcp_wput_fallback(queue_t *wq, mblk_t *mp)
+{
+#ifdef DEBUG /* only DEBUG kernels log the dropped message */
+ cmn_err(CE_CONT, "tcp_wput_fallback: Message during fallback \n");
+#endif /* DEBUG */
+ freemsg(mp); /* the message is discarded, never forwarded; wq is unused */
+}
+
static boolean_t
tcp_zcopy_check(tcp_t *tcp)
{
@@ -19150,10 +18997,12 @@ tcp_zcopy_check(tcp_t *tcp)
tcp->tcp_snd_zcopy_on = zc_enabled;
if (!TCP_IS_DETACHED(tcp)) {
if (zc_enabled) {
- (void) mi_set_sth_copyopt(tcp->tcp_rq, ZCVMSAFE);
+ (void) proto_set_tx_copyopt(tcp->tcp_rq, connp,
+ ZCVMSAFE);
TCP_STAT(tcps, tcp_zcopy_on);
} else {
- (void) mi_set_sth_copyopt(tcp->tcp_rq, ZCVMUNSAFE);
+ (void) proto_set_tx_copyopt(tcp->tcp_rq, connp,
+ ZCVMUNSAFE);
TCP_STAT(tcps, tcp_zcopy_off);
}
}
@@ -19170,7 +19019,8 @@ tcp_zcopy_disable(tcp_t *tcp, mblk_t *bp)
else if (tcp->tcp_snd_zcopy_on) {
tcp->tcp_snd_zcopy_on = B_FALSE;
if (!TCP_IS_DETACHED(tcp)) {
- (void) mi_set_sth_copyopt(tcp->tcp_rq, ZCVMUNSAFE);
+ (void) proto_set_tx_copyopt(tcp->tcp_rq, tcp->tcp_connp,
+ ZCVMUNSAFE);
TCP_STAT(tcps, tcp_zcopy_disable);
}
}
@@ -19259,9 +19109,16 @@ static void
tcp_zcopy_notify(tcp_t *tcp)
{
struct stdata *stp;
+ conn_t *connp;
if (tcp->tcp_detached)
return;
+ connp = tcp->tcp_connp;
+ if (IPCL_IS_NONSTR(connp)) {
+ (*connp->conn_upcalls->su_zcopy_notify)
+ (connp->conn_upper_handle);
+ return;
+ }
stp = STREAM(tcp->tcp_rq);
mutex_enter(&stp->sd_lock);
stp->sd_flag |= STZCNOTIFY;
@@ -19423,13 +19280,14 @@ tcp_send_data(tcp_t *tcp, queue_t *q, mblk_t *mp)
ASSERT(DB_TYPE(mp) == M_DATA);
- if (DB_CRED(mp) == NULL)
- mblk_setcred(mp, CONN_CRED(connp));
+ if (is_system_labeled() && DB_CRED(mp) == NULL)
+ mblk_setcred(mp, CONN_CRED(tcp->tcp_connp));
ipha = (ipha_t *)mp->b_rptr;
src = ipha->ipha_src;
dst = ipha->ipha_dst;
+ ASSERT(q != NULL);
DTRACE_PROBE2(tcp__trace__send, mblk_t *, mp, tcp_t *, tcp);
/*
@@ -22430,7 +22288,7 @@ tcp_wput_iocdata(tcp_t *tcp, mblk_t *mp)
error = tcp_getmyname(tcp, (void *)mp1->b_rptr, &addrlen);
break;
case TI_GETPEERNAME:
- error = tcp_getpeername(tcp, (void *)mp1->b_rptr, &addrlen);
+ error = i_tcp_getpeername(tcp, (void *)mp1->b_rptr, &addrlen);
break;
}
@@ -22445,6 +22303,35 @@ tcp_wput_iocdata(tcp_t *tcp, mblk_t *mp)
}
}
+static void
+tcp_disable_direct_sockfs(tcp_t *tcp)
+{
+#ifdef _ILP32
+ tcp->tcp_acceptor_id = (t_uscalar_t)tcp->tcp_rq; /* read queue pointer used as the id */
+#else
+ tcp->tcp_acceptor_id = tcp->tcp_connp->conn_dev; /* conn_dev used as the id */
+#endif /* _ILP32 */
+ /*
+ * Helper called from tcp_wput_ioctl() just before it acks the
+ * ioctl. By the time it returns, tcp_issocket and tcp_sodirect
+ * have been cleared and the endpoint is in the acceptor hash.
+ *
+ * Insert this socket into the acceptor hash under the id chosen
+ * above. We might need it for a T_CONN_RES message.
+ */
+ tcp_acceptor_hash_insert(tcp->tcp_acceptor_id, tcp);
+
+ if (tcp->tcp_fused) {
+ /*
+ * This is a fused loopback tcp; disable
+ * read-side synchronous streams interface
+ * and drain any queued data. It is okay
+ * to do this for non-synchronous streams
+ * fused tcp as well.
+ */
+ tcp_fuse_disable_pair(tcp, B_FALSE);
+ }
+ tcp->tcp_issocket = B_FALSE;
+ tcp->tcp_sodirect = NULL; /* forget the sodirect pointer as well */
+ TCP_STAT(tcp->tcp_tcps, tcp_sock_fallback); /* record under the tcp_sock_fallback stat */
+}
+
/*
* tcp_wput_ioctl is called by tcp_wput_nondata() to handle all M_IOCTL
* messages.
@@ -22457,7 +22344,6 @@ tcp_wput_ioctl(void *arg, mblk_t *mp, void *arg2)
tcp_t *tcp = connp->conn_tcp;
queue_t *q = tcp->tcp_wq;
struct iocblk *iocp;
- tcp_stack_t *tcps = tcp->tcp_tcps;
ASSERT(DB_TYPE(mp) == M_IOCTL);
/*
@@ -22498,31 +22384,7 @@ tcp_wput_ioctl(void *arg, mblk_t *mp, void *arg2)
DB_TYPE(mp) = M_IOCNAK;
iocp->ioc_error = EINVAL;
} else {
-#ifdef _ILP32
- tcp->tcp_acceptor_id = (t_uscalar_t)RD(q);
-#else
- tcp->tcp_acceptor_id = tcp->tcp_connp->conn_dev;
-#endif
- /*
- * Insert this socket into the acceptor hash.
- * We might need it for T_CONN_RES message
- */
- tcp_acceptor_hash_insert(tcp->tcp_acceptor_id, tcp);
-
- if (tcp->tcp_fused) {
- /*
- * This is a fused loopback tcp; disable
- * read-side synchronous streams interface
- * and drain any queued data. It is okay
- * to do this for non-synchronous streams
- * fused tcp as well.
- */
- tcp_fuse_disable_pair(tcp, B_FALSE);
- }
- tcp->tcp_issocket = B_FALSE;
- tcp->tcp_sodirect = NULL;
- TCP_STAT(tcps, tcp_sock_fallback);
-
+ tcp_disable_direct_sockfs(tcp);
DB_TYPE(mp) = M_IOCACK;
iocp->ioc_error = 0;
}
@@ -22546,7 +22408,6 @@ tcp_wput_proto(void *arg, mblk_t *mp, void *arg2)
union T_primitives *tprim = (union T_primitives *)mp->b_rptr;
uchar_t *rptr;
t_scalar_t type;
- int len;
cred_t *cr = DB_CREDDEF(mp, tcp->tcp_cred);
/*
@@ -22566,34 +22427,16 @@ tcp_wput_proto(void *arg, mblk_t *mp, void *arg2)
if ((mp->b_wptr - rptr) >= sizeof (t_scalar_t)) {
type = ((union T_primitives *)rptr)->type;
if (type == T_EXDATA_REQ) {
- uint32_t msize = msgdsize(mp->b_cont);
-
- len = msize - 1;
- if (len < 0) {
- freemsg(mp);
- return;
- }
- /*
- * Try to force urgent data out on the wire.
- * Even if we have unsent data this will
- * at least send the urgent flag.
- * XXX does not handle more flag correctly.
- */
- len += tcp->tcp_unsent;
- len += tcp->tcp_snxt;
- tcp->tcp_urg = len;
- tcp->tcp_valid_bits |= TCP_URG_VALID;
-
- /* Bypass tcp protocol for fused tcp loopback */
- if (tcp->tcp_fused && tcp_fuse_output(tcp, mp, msize))
- return;
+ tcp_output_urgent(connp, mp->b_cont, arg2);
+ freeb(mp);
} else if (type != T_DATA_REQ) {
goto non_urgent_data;
+ } else {
+ /* TODO: options, flags, ... from user */
+ /* Set length to zero for reclamation below */
+ tcp_wput_data(tcp, mp->b_cont, B_TRUE);
+ freeb(mp);
}
- /* TODO: options, flags, ... from user */
- /* Set length to zero for reclamation below */
- tcp_wput_data(tcp, mp->b_cont, B_TRUE);
- freeb(mp);
return;
} else {
if (tcp->tcp_debug) {
@@ -22631,17 +22474,17 @@ non_urgent_data:
/* FALLTHROUGH */
case O_T_BIND_REQ: /* bind request */
case T_BIND_REQ: /* new semantics bind request */
- tcp_bind(tcp, mp);
+ tcp_tpi_bind(tcp, mp);
break;
case T_UNBIND_REQ: /* unbind request */
- tcp_unbind(tcp, mp);
+ tcp_tpi_unbind(tcp, mp);
break;
case O_T_CONN_RES: /* old connection response XXX */
case T_CONN_RES: /* connection response */
- tcp_accept(tcp, mp);
+ tcp_tli_accept(tcp, mp);
break;
case T_CONN_REQ: /* connection request */
- tcp_connect(tcp, mp);
+ tcp_tpi_connect(tcp, mp);
break;
case T_DISCON_REQ: /* disconnect request */
tcp_disconnect(tcp, mp);
@@ -23278,6 +23121,7 @@ tcp_xmit_end(tcp_t *tcp)
ipic->ipic_rtt_sd = tcp->tcp_rtt_sd;
CALL_IP_WPUT(tcp->tcp_connp, tcp->tcp_wq, mp);
+
return (0);
}
@@ -23798,14 +23642,15 @@ tcp_push_timer(void *arg)
{
conn_t *connp = (conn_t *)arg;
tcp_t *tcp = connp->conn_tcp;
- tcp_stack_t *tcps = tcp->tcp_tcps;
uint_t flags;
sodirect_t *sodp;
- TCP_DBGSTAT(tcps, tcp_push_timer_cnt);
+ TCP_DBGSTAT(tcp->tcp_tcps, tcp_push_timer_cnt);
ASSERT(tcp->tcp_listener == NULL);
+ ASSERT(!IPCL_IS_NONSTR(connp));
+
/*
* We need to plug synchronous streams during our drain to prevent
* a race with tcp_fuse_rrw() or tcp_fusion_rinfop().
@@ -23818,7 +23663,7 @@ tcp_push_timer(void *arg)
flags = tcp_rcv_sod_wakeup(tcp, sodp);
/* sod_wakeup() does the mutex_exit() */
} else if (tcp->tcp_rcv_list != NULL) {
- flags = tcp_rcv_drain(tcp->tcp_rq, tcp);
+ flags = tcp_rcv_drain(tcp);
}
if (flags == TH_ACK_NEEDED)
tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt, tcp->tcp_rnxt, TH_ACK);
@@ -24030,15 +23875,19 @@ tcp_ack_mp(tcp_t *tcp)
}
/*
- * Hash list insertion routine for tcp_t structures.
- * Inserts entries with the ones bound to a specific IP address first
- * followed by those bound to INADDR_ANY.
+ * Hash list insertion routine for tcp_t structures. Each hash bucket
+ * contains a list of tcp_t entries, and each entry is bound to a unique
+ * port. If there are multiple tcp_t's that are bound to the same port, then
+ * one of them will be linked into the hash bucket list, and the rest will
+ * hang off of that one entry. For each port, entries bound to a specific IP
+ * address will be inserted before those bound to INADDR_ANY.
*/
static void
tcp_bind_hash_insert(tf_t *tbf, tcp_t *tcp, int caller_holds_lock)
{
tcp_t **tcpp;
tcp_t *tcpnext;
+ tcp_t *tcphash;
if (tcp->tcp_ptpbhn != NULL) {
ASSERT(!caller_holds_lock);
@@ -24050,9 +23899,22 @@ tcp_bind_hash_insert(tf_t *tbf, tcp_t *tcp, int caller_holds_lock)
} else {
ASSERT(MUTEX_HELD(&tbf->tf_lock));
}
- tcpnext = tcpp[0];
- if (tcpnext) {
+ tcphash = tcpp[0];
+ tcpnext = NULL;
+ if (tcphash != NULL) {
+ /* Look for an entry using the same port */
+ while ((tcphash = tcpp[0]) != NULL &&
+ tcp->tcp_lport != tcphash->tcp_lport)
+ tcpp = &(tcphash->tcp_bind_hash);
+
+ /* The port was not found, just add to the end */
+ if (tcphash == NULL)
+ goto insert;
+
/*
+ * OK, there already exists an entry bound to the
+ * same port.
+ *
* If the new tcp bound to the INADDR_ANY address
* and the first one in the list is not bound to
* INADDR_ANY we skip all entries until we find the
@@ -24061,17 +23923,36 @@ tcp_bind_hash_insert(tf_t *tbf, tcp_t *tcp, int caller_holds_lock)
* specific address get preference over those binding to
* INADDR_ANY.
*/
+ tcpnext = tcphash;
+ tcphash = NULL;
if (V6_OR_V4_INADDR_ANY(tcp->tcp_bound_source_v6) &&
!V6_OR_V4_INADDR_ANY(tcpnext->tcp_bound_source_v6)) {
while ((tcpnext = tcpp[0]) != NULL &&
!V6_OR_V4_INADDR_ANY(tcpnext->tcp_bound_source_v6))
- tcpp = &(tcpnext->tcp_bind_hash);
- if (tcpnext)
- tcpnext->tcp_ptpbhn = &tcp->tcp_bind_hash;
- } else
- tcpnext->tcp_ptpbhn = &tcp->tcp_bind_hash;
+ tcpp = &(tcpnext->tcp_bind_hash_port);
+
+ if (tcpnext) {
+ tcpnext->tcp_ptpbhn = &tcp->tcp_bind_hash_port;
+ tcphash = tcpnext->tcp_bind_hash;
+ if (tcphash != NULL) {
+ tcphash->tcp_ptpbhn =
+ &(tcp->tcp_bind_hash);
+ tcpnext->tcp_bind_hash = NULL;
+ }
+ }
+ } else {
+ tcpnext->tcp_ptpbhn = &tcp->tcp_bind_hash_port;
+ tcphash = tcpnext->tcp_bind_hash;
+ if (tcphash != NULL) {
+ tcphash->tcp_ptpbhn =
+ &(tcp->tcp_bind_hash);
+ tcpnext->tcp_bind_hash = NULL;
+ }
+ }
}
- tcp->tcp_bind_hash = tcpnext;
+insert:
+ tcp->tcp_bind_hash_port = tcpnext;
+ tcp->tcp_bind_hash = tcphash;
tcp->tcp_ptpbhn = tcpp;
tcpp[0] = tcp;
if (!caller_holds_lock)
@@ -24101,8 +23982,17 @@ tcp_bind_hash_remove(tcp_t *tcp)
ASSERT(lockp != NULL);
mutex_enter(lockp);
if (tcp->tcp_ptpbhn) {
- tcpnext = tcp->tcp_bind_hash;
- if (tcpnext) {
+ tcpnext = tcp->tcp_bind_hash_port;
+ if (tcpnext != NULL) {
+ tcp->tcp_bind_hash_port = NULL;
+ tcpnext->tcp_ptpbhn = tcp->tcp_ptpbhn;
+ tcpnext->tcp_bind_hash = tcp->tcp_bind_hash;
+ if (tcpnext->tcp_bind_hash != NULL) {
+ tcpnext->tcp_bind_hash->tcp_ptpbhn =
+ &(tcpnext->tcp_bind_hash);
+ tcp->tcp_bind_hash = NULL;
+ }
+ } else if ((tcpnext = tcp->tcp_bind_hash) != NULL) {
tcpnext->tcp_ptpbhn = tcp->tcp_ptpbhn;
tcp->tcp_bind_hash = NULL;
}
@@ -24507,36 +24397,6 @@ tcp_random(void)
return (i);
}
-/*
- * XXX This will go away when TPI is extended to send
- * info reqs to sockfs/timod .....
- * Given a queue, set the max packet size for the write
- * side of the queue below stream head. This value is
- * cached on the stream head.
- * Returns 1 on success, 0 otherwise.
- */
-static int
-setmaxps(queue_t *q, int maxpsz)
-{
- struct stdata *stp;
- queue_t *wq;
- stp = STREAM(q);
-
- /*
- * At this point change of a queue parameter is not allowed
- * when a multiplexor is sitting on top.
- */
- if (stp->sd_flag & STPLEX)
- return (0);
-
- claimstr(stp->sd_wrq);
- wq = stp->sd_wrq->q_next;
- ASSERT(wq != NULL);
- (void) strqset(wq, QMAXPSZ, 0, maxpsz);
- releasestr(stp->sd_wrq);
- return (1);
-}
-
static int
tcp_conprim_opt_process(tcp_t *tcp, mblk_t *mp, int *do_disconnectp,
int *t_errorp, int *sys_errorp)
@@ -24964,6 +24824,8 @@ tcp_ddi_g_init(void)
}
+#define INET_NAME "ip"
+
/*
* Initialize the TCP stack instance.
*/
@@ -24973,6 +24835,8 @@ tcp_stack_init(netstackid_t stackid, netstack_t *ns)
tcp_stack_t *tcps;
tcpparam_t *pa;
int i;
+ int error = 0;
+ major_t major;
tcps = (tcp_stack_t *)kmem_zalloc(sizeof (*tcps), KM_SLEEP);
tcps->tcps_netstack = ns;
@@ -25038,6 +24902,9 @@ tcp_stack_init(netstackid_t stackid, netstack_t *ns)
tcps->tcps_kstat = tcp_kstat2_init(stackid, &tcps->tcps_statistics);
tcps->tcps_mibkp = tcp_kstat_init(stackid, tcps);
+ major = mod_name_to_major(INET_NAME);
+ error = ldi_ident_from_major(major, &tcps->tcps_ldi_ident);
+ ASSERT(error == 0);
return (tcps);
}
@@ -25125,6 +24992,7 @@ tcp_stack_fini(netstackid_t stackid, void *arg)
tcp_kstat_fini(stackid, tcps->tcps_mibkp);
tcps->tcps_mibkp = NULL;
+ ldi_ident_release(tcps->tcps_ldi_ident);
kmem_free(tcps, sizeof (*tcps));
}
@@ -25922,44 +25790,6 @@ done:
}
/*
- * Allocate a T_SVR4_OPTMGMT_REQ.
- * The caller needs to increment tcp_drop_opt_ack_cnt when sending these so
- * that tcp_rput_other can drop the acks.
- */
-static mblk_t *
-tcp_setsockopt_mp(int level, int cmd, char *opt, int optlen)
-{
- mblk_t *mp;
- struct T_optmgmt_req *tor;
- struct opthdr *oh;
- uint_t size;
- char *optptr;
-
- size = sizeof (*tor) + sizeof (*oh) + optlen;
- mp = allocb(size, BPRI_MED);
- if (mp == NULL)
- return (NULL);
-
- mp->b_wptr += size;
- mp->b_datap->db_type = M_PROTO;
- tor = (struct T_optmgmt_req *)mp->b_rptr;
- tor->PRIM_type = T_SVR4_OPTMGMT_REQ;
- tor->MGMT_flags = T_NEGOTIATE;
- tor->OPT_length = sizeof (*oh) + optlen;
- tor->OPT_offset = (t_scalar_t)sizeof (*tor);
-
- oh = (struct opthdr *)&tor[1];
- oh->level = level;
- oh->name = cmd;
- oh->len = optlen;
- if (optlen != 0) {
- optptr = (char *)&oh[1];
- bcopy(opt, optptr, optlen);
- }
- return (mp);
-}
-
-/*
* TCP Timers Implementation.
*/
timeout_id_t
@@ -25968,16 +25798,15 @@ tcp_timeout(conn_t *connp, void (*f)(void *), clock_t tim)
mblk_t *mp;
tcp_timer_t *tcpt;
tcp_t *tcp = connp->conn_tcp;
- tcp_stack_t *tcps = tcp->tcp_tcps;
ASSERT(connp->conn_sqp != NULL);
- TCP_DBGSTAT(tcps, tcp_timeout_calls);
+ TCP_DBGSTAT(tcp->tcp_tcps, tcp_timeout_calls);
if (tcp->tcp_timercache == NULL) {
mp = tcp_timermp_alloc(KM_NOSLEEP | KM_PANIC);
} else {
- TCP_DBGSTAT(tcps, tcp_timeout_cached_alloc);
+ TCP_DBGSTAT(tcp->tcp_tcps, tcp_timeout_cached_alloc);
mp = tcp->tcp_timercache;
tcp->tcp_timercache = mp->b_next;
mp->b_next = NULL;
@@ -26052,9 +25881,8 @@ tcp_timeout_cancel(conn_t *connp, timeout_id_t id)
mblk_t *mp = (mblk_t *)id;
tcp_timer_t *tcpt;
clock_t delta;
- tcp_stack_t *tcps = connp->conn_tcp->tcp_tcps;
- TCP_DBGSTAT(tcps, tcp_timeout_cancel_reqs);
+ TCP_DBGSTAT(connp->conn_tcp->tcp_tcps, tcp_timeout_cancel_reqs);
if (mp == NULL)
return (-1);
@@ -26065,7 +25893,7 @@ tcp_timeout_cancel(conn_t *connp, timeout_id_t id)
delta = untimeout_default(tcpt->tcpt_tid, 0);
if (delta >= 0) {
- TCP_DBGSTAT(tcps, tcp_timeout_canceled);
+ TCP_DBGSTAT(connp->conn_tcp->tcp_tcps, tcp_timeout_canceled);
tcp_timer_free(connp->conn_tcp, mp);
CONN_DEC_REF(connp);
}
@@ -26156,7 +25984,6 @@ static void
tcp_timer_free(tcp_t *tcp, mblk_t *mp)
{
mblk_t *mp1 = tcp->tcp_timercache;
- tcp_stack_t *tcps = tcp->tcp_tcps;
if (mp->b_wptr != NULL) {
/*
@@ -26174,7 +26001,7 @@ tcp_timer_free(tcp_t *tcp, mblk_t *mp)
tcp->tcp_timercache = mp;
} else {
kmem_cache_free(tcp_timercache, mp);
- TCP_DBGSTAT(tcps, tcp_timermp_freed);
+ TCP_DBGSTAT(tcp->tcp_tcps, tcp_timermp_freed);
}
}
@@ -26188,23 +26015,33 @@ tcp_timer_free(tcp_t *tcp, mblk_t *mp)
* decision to call based on the tcp_t.tcp_flow_stopped value which
* when check outside the q's lock is only an advisory check ...
*/
-
void
tcp_setqfull(tcp_t *tcp)
{
- queue_t *q = tcp->tcp_wq;
tcp_stack_t *tcps = tcp->tcp_tcps;
+ conn_t *connp = tcp->tcp_connp;
+
+ if (tcp->tcp_closed)
+ return;
+
+ if (IPCL_IS_NONSTR(connp)) {
+ (*connp->conn_upcalls->su_txq_full)
+ (tcp->tcp_connp->conn_upper_handle, B_TRUE);
+ tcp->tcp_flow_stopped = B_TRUE;
+ } else {
+ queue_t *q = tcp->tcp_wq;
- if (!(q->q_flag & QFULL)) {
- mutex_enter(QLOCK(q));
if (!(q->q_flag & QFULL)) {
- /* still need to set QFULL */
- q->q_flag |= QFULL;
- tcp->tcp_flow_stopped = B_TRUE;
- mutex_exit(QLOCK(q));
- TCP_STAT(tcps, tcp_flwctl_on);
- } else {
- mutex_exit(QLOCK(q));
+ mutex_enter(QLOCK(q));
+ if (!(q->q_flag & QFULL)) {
+ /* still need to set QFULL */
+ q->q_flag |= QFULL;
+ tcp->tcp_flow_stopped = B_TRUE;
+ mutex_exit(QLOCK(q));
+ TCP_STAT(tcps, tcp_flwctl_on);
+ } else {
+ mutex_exit(QLOCK(q));
+ }
}
}
}
@@ -26212,23 +26049,33 @@ tcp_setqfull(tcp_t *tcp)
void
tcp_clrqfull(tcp_t *tcp)
{
- queue_t *q = tcp->tcp_wq;
+ conn_t *connp = tcp->tcp_connp;
+
+ if (tcp->tcp_closed)
+ return;
+
+ if (IPCL_IS_NONSTR(connp)) {
+ (*connp->conn_upcalls->su_txq_full)
+ (tcp->tcp_connp->conn_upper_handle, B_FALSE);
+ tcp->tcp_flow_stopped = B_FALSE;
+ } else {
+ queue_t *q = tcp->tcp_wq;
- if (q->q_flag & QFULL) {
- mutex_enter(QLOCK(q));
if (q->q_flag & QFULL) {
- q->q_flag &= ~QFULL;
- tcp->tcp_flow_stopped = B_FALSE;
- mutex_exit(QLOCK(q));
- if (q->q_flag & QWANTW)
- qbackenable(q, 0);
- } else {
- mutex_exit(QLOCK(q));
+ mutex_enter(QLOCK(q));
+ if (q->q_flag & QFULL) {
+ q->q_flag &= ~QFULL;
+ tcp->tcp_flow_stopped = B_FALSE;
+ mutex_exit(QLOCK(q));
+ if (q->q_flag & QWANTW)
+ qbackenable(q, 0);
+ } else {
+ mutex_exit(QLOCK(q));
+ }
}
}
}
-
/*
* kstats related to squeues i.e. not per IP instance
*/
@@ -26681,3 +26528,1626 @@ tcp_squeue_add(squeue_t *sqp)
}
tcp_time_wait->tcp_free_list_cnt = 0;
}
+
+/*
+ * Complete a bind or connect after IP has processed the bind request.
+ *
+ * Arguments:
+ *	tcp	- the endpoint being bound or connected.
+ *	mp	- optional message handed back by IP (may be NULL).  It can
+ *		  carry Multidata/LSO capability information and the
+ *		  requested IRE.  This function always consumes it.
+ *	error	- result of the IP bind request; 0 means it succeeded.
+ *
+ * When the IP bind succeeded and tcp_hard_binding is set, the connection
+ * parameters are adapted to the IRE and the SYN segment is sent.  When
+ * the IP bind failed, or one of the later checks fails, the endpoint is
+ * removed from the classifier and bind hashes and put back to TCPS_IDLE.
+ *
+ * Returns 0 on success.  On failure returns the incoming error, or
+ * ENETUNREACH, EADDRNOTAVAIL or EHOSTUNREACH for failures detected here.
+ */
+static int
+tcp_post_ip_bind(tcp_t *tcp, mblk_t *mp, int error)
+{
+	mblk_t		*ire_mp = NULL;
+	mblk_t		*syn_mp;
+	mblk_t		*mdti;
+	mblk_t		*lsoi;
+	int		retval;
+	tcph_t		*tcph;
+	uint32_t	mss;
+	queue_t		*q = tcp->tcp_rq;
+	conn_t		*connp = tcp->tcp_connp;
+	tcp_stack_t	*tcps = tcp->tcp_tcps;
+
+	if (error == 0) {
+		/*
+		 * Adapt Multidata information, if any.  The extracted
+		 * message is freed here once it has been applied.
+		 */
+		if (mp != NULL && ((mdti = tcp_mdt_info_mp(mp)) != NULL)) {
+			tcp_mdt_update(tcp, &((ip_mdt_info_t *)mdti->
+			    b_rptr)->mdt_capab, B_TRUE);
+			freemsg(mdti);
+		}
+
+		/*
+		 * Update LSO information, if any.  The extracted message
+		 * is freed here once it has been applied.
+		 */
+		if (mp != NULL && ((lsoi = tcp_lso_info_mp(mp)) != NULL)) {
+			tcp_lso_update(tcp, &((ip_lso_info_t *)lsoi->
+			    b_rptr)->lso_capab);
+			freemsg(lsoi);
+		}
+
+		/* Get the IRE, if we had requested for it */
+		if (mp != NULL)
+			ire_mp = tcp_ire_mp(&mp);
+
+		if (tcp->tcp_hard_binding) {
+			tcp->tcp_hard_binding = B_FALSE;
+			tcp->tcp_hard_bound = B_TRUE;
+			CL_INET_CONNECT(tcp);
+		} else {
+			if (ire_mp != NULL)
+				freeb(ire_mp);
+			goto after_syn_sent;
+		}
+
+		retval = tcp_adapt_ire(tcp, ire_mp);
+		if (ire_mp != NULL)
+			freeb(ire_mp);
+		if (retval == 0) {
+			error = (int)((tcp->tcp_state >= TCPS_SYN_SENT) ?
+			    ENETUNREACH : EADDRNOTAVAIL);
+			goto ipcl_rm;
+		}
+		/*
+		 * Don't let an endpoint connect to itself.
+		 * Also checked in tcp_connect() but that
+		 * check can't handle the case when the
+		 * local IP address is INADDR_ANY.
+		 */
+		if (tcp->tcp_ipversion == IPV4_VERSION) {
+			if ((tcp->tcp_ipha->ipha_dst ==
+			    tcp->tcp_ipha->ipha_src) &&
+			    (BE16_EQL(tcp->tcp_tcph->th_lport,
+			    tcp->tcp_tcph->th_fport))) {
+				error = EADDRNOTAVAIL;
+				goto ipcl_rm;
+			}
+		} else {
+			if (IN6_ARE_ADDR_EQUAL(
+			    &tcp->tcp_ip6h->ip6_dst,
+			    &tcp->tcp_ip6h->ip6_src) &&
+			    (BE16_EQL(tcp->tcp_tcph->th_lport,
+			    tcp->tcp_tcph->th_fport))) {
+				error = EADDRNOTAVAIL;
+				goto ipcl_rm;
+			}
+		}
+		ASSERT(tcp->tcp_state == TCPS_SYN_SENT);
+		/*
+		 * This should not be possible!  Just for
+		 * defensive coding...
+		 */
+		if (tcp->tcp_state != TCPS_SYN_SENT)
+			goto after_syn_sent;
+
+		if (is_system_labeled() &&
+		    !tcp_update_label(tcp, CONN_CRED(tcp->tcp_connp))) {
+			error = EHOSTUNREACH;
+			goto ipcl_rm;
+		}
+
+		/*
+		 * tcp_adapt_ire() does not adjust
+		 * for TCP/IP header length.
+		 */
+		mss = tcp->tcp_mss - tcp->tcp_hdr_len;
+
+		/*
+		 * Just make sure our rwnd is at
+		 * least tcp_recv_hiwat_mss * MSS
+		 * large, and round up to the nearest
+		 * MSS.
+		 *
+		 * We do the round up here because
+		 * we need to get the interface
+		 * MTU first before we can do the
+		 * round up.
+		 */
+		tcp->tcp_rwnd = MAX(MSS_ROUNDUP(tcp->tcp_rwnd, mss),
+		    tcps->tcps_recv_hiwat_minmss * mss);
+		/* Only a STREAMS endpoint has a read queue to update. */
+		if (!IPCL_IS_NONSTR(connp))
+			q->q_hiwat = tcp->tcp_rwnd;
+		tcp->tcp_recv_hiwater = tcp->tcp_rwnd;
+		tcp_set_ws_value(tcp);
+		U32_TO_ABE16((tcp->tcp_rwnd >> tcp->tcp_rcv_ws),
+		    tcp->tcp_tcph->th_win);
+		if (tcp->tcp_rcv_ws > 0 || tcps->tcps_wscale_always)
+			tcp->tcp_snd_ws_ok = B_TRUE;
+
+		/*
+		 * Set tcp_snd_ts_ok to true
+		 * so that tcp_xmit_mp will
+		 * include the timestamp
+		 * option in the SYN segment.
+		 */
+		if (tcps->tcps_tstamp_always ||
+		    (tcp->tcp_rcv_ws && tcps->tcps_tstamp_if_wscale)) {
+			tcp->tcp_snd_ts_ok = B_TRUE;
+		}
+
+		/*
+		 * tcp_snd_sack_ok can be set in
+		 * tcp_adapt_ire() if the sack metric
+		 * is set.  So check it here also.
+		 */
+		if (tcps->tcps_sack_permitted == 2 ||
+		    tcp->tcp_snd_sack_ok) {
+			if (tcp->tcp_sack_info == NULL) {
+				tcp->tcp_sack_info =
+				    kmem_cache_alloc(tcp_sack_info_cache,
+				    KM_SLEEP);
+			}
+			tcp->tcp_snd_sack_ok = B_TRUE;
+		}
+
+		/*
+		 * Should we use ECN?  Note that the current
+		 * default value (SunOS 5.9) of tcp_ecn_permitted
+		 * is 1.  The reason for doing this is that there
+		 * are equipments out there that will drop ECN
+		 * enabled IP packets.  Setting it to 1 avoids
+		 * compatibility problems.
+		 */
+		if (tcps->tcps_ecn_permitted == 2)
+			tcp->tcp_ecn_ok = B_TRUE;
+
+		TCP_TIMER_RESTART(tcp, tcp->tcp_rto);
+		syn_mp = tcp_xmit_mp(tcp, NULL, 0, NULL, NULL,
+		    tcp->tcp_iss, B_FALSE, NULL, B_FALSE);
+		if (syn_mp) {
+			cred_t	*cr;
+			pid_t	pid;
+
+			/*
+			 * Obtain the credential from the
+			 * thread calling connect().
+			 * If none can be found, default to
+			 * the creator of the socket.
+			 */
+			if (mp == NULL ||
+			    (cr = DB_CRED(mp)) == NULL) {
+				cr = tcp->tcp_cred;
+				pid = tcp->tcp_cpid;
+			} else {
+				pid = DB_CPID(mp);
+			}
+
+			mblk_setcred(syn_mp, cr);
+			DB_CPID(syn_mp) = pid;
+			tcp_send_data(tcp, tcp->tcp_wq, syn_mp);
+		}
+	after_syn_sent:
+		/*
+		 * Success path: the message supplied by the caller is no
+		 * longer needed, so release it before returning.
+		 */
+		if (mp != NULL) {
+			ASSERT(mp->b_cont == NULL);
+			freeb(mp);
+		}
+		return (error);
+	} else {
+		/* error */
+		if (tcp->tcp_debug) {
+			(void) strlog(TCP_MOD_ID, 0, 1, SL_TRACE|SL_ERROR,
+			    "tcp_post_ip_bind: error == %d", error);
+		}
+		if (mp != NULL) {
+			freeb(mp);
+			/* Already released; keep the cleanup below off it. */
+			mp = NULL;
+		}
+	}
+
+ipcl_rm:
+	/*
+	 * Reached both when IP reported an error and when one of the
+	 * checks above failed after a successful IP bind.  In the latter
+	 * case the caller's message has not been released yet; every other
+	 * path consumes it, so free it here instead of leaking it.
+	 */
+	if (mp != NULL)
+		freemsg(mp);
+
+	/* Undo the binding with the classifier. */
+	tcp->tcp_hard_bound = B_FALSE;
+	tcp->tcp_hard_binding = B_FALSE;
+
+	ipcl_hash_remove(connp);
+
+	/* Put the endpoint back to the unbound state. */
+	tcp->tcp_state = TCPS_IDLE;
+	if (tcp->tcp_ipversion == IPV4_VERSION)
+		tcp->tcp_ipha->ipha_src = 0;
+	else
+		V6_SET_ZERO(tcp->tcp_ip6h->ip6_src);
+	/*
+	 * Copy of the src addr. in tcp_t is needed since
+	 * the lookup funcs. can only look at tcp_t
+	 */
+	V6_SET_ZERO(tcp->tcp_ip_src_v6);
+
+	tcph = tcp->tcp_tcph;
+	tcph->th_lport[0] = 0;
+	tcph->th_lport[1] = 0;
+	tcp_bind_hash_remove(tcp);
+	bzero(&connp->u_port, sizeof (connp->u_port));
+	/* blow away saved option results if any */
+	if (tcp->tcp_conn.tcp_opts_conn_req != NULL)
+		tcp_close_mpp(&tcp->tcp_conn.tcp_opts_conn_req);
+
+	conn_delete_ire(tcp->tcp_connp, NULL);
+
+	return (error);
+}
+
+/*
+ * Select and reserve the local port for a bind.
+ *
+ * On entry *requested_port_ptr holds the port asked for by the caller
+ * (0 means "pick one"); on success it is overwritten with the port that
+ * tcp_bindi() allocated.  If cr is NULL the credential stored in the
+ * tcp_t is used for the privilege checks.
+ *
+ * Returns 0 on success.  Failures detected here are reported as negative
+ * TLI errors (-TNOADDR, -TACCES, -TADDRBUSY); a failure of
+ * tsol_mlp_anon() is returned unchanged.
+ */
+static int
+tcp_bind_select_lport(tcp_t *tcp, in_port_t *requested_port_ptr,
+ boolean_t bind_to_req_port_only, cred_t *cr)
+{
+ in_port_t mlp_port;
+ mlp_type_t addrtype, mlptype;
+ boolean_t user_specified;
+ in_port_t allocated_port;
+ in_port_t requested_port = *requested_port_ptr;
+ conn_t *connp;
+ zone_t *zone;
+ tcp_stack_t *tcps = tcp->tcp_tcps;
+ in6_addr_t v6addr = tcp->tcp_ip_src_v6;
+
+ /*
+ * XXX It's up to the caller to specify bind_to_req_port_only or not.
+ */
+ if (cr == NULL)
+ cr = tcp->tcp_cred;
+ /*
+ * Get a valid port (within the anonymous range and should not
+ * be a privileged one) to use if the user has not given a port.
+ * If multiple threads are here, they may all start with
+ * the same initial port. But, it should be fine as long as
+ * tcp_bindi will ensure that no two threads will be assigned
+ * the same port.
+ *
+ * NOTE: XXX If a privileged process asks for an anonymous port, we
+ * still check for ports only in the range > tcp_smallest_non_priv_port,
+ * unless TCP_ANONPRIVBIND option is set.
+ */
+ mlptype = mlptSingle;
+ mlp_port = requested_port;
+ if (requested_port == 0) {
+ requested_port = tcp->tcp_anon_priv_bind ?
+ tcp_get_next_priv_port(tcp) :
+ tcp_update_next_port(tcps->tcps_next_port_to_try,
+ tcp, B_TRUE);
+ if (requested_port == 0) {
+ return (-TNOADDR);
+ }
+ user_specified = B_FALSE;
+
+ /*
+ * If the user went through one of the RPC interfaces to create
+ * this socket and RPC is MLP in this zone, then give him an
+ * anonymous MLP.
+ */
+ connp = tcp->tcp_connp;
+ if (connp->conn_anon_mlp && is_system_labeled()) {
+ zone = crgetzone(cr);
+ addrtype = tsol_mlp_addr_type(zone->zone_id,
+ IPV6_VERSION, &v6addr,
+ tcps->tcps_netstack->netstack_ip);
+ if (addrtype == mlptSingle) {
+ return (-TNOADDR);
+ }
+ mlptype = tsol_mlp_port_type(zone, IPPROTO_TCP,
+ PMAPPORT, addrtype);
+ mlp_port = PMAPPORT;
+ }
+ } else {
+ int i;
+ boolean_t priv = B_FALSE;
+
+ /*
+ * If the requested_port is in the well-known privileged range,
+ * verify that the stream was opened by a privileged user.
+ * Note: No locks are held when inspecting tcp_g_*epriv_ports
+ * but instead the code relies on:
+ * - the fact that the address of the array and its size never
+ * changes
+ * - the atomic assignment of the elements of the array
+ */
+ if (requested_port < tcps->tcps_smallest_nonpriv_port) {
+ priv = B_TRUE;
+ } else {
+ for (i = 0; i < tcps->tcps_g_num_epriv_ports; i++) {
+ if (requested_port ==
+ tcps->tcps_g_epriv_ports[i]) {
+ priv = B_TRUE;
+ break;
+ }
+ }
+ }
+ if (priv) {
+ if (secpolicy_net_privaddr(cr, requested_port,
+ IPPROTO_TCP) != 0) {
+ if (tcp->tcp_debug) {
+ (void) strlog(TCP_MOD_ID, 0, 1,
+ SL_ERROR|SL_TRACE,
+ "tcp_bind: no priv for port %d",
+ requested_port);
+ }
+ return (-TACCES);
+ }
+ }
+ user_specified = B_TRUE;
+
+ connp = tcp->tcp_connp;
+ if (is_system_labeled()) {
+ zone = crgetzone(cr);
+ addrtype = tsol_mlp_addr_type(zone->zone_id,
+ IPV6_VERSION, &v6addr,
+ tcps->tcps_netstack->netstack_ip);
+ if (addrtype == mlptSingle) {
+ return (-TNOADDR);
+ }
+ mlptype = tsol_mlp_port_type(zone, IPPROTO_TCP,
+ requested_port, addrtype);
+ }
+ }
+
+ /*
+ * mlptype is only changed from mlptSingle on the labeled-system
+ * paths above, which also set zone and addrtype before use here.
+ */
+ if (mlptype != mlptSingle) {
+ if (secpolicy_net_bindmlp(cr) != 0) {
+ if (tcp->tcp_debug) {
+ (void) strlog(TCP_MOD_ID, 0, 1,
+ SL_ERROR|SL_TRACE,
+ "tcp_bind: no priv for multilevel port %d",
+ requested_port);
+ }
+ return (-TACCES);
+ }
+
+ /*
+ * If we're specifically binding a shared IP address and the
+ * port is MLP on shared addresses, then check to see if this
+ * zone actually owns the MLP. Reject if not.
+ */
+ if (mlptype == mlptShared && addrtype == mlptShared) {
+ /*
+ * No need to handle exclusive-stack zones since
+ * ALL_ZONES only applies to the shared stack.
+ */
+ zoneid_t mlpzone;
+
+ mlpzone = tsol_mlp_findzone(IPPROTO_TCP,
+ htons(mlp_port));
+ if (connp->conn_zoneid != mlpzone) {
+ if (tcp->tcp_debug) {
+ (void) strlog(TCP_MOD_ID, 0, 1,
+ SL_ERROR|SL_TRACE,
+ "tcp_bind: attempt to bind port "
+ "%d on shared addr in zone %d "
+ "(should be %d)",
+ mlp_port, connp->conn_zoneid,
+ mlpzone);
+ }
+ return (-TACCES);
+ }
+ }
+
+ if (!user_specified) {
+ int err;
+ err = tsol_mlp_anon(zone, mlptype, connp->conn_ulp,
+ requested_port, B_TRUE);
+ if (err != 0) {
+ if (tcp->tcp_debug) {
+ (void) strlog(TCP_MOD_ID, 0, 1,
+ SL_ERROR|SL_TRACE,
+ "tcp_bind: cannot establish anon "
+ "MLP for port %d",
+ requested_port);
+ }
+ return (err);
+ }
+ connp->conn_anon_port = B_TRUE;
+ }
+ connp->conn_mlp_type = mlptype;
+ }
+
+ allocated_port = tcp_bindi(tcp, requested_port, &v6addr,
+ tcp->tcp_reuseaddr, B_FALSE, bind_to_req_port_only, user_specified);
+
+ if (allocated_port == 0) {
+ /* No port was allocated: undo the MLP state set up above. */
+ connp->conn_mlp_type = mlptSingle;
+ if (connp->conn_anon_port) {
+ connp->conn_anon_port = B_FALSE;
+ (void) tsol_mlp_anon(zone, mlptype, connp->conn_ulp,
+ requested_port, B_FALSE);
+ }
+ if (bind_to_req_port_only) {
+ if (tcp->tcp_debug) {
+ (void) strlog(TCP_MOD_ID, 0, 1,
+ SL_ERROR|SL_TRACE,
+ "tcp_bind: requested addr busy");
+ }
+ return (-TADDRBUSY);
+ } else {
+ /* If we are out of ports, fail the bind. */
+ if (tcp->tcp_debug) {
+ (void) strlog(TCP_MOD_ID, 0, 1,
+ SL_ERROR|SL_TRACE,
+ "tcp_bind: out of ports?");
+ }
+ return (-TNOADDR);
+ }
+ }
+
+ /* Pass the allocated port back */
+ *requested_port_ptr = allocated_port;
+ return (0);
+}
+
+/*
+ * Validate a bind request, record the requested local address in the
+ * tcp_t and hand the port selection to tcp_bind_select_lport().
+ *
+ * len selects the address form: 0 (no address, any port),
+ * sizeof (sin_t) or sizeof (sin6_t).
+ *
+ * Returns 0 if the endpoint is already in TCPS_BOUND, otherwise the
+ * result of tcp_bind_select_lport().  Failures detected here are
+ * -TOUTSTATE (state beyond TCPS_BOUND), -TPROTO (sa fails the OK_32PTR
+ * check), EAFNOSUPPORT (family or length mismatch) and ENOMEM (the IP
+ * header template could not be re-initialized).
+ */
+static int
+tcp_bind_check(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
+ boolean_t bind_to_req_port_only)
+{
+ tcp_t *tcp = connp->conn_tcp;
+
+ sin_t *sin;
+ sin6_t *sin6;
+ sin6_t sin6addr;
+ in_port_t requested_port;
+ ipaddr_t v4addr;
+ in6_addr_t v6addr;
+ uint_t origipversion;
+ int error = 0;
+
+ ASSERT((uintptr_t)len <= (uintptr_t)INT_MAX);
+
+ if (tcp->tcp_state == TCPS_BOUND) {
+ return (0);
+ } else if (tcp->tcp_state > TCPS_BOUND) {
+ if (tcp->tcp_debug) {
+ (void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
+ "tcp_bind: bad state, %d", tcp->tcp_state);
+ }
+ return (-TOUTSTATE);
+ }
+ /* Remembered so a change of IP version can be detected below. */
+ origipversion = tcp->tcp_ipversion;
+
+ if (sa != NULL && !OK_32PTR((char *)sa)) {
+ if (tcp->tcp_debug) {
+ (void) strlog(TCP_MOD_ID, 0, 1,
+ SL_ERROR|SL_TRACE,
+ "tcp_bind: bad address parameter, "
+ "address %p, len %d",
+ (void *)sa, len);
+ }
+ return (-TPROTO);
+ }
+
+ switch (len) {
+ case 0: /* request for a generic port */
+ if (tcp->tcp_family == AF_INET) {
+ sin = (sin_t *)&sin6addr;
+ *sin = sin_null;
+ sin->sin_family = AF_INET;
+ tcp->tcp_ipversion = IPV4_VERSION;
+ IN6_IPADDR_TO_V4MAPPED(INADDR_ANY, &v6addr);
+ } else {
+ ASSERT(tcp->tcp_family == AF_INET6);
+ sin6 = (sin6_t *)&sin6addr;
+ *sin6 = sin6_null;
+ sin6->sin6_family = AF_INET6;
+ tcp->tcp_ipversion = IPV6_VERSION;
+ V6_SET_ZERO(v6addr);
+ }
+ requested_port = 0;
+ break;
+
+ case sizeof (sin_t): /* Complete IPv4 address */
+ sin = (sin_t *)sa;
+ /*
+ * With sockets sockfs will accept bogus sin_family in
+ * bind() and replace it with the family used in the socket
+ * call.
+ */
+ if (sin->sin_family != AF_INET ||
+ tcp->tcp_family != AF_INET) {
+ return (EAFNOSUPPORT);
+ }
+ requested_port = ntohs(sin->sin_port);
+ tcp->tcp_ipversion = IPV4_VERSION;
+ v4addr = sin->sin_addr.s_addr;
+ IN6_IPADDR_TO_V4MAPPED(v4addr, &v6addr);
+ break;
+
+ case sizeof (sin6_t): /* Complete IPv6 address */
+ sin6 = (sin6_t *)sa;
+ if (sin6->sin6_family != AF_INET6 ||
+ tcp->tcp_family != AF_INET6) {
+ return (EAFNOSUPPORT);
+ }
+ requested_port = ntohs(sin6->sin6_port);
+ tcp->tcp_ipversion = IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr) ?
+ IPV4_VERSION : IPV6_VERSION;
+ v6addr = sin6->sin6_addr;
+ break;
+
+ default:
+ if (tcp->tcp_debug) {
+ (void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
+ "tcp_bind: bad address length, %d", len);
+ }
+ return (EAFNOSUPPORT);
+ /* return (-TBADADDR); */
+ }
+
+ tcp->tcp_bound_source_v6 = v6addr;
+
+ /* Check for change in ipversion */
+ if (origipversion != tcp->tcp_ipversion) {
+ ASSERT(tcp->tcp_family == AF_INET6);
+ error = tcp->tcp_ipversion == IPV6_VERSION ?
+ tcp_header_init_ipv6(tcp) : tcp_header_init_ipv4(tcp);
+ if (error) {
+ return (ENOMEM);
+ }
+ }
+
+ /*
+ * Initialize family specific fields. Copy of the src addr.
+ * in tcp_t is needed for the lookup funcs.
+ */
+ if (tcp->tcp_ipversion == IPV6_VERSION) {
+ tcp->tcp_ip6h->ip6_src = v6addr;
+ } else {
+ IN6_V4MAPPED_TO_IPADDR(&v6addr, tcp->tcp_ipha->ipha_src);
+ }
+ tcp->tcp_ip_src_v6 = v6addr;
+
+ /* Binding to exactly the requested port needs a non-zero port. */
+ bind_to_req_port_only = requested_port != 0 && bind_to_req_port_only;
+
+ error = tcp_bind_select_lport(tcp, &requested_port,
+ bind_to_req_port_only, cr);
+
+ return (error);
+}
+
+/*
+ * Bind the endpoint to the given local address.
+ *
+ * The return value follows the convention used by the other tcp_do_*()
+ * routines: a positive value is a UNIX error, a negative value is the
+ * negated TLI error, and 0 means success.
+ */
+int
+tcp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
+    boolean_t bind_to_req_port_only)
+{
+	tcp_t	*tcp;
+	int	rc;
+
+	tcp = connp->conn_tcp;
+
+	/* An endpoint that is already bound cannot be bound again. */
+	if (tcp->tcp_state >= TCPS_BOUND) {
+		if (tcp->tcp_debug) {
+			(void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
+			    "tcp_bind: bad state, %d", tcp->tcp_state);
+		}
+		return (-TOUTSTATE);
+	}
+
+	rc = tcp_bind_check(connp, sa, len, cr, bind_to_req_port_only);
+	if (rc != 0)
+		return (rc);
+
+	ASSERT(tcp->tcp_state == TCPS_BOUND);
+
+	tcp->tcp_conn_req_max = 0;
+
+	/*
+	 * conn_recv has to be non-NULL before the conn is inserted into
+	 * the classifier table; otherwise an incoming packet that goes
+	 * through ipcl_classify() could race with the insertion.
+	 */
+	connp->conn_recv = tcp_conn_request;
+
+	switch (tcp->tcp_family) {
+	case AF_INET6:
+		ASSERT(tcp->tcp_connp->conn_af_isv6);
+		rc = ip_proto_bind_laddr_v6(connp, NULL, IPPROTO_TCP,
+		    &tcp->tcp_bound_source_v6, 0, B_FALSE);
+		break;
+	default:
+		ASSERT(!tcp->tcp_connp->conn_af_isv6);
+		rc = ip_proto_bind_laddr_v4(connp, NULL, IPPROTO_TCP,
+		    tcp->tcp_ipha->ipha_src, 0, B_FALSE);
+		break;
+	}
+
+	/* Let the common post-processing finish (or undo) the bind. */
+	return (tcp_post_ip_bind(tcp, NULL, rc));
+}
+
+/*
+ * Socket downcall for bind().  Runs the bind (or, for a NULL address,
+ * the unbind) inside the connection's squeue and converts the result
+ * into a UNIX error.
+ */
+int
+tcp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa,
+    socklen_t len, cred_t *cr)
+{
+	conn_t		*connp = (conn_t *)proto_handle;
+	squeue_t	*sqp = connp->conn_sqp;
+	int		rc;
+
+	ASSERT(sqp != NULL);
+
+	/* Could not get into the squeue. */
+	if (squeue_synch_enter(sqp, connp, 0) != 0)
+		return (ENOSR);
+
+	if (sa != NULL) {
+		rc = tcp_do_bind(connp, sa, len, cr, B_TRUE);
+	} else if (connp->conn_tcp->tcp_state < TCPS_LISTEN) {
+		/* binding to a NULL address really means unbind */
+		rc = tcp_do_unbind(connp);
+	} else {
+		rc = EINVAL;
+	}
+
+	squeue_synch_exit(sqp, connp);
+
+	/* Zero and positive values are already UNIX results. */
+	if (rc >= 0)
+		return (rc);
+
+	/* Negative values are TLI errors that still need translating. */
+	return ((rc == -TOUTSTATE) ? EINVAL : proto_tlitosyserr(-rc));
+}
+
+/*
+ * Start a connection to the address in sa.
+ *
+ * If the return value from this function is positive, it's a UNIX error.
+ * Otherwise, if it's negative, then the absolute value is a TLI error.
+ * The TPI routine tcp_tpi_connect() is a wrapper function for this.
+ *
+ * len must be sizeof (sin_t) or sizeof (sin6_t); any other length is
+ * rejected with EINVAL.  cr and pid are passed through to
+ * tcp_connect_ipv4() / tcp_connect_ipv6().
+ */
+int
+tcp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len,
+ cred_t *cr, pid_t pid)
+{
+ tcp_t *tcp = connp->conn_tcp;
+ sin_t *sin = (sin_t *)sa;
+ sin6_t *sin6 = (sin6_t *)sa;
+ ipaddr_t *dstaddrp;
+ in_port_t dstport;
+ uint_t srcid;
+ int error = 0;
+
+ switch (len) {
+ default:
+ /*
+ * Should never happen
+ */
+ return (EINVAL);
+
+ case sizeof (sin_t):
+ sin = (sin_t *)sa;
+ if (sin->sin_port == 0) {
+ return (-TBADADDR);
+ }
+ if (tcp->tcp_connp && tcp->tcp_connp->conn_ipv6_v6only) {
+ return (EAFNOSUPPORT);
+ }
+ break;
+
+ case sizeof (sin6_t):
+ sin6 = (sin6_t *)sa;
+ if (sin6->sin6_port == 0) {
+ return (-TBADADDR);
+ }
+ break;
+ }
+ /*
+ * If we're connecting to an IPv4-mapped IPv6 address, we need to
+ * make sure that the template IP header in the tcp structure is an
+ * IPv4 header, and that the tcp_ipversion is IPV4_VERSION. We
+ * need to do this before we call tcp_bindi() so that the port lookup
+ * code will look for ports in the correct port space (IPv4 and
+ * IPv6 have separate port spaces).
+ */
+ if (tcp->tcp_family == AF_INET6 && tcp->tcp_ipversion == IPV6_VERSION &&
+ IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
+ int err = 0;
+
+ err = tcp_header_init_ipv4(tcp);
+ if (err != 0) {
+ error = ENOMEM;
+ goto connect_failed;
+ }
+ /* Carry an already assigned local port over to the new header. */
+ if (tcp->tcp_lport != 0)
+ *(uint16_t *)tcp->tcp_tcph->th_lport = tcp->tcp_lport;
+ }
+
+ switch (tcp->tcp_state) {
+ case TCPS_LISTEN:
+ /*
+ * Listening sockets are not allowed to issue connect().
+ */
+ if (IPCL_IS_NONSTR(connp))
+ return (EOPNOTSUPP);
+ /* FALLTHRU */
+ case TCPS_IDLE:
+ /*
+ * We support quick connect, refer to comments in
+ * tcp_connect_*()
+ */
+ /* FALLTHRU */
+ case TCPS_BOUND:
+ /*
+ * We must bump the generation before the operation start.
+ * This is done to ensure that any upcall made later on sends
+ * up the right generation to the socket.
+ */
+ SOCK_CONNID_BUMP(tcp->tcp_connid);
+
+ if (tcp->tcp_family == AF_INET6) {
+ if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
+ return (tcp_connect_ipv6(tcp,
+ &sin6->sin6_addr,
+ sin6->sin6_port, sin6->sin6_flowinfo,
+ sin6->__sin6_src_id, sin6->sin6_scope_id,
+ cr, pid));
+ }
+ /*
+ * Destination address is mapped IPv6 address.
+ * Source bound address should be unspecified or
+ * IPv6 mapped address as well.
+ */
+ if (!IN6_IS_ADDR_UNSPECIFIED(
+ &tcp->tcp_bound_source_v6) &&
+ !IN6_IS_ADDR_V4MAPPED(&tcp->tcp_bound_source_v6)) {
+ return (EADDRNOTAVAIL);
+ }
+ dstaddrp = &V4_PART_OF_V6((sin6->sin6_addr));
+ dstport = sin6->sin6_port;
+ srcid = sin6->__sin6_src_id;
+ } else {
+ dstaddrp = &sin->sin_addr.s_addr;
+ dstport = sin->sin_port;
+ srcid = 0;
+ }
+
+ error = tcp_connect_ipv4(tcp, dstaddrp, dstport, srcid, cr,
+ pid);
+ break;
+ default:
+ return (-TOUTSTATE);
+ }
+ /*
+ * Note: Code below is the "failure" case
+ *
+ * NOTE(review): the IPv4 path above breaks out of the switch with
+ * whatever tcp_connect_ipv4() returned, so this code is also reached
+ * when error == 0 -- confirm that freeing the saved options is
+ * intended on success as well.
+ */
+connect_failed:
+ if (tcp->tcp_conn.tcp_opts_conn_req != NULL)
+ tcp_close_mpp(&tcp->tcp_conn.tcp_opts_conn_req);
+ return (error);
+}
+
+/*
+ * Socket downcall for connect().
+ *
+ * The address is validated with proto_verify_ip_addr() and the connect
+ * itself is performed by tcp_do_connect() inside the connection's squeue.
+ * On success the connection generation is stored in *id and EINPROGRESS
+ * is returned.
+ *
+ * Returns a UNIX error otherwise: the result of proto_verify_ip_addr(),
+ * ENOSR if the squeue could not be entered, or the (translated) result
+ * of tcp_do_connect().
+ */
+int
+tcp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa,
+    socklen_t len, sock_connid_t *id, cred_t *cr)
+{
+	conn_t		*connp = (conn_t *)proto_handle;
+	tcp_t		*tcp = connp->conn_tcp;
+	squeue_t	*sqp = connp->conn_sqp;
+	int		error;
+
+	error = proto_verify_ip_addr(tcp->tcp_family, sa, len);
+	if (error != 0) {
+		return (error);
+	}
+
+	error = squeue_synch_enter(sqp, connp, 0);
+	if (error != 0) {
+		/* failed to enter */
+		return (ENOSR);
+	}
+
+	/*
+	 * TCP supports quick connect, so no need to do an implicit bind
+	 */
+	error = tcp_do_connect(connp, sa, len, cr, curproc->p_pid);
+	if (error == 0) {
+		*id = connp->conn_tcp->tcp_connid;
+	} else if (error < 0) {
+		if (error == -TOUTSTATE) {
+			/*
+			 * Turn "wrong state" into the errno that matches
+			 * the state the endpoint is actually in.
+			 */
+			switch (connp->conn_tcp->tcp_state) {
+			case TCPS_SYN_SENT:
+				error = EALREADY;
+				break;
+			case TCPS_ESTABLISHED:
+				error = EISCONN;
+				break;
+			case TCPS_LISTEN:
+				error = EOPNOTSUPP;
+				break;
+			default:
+				error = EINVAL;
+				break;
+			}
+		} else {
+			/* Any other negative value is a TLI error. */
+			error = proto_tlitosyserr(-error);
+		}
+	}
+
+	squeue_synch_exit(sqp, connp);
+
+	/* A connect that was started successfully is still in progress. */
+	return ((error == 0) ? EINPROGRESS : error);
+}
+
+/*
+ * Socket module entry point that creates a non-STREAMS TCP endpoint.
+ *
+ * Only SOCK_STREAM sockets of family AF_INET or AF_INET6 with protocol 0
+ * or IPPROTO_TCP are accepted; anything else fails with EPROTONOSUPPORT.
+ *
+ * On success the new conn_t is returned as the lower handle, *errorp is
+ * set to 0, *sock_downcalls points at the TCP downcall table and *smodep
+ * holds the socket mode flags.  On failure NULL is returned and *errorp
+ * holds the error.
+ */
+/* ARGSUSED */
+sock_lower_handle_t
+tcp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls,
+    uint_t *smodep, int *errorp, int flags, cred_t *credp)
+{
+	conn_t		*connp;
+	boolean_t	isv6 = family == AF_INET6;
+
+	/* errorp is written on every path below, so check it up front. */
+	ASSERT(errorp != NULL);
+
+	if (type != SOCK_STREAM || (family != AF_INET && family != AF_INET6) ||
+	    (proto != 0 && proto != IPPROTO_TCP)) {
+		*errorp = EPROTONOSUPPORT;
+		return (NULL);
+	}
+
+	/* tcp_create_common() is handed errorp to report a failure. */
+	connp = tcp_create_common(NULL, credp, isv6, B_TRUE, errorp);
+	if (connp == NULL) {
+		return (NULL);
+	}
+
+	/*
+	 * Put the ref for TCP. Ref for IP was already put
+	 * by ipcl_conn_create. Also Make the conn_t globally
+	 * visible to walkers
+	 */
+	mutex_enter(&connp->conn_lock);
+	CONN_INC_REF_LOCKED(connp);
+	ASSERT(connp->conn_ref == 2);
+	connp->conn_state_flags &= ~CONN_INCIPIENT;
+
+	/* Mark the conn as a non-STREAMS endpoint. */
+	connp->conn_flags |= IPCL_NONSTR;
+	mutex_exit(&connp->conn_lock);
+
+	*errorp = 0;
+	*sock_downcalls = &sock_tcp_downcalls;
+	*smodep = SM_CONNREQUIRED | SM_EXDATA | SM_ACCEPTSUPP;
+
+	return ((sock_lower_handle_t)connp);
+}
+
+/*
+ * Socket downcall that attaches the socket layer to the endpoint: the
+ * upcall table and upper handle are stored in the conn_t, and the
+ * protocol properties are pushed up with su_set_proto_props.
+ */
+/* ARGSUSED */
+void
+tcp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle,
+    sock_upcalls_t *sock_upcalls, int flags, cred_t *cr)
+{
+	struct sock_proto_props	props;
+	conn_t			*connp;
+
+	/* Remember how to reach the socket layer. */
+	connp = (conn_t *)proto_handle;
+	connp->conn_upcalls = sock_upcalls;
+	connp->conn_upper_handle = sock_handle;
+
+	/* Only the properties flagged here are filled in below. */
+	props.sopp_flags = SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT |
+	    SOCKOPT_MAXPSZ | SOCKOPT_MAXBLK | SOCKOPT_RCVTIMER |
+	    SOCKOPT_RCVTHRESH | SOCKOPT_MAXADDRLEN | SOCKOPT_MINPSZ;
+
+	/* Receive side. */
+	props.sopp_rxhiwat = SOCKET_RECVHIWATER;
+	props.sopp_rxlowat = SOCKET_RECVLOWATER;
+	props.sopp_rcvtimer = SOCKET_TIMER_INTERVAL;
+	props.sopp_rcvthresh = SOCKET_RECVHIWATER >> 3;
+
+	/* Packet and block sizes. */
+	props.sopp_maxpsz = INFPSZ;
+	props.sopp_maxblk = INFPSZ;
+	if (tcp_rinfo.mi_minpsz == 1)
+		props.sopp_minpsz = 0;
+	else
+		props.sopp_minpsz = tcp_rinfo.mi_minpsz;
+
+	/* An address never needs more room than a sin6_t. */
+	props.sopp_maxaddrlen = sizeof (sin6_t);
+
+	(*sock_upcalls->su_set_proto_props)(sock_handle, &props);
+}
+
+/*
+ * Socket downcall for close().  Always returns 0.
+ */
+/* ARGSUSED */
+int
+tcp_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr)
+{
+	conn_t	*connp;
+
+	connp = (conn_t *)proto_handle;
+
+	/* Common close processing first, then the helper stream. */
+	tcp_close_common(connp, flags);
+	ip_close_helper_stream(connp);
+
+	/*
+	 * Release the reference IP holds on the conn.  For a connection
+	 * that never got past the established state this is the last
+	 * reference.  A connection that went into timewait still has two:
+	 * one for TCP and one for the classifier connected hash list,
+	 * where a timewait connection stays until it is closed.
+	 *
+	 * The reference count is deliberately not asserted: walkers and
+	 * packets queued in the squeue for the timewait state may hold
+	 * transient references.
+	 */
+	CONN_DEC_REF(connp);
+
+	return (0);
+}
+
+/*
+ * Socket downcall for sending data.
+ *
+ * mp holds the data to send and is always consumed by this function,
+ * whether or not the send succeeds.  msg may carry MSG_OOB in msg_flags;
+ * control data is not supported.
+ *
+ * Returns 0 once the data has been queued to the squeue, EOPNOTSUPP if
+ * control data was supplied, ENOTCONN if the connection is not yet
+ * established and EPIPE if it is past TCPS_CLOSE_WAIT.
+ */
+/* ARGSUSED */
+int
+tcp_sendmsg(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg,
+    cred_t *cr)
+{
+	tcp_t		*tcp;
+	uint32_t	msize;
+	conn_t		*connp = (conn_t *)proto_handle;
+	int32_t		tcpstate;
+
+	ASSERT(connp->conn_ref >= 2);
+
+	/*
+	 * Control data is not supported.  The message has to be released
+	 * here like on every other error return, otherwise it is leaked.
+	 * msg is tested for NULL the same way as for MSG_OOB below.
+	 */
+	if (msg != NULL && msg->msg_controllen != 0) {
+		freemsg(mp);
+		return (EOPNOTSUPP);
+	}
+
+	switch (DB_TYPE(mp)) {
+	case M_DATA:
+		tcp = connp->conn_tcp;
+		ASSERT(tcp != NULL);
+
+		tcpstate = tcp->tcp_state;
+		if (tcpstate < TCPS_ESTABLISHED) {
+			freemsg(mp);
+			return (ENOTCONN);
+		} else if (tcpstate > TCPS_CLOSE_WAIT) {
+			freemsg(mp);
+			return (EPIPE);
+		}
+
+		if (is_system_labeled())
+			msg_setcredpid(mp, cr, curproc->p_pid);
+
+		/* XXX pass the size down and to the squeue */
+		msize = msgdsize(mp);
+
+		mutex_enter(&tcp->tcp_non_sq_lock);
+		tcp->tcp_squeue_bytes += msize;
+		/*
+		 * Squeue Flow Control
+		 */
+		if (TCP_UNSENT_BYTES(tcp) > tcp->tcp_xmit_hiwater) {
+			tcp_setqfull(tcp);
+		}
+		mutex_exit(&tcp->tcp_non_sq_lock);
+
+		/*
+		 * The application may pass in an address in the msghdr, but
+		 * we ignore the address on connection-oriented sockets.
+		 * Just like BSD this code does not generate an error for
+		 * TCP (a CONNREQUIRED socket) when sending to an address
+		 * passed in with sendto/sendmsg. Instead the data is
+		 * delivered on the connection as if no address had been
+		 * supplied.
+		 */
+		CONN_INC_REF(connp);
+
+		if (msg != NULL && (msg->msg_flags & MSG_OOB)) {
+			SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
+			    tcp_output_urgent, connp, tcp_squeue_flag,
+			    SQTAG_TCP_OUTPUT);
+		} else {
+			SQUEUE_ENTER_ONE(connp->conn_sqp, mp, tcp_output,
+			    connp, tcp_squeue_flag, SQTAG_TCP_OUTPUT);
+		}
+
+		return (0);
+
+	default:
+		/* Only M_DATA messages are expected here. */
+		ASSERT(0);
+	}
+
+	freemsg(mp);
+	return (0);
+}
+
+/*
+ * Squeue entry point used by tcp_sendmsg() for MSG_OOB data: records
+ * the urgent pointer in the tcp_t and then sends the data.
+ */
+/* ARGSUSED */
+void
+tcp_output_urgent(void *arg, mblk_t *mp, void *arg2)
+{
+	conn_t		*connp = (conn_t *)arg;
+	tcp_t		*tcp = connp->conn_tcp;
+	uint32_t	msize = msgdsize(mp);
+	int		len = msize - 1;
+
+	/* A message without any data carries nothing urgent. */
+	if (len < 0) {
+		freemsg(mp);
+		return;
+	}
+
+	/*
+	 * Try to force urgent data out on the wire.
+	 * Even if we have unsent data this will
+	 * at least send the urgent flag.
+	 * XXX does not handle more flag correctly.
+	 */
+	len += tcp->tcp_unsent;
+	len += tcp->tcp_snxt;
+	tcp->tcp_urg = len;
+	tcp->tcp_valid_bits |= TCP_URG_VALID;
+
+	/*
+	 * A fused loopback connection bypasses the protocol; the regular
+	 * output path is used when the endpoint is not fused or when
+	 * tcp_fuse_output() did not take the message.
+	 */
+	if (!tcp->tcp_fused || !tcp_fuse_output(tcp, mp, msize))
+		tcp_wput_data(tcp, mp, B_TRUE);
+}
+
+/*
+ * Socket downcall for getpeername().
+ *
+ * addr receives the peer address in the form matching the endpoint's
+ * family (sin_t for AF_INET, sin6_t for AF_INET6).  On entry *addrlen is
+ * the size of the caller's buffer; on success it is set to the size of
+ * the address that was stored.
+ *
+ * Returns 0 on success, ENOTCONN if the state is below TCPS_SYN_RCVD and
+ * EINVAL if the buffer is too small.  Nothing is written to addr unless
+ * the buffer is large enough.
+ */
+/* ARGSUSED */
+int
+tcp_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *addr,
+    socklen_t *addrlen, cred_t *cr)
+{
+	sin_t	*sin;
+	sin6_t	*sin6;
+	conn_t	*connp = (conn_t *)proto_handle;
+	tcp_t	*tcp = connp->conn_tcp;
+
+	ASSERT(tcp != NULL);
+	if (tcp->tcp_state < TCPS_SYN_RCVD)
+		return (ENOTCONN);
+
+	/*
+	 * The address family is stored by each case below, after the
+	 * buffer length has been validated.
+	 */
+	switch (tcp->tcp_family) {
+	case AF_INET:
+		if (*addrlen < sizeof (sin_t))
+			return (EINVAL);
+
+		sin = (sin_t *)addr;
+		*sin = sin_null;
+		sin->sin_family = AF_INET;
+		if (tcp->tcp_ipversion == IPV4_VERSION) {
+			IN6_V4MAPPED_TO_IPADDR(&tcp->tcp_remote_v6,
+			    sin->sin_addr.s_addr);
+		}
+		sin->sin_port = tcp->tcp_fport;
+		*addrlen = sizeof (struct sockaddr_in);
+		break;
+	case AF_INET6:
+		/* Check the length before the buffer is touched. */
+		if (*addrlen < sizeof (struct sockaddr_in6))
+			return (EINVAL);
+
+		sin6 = (sin6_t *)addr;
+		*sin6 = sin6_null;
+		sin6->sin6_family = AF_INET6;
+
+		/* Flow information only exists with an IPv6 header. */
+		if (tcp->tcp_ipversion == IPV6_VERSION) {
+			sin6->sin6_flowinfo = tcp->tcp_ip6h->ip6_vcf &
+			    ~IPV6_VERS_AND_FLOW_MASK;
+		}
+
+		sin6->sin6_addr = tcp->tcp_remote_v6;
+		sin6->sin6_port = tcp->tcp_fport;
+		*addrlen = sizeof (struct sockaddr_in6);
+		break;
+	}
+	return (0);
+}
+
+/*
+ * Socket downcall for getsockname().
+ *
+ * addr receives the local address in the form matching the endpoint's
+ * family and *addrlenp is set to its size.  The address and port are
+ * only filled in once the endpoint is at least in TCPS_BOUND; before
+ * that the null address of the family is returned.
+ *
+ * Returns 0 on success and EINVAL if the buffer is too small.
+ */
+/* ARGSUSED */
+int
+tcp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *addr,
+    socklen_t *addrlenp, cred_t *cr)
+{
+	conn_t	*connp = (conn_t *)proto_handle;
+	tcp_t	*tcp = connp->conn_tcp;
+	sin_t	*v4sa;
+	sin6_t	*v6sa;
+
+	switch (tcp->tcp_family) {
+	case AF_INET:
+		ASSERT(tcp->tcp_ipversion == IPV4_VERSION);
+		if (*addrlenp < sizeof (sin_t))
+			return (EINVAL);
+
+		v4sa = (sin_t *)addr;
+		*v4sa = sin_null;
+		v4sa->sin_family = AF_INET;
+		*addrlenp = sizeof (sin_t);
+
+		/* Not bound yet: leave the null address in place. */
+		if (tcp->tcp_state < TCPS_BOUND)
+			break;
+
+		v4sa->sin_addr.s_addr = tcp->tcp_ipha->ipha_src;
+		v4sa->sin_port = tcp->tcp_lport;
+		break;
+
+	case AF_INET6:
+		if (*addrlenp < sizeof (sin6_t))
+			return (EINVAL);
+
+		v6sa = (sin6_t *)addr;
+		*v6sa = sin6_null;
+		v6sa->sin6_family = AF_INET6;
+		*addrlenp = sizeof (sin6_t);
+
+		/* Not bound yet: leave the null address in place. */
+		if (tcp->tcp_state < TCPS_BOUND)
+			break;
+
+		v6sa->sin6_port = tcp->tcp_lport;
+		if (tcp->tcp_ipversion != IPV4_VERSION) {
+			v6sa->sin6_addr = tcp->tcp_ip6h->ip6_src;
+		} else {
+			/* An IPv4 source is reported in v4-mapped form. */
+			IN6_IPADDR_TO_V4MAPPED(tcp->tcp_ipha->ipha_src,
+			    &v6sa->sin6_addr);
+		}
+		break;
+	}
+	return (0);
+}
+
+/*
+ * tcp_fallback
+ *
+ * A direct socket is falling back to using STREAMS. Hanging
+ * off of the queue is a temporary tcp_t, which was created using
+ * tcp_open(). The tcp_open() was called as part of the regular
+ * sockfs create path, i.e., the SO_SOCKSTR flag is passed down,
+ * and therefore the temporary tcp_t is marked to be a socket
+ * (i.e., IPCL_SOCKET, tcp_issocket). So the optimizations
+ * introduced by FireEngine will be used.
+ *
+ * The tcp_t associated with the socket falling back will
+ * still be marked as a socket, although the direct socket flag
+ * (IPCL_NONSTR) is removed. A fall back to true TPI semantics
+ * will not take place until a _SIOCSOCKFALLBACK ioctl is issued.
+ *
+ * If the above mentioned behavior changes, i.e., the tmp tcp_t is
+ * created as a STREAMS/TPI endpoint, then we will need to do more work
+ * here, such as inserting the direct socket into the acceptor hash.
+ */
+void
+tcp_fallback(sock_lower_handle_t proto_handle, queue_t *q,
+ boolean_t direct_sockfs, so_proto_quiesced_cb_t quiesced_cb)
+{
+ tcp_t *tcp, *eager;
+ conn_t *connp = (conn_t *)proto_handle;
+ int error;
+ struct T_capability_ack tca;
+ struct sockaddr_in6 laddr, faddr;
+ socklen_t laddrlen, faddrlen;
+ short opts;
+ struct stroptions *stropt;
+ mblk_t *stropt_mp;
+ mblk_t *mp;
+ mblk_t *conn_ind_head = NULL;
+ mblk_t *conn_ind_tail = NULL;
+ mblk_t *ordrel_mp;
+ mblk_t *fused_sigurp_mp;
+
+ tcp = connp->conn_tcp;
+ /*
+ * No support for acceptor fallback
+ */
+ ASSERT(q->q_qinfo != &tcp_acceptor_rinit);
+
+ stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL);
+
+ /* Pre-allocate the T_ordrel_ind mblk. */
+ ASSERT(tcp->tcp_ordrel_mp == NULL);
+ ordrel_mp = allocb_wait(sizeof (struct T_ordrel_ind), BPRI_HI,
+ STR_NOSIG, NULL);
+ ordrel_mp->b_datap->db_type = M_PROTO;
+ ((struct T_ordrel_ind *)ordrel_mp->b_rptr)->PRIM_type = T_ORDREL_IND;
+ ordrel_mp->b_wptr += sizeof (struct T_ordrel_ind);
+
+ /* Pre-allocate the M_PCSIG anyway */
+ fused_sigurp_mp = allocb_wait(1, BPRI_HI, STR_NOSIG, NULL);
+
+ /*
+ * Enter the squeue so that no new packets can come in
+ */
+ error = squeue_synch_enter(connp->conn_sqp, connp, 0);
+ if (error != 0) {
+ /* failed to enter, free all the pre-allocated messages. */
+ freeb(stropt_mp);
+ freeb(ordrel_mp);
+ freeb(fused_sigurp_mp);
+ return;
+ }
+
+ /* Disable I/OAT during fallback */
+ tcp->tcp_sodirect = NULL;
+
+ connp->conn_dev = (dev_t)RD(q)->q_ptr;
+ connp->conn_minor_arena = WR(q)->q_ptr;
+
+ RD(q)->q_ptr = WR(q)->q_ptr = connp;
+
+ connp->conn_tcp->tcp_rq = connp->conn_rq = RD(q);
+ connp->conn_tcp->tcp_wq = connp->conn_wq = WR(q);
+
+ WR(q)->q_qinfo = &tcp_sock_winit;
+
+ if (!direct_sockfs)
+ tcp_disable_direct_sockfs(tcp);
+
+ /*
+ * free the helper stream
+ */
+ ip_close_helper_stream(connp);
+
+ /*
+ * Notify the STREAM head about options
+ */
+ DB_TYPE(stropt_mp) = M_SETOPTS;
+ stropt = (struct stroptions *)stropt_mp->b_rptr;
+ stropt_mp->b_wptr += sizeof (struct stroptions);
+ stropt = (struct stroptions *)stropt_mp->b_rptr;
+ stropt->so_flags |= SO_HIWAT | SO_WROFF | SO_MAXBLK;
+
+ stropt->so_wroff = tcp->tcp_hdr_len + (tcp->tcp_loopback ? 0 :
+ tcp->tcp_tcps->tcps_wroff_xtra);
+ if (tcp->tcp_snd_sack_ok)
+ stropt->so_wroff += TCPOPT_MAX_SACK_LEN;
+ stropt->so_hiwat = tcp->tcp_fused ?
+ tcp_fuse_set_rcv_hiwat(tcp, tcp->tcp_recv_hiwater) :
+ MAX(tcp->tcp_recv_hiwater, tcp->tcp_tcps->tcps_sth_rcv_hiwat);
+ stropt->so_maxblk = tcp_maxpsz_set(tcp, B_FALSE);
+
+ putnext(RD(q), stropt_mp);
+
+ /*
+ * Collect the information needed to sync with the sonode
+ */
+ tcp_do_capability_ack(tcp, &tca, TC1_INFO|TC1_ACCEPTOR_ID);
+
+ laddrlen = faddrlen = sizeof (sin6_t);
+ (void) tcp_getsockname(proto_handle, (struct sockaddr *)&laddr,
+ &laddrlen, CRED());
+ error = tcp_getpeername(proto_handle, (struct sockaddr *)&faddr,
+ &faddrlen, CRED());
+ if (error != 0)
+ faddrlen = 0;
+
+ opts = 0;
+ if (tcp->tcp_oobinline)
+ opts |= SO_OOBINLINE;
+ if (tcp->tcp_dontroute)
+ opts |= SO_DONTROUTE;
+
+ /*
+ * Notify the socket that the protocol is now quiescent,
+ * and it's therefore safe to move data from the socket
+ * to the stream head.
+ */
+ (*quiesced_cb)(connp->conn_upper_handle, q, &tca,
+ (struct sockaddr *)&laddr, laddrlen,
+ (struct sockaddr *)&faddr, faddrlen, opts);
+
+ while ((mp = tcp->tcp_rcv_list) != NULL) {
+ tcp->tcp_rcv_list = mp->b_next;
+ mp->b_next = NULL;
+ putnext(q, mp);
+ }
+ tcp->tcp_rcv_last_head = NULL;
+ tcp->tcp_rcv_last_tail = NULL;
+ tcp->tcp_rcv_cnt = 0;
+
+ /*
+ * No longer a direct socket
+ */
+ connp->conn_flags &= ~IPCL_NONSTR;
+
+ tcp->tcp_ordrel_mp = ordrel_mp;
+
+ if (tcp->tcp_fused) {
+ ASSERT(tcp->tcp_fused_sigurg_mp == NULL);
+ tcp->tcp_fused_sigurg_mp = fused_sigurp_mp;
+ } else {
+ freeb(fused_sigurp_mp);
+ }
+
+ /*
+ * Send T_CONN_IND messages for all ESTABLISHED connections.
+ */
+ mutex_enter(&tcp->tcp_eager_lock);
+ for (eager = tcp->tcp_eager_next_q; eager != NULL;
+ eager = eager->tcp_eager_next_q) {
+ mp = eager->tcp_conn.tcp_eager_conn_ind;
+
+ eager->tcp_conn.tcp_eager_conn_ind = NULL;
+ ASSERT(mp != NULL);
+ /*
+ * TLI/XTI applications will get confused by
+ * sending eager as an option since it violates
+ * the option semantics. So remove the eager as
+ * option since TLI/XTI app doesn't need it anyway.
+ */
+ if (!TCP_IS_SOCKET(tcp)) {
+ struct T_conn_ind *conn_ind;
+
+ conn_ind = (struct T_conn_ind *)mp->b_rptr;
+ conn_ind->OPT_length = 0;
+ conn_ind->OPT_offset = 0;
+ }
+ if (conn_ind_head == NULL) {
+ conn_ind_head = mp;
+ } else {
+ conn_ind_tail->b_next = mp;
+ }
+ conn_ind_tail = mp;
+ }
+ mutex_exit(&tcp->tcp_eager_lock);
+
+ mp = conn_ind_head;
+ while (mp != NULL) {
+ mblk_t *nmp = mp->b_next;
+ mp->b_next = NULL;
+
+ putnext(tcp->tcp_rq, mp);
+ mp = nmp;
+ }
+
+ /*
+ * There should be at least two refs (IP + TCP)
+ */
+ ASSERT(connp->conn_ref >= 2);
+ squeue_synch_exit(connp->conn_sqp, connp);
+}
+
+/*
+ * Squeue callback that carries out the send-side half of a shutdown().
+ *
+ * arg is the conn_t of the connection; mp is released immediately and
+ * none of its contents are consulted; arg2 is unused.
+ *
+ * A fused (loopback) connection is unfused first, then tcp_xmit_end()
+ * is called. A non-zero return from tcp_xmit_end() is only logged
+ * (and only when tcp_debug is set); it is otherwise ignored.
+ */
+/* ARGSUSED */
+static void
+tcp_shutdown_output(void *arg, mblk_t *mp, void *arg2)
+{
+	conn_t	*connp = (conn_t *)arg;
+	tcp_t	*tcp;
+
+	/* Nothing in the message is needed; drop it up front. */
+	freemsg(mp);
+
+	tcp = connp->conn_tcp;
+	if (tcp->tcp_fused)
+		tcp_unfuse(tcp);
+
+	if (tcp_xmit_end(tcp) == 0)
+		return;
+
+	/*
+	 * We were crossing FINs and got a reset from
+	 * the other side. Just ignore it.
+	 */
+	if (!tcp->tcp_debug)
+		return;
+
+	(void) strlog(TCP_MOD_ID, 0, 1,
+	    SL_ERROR|SL_TRACE,
+	    "tcp_shutdown_output() out of state %s",
+	    tcp_display(tcp, NULL, DISP_ADDR_AND_PORT));
+}
+
+/*
+ * Shut down one or both directions of a socket.
+ *
+ * proto_handle is the connection's conn_t; how is SHUT_RD, SHUT_WR or
+ * SHUT_RDWR; cr is unused.
+ *
+ * Returns ENOTCONN when the endpoint's state is below TCPS_SYN_SENT,
+ * otherwise 0. The send-side work itself is not performed here; it is
+ * queued on the connection's squeue (see tcp_shutdown_output()), so a
+ * return of 0 does not report the outcome of that work.
+ */
+/* ARGSUSED */
+int
+tcp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr)
+{
+ conn_t *connp = (conn_t *)proto_handle;
+ tcp_t *tcp = connp->conn_tcp;
+
+ /*
+ * X/Open requires that we check the connected state.
+ */
+ if (tcp->tcp_state < TCPS_SYN_SENT)
+ return (ENOTCONN);
+
+ /* shutdown the send side */
+ if (how != SHUT_RD) {
+ mblk_t *bp;
+
+ /*
+ * A zero-length mblk is allocated purely so that
+ * tcp_shutdown_output() can be queued on the squeue; that
+ * callback frees it without reading it.
+ */
+ bp = allocb_wait(0, BPRI_HI, STR_NOSIG, NULL);
+ /*
+ * Take a reference on the conn before handing it to the squeue.
+ * NOTE(review): presumably the squeue framework drops this
+ * reference once the callback has run -- confirm.
+ */
+ CONN_INC_REF(connp);
+ SQUEUE_ENTER_ONE(connp->conn_sqp, bp, tcp_shutdown_output,
+ connp, SQ_NODRAIN, SQTAG_TCP_SHUTDOWN_OUTPUT);
+
+ /* Tell the socket layer that the send side is shut down. */
+ (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
+ SOCK_OPCTL_SHUT_SEND, 0);
+ }
+
+ /* shutdown the recv side */
+ if (how != SHUT_WR)
+ (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
+ SOCK_OPCTL_SHUT_RECV, 0);
+
+ return (0);
+}
+
+/*
+ * SOP_LISTEN() entry point.
+ *
+ * Enters the connection's squeue, lets tcp_do_listen() do the work and
+ * converts its result into an errno:
+ *   - squeue entry failure		-> ENOBUFS
+ *   - 0				-> accept is enabled on the socket
+ *					   via the su_opctl upcall
+ *   - negative value (-TLI error)	-> EINVAL for TOUTSTATE, otherwise
+ *					   proto_tlitosyserr()
+ *   - positive value			-> returned unchanged
+ */
+/* ARGSUSED */
+int
+tcp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr)
+{
+	conn_t		*connp = (conn_t *)proto_handle;
+	squeue_t	*sqp = connp->conn_sqp;
+	int		rv;
+
+	if (squeue_synch_enter(sqp, connp, 0) != 0) {
+		/* failed to enter */
+		return (ENOBUFS);
+	}
+
+	rv = tcp_do_listen(connp, backlog, cr);
+	if (rv < 0) {
+		/* Negative results are TLI errors; map them to errnos. */
+		rv = (rv == -TOUTSTATE) ? EINVAL : proto_tlitosyserr(-rv);
+	} else if (rv == 0) {
+		(*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
+		    SOCK_OPCTL_ENAB_ACCEPT, (uintptr_t)backlog);
+	}
+
+	squeue_synch_exit(sqp, connp);
+	return (rv);
+}
+
+/*
+ * Put the endpoint into the LISTEN state (or adjust the backlog of an
+ * endpoint that is already bound/listening).
+ *
+ * connp	connection to operate on
+ * backlog	requested backlog; a non-zero value is clamped to the
+ *		range [tcps_conn_req_min, tcps_conn_req_max_q]
+ * cr		credentials, passed to tcp_bind_check() for the implicit bind
+ *
+ * If the endpoint is not yet bound, an implicit bind to a wildcard
+ * address/port of the endpoint's family is performed first.
+ *
+ * Returns -TOUTSTATE when the endpoint is in a state from which it
+ * cannot listen (tcp_listen() treats negative returns as negated TLI
+ * errors); otherwise returns whatever tcp_bind_check() (on failure) or
+ * tcp_post_ip_bind() returns.
+ */
+static int
+tcp_do_listen(conn_t *connp, int backlog, cred_t *cr)
+{
+	tcp_t		*tcp = connp->conn_tcp;
+	sin_t		*sin;
+	sin6_t		*sin6;
+	int		error = 0;
+	tcp_stack_t	*tcps = tcp->tcp_tcps;
+
+	if (tcp->tcp_state >= TCPS_BOUND) {
+		if ((tcp->tcp_state == TCPS_BOUND ||
+		    tcp->tcp_state == TCPS_LISTEN) &&
+		    backlog > 0) {
+			/*
+			 * Handle listen() increasing backlog.
+			 * This is more "liberal" than what the TPI spec
+			 * requires but is needed to avoid a t_unbind
+			 * when handling listen() since the port number
+			 * might be "stolen" between the unbind and bind.
+			 */
+			goto do_listen;
+		}
+		if (tcp->tcp_debug) {
+			/* Name this function, not tcp_bind, in the log. */
+			(void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
+			    "tcp_do_listen: bad state, %d", tcp->tcp_state);
+		}
+		return (-TOUTSTATE);
+	} else {
+		int32_t	len;
+		sin6_t	addr;
+
+		/* Do an implicit bind: Request for a generic port. */
+		if (tcp->tcp_family == AF_INET) {
+			len = sizeof (sin_t);
+			sin = (sin_t *)&addr;
+			*sin = sin_null;
+			sin->sin_family = AF_INET;
+			tcp->tcp_ipversion = IPV4_VERSION;
+		} else {
+			ASSERT(tcp->tcp_family == AF_INET6);
+			len = sizeof (sin6_t);
+			sin6 = (sin6_t *)&addr;
+			*sin6 = sin6_null;
+			sin6->sin6_family = AF_INET6;
+			tcp->tcp_ipversion = IPV6_VERSION;
+		}
+
+		error = tcp_bind_check(connp, (struct sockaddr *)&addr, len,
+		    cr, B_FALSE);
+		if (error)
+			return (error);
+		/* Fall through and do the fanout insertion */
+	}
+
+do_listen:
+	ASSERT(tcp->tcp_state == TCPS_BOUND || tcp->tcp_state == TCPS_LISTEN);
+	tcp->tcp_conn_req_max = backlog;
+	if (tcp->tcp_conn_req_max) {
+		/* Clamp the backlog to the per-stack limits. */
+		if (tcp->tcp_conn_req_max < tcps->tcps_conn_req_min)
+			tcp->tcp_conn_req_max = tcps->tcps_conn_req_min;
+		if (tcp->tcp_conn_req_max > tcps->tcps_conn_req_max_q)
+			tcp->tcp_conn_req_max = tcps->tcps_conn_req_max_q;
+		/*
+		 * If this is already a listener, do not reset the eager
+		 * list and related state. Note that we don't check if the
+		 * existing eager list meets the new tcp_conn_req_max
+		 * requirement.
+		 */
+		if (tcp->tcp_state != TCPS_LISTEN) {
+			tcp->tcp_state = TCPS_LISTEN;
+			/* Initialize the chain. Don't need the eager_lock */
+			tcp->tcp_eager_next_q0 = tcp->tcp_eager_prev_q0 = tcp;
+			tcp->tcp_eager_next_drop_q0 = tcp;
+			tcp->tcp_eager_prev_drop_q0 = tcp;
+			tcp->tcp_second_ctimer_threshold =
+			    tcps->tcps_ip_abort_linterval;
+		}
+	}
+
+	/*
+	 * Bind the local address/port in IP directly; the result is
+	 * post-processed by tcp_post_ip_bind().
+	 *
+	 * We need to make sure that the conn_recv is set to a non-null
+	 * value before we insert the conn into the classifier table.
+	 * This is to avoid a race with an incoming packet which does an
+	 * ipcl_classify().
+	 */
+	connp->conn_recv = tcp_conn_request;
+	if (tcp->tcp_family == AF_INET) {
+		error = ip_proto_bind_laddr_v4(connp, NULL,
+		    IPPROTO_TCP, tcp->tcp_bound_source, tcp->tcp_lport, B_TRUE);
+	} else {
+		error = ip_proto_bind_laddr_v6(connp, NULL, IPPROTO_TCP,
+		    &tcp->tcp_bound_source_v6, tcp->tcp_lport, B_TRUE);
+	}
+	return (tcp_post_ip_bind(tcp, NULL, error));
+}
+
+/*
+ * Socket downcall: the receive-side flow control condition has been
+ * removed, so reopen the receive window and, if worthwhile, advertise
+ * it to the peer right away.
+ *
+ * proto_handle is the connection's conn_t. There is no return value;
+ * if the squeue cannot be entered nothing is changed.
+ */
+void
+tcp_clr_flowctrl(sock_lower_handle_t proto_handle)
+{
+	conn_t		*connp = (conn_t *)proto_handle;
+	tcp_t		*tcp = connp->conn_tcp;
+	tcp_stack_t	*tcps = tcp->tcp_tcps;
+	uint_t		thwin;
+
+	/*
+	 * The TCP state below must only be touched from within the
+	 * squeue. If we failed to enter, do not modify anything and do
+	 * not call squeue_synch_exit() for a squeue we never entered;
+	 * this matches how tcp_listen() and tcp_fallback() treat a
+	 * failed entry.
+	 * NOTE(review): in that case no window update is sent from this
+	 * call -- confirm that the window is reopened by a later path.
+	 */
+	if (squeue_synch_enter(connp->conn_sqp, connp, 0) != 0)
+		return;
+
+	/* Flow control condition has been removed. */
+	tcp->tcp_rwnd = tcp->tcp_recv_hiwater;
+
+	/*
+	 * thwin is the window the peer currently believes it has: the
+	 * last advertised window (scaled), less what has been received
+	 * since the last ACK.
+	 */
+	thwin = ((uint_t)BE16_TO_U16(tcp->tcp_tcph->th_win))
+	    << tcp->tcp_rcv_ws;
+	thwin -= tcp->tcp_rnxt - tcp->tcp_rack;
+
+	/*
+	 * Send back a window update immediately if TCP is above
+	 * ESTABLISHED state and the increase of the rcv window
+	 * that the other side knows is at least 1 MSS after flow
+	 * control is lifted.
+	 */
+	if (tcp->tcp_state >= TCPS_ESTABLISHED &&
+	    (tcp->tcp_recv_hiwater - thwin >= tcp->tcp_mss)) {
+		tcp_xmit_ctl(NULL, tcp,
+		    (tcp->tcp_swnd == 0) ? tcp->tcp_suna :
+		    tcp->tcp_snxt, tcp->tcp_rnxt, TH_ACK);
+		BUMP_MIB(&tcps->tcps_mib, tcpOutWinUpdate);
+	}
+
+	squeue_synch_exit(connp->conn_sqp, connp);
+}
+
+/*
+ * ioctl downcall for the socket interface.
+ *
+ * A fixed set of commands (ND_SET/ND_GET, TCP_IOC_DEFAULT_Q,
+ * _SIOCSOCKFALLBACK, TCP_IOC_ABORT_CONN, TI_GETPEERNAME, TI_GETMYNAME)
+ * is rejected with EINVAL after a debug message. Every other command
+ * is forwarded to IP through the connection's helper stream and the
+ * result of ldi_ioctl() is returned.
+ */
+/* ARGSUSED */
+int
+tcp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg,
+    int mode, int32_t *rvalp, cred_t *cr)
+{
+	conn_t	*connp = (conn_t *)proto_handle;
+
+	switch (cmd) {
+	case ND_SET:
+	case ND_GET:
+	case TCP_IOC_DEFAULT_Q:
+	case _SIOCSOCKFALLBACK:
+	case TCP_IOC_ABORT_CONN:
+	case TI_GETPEERNAME:
+	case TI_GETMYNAME:
+		ip1dbg(("tcp_ioctl: cmd 0x%x on non sreams socket",
+		    cmd));
+		return (EINVAL);
+	default:
+		break;
+	}
+
+	/*
+	 * Pass on to IP using helper stream
+	 */
+	return (ldi_ioctl(connp->conn_helper_info->ip_helper_stream_handle,
+	    cmd, arg, mode, cr, rvalp));
+}
+
+/*
+ * Table of TCP entry points (downcalls) handed to the socket layer.
+ *
+ * The initializer is positional, so the order of the entries must match
+ * the member order of sock_downcalls_t exactly; do not reorder or
+ * insert entries without checking that definition.
+ */
+sock_downcalls_t sock_tcp_downcalls = {
+ tcp_activate,
+ tcp_accept,
+ tcp_bind,
+ tcp_listen,
+ tcp_connect,
+ tcp_getpeername,
+ tcp_getsockname,
+ tcp_getsockopt,
+ tcp_setsockopt,
+ tcp_sendmsg,
+ /*
+ * The next three slots are left unimplemented by TCP.
+ * NOTE(review): which sock_downcalls_t members these correspond to
+ * is not visible here -- confirm against the struct definition.
+ */
+ NULL,
+ NULL,
+ NULL,
+ tcp_shutdown,
+ tcp_clr_flowctrl,
+ tcp_ioctl,
+ tcp_close,
+};
diff --git a/usr/src/uts/common/inet/tcp/tcp_fusion.c b/usr/src/uts/common/inet/tcp/tcp_fusion.c
index a192c7ad07..15b5d04d61 100644
--- a/usr/src/uts/common/inet/tcp/tcp_fusion.c
+++ b/usr/src/uts/common/inet/tcp/tcp_fusion.c
@@ -261,10 +261,9 @@ tcp_fuse(tcp_t *tcp, uchar_t *iphdr, tcph_t *tcph)
tcp->tcp_kssl_ent == NULL &&
!IPP_ENABLED(IPP_LOCAL_OUT|IPP_LOCAL_IN, ipst)) {
mblk_t *mp;
- struct stroptions *stropt;
queue_t *peer_rq = peer_tcp->tcp_rq;
- ASSERT(!TCP_IS_DETACHED(peer_tcp) && peer_rq != NULL);
+ ASSERT(!TCP_IS_DETACHED(peer_tcp));
ASSERT(tcp->tcp_fused_sigurg_mp == NULL);
ASSERT(peer_tcp->tcp_fused_sigurg_mp == NULL);
ASSERT(tcp->tcp_kssl_ctx == NULL);
@@ -276,19 +275,25 @@ tcp_fuse(tcp_t *tcp, uchar_t *iphdr, tcph_t *tcph)
* This is why we pre-allocate the M_PCSIG mblks for both
* endpoints which will only be used during/after unfuse.
*/
- if ((mp = allocb(1, BPRI_HI)) == NULL)
- goto failed;
+ if (!IPCL_IS_NONSTR(tcp->tcp_connp)) {
+ if ((mp = allocb(1, BPRI_HI)) == NULL)
+ goto failed;
- tcp->tcp_fused_sigurg_mp = mp;
+ tcp->tcp_fused_sigurg_mp = mp;
+ }
- if ((mp = allocb(1, BPRI_HI)) == NULL)
- goto failed;
+ if (!IPCL_IS_NONSTR(peer_tcp->tcp_connp)) {
+ if ((mp = allocb(1, BPRI_HI)) == NULL)
+ goto failed;
- peer_tcp->tcp_fused_sigurg_mp = mp;
+ peer_tcp->tcp_fused_sigurg_mp = mp;
+ }
- /* Allocate M_SETOPTS mblk */
- if ((mp = allocb(sizeof (*stropt), BPRI_HI)) == NULL)
+ if (!IPCL_IS_NONSTR(peer_tcp->tcp_connp) &&
+ (mp = allocb(sizeof (struct stroptions),
+ BPRI_HI)) == NULL) {
goto failed;
+ }
/* If either tcp or peer_tcp sodirect enabled then disable */
if (tcp->tcp_sodirect != NULL) {
@@ -329,12 +334,12 @@ tcp_fuse(tcp_t *tcp, uchar_t *iphdr, tcph_t *tcph)
* us data as soon as fusion is finished, and we need to be
* able to flow control it in case it sends down huge amount
* of data while we're still detached. To prevent that we
- * inherit the listener's q_hiwat value; this is temporary
- * since we'll repeat the process in tcp_accept_finish().
+ * inherit the listener's recv_hiwater value; this is temporary
+ * since we'll repeat the process in tcp_accept_finish().
*/
if (!tcp->tcp_refuse) {
(void) tcp_fuse_set_rcv_hiwat(tcp,
- tcp->tcp_saved_listener->tcp_rq->q_hiwat);
+ tcp->tcp_saved_listener->tcp_recv_hiwater);
/*
* Set the stream head's write offset value to zero
@@ -342,30 +347,53 @@ tcp_fuse(tcp_t *tcp, uchar_t *iphdr, tcph_t *tcph)
* headers; tell it to not break up the writes (this
* would reduce the amount of work done by kmem); and
* configure our receive buffer. Note that we can only
- * do this for the active connect tcp since our eager
- * is still detached; it will be dealt with later in
+ * do this for the active connect tcp since our eager is
+ * still detached; it will be dealt with later in
* tcp_accept_finish().
*/
- DB_TYPE(mp) = M_SETOPTS;
- mp->b_wptr += sizeof (*stropt);
-
- stropt = (struct stroptions *)mp->b_rptr;
- stropt->so_flags = SO_MAXBLK | SO_WROFF | SO_HIWAT;
- stropt->so_maxblk = tcp_maxpsz_set(peer_tcp, B_FALSE);
- stropt->so_wroff = 0;
-
- /*
- * Record the stream head's high water mark for
- * peer endpoint; this is used for flow-control
- * purposes in tcp_fuse_output().
- */
- stropt->so_hiwat = tcp_fuse_set_rcv_hiwat(peer_tcp,
- peer_rq->q_hiwat);
-
- tcp->tcp_refuse = B_FALSE;
- peer_tcp->tcp_refuse = B_FALSE;
- /* Send the options up */
- putnext(peer_rq, mp);
+ if (!IPCL_IS_NONSTR(peer_tcp->tcp_connp)) {
+ struct stroptions *stropt;
+
+ DB_TYPE(mp) = M_SETOPTS;
+ mp->b_wptr += sizeof (*stropt);
+
+ stropt = (struct stroptions *)mp->b_rptr;
+ stropt->so_flags = SO_MAXBLK|SO_WROFF|SO_HIWAT;
+ stropt->so_maxblk = tcp_maxpsz_set(peer_tcp,
+ B_FALSE);
+ stropt->so_wroff = 0;
+
+ /*
+ * Record the stream head's high water mark for
+ * peer endpoint; this is used for flow-control
+ * purposes in tcp_fuse_output().
+ */
+ stropt->so_hiwat = tcp_fuse_set_rcv_hiwat(
+ peer_tcp, peer_rq->q_hiwat);
+
+ tcp->tcp_refuse = B_FALSE;
+ peer_tcp->tcp_refuse = B_FALSE;
+ /* Send the options up */
+ putnext(peer_rq, mp);
+ } else {
+ struct sock_proto_props sopp;
+
+ /* The peer is a non-STREAMS end point */
+ ASSERT(IPCL_IS_TCP(peer_connp));
+
+ (void) tcp_fuse_set_rcv_hiwat(tcp,
+ tcp->tcp_saved_listener->tcp_recv_hiwater);
+
+ sopp.sopp_flags = SOCKOPT_MAXBLK |
+ SOCKOPT_WROFF | SOCKOPT_RCVHIWAT;
+ sopp.sopp_maxblk = tcp_maxpsz_set(peer_tcp,
+ B_FALSE);
+ sopp.sopp_wroff = 0;
+ sopp.sopp_rxhiwat = tcp_fuse_set_rcv_hiwat(
+ peer_tcp, peer_tcp->tcp_recv_hiwater);
+ (*peer_connp->conn_upcalls->su_set_proto_props)
+ (peer_connp->conn_upper_handle, &sopp);
+ }
}
tcp->tcp_refuse = B_FALSE;
peer_tcp->tcp_refuse = B_FALSE;
@@ -399,8 +427,6 @@ tcp_unfuse(tcp_t *tcp)
ASSERT(peer_tcp->tcp_fused && peer_tcp->tcp_loopback_peer == tcp);
ASSERT(tcp->tcp_connp->conn_sqp == peer_tcp->tcp_connp->conn_sqp);
ASSERT(tcp->tcp_unsent == 0 && peer_tcp->tcp_unsent == 0);
- ASSERT(tcp->tcp_fused_sigurg_mp != NULL);
- ASSERT(peer_tcp->tcp_fused_sigurg_mp != NULL);
/*
* We disable synchronous streams, drain any queued data and
@@ -420,10 +446,16 @@ tcp_unfuse(tcp_t *tcp)
/* Unfuse the endpoints */
peer_tcp->tcp_fused = tcp->tcp_fused = B_FALSE;
peer_tcp->tcp_loopback_peer = tcp->tcp_loopback_peer = NULL;
- freeb(peer_tcp->tcp_fused_sigurg_mp);
- freeb(tcp->tcp_fused_sigurg_mp);
- peer_tcp->tcp_fused_sigurg_mp = NULL;
- tcp->tcp_fused_sigurg_mp = NULL;
+ if (!IPCL_IS_NONSTR(peer_tcp->tcp_connp)) {
+ ASSERT(peer_tcp->tcp_fused_sigurg_mp != NULL);
+ freeb(peer_tcp->tcp_fused_sigurg_mp);
+ peer_tcp->tcp_fused_sigurg_mp = NULL;
+ }
+ if (!IPCL_IS_NONSTR(tcp->tcp_connp)) {
+ ASSERT(tcp->tcp_fused_sigurg_mp != NULL);
+ freeb(tcp->tcp_fused_sigurg_mp);
+ tcp->tcp_fused_sigurg_mp = NULL;
+ }
}
/*
@@ -527,6 +559,7 @@ tcp_fuse_output(tcp_t *tcp, mblk_t *mp, uint32_t send_size)
uint_t max_unread;
boolean_t flow_stopped, peer_data_queued = B_FALSE;
boolean_t urgent = (DB_TYPE(mp) != M_DATA);
+ boolean_t push = B_FALSE;
mblk_t *mp1 = mp;
ill_t *ilp, *olp;
ipif_t *iifp, *oifp;
@@ -546,7 +579,6 @@ tcp_fuse_output(tcp_t *tcp, mblk_t *mp, uint32_t send_size)
ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_PROTO ||
DB_TYPE(mp) == M_PCPROTO);
-
/* If this connection requires IP, unfuse and use regular path */
if (tcp_loopback_needs_ip(tcp, ns) ||
tcp_loopback_needs_ip(peer_tcp, ns) ||
@@ -749,7 +781,38 @@ tcp_fuse_output(tcp_t *tcp, mblk_t *mp, uint32_t send_size)
* Enqueue data into the peer's receive list; we may or may not
* drain the contents depending on the conditions below.
*/
- tcp_rcv_enqueue(peer_tcp, mp, recv_size);
+ if (IPCL_IS_NONSTR(peer_tcp->tcp_connp) &&
+ peer_tcp->tcp_connp->conn_upper_handle != NULL) {
+ int error;
+ int flags = 0;
+
+ if ((tcp->tcp_valid_bits & TCP_URG_VALID) &&
+ (tcp->tcp_urg == tcp->tcp_snxt)) {
+ flags = MSG_OOB;
+ (*peer_tcp->tcp_connp->conn_upcalls->su_signal_oob)
+ (peer_tcp->tcp_connp->conn_upper_handle, 0);
+ tcp->tcp_valid_bits &= ~TCP_URG_VALID;
+ }
+ (*peer_tcp->tcp_connp->conn_upcalls->su_recv)(
+ peer_tcp->tcp_connp->conn_upper_handle, mp, recv_size,
+ flags, &error, &push);
+ } else {
+ if (IPCL_IS_NONSTR(peer_tcp->tcp_connp) &&
+ (tcp->tcp_valid_bits & TCP_URG_VALID) &&
+ (tcp->tcp_urg == tcp->tcp_snxt)) {
+ /*
+ * Can not deal with urgent pointers
+ * that arrive before the connection has been
+ * accept()ed.
+ */
+ tcp->tcp_valid_bits &= ~TCP_URG_VALID;
+ freemsg(mp);
+ mutex_exit(&peer_tcp->tcp_non_sq_lock);
+ return (B_TRUE);
+ }
+
+ tcp_rcv_enqueue(peer_tcp, mp, recv_size);
+ }
/* In case it wrapped around and also to keep it constant */
peer_tcp->tcp_rwnd += recv_size;
@@ -797,6 +860,7 @@ tcp_fuse_output(tcp_t *tcp, mblk_t *mp, uint32_t send_size)
(peer_tcp->tcp_rcv_cnt >= peer_tcp->tcp_fuse_rcv_hiwater ||
peer_tcp->tcp_fuse_rcv_unread_cnt >= max_unread)) ||
(!peer_tcp->tcp_direct_sockfs && !TCP_IS_DETACHED(peer_tcp) &&
+ !IPCL_IS_NONSTR(peer_tcp->tcp_connp) &&
!canputnext(peer_tcp->tcp_rq))) {
peer_data_queued = B_TRUE;
}
@@ -861,7 +925,8 @@ tcp_fuse_output(tcp_t *tcp, mblk_t *mp, uint32_t send_size)
* will pull the data via tcp_fuse_rrw().
*/
if (urgent || (!flow_stopped && !peer_tcp->tcp_direct_sockfs)) {
- ASSERT(peer_tcp->tcp_rcv_list != NULL);
+ ASSERT(IPCL_IS_NONSTR(peer_tcp->tcp_connp) ||
+ peer_tcp->tcp_rcv_list != NULL);
/*
* For TLI-based streams, a thread in tcp_accept_swap()
* can race with us. That thread will ensure that the
@@ -897,6 +962,8 @@ boolean_t
tcp_fuse_rcv_drain(queue_t *q, tcp_t *tcp, mblk_t **sigurg_mpp)
{
mblk_t *mp;
+ conn_t *connp = tcp->tcp_connp;
+
#ifdef DEBUG
uint_t cnt = 0;
#endif
@@ -907,7 +974,7 @@ tcp_fuse_rcv_drain(queue_t *q, tcp_t *tcp, mblk_t **sigurg_mpp)
ASSERT(tcp->tcp_loopback);
ASSERT(tcp->tcp_fused || tcp->tcp_fused_sigurg);
ASSERT(!tcp->tcp_fused || tcp->tcp_loopback_peer != NULL);
- ASSERT(sigurg_mpp != NULL || tcp->tcp_fused);
+ ASSERT(IPCL_IS_NONSTR(connp) || sigurg_mpp != NULL || tcp->tcp_fused);
/* No need for the push timer now, in case it was scheduled */
if (tcp->tcp_push_tid != 0) {
@@ -921,34 +988,41 @@ tcp_fuse_rcv_drain(queue_t *q, tcp_t *tcp, mblk_t **sigurg_mpp)
* works properly.
*/
if (tcp->tcp_fused_sigurg) {
- /*
- * sigurg_mpp is normally NULL, i.e. when we're still
- * fused and didn't get here because of tcp_unfuse().
- * In this case try hard to allocate the M_PCSIG mblk.
- */
- if (sigurg_mpp == NULL &&
- (mp = allocb(1, BPRI_HI)) == NULL &&
- (mp = allocb_tryhard(1)) == NULL) {
- /* Alloc failed; try again next time */
- tcp->tcp_push_tid = TCP_TIMER(tcp, tcp_push_timer,
- MSEC_TO_TICK(tcps->tcps_push_timer_interval));
- return (B_TRUE);
- } else if (sigurg_mpp != NULL) {
+ tcp->tcp_fused_sigurg = B_FALSE;
+ if (IPCL_IS_NONSTR(connp)) {
+ (*connp->conn_upcalls->su_signal_oob)
+ (connp->conn_upper_handle, 0);
+ } else {
/*
- * Use the supplied M_PCSIG mblk; it means we're
- * either unfused or in the process of unfusing,
- * and the drain must happen now.
+ * sigurg_mpp is normally NULL, i.e. when we're still
+ * fused and didn't get here because of tcp_unfuse().
+ * In this case try hard to allocate the M_PCSIG mblk.
*/
- mp = *sigurg_mpp;
- *sigurg_mpp = NULL;
- }
- ASSERT(mp != NULL);
+ if (sigurg_mpp == NULL &&
+ (mp = allocb(1, BPRI_HI)) == NULL &&
+ (mp = allocb_tryhard(1)) == NULL) {
+ /* Alloc failed; try again next time */
+ tcp->tcp_push_tid = TCP_TIMER(tcp,
+ tcp_push_timer,
+ MSEC_TO_TICK(
+ tcps->tcps_push_timer_interval));
+ return (B_TRUE);
+ } else if (sigurg_mpp != NULL) {
+ /*
+ * Use the supplied M_PCSIG mblk; it means we're
+ * either unfused or in the process of unfusing,
+ * and the drain must happen now.
+ */
+ mp = *sigurg_mpp;
+ *sigurg_mpp = NULL;
+ }
+ ASSERT(mp != NULL);
- tcp->tcp_fused_sigurg = B_FALSE;
- /* Send up the signal */
- DB_TYPE(mp) = M_PCSIG;
- *mp->b_wptr++ = (uchar_t)SIGURG;
- putnext(q, mp);
+ /* Send up the signal */
+ DB_TYPE(mp) = M_PCSIG;
+ *mp->b_wptr++ = (uchar_t)SIGURG;
+ putnext(q, mp);
+ }
/*
* Let the regular tcp_rcv_drain() path handle
* draining the data if we're no longer fused.
@@ -980,6 +1054,7 @@ tcp_fuse_rcv_drain(queue_t *q, tcp_t *tcp, mblk_t **sigurg_mpp)
#ifdef DEBUG
cnt += msgdsize(mp);
#endif
+ ASSERT(!IPCL_IS_NONSTR(connp));
if (sd_rd_eof) {
freemsg(mp);
} else {
@@ -991,12 +1066,14 @@ tcp_fuse_rcv_drain(queue_t *q, tcp_t *tcp, mblk_t **sigurg_mpp)
if (tcp->tcp_direct_sockfs && !sd_rd_eof)
(void) strrput_sig(q, B_TRUE);
+#ifdef DEBUG
ASSERT(cnt == tcp->tcp_rcv_cnt);
+#endif
tcp->tcp_rcv_last_head = NULL;
tcp->tcp_rcv_last_tail = NULL;
tcp->tcp_rcv_cnt = 0;
tcp->tcp_fuse_rcv_unread_cnt = 0;
- tcp->tcp_rwnd = q->q_hiwat;
+ tcp->tcp_rwnd = tcp->tcp_recv_hiwater;
if (peer_tcp->tcp_flow_stopped && (TCP_UNSENT_BYTES(peer_tcp) <=
peer_tcp->tcp_xmit_lowater)) {
@@ -1409,8 +1486,10 @@ tcp_fuse_disable_pair(tcp_t *tcp, boolean_t unfusing)
}
/* Disable synchronous streams */
- tcp_fuse_syncstr_disable(tcp);
- tcp_fuse_syncstr_disable(peer_tcp);
+ if (!IPCL_IS_NONSTR(tcp->tcp_connp))
+ tcp_fuse_syncstr_disable(tcp);
+ if (!IPCL_IS_NONSTR(peer_tcp->tcp_connp))
+ tcp_fuse_syncstr_disable(peer_tcp);
}
/*
diff --git a/usr/src/uts/common/inet/tcp/tcp_opt_data.c b/usr/src/uts/common/inet/tcp/tcp_opt_data.c
index 4f0d767774..d977c27e53 100644
--- a/usr/src/uts/common/inet/tcp/tcp_opt_data.c
+++ b/usr/src/uts/common/inet/tcp/tcp_opt_data.c
@@ -19,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/types.h>
#include <sys/stream.h>
#define _SUN_TPI_VERSION 2
@@ -43,8 +41,8 @@
extern int tcp_opt_default(queue_t *q, int level, int name, uchar_t *ptr);
-extern int tcp_opt_get(queue_t *q, int level, int name, uchar_t *ptr);
-extern int tcp_opt_set(queue_t *q, uint_t optset_context, int level,
+extern int tcp_tpi_opt_get(queue_t *q, int level, int name, uchar_t *ptr);
+extern int tcp_tpi_opt_set(queue_t *q, uint_t optset_context, int level,
int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
void *thisdg_attrs, cred_t *cr, mblk_t *mblk);
@@ -125,10 +123,10 @@ opdes_t tcp_opt_arr[] = {
{ IP_OPTIONS, IPPROTO_IP, OA_RW, OA_RW, OP_NP,
(OP_PASSNEXT|OP_VARLEN|OP_NODEFAULT),
- 40, -1 /* not initialized */ },
+ IP_MAX_OPT_LENGTH + IP_ADDR_LEN, -1 /* not initialized */ },
{ T_IP_OPTIONS, IPPROTO_IP, OA_RW, OA_RW, OP_NP,
(OP_PASSNEXT|OP_VARLEN|OP_NODEFAULT),
- 40, -1 /* not initialized */ },
+ IP_MAX_OPT_LENGTH + IP_ADDR_LEN, -1 /* not initialized */ },
{ IP_TOS, IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_PASSNEXT, sizeof (int), 0 },
{ T_IP_TOS, IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_PASSNEXT, sizeof (int), 0 },
@@ -244,8 +242,8 @@ uint_t tcp_max_optsize; /* initialized when TCP driver is loaded */
optdb_obj_t tcp_opt_obj = {
tcp_opt_default, /* TCP default value function pointer */
- tcp_opt_get, /* TCP get function pointer */
- tcp_opt_set, /* TCP set function pointer */
+ tcp_tpi_opt_get, /* TCP get function pointer */
+ tcp_tpi_opt_set, /* TCP set function pointer */
B_TRUE, /* TCP is tpi provider */
TCP_OPT_ARR_CNT, /* TCP option database count of entries */
tcp_opt_arr, /* TCP option database */
diff --git a/usr/src/uts/common/inet/tcp/tcpddi.c b/usr/src/uts/common/inet/tcp/tcpddi.c
index ee5b0181b6..91da903826 100644
--- a/usr/src/uts/common/inet/tcp/tcpddi.c
+++ b/usr/src/uts/common/inet/tcp/tcpddi.c
@@ -29,12 +29,18 @@
#include <sys/modctl.h>
#include <inet/common.h>
#include <inet/ip.h>
+#include <inet/tcp_impl.h>
+#include <sys/strsubr.h>
+#include <sys/socketvar.h>
#define INET_NAME "tcp"
#define INET_MODSTRTAB dummymodinfo
#define INET_DEVSTRTAB tcpinfov4
#define INET_DEVDESC "TCP STREAMS driver"
#define INET_MODDESC "TCP dummy STREAMS module"
+#define INET_SOCKDESC "TCP socket module"
+#define INET_SOCK_PROTO_CREATE_FUNC (*tcp_create)
+#define INET_SOCK_PROTO_FB_FUNC (*tcp_fallback)
#define INET_DEVMINOR 0
#define INET_MODMTFLAGS D_MP
/*
diff --git a/usr/src/uts/common/inet/tcp_impl.h b/usr/src/uts/common/inet/tcp_impl.h
index 98d8d17f61..97374be482 100644
--- a/usr/src/uts/common/inet/tcp_impl.h
+++ b/usr/src/uts/common/inet/tcp_impl.h
@@ -39,6 +39,7 @@ extern "C" {
#ifdef _KERNEL
+#include <inet/optcom.h>
#include <inet/tcp.h>
#define TCP_MOD_ID 5105
@@ -274,6 +275,14 @@ extern int tcp_fuse_maxpsz_set(tcp_t *);
extern optdb_obj_t tcp_opt_obj;
extern uint_t tcp_max_optsize;
+extern sock_lower_handle_t tcp_create(int, int, int, sock_downcalls_t **,
+ uint_t *, int *, int, cred_t *);
+extern void tcp_fallback(sock_lower_handle_t, queue_t *, boolean_t,
+ so_proto_quiesced_cb_t);
+
+extern sock_downcalls_t sock_tcp_downcalls;
+
+
#endif /* _KERNEL */
#ifdef __cplusplus
diff --git a/usr/src/uts/common/inet/tcp_stack.h b/usr/src/uts/common/inet/tcp_stack.h
index 43da079d5a..173875f0da 100644
--- a/usr/src/uts/common/inet/tcp_stack.h
+++ b/usr/src/uts/common/inet/tcp_stack.h
@@ -30,6 +30,8 @@
#include <sys/netstack.h>
#include <inet/ip.h>
#include <inet/ipdrop.h>
+#include <sys/sunddi.h>
+#include <sys/sunldi.h>
#ifdef __cplusplus
extern "C" {
@@ -232,6 +234,7 @@ struct tcp_stack {
uint32_t tcps_rst_cnt;
/* The number of RST not sent because of the rate limit. */
uint32_t tcps_rst_unsent;
+ ldi_ident_t tcps_ldi_ident;
};
typedef struct tcp_stack tcp_stack_t;
diff --git a/usr/src/uts/common/inet/udp/udp.c b/usr/src/uts/common/inet/udp/udp.c
index 70677c86d8..5f819f1285 100644
--- a/usr/src/uts/common/inet/udp/udp.c
+++ b/usr/src/uts/common/inet/udp/udp.c
@@ -40,13 +40,13 @@
#include <sys/strsubr.h>
#include <sys/suntpi.h>
#include <sys/xti_inet.h>
-#include <sys/cmn_err.h>
#include <sys/kmem.h>
#include <sys/policy.h>
#include <sys/ucred.h>
#include <sys/zone.h>
#include <sys/socket.h>
+#include <sys/socketvar.h>
#include <sys/sockio.h>
#include <sys/vtrace.h>
#include <sys/sdt.h>
@@ -68,7 +68,7 @@
#include <inet/ip_if.h>
#include <inet/ip_multi.h>
#include <inet/ip_ndp.h>
-#include <inet/mi.h>
+#include <inet/proto_set.h>
#include <inet/mib2.h>
#include <inet/nd.h>
#include <inet/optcom.h>
@@ -150,17 +150,14 @@ typedef struct udpattrs_s {
} udpattrs_t;
static void udp_addr_req(queue_t *q, mblk_t *mp);
-static void udp_bind(queue_t *q, mblk_t *mp);
+static void udp_tpi_bind(queue_t *q, mblk_t *mp);
static void udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp);
static void udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock);
-static void udp_bind_result(conn_t *, mblk_t *);
-static void udp_bind_ack(conn_t *, mblk_t *mp);
-static void udp_bind_error(conn_t *, mblk_t *mp);
static int udp_build_hdrs(udp_t *udp);
static void udp_capability_req(queue_t *q, mblk_t *mp);
-static int udp_close(queue_t *q);
-static void udp_connect(queue_t *q, mblk_t *mp);
-static void udp_disconnect(queue_t *q, mblk_t *mp);
+static int udp_tpi_close(queue_t *q, int flags);
+static void udp_tpi_connect(queue_t *q, mblk_t *mp);
+static void udp_tpi_disconnect(queue_t *q, mblk_t *mp);
static void udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error,
int sys_error);
static void udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive,
@@ -171,8 +168,8 @@ static int udp_extra_priv_ports_add(queue_t *q, mblk_t *mp,
char *value, caddr_t cp, cred_t *cr);
static int udp_extra_priv_ports_del(queue_t *q, mblk_t *mp,
char *value, caddr_t cp, cred_t *cr);
-static void udp_icmp_error(queue_t *q, mblk_t *mp);
-static void udp_icmp_error_ipv6(queue_t *q, mblk_t *mp);
+static void udp_icmp_error(conn_t *, mblk_t *);
+static void udp_icmp_error_ipv6(conn_t *, mblk_t *);
static void udp_info_req(queue_t *q, mblk_t *mp);
static void udp_input(void *, mblk_t *, void *);
static mblk_t *udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim,
@@ -201,15 +198,16 @@ static void udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp,
ipha_t *ipha);
static void udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr,
t_scalar_t destlen, t_scalar_t err);
-static void udp_unbind(queue_t *q, mblk_t *mp);
+static void udp_tpi_unbind(queue_t *q, mblk_t *mp);
static in_port_t udp_update_next_port(udp_t *udp, in_port_t port,
boolean_t random);
static mblk_t *udp_output_v4(conn_t *, mblk_t *, ipaddr_t, uint16_t, uint_t,
- int *, boolean_t);
+ int *, boolean_t, struct nmsghdr *, cred_t *, pid_t);
static mblk_t *udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6,
- int *error);
+ int *error, struct nmsghdr *msg, cred_t *cr, pid_t pid);
static void udp_wput_other(queue_t *q, mblk_t *mp);
static void udp_wput_iocdata(queue_t *q, mblk_t *mp);
+static void udp_wput_fallback(queue_t *q, mblk_t *mp);
static size_t udp_set_rcv_hiwat(udp_t *udp, size_t size);
static void *udp_stack_init(netstackid_t stackid, netstack_t *ns);
@@ -226,6 +224,25 @@ static void udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp,
static void udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing);
static void udp_xmit(queue_t *, mblk_t *, ire_t *ire, conn_t *, zoneid_t);
+static int udp_send_connected(conn_t *, mblk_t *, struct nmsghdr *,
+ cred_t *, pid_t);
+
+/* Common routine for TPI and socket module */
+static conn_t *udp_do_open(cred_t *, boolean_t, int);
+static void udp_do_close(conn_t *);
+static int udp_do_bind(conn_t *, struct sockaddr *, socklen_t, cred_t *,
+ boolean_t);
+static int udp_do_unbind(conn_t *);
+static int udp_do_getsockname(udp_t *, struct sockaddr *, uint_t *);
+static int udp_do_getpeername(udp_t *, struct sockaddr *, uint_t *);
+
+int udp_getsockname(sock_lower_handle_t,
+ struct sockaddr *, socklen_t *, cred_t *);
+int udp_getpeername(sock_lower_handle_t,
+ struct sockaddr *, socklen_t *, cred_t *);
+static int udp_do_connect(conn_t *, const struct sockaddr *, socklen_t);
+static int udp_post_ip_bind_connect(udp_t *, mblk_t *, int);
+
#define UDP_RECV_HIWATER (56 * 1024)
#define UDP_RECV_LOWATER 128
#define UDP_XMIT_HIWATER (56 * 1024)
@@ -240,12 +257,12 @@ static struct module_info udp_mod_info = {
* We have separate open functions for the /dev/udp and /dev/udp6 devices.
*/
static struct qinit udp_rinitv4 = {
- NULL, NULL, udp_openv4, udp_close, NULL,
+ NULL, NULL, udp_openv4, udp_tpi_close, NULL,
&udp_mod_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD
};
static struct qinit udp_rinitv6 = {
- NULL, NULL, udp_openv6, udp_close, NULL,
+ NULL, NULL, udp_openv6, udp_tpi_close, NULL,
&udp_mod_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD
};
@@ -254,17 +271,22 @@ static struct qinit udp_winit = {
&udp_mod_info, NULL, NULL, NULL, STRUIOT_NONE
};
+/* UDP entry point during fallback */
+struct qinit udp_fallback_sock_winit = {
+ (pfi_t)udp_wput_fallback, NULL, NULL, NULL, NULL, &udp_mod_info
+};
+
/*
* UDP needs to handle I_LINK and I_PLINK since ifconfig
* likes to use it as a place to hang the various streams.
*/
static struct qinit udp_lrinit = {
- (pfi_t)udp_lrput, NULL, udp_openv4, udp_close, NULL,
+ (pfi_t)udp_lrput, NULL, udp_openv4, udp_tpi_close, NULL,
&udp_mod_info
};
static struct qinit udp_lwinit = {
- (pfi_t)udp_lwput, NULL, udp_openv4, udp_close, NULL,
+ (pfi_t)udp_lwput, NULL, udp_openv4, udp_tpi_close, NULL,
&udp_mod_info
};
@@ -559,30 +581,19 @@ udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp)
* duplicating the us->us_next_port_to_try.
*/
static void
-udp_bind(queue_t *q, mblk_t *mp)
+udp_tpi_bind(queue_t *q, mblk_t *mp)
{
sin_t *sin;
sin6_t *sin6;
mblk_t *mp1;
- in_port_t port; /* Host byte order */
- in_port_t requested_port; /* Host byte order */
struct T_bind_req *tbr;
- int count;
- in6_addr_t v6src;
- boolean_t bind_to_req_port_only;
- int loopmax;
- udp_fanout_t *udpf;
- in_port_t lport; /* Network byte order */
- zoneid_t zoneid;
conn_t *connp;
udp_t *udp;
- boolean_t is_inaddr_any;
- mlp_type_t addrtype, mlptype;
- udp_stack_t *us;
+ int error;
+ struct sockaddr *sa;
connp = Q_TO_CONN(q);
udp = connp->conn_udp;
- us = udp->udp_us;
if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) {
(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
"udp_bind: bad req, len %u",
@@ -607,6 +618,10 @@ udp_bind(queue_t *q, mblk_t *mp)
}
mp = mp1;
+
+ /* Reset the message type in preparation for shipping it back. */
+ DB_TYPE(mp) = M_PCPROTO;
+
tbr = (struct T_bind_req *)mp->b_rptr;
switch (tbr->ADDR_length) {
case 0: /* Request for a generic port */
@@ -617,6 +632,7 @@ udp_bind(queue_t *q, mblk_t *mp)
*sin = sin_null;
sin->sin_family = AF_INET;
mp->b_wptr = (uchar_t *)&sin[1];
+ sa = (struct sockaddr *)sin;
} else {
ASSERT(udp->udp_family == AF_INET6);
tbr->ADDR_length = sizeof (sin6_t);
@@ -624,38 +640,36 @@ udp_bind(queue_t *q, mblk_t *mp)
*sin6 = sin6_null;
sin6->sin6_family = AF_INET6;
mp->b_wptr = (uchar_t *)&sin6[1];
+ sa = (struct sockaddr *)sin6;
}
- port = 0;
break;
case sizeof (sin_t): /* Complete IPv4 address */
- sin = (sin_t *)mi_offset_param(mp, tbr->ADDR_offset,
+ sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset,
sizeof (sin_t));
- if (sin == NULL || !OK_32PTR((char *)sin)) {
+ if (sa == NULL || !OK_32PTR((char *)sa)) {
udp_err_ack(q, mp, TSYSERR, EINVAL);
return;
}
if (udp->udp_family != AF_INET ||
- sin->sin_family != AF_INET) {
+ sa->sa_family != AF_INET) {
udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
return;
}
- port = ntohs(sin->sin_port);
break;
case sizeof (sin6_t): /* complete IPv6 address */
- sin6 = (sin6_t *)mi_offset_param(mp, tbr->ADDR_offset,
+ sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset,
sizeof (sin6_t));
- if (sin6 == NULL || !OK_32PTR((char *)sin6)) {
+ if (sa == NULL || !OK_32PTR((char *)sa)) {
udp_err_ack(q, mp, TSYSERR, EINVAL);
return;
}
if (udp->udp_family != AF_INET6 ||
- sin6->sin6_family != AF_INET6) {
+ sa->sa_family != AF_INET6) {
udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
return;
}
- port = ntohs(sin6->sin6_port);
break;
default: /* Invalid request */
@@ -665,503 +679,21 @@ udp_bind(queue_t *q, mblk_t *mp)
return;
}
- requested_port = port;
-
- if (requested_port == 0 || tbr->PRIM_type == O_T_BIND_REQ)
- bind_to_req_port_only = B_FALSE;
- else /* T_BIND_REQ and requested_port != 0 */
- bind_to_req_port_only = B_TRUE;
-
- if (requested_port == 0) {
- /*
- * If the application passed in zero for the port number, it
- * doesn't care which port number we bind to. Get one in the
- * valid range.
- */
- if (udp->udp_anon_priv_bind) {
- port = udp_get_next_priv_port(udp);
- } else {
- port = udp_update_next_port(udp,
- us->us_next_port_to_try, B_TRUE);
- }
- } else {
- /*
- * If the port is in the well-known privileged range,
- * make sure the caller was privileged.
- */
- int i;
- boolean_t priv = B_FALSE;
-
- if (port < us->us_smallest_nonpriv_port) {
- priv = B_TRUE;
- } else {
- for (i = 0; i < us->us_num_epriv_ports; i++) {
- if (port == us->us_epriv_ports[i]) {
- priv = B_TRUE;
- break;
- }
- }
- }
-
- if (priv) {
- cred_t *cr = DB_CREDDEF(mp, connp->conn_cred);
-
- if (secpolicy_net_privaddr(cr, port,
- IPPROTO_UDP) != 0) {
- udp_err_ack(q, mp, TACCES, 0);
- return;
- }
- }
- }
-
- if (port == 0) {
- udp_err_ack(q, mp, TNOADDR, 0);
- return;
- }
-
- /*
- * The state must be TS_UNBND. TPI mandates that users must send
- * TPI primitives only 1 at a time and wait for the response before
- * sending the next primitive.
- */
- rw_enter(&udp->udp_rwlock, RW_WRITER);
- if (udp->udp_state != TS_UNBND || udp->udp_pending_op != -1) {
- rw_exit(&udp->udp_rwlock);
- (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
- "udp_bind: bad state, %u", udp->udp_state);
- udp_err_ack(q, mp, TOUTSTATE, 0);
- return;
- }
- udp->udp_pending_op = tbr->PRIM_type;
- /*
- * Copy the source address into our udp structure. This address
- * may still be zero; if so, IP will fill in the correct address
- * each time an outbound packet is passed to it. Since the udp is
- * not yet in the bind hash list, we don't grab the uf_lock to
- * change udp_ipversion
- */
- if (udp->udp_family == AF_INET) {
- ASSERT(sin != NULL);
- ASSERT(udp->udp_ipversion == IPV4_VERSION);
- udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE +
- udp->udp_ip_snd_options_len;
- IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6src);
- } else {
- ASSERT(sin6 != NULL);
- v6src = sin6->sin6_addr;
- if (IN6_IS_ADDR_V4MAPPED(&v6src)) {
- /*
- * no need to hold the uf_lock to set the udp_ipversion
- * since we are not yet in the fanout list
- */
- udp->udp_ipversion = IPV4_VERSION;
- udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
- UDPH_SIZE + udp->udp_ip_snd_options_len;
- } else {
- udp->udp_ipversion = IPV6_VERSION;
- udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len;
- }
- }
-
- /*
- * If udp_reuseaddr is not set, then we have to make sure that
- * the IP address and port number the application requested
- * (or we selected for the application) is not being used by
- * another stream. If another stream is already using the
- * requested IP address and port, the behavior depends on
- * "bind_to_req_port_only". If set the bind fails; otherwise we
- * search for any an unused port to bind to the the stream.
- *
- * As per the BSD semantics, as modified by the Deering multicast
- * changes, if udp_reuseaddr is set, then we allow multiple binds
- * to the same port independent of the local IP address.
- *
- * This is slightly different than in SunOS 4.X which did not
- * support IP multicast. Note that the change implemented by the
- * Deering multicast code effects all binds - not only binding
- * to IP multicast addresses.
- *
- * Note that when binding to port zero we ignore SO_REUSEADDR in
- * order to guarantee a unique port.
- */
- count = 0;
- if (udp->udp_anon_priv_bind) {
- /*
- * loopmax = (IPPORT_RESERVED-1) -
- * us->us_min_anonpriv_port + 1
- */
- loopmax = IPPORT_RESERVED - us->us_min_anonpriv_port;
- } else {
- loopmax = us->us_largest_anon_port -
- us->us_smallest_anon_port + 1;
- }
-
- is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src);
- zoneid = connp->conn_zoneid;
-
- for (;;) {
- udp_t *udp1;
- boolean_t found_exclbind = B_FALSE;
-
- /*
- * Walk through the list of udp streams bound to
- * requested port with the same IP address.
- */
- lport = htons(port);
- udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport,
- us->us_bind_fanout_size)];
- mutex_enter(&udpf->uf_lock);
- for (udp1 = udpf->uf_udp; udp1 != NULL;
- udp1 = udp1->udp_bind_hash) {
- if (lport != udp1->udp_port)
- continue;
-
- /*
- * On a labeled system, we must treat bindings to ports
- * on shared IP addresses by sockets with MAC exemption
- * privilege as being in all zones, as there's
- * otherwise no way to identify the right receiver.
- */
- if (!(IPCL_ZONE_MATCH(udp1->udp_connp, zoneid) ||
- IPCL_ZONE_MATCH(connp,
- udp1->udp_connp->conn_zoneid)) &&
- !connp->conn_mac_exempt && \
- !udp1->udp_connp->conn_mac_exempt)
- continue;
+ cred_t *cr = DB_CREDDEF(mp, connp->conn_cred);
+ error = udp_do_bind(connp, sa, tbr->ADDR_length, cr,
+ tbr->PRIM_type != O_T_BIND_REQ);
- /*
- * If UDP_EXCLBIND is set for either the bound or
- * binding endpoint, the semantics of bind
- * is changed according to the following chart.
- *
- * spec = specified address (v4 or v6)
- * unspec = unspecified address (v4 or v6)
- * A = specified addresses are different for endpoints
- *
- * bound bind to allowed?
- * -------------------------------------
- * unspec unspec no
- * unspec spec no
- * spec unspec no
- * spec spec yes if A
- *
- * For labeled systems, SO_MAC_EXEMPT behaves the same
- * as UDP_EXCLBIND, except that zoneid is ignored.
- */
- if (udp1->udp_exclbind || udp->udp_exclbind ||
- udp1->udp_connp->conn_mac_exempt ||
- connp->conn_mac_exempt) {
- if (V6_OR_V4_INADDR_ANY(
- udp1->udp_bound_v6src) ||
- is_inaddr_any ||
- IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src,
- &v6src)) {
- found_exclbind = B_TRUE;
- break;
- }
- continue;
- }
-
- /*
- * Check ipversion to allow IPv4 and IPv6 sockets to
- * have disjoint port number spaces.
- */
- if (udp->udp_ipversion != udp1->udp_ipversion) {
-
- /*
- * On the first time through the loop, if the
- * the user intentionally specified a
- * particular port number, then ignore any
- * bindings of the other protocol that may
- * conflict. This allows the user to bind IPv6
- * alone and get both v4 and v6, or bind both
- * both and get each seperately. On subsequent
- * times through the loop, we're checking a
- * port that we chose (not the user) and thus
- * we do not allow casual duplicate bindings.
- */
- if (count == 0 && requested_port != 0)
- continue;
- }
-
- /*
- * No difference depending on SO_REUSEADDR.
- *
- * If existing port is bound to a
- * non-wildcard IP address and
- * the requesting stream is bound to
- * a distinct different IP addresses
- * (non-wildcard, also), keep going.
- */
- if (!is_inaddr_any &&
- !V6_OR_V4_INADDR_ANY(udp1->udp_bound_v6src) &&
- !IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src,
- &v6src)) {
- continue;
- }
- break;
- }
-
- if (!found_exclbind &&
- (udp->udp_reuseaddr && requested_port != 0)) {
- break;
- }
-
- if (udp1 == NULL) {
- /*
- * No other stream has this IP address
- * and port number. We can use it.
- */
- break;
- }
- mutex_exit(&udpf->uf_lock);
- if (bind_to_req_port_only) {
- /*
- * We get here only when requested port
- * is bound (and only first of the for()
- * loop iteration).
- *
- * The semantics of this bind request
- * require it to fail so we return from
- * the routine (and exit the loop).
- *
- */
- udp->udp_pending_op = -1;
- rw_exit(&udp->udp_rwlock);
- udp_err_ack(q, mp, TADDRBUSY, 0);
- return;
- }
-
- if (udp->udp_anon_priv_bind) {
- port = udp_get_next_priv_port(udp);
- } else {
- if ((count == 0) && (requested_port != 0)) {
- /*
- * If the application wants us to find
- * a port, get one to start with. Set
- * requested_port to 0, so that we will
- * update us->us_next_port_to_try below.
- */
- port = udp_update_next_port(udp,
- us->us_next_port_to_try, B_TRUE);
- requested_port = 0;
- } else {
- port = udp_update_next_port(udp, port + 1,
- B_FALSE);
- }
- }
-
- if (port == 0 || ++count >= loopmax) {
- /*
- * We've tried every possible port number and
- * there are none available, so send an error
- * to the user.
- */
- udp->udp_pending_op = -1;
- rw_exit(&udp->udp_rwlock);
- udp_err_ack(q, mp, TNOADDR, 0);
- return;
- }
- }
-
- /*
- * Copy the source address into our udp structure. This address
- * may still be zero; if so, ip will fill in the correct address
- * each time an outbound packet is passed to it.
- * If we are binding to a broadcast or multicast address then
- * udp_bind_ack will clear the source address when it receives
- * the T_BIND_ACK.
- */
- udp->udp_v6src = udp->udp_bound_v6src = v6src;
- udp->udp_port = lport;
- /*
- * Now reset the the next anonymous port if the application requested
- * an anonymous port, or we handed out the next anonymous port.
- */
- if ((requested_port == 0) && (!udp->udp_anon_priv_bind)) {
- us->us_next_port_to_try = port + 1;
- }
-
- /* Initialize the O_T_BIND_REQ/T_BIND_REQ for ip. */
- if (udp->udp_family == AF_INET) {
- sin->sin_port = udp->udp_port;
- } else {
- int error;
-
- sin6->sin6_port = udp->udp_port;
- /* Rebuild the header template */
- error = udp_build_hdrs(udp);
- if (error != 0) {
- udp->udp_pending_op = -1;
- rw_exit(&udp->udp_rwlock);
- mutex_exit(&udpf->uf_lock);
+ if (error != 0) {
+ if (error > 0) {
udp_err_ack(q, mp, TSYSERR, error);
- return;
- }
- }
- udp->udp_state = TS_IDLE;
- udp_bind_hash_insert(udpf, udp);
- mutex_exit(&udpf->uf_lock);
- rw_exit(&udp->udp_rwlock);
-
- if (cl_inet_bind) {
- /*
- * Running in cluster mode - register bind information
- */
- if (udp->udp_ipversion == IPV4_VERSION) {
- (*cl_inet_bind)(IPPROTO_UDP, AF_INET,
- (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)),
- (in_port_t)udp->udp_port);
} else {
- (*cl_inet_bind)(IPPROTO_UDP, AF_INET6,
- (uint8_t *)&(udp->udp_v6src),
- (in_port_t)udp->udp_port);
+ udp_err_ack(q, mp, -error, 0);
}
-
- }
-
- connp->conn_anon_port = (is_system_labeled() && requested_port == 0);
- if (is_system_labeled() && (!connp->conn_anon_port ||
- connp->conn_anon_mlp)) {
- uint16_t mlpport;
- cred_t *cr = connp->conn_cred;
- zone_t *zone;
-
- zone = crgetzone(cr);
- connp->conn_mlp_type = udp->udp_recvucred ? mlptBoth :
- mlptSingle;
- addrtype = tsol_mlp_addr_type(zone->zone_id, IPV6_VERSION,
- &v6src, us->us_netstack->netstack_ip);
- if (addrtype == mlptSingle) {
- rw_enter(&udp->udp_rwlock, RW_WRITER);
- udp->udp_pending_op = -1;
- rw_exit(&udp->udp_rwlock);
- udp_err_ack(q, mp, TNOADDR, 0);
- connp->conn_anon_port = B_FALSE;
- connp->conn_mlp_type = mlptSingle;
- return;
- }
- mlpport = connp->conn_anon_port ? PMAPPORT : port;
- mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport,
- addrtype);
- if (mlptype != mlptSingle &&
- (connp->conn_mlp_type == mlptSingle ||
- secpolicy_net_bindmlp(cr) != 0)) {
- if (udp->udp_debug) {
- (void) strlog(UDP_MOD_ID, 0, 1,
- SL_ERROR|SL_TRACE,
- "udp_bind: no priv for multilevel port %d",
- mlpport);
- }
- rw_enter(&udp->udp_rwlock, RW_WRITER);
- udp->udp_pending_op = -1;
- rw_exit(&udp->udp_rwlock);
- udp_err_ack(q, mp, TACCES, 0);
- connp->conn_anon_port = B_FALSE;
- connp->conn_mlp_type = mlptSingle;
- return;
- }
-
- /*
- * If we're specifically binding a shared IP address and the
- * port is MLP on shared addresses, then check to see if this
- * zone actually owns the MLP. Reject if not.
- */
- if (mlptype == mlptShared && addrtype == mlptShared) {
- /*
- * No need to handle exclusive-stack zones since
- * ALL_ZONES only applies to the shared stack.
- */
- zoneid_t mlpzone;
-
- mlpzone = tsol_mlp_findzone(IPPROTO_UDP,
- htons(mlpport));
- if (connp->conn_zoneid != mlpzone) {
- if (udp->udp_debug) {
- (void) strlog(UDP_MOD_ID, 0, 1,
- SL_ERROR|SL_TRACE,
- "udp_bind: attempt to bind port "
- "%d on shared addr in zone %d "
- "(should be %d)",
- mlpport, connp->conn_zoneid,
- mlpzone);
- }
- rw_enter(&udp->udp_rwlock, RW_WRITER);
- udp->udp_pending_op = -1;
- rw_exit(&udp->udp_rwlock);
- udp_err_ack(q, mp, TACCES, 0);
- connp->conn_anon_port = B_FALSE;
- connp->conn_mlp_type = mlptSingle;
- return;
- }
- }
- if (connp->conn_anon_port) {
- int error;
-
- error = tsol_mlp_anon(zone, mlptype, connp->conn_ulp,
- port, B_TRUE);
- if (error != 0) {
- if (udp->udp_debug) {
- (void) strlog(UDP_MOD_ID, 0, 1,
- SL_ERROR|SL_TRACE,
- "udp_bind: cannot establish anon "
- "MLP for port %d", port);
- }
- rw_enter(&udp->udp_rwlock, RW_WRITER);
- udp->udp_pending_op = -1;
- rw_exit(&udp->udp_rwlock);
- udp_err_ack(q, mp, TACCES, 0);
- connp->conn_anon_port = B_FALSE;
- connp->conn_mlp_type = mlptSingle;
- return;
- }
- }
- connp->conn_mlp_type = mlptype;
- }
-
- /* Pass the protocol number in the message following the address. */
- *mp->b_wptr++ = IPPROTO_UDP;
- if (!V6_OR_V4_INADDR_ANY(udp->udp_v6src)) {
- /*
- * Append a request for an IRE if udp_v6src not
- * zero (IPv4 - INADDR_ANY, or IPv6 - all-zeroes address).
- */
- mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
- if (!mp->b_cont) {
- rw_enter(&udp->udp_rwlock, RW_WRITER);
- udp->udp_pending_op = -1;
- rw_exit(&udp->udp_rwlock);
- udp_err_ack(q, mp, TSYSERR, ENOMEM);
- return;
- }
- mp->b_cont->b_wptr += sizeof (ire_t);
- mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;
+ } else {
+ tbr->PRIM_type = T_BIND_ACK;
+ qreply(q, mp);
}
- if (udp->udp_family == AF_INET6)
- mp = ip_bind_v6(q, mp, connp, NULL);
- else
- mp = ip_bind_v4(q, mp, connp);
-
- /* The above return NULL if the bind needs to be deferred */
- if (mp != NULL)
- udp_bind_result(connp, mp);
- else
- CONN_INC_REF(connp);
-}
-
-/*
- * This is called from ip_wput_nondata to handle the results of a
- * deferred UDP bind. It is called once the bind has been completed.
- */
-void
-udp_resume_bind(conn_t *connp, mblk_t *mp)
-{
- ASSERT(connp != NULL && IPCL_IS_UDP(connp));
-
- udp_bind_result(connp, mp);
-
- CONN_OPER_PENDING_DONE(connp);
}
/*
@@ -1174,32 +706,25 @@ udp_resume_bind(conn_t *connp, mblk_t *mp)
* T_OK_ACK - for the T_CONN_REQ
* T_CONN_CON - to keep the TPI user happy
*
- * The connect completes in udp_bind_result.
+ * The connect completes in udp_do_connect.
* When a T_BIND_ACK is received information is extracted from the IRE
* and the two appended messages are sent to the TPI user.
* Should udp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will
* convert it to an error ack for the appropriate primitive.
*/
static void
-udp_connect(queue_t *q, mblk_t *mp)
+udp_tpi_connect(queue_t *q, mblk_t *mp)
{
- sin6_t *sin6;
- sin_t *sin;
+ mblk_t *mp1;
+ udp_t *udp;
+ conn_t *connp = Q_TO_CONN(q);
+ int error;
+ socklen_t len;
+ struct sockaddr *sa;
struct T_conn_req *tcr;
- in6_addr_t v6dst;
- ipaddr_t v4dst;
- uint16_t dstport;
- uint32_t flowinfo;
- mblk_t *mp1, *mp2;
- udp_fanout_t *udpf;
- udp_t *udp, *udp1;
- ushort_t ipversion;
- udp_stack_t *us;
- conn_t *connp = Q_TO_CONN(q);
udp = connp->conn_udp;
tcr = (struct T_conn_req *)mp->b_rptr;
- us = udp->udp_us;
/* A bit of sanity checking */
if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) {
@@ -1218,285 +743,87 @@ udp_connect(queue_t *q, mblk_t *mp)
* Make sure that address family matches the type of
* family of the the address passed down
*/
+ len = tcr->DEST_length;
switch (tcr->DEST_length) {
default:
udp_err_ack(q, mp, TBADADDR, 0);
return;
case sizeof (sin_t):
- sin = (sin_t *)mi_offset_param(mp, tcr->DEST_offset,
+ sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
sizeof (sin_t));
- if (sin == NULL || !OK_32PTR((char *)sin)) {
- udp_err_ack(q, mp, TSYSERR, EINVAL);
- return;
- }
- if (udp->udp_family != AF_INET ||
- sin->sin_family != AF_INET) {
- udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
- return;
- }
- v4dst = sin->sin_addr.s_addr;
- dstport = sin->sin_port;
- IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
- ASSERT(udp->udp_ipversion == IPV4_VERSION);
- ipversion = IPV4_VERSION;
break;
case sizeof (sin6_t):
- sin6 = (sin6_t *)mi_offset_param(mp, tcr->DEST_offset,
+ sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
sizeof (sin6_t));
- if (sin6 == NULL || !OK_32PTR((char *)sin6)) {
- udp_err_ack(q, mp, TSYSERR, EINVAL);
- return;
- }
- if (udp->udp_family != AF_INET6 ||
- sin6->sin6_family != AF_INET6) {
- udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
- return;
- }
- v6dst = sin6->sin6_addr;
- dstport = sin6->sin6_port;
- if (IN6_IS_ADDR_V4MAPPED(&v6dst)) {
- IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst);
- ipversion = IPV4_VERSION;
- flowinfo = 0;
- } else {
- ipversion = IPV6_VERSION;
- flowinfo = sin6->sin6_flowinfo;
- }
break;
}
- if (dstport == 0) {
- udp_err_ack(q, mp, TBADADDR, 0);
- return;
- }
-
- rw_enter(&udp->udp_rwlock, RW_WRITER);
- /*
- * This UDP must have bound to a port already before doing a connect.
- * TPI mandates that users must send TPI primitives only 1 at a time
- * and wait for the response before sending the next primitive.
- */
- if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) {
- rw_exit(&udp->udp_rwlock);
- (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
- "udp_connect: bad state, %u", udp->udp_state);
- udp_err_ack(q, mp, TOUTSTATE, 0);
+ error = proto_verify_ip_addr(udp->udp_family, sa, len);
+ if (error != 0) {
+ udp_err_ack(q, mp, TSYSERR, error);
return;
}
- udp->udp_pending_op = T_CONN_REQ;
- ASSERT(udp->udp_port != 0 && udp->udp_ptpbhn != NULL);
-
- if (ipversion == IPV4_VERSION) {
- udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE +
- udp->udp_ip_snd_options_len;
- } else {
- udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len;
- }
-
- udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port,
- us->us_bind_fanout_size)];
-
- mutex_enter(&udpf->uf_lock);
- if (udp->udp_state == TS_DATA_XFER) {
- /* Already connected - clear out state */
- udp->udp_v6src = udp->udp_bound_v6src;
- udp->udp_state = TS_IDLE;
- }
/*
- * Create a default IP header with no IP options.
+ * We have to send a connection confirmation to
+ * keep TLI happy.
*/
- udp->udp_dstport = dstport;
- udp->udp_ipversion = ipversion;
- if (ipversion == IPV4_VERSION) {
- /*
- * Interpret a zero destination to mean loopback.
- * Update the T_CONN_REQ (sin/sin6) since it is used to
- * generate the T_CONN_CON.
- */
- if (v4dst == INADDR_ANY) {
- v4dst = htonl(INADDR_LOOPBACK);
- IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
- if (udp->udp_family == AF_INET) {
- sin->sin_addr.s_addr = v4dst;
- } else {
- sin6->sin6_addr = v6dst;
- }
- }
- udp->udp_v6dst = v6dst;
- udp->udp_flowinfo = 0;
-
- /*
- * If the destination address is multicast and
- * an outgoing multicast interface has been set,
- * use the address of that interface as our
- * source address if no source address has been set.
- */
- if (V4_PART_OF_V6(udp->udp_v6src) == INADDR_ANY &&
- CLASSD(v4dst) &&
- udp->udp_multicast_if_addr != INADDR_ANY) {
- IN6_IPADDR_TO_V4MAPPED(udp->udp_multicast_if_addr,
- &udp->udp_v6src);
- }
+ if (udp->udp_family == AF_INET) {
+ mp1 = mi_tpi_conn_con(NULL, (char *)sa,
+ sizeof (sin_t), NULL, 0);
} else {
- ASSERT(udp->udp_ipversion == IPV6_VERSION);
- /*
- * Interpret a zero destination to mean loopback.
- * Update the T_CONN_REQ (sin/sin6) since it is used to
- * generate the T_CONN_CON.
- */
- if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) {
- v6dst = ipv6_loopback;
- sin6->sin6_addr = v6dst;
- }
- udp->udp_v6dst = v6dst;
- udp->udp_flowinfo = flowinfo;
- /*
- * If the destination address is multicast and
- * an outgoing multicast interface has been set,
- * then the ip bind logic will pick the correct source
- * address (i.e. matching the outgoing multicast interface).
- */
+ mp1 = mi_tpi_conn_con(NULL, (char *)sa,
+ sizeof (sin6_t), NULL, 0);
}
-
- /*
- * Verify that the src/port/dst/port is unique for all
- * connections in TS_DATA_XFER
- */
- for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) {
- if (udp1->udp_state != TS_DATA_XFER)
- continue;
- if (udp->udp_port != udp1->udp_port ||
- udp->udp_ipversion != udp1->udp_ipversion ||
- dstport != udp1->udp_dstport ||
- !IN6_ARE_ADDR_EQUAL(&udp->udp_v6src, &udp1->udp_v6src) ||
- !IN6_ARE_ADDR_EQUAL(&v6dst, &udp1->udp_v6dst) ||
- !(IPCL_ZONE_MATCH(udp->udp_connp,
- udp1->udp_connp->conn_zoneid) ||
- IPCL_ZONE_MATCH(udp1->udp_connp,
- udp->udp_connp->conn_zoneid)))
- continue;
- mutex_exit(&udpf->uf_lock);
- udp->udp_pending_op = -1;
- rw_exit(&udp->udp_rwlock);
- udp_err_ack(q, mp, TBADADDR, 0);
- return;
- }
- udp->udp_state = TS_DATA_XFER;
- mutex_exit(&udpf->uf_lock);
-
- /*
- * Send down bind to IP to verify that there is a route
- * and to determine the source address.
- * This will come back as T_BIND_ACK with an IRE_DB_TYPE in rput.
- */
- if (udp->udp_family == AF_INET)
- mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (ipa_conn_t));
- else
- mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (ipa6_conn_t));
if (mp1 == NULL) {
-bind_failed:
- mutex_enter(&udpf->uf_lock);
- udp->udp_state = TS_IDLE;
- udp->udp_pending_op = -1;
- mutex_exit(&udpf->uf_lock);
- rw_exit(&udp->udp_rwlock);
udp_err_ack(q, mp, TSYSERR, ENOMEM);
return;
}
- rw_exit(&udp->udp_rwlock);
/*
- * We also have to send a connection confirmation to
- * keep TLI happy. Prepare it for udp_bind_result.
+ * ok_ack for T_CONN_REQ
*/
- if (udp->udp_family == AF_INET)
- mp2 = mi_tpi_conn_con(NULL, (char *)sin,
- sizeof (*sin), NULL, 0);
- else
- mp2 = mi_tpi_conn_con(NULL, (char *)sin6,
- sizeof (*sin6), NULL, 0);
- if (mp2 == NULL) {
- freemsg(mp1);
- rw_enter(&udp->udp_rwlock, RW_WRITER);
- goto bind_failed;
- }
-
mp = mi_tpi_ok_ack_alloc(mp);
if (mp == NULL) {
/* Unable to reuse the T_CONN_REQ for the ack. */
- freemsg(mp2);
- rw_enter(&udp->udp_rwlock, RW_WRITER);
- mutex_enter(&udpf->uf_lock);
- udp->udp_state = TS_IDLE;
- udp->udp_pending_op = -1;
- mutex_exit(&udpf->uf_lock);
- rw_exit(&udp->udp_rwlock);
+ /* mp1 is handed to udp_err_ack_prim(); do not free it here. */
udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM);
return;
}
- /* Hang onto the T_OK_ACK and T_CONN_CON for later. */
- linkb(mp1, mp);
- linkb(mp1, mp2);
-
- mblk_setcred(mp1, connp->conn_cred);
- if (udp->udp_family == AF_INET)
- mp1 = ip_bind_v4(q, mp1, connp);
- else
- mp1 = ip_bind_v6(q, mp1, connp, NULL);
-
- /* The above return NULL if the bind needs to be deferred */
- if (mp1 != NULL)
- udp_bind_result(connp, mp1);
- else
- CONN_INC_REF(connp);
+ error = udp_do_connect(connp, sa, len);
+ if (error != 0) {
+ freeb(mp1);
+ if (error < 0)
+ udp_err_ack(q, mp, -error, 0);
+ else
+ udp_err_ack(q, mp, TSYSERR, error);
+ } else {
+ putnext(connp->conn_rq, mp);
+ putnext(connp->conn_rq, mp1);
+ }
}
static int
-udp_close(queue_t *q)
+udp_tpi_close(queue_t *q, int flags)
{
- conn_t *connp = (conn_t *)q->q_ptr;
- udp_t *udp;
-
- ASSERT(connp != NULL && IPCL_IS_UDP(connp));
- udp = connp->conn_udp;
-
- udp_quiesce_conn(connp);
- ip_quiesce_conn(connp);
- /*
- * Disable read-side synchronous stream
- * interface and drain any queued data.
- */
- udp_rcv_drain(q, udp, B_TRUE);
- ASSERT(!udp->udp_direct_sockfs);
-
- qprocsoff(q);
-
- ASSERT(udp->udp_rcv_cnt == 0);
- ASSERT(udp->udp_rcv_msgcnt == 0);
- ASSERT(udp->udp_rcv_list_head == NULL);
- ASSERT(udp->udp_rcv_list_tail == NULL);
-
- udp_close_free(connp);
+ conn_t *connp;
- /*
- * Now we are truly single threaded on this stream, and can
- * delete the things hanging off the connp, and finally the connp.
- * We removed this connp from the fanout list, it cannot be
- * accessed thru the fanouts, and we already waited for the
- * conn_ref to drop to 0. We are already in close, so
- * there cannot be any other thread from the top. qprocsoff
- * has completed, and service has completed or won't run in
- * future.
- */
- ASSERT(connp->conn_ref == 1);
- inet_minor_free(connp->conn_minor_arena, connp->conn_dev);
- connp->conn_ref--;
- ipcl_conn_destroy(connp);
+ if (flags & SO_FALLBACK) {
+ /*
+ * stream is being closed while in fallback
+ * simply free the resources that were allocated
+ */
+ inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr));
+ qprocsoff(q);
+ goto done;
+ }
+ connp = Q_TO_CONN(q);
+ udp_do_close(connp);
+done:
q->q_ptr = WR(q)->q_ptr = NULL;
return (0);
}
@@ -1567,39 +894,21 @@ udp_close_free(conn_t *connp)
udp->udp_connp = connp;
}
-/*
- * This routine handles each T_DISCON_REQ message passed to udp
- * as an indicating that UDP is no longer connected. This results
- * in sending a T_BIND_REQ to IP to restore the binding to just
- * the local address/port.
- *
- * This routine sends down a T_BIND_REQ to IP with the following mblks:
- * T_BIND_REQ - specifying just the local address/port
- * T_OK_ACK - for the T_DISCON_REQ
- *
- * The disconnect completes in udp_bind_result.
- * When a T_BIND_ACK is received the appended T_OK_ACK is sent to the TPI user.
- * Should udp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will
- * convert it to an error ack for the appropriate primitive.
- */
-static void
-udp_disconnect(queue_t *q, mblk_t *mp)
+static int
+udp_do_disconnect(conn_t *connp)
{
udp_t *udp;
- mblk_t *mp1;
+ mblk_t *ire_mp;
udp_fanout_t *udpf;
udp_stack_t *us;
- conn_t *connp = Q_TO_CONN(q);
+ int error;
udp = connp->conn_udp;
us = udp->udp_us;
rw_enter(&udp->udp_rwlock, RW_WRITER);
if (udp->udp_state != TS_DATA_XFER || udp->udp_pending_op != -1) {
rw_exit(&udp->udp_rwlock);
- (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
- "udp_disconnect: bad state, %u", udp->udp_state);
- udp_err_ack(q, mp, TOUTSTATE, 0);
- return;
+ return (-TOUTSTATE);
}
udp->udp_pending_op = T_DISCON_REQ;
udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port,
@@ -1609,57 +918,85 @@ udp_disconnect(queue_t *q, mblk_t *mp)
udp->udp_state = TS_IDLE;
mutex_exit(&udpf->uf_lock);
- /*
- * Send down bind to IP to remove the full binding and revert
- * to the local address binding.
- */
- if (udp->udp_family == AF_INET)
- mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (sin_t));
- else
- mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (sin6_t));
- if (mp1 == NULL) {
+ if (udp->udp_family == AF_INET6) {
+ /* Rebuild the header template */
+ error = udp_build_hdrs(udp);
+ if (error != 0) {
+ udp->udp_pending_op = -1;
+ rw_exit(&udp->udp_rwlock);
+ return (error);
+ }
+ }
+
+ ire_mp = allocb(sizeof (ire_t), BPRI_HI);
+ if (ire_mp == NULL) {
+ mutex_enter(&udpf->uf_lock);
udp->udp_pending_op = -1;
+ mutex_exit(&udpf->uf_lock);
rw_exit(&udp->udp_rwlock);
- udp_err_ack(q, mp, TSYSERR, ENOMEM);
- return;
+ return (ENOMEM);
}
- mp = mi_tpi_ok_ack_alloc(mp);
+
+ rw_exit(&udp->udp_rwlock);
+
+ if (udp->udp_family == AF_INET6) {
+ error = ip_proto_bind_laddr_v6(connp, &ire_mp, IPPROTO_UDP,
+ &udp->udp_bound_v6src, udp->udp_port, B_TRUE);
+ } else {
+ error = ip_proto_bind_laddr_v4(connp, &ire_mp, IPPROTO_UDP,
+ V4_PART_OF_V6(udp->udp_bound_v6src), udp->udp_port, B_TRUE);
+ }
+
+ return (udp_post_ip_bind_connect(udp, ire_mp, error));
+}
+
+
+static void
+udp_tpi_disconnect(queue_t *q, mblk_t *mp)
+{
+ conn_t *connp = Q_TO_CONN(q);
+ int error;
+
+ /*
+ * Allocate the largest primitive we need to send back
+ * T_error_ack is > than T_ok_ack
+ */
+ mp = reallocb(mp, sizeof (struct T_error_ack), 1);
if (mp == NULL) {
/* Unable to reuse the T_DISCON_REQ for the ack. */
- udp->udp_pending_op = -1;
- rw_exit(&udp->udp_rwlock);
- udp_err_ack_prim(q, mp1, T_DISCON_REQ, TSYSERR, ENOMEM);
+ udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM);
return;
}
- if (udp->udp_family == AF_INET6) {
- int error;
+ error = udp_do_disconnect(connp);
- /* Rebuild the header template */
- error = udp_build_hdrs(udp);
- if (error != 0) {
- udp->udp_pending_op = -1;
- rw_exit(&udp->udp_rwlock);
- udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, error);
- freemsg(mp1);
- return;
+ if (error != 0) {
+ if (error < 0) {
+ udp_err_ack(q, mp, -error, 0);
+ } else {
+ udp_err_ack(q, mp, TSYSERR, error);
}
+ } else {
+ mp = mi_tpi_ok_ack_alloc(mp);
+ ASSERT(mp != NULL);
+ qreply(q, mp);
}
+}
- rw_exit(&udp->udp_rwlock);
- /* Append the T_OK_ACK to the T_BIND_REQ for udp_bind_ack */
- linkb(mp1, mp);
+int
+udp_disconnect(conn_t *connp)
+{
+ int error;
+ udp_t *udp = connp->conn_udp;
- if (udp->udp_family == AF_INET6)
- mp1 = ip_bind_v6(q, mp1, connp, NULL);
- else
- mp1 = ip_bind_v4(q, mp1, connp);
+ udp->udp_dgram_errind = B_FALSE;
- /* The above return NULL if the bind needs to be deferred */
- if (mp1 != NULL)
- udp_bind_result(connp, mp1);
- else
- CONN_INC_REF(connp);
+ error = udp_do_disconnect(connp);
+
+ if (error < 0)
+ error = proto_tlitosyserr(-error);
+
+ return (error);
}
/* This routine creates a T_ERROR_ACK message and passes it upstream. */
@@ -1783,8 +1120,8 @@ udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp,
* Assumes that IP has pulled up everything up to and including the ICMP header.
*/
static void
-udp_icmp_error(queue_t *q, mblk_t *mp)
-{
+udp_icmp_error(conn_t *connp, mblk_t *mp)
+{
icmph_t *icmph;
ipha_t *ipha;
int iph_hdr_length;
@@ -1793,15 +1130,16 @@ udp_icmp_error(queue_t *q, mblk_t *mp)
sin6_t sin6;
mblk_t *mp1;
int error = 0;
- udp_t *udp = Q_TO_UDP(q);
+ udp_t *udp = connp->conn_udp;
+ mp1 = NULL;
ipha = (ipha_t *)mp->b_rptr;
ASSERT(OK_32PTR(mp->b_rptr));
if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) {
ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
- udp_icmp_error_ipv6(q, mp);
+ udp_icmp_error_ipv6(connp, mp);
return;
}
ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION);
@@ -1850,27 +1188,66 @@ udp_icmp_error(queue_t *q, mblk_t *mp)
return;
}
+
switch (udp->udp_family) {
case AF_INET:
sin = sin_null;
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = ipha->ipha_dst;
sin.sin_port = udpha->uha_dst_port;
- mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL, 0,
- error);
+ if (IPCL_IS_NONSTR(connp)) {
+ rw_enter(&udp->udp_rwlock, RW_WRITER);
+ if (udp->udp_state == TS_DATA_XFER) {
+ if (sin.sin_port == udp->udp_dstport &&
+ sin.sin_addr.s_addr ==
+ V4_PART_OF_V6(udp->udp_v6dst)) {
+
+ rw_exit(&udp->udp_rwlock);
+ (*connp->conn_upcalls->su_set_error)
+ (connp->conn_upper_handle, error);
+ goto done;
+ }
+ } else {
+ udp->udp_delayed_error = error;
+ *((sin_t *)&udp->udp_delayed_addr) = sin;
+ }
+ rw_exit(&udp->udp_rwlock);
+ } else {
+ mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t),
+ NULL, 0, error);
+ }
break;
case AF_INET6:
sin6 = sin6_null;
sin6.sin6_family = AF_INET6;
IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr);
sin6.sin6_port = udpha->uha_dst_port;
+ if (IPCL_IS_NONSTR(connp)) {
+ rw_enter(&udp->udp_rwlock, RW_WRITER);
+ if (udp->udp_state == TS_DATA_XFER) {
+ if (sin6.sin6_port == udp->udp_dstport &&
+ IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr,
+ &udp->udp_v6dst)) {
+ rw_exit(&udp->udp_rwlock);
+ (*connp->conn_upcalls->su_set_error)
+ (connp->conn_upper_handle, error);
+ goto done;
+ }
+ } else {
+ udp->udp_delayed_error = error;
+ *((sin6_t *)&udp->udp_delayed_addr) = sin6;
+ }
+ rw_exit(&udp->udp_rwlock);
+ } else {
- mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
- NULL, 0, error);
+ mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
+ NULL, 0, error);
+ }
break;
}
- if (mp1)
- putnext(q, mp1);
+ if (mp1 != NULL)
+ putnext(connp->conn_rq, mp1);
+done:
freemsg(mp);
}
@@ -1881,7 +1258,7 @@ udp_icmp_error(queue_t *q, mblk_t *mp)
* ICMPv6 header.
*/
static void
-udp_icmp_error_ipv6(queue_t *q, mblk_t *mp)
+udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp)
{
icmp6_t *icmp6;
ip6_t *ip6h, *outer_ip6h;
@@ -1891,7 +1268,7 @@ udp_icmp_error_ipv6(queue_t *q, mblk_t *mp)
sin6_t sin6;
mblk_t *mp1;
int error = 0;
- udp_t *udp = Q_TO_UDP(q);
+ udp_t *udp = connp->conn_udp;
udp_stack_t *us = udp->udp_us;
outer_ip6h = (ip6_t *)mp->b_rptr;
@@ -1982,7 +1359,13 @@ udp_icmp_error_ipv6(queue_t *q, mblk_t *mp)
* message. Free it, then send our empty message.
*/
freemsg(mp);
- putnext(q, newmp);
+ if (!IPCL_IS_NONSTR(connp)) {
+ putnext(connp->conn_rq, newmp);
+ } else {
+ (*connp->conn_upcalls->su_recv)
+ (connp->conn_upper_handle, newmp, 0, 0, &error,
+ NULL);
+ }
return;
}
case ICMP6_TIME_EXCEEDED:
@@ -2018,10 +1401,30 @@ udp_icmp_error_ipv6(queue_t *q, mblk_t *mp)
sin6.sin6_port = udpha->uha_dst_port;
sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK;
- mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), NULL, 0,
- error);
- if (mp1)
- putnext(q, mp1);
+ if (IPCL_IS_NONSTR(connp)) {
+ rw_enter(&udp->udp_rwlock, RW_WRITER);
+ if (udp->udp_state == TS_DATA_XFER) {
+ if (sin6.sin6_port == udp->udp_dstport &&
+ IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr,
+ &udp->udp_v6dst)) {
+ rw_exit(&udp->udp_rwlock);
+ (*connp->conn_upcalls->su_set_error)
+ (connp->conn_upper_handle, error);
+ goto done;
+ }
+ } else {
+ udp->udp_delayed_error = error;
+ *((sin6_t *)&udp->udp_delayed_addr) = sin6;
+ }
+ rw_exit(&udp->udp_rwlock);
+ } else {
+ mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
+ NULL, 0, error);
+ if (mp1 != NULL)
+ putnext(connp->conn_rq, mp1);
+ }
+
+done:
freemsg(mp);
}
@@ -2166,6 +1569,18 @@ udp_copy_info(struct T_info_ack *tap, udp_t *udp)
tap->OPT_size = udp_max_optsize;
}
+static void
+udp_do_capability_ack(udp_t *udp, struct T_capability_ack *tcap,
+ t_uscalar_t cap_bits1)
+{
+ tcap->CAP_bits1 = 0;
+
+ if (cap_bits1 & TC1_INFO) {
+ udp_copy_info(&tcap->INFO_ack, udp);
+ tcap->CAP_bits1 |= TC1_INFO;
+ }
+}
+
/*
* This routine responds to T_CAPABILITY_REQ messages. It is called by
* udp_wput. Much of the T_CAPABILITY_ACK information is copied from
@@ -2187,12 +1602,7 @@ udp_capability_req(queue_t *q, mblk_t *mp)
return;
tcap = (struct T_capability_ack *)mp->b_rptr;
- tcap->CAP_bits1 = 0;
-
- if (cap_bits1 & TC1_INFO) {
- udp_copy_info(&tcap->INFO_ack, udp);
- tcap->CAP_bits1 |= TC1_INFO;
- }
+ udp_do_capability_ack(udp, tcap, cap_bits1);
qreply(q, mp);
}
@@ -2378,12 +1788,10 @@ static int
udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp,
boolean_t isv6)
{
- int err;
+ int error;
udp_t *udp;
conn_t *connp;
dev_t conn_dev;
- zoneid_t zoneid;
- netstack_t *ns;
udp_stack_t *us;
vmem_t *minor_arena;
@@ -2396,20 +1804,6 @@ udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp,
if (sflag == MODOPEN)
return (EINVAL);
- ns = netstack_find_by_cred(credp);
- ASSERT(ns != NULL);
- us = ns->netstack_udp;
- ASSERT(us != NULL);
-
- /*
- * For exclusive stacks we set the zoneid to zero
- * to make UDP operate as if in the global zone.
- */
- if (ns->netstack_stackid != GLOBAL_NETSTACKID)
- zoneid = GLOBAL_ZONEID;
- else
- zoneid = crgetzoneid(credp);
-
if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) &&
((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) {
minor_arena = ip_minor_arena_la;
@@ -2419,25 +1813,34 @@ udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp,
* or a non socket application is doing the open.
* Try to allocate from the small arena.
*/
- if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) {
- netstack_rele(ns);
+ if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0)
return (EBUSY);
- }
+
minor_arena = ip_minor_arena_sa;
}
- *devp = makedevice(getemajor(*devp), (minor_t)conn_dev);
+ if (flag & SO_FALLBACK) {
+ /*
+ * Non streams socket needs a stream to fallback to
+ */
+ RD(q)->q_ptr = (void *)conn_dev;
+ WR(q)->q_qinfo = &udp_fallback_sock_winit;
+ WR(q)->q_ptr = (void *)minor_arena;
+ qprocson(q);
+ return (0);
+ }
- connp = ipcl_conn_create(IPCL_UDPCONN, KM_SLEEP, ns);
- connp->conn_dev = conn_dev;
- connp->conn_minor_arena = minor_arena;
+ connp = udp_do_open(credp, isv6, KM_SLEEP);
+ if (connp == NULL) {
+ inet_minor_free(minor_arena, conn_dev);
+ return (ENOMEM);
+ }
udp = connp->conn_udp;
+ us = udp->udp_us;
- /*
- * ipcl_conn_create did a netstack_hold. Undo the hold that was
- * done by netstack_find_by_cred()
- */
- netstack_rele(ns);
+ *devp = makedevice(getemajor(*devp), (minor_t)conn_dev);
+ connp->conn_dev = conn_dev;
+ connp->conn_minor_arena = minor_arena;
/*
* Initialize the udp_t structure for this stream.
@@ -2452,79 +1855,39 @@ udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp,
ASSERT(connp->conn_udp == udp);
ASSERT(udp->udp_connp == connp);
- /* Set the initial state of the stream and the privilege status. */
- udp->udp_state = TS_UNBND;
- if (isv6) {
- udp->udp_family = AF_INET6;
- udp->udp_ipversion = IPV6_VERSION;
- udp->udp_max_hdr_len = IPV6_HDR_LEN + UDPH_SIZE;
- udp->udp_ttl = us->us_ipv6_hoplimit;
- connp->conn_af_isv6 = B_TRUE;
- connp->conn_flags |= IPCL_ISV6;
- } else {
- udp->udp_family = AF_INET;
- udp->udp_ipversion = IPV4_VERSION;
- udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE;
- udp->udp_ttl = us->us_ipv4_ttl;
- connp->conn_af_isv6 = B_FALSE;
- connp->conn_flags &= ~IPCL_ISV6;
- }
-
- udp->udp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
- udp->udp_pending_op = -1;
- connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
- connp->conn_zoneid = zoneid;
-
- udp->udp_open_time = lbolt64;
- udp->udp_open_pid = curproc->p_pid;
-
- /*
- * If the caller has the process-wide flag set, then default to MAC
- * exempt mode. This allows read-down to unlabeled hosts.
- */
- if (getpflags(NET_MAC_AWARE, credp) != 0)
- connp->conn_mac_exempt = B_TRUE;
-
if (flag & SO_SOCKSTR) {
connp->conn_flags |= IPCL_SOCKET;
udp->udp_issocket = B_TRUE;
udp->udp_direct_sockfs = B_TRUE;
}
- connp->conn_ulp_labeled = is_system_labeled();
-
- udp->udp_us = us;
-
q->q_hiwat = us->us_recv_hiwat;
WR(q)->q_hiwat = us->us_xmit_hiwat;
WR(q)->q_lowat = us->us_xmit_lowat;
- connp->conn_recv = udp_input;
- crhold(credp);
- connp->conn_cred = credp;
-
- mutex_enter(&connp->conn_lock);
- connp->conn_state_flags &= ~CONN_INCIPIENT;
- mutex_exit(&connp->conn_lock);
-
qprocson(q);
if (udp->udp_family == AF_INET6) {
/* Build initial header template for transmit */
- if ((err = udp_build_hdrs(udp)) != 0) {
+ if ((error = udp_build_hdrs(udp)) != 0) {
rw_exit(&udp->udp_rwlock);
qprocsoff(q);
+ inet_minor_free(minor_arena, conn_dev);
ipcl_conn_destroy(connp);
- return (err);
+ return (error);
}
}
rw_exit(&udp->udp_rwlock);
/* Set the Stream head write offset and high watermark. */
- (void) mi_set_sth_wroff(q,
+ (void) proto_set_tx_wroff(q, connp,
udp->udp_max_hdr_len + us->us_wroff_extra);
- (void) mi_set_sth_hiwat(q, udp_set_rcv_hiwat(udp, q->q_hiwat));
+ /* XXX udp_set_rcv_hiwat() doesn't hold the lock, is it a bug??? */
+ (void) proto_set_rx_hiwat(q, connp, udp_set_rcv_hiwat(udp, q->q_hiwat));
+ mutex_enter(&connp->conn_lock);
+ connp->conn_state_flags &= ~CONN_INCIPIENT;
+ mutex_exit(&connp->conn_lock);
return (0);
}
@@ -2582,21 +1945,16 @@ udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
* This routine retrieves the current status of socket options.
* It returns the size of the option retrieved.
*/
-int
-udp_opt_get_locked(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
+static int
+udp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr)
{
- int *i1 = (int *)ptr;
- conn_t *connp;
- udp_t *udp;
- ip6_pkt_t *ipp;
- int len;
- udp_stack_t *us;
-
- connp = Q_TO_CONN(q);
- udp = connp->conn_udp;
- ipp = &udp->udp_sticky_ipp;
- us = udp->udp_us;
+ udp_t *udp = connp->conn_udp;
+ udp_stack_t *us = udp->udp_us;
+ int *i1 = (int *)ptr;
+ ip6_pkt_t *ipp = &udp->udp_sticky_ipp;
+ int len;
+ ASSERT(RW_READ_HELD(&udp->udp_rwlock));
switch (level) {
case SOL_SOCKET:
switch (name) {
@@ -2625,10 +1983,10 @@ udp_opt_get_locked(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
break; /* goto sizeof (int) option return */
case SO_SNDBUF:
- *i1 = q->q_hiwat;
+ *i1 = udp->udp_xmit_hiwat;
break; /* goto sizeof (int) option return */
case SO_RCVBUF:
- *i1 = RD(q)->q_hiwat;
+ *i1 = udp->udp_rcv_disply_hiwat;
break; /* goto sizeof (int) option return */
case SO_DGRAM_ERRIND:
*i1 = udp->udp_dgram_errind;
@@ -2907,15 +2265,15 @@ udp_opt_get_locked(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
}
int
-udp_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
+udp_tpi_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
{
- udp_t *udp;
+ udp_t *udp;
int err;
udp = Q_TO_UDP(q);
rw_enter(&udp->udp_rwlock, RW_READER);
- err = udp_opt_get_locked(q, level, name, ptr);
+ err = udp_opt_get(Q_TO_CONN(q), level, name, ptr);
rw_exit(&udp->udp_rwlock);
return (err);
}
@@ -2924,83 +2282,34 @@ udp_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
* This routine sets socket options.
*/
/* ARGSUSED */
-int
-udp_opt_set_locked(queue_t *q, uint_t optset_context, int level,
- int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp,
- uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk)
+static int
+udp_do_opt_set(conn_t *connp, int level, int name, uint_t inlen,
+ uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, cred_t *cr,
+ void *thisdg_attrs, boolean_t checkonly)
{
udpattrs_t *attrs = thisdg_attrs;
int *i1 = (int *)invalp;
boolean_t onoff = (*i1 == 0) ? 0 : 1;
- boolean_t checkonly;
+ udp_t *udp = connp->conn_udp;
+ udp_stack_t *us = udp->udp_us;
int error;
- conn_t *connp;
- udp_t *udp;
uint_t newlen;
- udp_stack_t *us;
size_t sth_wroff;
- connp = Q_TO_CONN(q);
- udp = connp->conn_udp;
- us = udp->udp_us;
-
- switch (optset_context) {
- case SETFN_OPTCOM_CHECKONLY:
- checkonly = B_TRUE;
- /*
- * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
- * inlen != 0 implies value supplied and
- * we have to "pretend" to set it.
- * inlen == 0 implies that there is no
- * value part in T_CHECK request and just validation
- * done elsewhere should be enough, we just return here.
- */
- if (inlen == 0) {
- *outlenp = 0;
- return (0);
- }
- break;
- case SETFN_OPTCOM_NEGOTIATE:
- checkonly = B_FALSE;
- break;
- case SETFN_UD_NEGOTIATE:
- case SETFN_CONN_NEGOTIATE:
- checkonly = B_FALSE;
- /*
- * Negotiating local and "association-related" options
- * through T_UNITDATA_REQ.
- *
- * Following routine can filter out ones we do not
- * want to be "set" this way.
- */
- if (!udp_opt_allow_udr_set(level, name)) {
- *outlenp = 0;
- return (EINVAL);
- }
- break;
- default:
- /*
- * We should never get here
- */
- *outlenp = 0;
- return (EINVAL);
- }
-
- ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
- (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));
-
+ ASSERT(RW_WRITE_HELD(&udp->udp_rwlock));
/*
* For fixed length options, no sanity check
* of passed in length is done. It is assumed *_optcom_req()
* routines do the right thing.
*/
-
switch (level) {
case SOL_SOCKET:
switch (name) {
case SO_REUSEADDR:
- if (!checkonly)
+ if (!checkonly) {
udp->udp_reuseaddr = onoff;
+ PASS_OPT_TO_IP(connp);
+ }
break;
case SO_DEBUG:
if (!checkonly)
@@ -3011,16 +2320,22 @@ udp_opt_set_locked(queue_t *q, uint_t optset_context, int level,
* but are only meaningful to IP.
*/
case SO_DONTROUTE:
- if (!checkonly)
+ if (!checkonly) {
udp->udp_dontroute = onoff;
+ PASS_OPT_TO_IP(connp);
+ }
break;
case SO_USELOOPBACK:
- if (!checkonly)
+ if (!checkonly) {
udp->udp_useloopback = onoff;
+ PASS_OPT_TO_IP(connp);
+ }
break;
case SO_BROADCAST:
- if (!checkonly)
+ if (!checkonly) {
udp->udp_broadcast = onoff;
+ PASS_OPT_TO_IP(connp);
+ }
break;
case SO_SNDBUF:
@@ -3029,7 +2344,8 @@ udp_opt_set_locked(queue_t *q, uint_t optset_context, int level,
return (ENOBUFS);
}
if (!checkonly) {
- q->q_hiwat = *i1;
+ udp->udp_xmit_hiwat = *i1;
+ connp->conn_wq->q_hiwat = *i1;
}
break;
case SO_RCVBUF:
@@ -3038,10 +2354,13 @@ udp_opt_set_locked(queue_t *q, uint_t optset_context, int level,
return (ENOBUFS);
}
if (!checkonly) {
- RD(q)->q_hiwat = *i1;
+ int size;
+
+ udp->udp_rcv_disply_hiwat = *i1;
+ size = udp_set_rcv_hiwat(udp, *i1);
rw_exit(&udp->udp_rwlock);
- (void) mi_set_sth_hiwat(RD(q),
- udp_set_rcv_hiwat(udp, *i1));
+ (void) proto_set_rx_hiwat(connp->conn_rq, connp,
+ size);
rw_enter(&udp->udp_rwlock, RW_WRITER);
}
break;
@@ -3065,11 +2384,20 @@ udp_opt_set_locked(queue_t *q, uint_t optset_context, int level,
udp->udp_timestamp = onoff;
break;
case SO_ANON_MLP:
- /* Pass option along to IP level for handling */
- return (-EINVAL);
+ if (!checkonly) {
+ connp->conn_anon_mlp = onoff;
+ PASS_OPT_TO_IP(connp);
+ }
+ break;
case SO_MAC_EXEMPT:
- /* Pass option along to IP level for handling */
- return (-EINVAL);
+ if (secpolicy_net_mac_aware(cr) != 0 ||
+ udp->udp_state != TS_UNBND)
+ return (EACCES);
+ if (!checkonly) {
+ connp->conn_mac_exempt = onoff;
+ PASS_OPT_TO_IP(connp);
+ }
+ break;
case SCM_UCRED: {
struct ucred_s *ucr;
cred_t *cr, *newcr;
@@ -3149,7 +2477,8 @@ udp_opt_set_locked(queue_t *q, uint_t optset_context, int level,
UDPH_SIZE + udp->udp_ip_snd_options_len;
sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra;
rw_exit(&udp->udp_rwlock);
- (void) mi_set_sth_wroff(RD(q), sth_wroff);
+ (void) proto_set_tx_wroff(connp->conn_rq, connp,
+ sth_wroff);
rw_enter(&udp->udp_rwlock, RW_WRITER);
break;
@@ -3173,6 +2502,7 @@ udp_opt_set_locked(queue_t *q, uint_t optset_context, int level,
if (!checkonly) {
udp->udp_multicast_if_addr =
inap->s_addr;
+ PASS_OPT_TO_IP(connp);
}
break;
}
@@ -3181,8 +2511,10 @@ udp_opt_set_locked(queue_t *q, uint_t optset_context, int level,
udp->udp_multicast_ttl = *invalp;
break;
case IP_MULTICAST_LOOP:
- if (!checkonly)
+ if (!checkonly) {
connp->conn_multicast_loop = *invalp;
+ PASS_OPT_TO_IP(connp);
+ }
break;
case IP_RECVOPTS:
if (!checkonly)
@@ -3193,12 +2525,16 @@ udp_opt_set_locked(queue_t *q, uint_t optset_context, int level,
udp->udp_recvdstaddr = onoff;
break;
case IP_RECVIF:
- if (!checkonly)
+ if (!checkonly) {
udp->udp_recvif = onoff;
+ PASS_OPT_TO_IP(connp);
+ }
break;
case IP_RECVSLLA:
- if (!checkonly)
+ if (!checkonly) {
udp->udp_recvslla = onoff;
+ PASS_OPT_TO_IP(connp);
+ }
break;
case IP_RECVTTL:
if (!checkonly)
@@ -3278,12 +2614,16 @@ udp_opt_set_locked(queue_t *q, uint_t optset_context, int level,
*/
return (-EINVAL);
case IP_BOUND_IF:
- if (!checkonly)
+ if (!checkonly) {
udp->udp_bound_if = *i1;
+ PASS_OPT_TO_IP(connp);
+ }
break;
case IP_UNSPEC_SRC:
- if (!checkonly)
+ if (!checkonly) {
udp->udp_unspec_source = onoff;
+ PASS_OPT_TO_IP(connp);
+ }
break;
case IP_BROADCAST_TTL:
if (!checkonly)
@@ -3315,8 +2655,10 @@ udp_opt_set_locked(queue_t *q, uint_t optset_context, int level,
switch (name) {
case IPV6_MULTICAST_IF:
- if (!checkonly)
+ if (!checkonly) {
udp->udp_multicast_if_index = *i1;
+ PASS_OPT_TO_IP(connp);
+ }
break;
case IPV6_UNICAST_HOPS:
/* -1 means use default */
@@ -3371,8 +2713,10 @@ udp_opt_set_locked(queue_t *q, uint_t optset_context, int level,
*outlenp = 0;
return (EINVAL);
}
- if (!checkonly)
+ if (!checkonly) {
connp->conn_multicast_loop = *i1;
+ PASS_OPT_TO_IP(connp);
+ }
break;
case IPV6_JOIN_GROUP:
case IPV6_LEAVE_GROUP:
@@ -3389,53 +2733,71 @@ udp_opt_set_locked(queue_t *q, uint_t optset_context, int level,
*/
return (-EINVAL);
case IPV6_BOUND_IF:
- if (!checkonly)
+ if (!checkonly) {
udp->udp_bound_if = *i1;
+ PASS_OPT_TO_IP(connp);
+ }
break;
case IPV6_UNSPEC_SRC:
- if (!checkonly)
+ if (!checkonly) {
udp->udp_unspec_source = onoff;
+ PASS_OPT_TO_IP(connp);
+ }
break;
/*
* Set boolean switches for ancillary data delivery
*/
case IPV6_RECVPKTINFO:
- if (!checkonly)
+ if (!checkonly) {
udp->udp_ip_recvpktinfo = onoff;
+ PASS_OPT_TO_IP(connp);
+ }
break;
case IPV6_RECVTCLASS:
if (!checkonly) {
udp->udp_ipv6_recvtclass = onoff;
+ PASS_OPT_TO_IP(connp);
}
break;
case IPV6_RECVPATHMTU:
if (!checkonly) {
udp->udp_ipv6_recvpathmtu = onoff;
+ PASS_OPT_TO_IP(connp);
}
break;
case IPV6_RECVHOPLIMIT:
- if (!checkonly)
+ if (!checkonly) {
udp->udp_ipv6_recvhoplimit = onoff;
+ PASS_OPT_TO_IP(connp);
+ }
break;
case IPV6_RECVHOPOPTS:
- if (!checkonly)
+ if (!checkonly) {
udp->udp_ipv6_recvhopopts = onoff;
+ PASS_OPT_TO_IP(connp);
+ }
break;
case IPV6_RECVDSTOPTS:
- if (!checkonly)
+ if (!checkonly) {
udp->udp_ipv6_recvdstopts = onoff;
+ PASS_OPT_TO_IP(connp);
+ }
break;
case _OLD_IPV6_RECVDSTOPTS:
if (!checkonly)
udp->udp_old_ipv6_recvdstopts = onoff;
break;
case IPV6_RECVRTHDRDSTOPTS:
- if (!checkonly)
+ if (!checkonly) {
udp->udp_ipv6_recvrthdrdstopts = onoff;
+ PASS_OPT_TO_IP(connp);
+ }
break;
case IPV6_RECVRTHDR:
- if (!checkonly)
+ if (!checkonly) {
udp->udp_ipv6_recvrthdr = onoff;
+ PASS_OPT_TO_IP(connp);
+ }
break;
/*
* Set sticky options or ancillary data.
@@ -3477,6 +2839,7 @@ udp_opt_set_locked(queue_t *q, uint_t optset_context, int level,
error = udp_build_hdrs(udp);
if (error != 0)
return (error);
+ PASS_OPT_TO_IP(connp);
}
break;
case IPV6_HOPLIMIT:
@@ -3541,8 +2904,9 @@ udp_opt_set_locked(queue_t *q, uint_t optset_context, int level,
} else {
sin6_t *sin6 = (sin6_t *)invalp;
- if (sin6->sin6_family != AF_INET6)
+ if (sin6->sin6_family != AF_INET6) {
return (EAFNOSUPPORT);
+ }
if (IN6_IS_ADDR_V4MAPPED(
&sin6->sin6_addr))
return (EADDRNOTAVAIL);
@@ -3557,6 +2921,7 @@ udp_opt_set_locked(queue_t *q, uint_t optset_context, int level,
error = udp_build_hdrs(udp);
if (error != 0)
return (error);
+ PASS_OPT_TO_IP(connp);
}
break;
case IPV6_HOPOPTS: {
@@ -3785,6 +3150,8 @@ udp_opt_set_locked(queue_t *q, uint_t optset_context, int level,
}
if (!checkonly) {
+ int size;
+
udp->udp_nat_t_endpoint = onoff;
udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
@@ -3795,8 +3162,10 @@ udp_opt_set_locked(queue_t *q, uint_t optset_context, int level,
udp->udp_max_hdr_len +=
sizeof (uint32_t);
}
- (void) mi_set_sth_wroff(RD(q),
- udp->udp_max_hdr_len + us->us_wroff_extra);
+ size = udp->udp_max_hdr_len +
+ us->us_wroff_extra;
+ (void) proto_set_tx_wroff(connp->conn_rq, connp,
+ size);
}
break;
default:
@@ -3820,20 +3189,82 @@ udp_opt_set_locked(queue_t *q, uint_t optset_context, int level,
}
int
-udp_opt_set(queue_t *q, uint_t optset_context, int level,
- int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp,
- uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk)
+udp_opt_set(conn_t *connp, uint_t optset_context, int level, int name,
+ uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
+ void *thisdg_attrs, cred_t *cr)
{
- udp_t *udp;
- int err;
+ int error;
+ boolean_t checkonly;
- udp = Q_TO_UDP(q);
+ error = 0;
+ switch (optset_context) {
+ case SETFN_OPTCOM_CHECKONLY:
+ checkonly = B_TRUE;
+ /*
+ * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
+ * inlen != 0 implies value supplied and
+ * we have to "pretend" to set it.
+ * inlen == 0 implies that there is no
+ * value part in T_CHECK request and just validation
+ * done elsewhere should be enough, we just return here.
+ */
+ if (inlen == 0) {
+ *outlenp = 0;
+ goto done;
+ }
+ break;
+ case SETFN_OPTCOM_NEGOTIATE:
+ checkonly = B_FALSE;
+ break;
+ case SETFN_UD_NEGOTIATE:
+ case SETFN_CONN_NEGOTIATE:
+ checkonly = B_FALSE;
+ /*
+ * Negotiating local and "association-related" options
+ * through T_UNITDATA_REQ.
+ *
+ * Following routine can filter out ones we do not
+ * want to be "set" this way.
+ */
+ if (!udp_opt_allow_udr_set(level, name)) {
+ *outlenp = 0;
+ error = EINVAL;
+ goto done;
+ }
+ break;
+ default:
+ /*
+ * We should never get here
+ */
+ *outlenp = 0;
+ error = EINVAL;
+ goto done;
+ }
+
+ ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
+ (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));
+
+ error = udp_do_opt_set(connp, level, name, inlen, invalp, outlenp,
+ outvalp, cr, thisdg_attrs, checkonly);
+done:
+ return (error);
+}
+
+/* ARGSUSED */
+int
+udp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name,
+ uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
+ void *thisdg_attrs, cred_t *cr, mblk_t *mblk)
+{
+ conn_t *connp = Q_TO_CONN(q);
+ int error;
+ udp_t *udp = connp->conn_udp;
rw_enter(&udp->udp_rwlock, RW_WRITER);
- err = udp_opt_set_locked(q, optset_context, level, name, inlen, invalp,
- outlenp, outvalp, thisdg_attrs, cr, mblk);
+ error = udp_opt_set(connp, optset_context, level, name, inlen, invalp,
+ outlenp, outvalp, thisdg_attrs, cr);
rw_exit(&udp->udp_rwlock);
- return (err);
+ return (error);
}
/*
@@ -3853,8 +3284,11 @@ udp_build_hdrs(udp_t *udp)
udpha_t *udpha;
ip6_pkt_t *ipp = &udp->udp_sticky_ipp;
size_t sth_wroff;
+ conn_t *connp = udp->udp_connp;
ASSERT(RW_WRITE_HELD(&udp->udp_rwlock));
+ ASSERT(connp != NULL);
+
hdrs_len = ip_total_hdrs_len_v6(ipp) + UDPH_SIZE;
ASSERT(hdrs_len != 0);
if (hdrs_len != udp->udp_sticky_hdrs_len) {
@@ -3892,7 +3326,8 @@ udp_build_hdrs(udp_t *udp)
udp->udp_max_hdr_len = hdrs_len;
sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra;
rw_exit(&udp->udp_rwlock);
- (void) mi_set_sth_wroff(udp->udp_connp->conn_rq, sth_wroff);
+ (void) proto_set_tx_wroff(udp->udp_connp->conn_rq,
+ udp->udp_connp, sth_wroff);
rw_enter(&udp->udp_rwlock, RW_WRITER);
}
return (0);
@@ -4164,6 +3599,33 @@ udp_save_ip_rcv_opt(udp_t *udp, void *opt, int opt_len)
}
}
+static void
+udp_queue_fallback(udp_t *udp, mblk_t *mp)
+{
+ ASSERT(MUTEX_HELD(&udp->udp_recv_lock));
+ if (IPCL_IS_NONSTR(udp->udp_connp)) {
+ /*
+ * fallback has started but messages have not been moved yet
+ */
+ if (udp->udp_fallback_queue_head == NULL) {
+ ASSERT(udp->udp_fallback_queue_tail == NULL);
+ udp->udp_fallback_queue_head = mp;
+ udp->udp_fallback_queue_tail = mp;
+ } else {
+ ASSERT(udp->udp_fallback_queue_tail != NULL);
+ udp->udp_fallback_queue_tail->b_next = mp;
+ udp->udp_fallback_queue_tail = mp;
+ }
+ mutex_exit(&udp->udp_recv_lock);
+ } else {
+ /*
+ * no more fallbacks possible, ok to drop lock.
+ */
+ mutex_exit(&udp->udp_recv_lock);
+ putnext(udp->udp_connp->conn_rq, mp);
+ }
+}
+
/* ARGSUSED2 */
static void
udp_input(void *arg1, mblk_t *mp, void *arg2)
@@ -4222,7 +3684,7 @@ udp_input(void *arg1, mblk_t *mp, void *arg2)
/*
* ICMP messages.
*/
- udp_icmp_error(connp->conn_rq, mp);
+ udp_icmp_error(connp, mp);
return;
}
}
@@ -4403,7 +3865,6 @@ udp_input(void *arg1, mblk_t *mp, void *arg2)
UDP_STAT(us, udp_in_recvucred);
}
- /* XXX FIXME: apply to AF_INET6 as well */
/*
* If SO_TIMESTAMP is set allocate the appropriate sized
* buffer. Since gethrestime() expects a pointer aligned
@@ -4873,7 +4334,6 @@ udp_input(void *arg1, mblk_t *mp, void *arg2)
dstopt += ipp.ipp_dstoptslen;
udi_size -= toh->len;
}
-
if (cr != NULL) {
struct T_opthdr *toh;
@@ -4915,23 +4375,37 @@ udp_input(void *arg1, mblk_t *mp, void *arg2)
if (options_mp != NULL)
freeb(options_mp);
- if (udp_bits.udpb_direct_sockfs) {
- /*
- * There is nothing above us except for the stream head;
- * use the read-side synchronous stream interface in
- * order to reduce the time spent in interrupt thread.
- */
- ASSERT(udp->udp_issocket);
- udp_rcv_enqueue(connp->conn_rq, udp, mp, mp_len);
+ if (IPCL_IS_NONSTR(connp)) {
+ int error;
+
+ if ((*connp->conn_upcalls->su_recv)
+ (connp->conn_upper_handle, mp, msgdsize(mp), 0, &error,
+ NULL) < 0) {
+ mutex_enter(&udp->udp_recv_lock);
+ if (error == ENOSPC) {
+ /*
+ * let's confirm while holding the lock
+ */
+ if ((*connp->conn_upcalls->su_recv)
+ (connp->conn_upper_handle, NULL, 0, 0,
+ &error, NULL) < 0) {
+ if (error == ENOSPC) {
+ connp->conn_flow_cntrld =
+ B_TRUE;
+ } else {
+ ASSERT(error == EOPNOTSUPP);
+ }
+ }
+ mutex_exit(&udp->udp_recv_lock);
+ } else {
+ ASSERT(error == EOPNOTSUPP);
+ udp_queue_fallback(udp, mp);
+ }
+ }
} else {
- /*
- * Use regular STREAMS interface to pass data upstream
- * if this is not a socket endpoint, or if we have
- * switched over to the slow mode due to sockmod being
- * popped or a module being pushed on top of us.
- */
putnext(connp->conn_rq, mp);
}
+ ASSERT(MUTEX_NOT_HELD(&udp->udp_recv_lock));
return;
tossit:
@@ -4942,243 +4416,6 @@ tossit:
}
/*
- * Handle the results of a T_BIND_REQ whether deferred by IP or handled
- * immediately.
- */
-static void
-udp_bind_result(conn_t *connp, mblk_t *mp)
-{
- struct T_error_ack *tea;
-
- switch (mp->b_datap->db_type) {
- case M_PROTO:
- case M_PCPROTO:
- /* M_PROTO messages contain some type of TPI message. */
- ASSERT((uintptr_t)(mp->b_wptr - mp->b_rptr) <=
- (uintptr_t)INT_MAX);
- if (mp->b_wptr - mp->b_rptr < sizeof (t_scalar_t)) {
- freemsg(mp);
- return;
- }
- tea = (struct T_error_ack *)mp->b_rptr;
-
- switch (tea->PRIM_type) {
- case T_ERROR_ACK:
- switch (tea->ERROR_prim) {
- case O_T_BIND_REQ:
- case T_BIND_REQ:
- udp_bind_error(connp, mp);
- return;
- default:
- break;
- }
- ASSERT(0);
- freemsg(mp);
- return;
-
- case T_BIND_ACK:
- udp_bind_ack(connp, mp);
- return;
-
- default:
- break;
- }
- freemsg(mp);
- return;
- default:
- /* FIXME: other cases? */
- ASSERT(0);
- freemsg(mp);
- return;
- }
-}
-
-/*
- * Process a T_BIND_ACK
- */
-static void
-udp_bind_ack(conn_t *connp, mblk_t *mp)
-{
- udp_t *udp = connp->conn_udp;
- mblk_t *mp1;
- ire_t *ire;
- struct T_bind_ack *tba;
- uchar_t *addrp;
- ipa_conn_t *ac;
- ipa6_conn_t *ac6;
- udp_fanout_t *udpf;
- udp_stack_t *us = udp->udp_us;
-
- ASSERT(udp->udp_pending_op != -1);
- rw_enter(&udp->udp_rwlock, RW_WRITER);
- /*
- * If a broadcast/multicast address was bound set
- * the source address to 0.
- * This ensures no datagrams with broadcast address
- * as source address are emitted (which would violate
- * RFC1122 - Hosts requirements)
- *
- * Note that when connecting the returned IRE is
- * for the destination address and we only perform
- * the broadcast check for the source address (it
- * is OK to connect to a broadcast/multicast address.)
- */
- mp1 = mp->b_cont;
- if (mp1 != NULL && mp1->b_datap->db_type == IRE_DB_TYPE) {
- ire = (ire_t *)mp1->b_rptr;
-
- /*
- * Note: we get IRE_BROADCAST for IPv6 to "mark" a multicast
- * local address.
- */
- udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port,
- us->us_bind_fanout_size)];
- if (ire->ire_type == IRE_BROADCAST &&
- udp->udp_state != TS_DATA_XFER) {
- ASSERT(udp->udp_pending_op == T_BIND_REQ ||
- udp->udp_pending_op == O_T_BIND_REQ);
- /* This was just a local bind to a broadcast addr */
- mutex_enter(&udpf->uf_lock);
- V6_SET_ZERO(udp->udp_v6src);
- mutex_exit(&udpf->uf_lock);
- if (udp->udp_family == AF_INET6)
- (void) udp_build_hdrs(udp);
- } else if (V6_OR_V4_INADDR_ANY(udp->udp_v6src)) {
- /*
- * Local address not yet set - pick it from the
- * T_bind_ack
- */
- tba = (struct T_bind_ack *)mp->b_rptr;
- addrp = &mp->b_rptr[tba->ADDR_offset];
- switch (udp->udp_family) {
- case AF_INET:
- if (tba->ADDR_length == sizeof (ipa_conn_t)) {
- ac = (ipa_conn_t *)addrp;
- } else {
- ASSERT(tba->ADDR_length ==
- sizeof (ipa_conn_x_t));
- ac = &((ipa_conn_x_t *)addrp)->acx_conn;
- }
- mutex_enter(&udpf->uf_lock);
- IN6_IPADDR_TO_V4MAPPED(ac->ac_laddr,
- &udp->udp_v6src);
- mutex_exit(&udpf->uf_lock);
- break;
- case AF_INET6:
- if (tba->ADDR_length == sizeof (ipa6_conn_t)) {
- ac6 = (ipa6_conn_t *)addrp;
- } else {
- ASSERT(tba->ADDR_length ==
- sizeof (ipa6_conn_x_t));
- ac6 = &((ipa6_conn_x_t *)
- addrp)->ac6x_conn;
- }
- mutex_enter(&udpf->uf_lock);
- udp->udp_v6src = ac6->ac6_laddr;
- mutex_exit(&udpf->uf_lock);
- (void) udp_build_hdrs(udp);
- break;
- }
- }
- mp1 = mp1->b_cont;
- }
- udp->udp_pending_op = -1;
- rw_exit(&udp->udp_rwlock);
- /*
- * Look for one or more appended ACK message added by
- * udp_connect or udp_disconnect.
- * If none found just send up the T_BIND_ACK.
- * udp_connect has appended a T_OK_ACK and a T_CONN_CON.
- * udp_disconnect has appended a T_OK_ACK.
- */
- if (mp1 != NULL) {
- if (mp->b_cont == mp1)
- mp->b_cont = NULL;
- else {
- ASSERT(mp->b_cont->b_cont == mp1);
- mp->b_cont->b_cont = NULL;
- }
- freemsg(mp);
- mp = mp1;
- while (mp != NULL) {
- mp1 = mp->b_cont;
- mp->b_cont = NULL;
- putnext(connp->conn_rq, mp);
- mp = mp1;
- }
- return;
- }
- freemsg(mp->b_cont);
- mp->b_cont = NULL;
- putnext(connp->conn_rq, mp);
-}
-
-static void
-udp_bind_error(conn_t *connp, mblk_t *mp)
-{
- udp_t *udp = connp->conn_udp;
- struct T_error_ack *tea;
- udp_fanout_t *udpf;
- udp_stack_t *us = udp->udp_us;
-
- tea = (struct T_error_ack *)mp->b_rptr;
-
- /*
- * If our O_T_BIND_REQ/T_BIND_REQ fails,
- * clear out the associated port and source
- * address before passing the message
- * upstream. If this was caused by a T_CONN_REQ
- * revert back to bound state.
- */
-
- rw_enter(&udp->udp_rwlock, RW_WRITER);
- ASSERT(udp->udp_pending_op != -1);
- tea->ERROR_prim = udp->udp_pending_op;
- udp->udp_pending_op = -1;
- udpf = &us->us_bind_fanout[
- UDP_BIND_HASH(udp->udp_port,
- us->us_bind_fanout_size)];
- mutex_enter(&udpf->uf_lock);
-
- switch (tea->ERROR_prim) {
- case T_CONN_REQ:
- ASSERT(udp->udp_state == TS_DATA_XFER);
- /* Connect failed */
- /* Revert back to the bound source */
- udp->udp_v6src = udp->udp_bound_v6src;
- udp->udp_state = TS_IDLE;
- mutex_exit(&udpf->uf_lock);
- if (udp->udp_family == AF_INET6)
- (void) udp_build_hdrs(udp);
- rw_exit(&udp->udp_rwlock);
- break;
-
- case T_DISCON_REQ:
- case T_BIND_REQ:
- case O_T_BIND_REQ:
- V6_SET_ZERO(udp->udp_v6src);
- V6_SET_ZERO(udp->udp_bound_v6src);
- udp->udp_state = TS_UNBND;
- udp_bind_hash_remove(udp, B_TRUE);
- udp->udp_port = 0;
- mutex_exit(&udpf->uf_lock);
- if (udp->udp_family == AF_INET6)
- (void) udp_build_hdrs(udp);
- rw_exit(&udp->udp_rwlock);
- break;
-
- default:
- mutex_exit(&udpf->uf_lock);
- rw_exit(&udp->udp_rwlock);
- (void) mi_strlog(connp->conn_rq, 1,
- SL_ERROR|SL_TRACE,
- "udp_input_other: bad ERROR_prim, "
- "len %d", tea->ERROR_prim);
- }
- putnext(connp->conn_rq, mp);
-}
-
-/*
* return SNMP stuff in buffer in mpdata. We don't hold any lock and report
* information that can be changing beneath us.
*/
@@ -5589,64 +4826,23 @@ done:
* is called by udp_wput to handle T_UNBIND_REQ messages.
*/
static void
-udp_unbind(queue_t *q, mblk_t *mp)
+udp_tpi_unbind(queue_t *q, mblk_t *mp)
{
- udp_t *udp = Q_TO_UDP(q);
- udp_fanout_t *udpf;
- udp_stack_t *us = udp->udp_us;
-
- if (cl_inet_unbind != NULL) {
- /*
- * Running in cluster mode - register unbind information
- */
- if (udp->udp_ipversion == IPV4_VERSION) {
- (*cl_inet_unbind)(IPPROTO_UDP, AF_INET,
- (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)),
- (in_port_t)udp->udp_port);
- } else {
- (*cl_inet_unbind)(IPPROTO_UDP, AF_INET6,
- (uint8_t *)&(udp->udp_v6src),
- (in_port_t)udp->udp_port);
- }
- }
+ conn_t *connp = Q_TO_CONN(q);
+ int error;
- rw_enter(&udp->udp_rwlock, RW_WRITER);
- if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) {
- rw_exit(&udp->udp_rwlock);
- udp_err_ack(q, mp, TOUTSTATE, 0);
+ error = udp_do_unbind(connp);
+ if (error) {
+ if (error < 0)
+ udp_err_ack(q, mp, -error, 0);
+ else
+ udp_err_ack(q, mp, TSYSERR, error);
return;
}
- udp->udp_pending_op = T_UNBIND_REQ;
- rw_exit(&udp->udp_rwlock);
- /*
- * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK
- * and therefore ip_unbind must never return NULL.
- */
- mp = ip_unbind(q, mp);
+ mp = mi_tpi_ok_ack_alloc(mp);
ASSERT(mp != NULL);
ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK);
-
- /*
- * Once we're unbound from IP, the pending operation may be cleared
- * here.
- */
- rw_enter(&udp->udp_rwlock, RW_WRITER);
- udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port,
- us->us_bind_fanout_size)];
- mutex_enter(&udpf->uf_lock);
- udp_bind_hash_remove(udp, B_TRUE);
- V6_SET_ZERO(udp->udp_v6src);
- V6_SET_ZERO(udp->udp_bound_v6src);
- udp->udp_port = 0;
- mutex_exit(&udpf->uf_lock);
-
- udp->udp_pending_op = -1;
- udp->udp_state = TS_UNBND;
- if (udp->udp_family == AF_INET6)
- (void) udp_build_hdrs(udp);
- rw_exit(&udp->udp_rwlock);
-
qreply(q, mp);
}
@@ -5748,27 +4944,29 @@ udp_update_label(queue_t *wq, mblk_t *mp, ipaddr_t dst)
static mblk_t *
udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port,
- uint_t srcid, int *error, boolean_t insert_spi)
+ uint_t srcid, int *error, boolean_t insert_spi, struct nmsghdr *msg,
+ cred_t *cr, pid_t pid)
{
- udp_t *udp = connp->conn_udp;
- queue_t *q = connp->conn_wq;
- mblk_t *mp1 = mp;
- mblk_t *mp2;
- ipha_t *ipha;
- int ip_hdr_length;
- uint32_t ip_len;
- udpha_t *udpha;
- boolean_t lock_held = B_FALSE;
+ udp_t *udp = connp->conn_udp;
+ mblk_t *mp1 = mp;
+ mblk_t *mp2;
+ ipha_t *ipha;
+ int ip_hdr_length;
+ uint32_t ip_len;
+ udpha_t *udpha;
+ boolean_t lock_held = B_FALSE;
in_port_t uha_src_port;
udpattrs_t attrs;
- uchar_t ip_snd_opt[IP_MAX_OPT_LENGTH];
+ uchar_t ip_snd_opt[IP_MAX_OPT_LENGTH];
uint32_t ip_snd_opt_len = 0;
- ip4_pkt_t pktinfo;
- ip4_pkt_t *pktinfop = &pktinfo;
- ip_opt_info_t optinfo;
+ ip4_pkt_t pktinfo;
+ ip4_pkt_t *pktinfop = &pktinfo;
+ ip_opt_info_t optinfo;
ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
udp_stack_t *us = udp->udp_us;
ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec;
+ queue_t *q = connp->conn_wq;
+ ire_t *ire;
*error = 0;
@@ -5784,26 +4982,55 @@ udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port,
* If options passed in, feed it for verification and handling
*/
attrs.udpattr_credset = B_FALSE;
- if (DB_TYPE(mp) != M_DATA) {
- mp1 = mp->b_cont;
- if (((struct T_unitdata_req *)mp->b_rptr)->OPT_length != 0) {
+ if (IPCL_IS_NONSTR(connp)) {
+ if (msg->msg_controllen != 0) {
attrs.udpattr_ipp4 = pktinfop;
attrs.udpattr_mb = mp;
- if (udp_unitdata_opt_process(q, mp, error, &attrs) < 0)
+
+ rw_enter(&udp->udp_rwlock, RW_WRITER);
+ *error = process_auxiliary_options(connp,
+ msg->msg_control, msg->msg_controllen,
+ &attrs, &udp_opt_obj, udp_opt_set);
+ rw_exit(&udp->udp_rwlock);
+ if (*error)
goto done;
- /*
- * Note: success in processing options.
- * mp option buffer represented by
- * OPT_length/offset now potentially modified
- * and contain option setting results
- */
- ASSERT(*error == 0);
+ }
+ } else {
+ if (DB_TYPE(mp) != M_DATA) {
+ mp1 = mp->b_cont;
+ if (((struct T_unitdata_req *)
+ mp->b_rptr)->OPT_length != 0) {
+ attrs.udpattr_ipp4 = pktinfop;
+ attrs.udpattr_mb = mp;
+ if (udp_unitdata_opt_process(q, mp, error,
+ &attrs) < 0)
+ goto done;
+ /*
+ * Note: success in processing options.
+ * mp option buffer represented by
+ * OPT_length/offset now potentially modified
+ * and contain option setting results
+ */
+ ASSERT(*error == 0);
+ }
}
}
/* mp1 points to the M_DATA mblk carrying the packet */
ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA);
+ /*
+ * Determine whether we need to mark the mblk with the user's
+ * credentials.
+ */
+ ire = connp->conn_ire_cache;
+ if (is_system_labeled() || CLASSD(v4dst) || (ire == NULL) ||
+ (ire->ire_addr != v4dst) ||
+ (ire->ire_type & (IRE_BROADCAST | IRE_LOCAL | IRE_LOOPBACK))) {
+ if (cr != NULL && DB_CRED(mp) == NULL)
+ msg_setcredpid(mp, cr, pid);
+ }
+
rw_enter(&udp->udp_rwlock, RW_READER);
lock_held = B_TRUE;
/*
@@ -6235,7 +5462,7 @@ udp_xmit(queue_t *q, mblk_t *mp, ire_t *ire, conn_t *connp, zoneid_t zoneid)
ipha_t *ipha = (ipha_t *)mp->b_rptr;
udp_stack_t *us = udp->udp_us;
ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
- boolean_t ll_multicast = B_FALSE;
+ boolean_t ll_multicast = B_FALSE;
dev_q = ire->ire_stq->q_next;
ASSERT(dev_q != NULL);
@@ -6248,6 +5475,7 @@ udp_xmit(queue_t *q, mblk_t *mp, ire_t *ire, conn_t *connp, zoneid_t zoneid)
DEV_Q_FLOW_BLOCKED(dev_q)) {
BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests);
BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards);
+
if (ipst->ips_ip_output_queue)
(void) putq(connp->conn_wq, mp);
else
@@ -6397,11 +5625,11 @@ udp_update_label_v6(queue_t *wq, mblk_t *mp, in6_addr_t *dst)
return (err);
}
-void
-udp_output_connected(void *arg, mblk_t *mp)
+static int
+udp_send_connected(conn_t *connp, mblk_t *mp, struct nmsghdr *msg, cred_t *cr,
+ pid_t pid)
{
- conn_t *connp = (conn_t *)arg;
- udp_t *udp = connp->conn_udp;
+ udp_t *udp = connp->conn_udp;
udp_stack_t *us = udp->udp_us;
ipaddr_t v4dst;
in_port_t dstport;
@@ -6416,7 +5644,7 @@ udp_output_connected(void *arg, mblk_t *mp)
/* M_DATA for connected socket */
- ASSERT(udp->udp_issocket);
+ ASSERT(udp->udp_issocket || IPCL_IS_NONSTR(connp));
UDP_DBGSTAT(us, udp_data_conn);
mutex_enter(&connp->conn_lock);
@@ -6428,7 +5656,7 @@ udp_output_connected(void *arg, mblk_t *mp)
TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
"udp_wput_end: connp %p (%S)", connp,
"not-connected; address required");
- return;
+ return (EDESTADDRREQ);
}
mapped_addr = IN6_IS_ADDR_V4MAPPED(&udp->udp_v6dst);
@@ -6466,20 +5694,100 @@ udp_output_connected(void *arg, mblk_t *mp)
* family of the socket.
*/
mp = udp_output_v4(connp, mp, v4dst, dstport, 0, &error,
- insert_spi);
+ insert_spi, msg, cr, pid);
} else {
- mp = udp_output_v6(connp, mp, sin6, &error);
+ mp = udp_output_v6(connp, mp, sin6, &error, msg, cr, pid);
}
if (error == 0) {
ASSERT(mp == NULL);
- return;
+ return (0);
}
UDP_STAT(us, udp_out_err_output);
ASSERT(mp != NULL);
- /* mp is freed by the following routine */
- udp_ud_err(connp->conn_wq, mp, (uchar_t *)addr, (t_scalar_t)addrlen,
- (t_scalar_t)error);
+ if (IPCL_IS_NONSTR(connp)) {
+ freemsg(mp);
+ return (error);
+ } else {
+ /* mp is freed by the following routine */
+ udp_ud_err(connp->conn_wq, mp, (uchar_t *)addr,
+ (t_scalar_t)addrlen, (t_scalar_t)error);
+ return (0);
+ }
+}
+
+/*
+ * udp_send_not_connected():
+ * Send one datagram from an unconnected endpoint to the destination
+ * given in `addr'/`addrlen'.
+ *
+ * The destination is validated here (alignment, length and address
+ * family must match the endpoint's family) so that every caller gets
+ * the same checking; a malformed address fails with EADDRNOTAVAIL.
+ * For an AF_INET6 endpoint a non-V4-mapped destination is sent by
+ * udp_output_v6(); a V4-mapped destination is sent as IPv4 by
+ * udp_output_v4(), but only if the local address is unspecified or
+ * itself V4-mapped.
+ *
+ * `msg', `cr' and `pid' are passed through unchanged to the output
+ * routines.
+ *
+ * Returns 0 on success, otherwise an errno value. Nothing is freed
+ * in this routine on failure; disposing of the message is left to the
+ * caller.
+ */
+static int
+udp_send_not_connected(conn_t *connp, mblk_t *mp, struct sockaddr *addr,
+    socklen_t addrlen, struct nmsghdr *msg, cred_t *cr, pid_t pid)
+{
+	udp_t		*udp = connp->conn_udp;
+	boolean_t	insert_spi = udp->udp_nat_t_endpoint;
+	int		error = 0;
+	sin6_t		*sin6;
+	sin_t		*sin;
+	uint_t		srcid;
+	uint16_t	port;
+	ipaddr_t	v4dst;
+
+	ASSERT(addr != NULL);
+
+	switch (udp->udp_family) {
+	case AF_INET6:
+		sin6 = (sin6_t *)addr;
+		/* Reject a misaligned, mis-sized or wrong-family address. */
+		if (!OK_32PTR((char *)sin6) ||
+		    addrlen != sizeof (sin6_t) ||
+		    sin6->sin6_family != AF_INET6) {
+			error = EADDRNOTAVAIL;
+			goto ud_error;
+		}
+
+		if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
+			/*
+			 * Destination is a non-IPv4-compatible IPv6 address.
+			 * Send out an IPv6 format packet.
+			 */
+			mp = udp_output_v6(connp, mp, sin6, &error, msg, cr,
+			    pid);
+			if (error != 0)
+				goto ud_error;
+
+			return (0);
+		}
+		/*
+		 * If the local address is not zero or a mapped address
+		 * return an error.  It would be possible to send an IPv4
+		 * packet but the response would never make it back to the
+		 * application since it is bound to a non-mapped address.
+		 */
+		if (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src) &&
+		    !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
+			error = EADDRNOTAVAIL;
+			goto ud_error;
+		}
+		/* Send IPv4 packet without modifying udp_ipversion */
+		/* Extract port and ipaddr */
+		port = sin6->sin6_port;
+		IN6_V4MAPPED_TO_IPADDR(&sin6->sin6_addr, v4dst);
+		srcid = sin6->__sin6_src_id;
+		break;
+
+	case AF_INET:
+		sin = (sin_t *)addr;
+		/* Reject a misaligned, mis-sized or wrong-family address. */
+		if (!OK_32PTR((char *)sin) ||
+		    addrlen != sizeof (sin_t) ||
+		    sin->sin_family != AF_INET) {
+			error = EADDRNOTAVAIL;
+			goto ud_error;
+		}
+		/* Extract port and ipaddr */
+		port = sin->sin_port;
+		v4dst = sin->sin_addr.s_addr;
+		srcid = 0;
+		break;
+
+	default:
+		/*
+		 * Not expected for a UDP endpoint; fail rather than fall
+		 * through with port/v4dst/srcid uninitialised.
+		 */
+		error = EAFNOSUPPORT;
+		goto ud_error;
+	}
+
+	mp = udp_output_v4(connp, mp, v4dst, port, srcid, &error, insert_spi,
+	    msg, cr, pid);
+
+	if (error == 0) {
+		ASSERT(mp == NULL);
+		return (0);
+	}
+
+ud_error:
+	ASSERT(mp != NULL);
+
+	return (error);
+}
/*
@@ -6496,18 +5804,12 @@ udp_output_connected(void *arg, mblk_t *mp)
void
udp_wput(queue_t *q, mblk_t *mp)
{
- sin6_t *sin6;
- sin_t *sin;
- ipaddr_t v4dst;
- uint16_t port;
- uint_t srcid;
conn_t *connp = Q_TO_CONN(q);
udp_t *udp = connp->conn_udp;
int error = 0;
struct sockaddr *addr;
socklen_t addrlen;
- udp_stack_t *us = udp->udp_us;
- boolean_t insert_spi = udp->udp_nat_t_endpoint;
+ udp_stack_t *us = udp->udp_us;
TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_START,
"udp_wput_start: queue %p mp %p", q, mp);
@@ -6533,7 +5835,7 @@ udp_wput(queue_t *q, mblk_t *mp)
"not-connected; address required");
return;
}
- udp_output_connected(connp, mp);
+ (void) udp_send_connected(connp, mp, NULL, NULL, -1);
return;
case M_PROTO:
@@ -6587,67 +5889,8 @@ udp_wput(queue_t *q, mblk_t *mp)
}
ASSERT(addr != NULL);
- switch (udp->udp_family) {
- case AF_INET6:
- sin6 = (sin6_t *)addr;
- if (!OK_32PTR((char *)sin6) || (addrlen != sizeof (sin6_t)) ||
- (sin6->sin6_family != AF_INET6)) {
- TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
- "udp_wput_end: q %p (%S)", q, "badaddr");
- error = EADDRNOTAVAIL;
- goto ud_error;
- }
-
- if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
- /*
- * Destination is a non-IPv4-compatible IPv6 address.
- * Send out an IPv6 format packet.
- */
- mp = udp_output_v6(connp, mp, sin6, &error);
- if (error != 0)
- goto ud_error;
-
- TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
- "udp_wput_end: q %p (%S)", q, "udp_output_v6");
- return;
- }
- /*
- * If the local address is not zero or a mapped address
- * return an error. It would be possible to send an IPv4
- * packet but the response would never make it back to the
- * application since it is bound to a non-mapped address.
- */
- if (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src) &&
- !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
- TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
- "udp_wput_end: q %p (%S)", q, "badaddr");
- error = EADDRNOTAVAIL;
- goto ud_error;
- }
- /* Send IPv4 packet without modifying udp_ipversion */
- /* Extract port and ipaddr */
- port = sin6->sin6_port;
- IN6_V4MAPPED_TO_IPADDR(&sin6->sin6_addr, v4dst);
- srcid = sin6->__sin6_src_id;
- break;
-
- case AF_INET:
- sin = (sin_t *)addr;
- if ((!OK_32PTR((char *)sin) || addrlen != sizeof (sin_t)) ||
- (sin->sin_family != AF_INET)) {
- TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
- "udp_wput_end: q %p (%S)", q, "badaddr");
- error = EADDRNOTAVAIL;
- goto ud_error;
- }
- /* Extract port and ipaddr */
- port = sin->sin_port;
- v4dst = sin->sin_addr.s_addr;
- srcid = 0;
- break;
- }
-
- mp = udp_output_v4(connp, mp, v4dst, port, srcid, &error, insert_spi);
+ error = udp_send_not_connected(connp, mp, addr, addrlen, NULL, NULL,
+ -1);
if (error != 0) {
ud_error:
UDP_STAT(us, udp_out_err_output);
@@ -6658,13 +5901,25 @@ ud_error:
}
}
+/*
+ * udp_wput_fallback():
+ * Write-side put routine that discards whatever it is given. The
+ * message is always freed; DEBUG kernels additionally log a line so
+ * that unexpected traffic through this path is noticed. The queue
+ * argument is unused.
+ */
+/* ARGSUSED */
+static void
+udp_wput_fallback(queue_t *wq, mblk_t *mp)
+{
+#ifdef DEBUG
+ cmn_err(CE_CONT, "udp_wput_fallback: Message in fallback \n");
+#endif
+ freemsg(mp);
+}
+
+
/*
* udp_output_v6():
* Assumes that udp_wput did some sanity checking on the destination
* address.
*/
static mblk_t *
-udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error)
+udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error,
+ struct nmsghdr *msg, cred_t *cr, pid_t pid)
{
ip6_t *ip6h;
ip6i_t *ip6i; /* mp1->b_rptr even if no ip6i_t */
@@ -6674,6 +5929,7 @@ udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error)
size_t ip_len;
udpha_t *udph;
udp_t *udp = connp->conn_udp;
+ udp_stack_t *us = udp->udp_us;
queue_t *q = connp->conn_wq;
ip6_pkt_t ipp_s; /* For ancillary data options */
ip6_pkt_t *ipp = &ipp_s;
@@ -6689,8 +5945,8 @@ udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error)
ip6_hbh_t *hopoptsptr = NULL;
uint_t hopoptslen = 0;
boolean_t is_ancillary = B_FALSE;
- udp_stack_t *us = udp->udp_us;
size_t sth_wroff = 0;
+ ire_t *ire;
*error = 0;
@@ -6714,19 +5970,51 @@ udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error)
*/
attrs.udpattr_credset = B_FALSE;
opt_present = B_FALSE;
- if (DB_TYPE(mp) != M_DATA) {
- mp1 = mp->b_cont;
- if (((struct T_unitdata_req *)mp->b_rptr)->OPT_length != 0) {
+ if (IPCL_IS_NONSTR(connp)) {
+ if (msg->msg_controllen != 0) {
attrs.udpattr_ipp6 = ipp;
attrs.udpattr_mb = mp;
- if (udp_unitdata_opt_process(q, mp, error,
- &attrs) < 0) {
+
+ rw_enter(&udp->udp_rwlock, RW_WRITER);
+ *error = process_auxiliary_options(connp,
+ msg->msg_control, msg->msg_controllen,
+ &attrs, &udp_opt_obj, udp_opt_set);
+ rw_exit(&udp->udp_rwlock);
+ if (*error)
goto done;
- }
ASSERT(*error == 0);
opt_present = B_TRUE;
}
+ } else {
+ if (DB_TYPE(mp) != M_DATA) {
+ mp1 = mp->b_cont;
+ if (((struct T_unitdata_req *)
+ mp->b_rptr)->OPT_length != 0) {
+ attrs.udpattr_ipp6 = ipp;
+ attrs.udpattr_mb = mp;
+ if (udp_unitdata_opt_process(q, mp, error,
+ &attrs) < 0) {
+ goto done;
+ }
+ ASSERT(*error == 0);
+ opt_present = B_TRUE;
+ }
+ }
}
+
+ /*
+ * Determine whether we need to mark the mblk with the user's
+ * credentials.
+ */
+ ire = connp->conn_ire_cache;
+ if (is_system_labeled() || IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) ||
+ (ire == NULL) ||
+ (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &sin6->sin6_addr)) ||
+ (ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK))) {
+ if (cr != NULL && DB_CRED(mp) == NULL)
+ msg_setcredpid(mp, cr, pid);
+ }
+
rw_enter(&udp->udp_rwlock, RW_READER);
ignore = ipp->ipp_sticky_ignored;
@@ -7268,7 +6556,7 @@ no_options:
done:
if (sth_wroff != 0) {
- (void) mi_set_sth_wroff(RD(q),
+ (void) proto_set_tx_wroff(RD(q), connp,
udp->udp_max_hdr_len + us->us_wroff_extra);
}
if (hopoptsptr != NULL && !is_ancillary) {
@@ -7284,7 +6572,7 @@ done:
static int
-udp_getpeername(udp_t *udp, struct sockaddr *sa, uint_t *salenp)
+i_udp_getpeername(udp_t *udp, struct sockaddr *sa, uint_t *salenp)
{
sin_t *sin = (sin_t *)sa;
sin6_t *sin6 = (sin6_t *)sa;
@@ -7404,7 +6692,7 @@ udp_wput_cmdblk(queue_t *q, mblk_t *mp)
rw_enter(&udp->udp_rwlock, RW_READER);
switch (cmdp->cb_cmd) {
case TI_GETPEERNAME:
- cmdp->cb_error = udp_getpeername(udp, data, &cmdp->cb_len);
+ cmdp->cb_error = i_udp_getpeername(udp, data, &cmdp->cb_len);
break;
case TI_GETMYNAME:
cmdp->cb_error = udp_getmyname(udp, data, &cmdp->cb_len);
@@ -7419,6 +6707,21 @@ udp_wput_cmdblk(queue_t *q, mblk_t *mp)
}
+/*
+ * Mark the endpoint as no longer being a socket and, if the direct
+ * sockfs receive path is active, shut it down by draining the data
+ * that is still queued.
+ */
static void
+udp_disable_direct_sockfs(udp_t *udp)
+{
+	udp->udp_issocket = B_FALSE;
+
+	/* Nothing further to undo when the direct path is not in use. */
+	if (!udp->udp_direct_sockfs)
+		return;
+
+	/*
+	 * Turn off the read-side synchronous stream interface; the drain
+	 * is expected to clear udp_direct_sockfs (asserted below).
+	 */
+	udp_rcv_drain(udp->udp_connp->conn_rq, udp, B_FALSE);
+	ASSERT(!udp->udp_direct_sockfs);
+	UDP_STAT(udp->udp_us, udp_sock_fallback);
+}
+
+static void
udp_wput_other(queue_t *q, mblk_t *mp)
{
uchar_t *rptr = mp->b_rptr;
@@ -7458,12 +6761,12 @@ udp_wput_other(queue_t *q, mblk_t *mp)
return;
case O_T_BIND_REQ:
case T_BIND_REQ:
- udp_bind(q, mp);
+ udp_tpi_bind(q, mp);
TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
"udp_wput_other_end: q %p (%S)", q, "bindreq");
return;
case T_CONN_REQ:
- udp_connect(q, mp);
+ udp_tpi_connect(q, mp);
TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
"udp_wput_other_end: q %p (%S)", q, "connreq");
return;
@@ -7488,7 +6791,7 @@ udp_wput_other(queue_t *q, mblk_t *mp)
"udp_wput_other_end: q %p (%S)", q, "unitdatareq");
return;
case T_UNBIND_REQ:
- udp_unbind(q, mp);
+ udp_tpi_unbind(q, mp);
TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
"udp_wput_other_end: q %p (%S)", q, "unbindreq");
return;
@@ -7509,7 +6812,7 @@ udp_wput_other(queue_t *q, mblk_t *mp)
return;
case T_DISCON_REQ:
- udp_disconnect(q, mp);
+ udp_tpi_disconnect(q, mp);
TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
"udp_wput_other_end: q %p (%S)", q, "disconreq");
return;
@@ -7596,18 +6899,8 @@ udp_wput_other(queue_t *q, mblk_t *mp)
DB_TYPE(mp) = M_IOCNAK;
iocp->ioc_error = EINVAL;
} else {
- udp->udp_issocket = B_FALSE;
- if (udp->udp_direct_sockfs) {
- /*
- * Disable read-side synchronous
- * stream interface and drain any
- * queued data.
- */
- udp_rcv_drain(RD(q), udp,
- B_FALSE);
- ASSERT(!udp->udp_direct_sockfs);
- UDP_STAT(us, udp_sock_fallback);
- }
+ udp_disable_direct_sockfs(udp);
+
DB_TYPE(mp) = M_IOCACK;
iocp->ioc_error = 0;
}
@@ -7640,12 +6933,12 @@ udp_wput_other(queue_t *q, mblk_t *mp)
static void
udp_wput_iocdata(queue_t *q, mblk_t *mp)
{
- mblk_t *mp1;
- struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
+ mblk_t *mp1;
+ struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
STRUCT_HANDLE(strbuf, sb);
- udp_t *udp = Q_TO_UDP(q);
- int error;
- uint_t addrlen;
+ udp_t *udp = Q_TO_UDP(q);
+ int error;
+ uint_t addrlen;
/* Make sure it is one of ours. */
switch (iocp->ioc_cmd) {
@@ -7699,16 +6992,17 @@ udp_wput_iocdata(queue_t *q, mblk_t *mp)
}
mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE);
+
if (mp1 == NULL)
return;
rw_enter(&udp->udp_rwlock, RW_READER);
switch (iocp->ioc_cmd) {
case TI_GETMYNAME:
- error = udp_getmyname(udp, (void *)mp1->b_rptr, &addrlen);
+ error = udp_do_getsockname(udp, (void *)mp1->b_rptr, &addrlen);
break;
case TI_GETPEERNAME:
- error = udp_getpeername(udp, (void *)mp1->b_rptr, &addrlen);
+ error = udp_do_getpeername(udp, (void *)mp1->b_rptr, &addrlen);
break;
}
rw_exit(&udp->udp_rwlock);
@@ -7755,7 +7049,7 @@ udp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp,
}
void
-udp_ddi_init(void)
+udp_ddi_g_init(void)
{
udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr,
udp_opt_obj.odb_opt_arr_cnt);
@@ -7769,11 +7063,13 @@ udp_ddi_init(void)
}
void
-udp_ddi_destroy(void)
+udp_ddi_g_destroy(void)
{
+ /* Global teardown: remove UDP's registration (NS_UDP) from netstack. */
netstack_unregister(NS_UDP);
}
+#define INET_NAME "ip"
+
/*
* Initialize the UDP stack instance.
*/
@@ -7783,6 +7079,8 @@ udp_stack_init(netstackid_t stackid, netstack_t *ns)
udp_stack_t *us;
udpparam_t *pa;
int i;
+ int error = 0;
+ major_t major;
us = (udp_stack_t *)kmem_zalloc(sizeof (*us), KM_SLEEP);
us->us_netstack = ns;
@@ -7825,6 +7123,10 @@ udp_stack_init(netstackid_t stackid, netstack_t *ns)
us->us_kstat = udp_kstat2_init(stackid, &us->us_statistics);
us->us_mibkp = udp_kstat_init(stackid);
+
+ major = mod_name_to_major(INET_NAME);
+ error = ldi_ident_from_major(major, &us->us_ldi_ident);
+ ASSERT(error == 0);
return (us);
}
@@ -7856,6 +7158,8 @@ udp_stack_fini(netstackid_t stackid, void *arg)
udp_kstat2_fini(stackid, us->us_kstat);
us->us_kstat = NULL;
bzero(&us->us_statistics, sizeof (us->us_statistics));
+
+ ldi_ident_release(us->us_ldi_ident);
kmem_free(us, sizeof (*us));
}
@@ -8192,8 +7496,6 @@ udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing)
mblk_t *mp;
udp_stack_t *us = udp->udp_us;
- ASSERT(q == RD(q));
-
mutex_enter(&udp->udp_drain_lock);
/*
* There is no race with a concurrent udp_input() sending
@@ -8222,6 +7524,7 @@ udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing)
if (closing) {
freemsg(mp);
} else {
+ ASSERT(q == RD(q));
putnext(q, mp);
}
}
@@ -8282,3 +7585,1802 @@ udp_lwput(queue_t *q, mblk_t *mp)
{
freemsg(mp);
}
+
+/*
+ * Below routines for UDP socket module.
+ */
+
+/*
+ * udp_do_open():
+ * Allocate a UDP conn_t in the netstack selected by `credp' and set it
+ * to its initial, unbound state.
+ *
+ * credp - credentials of the opener; a hold is taken and stored in
+ * conn_cred.
+ * isv6 - B_TRUE for an AF_INET6 endpoint, B_FALSE for AF_INET.
+ * flags - KM_SLEEP or KM_NOSLEEP, passed to ipcl_conn_create().
+ *
+ * Returns the new conn_t, or NULL if ipcl_conn_create() fails.
+ */
+static conn_t *
+udp_do_open(cred_t *credp, boolean_t isv6, int flags)
+{
+ udp_t *udp;
+ conn_t *connp;
+ zoneid_t zoneid;
+ netstack_t *ns;
+ udp_stack_t *us;
+
+ ns = netstack_find_by_cred(credp);
+ ASSERT(ns != NULL);
+ us = ns->netstack_udp;
+ ASSERT(us != NULL);
+
+ /*
+ * For exclusive stacks we set the zoneid to zero
+ * to make UDP operate as if in the global zone.
+ */
+ if (ns->netstack_stackid != GLOBAL_NETSTACKID)
+ zoneid = GLOBAL_ZONEID;
+ else
+ zoneid = crgetzoneid(credp);
+
+ ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP);
+
+ connp = ipcl_conn_create(IPCL_UDPCONN, flags, ns);
+ if (connp == NULL) {
+ /* Drop the hold taken by netstack_find_by_cred(). */
+ netstack_rele(ns);
+ return (NULL);
+ }
+ udp = connp->conn_udp;
+
+ /*
+ * ipcl_conn_create did a netstack_hold. Undo the hold that was
+ * done by netstack_find_by_cred()
+ */
+ netstack_rele(ns);
+
+ rw_enter(&udp->udp_rwlock, RW_WRITER);
+ ASSERT(connp->conn_ulp == IPPROTO_UDP);
+ ASSERT(connp->conn_udp == udp);
+ ASSERT(udp->udp_connp == connp);
+
+ /* Set the initial state of the stream and the privilege status. */
+ udp->udp_state = TS_UNBND;
+ if (isv6) {
+ udp->udp_family = AF_INET6;
+ udp->udp_ipversion = IPV6_VERSION;
+ udp->udp_max_hdr_len = IPV6_HDR_LEN + UDPH_SIZE;
+ udp->udp_ttl = us->us_ipv6_hoplimit;
+ connp->conn_af_isv6 = B_TRUE;
+ connp->conn_flags |= IPCL_ISV6;
+ } else {
+ udp->udp_family = AF_INET;
+ udp->udp_ipversion = IPV4_VERSION;
+ udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE;
+ udp->udp_ttl = us->us_ipv4_ttl;
+ connp->conn_af_isv6 = B_FALSE;
+ connp->conn_flags &= ~IPCL_ISV6;
+ }
+
+ udp->udp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
+ /* -1 means no operation is pending on this endpoint. */
+ udp->udp_pending_op = -1;
+ connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
+ connp->conn_zoneid = zoneid;
+
+ udp->udp_open_time = lbolt64;
+ udp->udp_open_pid = curproc->p_pid;
+
+ /*
+ * If the caller has the process-wide flag set, then default to MAC
+ * exempt mode. This allows read-down to unlabeled hosts.
+ */
+ if (getpflags(NET_MAC_AWARE, credp) != 0)
+ connp->conn_mac_exempt = B_TRUE;
+
+ connp->conn_ulp_labeled = is_system_labeled();
+
+ udp->udp_us = us;
+
+ connp->conn_recv = udp_input;
+ /* The conn keeps its own reference on the opener's credentials. */
+ crhold(credp);
+ connp->conn_cred = credp;
+
+ *((sin6_t *)&udp->udp_delayed_addr) = sin6_null;
+
+ rw_exit(&udp->udp_rwlock);
+
+ return (connp);
+}
+
+/*
+ * udp_create():
+ * Socket-module entry point that creates a UDP endpoint.
+ *
+ * Only SOCK_DGRAM sockets of family AF_INET or AF_INET6 with protocol
+ * 0 or IPPROTO_UDP are accepted; anything else fails with
+ * EPROTONOSUPPORT. On success the conn_t is returned as the lower
+ * handle, *sock_downcalls is set to sock_udp_downcalls, *smodep to
+ * SM_ATOMIC and *errorp to 0. On failure NULL is returned and *errorp
+ * holds the reason (ENOMEM when udp_do_open() fails, otherwise the
+ * error from udp_build_hdrs() or ip_create_helper_stream()).
+ */
+/* ARGSUSED */
+sock_lower_handle_t
+udp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls,
+ uint_t *smodep, int *errorp, int flags, cred_t *credp)
+{
+ udp_t *udp = NULL;
+ udp_stack_t *us;
+ conn_t *connp;
+ boolean_t isv6;
+
+ if (type != SOCK_DGRAM || (family != AF_INET && family != AF_INET6) ||
+ (proto != 0 && proto != IPPROTO_UDP)) {
+ *errorp = EPROTONOSUPPORT;
+ return (NULL);
+ }
+
+ if (family == AF_INET6)
+ isv6 = B_TRUE;
+ else
+ isv6 = B_FALSE;
+
+ connp = udp_do_open(credp, isv6, flags);
+ if (connp == NULL) {
+ *errorp = ENOMEM;
+ return (NULL);
+ }
+
+ udp = connp->conn_udp;
+ ASSERT(udp != NULL);
+ us = udp->udp_us;
+ ASSERT(us != NULL);
+
+ /* Mark the conn as a socket that has no STREAMS queues of its own. */
+ connp->conn_flags |= IPCL_NONSTR | IPCL_SOCKET;
+
+ /* Set flow control */
+ rw_enter(&udp->udp_rwlock, RW_WRITER);
+ (void) udp_set_rcv_hiwat(udp, us->us_recv_hiwat);
+ udp->udp_rcv_disply_hiwat = us->us_recv_hiwat;
+ udp->udp_rcv_lowat = udp_mod_info.mi_lowat;
+ udp->udp_xmit_hiwat = us->us_xmit_hiwat;
+ udp->udp_xmit_lowat = us->us_xmit_lowat;
+
+ if (udp->udp_family == AF_INET6) {
+ /* Build initial header template for transmit */
+ if ((*errorp = udp_build_hdrs(udp)) != 0) {
+ rw_exit(&udp->udp_rwlock);
+ /*
+ * NOTE(review): this path destroys the conn directly
+ * while the helper-stream failure below goes through
+ * udp_do_close() -- confirm the asymmetry is intended.
+ */
+ ipcl_conn_destroy(connp);
+ return (NULL);
+ }
+ }
+ rw_exit(&udp->udp_rwlock);
+
+ connp->conn_flow_cntrld = B_FALSE;
+
+ ASSERT(us->us_ldi_ident != NULL);
+
+ if ((*errorp = ip_create_helper_stream(connp, us->us_ldi_ident)) != 0) {
+ ip1dbg(("create of IP helper stream failed\n"));
+ udp_do_close(connp);
+ return (NULL);
+ }
+
+ /* Set the send flow control */
+ connp->conn_wq->q_hiwat = us->us_xmit_hiwat;
+ connp->conn_wq->q_lowat = us->us_xmit_lowat;
+
+ /* Setup is complete; the conn is no longer incipient. */
+ mutex_enter(&connp->conn_lock);
+ connp->conn_state_flags &= ~CONN_INCIPIENT;
+ mutex_exit(&connp->conn_lock);
+
+ *errorp = 0;
+ *smodep = SM_ATOMIC;
+ *sock_downcalls = &sock_udp_downcalls;
+ return ((sock_lower_handle_t)connp);
+}
+
+/*
+ * udp_activate():
+ * Record the socket layer's upcall vector and upper handle in the
+ * conn, then report this endpoint's properties (write offset, receive
+ * high-water mark, max block and min/max packet sizes) to the socket
+ * layer through su_set_proto_props. `flags' and `cr' are unused.
+ */
+/* ARGSUSED */
+void
+udp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle,
+    sock_upcalls_t *sock_upcalls, int flags, cred_t *cr)
+{
+	conn_t			*connp = (conn_t *)proto_handle;
+	udp_t			*udp = connp->conn_udp;
+	udp_stack_t		*us = udp->udp_us;
+	struct sock_proto_props	props;
+
+	connp->conn_upcalls = sock_upcalls;
+	connp->conn_upper_handle = sock_handle;
+
+	props.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT |
+	    SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ;
+	props.sopp_wroff = udp->udp_max_hdr_len + us->us_wroff_extra;
+	props.sopp_maxblk = INFPSZ;
+	props.sopp_rxhiwat = udp->udp_rcv_hiwat;
+	props.sopp_maxaddrlen = sizeof (sin6_t);
+
+	/* Largest datagram depends on the address family. */
+	if (udp->udp_family == AF_INET)
+		props.sopp_maxpsz = UDP_MAXPACKET_IPV4;
+	else
+		props.sopp_maxpsz = UDP_MAXPACKET_IPV6;
+
+	/* A module minimum of 1 is reported to the socket layer as 0. */
+	if (udp_mod_info.mi_minpsz == 1)
+		props.sopp_minpsz = 0;
+	else
+		props.sopp_minpsz = udp_mod_info.mi_minpsz;
+
+	(*connp->conn_upcalls->su_set_proto_props)(connp->conn_upper_handle,
+	    &props);
+}
+
+/*
+ * udp_do_close():
+ * Tear down a UDP conn_t: quiesce it in UDP and IP, drain the receive
+ * list and turn the queue off (STREAMS endpoints only), free the
+ * per-connection UDP state, release either the minor number (STREAMS)
+ * or the IP helper stream (sockets), and finally destroy the conn.
+ * The caller must hold the last reference (conn_ref == 1 is asserted).
+ */
+static void
+udp_do_close(conn_t *connp)
+{
+ udp_t *udp;
+
+ ASSERT(connp != NULL && IPCL_IS_UDP(connp));
+ udp = connp->conn_udp;
+
+ udp_quiesce_conn(connp);
+ ip_quiesce_conn(connp);
+
+ if (!IPCL_IS_NONSTR(connp)) {
+ /*
+ * Disable read-side synchronous stream
+ * interface and drain any queued data.
+ */
+ /*
+ * NOTE(review): conn_wq is passed although the comment above
+ * speaks of the read side. With closing == B_TRUE the drain
+ * loop frees each message instead of calling putnext(q, ...),
+ * so the queue argument looks unused on this path -- confirm.
+ */
+ ASSERT(connp->conn_wq != NULL);
+ udp_rcv_drain(connp->conn_wq, udp, B_TRUE);
+ ASSERT(!udp->udp_direct_sockfs);
+
+ ASSERT(connp->conn_rq != NULL);
+ qprocsoff(connp->conn_rq);
+ }
+
+ /* Nothing may be left on the receive list at this point. */
+ ASSERT(udp->udp_rcv_cnt == 0);
+ ASSERT(udp->udp_rcv_msgcnt == 0);
+ ASSERT(udp->udp_rcv_list_head == NULL);
+ ASSERT(udp->udp_rcv_list_tail == NULL);
+
+ udp_close_free(connp);
+
+ /*
+ * Now we are truly single threaded on this stream, and can
+ * delete the things hanging off the connp, and finally the connp.
+ * We removed this connp from the fanout list, it cannot be
+ * accessed thru the fanouts, and we already waited for the
+ * conn_ref to drop to 0. We are already in close, so
+ * there cannot be any other thread from the top. qprocsoff
+ * has completed, and service has completed or won't run in
+ * future.
+ */
+ ASSERT(connp->conn_ref == 1);
+ if (!IPCL_IS_NONSTR(connp)) {
+ inet_minor_free(connp->conn_minor_arena, connp->conn_dev);
+ } else {
+ ip_close_helper_stream(connp);
+ }
+
+ /* Drop the final reference before handing the conn back. */
+ connp->conn_ref--;
+ ipcl_conn_destroy(connp);
+}
+
+/*
+ * udp_close():
+ * Socket-module close entry point. The lower handle is the conn_t;
+ * all of the work is done by udp_do_close(). `flags' and `cr' are
+ * unused. Always returns 0.
+ */
+/* ARGSUSED */
+int
+udp_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr)
+{
+	udp_do_close((conn_t *)proto_handle);
+	return (0);
+}
+
+static int
+udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
+ boolean_t bind_to_req_port_only)
+{
+ sin_t *sin;
+ sin6_t *sin6;
+ sin6_t sin6addr;
+ in_port_t port; /* Host byte order */
+ in_port_t requested_port; /* Host byte order */
+ int count;
+ in6_addr_t v6src;
+ int loopmax;
+ udp_fanout_t *udpf;
+ in_port_t lport; /* Network byte order */
+ zoneid_t zoneid;
+ udp_t *udp;
+ boolean_t is_inaddr_any;
+ mlp_type_t addrtype, mlptype;
+ udp_stack_t *us;
+ int error = 0;
+ mblk_t *mp = NULL;
+
+ udp = connp->conn_udp;
+ us = udp->udp_us;
+
+ if (udp->udp_state != TS_UNBND) {
+ (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
+ "udp_bind: bad state, %u", udp->udp_state);
+ return (-TOUTSTATE);
+ }
+
+ switch (len) {
+ case 0:
+ if (udp->udp_family == AF_INET) {
+ sin = (sin_t *)&sin6addr;
+ *sin = sin_null;
+ sin->sin_family = AF_INET;
+ sin->sin_addr.s_addr = INADDR_ANY;
+ udp->udp_ipversion = IPV4_VERSION;
+ } else {
+ ASSERT(udp->udp_family == AF_INET6);
+ sin6 = (sin6_t *)&sin6addr;
+ *sin6 = sin6_null;
+ sin6->sin6_family = AF_INET6;
+ V6_SET_ZERO(sin6->sin6_addr);
+ udp->udp_ipversion = IPV6_VERSION;
+ }
+ port = 0;
+ break;
+
+ case sizeof (sin_t): /* Complete IPv4 address */
+ sin = (sin_t *)sa;
+
+ if (sin == NULL || !OK_32PTR((char *)sin))
+ return (EINVAL);
+
+ if (udp->udp_family != AF_INET ||
+ sin->sin_family != AF_INET) {
+ return (EAFNOSUPPORT);
+ }
+ port = ntohs(sin->sin_port);
+ break;
+
+ case sizeof (sin6_t): /* complete IPv6 address */
+ sin6 = (sin6_t *)sa;
+
+ if (sin6 == NULL || !OK_32PTR((char *)sin6))
+ return (EINVAL);
+
+ if (udp->udp_family != AF_INET6 ||
+ sin6->sin6_family != AF_INET6) {
+ return (EAFNOSUPPORT);
+ }
+ port = ntohs(sin6->sin6_port);
+ break;
+
+ default: /* Invalid request */
+ (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
+ "udp_bind: bad ADDR_length length %u", len);
+ return (-TBADADDR);
+ }
+
+ requested_port = port;
+
+ if (requested_port == 0 || !bind_to_req_port_only)
+ bind_to_req_port_only = B_FALSE;
+ else /* T_BIND_REQ and requested_port != 0 */
+ bind_to_req_port_only = B_TRUE;
+
+ if (requested_port == 0) {
+ /*
+ * If the application passed in zero for the port number, it
+ * doesn't care which port number we bind to. Get one in the
+ * valid range.
+ */
+ if (udp->udp_anon_priv_bind) {
+ port = udp_get_next_priv_port(udp);
+ } else {
+ port = udp_update_next_port(udp,
+ us->us_next_port_to_try, B_TRUE);
+ }
+ } else {
+ /*
+ * If the port is in the well-known privileged range,
+ * make sure the caller was privileged.
+ */
+ int i;
+ boolean_t priv = B_FALSE;
+
+ if (port < us->us_smallest_nonpriv_port) {
+ priv = B_TRUE;
+ } else {
+ for (i = 0; i < us->us_num_epriv_ports; i++) {
+ if (port == us->us_epriv_ports[i]) {
+ priv = B_TRUE;
+ break;
+ }
+ }
+ }
+
+ if (priv) {
+ if (secpolicy_net_privaddr(cr, port, IPPROTO_UDP) != 0)
+ return (-TACCES);
+ }
+ }
+
+ if (port == 0)
+ return (-TNOADDR);
+
+ /*
+ * The state must be TS_UNBND. TPI mandates that users must send
+ * TPI primitives only 1 at a time and wait for the response before
+ * sending the next primitive.
+ */
+ rw_enter(&udp->udp_rwlock, RW_WRITER);
+ if (udp->udp_state != TS_UNBND || udp->udp_pending_op != -1) {
+ rw_exit(&udp->udp_rwlock);
+ (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
+ "udp_bind: bad state, %u", udp->udp_state);
+ return (-TOUTSTATE);
+ }
+ /* XXX how to remove the T_BIND_REQ? Should set it before calling */
+ udp->udp_pending_op = T_BIND_REQ;
+ /*
+ * Copy the source address into our udp structure. This address
+ * may still be zero; if so, IP will fill in the correct address
+ * each time an outbound packet is passed to it. Since the udp is
+ * not yet in the bind hash list, we don't grab the uf_lock to
+ * change udp_ipversion
+ */
+ if (udp->udp_family == AF_INET) {
+ ASSERT(sin != NULL);
+ ASSERT(udp->udp_ipversion == IPV4_VERSION);
+ udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE +
+ udp->udp_ip_snd_options_len;
+ IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6src);
+ } else {
+ ASSERT(sin6 != NULL);
+ v6src = sin6->sin6_addr;
+ if (IN6_IS_ADDR_V4MAPPED(&v6src)) {
+ /*
+ * no need to hold the uf_lock to set the udp_ipversion
+ * since we are not yet in the fanout list
+ */
+ udp->udp_ipversion = IPV4_VERSION;
+ udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
+ UDPH_SIZE + udp->udp_ip_snd_options_len;
+ } else {
+ udp->udp_ipversion = IPV6_VERSION;
+ udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len;
+ }
+ }
+
+ /*
+ * If udp_reuseaddr is not set, then we have to make sure that
+ * the IP address and port number the application requested
+ * (or we selected for the application) is not being used by
+ * another stream. If another stream is already using the
+ * requested IP address and port, the behavior depends on
+ * "bind_to_req_port_only". If set the bind fails; otherwise we
+ * search for an unused port to bind to the stream.
+ *
+ * As per the BSD semantics, as modified by the Deering multicast
+ * changes, if udp_reuseaddr is set, then we allow multiple binds
+ * to the same port independent of the local IP address.
+ *
+ * This is slightly different than in SunOS 4.X which did not
+ * support IP multicast. Note that the change implemented by the
+ * Deering multicast code effects all binds - not only binding
+ * to IP multicast addresses.
+ *
+ * Note that when binding to port zero we ignore SO_REUSEADDR in
+ * order to guarantee a unique port.
+ */
+
+ count = 0;
+ if (udp->udp_anon_priv_bind) {
+ /*
+ * loopmax = (IPPORT_RESERVED-1) -
+ * us->us_min_anonpriv_port + 1
+ */
+ loopmax = IPPORT_RESERVED - us->us_min_anonpriv_port;
+ } else {
+ loopmax = us->us_largest_anon_port -
+ us->us_smallest_anon_port + 1;
+ }
+
+ is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src);
+ zoneid = connp->conn_zoneid;
+
+ for (;;) {
+ udp_t *udp1;
+ boolean_t found_exclbind = B_FALSE;
+
+ /*
+ * Walk through the list of udp streams bound to
+ * requested port with the same IP address.
+ */
+ lport = htons(port);
+ udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport,
+ us->us_bind_fanout_size)];
+ mutex_enter(&udpf->uf_lock);
+ for (udp1 = udpf->uf_udp; udp1 != NULL;
+ udp1 = udp1->udp_bind_hash) {
+ if (lport != udp1->udp_port)
+ continue;
+
+ /*
+ * On a labeled system, we must treat bindings to ports
+ * on shared IP addresses by sockets with MAC exemption
+ * privilege as being in all zones, as there's
+ * otherwise no way to identify the right receiver.
+ */
+ if (!(IPCL_ZONE_MATCH(udp1->udp_connp, zoneid) ||
+ IPCL_ZONE_MATCH(connp,
+ udp1->udp_connp->conn_zoneid)) &&
+ !connp->conn_mac_exempt && \
+ !udp1->udp_connp->conn_mac_exempt)
+ continue;
+
+ /*
+ * If UDP_EXCLBIND is set for either the bound or
+ * binding endpoint, the semantics of bind
+ * is changed according to the following chart.
+ *
+ * spec = specified address (v4 or v6)
+ * unspec = unspecified address (v4 or v6)
+ * A = specified addresses are different for endpoints
+ *
+ * bound bind to allowed?
+ * -------------------------------------
+ * unspec unspec no
+ * unspec spec no
+ * spec unspec no
+ * spec spec yes if A
+ *
+ * For labeled systems, SO_MAC_EXEMPT behaves the same
+ * as UDP_EXCLBIND, except that zoneid is ignored.
+ */
+ if (udp1->udp_exclbind || udp->udp_exclbind ||
+ udp1->udp_connp->conn_mac_exempt ||
+ connp->conn_mac_exempt) {
+ if (V6_OR_V4_INADDR_ANY(
+ udp1->udp_bound_v6src) ||
+ is_inaddr_any ||
+ IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src,
+ &v6src)) {
+ found_exclbind = B_TRUE;
+ break;
+ }
+ continue;
+ }
+
+ /*
+ * Check ipversion to allow IPv4 and IPv6 sockets to
+ * have disjoint port number spaces.
+ */
+ if (udp->udp_ipversion != udp1->udp_ipversion) {
+
+ /*
+ * On the first time through the loop, if the
+ * user intentionally specified a
+ * particular port number, then ignore any
+ * bindings of the other protocol that may
+ * conflict. This allows the user to bind IPv6
+ * alone and get both v4 and v6, or bind
+ * both and get each separately. On subsequent
+ * times through the loop, we're checking a
+ * port that we chose (not the user) and thus
+ * we do not allow casual duplicate bindings.
+ */
+ if (count == 0 && requested_port != 0)
+ continue;
+ }
+
+ /*
+ * No difference depending on SO_REUSEADDR.
+ *
+ * If existing port is bound to a
+ * non-wildcard IP address and
+ * the requesting stream is bound to
+ * a different IP address
+ * (non-wildcard, also), keep going.
+ */
+ if (!is_inaddr_any &&
+ !V6_OR_V4_INADDR_ANY(udp1->udp_bound_v6src) &&
+ !IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src,
+ &v6src)) {
+ continue;
+ }
+ break;
+ }
+
+ if (!found_exclbind &&
+ (udp->udp_reuseaddr && requested_port != 0)) {
+ break;
+ }
+
+ if (udp1 == NULL) {
+ /*
+ * No other stream has this IP address
+ * and port number. We can use it.
+ */
+ break;
+ }
+ mutex_exit(&udpf->uf_lock);
+ if (bind_to_req_port_only) {
+ /*
+ * We get here only when requested port
+ * is bound (and only first of the for()
+ * loop iteration).
+ *
+ * The semantics of this bind request
+ * require it to fail so we return from
+ * the routine (and exit the loop).
+ *
+ */
+ udp->udp_pending_op = -1;
+ rw_exit(&udp->udp_rwlock);
+ return (-TADDRBUSY);
+ }
+
+ if (udp->udp_anon_priv_bind) {
+ port = udp_get_next_priv_port(udp);
+ } else {
+ if ((count == 0) && (requested_port != 0)) {
+ /*
+ * If the application wants us to find
+ * a port, get one to start with. Set
+ * requested_port to 0, so that we will
+ * update us->us_next_port_to_try below.
+ */
+ port = udp_update_next_port(udp,
+ us->us_next_port_to_try, B_TRUE);
+ requested_port = 0;
+ } else {
+ port = udp_update_next_port(udp, port + 1,
+ B_FALSE);
+ }
+ }
+
+ if (port == 0 || ++count >= loopmax) {
+ /*
+ * We've tried every possible port number and
+ * there are none available, so send an error
+ * to the user.
+ */
+ udp->udp_pending_op = -1;
+ rw_exit(&udp->udp_rwlock);
+ return (-TNOADDR);
+ }
+ }
+
+ /*
+ * Copy the source address into our udp structure. This address
+ * may still be zero; if so, ip will fill in the correct address
+ * each time an outbound packet is passed to it.
+ * If we are binding to a broadcast or multicast address then
+ * udp_post_ip_bind_connect will clear the source address
+ * when udp_do_bind succeeds.
+ */
+ udp->udp_v6src = udp->udp_bound_v6src = v6src;
+ udp->udp_port = lport;
+ /*
+ * Now reset the next anonymous port if the application requested
+ * an anonymous port, or we handed out the next anonymous port.
+ */
+ if ((requested_port == 0) && (!udp->udp_anon_priv_bind)) {
+ us->us_next_port_to_try = port + 1;
+ }
+
+ /* Initialize the O_T_BIND_REQ/T_BIND_REQ for ip. */
+ if (udp->udp_family == AF_INET) {
+ sin->sin_port = udp->udp_port;
+ } else {
+ sin6->sin6_port = udp->udp_port;
+ /* Rebuild the header template */
+ error = udp_build_hdrs(udp);
+ if (error != 0) {
+ udp->udp_pending_op = -1;
+ rw_exit(&udp->udp_rwlock);
+ mutex_exit(&udpf->uf_lock);
+ return (error);
+ }
+ }
+ udp->udp_state = TS_IDLE;
+ udp_bind_hash_insert(udpf, udp);
+ mutex_exit(&udpf->uf_lock);
+ rw_exit(&udp->udp_rwlock);
+
+ if (cl_inet_bind) {
+ /*
+ * Running in cluster mode - register bind information
+ */
+ if (udp->udp_ipversion == IPV4_VERSION) {
+ (*cl_inet_bind)(IPPROTO_UDP, AF_INET,
+ (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)),
+ (in_port_t)udp->udp_port);
+ } else {
+ (*cl_inet_bind)(IPPROTO_UDP, AF_INET6,
+ (uint8_t *)&(udp->udp_v6src),
+ (in_port_t)udp->udp_port);
+ }
+
+ }
+
+ connp->conn_anon_port = (is_system_labeled() && requested_port == 0);
+ if (is_system_labeled() && (!connp->conn_anon_port ||
+ connp->conn_anon_mlp)) {
+ uint16_t mlpport;
+ cred_t *cr = connp->conn_cred;
+ zone_t *zone;
+
+ zone = crgetzone(cr);
+ connp->conn_mlp_type = udp->udp_recvucred ? mlptBoth :
+ mlptSingle;
+ addrtype = tsol_mlp_addr_type(zone->zone_id, IPV6_VERSION,
+ &v6src, us->us_netstack->netstack_ip);
+ if (addrtype == mlptSingle) {
+ rw_enter(&udp->udp_rwlock, RW_WRITER);
+ udp->udp_pending_op = -1;
+ rw_exit(&udp->udp_rwlock);
+ connp->conn_anon_port = B_FALSE;
+ connp->conn_mlp_type = mlptSingle;
+ return (-TNOADDR);
+ }
+ mlpport = connp->conn_anon_port ? PMAPPORT : port;
+ mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport,
+ addrtype);
+ if (mlptype != mlptSingle &&
+ (connp->conn_mlp_type == mlptSingle ||
+ secpolicy_net_bindmlp(cr) != 0)) {
+ if (udp->udp_debug) {
+ (void) strlog(UDP_MOD_ID, 0, 1,
+ SL_ERROR|SL_TRACE,
+ "udp_bind: no priv for multilevel port %d",
+ mlpport);
+ }
+ rw_enter(&udp->udp_rwlock, RW_WRITER);
+ udp->udp_pending_op = -1;
+ rw_exit(&udp->udp_rwlock);
+ connp->conn_anon_port = B_FALSE;
+ connp->conn_mlp_type = mlptSingle;
+ return (-TACCES);
+ }
+
+ /*
+ * If we're specifically binding a shared IP address and the
+ * port is MLP on shared addresses, then check to see if this
+ * zone actually owns the MLP. Reject if not.
+ */
+ if (mlptype == mlptShared && addrtype == mlptShared) {
+ /*
+ * No need to handle exclusive-stack zones since
+ * ALL_ZONES only applies to the shared stack.
+ */
+ zoneid_t mlpzone;
+
+ mlpzone = tsol_mlp_findzone(IPPROTO_UDP,
+ htons(mlpport));
+ if (connp->conn_zoneid != mlpzone) {
+ if (udp->udp_debug) {
+ (void) strlog(UDP_MOD_ID, 0, 1,
+ SL_ERROR|SL_TRACE,
+ "udp_bind: attempt to bind port "
+ "%d on shared addr in zone %d "
+ "(should be %d)",
+ mlpport, connp->conn_zoneid,
+ mlpzone);
+ }
+ rw_enter(&udp->udp_rwlock, RW_WRITER);
+ udp->udp_pending_op = -1;
+ rw_exit(&udp->udp_rwlock);
+ connp->conn_anon_port = B_FALSE;
+ connp->conn_mlp_type = mlptSingle;
+ return (-TACCES);
+ }
+ }
+ if (connp->conn_anon_port) {
+ error = tsol_mlp_anon(zone, mlptype, connp->conn_ulp,
+ port, B_TRUE);
+ if (error != 0) {
+ if (udp->udp_debug) {
+ (void) strlog(UDP_MOD_ID, 0, 1,
+ SL_ERROR|SL_TRACE,
+ "udp_bind: cannot establish anon "
+ "MLP for port %d", port);
+ }
+ rw_enter(&udp->udp_rwlock, RW_WRITER);
+ udp->udp_pending_op = -1;
+ rw_exit(&udp->udp_rwlock);
+ connp->conn_anon_port = B_FALSE;
+ connp->conn_mlp_type = mlptSingle;
+ return (-TACCES);
+ }
+ }
+ connp->conn_mlp_type = mlptype;
+ }
+
+ if (!V6_OR_V4_INADDR_ANY(udp->udp_v6src)) {
+ /*
+ * Append a request for an IRE if udp_v6src not
+ * zero (IPv4 - INADDR_ANY, or IPv6 - all-zeroes address).
+ */
+ mp = allocb(sizeof (ire_t), BPRI_HI);
+ if (!mp) {
+ rw_enter(&udp->udp_rwlock, RW_WRITER);
+ udp->udp_pending_op = -1;
+ rw_exit(&udp->udp_rwlock);
+ return (ENOMEM);
+ }
+ mp->b_wptr += sizeof (ire_t);
+ mp->b_datap->db_type = IRE_DB_REQ_TYPE;
+ }
+ if (udp->udp_family == AF_INET6) {
+ ASSERT(udp->udp_connp->conn_af_isv6);
+ error = ip_proto_bind_laddr_v6(connp, &mp, IPPROTO_UDP,
+ &udp->udp_bound_v6src, udp->udp_port, B_TRUE);
+ } else {
+ ASSERT(!udp->udp_connp->conn_af_isv6);
+ error = ip_proto_bind_laddr_v4(connp, &mp, IPPROTO_UDP,
+ V4_PART_OF_V6(udp->udp_bound_v6src), udp->udp_port,
+ B_TRUE);
+ }
+
+ (void) udp_post_ip_bind_connect(udp, mp, error);
+ return (error);
+}
+
+/*
+ * Bind entry point for UDP sockets (installed as sd_bind in
+ * sock_udp_downcalls).
+ *
+ * A NULL `sa' is treated as an unbind request; otherwise the endpoint is
+ * bound to the supplied address through udp_do_bind().  Negative results
+ * from either helper are negated TLI errors: -TOUTSTATE is reported as
+ * EINVAL and every other one is translated with proto_tlitosyserr().
+ * Non-negative results are returned unchanged.
+ */
+int
+udp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa,
+    socklen_t len, cred_t *cr)
+{
+	conn_t	*connp = (conn_t *)proto_handle;
+	int	rc;
+
+	if (sa != NULL)
+		rc = udp_do_bind(connp, sa, len, cr, B_TRUE);
+	else
+		rc = udp_do_unbind(connp);
+
+	/* Zero and positive (errno) results need no translation. */
+	if (rc >= 0)
+		return (rc);
+
+	return ((rc == -TOUTSTATE) ? EINVAL : proto_tlitosyserr(-rc));
+}
+
+/*
+ * Bind the endpoint without a caller-supplied address: udp_do_bind() is
+ * invoked with a NULL sockaddr, zero length and B_FALSE as its final
+ * argument.  A negative (TLI) result is converted to an errno value;
+ * anything else is returned as is.
+ */
+static int
+udp_implicit_bind(conn_t *connp, cred_t *cr)
+{
+	int	rc = udp_do_bind(connp, NULL, 0, cr, B_FALSE);
+
+	if (rc < 0)
+		rc = proto_tlitosyserr(-rc);
+
+	return (rc);
+}
+
+/*
+ * This routine removes a port number association from a stream. It
+ * is called by udp_unbind and udp_tpi_unbind.
+ *
+ * In the code visible alongside it, it is also reached from udp_bind()
+ * (NULL sockaddr) and from udp_connect() (to undo an implicit bind after
+ * a failed connect).
+ *
+ * Returns 0 on success, or -TOUTSTATE (a negated TLI error) when the
+ * endpoint is already in TS_UNBND or another operation is still pending.
+ */
+static int
+udp_do_unbind(conn_t *connp)
+{
+ udp_t *udp = connp->conn_udp;
+ udp_fanout_t *udpf;
+ udp_stack_t *us = udp->udp_us;
+
+ if (cl_inet_unbind != NULL) {
+ /*
+ * Running in cluster mode - register unbind information
+ *
+ * NOTE(review): this hook runs before the state check below,
+ * so it is invoked even when the call then fails with
+ * -TOUTSTATE - confirm that is intended.
+ */
+ if (udp->udp_ipversion == IPV4_VERSION) {
+ (*cl_inet_unbind)(IPPROTO_UDP, AF_INET,
+ (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)),
+ (in_port_t)udp->udp_port);
+ } else {
+ (*cl_inet_unbind)(IPPROTO_UDP, AF_INET6,
+ (uint8_t *)&(udp->udp_v6src),
+ (in_port_t)udp->udp_port);
+ }
+ }
+
+ rw_enter(&udp->udp_rwlock, RW_WRITER);
+ if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) {
+ rw_exit(&udp->udp_rwlock);
+ return (-TOUTSTATE);
+ }
+ udp->udp_pending_op = T_UNBIND_REQ; /* mark the unbind as in progress */
+ rw_exit(&udp->udp_rwlock);
+
+ /*
+ * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK
+ * and therefore ip_unbind must never return NULL.
+ *
+ * NOTE(review): no return value of ip_unbind() is examined here,
+ * and udp_rwlock is not held across the call.
+ */
+ ip_unbind(connp);
+
+ /*
+ * Once we're unbound from IP, the pending operation may be cleared
+ * here.
+ *
+ * The fanout bucket is looked up from udp_port while it is still
+ * set; the endpoint is then removed from that bucket and its
+ * addresses and port are zeroed under the bucket's uf_lock.
+ */
+ rw_enter(&udp->udp_rwlock, RW_WRITER);
+ udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port,
+ us->us_bind_fanout_size)];
+
+ mutex_enter(&udpf->uf_lock);
+ udp_bind_hash_remove(udp, B_TRUE);
+ V6_SET_ZERO(udp->udp_v6src);
+ V6_SET_ZERO(udp->udp_bound_v6src);
+ udp->udp_port = 0;
+ mutex_exit(&udpf->uf_lock);
+
+ udp->udp_pending_op = -1;
+ udp->udp_state = TS_UNBND;
+ if (udp->udp_family == AF_INET6)
+ (void) udp_build_hdrs(udp); /* result deliberately ignored */
+ rw_exit(&udp->udp_rwlock);
+
+ return (0);
+}
+
+/*
+ * Common completion step run after IP has processed a bind or connect
+ * request for `udp'. The callers visible here are udp_do_bind() and
+ * udp_do_connect().
+ *
+ * udp - endpoint; a pending operation must be recorded (asserted).
+ * ire_mp - optional mblk that was handed to IP. If it is of type
+ * IRE_DB_TYPE its ire_t is checked for IRE_BROADCAST. It is
+ * freed here with freeb() whenever it is non-NULL.
+ * error - result of the IP bind/connect call; returned unchanged.
+ *
+ * On success, a local bind to a broadcast address (state is not
+ * TS_DATA_XFER) has its source address zeroed. On failure, a failed
+ * connect (state TS_DATA_XFER) reverts to the bound source and TS_IDLE,
+ * while a failed bind is undone completely: addresses and port are
+ * cleared, the endpoint is removed from the bind hash and the state
+ * becomes TS_UNBND. In every case udp_pending_op is reset to -1.
+ */
+static int
+udp_post_ip_bind_connect(udp_t *udp, mblk_t *ire_mp, int error)
+{
+ ire_t *ire;
+ udp_fanout_t *udpf;
+ udp_stack_t *us = udp->udp_us;
+
+ ASSERT(udp->udp_pending_op != -1);
+ rw_enter(&udp->udp_rwlock, RW_WRITER);
+ if (error == 0) {
+ /* For udp_do_connect() success */
+ /* udp_do_bind() success will do nothing in here */
+ /*
+ * If a broadcast/multicast address was bound, set
+ * the source address to 0.
+ * This ensures no datagrams with broadcast address
+ * as source address are emitted (which would violate
+ * RFC1122 - Hosts requirements)
+ *
+ * Note that when connecting the returned IRE is
+ * for the destination address and we only perform
+ * the broadcast check for the source address (it
+ * is OK to connect to a broadcast/multicast address.)
+ */
+ if (ire_mp != NULL && ire_mp->b_datap->db_type == IRE_DB_TYPE) {
+ ire = (ire_t *)ire_mp->b_rptr;
+
+ /*
+ * Note: we get IRE_BROADCAST for IPv6 to "mark" a
+ * multicast local address.
+ */
+ udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port,
+ us->us_bind_fanout_size)];
+ if (ire->ire_type == IRE_BROADCAST &&
+ udp->udp_state != TS_DATA_XFER) {
+ ASSERT(udp->udp_pending_op == T_BIND_REQ ||
+ udp->udp_pending_op == O_T_BIND_REQ);
+ /*
+ * This was just a local bind to a broadcast
+ * addr.
+ */
+ mutex_enter(&udpf->uf_lock);
+ V6_SET_ZERO(udp->udp_v6src);
+ mutex_exit(&udpf->uf_lock);
+ if (udp->udp_family == AF_INET6)
+ (void) udp_build_hdrs(udp);
+ } else if (V6_OR_V4_INADDR_ANY(udp->udp_v6src)) {
+ if (udp->udp_family == AF_INET6)
+ (void) udp_build_hdrs(udp);
+ }
+ }
+ } else {
+ udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port,
+ us->us_bind_fanout_size)];
+ mutex_enter(&udpf->uf_lock);
+
+ if (udp->udp_state == TS_DATA_XFER) {
+ /* Connect failed */
+ /* Revert back to the bound source */
+ udp->udp_v6src = udp->udp_bound_v6src;
+ udp->udp_state = TS_IDLE;
+ } else {
+ /* For udp_do_bind() failed */
+ V6_SET_ZERO(udp->udp_v6src);
+ V6_SET_ZERO(udp->udp_bound_v6src);
+ udp->udp_state = TS_UNBND;
+ udp_bind_hash_remove(udp, B_TRUE);
+ udp->udp_port = 0; /* cleared only after the bucket lookup above */
+ }
+ mutex_exit(&udpf->uf_lock);
+ if (udp->udp_family == AF_INET6)
+ (void) udp_build_hdrs(udp);
+ }
+ udp->udp_pending_op = -1;
+ rw_exit(&udp->udp_rwlock);
+ if (ire_mp != NULL)
+ freeb(ire_mp);
+ return (error);
+}
+
+/*
+ * It associates a default destination address with the stream.
+ *
+ * connp - connection whose conn_udp endpoint is being connected.
+ * sa, len - destination address; `len' selects sin_t or sin6_t handling
+ * and any other length yields EINVAL.
+ *
+ * Returns 0 on success. Failures are a mix of positive errno values
+ * (EINVAL, ENOMEM) and negated TLI errors: -TBADADDR for a zero
+ * destination port or a src/port/dst/port tuple that is already in use
+ * by a connected endpoint, and -TOUTSTATE when the endpoint is unbound or
+ * another operation is pending. Once the request has been handed to IP,
+ * the value returned is whatever udp_post_ip_bind_connect() passes back.
+ */
+static int
+udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len)
+{
+ sin6_t *sin6;
+ sin_t *sin;
+ in6_addr_t v6dst;
+ ipaddr_t v4dst;
+ uint16_t dstport;
+ uint32_t flowinfo;
+ mblk_t *ire_mp;
+ udp_fanout_t *udpf;
+ udp_t *udp, *udp1;
+ ushort_t ipversion;
+ udp_stack_t *us;
+ int error;
+
+ udp = connp->conn_udp;
+ us = udp->udp_us;
+
+ /*
+ * Address has been verified by the caller
+ *
+ * Note that `sin' is only assigned for a sin_t-sized address, and
+ * `sin6' and `flowinfo' only for a sin6_t-sized one. The later uses
+ * are guarded by udp_family / ipversion tests; presumably the
+ * caller's verification guarantees that `len' matches udp_family -
+ * TODO confirm.
+ */
+ switch (len) {
+ default:
+ /*
+ * Should never happen
+ */
+ return (EINVAL);
+
+ case sizeof (sin_t):
+ sin = (sin_t *)sa;
+ v4dst = sin->sin_addr.s_addr;
+ dstport = sin->sin_port;
+ IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
+ ASSERT(udp->udp_ipversion == IPV4_VERSION);
+ ipversion = IPV4_VERSION;
+ break;
+
+ case sizeof (sin6_t):
+ sin6 = (sin6_t *)sa;
+ v6dst = sin6->sin6_addr;
+ dstport = sin6->sin6_port;
+ if (IN6_IS_ADDR_V4MAPPED(&v6dst)) {
+ IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst);
+ ipversion = IPV4_VERSION;
+ flowinfo = 0;
+ } else {
+ ipversion = IPV6_VERSION;
+ flowinfo = sin6->sin6_flowinfo;
+ }
+ break;
+ }
+
+ if (dstport == 0)
+ return (-TBADADDR);
+
+ rw_enter(&udp->udp_rwlock, RW_WRITER);
+
+ /*
+ * This UDP must have bound to a port already before doing a connect.
+ * TPI mandates that users must send TPI primitives only 1 at a time
+ * and wait for the response before sending the next primitive.
+ */
+ if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) {
+ rw_exit(&udp->udp_rwlock);
+ (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
+ "udp_connect: bad state, %u", udp->udp_state);
+ return (-TOUTSTATE);
+ }
+ udp->udp_pending_op = T_CONN_REQ;
+ ASSERT(udp->udp_port != 0 && udp->udp_ptpbhn != NULL);
+
+ if (ipversion == IPV4_VERSION) {
+ udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE +
+ udp->udp_ip_snd_options_len;
+ } else {
+ udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len;
+ }
+
+ udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port,
+ us->us_bind_fanout_size)];
+
+ mutex_enter(&udpf->uf_lock);
+ if (udp->udp_state == TS_DATA_XFER) {
+ /* Already connected - clear out state */
+ udp->udp_v6src = udp->udp_bound_v6src;
+ udp->udp_state = TS_IDLE;
+ }
+
+ /*
+ * Create a default IP header with no IP options.
+ */
+ udp->udp_dstport = dstport;
+ udp->udp_ipversion = ipversion;
+ if (ipversion == IPV4_VERSION) {
+ /*
+ * Interpret a zero destination to mean loopback.
+ * Update the T_CONN_REQ (sin/sin6) since it is used to
+ * generate the T_CONN_CON.
+ */
+ if (v4dst == INADDR_ANY) {
+ v4dst = htonl(INADDR_LOOPBACK);
+ IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
+ if (udp->udp_family == AF_INET) {
+ sin->sin_addr.s_addr = v4dst;
+ } else {
+ sin6->sin6_addr = v6dst;
+ }
+ }
+ udp->udp_v6dst = v6dst;
+ udp->udp_flowinfo = 0;
+
+ /*
+ * If the destination address is multicast and
+ * an outgoing multicast interface has been set,
+ * use the address of that interface as our
+ * source address if no source address has been set.
+ */
+ if (V4_PART_OF_V6(udp->udp_v6src) == INADDR_ANY &&
+ CLASSD(v4dst) &&
+ udp->udp_multicast_if_addr != INADDR_ANY) {
+ IN6_IPADDR_TO_V4MAPPED(udp->udp_multicast_if_addr,
+ &udp->udp_v6src);
+ }
+ } else {
+ ASSERT(udp->udp_ipversion == IPV6_VERSION);
+ /*
+ * Interpret a zero destination to mean loopback.
+ * Update the T_CONN_REQ (sin/sin6) since it is used to
+ * generate the T_CONN_CON.
+ */
+ if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) {
+ v6dst = ipv6_loopback;
+ sin6->sin6_addr = v6dst;
+ }
+ udp->udp_v6dst = v6dst;
+ udp->udp_flowinfo = flowinfo;
+ /*
+ * If the destination address is multicast and
+ * an outgoing multicast interface has been set,
+ * then the ip bind logic will pick the correct source
+ * address (i.e. matching the outgoing multicast interface).
+ */
+ }
+
+ /*
+ * Verify that the src/port/dst/port is unique for all
+ * connections in TS_DATA_XFER
+ *
+ * Only endpoints hashed to the same fanout bucket are examined.
+ * An entry conflicts when port, IP version, destination port,
+ * source and destination addresses all match and the zones match
+ * in at least one direction.
+ */
+ for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) {
+ if (udp1->udp_state != TS_DATA_XFER)
+ continue;
+ if (udp->udp_port != udp1->udp_port ||
+ udp->udp_ipversion != udp1->udp_ipversion ||
+ dstport != udp1->udp_dstport ||
+ !IN6_ARE_ADDR_EQUAL(&udp->udp_v6src, &udp1->udp_v6src) ||
+ !IN6_ARE_ADDR_EQUAL(&v6dst, &udp1->udp_v6dst) ||
+ !(IPCL_ZONE_MATCH(udp->udp_connp,
+ udp1->udp_connp->conn_zoneid) ||
+ IPCL_ZONE_MATCH(udp1->udp_connp,
+ udp->udp_connp->conn_zoneid)))
+ continue;
+ mutex_exit(&udpf->uf_lock);
+ udp->udp_pending_op = -1;
+ rw_exit(&udp->udp_rwlock);
+ return (-TBADADDR);
+ }
+ udp->udp_state = TS_DATA_XFER;
+ mutex_exit(&udpf->uf_lock);
+
+ ire_mp = allocb(sizeof (ire_t), BPRI_HI);
+ if (ire_mp == NULL) {
+ /*
+ * Allocation failed: drop back to TS_IDLE and clear the
+ * pending operation. NOTE(review): a source address taken
+ * from udp_multicast_if_addr above is not reverted here.
+ */
+ mutex_enter(&udpf->uf_lock);
+ udp->udp_state = TS_IDLE;
+ udp->udp_pending_op = -1;
+ mutex_exit(&udpf->uf_lock);
+ rw_exit(&udp->udp_rwlock);
+ return (ENOMEM);
+ }
+
+ rw_exit(&udp->udp_rwlock);
+
+ ire_mp->b_wptr += sizeof (ire_t);
+ ire_mp->b_datap->db_type = IRE_DB_REQ_TYPE;
+
+ if (udp->udp_family == AF_INET) {
+ error = ip_proto_bind_connected_v4(connp, &ire_mp, IPPROTO_UDP,
+ &V4_PART_OF_V6(udp->udp_v6src), udp->udp_port,
+ V4_PART_OF_V6(udp->udp_v6dst), udp->udp_dstport,
+ B_TRUE, B_TRUE);
+ } else {
+ error = ip_proto_bind_connected_v6(connp, &ire_mp, IPPROTO_UDP,
+ &udp->udp_v6src, udp->udp_port, &udp->udp_v6dst,
+ &udp->udp_sticky_ipp, udp->udp_dstport, B_TRUE, B_TRUE);
+ }
+
+ /* Finishes the operation and frees ire_mp. */
+ return (udp_post_ip_bind_connect(udp, ire_mp, error));
+}
+
+/*
+ * Connect entry point for UDP sockets (installed as sd_connect in
+ * sock_udp_downcalls).
+ *
+ * A NULL `sa' is a disconnect request; it is only valid while the
+ * endpoint is connected (TS_DATA_XFER), otherwise EINVAL is returned.
+ *
+ * For a real address the sockaddr is verified, an implicit bind is done
+ * if the endpoint is still unbound, udp_dgram_errind is switched on and
+ * udp_do_connect() is called.  On success *id is set to 0 and the
+ * su_connected upcall is made.  On failure, a bind that this call itself
+ * performed is undone, and negated TLI errors are converted to errno
+ * values.
+ *
+ * Returns 0 on success or an errno value.
+ */
+/* ARGSUSED */
+static int
+udp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa,
+    socklen_t len, sock_connid_t *id, cred_t *cr)
+{
+	conn_t		*connp = (conn_t *)proto_handle;
+	udp_t		*udp = connp->conn_udp;
+	int		error;
+	boolean_t	did_bind = B_FALSE;
+
+	if (sa == NULL) {
+		/*
+		 * Disconnect
+		 * Make sure we are connected
+		 */
+		if (udp->udp_state != TS_DATA_XFER)
+			return (EINVAL);
+
+		error = udp_disconnect(connp);
+		return (error);
+	}
+
+	error = proto_verify_ip_addr(udp->udp_family, sa, len);
+	if (error != 0)
+		goto done;
+
+	/* do an implicit bind if necessary */
+	if (udp->udp_state == TS_UNBND) {
+		error = udp_implicit_bind(connp, cr);
+		/*
+		 * We could be racing with an actual bind, in which case
+		 * we would see EPROTO. We cross our fingers and try
+		 * to connect.
+		 */
+		if (!(error == 0 || error == EPROTO))
+			goto done;
+		/*
+		 * Only claim the binding when the implicit bind really
+		 * succeeded.  After EPROTO any binding belongs to the
+		 * racing bind and must not be torn down below if the
+		 * connect fails.
+		 */
+		if (error == 0)
+			did_bind = B_TRUE;
+	}
+	/*
+	 * set SO_DGRAM_ERRIND
+	 */
+	udp->udp_dgram_errind = B_TRUE;
+
+	error = udp_do_connect(connp, sa, len);
+
+	if (error != 0 && did_bind) {
+		int unbind_err;
+
+		/* Undo the implicit bind made by this call. */
+		unbind_err = udp_do_unbind(connp);
+		ASSERT(unbind_err == 0);
+	}
+
+	if (error == 0) {
+		*id = 0;
+		(*connp->conn_upcalls->su_connected)
+		    (connp->conn_upper_handle, 0, NULL, -1);
+	} else if (error < 0) {
+		/* Negative values are negated TLI errors. */
+		error = proto_tlitosyserr(-error);
+	}
+
+done:
+	if (error != 0 && udp->udp_state == TS_DATA_XFER) {
+		/*
+		 * No need to hold locks to set state
+		 * after connect failure socket state is undefined
+		 * We set the state only to imitate old sockfs behavior
+		 */
+		udp->udp_state = TS_IDLE;
+	}
+	return (error);
+}
+
+/*
+ * Send entry point for UDP sockets (installed as sd_send in
+ * sock_udp_downcalls): transmit the M_DATA message `mp'.
+ *
+ * With no destination in `msg' (msg_namelen == 0) the datagram is handed
+ * to udp_send_connected(); EDESTADDRREQ is always reported, any other
+ * error only when udp_dgram_errind is set.
+ *
+ * With a destination, the endpoint is implicitly bound if necessary, a
+ * connected endpoint is rejected with EISCONN, a recorded delayed error
+ * is consumed (and returned if it was recorded for this same
+ * destination), the address is verified and the datagram is passed to
+ * udp_send_not_connected().  `mp' is freed here on every failure path of
+ * this second case.
+ *
+ * Returns 0 or an errno value; errors from udp_send_not_connected() are
+ * suppressed unless udp_dgram_errind is set.
+ */
+/* ARGSUSED */
+int
+udp_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg,
+    cred_t *cr)
+{
+	conn_t		*connp = (conn_t *)proto_handle;
+	udp_t		*udp = connp->conn_udp;
+	udp_stack_t	*us = udp->udp_us;
+	int		error = 0;
+
+	ASSERT(DB_TYPE(mp) == M_DATA);
+
+	/*
+	 * If the socket is connected and no change in destination
+	 */
+	if (msg->msg_namelen == 0) {
+		error = udp_send_connected(connp, mp, msg, cr, curproc->p_pid);
+		if (error == EDESTADDRREQ)
+			return (error);
+		else
+			return (udp->udp_dgram_errind ? error : 0);
+	}
+
+	/*
+	 * Do an implicit bind if necessary.
+	 */
+	if (udp->udp_state == TS_UNBND) {
+		error = udp_implicit_bind(connp, cr);
+		/*
+		 * We could be racing with an actual bind, in which case
+		 * we would see EPROTO. We cross our fingers and try
+		 * to send.
+		 */
+		if (!(error == 0 || error == EPROTO)) {
+			freemsg(mp);
+			return (error);
+		}
+	}
+
+	/* Writer lock: the delayed-error fields may be modified below. */
+	rw_enter(&udp->udp_rwlock, RW_WRITER);
+
+	if (msg->msg_name != NULL && udp->udp_state == TS_DATA_XFER) {
+		rw_exit(&udp->udp_rwlock);
+		freemsg(mp);
+		return (EISCONN);
+	}
+
+	if (udp->udp_delayed_error != 0) {
+		boolean_t	match;
+
+		error = udp->udp_delayed_error;
+		match = B_FALSE;
+		udp->udp_delayed_error = 0;
+		/*
+		 * msg_name has not been verified yet and may be NULL even
+		 * though msg_namelen is non-zero, so it must be checked
+		 * before it is compared with the recorded address.
+		 */
+		switch (udp->udp_family) {
+		case AF_INET: {
+			/* Compare just IP address and port */
+			sin_t *sin1 = (sin_t *)msg->msg_name;
+			sin_t *sin2 = (sin_t *)&udp->udp_delayed_addr;
+
+			if (sin1 != NULL &&
+			    msg->msg_namelen == sizeof (sin_t) &&
+			    sin1->sin_port == sin2->sin_port &&
+			    sin1->sin_addr.s_addr == sin2->sin_addr.s_addr)
+				match = B_TRUE;
+
+			break;
+		}
+		case AF_INET6: {
+			sin6_t *sin1 = (sin6_t *)msg->msg_name;
+			sin6_t *sin2 = (sin6_t *)&udp->udp_delayed_addr;
+
+			if (sin1 != NULL &&
+			    msg->msg_namelen == sizeof (sin6_t) &&
+			    sin1->sin6_port == sin2->sin6_port &&
+			    IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr,
+			    &sin2->sin6_addr))
+				match = B_TRUE;
+			break;
+		}
+		default:
+			ASSERT(0);
+		}
+
+		/* The recorded address is forgotten whether or not it matched. */
+		*((sin6_t *)&udp->udp_delayed_addr) = sin6_null;
+
+		if (match) {
+			rw_exit(&udp->udp_rwlock);
+			freemsg(mp);
+			return (error);
+		}
+	}
+
+	error = proto_verify_ip_addr(udp->udp_family,
+	    (struct sockaddr *)msg->msg_name, msg->msg_namelen);
+	rw_exit(&udp->udp_rwlock);
+
+	if (error != 0) {
+		freemsg(mp);
+		return (error);
+	}
+
+	error = udp_send_not_connected(connp, mp,
+	    (struct sockaddr *)msg->msg_name, msg->msg_namelen, msg, cr,
+	    curproc->p_pid);
+	if (error != 0) {
+		UDP_STAT(us, udp_out_err_output);
+		freemsg(mp);
+	}
+	return (udp->udp_dgram_errind ? error : 0);
+}
+
+/*
+ * Convert this endpoint from a non-STREAMS socket into one attached to
+ * the STREAMS queue pair `q'.
+ *
+ * proto_handle - the conn_t of the endpoint.
+ * q - queue of the fallback stream; its read and write sides
+ * are both pointed at the conn_t.
+ * direct_sockfs - when B_FALSE, udp_disable_direct_sockfs() is called.
+ * quiesced_cb - callback that receives the capability ack, local and
+ * peer addresses and option flags gathered here.
+ *
+ * Any messages parked on udp_fallback_queue_head are pushed up the read
+ * queue, and IPCL_NONSTR is cleared from conn_flags at the end.
+ */
+void
+udp_fallback(sock_lower_handle_t proto_handle, queue_t *q,
+ boolean_t direct_sockfs, so_proto_quiesced_cb_t quiesced_cb)
+{
+ conn_t *connp = (conn_t *)proto_handle;
+ udp_t *udp;
+ struct T_capability_ack tca;
+ struct sockaddr_in6 laddr, faddr;
+ socklen_t laddrlen, faddrlen;
+ short opts;
+ struct stroptions *stropt;
+ mblk_t *stropt_mp;
+ int error;
+
+ udp = connp->conn_udp;
+
+ stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL);
+
+ /*
+ * setup the fallback stream that was allocated
+ *
+ * The values previously stored in the queues' q_ptr fields are
+ * saved in conn_dev and conn_minor_arena before q_ptr is
+ * overwritten with the conn_t.
+ */
+ connp->conn_dev = (dev_t)RD(q)->q_ptr;
+ connp->conn_minor_arena = WR(q)->q_ptr;
+
+ RD(q)->q_ptr = WR(q)->q_ptr = connp;
+
+ WR(q)->q_qinfo = &udp_winit;
+
+ connp->conn_rq = RD(q);
+ connp->conn_wq = WR(q);
+
+ /* Notify stream head about options before sending up data */
+ stropt_mp->b_datap->db_type = M_SETOPTS;
+ stropt_mp->b_wptr += sizeof (*stropt);
+ stropt = (struct stroptions *)stropt_mp->b_rptr;
+ stropt->so_flags = SO_WROFF | SO_HIWAT;
+ stropt->so_wroff =
+ (ushort_t)(udp->udp_max_hdr_len + udp->udp_us->us_wroff_extra);
+ stropt->so_hiwat = udp->udp_rcv_disply_hiwat;
+ putnext(RD(q), stropt_mp);
+
+ /*
+ * Free the helper stream
+ */
+ ip_close_helper_stream(connp);
+
+ if (!direct_sockfs)
+ udp_disable_direct_sockfs(udp);
+
+ /*
+ * Collect the information needed to sync with the sonode
+ */
+ udp_do_capability_ack(udp, &tca, TC1_INFO);
+
+ laddrlen = faddrlen = sizeof (sin6_t);
+ (void) udp_getsockname((sock_lower_handle_t)connp,
+ (struct sockaddr *)&laddr, &laddrlen, NULL);
+ error = udp_getpeername((sock_lower_handle_t)connp,
+ (struct sockaddr *)&faddr, &faddrlen, NULL);
+ if (error != 0)
+ faddrlen = 0; /* no peer address to report */
+
+ opts = 0;
+ if (udp->udp_dgram_errind)
+ opts |= SO_DGRAM_ERRIND;
+ if (udp->udp_dontroute)
+ opts |= SO_DONTROUTE;
+
+ /*
+ * Once we grab the drain lock, no data will be sent up
+ * to the socket. So we notify the socket that the endpoint
+ * is quiescent and it's therefore safe to move data from
+ * the socket to the stream head.
+ */
+ (*quiesced_cb)(connp->conn_upper_handle, q, &tca,
+ (struct sockaddr *)&laddr, laddrlen,
+ (struct sockaddr *)&faddr, faddrlen, opts);
+
+ /*
+ * push up any packets that were queued in udp_t
+ *
+ * udp_recv_lock is dropped around each putnext() call and
+ * re-taken before the list head is examined again.
+ */
+
+ mutex_enter(&udp->udp_recv_lock);
+ while (udp->udp_fallback_queue_head != NULL) {
+ mblk_t *mp;
+ mp = udp->udp_fallback_queue_head;
+ udp->udp_fallback_queue_head = mp->b_next;
+ mutex_exit(&udp->udp_recv_lock);
+ mp->b_next = NULL;
+ putnext(RD(q), mp);
+ mutex_enter(&udp->udp_recv_lock);
+ }
+ udp->udp_fallback_queue_tail = udp->udp_fallback_queue_head;
+ /*
+ * No longer a streams less socket
+ */
+ connp->conn_flags &= ~IPCL_NONSTR;
+ mutex_exit(&udp->udp_recv_lock);
+
+ ASSERT(connp->conn_ref >= 1);
+}
+
+/*
+ * Fill in `sa' with the peer (destination) address of a connected
+ * endpoint.
+ *
+ * The caller must hold udp_rwlock (asserted).  On entry *salenp is the
+ * size of the buffer at `sa'; on success it is updated to the size of the
+ * address written (sin_t for AF_INET, sin6_t for AF_INET6).
+ *
+ * Returns 0 on success, ENOTCONN if the endpoint is not in TS_DATA_XFER,
+ * or EINVAL if the buffer is too small for the address family.
+ */
+static int
+udp_do_getpeername(udp_t *udp, struct sockaddr *sa, uint_t *salenp)
+{
+	sin_t	*sin = (sin_t *)sa;
+	sin6_t	*sin6 = (sin6_t *)sa;
+
+	/* Validate the pointer before the lock assertion dereferences it. */
+	ASSERT(udp != NULL);
+	ASSERT(RW_LOCK_HELD(&udp->udp_rwlock));
+
+	if (udp->udp_state != TS_DATA_XFER)
+		return (ENOTCONN);
+
+	switch (udp->udp_family) {
+	case AF_INET:
+		ASSERT(udp->udp_ipversion == IPV4_VERSION);
+
+		if (*salenp < sizeof (sin_t))
+			return (EINVAL);
+
+		*salenp = sizeof (sin_t);
+		*sin = sin_null;
+		sin->sin_family = AF_INET;
+		sin->sin_port = udp->udp_dstport;
+		sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6dst);
+		break;
+	case AF_INET6:
+		if (*salenp < sizeof (sin6_t))
+			return (EINVAL);
+
+		*salenp = sizeof (sin6_t);
+		*sin6 = sin6_null;
+		sin6->sin6_family = AF_INET6;
+		sin6->sin6_port = udp->udp_dstport;
+		sin6->sin6_addr = udp->udp_v6dst;
+		sin6->sin6_flowinfo = udp->udp_flowinfo;
+		break;
+	}
+
+	return (0);
+}
+
+/*
+ * Peer-name entry point (installed as sd_getpeername in
+ * sock_udp_downcalls): takes udp_rwlock as a reader and lets
+ * udp_do_getpeername() fill in `sa' and `*salenp'.  The result of that
+ * helper is returned unchanged; `cr' is unused.
+ */
+/* ARGSUSED */
+int
+udp_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *sa,
+    socklen_t *salenp, cred_t *cr)
+{
+	udp_t	*udp = ((conn_t *)proto_handle)->conn_udp;
+	int	rc;
+
+	ASSERT(udp != NULL);
+
+	rw_enter(&udp->udp_rwlock, RW_READER);
+	rc = udp_do_getpeername(udp, sa, salenp);
+	rw_exit(&udp->udp_rwlock);
+
+	return (rc);
+}
+
+/*
+ * Report the local address of `udp' in `sa'.
+ *
+ * The caller must hold udp_rwlock (asserted).  On entry *salenp is the
+ * size of the buffer at `sa'; it is updated to sizeof (sin_t) or
+ * sizeof (sin6_t) according to udp_family.  An unbound endpoint yields
+ * an address with only the family filled in.  When udp_v6src is not set,
+ * udp_bound_v6src is reported instead.
+ *
+ * Returns 0 on success or EINVAL if the buffer is too small.
+ */
+static int
+udp_do_getsockname(udp_t *udp, struct sockaddr *sa, uint_t *salenp)
+{
+	ASSERT(udp != NULL);
+	ASSERT(RW_LOCK_HELD(&udp->udp_rwlock));
+
+	if (udp->udp_family == AF_INET) {
+		sin_t	*v4name = (sin_t *)sa;
+
+		ASSERT(udp->udp_ipversion == IPV4_VERSION);
+
+		if (*salenp < sizeof (sin_t))
+			return (EINVAL);
+
+		*salenp = sizeof (sin_t);
+		*v4name = sin_null;
+		v4name->sin_family = AF_INET;
+
+		/* Nothing more to report until the endpoint is bound. */
+		if (udp->udp_state == TS_UNBND)
+			return (0);
+
+		v4name->sin_port = udp->udp_port;
+
+		if (IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) ||
+		    IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
+			/*
+			 * INADDR_ANY
+			 * udp_v6src is not set, we might be bound to
+			 * broadcast/multicast. Use udp_bound_v6src as
+			 * local address instead (that could
+			 * also still be INADDR_ANY)
+			 */
+			v4name->sin_addr.s_addr =
+			    V4_PART_OF_V6(udp->udp_bound_v6src);
+		} else {
+			v4name->sin_addr.s_addr =
+			    V4_PART_OF_V6(udp->udp_v6src);
+		}
+	} else if (udp->udp_family == AF_INET6) {
+		sin6_t	*v6name = (sin6_t *)sa;
+
+		if (*salenp < sizeof (sin6_t))
+			return (EINVAL);
+
+		*salenp = sizeof (sin6_t);
+		*v6name = sin6_null;
+		v6name->sin6_family = AF_INET6;
+
+		/* Nothing more to report until the endpoint is bound. */
+		if (udp->udp_state == TS_UNBND)
+			return (0);
+
+		v6name->sin6_port = udp->udp_port;
+
+		if (IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
+			/*
+			 * UNSPECIFIED
+			 * udp_v6src is not set, we might be bound to
+			 * broadcast/multicast. Use udp_bound_v6src as
+			 * local address instead (that could
+			 * also still be UNSPECIFIED)
+			 */
+			v6name->sin6_addr = udp->udp_bound_v6src;
+		} else {
+			v6name->sin6_addr = udp->udp_v6src;
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Local-name entry point (installed as sd_getsockname in
+ * sock_udp_downcalls): takes udp_rwlock as a reader and lets
+ * udp_do_getsockname() fill in `sa' and `*salenp'.  The result of that
+ * helper is returned unchanged; `cr' is unused.
+ */
+/* ARGSUSED */
+int
+udp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa,
+    socklen_t *salenp, cred_t *cr)
+{
+	udp_t	*udp = ((conn_t *)proto_handle)->conn_udp;
+	int	rc;
+
+	ASSERT(udp != NULL);
+
+	rw_enter(&udp->udp_rwlock, RW_READER);
+	rc = udp_do_getsockname(udp, sa, salenp);
+	rw_exit(&udp->udp_rwlock);
+
+	return (rc);
+}
+
+/*
+ * Get-option entry point (installed as sd_getsockopt in
+ * sock_udp_downcalls).
+ *
+ * The request is first validated with proto_opt_check() against the UDP
+ * option table; a failure there is returned as an errno value (negative
+ * results are converted from TLI).  The option is then read with
+ * udp_opt_get() into a temporary buffer under the reader lock.  A
+ * negative length from udp_opt_get() means the request is passed on to
+ * ip_get_options(); otherwise at most *optlen bytes are copied to
+ * `optvalp' and *optlen is updated to the number of bytes copied.
+ */
+int
+udp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
+    void *optvalp, socklen_t *optlen, cred_t *cr)
+{
+	conn_t		*connp = (conn_t *)proto_handle;
+	udp_t		*udp = connp->conn_udp;
+	t_uscalar_t	bufsize;
+	void		*buf;
+	int		rc;
+	int		optsize;
+
+	rc = proto_opt_check(level, option_name, *optlen, &bufsize,
+	    udp_opt_obj.odb_opt_des_arr,
+	    udp_opt_obj.odb_opt_arr_cnt,
+	    udp_opt_obj.odb_topmost_tpiprovider,
+	    B_FALSE, B_TRUE, cr);
+	if (rc < 0)
+		return (proto_tlitosyserr(-rc));
+	if (rc != 0)
+		return (rc);
+
+	buf = kmem_alloc(bufsize, KM_SLEEP);
+	rw_enter(&udp->udp_rwlock, RW_READER);
+	optsize = udp_opt_get(connp, level, option_name, buf);
+	rw_exit(&udp->udp_rwlock);
+
+	if (optsize >= 0) {
+		/*
+		 * update optlen and copy option value
+		 */
+		t_uscalar_t	size = MIN(optsize, *optlen);
+
+		bcopy(buf, optvalp, size);
+		bcopy(&size, optlen, sizeof (size));
+	}
+
+	/* The scratch buffer is released on both paths. */
+	kmem_free(buf, bufsize);
+
+	if (optsize >= 0)
+		return (0);
+
+	/*
+	 * Pass on to IP
+	 */
+	return (ip_get_options(connp, level, option_name,
+	    optvalp, optlen, cr));
+}
+
+/*
+ * Set-option entry point (installed as sd_setsockopt in
+ * sock_udp_downcalls).
+ *
+ * The request is validated with proto_opt_check(); a failure there is
+ * returned as an errno value (negative results are converted from TLI).
+ * The option is then applied with udp_opt_set() under the writer lock.
+ * A negative result from udp_opt_set() causes the request to be passed
+ * on to ip_set_options(), whose result is returned instead.
+ */
+int
+udp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
+    const void *optvalp, socklen_t optlen, cred_t *cr)
+{
+	conn_t	*connp = (conn_t *)proto_handle;
+	udp_t	*udp = connp->conn_udp;
+	int	rc;
+
+	rc = proto_opt_check(level, option_name, optlen, NULL,
+	    udp_opt_obj.odb_opt_des_arr,
+	    udp_opt_obj.odb_opt_arr_cnt,
+	    udp_opt_obj.odb_topmost_tpiprovider,
+	    B_TRUE, B_FALSE, cr);
+	if (rc < 0)
+		return (proto_tlitosyserr(-rc));
+	if (rc != 0)
+		return (rc);
+
+	rw_enter(&udp->udp_rwlock, RW_WRITER);
+	rc = udp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name,
+	    optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp,
+	    NULL, cr);
+	rw_exit(&udp->udp_rwlock);
+
+	if (rc >= 0)
+		return (rc);
+
+	/*
+	 * Pass on to ip
+	 */
+	return (ip_set_options(connp, level, option_name, optvalp,
+	    optlen, cr));
+}
+
+/*
+ * Clear the connection's flow-controlled flag (conn_flow_cntrld) while
+ * holding the endpoint's udp_recv_lock.
+ */
+void
+udp_clr_flowctrl(sock_lower_handle_t proto_handle)
+{
+	conn_t	*connp = (conn_t *)proto_handle;
+	udp_t	*endpoint = connp->conn_udp;
+
+	mutex_enter(&endpoint->udp_recv_lock);
+	connp->conn_flow_cntrld = B_FALSE;
+	mutex_exit(&endpoint->udp_recv_lock);
+}
+
+/*
+ * Shutdown entry point (installed as sd_shutdown in sock_udp_downcalls).
+ *
+ * The send side is shut down unless `how' is SHUT_RD, and the receive
+ * side unless `how' is SHUT_WR; each is done through the su_opctl upcall.
+ * Always returns 0; `cr' is unused.
+ */
+/* ARGSUSED */
+int
+udp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr)
+{
+	conn_t	*connp = (conn_t *)proto_handle;
+	int	stop_send = (how != SHUT_RD);
+	int	stop_recv = (how != SHUT_WR);
+
+	if (stop_send) {
+		/* shut down the send side */
+		(*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
+		    SOCK_OPCTL_SHUT_SEND, 0);
+	}
+
+	if (stop_recv) {
+		/* shut down the recv side */
+		(*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
+		    SOCK_OPCTL_SHUT_RECV, 0);
+	}
+
+	return (0);
+}
+
+/*
+ * Ioctl entry point (installed as sd_ioctl in sock_udp_downcalls).
+ *
+ * ND_SET, ND_GET, _SIOCSOCKFALLBACK, TI_GETPEERNAME and TI_GETMYNAME are
+ * rejected with EINVAL.  Every other command is forwarded to IP through
+ * the connection's helper stream with ldi_ioctl(), whose result is
+ * returned.
+ */
+int
+udp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg,
+    int mode, int32_t *rvalp, cred_t *cr)
+{
+	conn_t	*connp = (conn_t *)proto_handle;
+
+	switch (cmd) {
+	case ND_SET:
+	case ND_GET:
+	case _SIOCSOCKFALLBACK:
+	case TI_GETPEERNAME:
+	case TI_GETMYNAME:
+		ip1dbg(("udp_ioctl: cmd 0x%x on non streams socket",
+		    cmd));
+		return (EINVAL);
+	default:
+		break;
+	}
+
+	/*
+	 * Pass on to IP using helper stream
+	 */
+	return (ldi_ioctl(connp->conn_helper_info->ip_helper_stream_handle,
+	    cmd, arg, mode, cr, rvalp));
+}
+
+/*
+ * Accept entry point (installed as sd_accept in sock_udp_downcalls).
+ * The operation is not supported here: all arguments are ignored and
+ * EOPNOTSUPP is always returned.
+ */
+/* ARGSUSED */
+int
+udp_accept(sock_lower_handle_t lproto_handle,
+ sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle,
+ cred_t *cr)
+{
+ return (EOPNOTSUPP);
+}
+
+/*
+ * Listen entry point (installed as sd_listen in sock_udp_downcalls).
+ * The operation is not supported here: all arguments are ignored and
+ * EOPNOTSUPP is always returned.
+ */
+/* ARGSUSED */
+int
+udp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr)
+{
+ return (EOPNOTSUPP);
+}
+
+/*
+ * Table of UDP entry points handed to the socket layer. Initialization
+ * is positional, so the order of the entries must match the member order
+ * of sock_downcalls_t; the trailing comments name the intended slots.
+ * Slots set to NULL have no UDP implementation in this table.
+ *
+ * NOTE(review): the slot comment next to udp_clr_flowctrl reads
+ * "sd_setflowctrl" - confirm the member name against the
+ * sock_downcalls_t definition.
+ */
+sock_downcalls_t sock_udp_downcalls = {
+ udp_activate, /* sd_activate */
+ udp_accept, /* sd_accept */
+ udp_bind, /* sd_bind */
+ udp_listen, /* sd_listen */
+ udp_connect, /* sd_connect */
+ udp_getpeername, /* sd_getpeername */
+ udp_getsockname, /* sd_getsockname */
+ udp_getsockopt, /* sd_getsockopt */
+ udp_setsockopt, /* sd_setsockopt */
+ udp_send, /* sd_send */
+ NULL, /* sd_send_uio */
+ NULL, /* sd_recv_uio */
+ NULL, /* sd_poll */
+ udp_shutdown, /* sd_shutdown */
+ udp_clr_flowctrl, /* sd_setflowctrl */
+ udp_ioctl, /* sd_ioctl */
+ udp_close /* sd_close */
+};
diff --git a/usr/src/uts/common/inet/udp/udp_opt_data.c b/usr/src/uts/common/inet/udp/udp_opt_data.c
index f900d0f3e1..0ec5a2c45e 100644
--- a/usr/src/uts/common/inet/udp/udp_opt_data.c
+++ b/usr/src/uts/common/inet/udp/udp_opt_data.c
@@ -19,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/types.h>
#include <sys/stream.h>
#define _SUN_TPI_VERSION 2
@@ -85,9 +83,11 @@ opdes_t udp_opt_arr[] = {
{ SO_PROTOTYPE, SOL_SOCKET, OA_R, OA_R, OP_NP, OP_PASSNEXT, sizeof (int), 0 },
{ IP_OPTIONS, IPPROTO_IP, OA_RW, OA_RW, OP_NP,
- (OP_PASSNEXT|OP_VARLEN|OP_NODEFAULT), 40, -1 /* not initialized */ },
+ (OP_PASSNEXT|OP_VARLEN|OP_NODEFAULT),
+ IP_MAX_OPT_LENGTH + IP_ADDR_LEN, -1 /* not initialized */ },
{ T_IP_OPTIONS, IPPROTO_IP, OA_RW, OA_RW, OP_NP,
- (OP_PASSNEXT|OP_VARLEN|OP_NODEFAULT), 40, -1 /* not initialized */ },
+ (OP_PASSNEXT|OP_VARLEN|OP_NODEFAULT),
+ IP_MAX_OPT_LENGTH + IP_ADDR_LEN, -1 /* not initialized */ },
{ IP_TOS, IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_PASSNEXT, sizeof (int), 0 },
{ T_IP_TOS, IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_PASSNEXT, sizeof (int), 0 },
@@ -318,8 +318,8 @@ uint_t udp_max_optsize; /* initialized when UDP driver is loaded */
optdb_obj_t udp_opt_obj = {
udp_opt_default, /* UDP default value function pointer */
- udp_opt_get, /* UDP get function pointer */
- udp_opt_set, /* UDP set function pointer */
+ udp_tpi_opt_get, /* UDP get function pointer */
+ udp_tpi_opt_set, /* UDP set function pointer */
B_TRUE, /* UDP is tpi provider */
UDP_OPT_ARR_CNT, /* UDP option database count of entries */
udp_opt_arr, /* UDP option database */
diff --git a/usr/src/uts/common/inet/udp/udpddi.c b/usr/src/uts/common/inet/udp/udpddi.c
index 0b80531ab8..63248365cd 100644
--- a/usr/src/uts/common/inet/udp/udpddi.c
+++ b/usr/src/uts/common/inet/udp/udpddi.c
@@ -30,6 +30,8 @@
#include <inet/common.h>
#include <inet/ip.h>
#include <inet/udp_impl.h>
+#include <sys/strsubr.h>
+#include <sys/socketvar.h>
#define INET_NAME "udp"
#define INET_MODDESC "UDP dummy STREAMS module"
@@ -38,6 +40,9 @@
#define INET_MODSTRTAB dummymodinfo
#define INET_DEVSTRTAB udpinfov4
#define INET_MODMTFLAGS D_MP
+#define INET_SOCKDESC "UDP socket module"
+#define INET_SOCK_PROTO_CREATE_FUNC (*udp_create)
+#define INET_SOCK_PROTO_FB_FUNC (*udp_fallback)
/*
* We define both synchronous STREAMS and sockfs direct-access
* mode for UDP module instance, because it is autopushed on
diff --git a/usr/src/uts/common/inet/udp_impl.h b/usr/src/uts/common/inet/udp_impl.h
index 468fa553f4..38d255ac9d 100644
--- a/usr/src/uts/common/inet/udp_impl.h
+++ b/usr/src/uts/common/inet/udp_impl.h
@@ -252,7 +252,9 @@ struct udp_stack {
*/
in_port_t us_min_anonpriv_port;
+ ldi_ident_t us_ldi_ident;
};
+
typedef struct udp_stack udp_stack_t;
/* Internal udp control structure, one per open stream */
@@ -313,9 +315,14 @@ typedef struct udp_s {
/* Following protected by udp_rwlock */
mblk_t *udp_rcv_list_head; /* b_next chain of mblks */
mblk_t *udp_rcv_list_tail; /* last mblk in chain */
+ kmutex_t udp_recv_lock; /* recv lock */
uint_t udp_rcv_cnt; /* total data in rcv_list */
uint_t udp_rcv_msgcnt; /* total msgs in rcv_list */
+ size_t udp_rcv_disply_hiwat; /* user's view of rcvbuf */
size_t udp_rcv_hiwat; /* receive high watermark */
+ size_t udp_rcv_lowat; /* receive low watermark */
+ size_t udp_xmit_hiwat; /* Send buffer high watermark */
+ size_t udp_xmit_lowat; /* Send buffer low watermark */
uint_t udp_label_len; /* length of security label */
uint_t udp_label_len_v6; /* len of v6 security label */
in6_addr_t udp_v6lastdst; /* most recent destination */
@@ -323,6 +330,10 @@ typedef struct udp_s {
uint64_t udp_open_time; /* time when this was opened */
pid_t udp_open_pid; /* process id when this was opened */
udp_stack_t *udp_us; /* Stack instance for zone */
+ int udp_delayed_error;
+ mblk_t *udp_fallback_queue_head;
+ mblk_t *udp_fallback_queue_tail;
+ struct sockaddr_storage udp_delayed_addr;
} udp_t;
/* UDP Protocol header */
@@ -351,7 +362,6 @@ typedef struct udpahdr_s {
#define UDP_STAT(us, x) ((us)->us_statistics.x.value.ui64++)
#define UDP_STAT_UPDATE(us, x, n) \
((us)->us_statistics.x.value.ui64 += (n))
-
#ifdef DEBUG
#define UDP_DBGSTAT(us, x) UDP_STAT(us, x)
#else
@@ -359,25 +369,19 @@ typedef struct udpahdr_s {
#endif /* DEBUG */
extern int udp_opt_default(queue_t *, t_scalar_t, t_scalar_t, uchar_t *);
-extern int udp_opt_get(queue_t *, t_scalar_t, t_scalar_t, uchar_t *);
-extern int udp_opt_set(queue_t *, uint_t, int, int, uint_t, uchar_t *,
+extern int udp_tpi_opt_get(queue_t *, t_scalar_t, t_scalar_t, uchar_t *);
+extern int udp_tpi_opt_set(queue_t *, uint_t, int, int, uint_t, uchar_t *,
uint_t *, uchar_t *, void *, cred_t *, mblk_t *);
extern mblk_t *udp_snmp_get(queue_t *, mblk_t *);
extern int udp_snmp_set(queue_t *, t_scalar_t, t_scalar_t, uchar_t *, int);
extern void udp_close_free(conn_t *);
extern void udp_quiesce_conn(conn_t *);
-extern void udp_ddi_init(void);
-extern void udp_ddi_destroy(void);
-extern void udp_resume_bind(conn_t *, mblk_t *);
-extern void udp_wput(queue_t *, mblk_t *);
-
-extern int udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name,
- uchar_t *ptr);
-extern int udp_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name,
- uchar_t *ptr);
-extern int udp_opt_set(queue_t *q, uint_t optset_context,
- int level, int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp,
- uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk);
+extern void udp_ddi_g_init(void);
+extern void udp_ddi_g_destroy(void);
+extern void udp_g_q_inactive(udp_stack_t *);
+extern void udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr,
+ socklen_t addrlen);
+extern void udp_wput(queue_t *, mblk_t *);
/*
* Object to represent database of options to search passed to
@@ -387,6 +391,13 @@ extern int udp_opt_set(queue_t *q, uint_t optset_context,
extern optdb_obj_t udp_opt_obj;
extern uint_t udp_max_optsize;
+extern sock_lower_handle_t udp_create(int, int, int, sock_downcalls_t **,
+ uint_t *, int *, int, cred_t *);
+extern void udp_fallback(sock_lower_handle_t, queue_t *, boolean_t,
+ so_proto_quiesced_cb_t);
+
+extern sock_downcalls_t sock_udp_downcalls;
+
#endif /* _KERNEL */
#ifdef __cplusplus
diff --git a/usr/src/uts/common/io/comstar/port/iscsit/iscsit_isns.c b/usr/src/uts/common/io/comstar/port/iscsit/iscsit_isns.c
index 0f166f77b7..2708d10c5b 100644
--- a/usr/src/uts/common/io/comstar/port/iscsit/iscsit_isns.c
+++ b/usr/src/uts/common/io/comstar/port/iscsit/iscsit_isns.c
@@ -42,6 +42,7 @@
#include <sys/iscsit/isns_protocol.h>
#include <iscsit.h>
#include <iscsit_isns.h>
+#include <sys/ksocket.h>
/* local defines */
#define MAX_XID (2^16)
@@ -177,7 +178,7 @@ static void
isnst_esi_thread(void *arg);
static boolean_t
-isnst_handle_esi_req(struct sonode *so, isns_pdu_t *pdu, size_t pl_size);
+isnst_handle_esi_req(ksocket_t so, isns_pdu_t *pdu, size_t pl_size);
static void isnst_esi_start(isns_portal_list_t *portal);
static void isnst_esi_stop();
@@ -303,22 +304,22 @@ isnst_esi_stop_thread(isns_esi_tinfo_t *tinfop)
list_remove(&esi_list, tinfop);
/*
- * The only way to break a thread waiting in soaccept() is to signal
- * it with EINTR. See idm_so_tgt_svc_offline for more detail.
- */
- tinfop->esi_so->so_error = EINTR;
- cv_signal(&tinfop->esi_so->so_connind_cv);
-
- /*
- * Must also drop the global lock in case the esi thread is running
- * and trying to update the server timestamps.
+ * The only way to break a thread waiting in ksocket_accept() is to call
+ * ksocket_close.
*/
mutex_exit(&isns_esi_mutex);
ISNS_GLOBAL_UNLOCK();
+ idm_soshutdown(tinfop->esi_so);
+ idm_sodestroy(tinfop->esi_so);
thread_join(tinfop->esi_thread_did);
ISNS_GLOBAL_LOCK();
mutex_enter(&isns_esi_mutex);
+ tinfop->esi_thread_running = B_FALSE;
+ tinfop->esi_so = NULL;
+ tinfop->esi_port = 0;
+ tinfop->esi_registered = B_FALSE;
+ cv_signal(&isns_esi_cv);
tinfop->esi_portal->portal_esi = NULL;
kmem_free(tinfop, sizeof (isns_esi_tinfo_t));
}
@@ -630,18 +631,22 @@ isnst_stop()
*/
static void
-isnst_update_server_timestamp(struct sonode *so)
+isnst_update_server_timestamp(ksocket_t so)
{
iscsit_isns_svr_t *svr;
struct in_addr *sin = NULL, *svr_in;
struct in6_addr *sin6 = NULL, *svr_in6;
-
- if (so->so_faddr_sa->sa_family == AF_INET) {
- sin = &((struct sockaddr_in *)
- ((void *)so->so_faddr_sa))->sin_addr;
+ struct sockaddr_in6 t_addr;
+ socklen_t t_addrlen;
+
+ bzero(&t_addr, sizeof (struct sockaddr_in6));
+ t_addrlen = sizeof (struct sockaddr_in6);
+ (void) ksocket_getpeername(so, (struct sockaddr *)&t_addr, &t_addrlen,
+ CRED());
+ if (((struct sockaddr *)(&t_addr))->sa_family == AF_INET) {
+ sin = &((struct sockaddr_in *)((void *)(&t_addr)))->sin_addr;
} else {
- sin6 = &((struct sockaddr_in6 *)
- ((void *)so->so_faddr_sa))->sin6_addr;
+ sin6 = &(&t_addr)->sin6_addr;
}
/*
@@ -1982,7 +1987,7 @@ static void *
isnst_open_so(struct sockaddr_storage *sa)
{
int sa_sz;
- struct sonode *so;
+ ksocket_t so;
/* determin local IP address */
if (sa->ss_family == AF_INET) {
@@ -2000,7 +2005,8 @@ isnst_open_so(struct sockaddr_storage *sa)
}
if (so != NULL) {
- if (soconnect(so, (struct sockaddr *)sa, sa_sz, 0, 0) != 0) {
+ if (ksocket_connect(so, (struct sockaddr *)sa, sa_sz, CRED())
+ != 0) {
/* not calling isnst_close_so() to */
/* make dtrace output look clear */
idm_soshutdown(so);
@@ -2133,7 +2139,7 @@ static void
isnst_esi_thread(void *arg)
{
isns_esi_tinfo_t *tinfop;
- struct sonode *newso;
+ ksocket_t newso;
struct sockaddr_in sin;
struct sockaddr_in6 sin6;
uint32_t on;
@@ -2141,6 +2147,14 @@ isnst_esi_thread(void *arg)
isns_pdu_t *pdu;
size_t pl_size;
int family;
+	struct sockaddr_in t_addr;	/* IPv4 result of ksocket_getsockname */
+	struct sockaddr_in6 t_addr6;	/* IPv6 result of ksocket_getsockname */
+	socklen_t t_addrlen = sizeof (t_addr);
+	socklen_t t_addrlen6 = sizeof (t_addr6);
+
+	/*
+	 * Clear each address buffer using its own size.  t_addr is only a
+	 * struct sockaddr_in, so clearing sizeof (struct sockaddr_in6) bytes
+	 * through it would write past the end of the object on the stack.
+	 */
+	bzero(&t_addr, sizeof (t_addr));
+	bzero(&t_addr6, sizeof (t_addr6));
tinfop = (isns_esi_tinfo_t *)arg;
tinfop->esi_thread_did = curthread->t_did;
@@ -2155,7 +2169,6 @@ isnst_esi_thread(void *arg)
family = AF_INET6;
}
-
if ((tinfop->esi_so =
idm_socreate(family, SOCK_STREAM, 0)) == NULL) {
cmn_err(CE_WARN,
@@ -2166,7 +2179,7 @@ isnst_esi_thread(void *arg)
mutex_exit(&isns_esi_mutex);
thread_exit();
}
-
+ ksocket_hold(tinfop->esi_so);
/*
* Set options, bind, and listen until we're told to stop
*/
@@ -2181,17 +2194,19 @@ isnst_esi_thread(void *arg)
&sin.sin_addr.s_addr, sizeof (in_addr_t));
on = 1;
- (void) sosetsockopt(tinfop->esi_so, SOL_SOCKET, SO_REUSEADDR,
- (char *)&on, sizeof (on));
+ (void) ksocket_setsockopt(tinfop->esi_so, SOL_SOCKET,
+ SO_REUSEADDR, (char *)&on, sizeof (on), CRED());
- if (sobind(tinfop->esi_so, (struct sockaddr *)&sin,
- sizeof (sin), 0, 0) != 0) {
+ if (ksocket_bind(tinfop->esi_so, (struct sockaddr *)&sin,
+ sizeof (sin), CRED()) != 0) {
idm_sodestroy(tinfop->esi_so);
tinfop->esi_so = NULL;
tinfop->esi_thread_failed = B_TRUE;
} else {
+ (void) ksocket_getsockname(tinfop->esi_so,
+ (struct sockaddr *)(&t_addr), &t_addrlen, CRED());
tinfop->esi_port = ntohs(((struct sockaddr_in *)
- ((void *)tinfop->esi_so->so_laddr_sa))->sin_port);
+ (&t_addr))->sin_port);
}
break;
@@ -2205,17 +2220,19 @@ isnst_esi_thread(void *arg)
&sin6.sin6_addr.s6_addr, sizeof (in6_addr_t));
on = 1;
- (void) sosetsockopt(tinfop->esi_so, SOL_SOCKET,
- SO_REUSEADDR, (char *)&on, sizeof (on));
+ (void) ksocket_setsockopt(tinfop->esi_so, SOL_SOCKET,
+ SO_REUSEADDR, (char *)&on, sizeof (on), CRED());
- if (sobind(tinfop->esi_so, (struct sockaddr *)&sin6,
- sizeof (sin6), 0, 0) != 0) {
+ if (ksocket_bind(tinfop->esi_so, (struct sockaddr *)&sin6,
+ sizeof (sin6), CRED()) != 0) {
idm_sodestroy(tinfop->esi_so);
tinfop->esi_so = NULL;
tinfop->esi_thread_failed = B_TRUE;
} else {
+ (void) ksocket_getsockname(tinfop->esi_so,
+ (struct sockaddr *)(&t_addr6), &t_addrlen6, CRED());
tinfop->esi_port = ntohs(((struct sockaddr_in6 *)
- ((void *)tinfop->esi_so->so_laddr_sa))->sin6_port);
+ (&t_addr6))->sin6_port);
}
break;
@@ -2226,7 +2243,7 @@ isnst_esi_thread(void *arg)
goto esi_thread_exit;
}
- if ((rc = solisten(tinfop->esi_so, 5)) != 0) {
+ if ((rc = ksocket_listen(tinfop->esi_so, 5, CRED())) != 0) {
cmn_err(CE_WARN, "isnst_esi_thread: listen failure 0x%x", rc);
goto esi_thread_exit;
}
@@ -2244,21 +2261,21 @@ isnst_esi_thread(void *arg)
DTRACE_PROBE2(iscsit__isns__esi__accept__wait,
boolean_t, tinfop->esi_thread_running,
boolean_t, tinfop->esi_thread_failed);
- if ((rc = soaccept(tinfop->esi_so, 0, &newso)) != 0) {
+ if ((rc = ksocket_accept(tinfop->esi_so, NULL, NULL,
+ &newso, CRED())) != 0) {
mutex_enter(&isns_esi_mutex);
DTRACE_PROBE2(iscsit__isns__esi__accept__fail,
boolean_t, tinfop->esi_thread_running,
boolean_t, tinfop->esi_thread_failed);
/*
- * If we were interrupted with EINTR, it's not
- * really a failure.
+ * If we were interrupted with EINTR
+ * it's not really a failure.
*/
if (rc != EINTR) {
cmn_err(CE_WARN, "isnst_esi_thread: "
"accept failure (0x%x)", rc);
tinfop->esi_thread_failed = B_TRUE;
}
-
tinfop->esi_thread_running = B_FALSE;
continue;
}
@@ -2281,7 +2298,7 @@ isnst_esi_thread(void *arg)
tinfop->esi_registered = B_TRUE;
}
- (void) soshutdown(newso, SHUT_RDWR);
+ (void) ksocket_close(newso, CRED());
/*
* Do not hold the esi mutex during server timestamp
@@ -2295,15 +2312,7 @@ isnst_esi_thread(void *arg)
}
mutex_exit(&isns_esi_mutex);
esi_thread_exit:
- idm_soshutdown(tinfop->esi_so);
- idm_sodestroy(tinfop->esi_so);
- mutex_enter(&isns_esi_mutex);
- tinfop->esi_thread_running = B_FALSE;
- tinfop->esi_so = NULL;
- tinfop->esi_port = 0;
- tinfop->esi_registered = B_FALSE;
- cv_signal(&isns_esi_cv);
- mutex_exit(&isns_esi_mutex);
+ ksocket_rele(tinfop->esi_so);
thread_exit();
}
@@ -2312,7 +2321,7 @@ esi_thread_exit:
*/
static boolean_t
-isnst_handle_esi_req(struct sonode *so, isns_pdu_t *pdu, size_t pl_size)
+isnst_handle_esi_req(ksocket_t ks, isns_pdu_t *pdu, size_t pl_size)
{
isns_pdu_t *rsp_pdu;
isns_resp_t *rsp;
@@ -2353,7 +2362,7 @@ isnst_handle_esi_req(struct sonode *so, isns_pdu_t *pdu, size_t pl_size)
bcopy(pdu->payload, rsp->data, pl_len - 4);
rsp_pdu->payload_len = htons(pl_len);
- if (isnst_send_pdu(so, rsp_pdu) != 0) {
+ if (isnst_send_pdu(ks, rsp_pdu) != 0) {
cmn_err(CE_WARN, "isnst_handle_esi_req: Send response failed");
esirv = B_FALSE;
}
diff --git a/usr/src/uts/common/io/comstar/port/iscsit/iscsit_isns.h b/usr/src/uts/common/io/comstar/port/iscsit/iscsit_isns.h
index 40c111f491..af0d8982bb 100644
--- a/usr/src/uts/common/io/comstar/port/iscsit/iscsit_isns.h
+++ b/usr/src/uts/common/io/comstar/port/iscsit/iscsit_isns.h
@@ -62,7 +62,7 @@ typedef struct {
struct isns_portal_list_s *esi_portal;
kthread_t *esi_thread;
kt_did_t esi_thread_did;
- struct sonode *esi_so;
+ ksocket_t esi_so;
uint16_t esi_port;
boolean_t esi_thread_running;
boolean_t esi_thread_failed;
diff --git a/usr/src/uts/common/io/comstar/port/iscsit/iscsit_radiuspacket.c b/usr/src/uts/common/io/comstar/port/iscsit/iscsit_radiuspacket.c
index 2441e3b65c..912158cb2d 100644
--- a/usr/src/uts/common/io/comstar/port/iscsit/iscsit_radiuspacket.c
+++ b/usr/src/uts/common/io/comstar/port/iscsit/iscsit_radiuspacket.c
@@ -32,18 +32,19 @@
#include <sys/idm/idm_so.h>
#include <sys/iscsit/radius_packet.h>
#include <sys/iscsit/radius_protocol.h>
+#include <sys/ksocket.h>
static void encode_chap_password(int identifier, int chap_passwd_len,
uint8_t *chap_passwd, uint8_t *result);
-static size_t iscsit_net_recvmsg(void *socket, struct msghdr *msg,
+static size_t iscsit_net_recvmsg(ksocket_t socket, struct msghdr *msg,
int timeout);
/*
* See radius_packet.h.
*/
int
-iscsit_snd_radius_request(void *socket, iscsi_ipaddr_t rsvr_ip_addr,
+iscsit_snd_radius_request(ksocket_t socket, iscsi_ipaddr_t rsvr_ip_addr,
uint32_t rsvr_port, radius_packet_data_t *req_data)
{
int i; /* Loop counter. */
@@ -164,7 +165,7 @@ iscsit_snd_radius_request(void *socket, iscsi_ipaddr_t rsvr_ip_addr,
* See radius_packet.h.
*/
int
-iscsit_rcv_radius_response(void *socket, uint8_t *shared_secret,
+iscsit_rcv_radius_response(ksocket_t socket, uint8_t *shared_secret,
uint32_t shared_secret_len, uint8_t *req_authenticator,
radius_packet_data_t *resp_data)
{
@@ -177,8 +178,6 @@ iscsit_rcv_radius_response(void *socket, uint8_t *shared_secret,
struct iovec iov[1];
struct nmsghdr msg;
- struct sonode *so = (struct sonode *)socket;
- int ret = 0;
tmp_data = kmem_zalloc(MAX_RAD_PACKET_LEN, KM_SLEEP);
iov[0].iov_base = (char *)tmp_data;
@@ -193,11 +192,6 @@ iscsit_rcv_radius_response(void *socket, uint8_t *shared_secret,
msg.msg_iov = iov;
msg.msg_iovlen = 1;
- (void) VOP_IOCTL(SOTOV(so), I_POP, 0, FKIOCTL, CRED(), &ret, NULL);
- if (ret != 0) {
- return (RAD_RSP_RCVD_NO_DATA);
- }
-
received_len = iscsit_net_recvmsg(socket, &msg, RAD_RCV_TIMEOUT);
if (received_len <= (size_t)0) {
@@ -313,36 +307,32 @@ encode_chap_password(int identifier, int chap_passwd_len,
*/
/* ARGSUSED */
static size_t
-iscsit_net_recvmsg(void *socket, struct msghdr *msg, int timeout)
+iscsit_net_recvmsg(ksocket_t socket, struct msghdr *msg, int timeout)
{
- int idx;
- int total_len = 0;
- struct uio uio;
- uchar_t pri = 0;
- int prflag = MSG_ANY;
- rval_t rval;
- struct sonode *sonode = (struct sonode *)socket;
-
- /* Initialization of the uio structure. */
- bzero(&uio, sizeof (uio));
- uio.uio_iov = msg->msg_iov;
- uio.uio_iovcnt = msg->msg_iovlen;
- uio.uio_segflg = UIO_SYSSPACE;
-
- for (idx = 0; idx < msg->msg_iovlen; idx++) {
- total_len += (msg->msg_iov)[idx].iov_len;
- }
- uio.uio_resid = total_len;
-
+ int prflag = msg->msg_flags;
+ size_t recv = 0;
+ struct sockaddr_in6 l_addr, f_addr;
+ socklen_t l_addrlen;
+ socklen_t f_addrlen;
+
+ bzero(&l_addr, sizeof (struct sockaddr_in6));
+ bzero(&f_addr, sizeof (struct sockaddr_in6));
+ l_addrlen = sizeof (struct sockaddr_in6);
+ f_addrlen = sizeof (struct sockaddr_in6);
/* If timeout requested on receive */
if (timeout > 0) {
boolean_t loopback = B_FALSE;
+ (void) ksocket_getsockname(socket, (struct sockaddr *)(&l_addr),
+ &l_addrlen, CRED());
+ (void) ksocket_getpeername(socket, (struct sockaddr *)(&f_addr),
+ &f_addrlen, CRED());
+
/* And this isn't a loopback connection */
- if (sonode->so_laddr.soa_sa->sa_family == AF_INET) {
+ if (((struct sockaddr *)(&l_addr))->sa_family == AF_INET) {
struct sockaddr_in *lin = (struct sockaddr_in *)
- ((void *)sonode->so_laddr.soa_sa);
+ ((void *)(&l_addr));
struct sockaddr_in *fin = (struct sockaddr_in *)
- ((void *)sonode->so_faddr.soa_sa);
+ ((void *)(&f_addr));
if ((lin->sin_family == fin->sin_family) &&
(bcmp(&lin->sin_addr, &fin->sin_addr,
@@ -351,9 +341,9 @@ iscsit_net_recvmsg(void *socket, struct msghdr *msg, int timeout)
}
} else {
struct sockaddr_in6 *lin6 = (struct sockaddr_in6 *)
- ((void *)sonode->so_laddr.soa_sa);
+ ((void *)(&l_addr));
struct sockaddr_in6 *fin6 = (struct sockaddr_in6 *)
- ((void *)sonode->so_faddr.soa_sa);
+ ((void *)(&f_addr));
if ((lin6->sin6_family == fin6->sin6_family) &&
(bcmp(&lin6->sin6_addr, &fin6->sin6_addr,
@@ -361,23 +351,20 @@ iscsit_net_recvmsg(void *socket, struct msghdr *msg, int timeout)
loopback = B_TRUE;
}
}
-
if (loopback == B_FALSE) {
- /*
- * Then poll device for up to the timeout
- * period or the requested data is received.
- */
- if (kstrgetmsg(SOTOV(sonode),
- NULL, NULL, &pri, &prflag, timeout * 1000,
- &rval) == ETIME) {
+ struct timeval tl;
+ tl.tv_sec = timeout;
+ tl.tv_usec = 0;
+ /* Set recv timeout */
+ if (ksocket_setsockopt(socket, SOL_SOCKET, SO_RCVTIMEO,
+ &tl, sizeof (struct timeval), CRED()))
return (0);
- }
}
}
/*
* Receive the requested data. Block until all
- * data is received.
+ * data is received or the receive timeout expires.
*
* resid occurs only when the connection is
* disconnected. In that case it will return
@@ -385,6 +372,6 @@ iscsit_net_recvmsg(void *socket, struct msghdr *msg, int timeout)
* In general this is the total amount we
* requested.
*/
- (void) sorecvmsg((struct sonode *)socket, msg, &uio);
- return (total_len - uio.uio_resid);
+ (void) ksocket_recvmsg(socket, msg, prflag, &recv, CRED());
+ return (recv);
}
diff --git a/usr/src/uts/common/io/ib/clients/rds/rds_opt.c b/usr/src/uts/common/io/ib/clients/rds/rds_opt.c
index f0e863d0f3..902d838ff4 100644
--- a/usr/src/uts/common/io/ib/clients/rds/rds_opt.c
+++ b/usr/src/uts/common/io/ib/clients/rds/rds_opt.c
@@ -19,14 +19,12 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/ib/clients/rds/rds.h>
-#include <inet/mi.h>
+#include <inet/proto_set.h>
#define rds_max_buf 2097152
opdes_t rds_opt_arr[] = {
@@ -143,7 +141,7 @@ rds_opt_set(queue_t *q, uint_t optset_context, int level,
}
if (!checkonly) {
RD(q)->q_hiwat = *i1;
- (void) mi_set_sth_hiwat(RD(q), *i1);
+ (void) proto_set_rx_hiwat(RD(q), NULL, *i1);
}
break;
default:
diff --git a/usr/src/uts/common/io/ib/clients/rds/rdsddi.c b/usr/src/uts/common/io/ib/clients/rds/rdsddi.c
index 306a2a593e..877e56fe8a 100644
--- a/usr/src/uts/common/io/ib/clients/rds/rdsddi.c
+++ b/usr/src/uts/common/io/ib/clients/rds/rdsddi.c
@@ -23,7 +23,6 @@
* Use is subject to license terms.
*/
-
#include <sys/types.h>
#include <sys/conf.h>
#include <sys/modctl.h>
@@ -43,6 +42,7 @@
#include <inet/common.h>
#include <inet/ip.h>
#include <inet/mi.h>
+#include <inet/proto_set.h>
#include <sys/ib/clients/rds/rds.h>
#include <sys/policy.h>
#include <inet/ipclassifier.h>
@@ -226,8 +226,8 @@ rds_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
WR(q)->q_lowat = rds_xmit_lowat;
/* Set the Stream head watermarks */
- (void) mi_set_sth_hiwat(q, rds_recv_hiwat);
- (void) mi_set_sth_lowat(q, rds_recv_lowat);
+ (void) proto_set_rx_hiwat(q, NULL, rds_recv_hiwat);
+ (void) proto_set_rx_lowat(q, NULL, rds_recv_lowat);
return (0);
}
@@ -337,7 +337,7 @@ rds_deliver_new_msg(mblk_t *mp, ipaddr_t local_addr, ipaddr_t rem_addr,
if (rds->rds_port_quota > current_port_quota) {
/* this may result in stalling the port */
rds->rds_port_quota = current_port_quota;
- (void) mi_set_sth_hiwat(rds->rds_ulpd,
+ (void) proto_set_rx_hiwat(rds->rds_ulpd, NULL,
rds->rds_port_quota * UserBufferSize);
RDS_INCR_PORT_QUOTA_ADJUSTED();
}
@@ -599,7 +599,8 @@ rds_bind(queue_t *q, mblk_t *mp)
RDS_INCR_NPORT();
rds->rds_port_quota = RDS_CURRENT_PORT_QUOTA();
RDS_SET_PORT_QUOTA(rds->rds_port_quota);
- (void) mi_set_sth_hiwat(RD(q), rds->rds_port_quota * UserBufferSize);
+ (void) proto_set_rx_hiwat(RD(q), NULL,
+ rds->rds_port_quota * UserBufferSize);
qreply(q, mp);
}
@@ -859,7 +860,7 @@ rds_rsrv(queue_t *q)
current_port_quota = RDS_GET_PORT_QUOTA();
if (rds->rds_port_quota != current_port_quota) {
rds->rds_port_quota = current_port_quota;
- (void) mi_set_sth_hiwat(q,
+ (void) proto_set_rx_hiwat(q, NULL,
rds->rds_port_quota * UserBufferSize);
}
diff --git a/usr/src/uts/common/io/ib/clients/sdp/sdpddi.c b/usr/src/uts/common/io/ib/clients/sdp/sdpddi.c
index 0973888811..d0c3bb8b4e 100644
--- a/usr/src/uts/common/io/ib/clients/sdp/sdpddi.c
+++ b/usr/src/uts/common/io/ib/clients/sdp/sdpddi.c
@@ -23,7 +23,6 @@
* Use is subject to license terms.
*/
-
#include <sys/types.h>
#include <sys/conf.h>
#include <sys/modctl.h>
@@ -182,9 +181,12 @@ sdp_gen_ioctl(queue_t *q, mblk_t *mp)
/* LINTED */
iocp = (struct iocblk *)mp->b_rptr;
switch (iocp->ioc_cmd) {
+ uintptr_t send_enable;
case SIOCSENABLESDP:
bcopy(mp->b_cont->b_rptr, &enable, sizeof (int));
+ send_enable = enable;
+
/*
* Check for root privs.
* if not net config privs - return state of system SDP
@@ -202,7 +204,8 @@ sdp_gen_ioctl(queue_t *q, mblk_t *mp)
* action of enabling/disabling sdp is simply acked.
*/
rw_enter(&sdp_transport_lock, RW_READER);
- if ((enable == 1) && (sdp_transport_handle == NULL) &&
+ if ((send_enable == 1) &&
+ (sdp_transport_handle == NULL) &&
(priv == B_TRUE)) {
/* Initialize sdpib transport driver */
rw_exit(&sdp_transport_lock);
@@ -215,21 +218,20 @@ sdp_gen_ioctl(queue_t *q, mblk_t *mp)
enable = 0;
goto done;
}
- (void) sdp_ioctl(NULL, iocp->ioc_cmd, &enable,
- CRED());
- } else if ((enable == 0) &&
- (sdp_transport_handle != NULL) &&
- (priv == B_TRUE)) {
- (void) sdp_ioctl(NULL, iocp->ioc_cmd, &enable,
- CRED());
- (void) ldi_close(sdp_transport_handle,
- FNDELAY, kcred);
- sdp_transport_handle = NULL;
+ (void) ldi_ioctl(sdp_transport_handle,
+ iocp->ioc_cmd, (intptr_t)&send_enable,
+ FKIOCTL, CRED(), (int *)&enable);
+ } else if (sdp_transport_handle != NULL) {
+ (void) ldi_ioctl(sdp_transport_handle,
+ iocp->ioc_cmd, (intptr_t)&send_enable,
+ FKIOCTL, CRED(), (int *)&enable);
+ if (send_enable == 0 && priv == B_TRUE) {
+ (void) ldi_close(sdp_transport_handle,
+ FNDELAY, kcred);
+ sdp_transport_handle = NULL;
+ }
} else {
- ret = sdp_ioctl(NULL, iocp->ioc_cmd, &enable,
- CRED());
- if (ret == EINVAL)
- enable = 0;
+ enable = 0;
}
rw_exit(&sdp_transport_lock);
diff --git a/usr/src/uts/common/io/idm/idm_so.c b/usr/src/uts/common/io/idm/idm_so.c
index b8c236d749..c868c76ddd 100644
--- a/usr/src/uts/common/io/idm/idm_so.c
+++ b/usr/src/uts/common/io/idm/idm_so.c
@@ -45,7 +45,7 @@
#include <netinet/in.h>
#include <net/if.h>
#include <sys/sockio.h>
-
+#include <sys/ksocket.h>
#include <sys/idm/idm.h>
#include <sys/idm/idm_so.h>
#include <sys/idm/idm_text.h>
@@ -60,14 +60,13 @@ static void idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
static void idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
static void idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
-static idm_status_t idm_so_conn_create_common(idm_conn_t *ic,
- struct sonode *new_so);
+static idm_status_t idm_so_conn_create_common(idm_conn_t *ic, ksocket_t new_so);
static void idm_so_conn_destroy_common(idm_conn_t *ic);
static void idm_so_conn_connect_common(idm_conn_t *ic);
static void idm_set_ini_preconnect_options(idm_so_conn_t *sc);
static void idm_set_ini_postconnect_options(idm_so_conn_t *sc);
-static void idm_set_tgt_connect_options(struct sonode *sonode);
+static void idm_set_tgt_connect_options(ksocket_t so);
static idm_status_t idm_i_so_tx(idm_pdu_t *pdu);
static idm_status_t idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu);
@@ -180,58 +179,17 @@ idm_so_fini(void)
kmem_cache_destroy(idm.idm_sorx_pdu_cache);
}
-struct sonode *
+ksocket_t
idm_socreate(int domain, int type, int protocol)
{
- vnode_t *dvp;
- vnode_t *vp;
- struct snode *csp;
- int err;
- major_t maj;
-
- if ((vp = solookup(domain, type, protocol, NULL, &err)) == NULL) {
-
- /*
- * solookup calls sogetvp if the vp is not found in the cache.
- * Since the call to sogetvp is hardwired to use USERSPACE
- * and declared static we'll do the work here instead.
- */
- err = lookupname(type == SOCK_STREAM ? "/dev/tcp" : "/dev/udp",
- UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);
- if (err != 0)
- return (NULL);
-
- /* Check that it is the correct vnode */
- if (vp->v_type != VCHR) {
- VN_RELE(vp);
- return (NULL);
- }
-
- csp = VTOS(VTOS(vp)->s_commonvp);
- if (!(csp->s_flag & SDIPSET)) {
- char *pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
-
- err = ddi_dev_pathname(vp->v_rdev, S_IFCHR,
- pathname);
- if (err == 0) {
- err = devfs_lookupname(pathname, NULLVPP,
- &dvp);
- }
- VN_RELE(vp);
- kmem_free(pathname, MAXPATHLEN);
- if (err != 0) {
- return (NULL);
- }
- vp = dvp;
- }
+ ksocket_t ks;
- maj = getmajor(vp->v_rdev);
- if (!STREAMSTAB(maj)) {
- VN_RELE(vp);
- return (NULL);
- }
+ if (!ksocket_socket(&ks, domain, type, protocol, KSOCKET_NOSLEEP,
+ CRED())) {
+ return (ks);
+ } else {
+ return (NULL);
}
- return (socreate(vp, domain, type, protocol, SOV_DEFAULT, NULL, &err));
}
/*
@@ -242,9 +200,9 @@ idm_socreate(int domain, int type, int protocol)
* regain control of a thread stuck in idm_sorecv.
*/
void
-idm_soshutdown(struct sonode *so)
+idm_soshutdown(ksocket_t so)
{
- (void) soshutdown(so, SHUT_RDWR);
+ (void) ksocket_shutdown(so, SHUT_RDWR, CRED());
}
/*
@@ -254,13 +212,9 @@ idm_soshutdown(struct sonode *so)
* otherwise undefined behavior will result.
*/
void
-idm_sodestroy(struct sonode *so)
+idm_sodestroy(ksocket_t ks)
{
- vnode_t *vp = SOTOV(so);
-
- (void) VOP_CLOSE(vp, 0, 1, 0, kcred, NULL);
-
- VN_RELE(vp);
+ (void) ksocket_close(ks, CRED());
}
/*
@@ -303,8 +257,7 @@ idm_v6_addr_okay(struct in6_addr *addr6)
int
idm_get_ipaddr(idm_addr_list_t **ipaddr_p)
{
- struct sonode *so4, *so6;
- vnode_t *vp, *vp4, *vp6;
+ ksocket_t so4, so6;
struct lifnum lifn;
struct lifconf lifc;
struct lifreq *lp;
@@ -332,19 +285,15 @@ idm_get_ipaddr(idm_addr_list_t **ipaddr_p)
return (0);
}
- /* setup the vp's for each socket type */
- vp6 = SOTOV(so6);
- vp4 = SOTOV(so4);
- /* use vp6 for ioctls with unspecified families by default */
- vp = vp6;
retry_count:
/* snapshot the current number of interfaces */
lifn.lifn_family = PF_UNSPEC;
lifn.lifn_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
lifn.lifn_count = 0;
- if (VOP_IOCTL(vp, SIOCGLIFNUM, (intptr_t)&lifn, FKIOCTL, kcred,
- &rval, NULL) != 0) {
+ /* use so6 for ioctls with unspecified families by default */
+ if (ksocket_ioctl(so6, SIOCGLIFNUM, (intptr_t)&lifn, &rval, CRED())
+ != 0) {
goto cleanup;
}
@@ -364,8 +313,7 @@ retry_count:
lifc.lifc_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
lifc.lifc_len = bufsize;
lifc.lifc_buf = buf;
- rc = VOP_IOCTL(vp, SIOCGLIFCONF, (intptr_t)&lifc, FKIOCTL, kcred,
- &rval, NULL);
+ rc = ksocket_ioctl(so6, SIOCGLIFCONF, (intptr_t)&lifc, &rval, CRED());
if (rc != 0) {
goto cleanup;
}
@@ -401,16 +349,16 @@ retry_count:
*/
switch (ss.ss_family) {
case AF_INET:
- vp = vp4;
+ rc = ksocket_ioctl(so4, SIOCGLIFFLAGS, (intptr_t)lp,
+ &rval, CRED());
break;
case AF_INET6:
- vp = vp6;
+ rc = ksocket_ioctl(so6, SIOCGLIFFLAGS, (intptr_t)lp,
+ &rval, CRED());
break;
default:
continue;
}
- rc = VOP_IOCTL(vp, SIOCGLIFFLAGS, (intptr_t)lp, FKIOCTL, kcred,
- &rval, NULL);
if (rc == 0) {
/*
* If we got the flags, skip uninteresting
@@ -468,7 +416,7 @@ cleanup:
}
int
-idm_sorecv(struct sonode *so, void *msg, size_t len)
+idm_sorecv(ksocket_t so, void *msg, size_t len)
{
iovec_t iov;
@@ -495,13 +443,13 @@ idm_sorecv(struct sonode *so, void *msg, size_t len)
* -1 if sosendmsg returns success but uio_resid != 0
*/
int
-idm_sosendto(struct sonode *so, void *buff, size_t len,
+idm_sosendto(ksocket_t so, void *buff, size_t len,
struct sockaddr *name, socklen_t namelen)
{
struct msghdr msg;
- struct uio uio;
struct iovec iov[1];
int error;
+ size_t sent = 0;
iov[0].iov_base = buff;
iov[0].iov_len = len;
@@ -510,19 +458,12 @@ idm_sosendto(struct sonode *so, void *buff, size_t len,
bzero(&msg, sizeof (msg));
msg.msg_iov = iov;
msg.msg_iovlen = 1;
-
- /* Initialization of the uio structure. */
- uio.uio_iov = iov;
- uio.uio_iovcnt = 1;
- uio.uio_segflg = UIO_SYSSPACE;
- uio.uio_resid = len;
-
msg.msg_name = name;
msg.msg_namelen = namelen;
- if ((error = sosendmsg(so, &msg, &uio)) == 0) {
+ if ((error = ksocket_sendmsg(so, &msg, 0, &sent, CRED())) == 0) {
/* Data sent */
- if (uio.uio_resid == 0) {
+ if (sent == len) {
/* All data sent. Success. */
return (0);
} else {
@@ -546,11 +487,11 @@ idm_sosendto(struct sonode *so, void *buff, size_t len,
* -1 if sosendmsg returns success but uio_resid != 0
*/
int
-idm_iov_sosend(struct sonode *so, iovec_t *iop, int iovlen, size_t total_len)
+idm_iov_sosend(ksocket_t so, iovec_t *iop, int iovlen, size_t total_len)
{
struct msghdr msg;
- struct uio uio;
int error;
+ size_t sent = 0;
ASSERT(iop != NULL);
@@ -559,16 +500,10 @@ idm_iov_sosend(struct sonode *so, iovec_t *iop, int iovlen, size_t total_len)
msg.msg_iov = iop;
msg.msg_iovlen = iovlen;
- /* Initialization of the uio structure. */
- bzero(&uio, sizeof (uio));
- uio.uio_iov = iop;
- uio.uio_iovcnt = iovlen;
- uio.uio_segflg = UIO_SYSSPACE;
- uio.uio_resid = total_len;
-
- if ((error = sosendmsg(so, &msg, &uio)) == 0) {
+ if ((error = ksocket_sendmsg(so, &msg, 0, &sent, CRED()))
+ == 0) {
/* Data sent */
- if (uio.uio_resid == 0) {
+ if (sent == total_len) {
/* All data sent. Success. */
return (0);
} else {
@@ -592,30 +527,25 @@ idm_iov_sosend(struct sonode *so, iovec_t *iop, int iovlen, size_t total_len)
* -1 if sorecvmsg returns success but uio_resid != 0
*/
int
-idm_iov_sorecv(struct sonode *so, iovec_t *iop, int iovlen, size_t total_len)
+idm_iov_sorecv(ksocket_t so, iovec_t *iop, int iovlen, size_t total_len)
{
struct msghdr msg;
- struct uio uio;
int error;
+ size_t recv;
+ int flags;
ASSERT(iop != NULL);
/* Initialization of the message header. */
bzero(&msg, sizeof (msg));
msg.msg_iov = iop;
- msg.msg_flags = MSG_WAITALL;
msg.msg_iovlen = iovlen;
+ flags = MSG_WAITALL;
- /* Initialization of the uio structure. */
- bzero(&uio, sizeof (uio));
- uio.uio_iov = iop;
- uio.uio_iovcnt = iovlen;
- uio.uio_segflg = UIO_SYSSPACE;
- uio.uio_resid = total_len;
-
- if ((error = sorecvmsg(so, &msg, &uio)) == 0) {
+ if ((error = ksocket_recvmsg(so, &msg, flags, &recv, CRED()))
+ == 0) {
/* Received data */
- if (uio.uio_resid == 0) {
+ if (recv == total_len) {
/* All requested data received. Success */
return (0);
} else {
@@ -639,12 +569,14 @@ idm_set_ini_preconnect_options(idm_so_conn_t *sc)
int abort = 30000;
/* Pre-connect socket options */
- (void) sosetsockopt(sc->ic_so, IPPROTO_TCP, TCP_CONN_NOTIFY_THRESHOLD,
- (char *)&conn_notify, sizeof (int));
- (void) sosetsockopt(sc->ic_so, IPPROTO_TCP, TCP_CONN_ABORT_THRESHOLD,
- (char *)&conn_abort, sizeof (int));
- (void) sosetsockopt(sc->ic_so, IPPROTO_TCP, TCP_ABORT_THRESHOLD,
- (char *)&abort, sizeof (int));
+ (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
+ TCP_CONN_NOTIFY_THRESHOLD, (char *)&conn_notify, sizeof (int),
+ CRED());
+ (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
+ TCP_CONN_ABORT_THRESHOLD, (char *)&conn_abort, sizeof (int),
+ CRED());
+ (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP, TCP_ABORT_THRESHOLD,
+ (char *)&abort, sizeof (int), CRED());
}
static void
@@ -655,28 +587,28 @@ idm_set_ini_postconnect_options(idm_so_conn_t *sc)
const int on = 1;
/* Set postconnect options */
- (void) sosetsockopt(sc->ic_so, IPPROTO_TCP, TCP_NODELAY,
- (char *)&on, sizeof (int));
- (void) sosetsockopt(sc->ic_so, SOL_SOCKET, SO_RCVBUF,
- (char *)&rcvbuf, sizeof (int));
- (void) sosetsockopt(sc->ic_so, SOL_SOCKET, SO_SNDBUF,
- (char *)&sndbuf, sizeof (int));
+ (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP, TCP_NODELAY,
+ (char *)&on, sizeof (int), CRED());
+ (void) ksocket_setsockopt(sc->ic_so, SOL_SOCKET, SO_RCVBUF,
+ (char *)&rcvbuf, sizeof (int), CRED());
+ (void) ksocket_setsockopt(sc->ic_so, SOL_SOCKET, SO_SNDBUF,
+ (char *)&sndbuf, sizeof (int), CRED());
}
static void
-idm_set_tgt_connect_options(struct sonode *sonode)
+idm_set_tgt_connect_options(ksocket_t ks)
{
int32_t rcvbuf = IDM_RCVBUF_SIZE;
int32_t sndbuf = IDM_SNDBUF_SIZE;
const int on = 1;
/* Set connect options */
- (void) sosetsockopt(sonode, SOL_SOCKET, SO_RCVBUF,
- (char *)&rcvbuf, sizeof (int));
- (void) sosetsockopt(sonode, SOL_SOCKET, SO_SNDBUF,
- (char *)&sndbuf, sizeof (int));
- (void) sosetsockopt(sonode, IPPROTO_TCP, TCP_NODELAY,
- (char *)&on, sizeof (on));
+ (void) ksocket_setsockopt(ks, SOL_SOCKET, SO_RCVBUF,
+ (char *)&rcvbuf, sizeof (int), CRED());
+ (void) ksocket_setsockopt(ks, SOL_SOCKET, SO_SNDBUF,
+ (char *)&sndbuf, sizeof (int), CRED());
+ (void) ksocket_setsockopt(ks, IPPROTO_TCP, TCP_NODELAY,
+ (char *)&on, sizeof (on), CRED());
}
static uint32_t
@@ -777,7 +709,7 @@ idm_sorecvhdr(idm_conn_t *ic, idm_pdu_t *pdu)
static idm_status_t
idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic)
{
- struct sonode *so;
+ ksocket_t so;
idm_so_conn_t *so_conn;
idm_status_t idmrc;
@@ -789,8 +721,8 @@ idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic)
/* Bind the socket if configured to do so */
if (cr->cr_bound) {
- if (sobind(so, &cr->cr_bound_addr.sin,
- SIZEOF_SOCKADDR(&cr->cr_bound_addr.sin), 0, 0) != 0) {
+ if (ksocket_bind(so, &cr->cr_bound_addr.sin,
+ SIZEOF_SOCKADDR(&cr->cr_bound_addr.sin), CRED()) != 0) {
idm_sodestroy(so);
return (IDM_STATUS_FAIL);
}
@@ -832,8 +764,8 @@ idm_so_ini_conn_connect(idm_conn_t *ic)
so_conn = ic->ic_transport_private;
- if (soconnect(so_conn->ic_so, &ic->ic_ini_dst_addr.sin,
- (SIZEOF_SOCKADDR(&ic->ic_ini_dst_addr.sin)), 0, 0) != 0) {
+ if (ksocket_connect(so_conn->ic_so, &ic->ic_ini_dst_addr.sin,
+ (SIZEOF_SOCKADDR(&ic->ic_ini_dst_addr.sin)), CRED()) != 0) {
idm_soshutdown(so_conn->ic_so);
return (IDM_STATUS_FAIL);
}
@@ -846,7 +778,7 @@ idm_so_ini_conn_connect(idm_conn_t *ic)
}
idm_status_t
-idm_so_tgt_conn_create(idm_conn_t *ic, struct sonode *new_so)
+idm_so_tgt_conn_create(idm_conn_t *ic, ksocket_t new_so)
{
idm_status_t idmrc;
@@ -875,7 +807,7 @@ idm_so_tgt_conn_connect(idm_conn_t *ic)
}
static idm_status_t
-idm_so_conn_create_common(idm_conn_t *ic, struct sonode *new_so)
+idm_so_conn_create_common(idm_conn_t *ic, ksocket_t new_so)
{
idm_so_conn_t *so_conn;
@@ -917,18 +849,20 @@ static void
idm_so_conn_connect_common(idm_conn_t *ic)
{
idm_so_conn_t *so_conn;
+ struct sockaddr_in6 t_addr;
+ socklen_t t_addrlen = 0;
so_conn = ic->ic_transport_private;
-
- SOP_GETSOCKNAME(so_conn->ic_so);
+ bzero(&t_addr, sizeof (struct sockaddr_in6));
+ t_addrlen = sizeof (struct sockaddr_in6);
/* Set the local and remote addresses in the idm conn handle */
- mutex_enter(&so_conn->ic_so->so_lock);
- bcopy(so_conn->ic_so->so_laddr_sa, &ic->ic_laddr,
- so_conn->ic_so->so_laddr_len);
- bcopy(so_conn->ic_so->so_faddr_sa, &ic->ic_raddr,
- so_conn->ic_so->so_faddr_len);
- mutex_exit(&so_conn->ic_so->so_lock);
+ ksocket_getsockname(so_conn->ic_so, (struct sockaddr *)&t_addr,
+ &t_addrlen, CRED());
+ bcopy(&t_addr, &ic->ic_laddr, t_addrlen);
+ ksocket_getpeername(so_conn->ic_so, (struct sockaddr *)&t_addr,
+ &t_addrlen, CRED());
+ bcopy(&t_addr, &ic->ic_raddr, t_addrlen);
mutex_enter(&ic->ic_mutex);
so_conn->ic_tx_thread = thread_create(NULL, 0, idm_sotx_thread, ic, 0,
@@ -1027,16 +961,16 @@ idm_so_tgt_svc_online(idm_svc_t *is)
sin6_ip.sin6_port = htons(sr->sr_port);
sin6_ip.sin6_addr = in6addr_any;
- (void) sosetsockopt(so_svc->is_so, SOL_SOCKET, SO_REUSEADDR,
- (char *)&on, sizeof (on));
+ (void) ksocket_setsockopt(so_svc->is_so, SOL_SOCKET,
+ SO_REUSEADDR, (char *)&on, sizeof (on), CRED());
/*
* Turn off SO_MAC_EXEMPT so future sobinds succeed
*/
- (void) sosetsockopt(so_svc->is_so, SOL_SOCKET, SO_MAC_EXEMPT,
- (char *)&off, sizeof (off));
+ (void) ksocket_setsockopt(so_svc->is_so, SOL_SOCKET,
+ SO_MAC_EXEMPT, (char *)&off, sizeof (off), CRED());
- if (sobind(so_svc->is_so, (struct sockaddr *)&sin6_ip,
- sizeof (sin6_ip), 0, 0) != 0) {
+ if (ksocket_bind(so_svc->is_so, (struct sockaddr *)&sin6_ip,
+ sizeof (sin6_ip), CRED()) != 0) {
mutex_exit(&is->is_mutex);
idm_sodestroy(so_svc->is_so);
return (IDM_STATUS_FAIL);
@@ -1045,7 +979,7 @@ idm_so_tgt_svc_online(idm_svc_t *is)
idm_set_tgt_connect_options(so_svc->is_so);
- if (solisten(so_svc->is_so, 5) != 0) {
+ if (ksocket_listen(so_svc->is_so, 5, CRED()) != 0) {
mutex_exit(&is->is_mutex);
idm_soshutdown(so_svc->is_so);
idm_sodestroy(so_svc->is_so);
@@ -1063,7 +997,7 @@ idm_so_tgt_svc_online(idm_svc_t *is)
idm_sodestroy(so_svc->is_so);
return (IDM_STATUS_FAIL);
}
-
+ ksocket_hold(so_svc->is_so);
/* Wait for the port watcher thread to start */
while (!so_svc->is_thread_running)
cv_wait(&is->is_cv, &is->is_mutex);
@@ -1081,33 +1015,20 @@ static void
idm_so_tgt_svc_offline(idm_svc_t *is)
{
idm_so_svc_t *so_svc;
-
mutex_enter(&is->is_mutex);
so_svc = (idm_so_svc_t *)is->is_so_svc;
so_svc->is_thread_running = B_FALSE;
mutex_exit(&is->is_mutex);
/*
- * When called from the kernel, soaccept blocks and cannot be woken
- * up via the sockfs API. soclose does not work like you would
- * hope. When the Volo project is available we can switch to that
- * API which should address this issue. For now, we will poke at
- * the socket to wake it up.
+ * Teardown socket
*/
- mutex_enter(&so_svc->is_so->so_lock);
- so_svc->is_so->so_error = EINTR;
- cv_signal(&so_svc->is_so->so_connind_cv);
- mutex_exit(&so_svc->is_so->so_lock);
+ idm_sodestroy(so_svc->is_so);
/*
* Now we expect the port watcher thread to terminate
*/
thread_join(so_svc->is_thread_did);
-
- /*
- * Teardown socket
- */
- idm_sodestroy(so_svc->is_so);
}
/*
@@ -1117,13 +1038,17 @@ void
idm_so_svc_port_watcher(void *arg)
{
idm_svc_t *svc = arg;
- struct sonode *new_so;
+ ksocket_t new_so;
idm_conn_t *ic;
idm_status_t idmrc;
idm_so_svc_t *so_svc;
int rc;
const uint32_t off = 0;
+ struct sockaddr_in6 t_addr;
+ socklen_t t_addrlen;
+ bzero(&t_addr, sizeof (struct sockaddr_in6));
+ t_addrlen = sizeof (struct sockaddr_in6);
mutex_enter(&svc->is_mutex);
so_svc = svc->is_so_svc;
@@ -1138,7 +1063,9 @@ idm_so_svc_port_watcher(void *arg)
while (so_svc->is_thread_running) {
mutex_exit(&svc->is_mutex);
- if ((rc = soaccept(so_svc->is_so, 0, &new_so)) != 0) {
+ if ((rc = ksocket_accept(so_svc->is_so,
+ (struct sockaddr *)&t_addr, &t_addrlen,
+ &new_so, CRED())) != 0) {
mutex_enter(&svc->is_mutex);
if (rc == ECONNABORTED)
continue;
@@ -1148,8 +1075,8 @@ idm_so_svc_port_watcher(void *arg)
/*
* Turn off SO_MAC_EXEMPT so future sobinds succeed
*/
- (void) sosetsockopt(new_so, SOL_SOCKET, SO_MAC_EXEMPT,
- (char *)&off, sizeof (off));
+ (void) ksocket_setsockopt(new_so, SOL_SOCKET, SO_MAC_EXEMPT,
+ (char *)&off, sizeof (off), CRED());
idmrc = idm_svc_conn_create(svc, IDM_TRANSPORT_TYPE_SOCKETS,
&ic);
@@ -1178,7 +1105,7 @@ idm_so_svc_port_watcher(void *arg)
mutex_enter(&svc->is_mutex);
}
-
+ ksocket_rele(so_svc->is_so);
so_svc->is_thread_running = B_FALSE;
mutex_exit(&svc->is_mutex);
diff --git a/usr/src/uts/common/io/ksocket/ksocket.c b/usr/src/uts/common/io/ksocket/ksocket.c
new file mode 100644
index 0000000000..512cab56c0
--- /dev/null
+++ b/usr/src/uts/common/io/ksocket/ksocket.c
@@ -0,0 +1,733 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/file.h>
+#include <sys/stropts.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sysmacros.h>
+#include <sys/filio.h> /* FIO* ioctls */
+#include <sys/sockio.h> /* SIOC* ioctls */
+#include <sys/cmn_err.h>
+#include <sys/ksocket.h>
+#include <io/ksocket/ksocket_impl.h>
+#include <fs/sockfs/sockcommon.h>
+
+#define SOCKETMOD_TCP "tcp"
+#define SOCKETMOD_UDP "udp"
+/*
+ * Kernel Sockets
+ *
+ * Mostly a wrapper around the private socket_* functions.
+ */
+/*
+ * Create a kernel socket and return its handle through *ksp.
+ *
+ * ksp      - out: receives the new handle; set to NULL on any failure
+ * domain   - address family; AF_NCA and AF_UNIX are refused
+ * type     - socket type (e.g. SOCK_STREAM, SOCK_DGRAM)
+ * protocol - protocol number, or 0
+ * flags    - KSOCKET_SLEEP or KSOCKET_NOSLEEP (asserted)
+ * cr       - credentials passed through to socket_create()
+ *
+ * Returns 0 on success, otherwise the errno reported by socket_create()
+ * (or EAFNOSUPPORT for a refused/unknown family).
+ */
+int
+ksocket_socket(ksocket_t *ksp, int domain, int type, int protocol, int flags,
+    struct cred *cr)
+{
+	static const int version = SOV_DEFAULT;
+	struct sonode *so;
+	int error = 0;
+
+	*ksp = NULL;
+
+	if (domain == AF_NCA || domain == AF_UNIX)
+		return (EAFNOSUPPORT);
+
+	ASSERT(flags == KSOCKET_SLEEP || flags == KSOCKET_NOSLEEP);
+
+	so = socket_create(domain, type, protocol, NULL, NULL, version, flags,
+	    cr, &error);
+	if (so == NULL) {
+		char *fallback;
+		int is_inet;
+
+		if (error != EAFNOSUPPORT)
+			return (error);
+
+		/*
+		 * Could be that the root file system is not loaded or
+		 * soconfig has not run yet.  Retry with an explicit module
+		 * name for the TCP and UDP cases.
+		 */
+		is_inet = (domain == AF_INET || domain == AF_INET6);
+		if (type == SOCK_STREAM && is_inet &&
+		    (protocol == 0 || protocol == IPPROTO_TCP)) {
+			fallback = SOCKETMOD_TCP;
+		} else if (type == SOCK_DGRAM && is_inet &&
+		    (protocol == 0 || protocol == IPPROTO_UDP)) {
+			fallback = SOCKETMOD_UDP;
+		} else {
+			return (EAFNOSUPPORT);
+		}
+
+		so = socket_create(domain, type, protocol, NULL, fallback,
+		    version, flags, cr, &error);
+		if (so == NULL)
+			return (error);
+	}
+
+	/* Tag the sonode so KSOCKET_VALID() accepts it. */
+	so->so_mode |= SM_KERNEL;
+	*ksp = SOTOKS(so);
+
+	return (0);
+}
+/*
+ * Bind a kernel socket to a local address.
+ *
+ * Returns ENOTSOCK if ks is not a valid kernel socket, otherwise the
+ * result of socket_bind() called with _SOBIND_SOCKBSD.
+ */
+int
+ksocket_bind(ksocket_t ks, struct sockaddr *addr, socklen_t addrlen,
+    struct cred *cr)
+{
+	if (!KSOCKET_VALID(ks))
+		return (ENOTSOCK);
+
+	return (socket_bind(KSTOSO(ks), addr, addrlen, _SOBIND_SOCKBSD, cr));
+}
+
+/*
+ * Put a kernel socket into the listening state.
+ *
+ * Returns ENOTSOCK if ks is not a valid kernel socket, otherwise the
+ * result of socket_listen().
+ */
+int
+ksocket_listen(ksocket_t ks, int backlog, struct cred *cr)
+{
+	struct sonode *so;
+
+	if (!KSOCKET_VALID(ks))
+		return (ENOTSOCK);
+
+	so = KSTOSO(ks);
+
+	return (socket_listen(so, backlog, cr));
+}
+
+/*
+ * Accept a connection on a listening kernel socket.
+ *
+ * ks       - listening socket
+ * addr     - optional out: peer address of the accepted connection
+ * addrlenp - value-result length for addr; required when addr != NULL
+ * nks      - out: handle of the accepted socket, NULL on failure
+ * cr       - credentials passed to the socket layer
+ *
+ * Returns 0 on success; ENOTSOCK for an invalid handle; EFAULT when addr
+ * is given without addrlenp; otherwise the error from socket_accept() or
+ * socket_getpeername() (ENOTCONN from the latter becomes ECONNABORTED).
+ */
+int
+ksocket_accept(ksocket_t ks, struct sockaddr *addr,
+    socklen_t *addrlenp, ksocket_t *nks, struct cred *cr)
+{
+	struct sonode *nso = NULL;
+	int error;
+
+	*nks = NULL;
+
+	if (!KSOCKET_VALID(ks))
+		return (ENOTSOCK);
+
+	if (addr != NULL && addrlenp == NULL)
+		return (EFAULT);
+
+	if ((error = socket_accept(KSTOSO(ks), KSOCKET_FMODE(ks), cr,
+	    &nso)) != 0)
+		return (error);
+
+	ASSERT(nso != NULL);
+
+	/* The accepted socket is a kernel socket as well. */
+	nso->so_mode |= SM_KERNEL;
+
+	/* addrlenp is known to be non-NULL whenever addr is. */
+	if (addr != NULL) {
+		error = socket_getpeername(nso, addr, addrlenp, B_TRUE, cr);
+		if (error != 0) {
+			/* Could not name the peer: discard the new socket. */
+			(void) socket_close(nso, 0, cr);
+			socket_destroy(nso);
+			if (error == ENOTCONN)
+				error = ECONNABORTED;
+			return (error);
+		}
+	}
+
+	*nks = SOTOKS(nso);
+
+	return (0);
+}
+
+/*
+ * Connect a kernel socket to the given remote address.
+ *
+ * Returns ENOTSOCK if ks is not a valid kernel socket, otherwise the
+ * result of socket_connect() using the file mode from KSOCKET_FMODE().
+ */
+int
+ksocket_connect(ksocket_t ks, const struct sockaddr *addr, socklen_t addrlen,
+    struct cred *cr)
+{
+	struct sonode *so;
+
+	if (!KSOCKET_VALID(ks))
+		return (ENOTSOCK);
+
+	so = KSTOSO(ks);
+
+	return (socket_connect(so, addr, addrlen, KSOCKET_FMODE(ks), 0, cr));
+}
+
+/*
+ * Send a single buffer on a kernel socket.
+ *
+ * ks     - socket to send on
+ * msg    - data buffer; treated as a user address when MSG_USERSPACE is
+ *          set in flags, as a kernel address otherwise
+ * msglen - number of bytes in msg
+ * flags  - MSG_* flags; MSG_EOR is always added before the send
+ * sent   - optional out: bytes consumed (0 on any error)
+ * cr     - credentials passed to socket_sendmsg()
+ *
+ * Returns 0 on success, ENOTSOCK for an invalid handle, otherwise the
+ * error from socket_sendmsg().
+ */
+int
+ksocket_send(ksocket_t ks, void *msg, size_t msglen, int flags,
+    size_t *sent, struct cred *cr)
+{
+	int error;
+	struct nmsghdr msghdr;
+	struct uio auio;
+	struct iovec iov;
+
+	if (!KSOCKET_VALID(ks)) {
+		if (sent != NULL)
+			*sent = 0;
+		return (ENOTSOCK);
+	}
+
+	iov.iov_base = msg;
+	iov.iov_len = msglen;
+
+	bzero(&auio, sizeof (struct uio));
+	auio.uio_loffset = 0;
+	auio.uio_iov = &iov;
+	auio.uio_iovcnt = 1;
+	auio.uio_resid = msglen;
+	if (flags & MSG_USERSPACE)
+		auio.uio_segflg = UIO_USERSPACE;
+	else
+		auio.uio_segflg = UIO_SYSSPACE;
+	auio.uio_extflg = UIO_COPY_DEFAULT;
+	auio.uio_limit = 0;
+	auio.uio_fmode = KSOCKET_FMODE(ks);
+
+	/*
+	 * msghdr lives on the stack and is not zeroed, so every field that
+	 * is handed down must be set explicitly.  Describe the iovec here
+	 * exactly as ksocket_sendto() does rather than passing stack garbage
+	 * in msg_iov/msg_iovlen.
+	 */
+	msghdr.msg_iov = &iov;
+	msghdr.msg_iovlen = 1;
+	msghdr.msg_name = NULL;
+	msghdr.msg_namelen = 0;
+	msghdr.msg_control = NULL;
+	msghdr.msg_controllen = 0;
+	msghdr.msg_flags = flags | MSG_EOR;
+
+	error = socket_sendmsg(KSTOSO(ks), &msghdr, &auio, cr);
+	if (error != 0) {
+		if (sent != NULL)
+			*sent = 0;
+		return (error);
+	}
+
+	/* Whatever the uio no longer holds has been consumed. */
+	if (sent != NULL)
+		*sent = msglen - auio.uio_resid;
+	return (0);
+}
+
+/*
+ * Send a single buffer on a kernel socket to an explicit destination.
+ *
+ * ks      - socket to send on
+ * msg     - data buffer (user address if MSG_USERSPACE is set in flags)
+ * msglen  - number of bytes in msg
+ * flags   - MSG_* flags; MSG_EOR is always added before the send
+ * name    - destination address
+ * namelen - length of name
+ * sent    - optional out: bytes consumed (0 on any error)
+ * cr      - credentials passed to socket_sendmsg()
+ *
+ * Returns 0 on success, ENOTSOCK for an invalid handle, otherwise the
+ * error from socket_sendmsg().
+ */
+int
+ksocket_sendto(ksocket_t ks, void *msg, size_t msglen, int flags,
+    struct sockaddr *name, socklen_t namelen, size_t *sent, struct cred *cr)
+{
+	struct iovec iov;
+	struct uio auio;
+	struct nmsghdr msghdr;
+	int error;
+
+	if (!KSOCKET_VALID(ks)) {
+		if (sent != NULL)
+			*sent = 0;
+		return (ENOTSOCK);
+	}
+
+	/* One iovec covering the caller's buffer. */
+	iov.iov_base = msg;
+	iov.iov_len = msglen;
+
+	/* uio describing that iovec. */
+	bzero(&auio, sizeof (struct uio));
+	auio.uio_iov = &iov;
+	auio.uio_iovcnt = 1;
+	auio.uio_loffset = 0;
+	auio.uio_resid = msglen;
+	auio.uio_segflg = (flags & MSG_USERSPACE) ?
+	    UIO_USERSPACE : UIO_SYSSPACE;
+	auio.uio_extflg = UIO_COPY_DEFAULT;
+	auio.uio_limit = 0;
+	auio.uio_fmode = KSOCKET_FMODE(ks);
+
+	/* Message header carrying the destination address. */
+	msghdr.msg_name = (char *)name;
+	msghdr.msg_namelen = namelen;
+	msghdr.msg_iov = &iov;
+	msghdr.msg_iovlen = 1;
+	msghdr.msg_control = NULL;
+	msghdr.msg_controllen = 0;
+	msghdr.msg_flags = flags | MSG_EOR;
+
+	error = socket_sendmsg(KSTOSO(ks), &msghdr, &auio, cr);
+
+	if (sent != NULL)
+		*sent = (error != 0) ? 0 : msglen - auio.uio_resid;
+
+	return (error);
+}
+
+/*
+ * Send a scatter/gather message on a kernel socket.
+ *
+ * ks    - socket to send on
+ * msg   - message header; msg_iov/msg_iovlen describe the data and
+ *         msg_flags is overwritten with (flags | MSG_EOR)
+ * flags - MSG_* flags; MSG_USERSPACE selects user-space iovec addresses
+ * sent  - optional out: bytes consumed (0 on any error)
+ * cr    - credentials passed to socket_sendmsg()
+ *
+ * Returns 0 on success; ENOTSOCK for an invalid handle; EINVAL if an
+ * iovec length, or the running total, is negative when viewed as
+ * ssize_t; otherwise the error from socket_sendmsg().
+ */
+int
+ksocket_sendmsg(ksocket_t ks, struct nmsghdr *msg, int flags,
+    size_t *sent, struct cred *cr)
+{
+	int error;
+	ssize_t len;
+	int i;
+	struct uio auio;
+
+	if (!KSOCKET_VALID(ks)) {
+		if (sent != NULL)
+			*sent = 0;
+		return (ENOTSOCK);
+	}
+
+	bzero(&auio, sizeof (struct uio));
+	auio.uio_loffset = 0;
+	auio.uio_iov = msg->msg_iov;
+	auio.uio_iovcnt = msg->msg_iovlen;
+	if (flags & MSG_USERSPACE)
+		auio.uio_segflg = UIO_USERSPACE;
+	else
+		auio.uio_segflg = UIO_SYSSPACE;
+	auio.uio_extflg = UIO_COPY_DEFAULT;
+	auio.uio_limit = 0;
+	auio.uio_fmode = KSOCKET_FMODE(ks);
+
+	/* Total the iovec lengths, rejecting negative or wrapped sizes. */
+	len = 0;
+	for (i = 0; i < msg->msg_iovlen; i++) {
+		ssize_t iovlen;
+		iovlen = (msg->msg_iov)[i].iov_len;
+		len += iovlen;
+		if (len < 0 || iovlen < 0) {
+			/*
+			 * Every other failure path reports zero bytes
+			 * sent; do the same here so the caller never
+			 * reads an unwritten *sent.
+			 */
+			if (sent != NULL)
+				*sent = 0;
+			return (EINVAL);
+		}
+	}
+	auio.uio_resid = len;
+
+	msg->msg_flags = flags | MSG_EOR;
+
+	error = socket_sendmsg(KSTOSO(ks), msg, &auio, cr);
+	if (error != 0) {
+		if (sent != NULL)
+			*sent = 0;
+		return (error);
+	}
+
+	if (sent != NULL)
+		*sent = len - auio.uio_resid;
+	return (0);
+}
+
+
+/*
+ * Receive into a single buffer from a kernel socket.
+ *
+ * ks     - socket to receive from
+ * msg    - destination buffer (user address if MSG_USERSPACE is set)
+ * msglen - capacity of msg in bytes
+ * flags  - MSG_* flags; only MSG_OOB, MSG_PEEK, MSG_WAITALL,
+ *          MSG_DONTWAIT and MSG_USERSPACE are passed down
+ * recv   - optional out: bytes placed in msg (0 on any error)
+ * cr     - credentials passed to socket_recvmsg()
+ *
+ * Returns 0 on success, ENOTSOCK for an invalid handle, otherwise the
+ * error from socket_recvmsg().
+ */
+int
+ksocket_recv(ksocket_t ks, void *msg, size_t msglen, int flags,
+    size_t *recv, struct cred *cr)
+{
+	struct iovec iov;
+	struct uio auio;
+	struct nmsghdr msghdr;
+	int error;
+
+	if (!KSOCKET_VALID(ks)) {
+		if (recv != NULL)
+			*recv = 0;
+		return (ENOTSOCK);
+	}
+
+	/* One iovec covering the caller's buffer. */
+	iov.iov_base = msg;
+	iov.iov_len = msglen;
+
+	/* uio describing that iovec. */
+	bzero(&auio, sizeof (struct uio));
+	auio.uio_iov = &iov;
+	auio.uio_iovcnt = 1;
+	auio.uio_loffset = 0;
+	auio.uio_resid = msglen;
+	auio.uio_segflg = (flags & MSG_USERSPACE) ?
+	    UIO_USERSPACE : UIO_SYSSPACE;
+	auio.uio_extflg = UIO_COPY_DEFAULT;
+	auio.uio_limit = 0;
+	auio.uio_fmode = KSOCKET_FMODE(ks);
+
+	/* No source address or control data is requested. */
+	msghdr.msg_name = NULL;
+	msghdr.msg_namelen = 0;
+	msghdr.msg_control = NULL;
+	msghdr.msg_controllen = 0;
+	msghdr.msg_flags = flags & (MSG_OOB | MSG_PEEK | MSG_WAITALL |
+	    MSG_DONTWAIT | MSG_USERSPACE);
+
+	error = socket_recvmsg(KSTOSO(ks), &msghdr, &auio, cr);
+
+	if (recv != NULL)
+		*recv = (error != 0) ? 0 : msglen - auio.uio_resid;
+
+	return (error);
+}
+
+/*
+ * Receive into a single buffer from a kernel socket and report the
+ * sender's address.
+ *
+ * ks      - socket to receive from
+ * msg     - destination buffer (user address if MSG_USERSPACE is set)
+ * msglen  - capacity of msg in bytes
+ * flags   - MSG_* flags; only MSG_OOB, MSG_PEEK, MSG_WAITALL,
+ *           MSG_DONTWAIT and MSG_USERSPACE are passed down
+ * name    - out: sender address; may be NULL if it is not wanted
+ * namelen - value-result: size of name on entry, address length on
+ *           return; must not be NULL
+ * recv    - optional out: bytes placed in msg (0 on any error)
+ * cr      - credentials passed to socket_recvmsg()
+ *
+ * Returns 0 on success, ENOTSOCK for an invalid handle, EFAULT when
+ * namelen is NULL, otherwise the error from socket_recvmsg().
+ */
+int
+ksocket_recvfrom(ksocket_t ks, void *msg, size_t msglen, int flags,
+    struct sockaddr *name, socklen_t *namelen, size_t *recv, struct cred *cr)
+{
+	int error;
+	struct nmsghdr msghdr;
+	struct uio auio;
+	struct iovec iov;
+
+	if (!KSOCKET_VALID(ks)) {
+		if (recv != NULL)
+			*recv = 0;
+		return (ENOTSOCK);
+	}
+
+	/*
+	 * namelen is read below and written on return, so it cannot be
+	 * omitted.  Fail cleanly, as ksocket_getsockname() does for a
+	 * missing length pointer, instead of dereferencing NULL.
+	 */
+	if (namelen == NULL) {
+		if (recv != NULL)
+			*recv = 0;
+		return (EFAULT);
+	}
+
+	iov.iov_base = msg;
+	iov.iov_len = msglen;
+
+	bzero(&auio, sizeof (struct uio));
+	auio.uio_loffset = 0;
+	auio.uio_iov = &iov;
+	auio.uio_iovcnt = 1;
+	auio.uio_resid = msglen;
+	if (flags & MSG_USERSPACE)
+		auio.uio_segflg = UIO_USERSPACE;
+	else
+		auio.uio_segflg = UIO_SYSSPACE;
+	auio.uio_extflg = UIO_COPY_DEFAULT;
+	auio.uio_limit = 0;
+	auio.uio_fmode = KSOCKET_FMODE(ks);
+
+	msghdr.msg_name = (char *)name;
+	msghdr.msg_namelen = *namelen;
+	msghdr.msg_control = NULL;
+	msghdr.msg_controllen = 0;
+	msghdr.msg_flags = flags & (MSG_OOB | MSG_PEEK | MSG_WAITALL |
+	    MSG_DONTWAIT | MSG_USERSPACE);
+
+	error = socket_recvmsg(KSTOSO(ks), &msghdr, &auio, cr);
+	if (error != 0) {
+		if (recv != NULL)
+			*recv = 0;
+		return (error);
+	}
+	if (recv != NULL)
+		*recv = msglen - auio.uio_resid;
+
+	/*
+	 * Hand the address back, but never copy into a NULL buffer.
+	 * NOTE(review): the copy length is whatever socket_recvmsg() left
+	 * in msg_namelen and is not bounded by the caller's original
+	 * *namelen -- confirm the lower layer never reports more than the
+	 * caller supplied.
+	 */
+	if (name != NULL)
+		bcopy(msghdr.msg_name, name, msghdr.msg_namelen);
+	bcopy(&msghdr.msg_namelen, namelen, sizeof (msghdr.msg_namelen));
+	return (0);
+}
+
+/*
+ * Receive a scatter/gather message from a kernel socket.
+ *
+ * ks    - socket to receive from
+ * msg   - message header; msg_iov/msg_iovlen describe the buffers and
+ *         msg_flags is overwritten with the supported subset of flags
+ * flags - MSG_* flags; only MSG_OOB, MSG_PEEK, MSG_WAITALL,
+ *         MSG_DONTWAIT and MSG_USERSPACE are passed down
+ * recv  - optional out: bytes received (0 on any error)
+ * cr    - credentials passed to socket_recvmsg()
+ *
+ * Returns 0 on success; ENOTSOCK for an invalid handle; EINVAL if an
+ * iovec length, or the running total, is negative when viewed as
+ * ssize_t; otherwise the error from socket_recvmsg().
+ */
+int
+ksocket_recvmsg(ksocket_t ks, struct nmsghdr *msg, int flags, size_t *recv,
+    struct cred *cr)
+{
+	int error;
+	ssize_t len;
+	int i;
+	struct uio auio;
+
+	if (!KSOCKET_VALID(ks)) {
+		if (recv != NULL)
+			*recv = 0;
+		return (ENOTSOCK);
+	}
+
+	bzero(&auio, sizeof (struct uio));
+	auio.uio_loffset = 0;
+	auio.uio_iov = msg->msg_iov;
+	auio.uio_iovcnt = msg->msg_iovlen;
+	/*
+	 * The flags argument is authoritative: msg_flags is overwritten
+	 * from it below, and ksocket_recv(), ksocket_recvfrom() and
+	 * ksocket_sendmsg() all pick the address space from flags.  The
+	 * incoming msg_flags is still honoured so callers that set
+	 * MSG_USERSPACE there keep working.
+	 */
+	if ((flags | msg->msg_flags) & MSG_USERSPACE)
+		auio.uio_segflg = UIO_USERSPACE;
+	else
+		auio.uio_segflg = UIO_SYSSPACE;
+	auio.uio_extflg = UIO_COPY_DEFAULT;
+	auio.uio_limit = 0;
+	auio.uio_fmode = KSOCKET_FMODE(ks);
+
+	/* Total the iovec lengths, rejecting negative or wrapped sizes. */
+	len = 0;
+	for (i = 0; i < msg->msg_iovlen; i++) {
+		ssize_t iovlen;
+		iovlen = (msg->msg_iov)[i].iov_len;
+		len += iovlen;
+		if (len < 0 || iovlen < 0) {
+			/* Match the other failure paths: report 0 bytes. */
+			if (recv != NULL)
+				*recv = 0;
+			return (EINVAL);
+		}
+	}
+	auio.uio_resid = len;
+
+	msg->msg_flags = flags & (MSG_OOB | MSG_PEEK | MSG_WAITALL |
+	    MSG_DONTWAIT | MSG_USERSPACE);
+
+	error = socket_recvmsg(KSTOSO(ks), msg, &auio, cr);
+	if (error != 0) {
+		if (recv != NULL)
+			*recv = 0;
+		return (error);
+	}
+	if (recv != NULL)
+		*recv = len - auio.uio_resid;
+	return (0);
+}
+
+/*
+ * Shut down part or all of a kernel socket connection.
+ *
+ * Returns ENOTSOCK if ks is not a valid kernel socket, otherwise the
+ * result of socket_shutdown() for the given 'how'.
+ */
+int
+ksocket_shutdown(ksocket_t ks, int how, struct cred *cr)
+{
+	if (!KSOCKET_VALID(ks))
+		return (ENOTSOCK);
+
+	return (socket_shutdown(KSTOSO(ks), how, cr));
+}
+
+/*
+ * Close and destroy a kernel socket.
+ *
+ * Marks the socket SS_CLOSING, wakes any threads sleeping on the
+ * socket's condition variables, waits until so_count has dropped to 1,
+ * clears any registered callbacks and then closes and destroys the
+ * sonode.
+ *
+ * Returns 0 on success, or ENOTSOCK if ks is NULL, is not a kernel
+ * socket, or is already closing.
+ */
+int
+ksocket_close(ksocket_t ks, struct cred *cr)
+{
+	struct sonode *so;
+
+	/*
+	 * KSOCKET_VALID() tests for NULL, but it is evaluated under
+	 * so_lock, so a NULL handle has to be rejected before the lock is
+	 * touched.
+	 */
+	if (ks == NULL)
+		return (ENOTSOCK);
+
+	so = KSTOSO(ks);
+
+	mutex_enter(&so->so_lock);
+
+	if (!KSOCKET_VALID(ks)) {
+		mutex_exit(&so->so_lock);
+		return (ENOTSOCK);
+	}
+
+	/* From here on KSOCKET_VALID() fails for every other caller. */
+	so->so_state |= SS_CLOSING;
+
+	if (so->so_count > 1) {
+		/* Wake sleepers so they can notice SS_CLOSING. */
+		mutex_enter(&so->so_acceptq_lock);
+		cv_broadcast(&so->so_acceptq_cv);
+		mutex_exit(&so->so_acceptq_lock);
+		cv_broadcast(&so->so_rcv_cv);
+		cv_broadcast(&so->so_state_cv);
+		cv_broadcast(&so->so_want_cv);
+		cv_broadcast(&so->so_snd_cv);
+		cv_broadcast(&so->so_copy_cv);
+	}
+	/* ksocket_rele() signals so_closing_cv when so_count reaches 1. */
+	while (so->so_count > 1)
+		cv_wait(&so->so_closing_cv, &so->so_lock);
+
+	mutex_exit(&so->so_lock);
+	/*
+	 * Remove callbacks, if any.  SS_CLOSING is already set, so this
+	 * call is rejected by KSOCKET_VALID(); the result is ignored.
+	 */
+	(void) ksocket_setcallbacks(ks, NULL, NULL, cr);
+
+	(void) socket_close(so, 0, cr);
+	socket_destroy(so);
+
+	return (0);
+}
+
+/*
+ * Fetch the local address of a kernel socket.
+ *
+ * addrlen is value-result and must not be NULL; addr may be NULL only
+ * when *addrlen is 0.
+ *
+ * Returns ENOTSOCK for an invalid handle, EFAULT for bad address
+ * arguments, otherwise the result of socket_getsockname().
+ */
+int
+ksocket_getsockname(ksocket_t ks, struct sockaddr *addr, socklen_t *addrlen,
+    struct cred *cr)
+{
+	if (!KSOCKET_VALID(ks))
+		return (ENOTSOCK);
+
+	if (addrlen == NULL)
+		return (EFAULT);
+	if (addr == NULL && *addrlen != 0)
+		return (EFAULT);
+
+	return (socket_getsockname(KSTOSO(ks), addr, addrlen, cr));
+}
+
+/*
+ * Fetch the remote (peer) address of a kernel socket.
+ *
+ * addrlen is value-result and must not be NULL; addr may be NULL only
+ * when *addrlen is 0.
+ *
+ * Returns ENOTSOCK for an invalid handle, EFAULT for bad address
+ * arguments, otherwise the result of socket_getpeername().
+ */
+int
+ksocket_getpeername(ksocket_t ks, struct sockaddr *addr, socklen_t *addrlen,
+    struct cred *cr)
+{
+	if (!KSOCKET_VALID(ks))
+		return (ENOTSOCK);
+
+	if (addrlen == NULL)
+		return (EFAULT);
+	if (addr == NULL && *addrlen != 0)
+		return (EFAULT);
+
+	return (socket_getpeername(KSTOSO(ks), addr, addrlen, B_FALSE, cr));
+}
+
+/*
+ * Read a socket option from a kernel socket.
+ *
+ * optlen is value-result, must not be NULL and must not exceed
+ * SO_MAXARGSIZE on entry.
+ *
+ * Returns ENOTSOCK for an invalid handle, EFAULT for a NULL optlen,
+ * EINVAL for an oversized *optlen, otherwise the result of
+ * socket_getsockopt().
+ */
+int
+ksocket_getsockopt(ksocket_t ks, int level, int optname, void *optval,
+    int *optlen, struct cred *cr)
+{
+	if (!KSOCKET_VALID(ks))
+		return (ENOTSOCK);
+
+	if (optlen == NULL)
+		return (EFAULT);
+
+	if (*optlen > SO_MAXARGSIZE)
+		return (EINVAL);
+
+	return (socket_getsockopt(KSTOSO(ks), level, optname, optval,
+	    (socklen_t *)optlen, 0, cr));
+}
+
+/*
+ * Set a socket option on a kernel socket.
+ *
+ * A NULL optval forces the option length to 0 regardless of optlen.
+ *
+ * Returns ENOTSOCK for an invalid handle, otherwise the result of
+ * socket_setsockopt().
+ */
+int
+ksocket_setsockopt(ksocket_t ks, int level, int optname, const void *optval,
+    int optlen, struct cred *cr)
+{
+	int len;
+
+	if (!KSOCKET_VALID(ks))
+		return (ENOTSOCK);
+
+	len = (optval == NULL) ? 0 : optlen;
+
+	return (socket_setsockopt(KSTOSO(ks), level, optname, optval,
+	    (t_uscalar_t)len, cr));
+}
+
+/*
+ * Install, update or remove the event callbacks of a kernel socket.
+ *
+ * cb  - callbacks to install; only entries whose flag is set in
+ *       cb->ksock_cb_flags are touched.  NULL removes all callbacks.
+ * arg - opaque argument stored for the callbacks; must be NULL when cb
+ *       is NULL.
+ *
+ * Returns 0 on success, ENOTSOCK for an invalid handle, or EFAULT when
+ * arg is given without cb.  cr is unused.
+ */
+/* ARGSUSED */
+int
+ksocket_setcallbacks(ksocket_t ks, ksocket_callbacks_t *cb, void *arg,
+    struct cred *cr)
+{
+	struct sonode *so;
+
+	if (!KSOCKET_VALID(ks))
+		return (ENOTSOCK);
+
+	so = KSTOSO(ks);
+
+	if (cb == NULL && arg != NULL)
+		return (EFAULT);
+
+	/* All callback state is changed under so_lock. */
+	mutex_enter(&so->so_lock);
+	if (cb != NULL) {
+		SETCALLBACK(so, cb, connected, KSOCKET_CB_CONNECTED)
+		SETCALLBACK(so, cb, connectfailed, KSOCKET_CB_CONNECTFAILED)
+		SETCALLBACK(so, cb, disconnected, KSOCKET_CB_DISCONNECTED)
+		SETCALLBACK(so, cb, newdata, KSOCKET_CB_NEWDATA)
+		SETCALLBACK(so, cb, newconn, KSOCKET_CB_NEWCONN)
+		SETCALLBACK(so, cb, cansend, KSOCKET_CB_CANSEND)
+		SETCALLBACK(so, cb, oobdata, KSOCKET_CB_OOBDATA)
+		SETCALLBACK(so, cb, cantsendmore, KSOCKET_CB_CANTSENDMORE)
+		SETCALLBACK(so, cb, cantrecvmore, KSOCKET_CB_CANTRECVMORE)
+		so->so_ksock_cb_arg = arg;
+	} else {
+		/* Removal: wipe the whole table and the stored argument. */
+		bzero(&(so->so_ksock_callbacks), sizeof (ksocket_callbacks_t));
+		so->so_ksock_cb_arg = NULL;
+	}
+	mutex_exit(&so->so_lock);
+
+	return (0);
+}
+
+/*
+ * Issue an ioctl on a kernel socket.
+ *
+ * FIOASYNC, SIOCSPGRP, FIOSETOWN, SIOCGPGRP and FIOGETOWN are refused,
+ * as is any command whose upper bits match STR.  Everything else goes
+ * to socket_ioctl() with FKIOCTL added to the file mode.
+ *
+ * Returns ENOTSOCK for an invalid handle, EOPNOTSUPP for a refused
+ * command, otherwise the result of socket_ioctl().
+ */
+int
+ksocket_ioctl(ksocket_t ks, int cmd, intptr_t arg, int *rvalp, struct cred *cr)
+{
+	struct sonode *so;
+
+	if (!KSOCKET_VALID(ks))
+		return (ENOTSOCK);
+
+	so = KSTOSO(ks);
+
+	switch (cmd) {
+	case FIOASYNC:
+	case SIOCSPGRP:
+	case FIOSETOWN:
+	case SIOCGPGRP:
+	case FIOGETOWN:
+		return (EOPNOTSUPP);
+	default:
+		break;
+	}
+
+	/* STREAMS ioctls are not supported */
+	if ((cmd & 0xffffff00U) == STR)
+		return (EOPNOTSUPP);
+
+	return (socket_ioctl(so, cmd, arg, KSOCKET_FMODE(ks) | FKIOCTL, cr,
+	    rvalp));
+}
+
+/*
+ * Send an mblk chain on a kernel socket.
+ *
+ * ks    - socket to send on
+ * msg   - message header passed through to socket_sendmblk()
+ * flags - MSG_* flags.  With MSG_MBLK_QUICKRELE the socket's
+ *         SO_SND_COPYAVOID option is checked first and every mblk in
+ *         the chain is marked STRUIO_ZC.
+ * mpp   - in/out: pointer to the head of the mblk chain
+ * cr    - credentials used for the option lookup and the send
+ *
+ * Returns ENOTSOCK for an invalid handle; ECANCELED when
+ * MSG_MBLK_QUICKRELE was requested but SO_SND_COPYAVOID reads back 0;
+ * otherwise the error from socket_getsockopt() or socket_sendmblk().
+ */
+int
+ksocket_sendmblk(ksocket_t ks, struct nmsghdr *msg, int flags,
+    mblk_t **mpp, cred_t *cr)
+{
+	struct sonode *so;
+	int i_val;
+	/* Value-result length: must describe i_val before the lookup. */
+	socklen_t val_len = sizeof (i_val);
+	mblk_t *mp = *mpp;
+	int error;
+
+	if (!KSOCKET_VALID(ks))
+		return (ENOTSOCK);
+
+	so = KSTOSO(ks);
+
+	if (flags & MSG_MBLK_QUICKRELE) {
+		/* Use the caller's credentials, as every other call does. */
+		error = socket_getsockopt(so, SOL_SOCKET, SO_SND_COPYAVOID,
+		    &i_val, &val_len, 0, cr);
+		if (error != 0)
+			return (error);
+
+		/* Zero copy is not enabled */
+		if (i_val == 0)
+			return (ECANCELED);
+
+		for (; mp != NULL; mp = mp->b_cont)
+			mp->b_datap->db_struioflag |= STRUIO_ZC;
+	}
+
+	error = socket_sendmblk(so, msg, flags, cr, mpp);
+
+	return (error);
+}
+
+
+/*
+ * Take a reference on a kernel socket by incrementing so_count.
+ *
+ * so_lock is acquired around the increment unless the calling thread
+ * already owns it.
+ */
+void
+ksocket_hold(ksocket_t ks)
+{
+	struct sonode *so = KSTOSO(ks);
+	int take_lock;
+
+	take_lock = !mutex_owned(&so->so_lock);
+
+	if (take_lock)
+		mutex_enter(&so->so_lock);
+	so->so_count++;
+	if (take_lock)
+		mutex_exit(&so->so_lock);
+}
+
+/*
+ * Drop a reference taken with ksocket_hold().
+ *
+ * Panics if so_count is below 2 on entry.  When the count falls back
+ * to 1, so_closing_cv is signalled; ksocket_close() waits on it.
+ * so_lock is acquired around the decrement unless the calling thread
+ * already owns it.
+ */
+void
+ksocket_rele(ksocket_t ks)
+{
+	struct sonode *so = KSTOSO(ks);
+	int take_lock;
+
+	/*
+	 * A count of 1 means no thread is working on this ksocket, so
+	 * there is nothing to release.
+	 */
+	if (so->so_count < 2)
+		cmn_err(CE_PANIC, "ksocket_rele: sonode ref count 0 or 1");
+
+	take_lock = !mutex_owned(&so->so_lock);
+
+	if (take_lock)
+		mutex_enter(&so->so_lock);
+	if (--so->so_count == 1)
+		cv_signal(&so->so_closing_cv);
+	if (take_lock)
+		mutex_exit(&so->so_lock);
+}
diff --git a/usr/src/uts/common/io/ksocket/ksocket_impl.h b/usr/src/uts/common/io/ksocket/ksocket_impl.h
new file mode 100644
index 0000000000..ac5251540f
--- /dev/null
+++ b/usr/src/uts/common/io/ksocket/ksocket_impl.h
@@ -0,0 +1,74 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _INET_KSOCKET_KSOCKET_IMPL_H
+#define _INET_KSOCKET_KSOCKET_IMPL_H
+
+/* Convert between the opaque ksocket_t handle and its struct sonode. */
+#define KSTOSO(ks) ((struct sonode *)(ks))
+#define SOTOKS(so) ((ksocket_t)(uintptr_t)(so))
+
+/* Non-zero when the sonode has the SM_KERNEL bit set in so_mode. */
+#define IS_KERNEL_SOCKET(so) ((so)->so_mode & SM_KERNEL)
+
+/* Description string for the ksocket misc module. */
+#define KSOCKET_MOD_VERSION "kernel socket module"
+
+/*
+ * Map a callback member suffix to its event constant.  KSOCKET_CALLBACK()
+ * builds these names by token pasting.
+ */
+#define __KSOCKET_EV_connected KSOCKET_EV_CONNECTED
+#define __KSOCKET_EV_connectfailed KSOCKET_EV_CONNECTFAILED
+#define __KSOCKET_EV_disconnected KSOCKET_EV_DISCONNECTED
+#define __KSOCKET_EV_oobdata KSOCKET_EV_OOBDATA
+#define __KSOCKET_EV_newdata KSOCKET_EV_NEWDATA
+#define __KSOCKET_EV_newconn KSOCKET_EV_NEWCONN
+#define __KSOCKET_EV_cansend KSOCKET_EV_CANSEND
+#define __KSOCKET_EV_cantsendmore KSOCKET_EV_CANTSENDMORE
+#define __KSOCKET_EV_cantrecvmore KSOCKET_EV_CANTRECVMORE
+#define __KSOCKET_EV_error KSOCKET_EV_ERROR
+
+/*
+ * Invoke the socket's ksock_cb_<cbfn> callback, if one is installed, with
+ * the socket handle, the matching __KSOCKET_EV_<cbfn> event, the stored
+ * callback argument and 'arg'.
+ *
+ * The expansion is a bare if statement, not a do { } while (0) block, so
+ * take care when using it directly before an else.
+ */
+#define KSOCKET_CALLBACK(so, cbfn, arg) \
+ if ((so)->so_ksock_callbacks.ksock_cb_##cbfn != NULL) { \
+ (*(so)->so_ksock_callbacks.ksock_cb_##cbfn)(SOTOKS(so), \
+ __KSOCKET_EV_##cbfn, (so)->so_ksock_cb_arg, (arg)); \
+ }
+
+/*
+ * File mode for operations on a kernel socket: always FREAD|FWRITE, plus
+ * FNDELAY when SS_NDELAY or SS_NONBLOCK is set in so_state.  The argument
+ * is a ksocket_t handle despite the parameter name.
+ *
+ * The whole expansion is parenthesized so that it stays a single operand
+ * next to operators that bind tighter than '|' (for example '&' or '==').
+ */
+#define	KSOCKET_FMODE(so)	(FREAD|FWRITE| \
+	((KSTOSO(so)->so_state & (SS_NDELAY|SS_NONBLOCK)) ? FNDELAY : 0))
+
+/*
+ * True when the handle is non-NULL, refers to a sonode with SM_KERNEL
+ * set in so_mode, and that sonode does not have SS_CLOSING set in
+ * so_state.
+ */
+#define KSOCKET_VALID(ks) \
+ ((ks) != NULL && (KSTOSO(ks))->so_mode & SM_KERNEL && \
+ !((KSTOSO(ks))->so_state & SS_CLOSING))
+
+/*
+ * If 'cbflg' is set in the caller-supplied cb->ksock_cb_flags, copy
+ * cb->ksock_cb_<cbfn> into the socket's callback table.  The same flag in
+ * the socket's own ksock_cb_flags is then cleared when the new pointer is
+ * NULL and set otherwise.
+ *
+ * The expansion is a complete if statement; ksocket_setcallbacks() uses
+ * it without a trailing semicolon.
+ */
+#define SETCALLBACK(so, cb, cbfn, cbflg) \
+ if ((cb)->ksock_cb_flags & (cbflg)) { \
+ (so)->so_ksock_callbacks.ksock_cb_##cbfn \
+ = (cb)->ksock_cb_##cbfn; \
+ if ((cb)->ksock_cb_##cbfn == NULL) \
+ (so)->so_ksock_callbacks.ksock_cb_flags \
+ &= ~(cbflg); \
+ else \
+ (so)->so_ksock_callbacks.ksock_cb_flags \
+ |= (cbflg); \
+ }
+
+
+#endif /* _INET_KSOCKET_KSOCKET_IMPL_H */
diff --git a/usr/src/uts/common/io/ksocket/ksocket_mod.c b/usr/src/uts/common/io/ksocket/ksocket_mod.c
new file mode 100644
index 0000000000..da3b4091a5
--- /dev/null
+++ b/usr/src/uts/common/io/ksocket/ksocket_mod.c
@@ -0,0 +1,57 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/types.h>
+#include <sys/sunddi.h>
+#include <sys/errno.h>
+#include <sys/modctl.h>
+
+#include <io/ksocket/ksocket_impl.h>
+
+/* Misc-module description: mod_miscops plus the module's name string. */
+static struct modlmisc modlmisc = {
+ &mod_miscops, KSOCKET_MOD_VERSION
+};
+
+/* Linkage handed to mod_install(), mod_remove() and mod_info() below. */
+static struct modlinkage modlinkage = {
+ MODREV_1, (void *)&modlmisc, NULL
+};
+
+/* Module load entry point: returns the result of mod_install(). */
+int
+_init(void)
+{
+ return (mod_install(&modlinkage));
+}
+
+/* Module unload entry point: returns the result of mod_remove(). */
+int
+_fini(void)
+{
+ return (mod_remove(&modlinkage));
+}
+
+/* Module information entry point: returns the result of mod_info(). */
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
diff --git a/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi.h b/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi.h
index cc42247897..6d59ce3810 100644
--- a/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi.h
+++ b/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi.h
@@ -546,7 +546,7 @@ typedef struct iscsi_conn {
kcondvar_t conn_state_change;
boolean_t conn_state_destroy;
- struct sonode *conn_socket; /* aka. kernel net. socket */
+ void *conn_socket; /* kernel socket */
/* base connection information */
iscsi_sockaddr_t conn_base_addr;
@@ -846,7 +846,7 @@ typedef struct iscsi_network {
int (*connect)(void *, struct sockaddr *, int, int, int);
int (*listen)(void *, int);
void* (*accept)(void *, struct sockaddr *, int *);
- int (*getsockname)(void *);
+ int (*getsockname)(void *, struct sockaddr *, socklen_t *);
int (*getsockopt)(void *, int, int, void *, int *, int);
int (*setsockopt)(void *, int, int, void *, int);
int (*shutdown)(void *, int);
diff --git a/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_ioctl.c b/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_ioctl.c
index e5967dab8c..611b2bc967 100644
--- a/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_ioctl.c
+++ b/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_ioctl.c
@@ -237,12 +237,16 @@ iscsi_ioctl_conn_props_get(iscsi_hba_t *ihp, iscsi_conn_props_t *cp)
iscsi_sess_t *isp;
iscsi_conn_t *icp;
boolean_t rtn;
+ struct sockaddr_in6 t_addr;
+ socklen_t t_addrlen;
/* Let's check the version. */
if (cp->cp_vers != ISCSI_INTERFACE_VERSION) {
return (B_FALSE);
}
+ bzero(&t_addr, sizeof (struct sockaddr_in6));
+ t_addrlen = sizeof (struct sockaddr_in6);
/* Let's find the session. */
rw_enter(&ihp->hba_sess_list_rwlock, RW_READER);
if (iscsi_sess_get(cp->cp_sess_oid, ihp, &isp) != 0) {
@@ -263,18 +267,15 @@ iscsi_ioctl_conn_props_get(iscsi_hba_t *ihp, iscsi_conn_props_t *cp)
ASSERT(icp->conn_sig == ISCSI_SIG_CONN);
if (icp->conn_oid == cp->cp_oid) {
-
- if (icp->conn_socket->so_laddr.soa_len <=
- sizeof (cp->cp_local)) {
- bcopy(icp->conn_socket->so_laddr.soa_sa,
- &cp->cp_local,
- icp->conn_socket->so_laddr.soa_len);
+ iscsi_net->getsockname(icp->conn_socket,
+ (struct sockaddr *)&t_addr, &t_addrlen);
+ if (t_addrlen <= sizeof (cp->cp_local)) {
+ bcopy(&t_addr, &cp->cp_local, t_addrlen);
}
- if (icp->conn_socket->so_faddr.soa_len <=
- sizeof (cp->cp_peer)) {
- bcopy(icp->conn_socket->so_faddr.soa_sa,
- &cp->cp_peer,
- icp->conn_socket->so_faddr.soa_len);
+ ksocket_getpeername((ksocket_t)(icp->conn_socket),
+ (struct sockaddr *)&t_addr, &t_addrlen, CRED());
+ if (t_addrlen <= sizeof (cp->cp_peer)) {
+ bcopy(&t_addr, &cp->cp_peer, t_addrlen);
}
if (icp->conn_state == ISCSI_CONN_STATE_LOGGED_IN) {
diff --git a/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_login.c b/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_login.c
index 8a1c1914b4..c1a201f73c 100644
--- a/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_login.c
+++ b/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_login.c
@@ -1934,10 +1934,12 @@ iscsi_login_failure_str(uchar_t status_class, uchar_t status_detail)
static iscsi_status_t
iscsi_login_connect(iscsi_conn_t *icp)
{
- iscsi_hba_t *ihp;
- iscsi_sess_t *isp;
- struct sockaddr *addr;
- struct sonode *so = NULL;
+ iscsi_hba_t *ihp;
+ iscsi_sess_t *isp;
+ struct sockaddr *addr;
+ struct sockaddr_in6 t_addr;
+ struct sonode *so = NULL;
+ socklen_t t_addrlen;
ASSERT(icp != NULL);
isp = icp->conn_sess;
@@ -1946,6 +1948,8 @@ iscsi_login_connect(iscsi_conn_t *icp)
ASSERT(ihp != NULL);
addr = &icp->conn_curr_addr.sin;
+ t_addrlen = sizeof (struct sockaddr_in6);
+ bzero(&t_addr, sizeof (struct sockaddr_in6));
so = iscsi_net->socket(addr->sa_family, SOCK_STREAM, 0);
if (so == NULL) {
cmn_err(CE_WARN, "iscsi connection(%u) unable "
@@ -1982,7 +1986,8 @@ iscsi_login_connect(iscsi_conn_t *icp)
}
icp->conn_socket = so;
- if (iscsi_net->getsockname(icp->conn_socket) != 0) {
+ if (iscsi_net->getsockname(icp->conn_socket,
+ (struct sockaddr *)&t_addr, &t_addrlen) != 0) {
cmn_err(CE_NOTE, "iscsi connection(%u) failed to get "
"socket information", icp->conn_oid);
}
diff --git a/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_net.c b/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_net.c
index 23e64684a1..1f06106bf2 100644
--- a/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_net.c
+++ b/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_net.c
@@ -34,8 +34,9 @@
#include <sys/fs/dv_node.h> /* declares: devfs_lookupname */
#include <sys/bootconf.h>
#include <sys/bootprops.h>
-
+#include <netinet/in.h>
#include "iscsi.h"
+#include <sys/ksocket.h>
/*
* This is a high level description of the default
@@ -60,42 +61,42 @@
* The following listing describes the iscsi_net
* entry points:
*
- * socket - Creates TCP/IP socket connection. In the
- * default implementation creates a sonode
- * via the sockfs kernel layer.
- * bind - Performs standard TCP/IP BSD operation. In
- * the default implementation this only act
- * as a soft binding based on the IP and routing
- * tables. It would be preferred if this was
- * a hard binding but that is currently not
- * possible with Solaris's networking stack.
- * connect - Performs standard TCP/IP BSD operation. This
- * establishes the TCP SYN to the peer IP address.
- * listen - Performs standard TCP/IP BSD operation. This
- * listens for incoming peer connections.
- * accept - Performs standard TCP/IP BSD operation. This
- * accepts incoming peer connections.
- * shutdown - This disconnects the TCP/IP connection while
- * maintaining the resources.
- * close - This disconnects the TCP/IP connection and
- * releases the resources.
+ * socket - Creates TCP/IP socket connection. In the
+ * default implementation creates a sonode
+ * via the sockfs kernel layer.
+ * bind - Performs standard TCP/IP BSD operation. In
+ * the default implementation this only acts
+ * as a soft binding based on the IP and routing
+ * tables. It would be preferred if this was
+ * a hard binding but that is currently not
+ * possible with Solaris's networking stack.
+ * connect - Performs standard TCP/IP BSD operation. This
+ * establishes the TCP SYN to the peer IP address.
+ * listen - Performs standard TCP/IP BSD operation. This
+ * listens for incoming peer connections.
+ * accept - Performs standard TCP/IP BSD operation. This
+ * accepts incoming peer connections.
+ * shutdown - This disconnects the TCP/IP connection while
+ * maintaining the resources.
+ * close - This disconnects the TCP/IP connection and
+ * releases the resources.
*
- * getsockopt - Gets socket option for specified socket.
- * setsockopt - Sets socket option for specified socket.
+ * getsockopt - Gets socket option for specified socket.
+ * setsockopt - Sets socket option for specified socket.
*
* The current socket options that are used by the initiator
* are listed below.
*
- * TCP_CONN_NOTIFY_THRESHOLD
- * TCP_CONN_ABORT_THRESHOLD
- * TCP_ABORT_THRESHOLD
- * TCP_NODELAY
- * SO_RCVBUF
- * SO_SNDBUF
+ * TCP_CONN_NOTIFY_THRESHOLD
+ * TCP_CONN_ABORT_THRESHOLD
+ * TCP_ABORT_THRESHOLD
+ * TCP_NODELAY
+ * SO_RCVBUF
+ * SO_SNDBUF
*
* iscsi_net_poll - Poll socket interface for a specified amount
- * of data. If data not received in timeout
- * period fail request.
+ * of data. If data not received in timeout
+ * period fail request.
* iscsi_net_sendmsg - Send message on socket connection
* iscsi_net_recvmsg - Receive message on socket connection
*
@@ -109,8 +110,8 @@
* generate or validate the iSCSI
* header digest CRC.
* ISCSI_NET_DATA_DIGESt - The interface should either
- * generate or validate the iSCSI
- * data digest CRC.
+ * generate or validate the iSCSI
+ * data digest CRC.
*/
@@ -144,25 +145,18 @@ const int is_incoming_opcode_invalid[256] = {
/* 0xEX */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* 0xFX */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
};
-/*
- * Define macros to manipulate snode, vnode, and open device flags
- */
-#define VTYP_VALID(i) (((i) == VCHR) || ((i) == VBLK))
-#define STYP_VALID(i) (((i) == S_IFCHR) || ((i) == S_IFBLK))
-#define STYP_TO_VTYP(i) (((i) == S_IFCHR) ? VCHR : VBLK)
#define IP_4_BITS 32
#define IP_6_BITS 128
extern int modrootloaded;
-extern ib_boot_prop_t *iscsiboot_prop;
+extern ib_boot_prop_t *iscsiboot_prop;
/* prototypes */
/* for iSCSI boot */
static int net_up = 0;
static iscsi_status_t iscsi_net_interface();
-static int iscsi_ldi_vp_from_name(char *path, vnode_t **vpp);
/* boot prototypes end */
static void * iscsi_net_socket(int domain, int type, int protocol);
@@ -173,7 +167,7 @@ static int iscsi_net_connect(void *socket, struct sockaddr *
static int iscsi_net_listen(void *socket, int backlog);
static void * iscsi_net_accept(void *socket, struct sockaddr *addr,
int *addr_len);
-static int iscsi_net_getsockname(void *socket);
+static int iscsi_net_getsockname(void *socket, struct sockaddr *, socklen_t *);
static int iscsi_net_getsockopt(void *socket, int level,
int option_name, void *option_val, int *option_len, int flags);
static int iscsi_net_setsockopt(void *socket, int level,
@@ -198,7 +192,7 @@ static void iscsi_net_set_postconnect_options(void *socket);
/*
* +--------------------------------------------------------------------+
- * | network interface registration functions |
+ * | network interface registration functions |
* +--------------------------------------------------------------------+
*/
@@ -287,7 +281,7 @@ iscsi_net_set_postconnect_options(void *socket)
/*
* +--------------------------------------------------------------------+
- * | register network interfaces |
+ * | register network interfaces |
* +--------------------------------------------------------------------+
*/
@@ -297,93 +291,53 @@ iscsi_net_set_postconnect_options(void *socket)
static void *
iscsi_net_socket(int domain, int type, int protocol)
{
- vnode_t *dvp = NULL,
- *vp = NULL;
- struct snode *csp = NULL;
- int err = 0;
- major_t maj;
+ ksocket_t socket;
+ int err = 0;
if (!modrootloaded && !net_up && iscsiboot_prop) {
if (iscsi_net_interface() == ISCSI_STATUS_SUCCESS)
net_up = 1;
}
- /* ---- solookup: start ---- */
- if ((vp = solookup(domain, type, protocol, NULL, &err)) == NULL) {
-
- /*
- * solookup calls sogetvp if the vp is not found in
- * the cache. Since the call to sogetvp is hardwired
- * to use USERSPACE and declared static we'll do the
- * work here instead.
- */
- if (!modrootloaded) {
- err = iscsi_ldi_vp_from_name("/devices/pseudo/tcp@0:"
- "tcp", &vp);
- } else {
- err = lookupname(type == SOCK_STREAM ? "/dev/tcp" :
- "/dev/udp", UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);
- }
- if (err) {
- return (NULL);
- }
+	/*
+	 * Create the kernel socket.  The ksocket_socket() error code is not
+	 * reported to the caller: any failure is collapsed into a NULL
+	 * return, otherwise the ksocket_t is handed back as an opaque
+	 * void pointer.
+	 */
+ err = ksocket_socket(&socket, domain, type, protocol, KSOCKET_SLEEP,
+ CRED());
+ if (!err)
+ return ((void *)socket);
+ else
+ return (NULL);
- /* ---- check that it is the correct vnode ---- */
- if (vp->v_type != VCHR) {
- VN_RELE(vp);
- return (NULL);
- }
-
- csp = VTOS(VTOS(vp)->s_commonvp);
- if (!(csp->s_flag & SDIPSET)) {
- char *pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
- err = ddi_dev_pathname(vp->v_rdev, S_IFCHR,
- pathname);
- if (err == 0) {
- err = devfs_lookupname(pathname, NULLVPP,
- &dvp);
- }
- VN_RELE(vp);
- kmem_free(pathname, MAXPATHLEN);
- if (err != 0) {
- return (NULL);
- }
- vp = dvp;
- }
-
- maj = getmajor(vp->v_rdev);
- if (!STREAMSTAB(maj)) {
- VN_RELE(vp);
- return (NULL);
- }
- }
- /* ---- solookup: end ---- */
- return (socreate(vp, domain, type, protocol, SOV_DEFAULT, NULL, &err));
}
/*
* iscsi_net_bind - bind socket to a specific sockaddr
+ *
+ * The socket is bound with ksocket_bind().  When that succeeds and
+ * backlog is non-zero, the socket is additionally passed to
+ * ksocket_listen() with that backlog.  The flags argument is not used.
+ * The return value is the status of ksocket_bind(), or of
+ * ksocket_listen() when it was called.
*/
+/* ARGSUSED */
static int
iscsi_net_bind(void *socket, struct sockaddr *name, int name_len,
int backlog, int flags)
{
- return (sobind((struct sonode *)socket, name, name_len,
- backlog, flags));
+ ksocket_t ks = (ksocket_t)socket;
+ int error;
+ error = ksocket_bind(ks, name, name_len, CRED());
+ if (error == 0 && backlog != 0)
+ error = ksocket_listen(ks, backlog, CRED());
+
+ return (error);
}
/*
* iscsi_net_connect - connect socket to peer sockaddr
*/
+/* ARGSUSED */
static int
iscsi_net_connect(void *socket, struct sockaddr *name, int name_len,
int fflag, int flags)
{
+ ksocket_t ks = (ksocket_t)socket;
int rval;
iscsi_net_set_preconnect_options(socket);
- rval = soconnect((struct sonode *)socket, name,
- name_len, fflag, flags);
+ rval = ksocket_connect(ks, name, name_len, CRED());
iscsi_net_set_postconnect_options(socket);
return (rval);
@@ -395,7 +349,8 @@ iscsi_net_connect(void *socket, struct sockaddr *name, int name_len,
static int
iscsi_net_listen(void *socket, int backlog)
{
- return (solisten((struct sonode *)socket, backlog));
+ ksocket_t ks = (ksocket_t)socket;
+	/* backlog is forwarded as-is; ksocket_listen()'s status is returned */
+ return (ksocket_listen(ks, backlog, CRED()));
}
/*
@@ -404,41 +359,35 @@ iscsi_net_listen(void *socket, int backlog)
static void *
iscsi_net_accept(void *socket, struct sockaddr *addr, int *addr_len)
{
- struct sonode *listening_socket;
-
- (void) soaccept((struct sonode *)socket,
- ((struct sonode *)socket)->so_flag,
- &listening_socket);
- if (listening_socket != NULL) {
- bcopy(listening_socket->so_faddr_sa, addr,
- (socklen_t)listening_socket->so_faddr_len);
- *addr_len = listening_socket->so_faddr_len;
- } else {
- *addr_len = 0;
- }
+ ksocket_t listen_ks;
+ ksocket_t ks = (ksocket_t)socket;
- return ((void *)listening_socket);
+ ksocket_accept(ks, addr, (socklen_t *)addr_len, &listen_ks, CRED());
+
+ return ((void *)listen_ks);
}
/*
* iscsi_net_getsockname -
+ * Ask ksocket_getsockname() for the socket's own address.  The
+ * address is written to addr; addrlen is passed through as a pointer
+ * and the callers in this change read *addrlen back after the call.
+ * The return value of ksocket_getsockname() is handed back unchanged.
*/
static int
-iscsi_net_getsockname(void *socket)
+iscsi_net_getsockname(void *socket, struct sockaddr *addr, socklen_t *addrlen)
{
- return (sogetsockname((struct sonode *)socket));
+ ksocket_t ks = (ksocket_t)socket;
+ return (ksocket_getsockname(ks, addr, addrlen, CRED()));
}
/*
* iscsi_net_getsockopt - get value of option on socket
+ *
+ * The flags argument is not used.  The remaining arguments are passed
+ * to ksocket_getsockopt() and its return value is handed back.
*/
+/* ARGSUSED */
static int
iscsi_net_getsockopt(void *socket, int level, int option_name,
void *option_val, int *option_len, int flags)
{
- return (sogetsockopt((struct sonode *)socket, level,
- option_name, option_val, (socklen_t *)option_len,
- flags));
+ ksocket_t ks = (ksocket_t)socket;
+ return (ksocket_getsockopt(ks, level, option_name, option_val,
+ option_len, CRED()));
}
/*
@@ -448,8 +397,9 @@ static int
iscsi_net_setsockopt(void *socket, int level, int option_name,
void *option_val, int option_len)
{
- return (sosetsockopt((struct sonode *)socket, level,
- option_name, option_val, option_len));
+ ksocket_t ks = (ksocket_t)socket;
+ return (ksocket_setsockopt(ks, level, option_name, option_val,
+ option_len, CRED()));
}
/*
@@ -458,7 +408,8 @@ iscsi_net_setsockopt(void *socket, int level, int option_name,
static int
iscsi_net_shutdown(void *socket, int how)
{
- return (soshutdown((struct sonode *)socket, how));
+ ksocket_t ks = (ksocket_t)socket;
+	/* how is forwarded as-is; ksocket_shutdown()'s status is returned */
+ return (ksocket_shutdown(ks, how, CRED()));
}
/*
@@ -467,26 +418,32 @@ iscsi_net_shutdown(void *socket, int how)
static void
iscsi_net_close(void *socket)
{
- vnode_t *vp = SOTOV((struct sonode *)socket);
- (void) soshutdown((struct sonode *)socket, 2);
- (void) VOP_CLOSE(vp, 0, 1, 0, kcred, NULL);
- VN_RELE(vp);
+ ksocket_t ks = (ksocket_t)socket;
+	/*
+	 * Best effort: the ksocket_close() status is discarded.
+	 * NOTE(review): the code being replaced shut the socket down
+	 * (how == 2) before closing it and used kcred rather than CRED();
+	 * confirm that ksocket_close() alone gives the same teardown.
+	 */
+ (void) ksocket_close(ks, CRED());
}
/*
* iscsi_net_poll - poll socket for data
+ *
+ * Sets SO_RCVTIMEO on the socket from timeout and then issues a
+ * ksocket_recv() into a 64 byte scratch buffer.  The value returned is
+ * the ksocket_recv() return value converted to size_t, not the number
+ * of bytes stored in recv.
+ *
+ * NOTE(review): pflag is MSG_ANY, the flag the removed kstrgetmsg() call
+ * used.  Confirm it is the intended flag for ksocket_recv(), and whether
+ * the bytes read into the scratch buffer are meant to be consumed here.
+ * NOTE(review): failure to set SO_RCVTIMEO is ignored, so the receive
+ * may then run with whatever timeout was previously set on the socket.
*/
+/* ARGSUSED */
static size_t
iscsi_net_poll(void *socket, clock_t timeout)
{
int pflag;
- uchar_t pri;
- rval_t rval;
+ char msg[64];
+ size_t recv = 0;
+ struct timeval tl;
+ ksocket_t ks = (ksocket_t)socket;
+ /* timeout is in milliseconds; split it into seconds and microseconds */
+ tl.tv_sec = timeout / 1000;
+ tl.tv_usec = (timeout % 1000) * 1000;
+
+ (void) ksocket_setsockopt(ks, SOL_SOCKET, SO_RCVTIMEO, &tl,
+ sizeof (struct timeval), CRED());
- pri = 0;
pflag = MSG_ANY;
- return (kstrgetmsg(SOTOV((struct sonode *)socket), NULL, NULL,
- &pri, &pflag, timeout, &rval));
+ bzero(msg, sizeof (msg));
+ return (ksocket_recv(ks, msg, sizeof (msg), pflag, &recv, CRED()));
}
/*
@@ -496,24 +453,12 @@ iscsi_net_poll(void *socket, clock_t timeout)
static size_t
iscsi_net_sendmsg(void *socket, struct msghdr *msg)
{
- int i = 0;
- int total_len = 0;
- struct uio uio;
-
- /* Initialization of the uio structure. */
- bzero(&uio, sizeof (uio));
- uio.uio_iov = msg->msg_iov;
- uio.uio_iovcnt = msg->msg_iovlen;
- uio.uio_segflg = UIO_SYSSPACE;
-
- for (i = 0; i < msg->msg_iovlen; i++) {
- total_len += (msg->msg_iov)[i].iov_len;
- }
- uio.uio_resid = total_len;
-
- (void) sosendmsg((struct sonode *)socket, msg, &uio);
- DTRACE_PROBE2(sosendmsg, size_t, total_len, size_t, uio.uio_resid);
- return (total_len - uio.uio_resid);
+ ksocket_t ks = (ksocket_t)socket;
+ size_t sent = 0;
+ int flag = msg->msg_flags;
+	/*
+	 * The send flags come from msg->msg_flags.  The ksocket_sendmsg()
+	 * status is discarded; only the byte count it stored in sent
+	 * (initialised to 0 above) is returned, so the caller has to
+	 * compare that count with the length it asked to send.
+	 */
+ (void) ksocket_sendmsg(ks, msg, flag, &sent, CRED());
+ DTRACE_PROBE1(ksocket_sendmsg, size_t, sent);
+ return (sent);
}
/*
@@ -523,80 +468,25 @@ iscsi_net_sendmsg(void *socket, struct msghdr *msg)
static size_t
iscsi_net_recvmsg(void *socket, struct msghdr *msg, int timeout)
{
- int idx;
- int total_len = 0;
- struct uio uio;
- uchar_t pri = 0;
- int prflag = MSG_ANY;
- rval_t rval;
- struct sonode *sonode = (struct sonode *)socket;
-
- /* Initialization of the uio structure. */
- bzero(&uio, sizeof (uio));
- uio.uio_iov = msg->msg_iov;
- uio.uio_iovcnt = msg->msg_iovlen;
- uio.uio_segflg = UIO_SYSSPACE;
-
- for (idx = 0; idx < msg->msg_iovlen; idx++) {
- total_len += (msg->msg_iov)[idx].iov_len;
- }
- uio.uio_resid = total_len;
-
- /* If timeout requested on receive */
- if (timeout > 0) {
- boolean_t loopback = B_FALSE;
-
- /* And this isn't a loopback connection */
- if (sonode->so_laddr.soa_sa->sa_family == AF_INET) {
- struct sockaddr_in *lin =
- (struct sockaddr_in *)sonode->so_laddr.soa_sa;
- struct sockaddr_in *fin =
- (struct sockaddr_in *)sonode->so_faddr.soa_sa;
-
- if ((lin->sin_family == fin->sin_family) &&
- (bcmp(&lin->sin_addr, &fin->sin_addr,
- sizeof (struct in_addr)) == 0)) {
- loopback = B_TRUE;
- }
- } else {
- struct sockaddr_in6 *lin6 =
- (struct sockaddr_in6 *)sonode->so_laddr.soa_sa;
- struct sockaddr_in6 *fin6 =
- (struct sockaddr_in6 *)sonode->so_faddr.soa_sa;
-
- if ((lin6->sin6_family == fin6->sin6_family) &&
- (bcmp(&lin6->sin6_addr, &fin6->sin6_addr,
- sizeof (struct in6_addr)) == 0)) {
- loopback = B_TRUE;
- }
- }
-
- if (loopback == B_FALSE) {
- /*
- * Then poll device for up to the timeout
- * period or the requested data is received.
- */
- if (kstrgetmsg(SOTOV(sonode),
- NULL, NULL, &pri, &prflag, timeout * 1000,
- &rval) == ETIME) {
- return (0);
- }
- }
- }
-
+ int prflag = msg->msg_flags;
+ ksocket_t ks = (ksocket_t)socket;
+ size_t recv = 0;
+ struct timeval tl;
+
+ tl.tv_sec = timeout;
+ tl.tv_usec = 0;
+
+ /* Set recv timeout (timeout is whole seconds); on failure report 0 bytes */
+ if (ksocket_setsockopt(ks, SOL_SOCKET, SO_RCVTIMEO, &tl,
+ sizeof (struct timeval), CRED()))
+ return (0);
/*
* Receive the requested data. Block until all
- * data is received.
- *
- * resid occurs only when the connection is
- * disconnected. In that case it will return
- * the amount of data that was not received.
- * In general this is the total amount we
- * requested.
+ * data is received or timeout.
+ *
+ * The receive flags come from msg->msg_flags.  The
+ * ksocket_recvmsg() status is not checked; the byte count
+ * it stored in recv (initialised to 0 above) is returned.
*/
- (void) sorecvmsg((struct sonode *)socket, msg, &uio);
- DTRACE_PROBE2(sorecvmsg, size_t, total_len, size_t, uio.uio_resid);
- return (total_len - uio.uio_resid);
+ ksocket_recvmsg(ks, msg, prflag, &recv, CRED());
+ DTRACE_PROBE1(ksocket_recvmsg, size_t, recv);
+ return (recv);
}
/*
@@ -701,7 +591,7 @@ iscsi_net_sendpdu(void *socket, iscsi_hdr_t *ihp, char *data, int flags)
msg.msg_flags = MSG_WAITALL;
msg.msg_iovlen = iovlen;
- send_len = iscsi_net->sendmsg((struct sonode *)socket, &msg);
+ send_len = iscsi_net->sendmsg(socket, &msg);
DTRACE_PROBE2(sendmsg, size_t, total_len, size_t, send_len);
if (total_len != send_len) {
return (ISCSI_STATUS_TCP_TX_ERROR);
@@ -873,7 +763,6 @@ iscsi_net_recvdata(void *socket, iscsi_hdr_t *ihp, char *data,
}
if (dlength) {
-
/* calculate pad */
pad_len = ((ISCSI_PAD_WORD_LEN -
(dlength & (ISCSI_PAD_WORD_LEN - 1))) &
@@ -1067,83 +956,3 @@ iscsi_net_interface()
return (ISCSI_STATUS_SUCCESS);
}
}
-
-/*
- * vp is needed to create the socket for the time being.
- */
-static int
-iscsi_ldi_vp_from_name(char *path, vnode_t **vpp)
-{
- vnode_t *vp = NULL;
- int ret;
-
- /* sanity check required input parameters */
- if ((path == NULL) || (vpp == NULL))
- return (EINVAL);
-
- if (modrootloaded) {
- cred_t *saved_cred = curthread->t_cred;
-
- /* we don't want lookupname to fail because of credentials */
- curthread->t_cred = kcred;
-
- /*
- * all lookups should be done in the global zone. but
- * lookupnameat() won't actually do this if an absolute
- * path is passed in. since the ldi interfaces require an
- * absolute path we pass lookupnameat() a pointer to
- * the character after the leading '/' and tell it to
- * start searching at the current system root directory.
- */
- ASSERT(*path == '/');
- ret = lookupnameat(path + 1, UIO_SYSSPACE, FOLLOW, NULLVPP,
- &vp, rootdir);
-
- /* restore this threads credentials */
- curthread->t_cred = saved_cred;
-
- if (ret == 0) {
- if (!vn_matchops(vp, spec_getvnodeops()) ||
- !VTYP_VALID(vp->v_type)) {
- VN_RELE(vp);
- return (ENXIO);
- }
- }
- }
-
- if (vp == NULL) {
- dev_info_t *dip;
- dev_t dev;
- int spec_type;
-
- /*
- * Root is not mounted, the minor node is not specified,
- * or an OBP path has been specified.
- */
-
- /*
- * Determine if path can be pruned to produce an
- * OBP or devfs path for resolve_pathname.
- */
- if (strncmp(path, "/devices/", 9) == 0)
- path += strlen("/devices");
-
- /*
- * if no minor node was specified the DEFAULT minor node
- * will be returned. if there is no DEFAULT minor node
- * one will be fabricated of type S_IFCHR with the minor
- * number equal to the instance number.
- */
- ret = resolve_pathname(path, &dip, &dev, &spec_type);
- if (ret != 0)
- return (ENODEV);
-
- ASSERT(STYP_VALID(spec_type));
- vp = makespecvp(dev, STYP_TO_VTYP(spec_type));
- spec_assoc_vp_with_devi(vp, dip);
- ddi_release_devi(dip);
- }
-
- *vpp = vp;
- return (0);
-}
diff --git a/usr/src/uts/common/io/scsi/adapters/iscsi/isns_client.c b/usr/src/uts/common/io/scsi/adapters/iscsi/isns_client.c
index fd5d226e0f..5ed6acdc2b 100644
--- a/usr/src/uts/common/io/scsi/adapters/iscsi/isns_client.c
+++ b/usr/src/uts/common/io/scsi/adapters/iscsi/isns_client.c
@@ -1518,7 +1518,11 @@ void
struct sockaddr_in6 s_in6;
} sa_rsvr = { 0 };
void *so;
+ struct sockaddr_in6 t_addr;
+ socklen_t t_addrlen;
+ bzero(&t_addr, sizeof (struct sockaddr_in6));
+ t_addrlen = sizeof (struct sockaddr_in6);
if (isns_server_addr->a_addr.i_insize == sizeof (struct in_addr)) {
/* IPv4 */
sa_rsvr.s_in4.sin_family = AF_INET;
@@ -1555,7 +1559,8 @@ void
return (NULL);
}
- (void) iscsi_net->getsockname(so);
+ (void) iscsi_net->getsockname(so, (struct sockaddr *)&t_addr,
+ &t_addrlen);
return (so);
}
@@ -2961,6 +2966,8 @@ isns_service_esi_scn(iscsi_thread_t *thread, void *arg)
isns_pdu_t *in_pdu;
size_t bytes_received, in_pdu_size = 0;
uint8_t *lhba_handle;
+ struct sockaddr_in6 t_addr;
+ socklen_t t_addrlen;
union {
struct sockaddr sin;
struct sockaddr_in s_in4;
@@ -2978,12 +2985,13 @@ isns_service_esi_scn(iscsi_thread_t *thread, void *arg)
/* Done using the argument - free it */
kmem_free(larg, sizeof (*larg));
+ bzero(&t_addr, sizeof (struct sockaddr_in6));
+ t_addrlen = sizeof (struct sockaddr_in6);
- if (((struct sonode *)listening_so)->so_laddr.soa_len <=
- sizeof (local_conn_prop)) {
- bcopy(((struct sonode *)listening_so)->so_laddr.soa_sa,
- &local_conn_prop,
- ((struct sonode *)listening_so)->so_laddr.soa_len);
+ (void) iscsi_net->getsockname(listening_so,
+ (struct sockaddr *)&t_addr, &t_addrlen);
+ if (t_addrlen <= sizeof (local_conn_prop)) {
+ bcopy(&t_addr, &local_conn_prop, t_addrlen);
}
if (iscsi_net->listen(listening_so, 5) < 0) {
@@ -2999,8 +3007,7 @@ isns_service_esi_scn(iscsi_thread_t *thread, void *arg)
/* Blocking call */
connecting_so = iscsi_net->accept(
- (struct sonode *)listening_so,
- &clnt_addr.sin, &clnt_len);
+ listening_so, &clnt_addr.sin, &clnt_len);
mutex_enter(&esi_scn_thr_mutex);
if (esi_scn_thr_to_shutdown == B_TRUE) {
@@ -3092,10 +3099,14 @@ find_local_portal(iscsi_addr_t *isns_server_addr,
struct sockaddr_in6 s_in6;
} serv_addr = { 0 };
void *so;
+ struct sockaddr_in6 t_addr;
+ socklen_t t_addrlen;
*local_addr = NULL;
*listening_so = NULL;
+ bzero(&t_addr, sizeof (struct sockaddr_in6));
+ t_addrlen = sizeof (struct sockaddr_in6);
/*
* Determine the local IP address.
*/
@@ -3104,16 +3115,14 @@ find_local_portal(iscsi_addr_t *isns_server_addr,
return (B_FALSE);
}
- if (((struct sonode *)so)->so_laddr.soa_len >
- sizeof (local_conn_prop)) {
+ iscsi_net->getsockname(so, (struct sockaddr *)&t_addr, &t_addrlen);
+ if (t_addrlen > sizeof (local_conn_prop)) {
iscsi_net->close(so);
return (B_FALSE);
}
- bcopy(((struct sonode *)so)->so_laddr.soa_sa,
- &local_conn_prop,
- ((struct sonode *)so)->so_laddr.soa_len);
-
+ bcopy(&t_addr, &local_conn_prop, t_addrlen);
+ t_addrlen = sizeof (struct sockaddr_in6);
if (local_conn_prop.soa4.sin_family == AF_INET) {
*local_addr = (iscsi_addr_t *)kmem_zalloc(sizeof (iscsi_addr_t),
KM_SLEEP);
@@ -3160,11 +3169,10 @@ find_local_portal(iscsi_addr_t *isns_server_addr,
return (B_FALSE);
}
- if (((struct sonode *)so)->so_laddr.soa_len <=
- sizeof (local_conn_prop)) {
- bcopy(((struct sonode *)so)->so_laddr.soa_sa,
- &local_conn_prop,
- ((struct sonode *)so)->so_laddr.soa_len);
+ (void) iscsi_net->getsockname(so, (struct sockaddr *)&t_addr,
+ &t_addrlen);
+ if (t_addrlen <= sizeof (local_conn_prop)) {
+ bcopy(&t_addr, &local_conn_prop, t_addrlen);
(*local_addr)->a_port = ntohs(local_conn_prop.soa4.sin_port);
} else {
(*local_addr)->a_port = ISNS_DEFAULT_ESI_SCN_PORT;
diff --git a/usr/src/uts/common/io/sock_conf.c b/usr/src/uts/common/io/sock_conf.c
new file mode 100644
index 0000000000..b6d31de8ea
--- /dev/null
+++ b/usr/src/uts/common/io/sock_conf.c
@@ -0,0 +1,251 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/sysmacros.h>
+#include <sys/atomic.h>
+#include <sys/strsubr.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/cmn_err.h>
+#include <sys/modctl.h>
+#include <sys/sdt.h>
+
+list_t smod_list;
+kmutex_t smod_list_lock;
+
+so_create_func_t sock_comm_create_function;
+so_destroy_func_t sock_comm_destroy_function;
+
+static smod_info_t *smod_create(const char *);
+static void smod_destroy(smod_info_t *);
+
+extern void smod_add(smod_info_t *);
+
+/*
+ * Set up the global socket module registry: the smod_list of registered
+ * modules and the smod_list_lock mutex used around accesses to it.
+ */
+void
+smod_init(void)
+{
+	mutex_init(&smod_list_lock, NULL, MUTEX_DEFAULT, NULL);
+	list_create(&smod_list, sizeof (smod_info_t),
+	    offsetof(smod_info_t, smod_node));
+}
+
+/*
+ * Return the entry on smod_list whose smod_name equals modname, or NULL
+ * when there is no such entry.  The caller must hold smod_list_lock.
+ */
+static smod_info_t *
+smod_find(const char *modname)
+{
+	smod_info_t	*cur;
+
+	ASSERT(MUTEX_HELD(&smod_list_lock));
+
+	cur = list_head(&smod_list);
+	while (cur != NULL) {
+		if (strcmp(cur->smod_name, modname) == 0)
+			break;
+		cur = list_next(&smod_list, cur);
+	}
+	/* Either the matching entry, or NULL after walking off the list. */
+	return (cur);
+}
+
+/*
+ * Register the socket module.
+ *
+ * The registration is rejected with EINVAL when any of the three version
+ * fields in reg differs from what this kernel was built with.  On DEBUG
+ * kernels only, a name that is already on the list is rejected with
+ * EEXIST.
+ *
+ * Three module names (SOTPI_SMOD_NAME, "socksctp" and "socksdp") supply
+ * their own socket create/destroy functions through reg->__smod_priv and
+ * get no protocol create function.  Every other module must supply a
+ * protocol create function and must not supply private socket
+ * create/destroy functions; it is given the common ones instead.
+ *
+ * Returns 0 once the new entry has been put on the module list.
+ */
+int
+smod_register(const smod_reg_t *reg)
+{
+	smod_info_t	*smodp;
+
+	/*
+	 * Make sure the socket module does not depend on capabilities
+	 * not available on the system.
+	 */
+	if (reg->smod_version != SOCKMOD_VERSION ||
+	    reg->smod_dc_version != SOCK_DC_VERSION ||
+	    reg->smod_uc_version != SOCK_UC_VERSION) {
+		cmn_err(CE_WARN,
+		    "Failed to register socket module %s: version mismatch",
+		    reg->smod_name);
+		return (EINVAL);
+	}
+
+#ifdef DEBUG
+	mutex_enter(&smod_list_lock);
+	if ((smodp = smod_find(reg->smod_name)) != NULL) {
+		mutex_exit(&smod_list_lock);
+		return (EEXIST);
+	}
+	mutex_exit(&smod_list_lock);
+#endif
+
+	smodp = smod_create(reg->smod_name);
+	smodp->smod_version = reg->smod_version;
+	if (strcmp(smodp->smod_name, SOTPI_SMOD_NAME) == 0 ||
+	    strcmp(smodp->smod_name, "socksctp") == 0 ||
+	    strcmp(smodp->smod_name, "socksdp") == 0) {
+		ASSERT(smodp->smod_proto_create_func == NULL);
+		ASSERT(reg->__smod_priv != NULL);
+		smodp->smod_sock_create_func =
+		    reg->__smod_priv->smodp_sock_create_func;
+		smodp->smod_sock_destroy_func =
+		    reg->__smod_priv->smodp_sock_destroy_func;
+		smodp->smod_proto_create_func = NULL;
+	} else {
+		if (reg->smod_proto_create_func == NULL ||
+		    (reg->__smod_priv != NULL &&
+		    (reg->__smod_priv->smodp_sock_create_func != NULL ||
+		    reg->__smod_priv->smodp_sock_destroy_func != NULL))) {
+#ifdef DEBUG
+			/* CE_CONT adds no newline of its own. */
+			cmn_err(CE_CONT, "smod_register of %s failed\n",
+			    smodp->smod_name);
+#endif
+			smod_destroy(smodp);
+			return (EINVAL);
+		}
+		smodp->smod_proto_create_func = reg->smod_proto_create_func;
+		smodp->smod_sock_create_func = sock_comm_create_function;
+		smodp->smod_sock_destroy_func = sock_comm_destroy_function;
+		smodp->smod_uc_version = reg->smod_uc_version;
+		smodp->smod_dc_version = reg->smod_dc_version;
+		if (reg->__smod_priv != NULL) {
+			smodp->smod_proto_fallback_func =
+			    reg->__smod_priv->smodp_proto_fallback_func;
+		}
+	}
+	smod_add(smodp);
+	return (0);
+}
+
+/*
+ * Unregister the socket module.
+ *
+ * Returns ENXIO when no module named mod_name is on the list, EBUSY when
+ * the module's reference count is not zero, and 0 once the entry has
+ * been taken off the list and destroyed.
+ */
+int
+smod_unregister(const char *mod_name)
+{
+	smod_info_t	*victim;
+
+	mutex_enter(&smod_list_lock);
+	victim = smod_find(mod_name);
+	if (victim == NULL) {
+		mutex_exit(&smod_list_lock);
+		return (ENXIO);
+	}
+	if (victim->smod_refcnt != 0) {
+		mutex_exit(&smod_list_lock);
+		return (EBUSY);
+	}
+
+	/* Unlink under the lock; the entry is freed after dropping it. */
+	list_remove(&smod_list, victim);
+	mutex_exit(&smod_list_lock);
+
+	smod_destroy(victim);
+	return (0);
+}
+
+/*
+ * Initialize the socket module entry.
+ *
+ * Allocates a zero-filled smod_info_t and gives it its own copy of
+ * modname.  Both allocations are made with KM_SLEEP.  The entry is not
+ * put on the module list here.
+ */
+static smod_info_t *
+smod_create(const char *modname)
+{
+	smod_info_t	*smodp;
+	size_t		len;
+
+	smodp = kmem_zalloc(sizeof (*smodp), KM_SLEEP);
+
+	/*
+	 * len includes the terminating NUL, so the bcopy() below copies
+	 * the terminator too.  It is kept as a size_t because that is what
+	 * strlen() produces; narrowing it to int would only invite
+	 * sign and width trouble.
+	 */
+	len = strlen(modname) + 1;
+	smodp->smod_name = kmem_alloc(len, KM_SLEEP);
+	bcopy(modname, smodp->smod_name, len);
+	return (smodp);
+}
+
+/*
+ * Free a socket module entry together with its name string.  The entry
+ * is expected to have a name other than "socktpi", a zero reference
+ * count, and to be off the module list; all of that is only checked
+ * by assertions.
+ */
+static void
+smod_destroy(smod_info_t *smodp)
+{
+	char	*name = smodp->smod_name;
+
+	ASSERT(name != NULL);
+	ASSERT(smodp->smod_refcnt == 0);
+	ASSERT(!list_link_active(&smodp->smod_node));
+	ASSERT(strcmp(name, "socktpi") != 0);
+
+	/* Clear the pointers in the entry before its memory is released. */
+	smodp->smod_sock_destroy_func = NULL;
+	smodp->smod_sock_create_func = NULL;
+	smodp->smod_proto_create_func = NULL;
+	smodp->smod_name = NULL;
+
+	kmem_free(name, strlen(name) + 1);
+	kmem_free(smodp, sizeof (*smodp));
+}
+
+/*
+ * Add an entry at the front of the socket module list.
+ * smod_list_lock is taken and released here around the insertion.
+ */
+void
+smod_add(smod_info_t *smodp)
+{
+ ASSERT(smodp != NULL);
+ mutex_enter(&smod_list_lock);
+ list_insert_head(&smod_list, smodp);
+ mutex_exit(&smod_list_lock);
+}
+
+/*
+ * Lookup the socket module table by the socket module name.
+ * If there is an existing entry, then increase the reference count and
+ * return it.  Otherwise load the module with modload(); the module's
+ * register function is expected to create a new entry, which smod_add()
+ * puts at the front of the socket module table, and the lookup is then
+ * repeated.
+ *
+ * Returns NULL only when modload() fails.
+ *
+ * NOTE(review): the lookup is retried for as long as modload() succeeds,
+ * so a module that loads but never registers an entry named modname
+ * would keep this loop spinning.
+ */
+smod_info_t *
+smod_lookup_byname(const char *modname)
+{
+	smod_info_t	*smodp;
+
+	for (;;) {
+		/*
+		 * If an entry is found, increase the reference count
+		 * and return the entry pointer.
+		 */
+		mutex_enter(&smod_list_lock);
+		if ((smodp = smod_find(modname)) != NULL) {
+			SMOD_INC_REF(smodp);
+			mutex_exit(&smod_list_lock);
+			return (smodp);
+		}
+		mutex_exit(&smod_list_lock);
+
+		/*
+		 * We have a sockmod, and it is not loaded.
+		 * Load the module into the kernel, modload() will
+		 * take care of the multiple threads.
+		 */
+		DTRACE_PROBE1(load__socket__module, char *, modname);
+		if (modload(SOCKMOD_PATH, modname) == -1) {
+			/* CE_CONT adds no newline of its own. */
+			cmn_err(CE_CONT, "modload of %s/%s failed\n",
+			    SOCKMOD_PATH, modname);
+			return (NULL);
+		}
+	}
+}
diff --git a/usr/src/uts/common/io/strplumb.c b/usr/src/uts/common/io/strplumb.c
index 27b9cc8843..33406bea05 100644
--- a/usr/src/uts/common/io/strplumb.c
+++ b/usr/src/uts/common/io/strplumb.c
@@ -62,6 +62,7 @@
#include <inet/ip6.h>
#include <inet/tcp.h>
#include <inet/sctp_ip.h>
+#include <inet/udp_impl.h>
#include <sys/strlog.h>
#include <sys/log.h>
diff --git a/usr/src/uts/common/netinet/icmp6.h b/usr/src/uts/common/netinet/icmp6.h
index 2d8903d6f1..560b825595 100644
--- a/usr/src/uts/common/netinet/icmp6.h
+++ b/usr/src/uts/common/netinet/icmp6.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,15 +19,13 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _NETINET_ICMP6_H
#define _NETINET_ICMP6_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -481,6 +478,7 @@ typedef struct icmp6_filter {
#define ICMP6_FILTER_WILLBLOCK(type, filterp) \
((((filterp)->__icmp6_filt[(type) >> 5]) & (1 << ((type) & 31))) == 0)
+#define ICMP_IOC_DEFAULT_Q (('I' << 8) + 51)
#ifdef __cplusplus
}
diff --git a/usr/src/uts/common/os/fio.c b/usr/src/uts/common/os/fio.c
index 13f592993a..d78f4bbdb0 100644
--- a/usr/src/uts/common/os/fio.c
+++ b/usr/src/uts/common/os/fio.c
@@ -23,12 +23,10 @@
/* All Rights Reserved */
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/param.h>
@@ -1167,11 +1165,8 @@ f_getfl(int fd, int *flagp)
/*
* BSD fcntl() FASYNC compatibility.
- *
- * SCTP doesn't have an associated stream and thus
- * doesn't store flags on it.
*/
- if ((vp->v_type == VSOCK) && (vp->v_stream != NULL))
+ if (vp->v_type == VSOCK)
flag |= sock_getfasync(vp);
*flagp = flag;
error = 0;
diff --git a/usr/src/uts/common/os/modconf.c b/usr/src/uts/common/os/modconf.c
index 7c41975c48..cf25d86183 100644
--- a/usr/src/uts/common/os/modconf.c
+++ b/usr/src/uts/common/os/modconf.c
@@ -23,8 +23,6 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/param.h>
@@ -59,6 +57,7 @@
#include <sys/cpc_pcbe.h>
#include <sys/kstat.h>
#include <sys/fs/sdev_node.h>
+#include <sys/socketvar.h>
#include <sys/kiconv.h>
extern int moddebug;
@@ -186,6 +185,17 @@ struct mod_ops mod_strmodops = {
};
/*
+ * Socket modules.
+ */
+static int mod_infosockmod(struct modlsockmod *, struct modlinkage *, int *);
+static int mod_installsockmod(struct modlsockmod *, struct modlinkage *);
+static int mod_removesockmod(struct modlsockmod *, struct modlinkage *);
+
+struct mod_ops mod_sockmodops = {
+ mod_installsockmod, mod_removesockmod, mod_infosockmod
+};
+
+/*
* Scheduling classes.
*/
static int mod_infosched(struct modlsched *, struct modlinkage *, int *);
@@ -1178,6 +1188,59 @@ mod_removestrmod(struct modlstrmod *modl, struct modlinkage *modlp)
}
/*
+ * Get status of a socket module.
+ */
+/*ARGSUSED*/
+static int
+mod_infosockmod(struct modlsockmod *modl, struct modlinkage *modlp, int *p0)
+{
+ *p0 = -1; /* no useful info */
+ return (0);
+}
+
+/*
+ * Install a socket module; returns EINVAL if the names do not match.
+ */
+/*ARGSUSED*/
+static int
+mod_installsockmod(struct modlsockmod *modl, struct modlinkage *modlp)
+{
+ struct modctl *mcp;
+ char *mod_name;
+
+ mcp = mod_getctl(modlp);
+ ASSERT(mcp != NULL);
+ mod_name = mcp->mod_modname;
+ if (strcmp(mod_name, modl->sockmod_reg_info->smod_name) != 0) {
+#ifdef DEBUG
+ cmn_err(CE_CONT, "mod_installsockmod: different names"
+ " %s != %s \n", mod_name,
+ modl->sockmod_reg_info->smod_name);
+#endif
+ return (EINVAL);
+ }
+
+ /*
+ * Names match: register the module with smod_register().
+ */
+ return (smod_register(modl->sockmod_reg_info));
+}
+
+/*
+ * Remove a socket module.
+ */
+/*ARGSUSED*/
+static int
+mod_removesockmod(struct modlsockmod *modl, struct modlinkage *modlp)
+{
+ /*
+ * Unregister by name from the global socket creation table.
+ * The refcnt check is presumably done by smod_unregister().
+ */
+ return (smod_unregister(modl->sockmod_reg_info->smod_name));
+}
+
+/*
* Get status of a scheduling class module.
*/
/*ARGSUSED1*/
diff --git a/usr/src/uts/common/os/move.c b/usr/src/uts/common/os/move.c
index 7e1c2f2d62..d4a127794f 100644
--- a/usr/src/uts/common/os/move.c
+++ b/usr/src/uts/common/os/move.c
@@ -558,8 +558,6 @@ uioainit(uio_t *uiop, uioa_t *uioap)
uioap->uioa_mbytes = 0;
- uioap->uioa_mbytes = 0;
-
/* uio_t/uioa_t uio_t common struct copy */
*((uio_t *)uioap) = *uiop;
diff --git a/usr/src/uts/common/os/streamio.c b/usr/src/uts/common/os/streamio.c
index 236626a4f0..42d0b8e17c 100644
--- a/usr/src/uts/common/os/streamio.c
+++ b/usr/src/uts/common/os/streamio.c
@@ -77,6 +77,7 @@
#include <sys/policy.h>
#include <sys/dld.h>
#include <sys/zone.h>
+#include <sys/sodirect.h>
/*
* This define helps improve the readability of streams code while
@@ -1110,50 +1111,7 @@ strget(struct stdata *stp, queue_t *q, struct uio *uiop, int first,
}
bp = getq_noenab(q, rbytes);
- if (bp != NULL && (bp->b_datap->db_flags & DBLK_UIOA)) {
- /*
- * A uioa flaged mblk_t chain, already uio processed,
- * add it to the sodirect uioa pending free list.
- *
- * Note, a b_cont chain headed by a DBLK_UIOA enable
- * mblk_t must have all mblk_t(s) DBLK_UIOA enabled.
- */
- mblk_t *bpt = sodp->sod_uioaft;
-
- ASSERT(sodp != NULL);
- ASSERT(msgdsize(bp) == sodp->sod_uioa.uioa_mbytes);
-
- /*
- * Add first mblk_t of "bp" chain to current sodirect uioa
- * free list tail mblk_t, if any, else empty list so new head.
- */
- if (bpt == NULL)
- sodp->sod_uioafh = bp;
- else
- bpt->b_cont = bp;
-
- /*
- * Walk mblk_t "bp" chain to find tail and adjust rptr of
- * each to reflect that uioamove() has consumed all data.
- */
- bpt = bp;
- for (;;) {
- bpt->b_rptr = bpt->b_wptr;
- if (bpt->b_cont == NULL)
- break;
- bpt = bpt->b_cont;
-
- ASSERT(bpt->b_datap->db_flags & DBLK_UIOA);
- }
- /* New sodirect uioa free list tail */
- sodp->sod_uioaft = bpt;
-
- /* Only 1 strget() with data returned per uioa_t */
- if (sodp->sod_uioa.uioa_state & UIOA_ENABLED) {
- sodp->sod_uioa.uioa_state &= UIOA_CLR;
- sodp->sod_uioa.uioa_state |= UIOA_FINI;
- }
- }
+ sod_uioa_mblk_done(sodp, bp);
return (bp);
}
diff --git a/usr/src/uts/common/os/strsubr.c b/usr/src/uts/common/os/strsubr.c
index 442ced2b51..469ef329db 100644
--- a/usr/src/uts/common/os/strsubr.c
+++ b/usr/src/uts/common/os/strsubr.c
@@ -286,7 +286,6 @@ static void outer_insert(syncq_t *, syncq_t *);
static void outer_remove(syncq_t *, syncq_t *);
static void write_now(syncq_t *);
static void clr_qfull(queue_t *);
-static void enable_svc(queue_t *);
static void runbufcalls(void);
static void sqenable(syncq_t *);
static void sqfill_events(syncq_t *, queue_t *, mblk_t *, void (*)());
@@ -8401,6 +8400,21 @@ mblk_setcred(mblk_t *mp, cred_t *cr)
}
}
+/*
+ * Set the cred and pid for each mblk in the message. It is assumed that
+ * the message passed in does not already have a cred.
+ */
+void
+msg_setcredpid(mblk_t *mp, cred_t *cr, pid_t pid)
+{
+ while (mp != NULL) {
+ ASSERT(DB_CRED(mp) == NULL);
+ mblk_setcred(mp, cr);
+ DB_CPID(mp) = pid;
+ mp = mp->b_cont;
+ }
+}
+
int
hcksum_assoc(mblk_t *mp, multidata_t *mmd, pdesc_t *pd,
uint32_t start, uint32_t stuff, uint32_t end, uint32_t value,
diff --git a/usr/src/uts/common/smbsrv/smb_kproto.h b/usr/src/uts/common/smbsrv/smb_kproto.h
index 2131c88e19..b14005074a 100644
--- a/usr/src/uts/common/smbsrv/smb_kproto.h
+++ b/usr/src/uts/common/smbsrv/smb_kproto.h
@@ -38,6 +38,7 @@ extern "C" {
#include <sys/socket.h>
#include <sys/strsubr.h>
#include <sys/socketvar.h>
+#include <sys/ksocket.h>
#include <sys/cred.h>
#include <smbsrv/smb_vops.h>
#include <smbsrv/smb_xdr.h>
@@ -307,19 +308,17 @@ uint32_t smb_decode_sd(struct smb_xa *, smb_sd_t *);
/*
* Socket functions
*/
-struct sonode *smb_socreate(int domain, int type, int protocol);
-void smb_soshutdown(struct sonode *so);
-void smb_sodestroy(struct sonode *so);
-int smb_sorecv(struct sonode *so, void *msg, size_t len);
-int smb_iov_sorecv(struct sonode *so, iovec_t *iop, int iovlen,
- size_t total_len);
+ksocket_t smb_socreate(int domain, int type, int protocol);
+void smb_soshutdown(ksocket_t so);
+void smb_sodestroy(ksocket_t so);
+int smb_sorecv(ksocket_t so, void *msg, size_t len);
int smb_net_init(void);
void smb_net_fini(void);
void smb_net_txl_constructor(smb_txlst_t *);
void smb_net_txl_destructor(smb_txlst_t *);
smb_txreq_t *smb_net_txr_alloc(void);
void smb_net_txr_free(smb_txreq_t *);
-int smb_net_txr_send(struct sonode *, smb_txlst_t *, smb_txreq_t *);
+int smb_net_txr_send(ksocket_t, smb_txlst_t *, smb_txreq_t *);
/*
* SMB RPC interface
@@ -489,7 +488,7 @@ void smb_request_cancel(smb_request_t *sr);
/*
* session functions (file smb_session.c)
*/
-smb_session_t *smb_session_create(struct sonode *, uint16_t, smb_server_t *);
+smb_session_t *smb_session_create(ksocket_t, uint16_t, smb_server_t *);
int smb_session_daemon(smb_session_list_t *);
void smb_session_reconnection_check(smb_session_list_t *, smb_session_t *);
void smb_session_timers(smb_session_list_t *);
diff --git a/usr/src/uts/common/smbsrv/smb_ktypes.h b/usr/src/uts/common/smbsrv/smb_ktypes.h
index 13f5783116..918746a701 100644
--- a/usr/src/uts/common/smbsrv/smb_ktypes.h
+++ b/usr/src/uts/common/smbsrv/smb_ktypes.h
@@ -46,6 +46,8 @@ extern "C" {
#include <sys/stat.h>
#include <sys/vnode.h>
#include <sys/cred.h>
+#include <netinet/in.h>
+#include <sys/ksocket.h>
#include <sys/fem.h>
#include <sys/door.h>
#include <smbsrv/smb.h>
@@ -683,7 +685,7 @@ typedef struct smb_session {
uint32_t capabilities;
struct smb_sign signing;
- struct sonode *sock;
+ ksocket_t sock;
smb_slist_t s_req_list;
smb_llist_t s_xa_list;
@@ -1453,7 +1455,7 @@ typedef struct {
typedef struct {
kthread_t *ld_kth;
kt_did_t ld_ktdid;
- struct sonode *ld_so;
+ ksocket_t ld_so;
struct sockaddr_in ld_sin;
smb_session_list_t ld_session_list;
} smb_listener_daemon_t;
diff --git a/usr/src/uts/common/sys/Makefile b/usr/src/uts/common/sys/Makefile
index cecccf50ab..451ce87f1f 100644
--- a/usr/src/uts/common/sys/Makefile
+++ b/usr/src/uts/common/sys/Makefile
@@ -329,6 +329,7 @@ CHKHDRS= \
kmem_impl.h \
kobj.h \
kobj_impl.h \
+ ksocket.h \
kstat.h \
kstr.h \
ksyms.h \
@@ -503,6 +504,7 @@ CHKHDRS= \
sobject.h \
socket.h \
socket_impl.h \
+ socket_proto.h \
socketvar.h \
sockio.h \
sodirect.h \
diff --git a/usr/src/uts/common/sys/idm/idm_so.h b/usr/src/uts/common/sys/idm/idm_so.h
index 134896ed4f..42c39c6461 100644
--- a/usr/src/uts/common/sys/idm/idm_so.h
+++ b/usr/src/uts/common/sys/idm/idm_so.h
@@ -31,7 +31,7 @@ extern "C" {
#endif
#include <sys/idm/idm_transport.h>
-
+#include <sys/ksocket.h>
/*
* Define TCP window size (send and receive buffer sizes)
*/
@@ -41,7 +41,7 @@ extern "C" {
/* sockets-specific portion of idm_svc_t */
typedef struct idm_so_svc_s {
- struct sonode *is_so;
+ ksocket_t is_so;
kthread_t *is_thread;
kt_did_t is_thread_did;
boolean_t is_thread_running;
@@ -49,7 +49,7 @@ typedef struct idm_so_svc_s {
/* sockets-specific portion of idm_conn_t */
typedef struct idm_so_conn_s {
- struct sonode *ic_so;
+ ksocket_t ic_so;
kthread_t *ic_tx_thread;
kt_did_t ic_tx_thread_did;
@@ -68,24 +68,24 @@ void idm_so_fini();
/* Socket functions */
-struct sonode *
+ksocket_t
idm_socreate(int domain, int type, int protocol);
-void idm_soshutdown(struct sonode *so);
+void idm_soshutdown(ksocket_t so);
-void idm_sodestroy(struct sonode *so);
+void idm_sodestroy(ksocket_t so);
int idm_get_ipaddr(idm_addr_list_t **);
-int idm_sorecv(struct sonode *so, void *msg, size_t len);
+int idm_sorecv(ksocket_t so, void *msg, size_t len);
-int idm_sosendto(struct sonode *so, void *buff, size_t len,
+int idm_sosendto(ksocket_t so, void *buff, size_t len,
struct sockaddr *name, socklen_t namelen);
-int idm_iov_sosend(struct sonode *so, iovec_t *iop, int iovlen,
+int idm_iov_sosend(ksocket_t so, iovec_t *iop, int iovlen,
size_t total_len);
-int idm_iov_sorecv(struct sonode *so, iovec_t *iop, int iovlen,
+int idm_iov_sorecv(ksocket_t so, iovec_t *iop, int iovlen,
size_t total_len);
void idm_sotx_thread(void *arg);
diff --git a/usr/src/uts/common/sys/iscsit/radius_packet.h b/usr/src/uts/common/sys/iscsit/radius_packet.h
index bbf96d5cb2..80ee57a202 100644
--- a/usr/src/uts/common/sys/iscsit/radius_packet.h
+++ b/usr/src/uts/common/sys/iscsit/radius_packet.h
@@ -32,7 +32,7 @@ extern "C" {
#include <netinet/in.h>
#include <sys/types.h>
-
+#include <sys/ksocket.h>
#include <sys/iscsit/radius_protocol.h>
/* A total of RAD_RCV_TIMEOUT * RAD_RETRY_MAX seconds timeout. */
@@ -69,7 +69,7 @@ typedef struct radius_packet_data {
*
*/
int
-iscsit_snd_radius_request(void *socket,
+iscsit_snd_radius_request(ksocket_t socket,
iscsi_ipaddr_t rsvr_ip_addr,
uint32_t rsvr_port,
radius_packet_data_t *packet_data);
@@ -85,7 +85,7 @@ iscsit_snd_radius_request(void *socket,
* Return receive status.
*/
int
-iscsit_rcv_radius_response(void *socket,
+iscsit_rcv_radius_response(ksocket_t socket,
uint8_t *shared_secret,
uint32_t shared_secret_len,
uint8_t *req_authenticator,
diff --git a/usr/src/uts/common/sys/ksocket.h b/usr/src/uts/common/sys/ksocket.h
new file mode 100644
index 0000000000..fb834b027f
--- /dev/null
+++ b/usr/src/uts/common/sys/ksocket.h
@@ -0,0 +1,127 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_KSOCKET_H_
+#define _SYS_KSOCKET_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Opaque kernel socket type */
+typedef struct __ksocket *ksocket_t;
+struct nmsghdr;
+
+/* flag bit for each Callback Event */
+#define KSOCKET_CB_CONNECTED 0x00000001
+#define KSOCKET_CB_CONNECTFAILED 0x00000002
+#define KSOCKET_CB_DISCONNECTED 0x00000004
+#define KSOCKET_CB_NEWDATA 0x00000008
+#define KSOCKET_CB_NEWCONN 0x00000010
+#define KSOCKET_CB_CANSEND 0x00000020
+#define KSOCKET_CB_OOBDATA 0x00000040
+#define KSOCKET_CB_CANTSENDMORE 0x00000080
+#define KSOCKET_CB_CANTRECVMORE 0x00000100
+#define KSOCKET_CB_ERROR 0x00000200
+
+/*
+ * Kernel Socket Callback Events
+ */
+typedef enum ksocket_event {
+ KSOCKET_EV_CONNECTED,
+ KSOCKET_EV_CONNECTFAILED,
+ KSOCKET_EV_DISCONNECTED,
+ KSOCKET_EV_OOBDATA,
+ KSOCKET_EV_NEWDATA,
+ KSOCKET_EV_NEWCONN,
+ KSOCKET_EV_CANSEND,
+ KSOCKET_EV_CANTSENDMORE,
+ KSOCKET_EV_CANTRECVMORE,
+ KSOCKET_EV_ERROR
+} ksocket_callback_event_t;
+
+typedef void (*ksocket_callback_t)(ksocket_t, ksocket_callback_event_t,
+ void *, uintptr_t);
+
+typedef struct ksocket_callbacks {
+ uint32_t ksock_cb_flags;
+ ksocket_callback_t ksock_cb_connected;
+ ksocket_callback_t ksock_cb_connectfailed;
+ ksocket_callback_t ksock_cb_disconnected;
+ ksocket_callback_t ksock_cb_newdata;
+ ksocket_callback_t ksock_cb_newconn;
+ ksocket_callback_t ksock_cb_cansend;
+ ksocket_callback_t ksock_cb_oobdata;
+ ksocket_callback_t ksock_cb_cantsendmore;
+ ksocket_callback_t ksock_cb_cantrecvmore;
+ ksocket_callback_t ksock_cb_error;
+} ksocket_callbacks_t;
+
+#define KSOCKET_SLEEP SOCKET_SLEEP
+#define KSOCKET_NOSLEEP SOCKET_NOSLEEP
+
+extern int ksocket_socket(ksocket_t *, int, int, int, int, struct cred *);
+extern int ksocket_bind(ksocket_t, struct sockaddr *, socklen_t,
+ struct cred *);
+extern int ksocket_listen(ksocket_t, int, struct cred *);
+extern int ksocket_accept(ksocket_t, struct sockaddr *, socklen_t *,
+ ksocket_t *, struct cred *);
+extern int ksocket_connect(ksocket_t, const struct sockaddr *, socklen_t,
+ struct cred *);
+extern int ksocket_send(ksocket_t, void *, size_t, int, size_t *,
+ struct cred *);
+extern int ksocket_sendto(ksocket_t, void *, size_t, int,
+ struct sockaddr *, socklen_t, size_t *, struct cred *);
+extern int ksocket_sendmsg(ksocket_t, struct nmsghdr *, int, size_t *,
+ struct cred *);
+extern int ksocket_sendmblk(ksocket_t, struct nmsghdr *, int, mblk_t **,
+ struct cred *);
+extern int ksocket_recv(ksocket_t, void *, size_t, int, size_t *,
+ struct cred *);
+extern int ksocket_recvfrom(ksocket_t, void *, size_t, int,
+ struct sockaddr *, socklen_t *, size_t *, struct cred *);
+extern int ksocket_recvmsg(ksocket_t, struct nmsghdr *, int, size_t *,
+ struct cred *);
+extern int ksocket_shutdown(ksocket_t, int, struct cred *);
+extern int ksocket_setsockopt(ksocket_t, int, int, const void *, int,
+ struct cred *);
+extern int ksocket_getsockopt(ksocket_t, int, int, void *, int *,
+ struct cred *);
+extern int ksocket_getpeername(ksocket_t, struct sockaddr *, socklen_t *,
+ struct cred *);
+extern int ksocket_getsockname(ksocket_t, struct sockaddr *, socklen_t *,
+ struct cred *);
+extern int ksocket_ioctl(ksocket_t, int, intptr_t, int *, struct cred *);
+extern int ksocket_setcallbacks(ksocket_t, ksocket_callbacks_t *, void *,
+ struct cred *);
+extern int ksocket_close(ksocket_t, struct cred *);
+extern void ksocket_hold(ksocket_t);
+extern void ksocket_rele(ksocket_t);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_KSOCKET_H_ */
diff --git a/usr/src/uts/common/sys/modctl.h b/usr/src/uts/common/sys/modctl.h
index 47a83b15d9..ed0811c580 100644
--- a/usr/src/uts/common/sys/modctl.h
+++ b/usr/src/uts/common/sys/modctl.h
@@ -26,8 +26,6 @@
#ifndef _SYS_MODCTL_H
#define _SYS_MODCTL_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* loadable module support.
*/
@@ -73,6 +71,7 @@ extern struct mod_ops mod_miscops;
extern struct mod_ops mod_schedops;
extern struct mod_ops mod_strmodops;
extern struct mod_ops mod_syscallops;
+extern struct mod_ops mod_sockmodops;
#ifdef _SYSCALL32_IMPL
extern struct mod_ops mod_syscallops32;
#endif
@@ -191,6 +190,13 @@ struct modldev {
struct devname_ops *dev_ops;
};
+/* For socket Modules. */
+struct modlsockmod {
+ struct mod_ops *sockmod_modops;
+ char *sockmod_linkinfo;
+ struct smod_reg_s *sockmod_reg_info;
+};
+
/* For kiconv modules */
struct modlkiconv {
struct mod_ops *kiconv_modops;
diff --git a/usr/src/uts/common/sys/socket.h b/usr/src/uts/common/sys/socket.h
index 0432b529be..593505a426 100644
--- a/usr/src/uts/common/sys/socket.h
+++ b/usr/src/uts/common/sys/socket.h
@@ -120,6 +120,15 @@ typedef void *_RESTRICT_KYWD Psocklen_t;
#ifdef _KERNEL
#define SO_SND_COPYAVOID 0x0800 /* Internal: use zero-copy */
+#define SO_SND_BUFINFO 0x1000 /* Internal: get buffer info */
+ /* when doing zero-copy */
+
+struct so_snd_bufinfo {
+ ushort_t sbi_wroff; /* Write offset */
+ ssize_t sbi_maxblk; /* Max size of a single mblk */
+ ssize_t sbi_maxpsz; /* Max total size of a mblk chain */
+ ushort_t sbi_tail; /* Extra space available at the end */
+};
#endif /* _KERNEL */
/*
@@ -143,6 +152,7 @@ typedef void *_RESTRICT_KYWD Psocklen_t;
#define SO_ANON_MLP 0x100a /* create MLP on anonymous bind */
#define SO_MAC_EXEMPT 0x100b /* allow dominated unlabeled peers */
#define SO_DOMAIN 0x100c /* get socket domain */
+#define SO_RCVPSH 0x100d /* receive interval to push data */
/* "Socket"-level control message types: */
#define SCM_RIGHTS 0x1010 /* access rights (array of int) */
@@ -167,6 +177,21 @@ typedef void *_RESTRICT_KYWD Psocklen_t;
*/
#define SO_ACCEPTOR 0x20000 /* acceptor socket */
#define SO_SOCKSTR 0x40000 /* normal socket stream */
+#define SO_FALLBACK 0x80000 /* fallback to TPI socket */
+
+/*
+ * Flags for socket_create() and socket_newconn()
+ */
+#define SOCKET_SLEEP KM_SLEEP
+#define SOCKET_NOSLEEP KM_NOSLEEP
+
+
+/*
+ * flags used by sockfs when falling back to tpi socket
+ */
+#define SO_FB_START 0x1
+#define SO_FB_FINISH 0x2
+
#endif /* _KERNEL */
/*
@@ -340,6 +365,8 @@ struct msghdr32 {
#define MSG_CTRUNC 0x10 /* Control data truncated */
#define MSG_TRUNC 0x20 /* Normal data truncated */
#define MSG_WAITALL 0x40 /* Wait for complete recv or error */
+#define MSG_DUPCTRL 0x800 /* Save control message for use with */
+ /* left over data */
/* End of XPGv2 compliance */
#define MSG_DONTWAIT 0x80 /* Don't block for this recv */
#define MSG_NOTIFICATION 0x100 /* Notification, not data */
@@ -347,6 +374,18 @@ struct msghdr32 {
#define MSG_MAXIOVLEN 16
+#ifdef _KERNEL
+
+/*
+ * for kernel socket only
+ */
+#define MSG_MBLK_QUICKRELE 0x10000000 /* free mblk chain */
+ /* in timely manner */
+#define MSG_USERSPACE 0x20000000 /* buffer from user space */
+
+#endif /* _KERNEL */
+
+
/* Added for XPGv2 compliance */
#define SHUT_RD 0
#define SHUT_WR 1
diff --git a/usr/src/uts/common/sys/socket_proto.h b/usr/src/uts/common/sys/socket_proto.h
new file mode 100644
index 0000000000..8f60ea9e31
--- /dev/null
+++ b/usr/src/uts/common/sys/socket_proto.h
@@ -0,0 +1,182 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_SOCKET_PROTO_H_
+#define _SYS_SOCKET_PROTO_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/socket.h>
+
+/*
+ * Generation count
+ */
+typedef uint64_t sock_connid_t;
+
+#define SOCK_CONNID_INIT(id) { \
+ (id) = 0; \
+}
+#define SOCK_CONNID_BUMP(id) (++(id))
+#define SOCK_CONNID_LT(id1, id2) ((int64_t)((id1)-(id2)) < 0)
+
+/* Socket protocol properties */
+struct sock_proto_props {
+ uint_t sopp_flags; /* options to set */
+ ushort_t sopp_wroff; /* write offset */
+ ssize_t sopp_txhiwat; /* tx hi water mark */
+ ssize_t sopp_txlowat; /* tx lo water mark */
+ ssize_t sopp_rxhiwat; /* recv high water mark */
+ ssize_t sopp_rxlowat; /* recv low water mark */
+ ssize_t sopp_maxblk; /* maximum message block size */
+ ssize_t sopp_maxpsz; /* maximum packet size */
+ ssize_t sopp_minpsz; /* minimum packet size */
+ ushort_t sopp_tail; /* space available at the end */
+ uint_t sopp_zcopyflag; /* zero copy flag */
+ boolean_t sopp_oobinline; /* OOB inline */
+ uint_t sopp_rcvtimer; /* delayed recv notification (time) */
+ uint32_t sopp_rcvthresh; /* delayed recv notification (bytes) */
+ socklen_t sopp_maxaddrlen; /* maximum size of protocol address */
+};
+
+/* flags to determine which socket options are set */
+#define SOCKOPT_WROFF 0x0001 /* set write offset */
+#define SOCKOPT_RCVHIWAT 0x0002 /* set read side high water */
+#define SOCKOPT_RCVLOWAT 0x0004 /* set read side low water */
+#define SOCKOPT_MAXBLK 0x0008 /* set maximum message block size */
+#define SOCKOPT_TAIL 0x0010 /* set the extra allocated space */
+#define SOCKOPT_ZCOPY 0x0020 /* set/unset zero copy for sendfile */
+#define SOCKOPT_MAXPSZ 0x0040 /* set maxpsz for protocols */
+#define SOCKOPT_OOBINLINE 0x0080 /* set oob inline processing */
+#define SOCKOPT_RCVTIMER 0x0100 /* set delayed recv timer */
+#define SOCKOPT_RCVTHRESH 0x0200 /* set delayed recv threshold */
+#define SOCKOPT_MAXADDRLEN 0x0400 /* set max address length */
+#define SOCKOPT_MINPSZ 0x0800 /* set minpsz for protocols */
+
+#define IS_SO_OOB_INLINE(so) ((so)->so_proto_props.sopp_oobinline)
+
+#ifdef _KERNEL
+
+struct T_capability_ack;
+
+typedef struct sock_upcalls_s sock_upcalls_t;
+typedef struct sock_downcalls_s sock_downcalls_t;
+
+/*
+ * Upcall and downcall handle for sockfs and transport layer.
+ */
+typedef struct __sock_upper_handle *sock_upper_handle_t;
+typedef struct __sock_lower_handle *sock_lower_handle_t;
+
+struct sock_downcalls_s {
+ void (*sd_activate)(sock_lower_handle_t, sock_upper_handle_t,
+ sock_upcalls_t *, int, cred_t *);
+ int (*sd_accept)(sock_lower_handle_t, sock_lower_handle_t,
+ sock_upper_handle_t, cred_t *);
+ int (*sd_bind)(sock_lower_handle_t, struct sockaddr *, socklen_t,
+ cred_t *);
+ int (*sd_listen)(sock_lower_handle_t, int, cred_t *);
+ int (*sd_connect)(sock_lower_handle_t, const struct sockaddr *,
+ socklen_t, sock_connid_t *, cred_t *);
+ int (*sd_getpeername)(sock_lower_handle_t, struct sockaddr *,
+ socklen_t *, cred_t *);
+ int (*sd_getsockname)(sock_lower_handle_t, struct sockaddr *,
+ socklen_t *, cred_t *);
+ int (*sd_getsockopt)(sock_lower_handle_t, int, int, void *,
+ socklen_t *, cred_t *);
+ int (*sd_setsockopt)(sock_lower_handle_t, int, int, const void *,
+ socklen_t, cred_t *);
+ int (*sd_send)(sock_lower_handle_t, mblk_t *, struct nmsghdr *,
+ cred_t *);
+ int (*sd_send_uio)(sock_lower_handle_t, uio_t *, struct nmsghdr *,
+ cred_t *);
+ int (*sd_recv_uio)(sock_lower_handle_t, uio_t *, struct nmsghdr *,
+ cred_t *);
+ short (*sd_poll)(sock_lower_handle_t, short, int, cred_t *);
+ int (*sd_shutdown)(sock_lower_handle_t, int, cred_t *);
+ void (*sd_clr_flowctrl)(sock_lower_handle_t);
+ int (*sd_ioctl)(sock_lower_handle_t, int, intptr_t, int,
+ int32_t *, cred_t *);
+ int (*sd_close)(sock_lower_handle_t, int, cred_t *);
+};
+
+typedef sock_lower_handle_t (*so_proto_create_func_t)(int, int, int,
+ sock_downcalls_t **, uint_t *, int *, int, cred_t *);
+
+typedef void (*so_proto_quiesced_cb_t)(sock_upper_handle_t, queue_t *,
+ struct T_capability_ack *, struct sockaddr *, socklen_t,
+ struct sockaddr *, socklen_t, short);
+typedef void (*so_proto_fallback_func_t)(sock_lower_handle_t, queue_t *,
+ boolean_t, so_proto_quiesced_cb_t);
+
+/*
+ * Upcalls and related information
+ */
+
+/*
+ * su_opctl() actions
+ */
+typedef enum sock_opctl_action {
+ SOCK_OPCTL_ENAB_ACCEPT = 0,
+ SOCK_OPCTL_SHUT_SEND,
+ SOCK_OPCTL_SHUT_RECV
+} sock_opctl_action_t;
+
+struct sock_upcalls_s {
+ sock_upper_handle_t (*su_newconn)(sock_upper_handle_t,
+ sock_lower_handle_t, sock_downcalls_t *, cred_t *, pid_t,
+ sock_upcalls_t **);
+ void (*su_connected)(sock_upper_handle_t, sock_connid_t, cred_t *,
+ pid_t);
+ int (*su_disconnected)(sock_upper_handle_t, sock_connid_t, int);
+ void (*su_opctl)(sock_upper_handle_t, sock_opctl_action_t,
+ uintptr_t);
+ ssize_t (*su_recv)(sock_upper_handle_t, mblk_t *, size_t, int,
+ int *, boolean_t *);
+ void (*su_set_proto_props)(sock_upper_handle_t,
+ struct sock_proto_props *);
+ void (*su_txq_full)(sock_upper_handle_t, boolean_t);
+ void (*su_signal_oob)(sock_upper_handle_t, ssize_t);
+ void (*su_zcopy_notify)(sock_upper_handle_t);
+ void (*su_set_error)(sock_upper_handle_t, int);
+};
+
+#define SOCK_UC_VERSION sizeof (sock_upcalls_t)
+#define SOCK_DC_VERSION sizeof (sock_downcalls_t)
+
+#define SOCKET_RECVHIWATER (48 * 1024)
+#define SOCKET_RECVLOWATER 1024
+
+#define SOCKET_NO_RCVTIMER 0
+#define SOCKET_TIMER_INTERVAL 50
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_SOCKET_PROTO_H_ */
diff --git a/usr/src/uts/common/sys/socketvar.h b/usr/src/uts/common/sys/socketvar.h
index 37a699345a..510d9445cf 100644
--- a/usr/src/uts/common/sys/socketvar.h
+++ b/usr/src/uts/common/sys/socketvar.h
@@ -48,25 +48,18 @@
#include <sys/file.h>
#include <sys/param.h>
#include <sys/zone.h>
+#include <sys/sdt.h>
+#include <sys/modctl.h>
+#include <sys/atomic.h>
+#include <sys/socket.h>
+#include <sys/ksocket.h>
#include <sys/sodirect.h>
-#include <inet/kssl/ksslapi.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
- * Internal representation used for addresses.
- */
-struct soaddr {
- struct sockaddr *soa_sa; /* Actual address */
- t_uscalar_t soa_len; /* Length in bytes for kmem_free */
- t_uscalar_t soa_maxlen; /* Allocated length */
-};
-/* Maximum size address for transports that have ADDR_size == 1 */
-#define SOA_DEFSIZE 128
-
-/*
* Internal representation of the address used to represent addresses
* in the loopback transport for AF_UNIX. While the sockaddr_un is used
* as the sockfs layer address for AF_UNIX the pathnames contained in
@@ -97,6 +90,10 @@ struct sockaddr_ux {
struct so_ux_addr sou_addr;
};
+#if defined(_KERNEL) || defined(_KMEMUSER)
+
+#include <sys/socket_proto.h>
+
typedef struct sonodeops sonodeops_t;
typedef struct sonode sonode_t;
@@ -105,236 +102,149 @@ typedef struct sonode sonode_t;
* name space and can not be opened using open() - only the socket, socketpair
* and accept calls create sonodes.
*
- * When an AF_UNIX socket is bound to a pathname the sockfs
- * creates a VSOCK vnode in the underlying file system. However, the vnodeops
- * etc in this VNODE remain those of the underlying file system.
- * Sockfs uses the v_stream pointer in the underlying file system VSOCK node
- * to find the sonode bound to the pathname. The bound pathname vnode
- * is accessed through so_ux_vp.
- *
- * A socket always corresponds to a VCHR stream representing the transport
- * provider (e.g. /dev/tcp). This information is retrieved from the kernel
- * socket configuration table and entered into so_accessvp. sockfs uses
- * this to perform VOP_ACCESS checks before allowing an open of the transport
- * provider.
+ * The locking of sockfs uses the so_lock mutex plus the SOLOCKED and
+ * SOREADLOCKED flags in so_flag. The mutex protects all the state in the
+ * sonode. It is expected that the underlying transport protocol serializes
+ * socket operations, so sockfs will normally not single-thread
+ * operations. However, certain sockets, including TPI based ones, can only
+ * handle one control operation at a time. The SOLOCKED flag is used to
+ * single-thread operations from sockfs users to prevent e.g. multiple bind()
+ * calls to operate on the same sonode concurrently. The SOREADLOCKED flag is
+ * used to ensure that only one thread sleeps in kstrgetmsg for a given
+ * sonode. This is needed to ensure atomic operation for things like
+ * MSG_WAITALL.
*
- * The locking of sockfs uses the so_lock mutex plus the SOLOCKED
- * and SOREADLOCKED flags in so_flag. The mutex protects all the state
- * in the sonode. The SOLOCKED flag is used to single-thread operations from
- * sockfs users to prevent e.g. multiple bind() calls to operate on the
- * same sonode concurrently. The SOREADLOCKED flag is used to ensure that
- * only one thread sleeps in kstrgetmsg for a given sonode. This is needed
- * to ensure atomic operation for things like MSG_WAITALL.
+ * The so_fallback_rwlock is used to ensure that for sockets that can
+ * fall back to TPI, the fallback is not initiated until all pending
+ * operations have completed.
*
* Note that so_lock is sometimes held across calls that might go to sleep
* (kmem_alloc and soallocproto*). This implies that no other lock in
* the system should be held when calling into sockfs; from the system call
- * side or from strrput. If locks are held while calling into sockfs
- * the system might hang when running low on memory.
+ * side or from strrput (in case of TPI based sockets). If locks are held
+ * while calling into sockfs the system might hang when running low on memory.
*/
struct sonode {
struct vnode *so_vnode; /* vnode associated with this sonode */
- sonodeops_t *so_ops; /* operations vector for this sonode */
-
- /*
- * These fields are initialized once.
- */
- dev_t so_dev; /* device the sonode represents */
- struct vnode *so_accessvp; /* vnode for the /dev entry */
+ sonodeops_t *so_ops; /* operations vector for this sonode */
+ void *so_priv; /* sonode private data */
- /* The locks themselves */
+ krwlock_t so_fallback_rwlock;
kmutex_t so_lock; /* protects sonode fields */
- kmutex_t so_plumb_lock; /* serializes plumbs, and the related */
- /* fields so_version and so_pushcnt */
+
kcondvar_t so_state_cv; /* synchronize state changes */
- kcondvar_t so_ack_cv; /* wait for TPI acks */
- kcondvar_t so_connind_cv; /* wait for T_CONN_IND */
kcondvar_t so_want_cv; /* wait due to SOLOCKED */
/* These fields are protected by so_lock */
- uint_t so_state; /* internal state flags SS_*, below */
- uint_t so_mode; /* characteristics on socket. SM_* */
- mblk_t *so_ack_mp; /* TPI ack received from below */
- mblk_t *so_conn_ind_head; /* b_next list of T_CONN_IND */
- mblk_t *so_conn_ind_tail;
- mblk_t *so_unbind_mp; /* Preallocated T_UNBIND_REQ message */
+ uint_t so_state; /* internal state flags SS_*, below */
+ uint_t so_mode; /* characteristics on socket. SM_* */
+ ushort_t so_flag; /* flags, see below */
+ int so_count; /* count of opened references */
+
+ sock_connid_t so_proto_connid; /* protocol generation number */
- ushort_t so_flag; /* flags, see below */
- dev_t so_fsid; /* file system identifier */
- time_t so_atime; /* time of last access */
- time_t so_mtime; /* time of last modification */
- time_t so_ctime; /* time of last attributes change */
- int so_count; /* count of opened references */
+ ushort_t so_error; /* error affecting connection */
+ struct sockparams *so_sockparams; /* vnode or socket module */
/* Needed to recreate the same socket for accept */
short so_family;
short so_type;
short so_protocol;
short so_version; /* From so_socket call */
- short so_pushcnt; /* Number of modules above "sockmod" */
+
+ /* Accept queue */
+ kmutex_t so_acceptq_lock; /* protects accept queue */
+ struct sonode *so_acceptq_next; /* acceptq list node */
+ struct sonode *so_acceptq_head;
+ struct sonode **so_acceptq_tail;
+ unsigned int so_acceptq_len;
+ unsigned int so_backlog; /* Listen backlog */
+ kcondvar_t so_acceptq_cv; /* wait for new conn. */
/* Options */
short so_options; /* From socket call, see socket.h */
struct linger so_linger; /* SO_LINGER value */
- int so_sndbuf; /* SO_SNDBUF value */
- int so_rcvbuf; /* SO_RCVBUF value */
- int so_sndlowat; /* send low water mark */
- int so_rcvlowat; /* receive low water mark */
-#ifdef notyet
- int so_sndtimeo; /* Not yet implemented */
- int so_rcvtimeo; /* Not yet implemented */
-#endif /* notyet */
- ushort_t so_error; /* error affecting connection */
- ushort_t so_delayed_error; /* From T_uderror_ind */
- int so_backlog; /* Listen backlog */
+#define so_sndbuf so_proto_props.sopp_txhiwat /* SO_SNDBUF value */
+#define so_sndlowat so_proto_props.sopp_txlowat /* tx low water mark */
+#define so_rcvbuf so_proto_props.sopp_rxhiwat /* SO_RCVBUF value */
+#define so_rcvlowat so_proto_props.sopp_rxlowat /* rx low water mark */
+#define so_max_addr_len so_proto_props.sopp_maxaddrlen
+#define so_minpsz so_proto_props.sopp_minpsz
+#define so_maxpsz so_proto_props.sopp_maxpsz
+
+ clock_t so_sndtimeo; /* send timeout */
+ clock_t so_rcvtimeo; /* recv timeout */
- /*
- * The counts (so_oobcnt and so_oobsigcnt) track the number of
- * urgent indicates that are (logically) queued on the stream head
- * read queue. The urgent data is queued on the stream head
- * as follows.
- *
- * In the normal case the SIGURG is not generated until
- * the T_EXDATA_IND arrives at the stream head. However, transports
- * that have an early indication that urgent data is pending
- * (e.g. TCP receiving a "new" urgent pointer value) can send up
- * an M_PCPROTO/SIGURG message to generate the signal early.
- *
- * The mark is indicated by either:
- * - a T_EXDATA_IND (with no M_DATA b_cont) with MSGMARK set.
- * When this message is consumed by sorecvmsg the socket layer
- * sets SS_RCVATMARK until data has been consumed past the mark.
- * - a message with MSGMARKNEXT set (indicating that the
- * first byte of the next message constitutes the mark). When
- * the last byte of the MSGMARKNEXT message is consumed in
- * the stream head the stream head sets STRATMARK. This flag
- * is cleared when at least one byte is read. (Note that
- * the MSGMARKNEXT messages can be of zero length when there
- * is no previous data to which the marknext can be attached.)
- *
- * While the T_EXDATA_IND method is the common case which is used
- * with all TPI transports, the MSGMARKNEXT method is needed to
- * indicate the mark when e.g. the TCP urgent byte has not been
- * received yet but the TCP urgent pointer has made TCP generate
- * the M_PCSIG/SIGURG.
- *
- * The signal (the M_PCSIG carrying the SIGURG) and the mark
- * indication can not be delivered as a single message, since
- * the signal should be delivered as high priority and any mark
- * indication must flow with the data. This implies that immediately
- * when the SIGURG has been delivered if the stream head queue is
- * empty it is impossible to determine if this will be the position
- * of the mark. This race condition is resolved by using MSGNOTMARKNEXT
- * messages and the STRNOTATMARK flag in the stream head. The
- * SIOCATMARK code calls the stream head to wait for either a
- * non-empty queue or one of the STR*ATMARK flags being set.
- * This implies that any transport that is sending M_PCSIG(SIGURG)
- * should send the appropriate MSGNOTMARKNEXT message (which can be
- * zero length) after sending an M_PCSIG to prevent SIOCATMARK
- * from sleeping unnecessarily.
- */
mblk_t *so_oobmsg; /* outofline oob data */
- uint_t so_oobsigcnt; /* Number of SIGURG generated */
- uint_t so_oobcnt; /* Number of T_EXDATA_IND queued */
+ ssize_t so_oobmark; /* offset of the oob data */
+
pid_t so_pgrp; /* pgrp for signals */
- /* From T_info_ack */
- t_uscalar_t so_tsdu_size;
- t_uscalar_t so_etsdu_size;
- t_scalar_t so_addr_size;
- t_uscalar_t so_opt_size;
- t_uscalar_t so_tidu_size;
- t_scalar_t so_serv_type;
+ cred_t *so_peercred; /* connected socket peer cred */
+ pid_t so_cpid; /* connected socket peer cached pid */
+ zoneid_t so_zoneid; /* opener's zoneid */
- /* From T_capability_ack */
- t_uscalar_t so_acceptor_id;
+ struct pollhead so_poll_list; /* common pollhead */
+ short so_pollev; /* events that should be generated */
- /* Internal provider information */
- struct tpi_provinfo *so_provinfo;
+ /* Receive */
+ unsigned int so_rcv_queued;
+ mblk_t *so_rcv_q_head;
+ mblk_t *so_rcv_q_last_head;
+ mblk_t *so_rcv_head; /* 1st mblk in the list */
+ mblk_t *so_rcv_last_head; /* last mblk in b_next chain */
+ kcondvar_t so_rcv_cv;
+ uint_t so_rcv_wanted; /* # of bytes wanted by app */
+ timeout_id_t so_rcv_timer_tid;
- /*
- * The local and remote addresses have multiple purposes
- * but one of the key reasons for their existence and careful
- * tracking in sockfs is to support getsockname and getpeername
- * when the transport does not handle the TI_GET*NAME ioctls
- * and caching when it does (signaled by valid bits in so_state).
- * When all transports support the new TPI (with T_ADDR_REQ)
- * we can revisit this code.
- * The other usage of so_faddr is to keep the "connected to"
- * address for datagram sockets.
- * Finally, for AF_UNIX both local and remote addresses are used
- * to record the sockaddr_un since we use a separate namespace
- * in the loopback transport.
- */
- struct soaddr so_laddr; /* Local address */
- struct soaddr so_faddr; /* Peer address */
-#define so_laddr_sa so_laddr.soa_sa
-#define so_faddr_sa so_faddr.soa_sa
-#define so_laddr_len so_laddr.soa_len
-#define so_faddr_len so_faddr.soa_len
-#define so_laddr_maxlen so_laddr.soa_maxlen
-#define so_faddr_maxlen so_faddr.soa_maxlen
- mblk_t *so_eaddr_mp; /* for so_delayed_error */
+#define so_rcv_thresh so_proto_props.sopp_rcvthresh
+#define so_rcv_timer_interval so_proto_props.sopp_rcvtimer
- /*
- * For AF_UNIX sockets:
- * so_ux_laddr/faddr records the internal addresses used with the
- * transport.
- * so_ux_vp and v_stream->sd_vnode form the cross-
- * linkage between the underlying fs vnode corresponding to
- * the bound sockaddr_un and the socket node.
- */
- struct so_ux_addr so_ux_laddr; /* laddr bound with the transport */
- struct so_ux_addr so_ux_faddr; /* temporary peer address */
- struct vnode *so_ux_bound_vp; /* bound AF_UNIX file system vnode */
- struct sonode *so_next; /* next sonode on socklist */
- struct sonode *so_prev; /* previous sonode on socklist */
- mblk_t *so_discon_ind_mp; /* T_DISCON_IND received from below */
-
- /* put here for delayed processing */
- void *so_priv; /* sonode private data */
- cred_t *so_peercred; /* connected socket peer cred */
- pid_t so_cpid; /* connected socket peer cached pid */
- zoneid_t so_zoneid; /* opener's zoneid */
+ /* Send */
+ boolean_t so_snd_qfull; /* Transmit full */
+ kcondvar_t so_snd_cv;
- kmem_cache_t *so_cache; /* object cache of this "sonode". */
- void *so_obj; /* object to free */
+ boolean_t so_rcv_wakeup;
+ boolean_t so_snd_wakeup;
- /*
- * For NL7C sockets:
- *
- * so_nl7c_flags the NL7C state of URL processing.
- *
- * so_nl7c_rcv_mp mblk_t chain of already received data to be
- * passed up to the app after NL7C gives up on
- * a socket.
- *
- * so_nl7c_rcv_rval returned rval for last mblk_t from above.
- *
- * so_nl7c_uri the URI currently being processed.
- *
- * so_nl7c_rtime URI request gethrestime_sec().
- *
- * so_nl7c_addr pointer returned by nl7c_addr_lookup().
- */
- uint64_t so_nl7c_flags;
- mblk_t *so_nl7c_rcv_mp;
- int64_t so_nl7c_rcv_rval;
- void *so_nl7c_uri;
- time_t so_nl7c_rtime;
- void *so_nl7c_addr;
-
- /* For sockets acting as an in-kernel SSL proxy */
- kssl_endpt_type_t so_kssl_type; /* is proxy/is proxied/none */
- kssl_ent_t so_kssl_ent; /* SSL config entry */
- kssl_ctx_t so_kssl_ctx; /* SSL session context */
+ /* Communication channel with protocol */
+ sock_lower_handle_t so_proto_handle;
+ sock_downcalls_t *so_downcalls;
+
+ struct sock_proto_props so_proto_props; /* protocol settings */
+ boolean_t so_flowctrld; /* Flow controlled */
+ uint_t so_copyflag; /* Copy related flag */
+ kcondvar_t so_copy_cv; /* Copy cond variable */
+
+ /* kernel sockets */
+ ksocket_callbacks_t so_ksock_callbacks;
+ void *so_ksock_cb_arg; /* callback argument */
+ kcondvar_t so_closing_cv;
/* != NULL for sodirect_t enabled socket */
- sodirect_t *so_direct;
+ sodirect_t *so_direct;
};
+/*
+ * Lockless first-pass check for readable data; if it finds no events,
+ * the check for POLLIN events is redone under the lock. Fully parenthesized
+ * so the expansion is safe inside larger expressions (e.g. negation).
+ */
+#define SO_HAVE_DATA(so) \
+ (((so)->so_rcv_timer_tid == 0 && ((so)->so_rcv_queued > 0)) || \
+ ((so)->so_rcv_queued > (so)->so_rcv_thresh) || \
+ ((so)->so_state & SS_CANTRCVMORE))
+
+/*
+ * Events handled by the protocol (in case sd_poll is set)
+ */
+#define SO_PROTO_POLLEV (POLLIN|POLLRDNORM|POLLRDBAND)
+
+
+#endif /* _KERNEL || _KMEMUSER */
+
/* flags */
#define SOMOD 0x0001 /* update socket modification time */
#define SOACC 0x0002 /* update socket access time */
@@ -345,6 +255,8 @@ struct sonode {
#define SOCLONE 0x0080 /* child of clone driver */
#define SOASYNC_UNBIND 0x0100 /* wait for ACK of async unbind */
+#define SOCK_IS_NONSTR(so) ((so)->so_vnode->v_stream == NULL)
+
/*
* Socket state bits.
*/
@@ -360,31 +272,59 @@ struct sonode {
#define SS_ASYNC 0x00000100 /* async i/o notify */
#define SS_ACCEPTCONN 0x00000200 /* listen done */
-#define SS_HASCONNIND 0x00000400 /* T_CONN_IND for poll */
+/* unused 0x00000400 */ /* was SS_HASCONNIND */
#define SS_SAVEDEOR 0x00000800 /* Saved MSG_EOR rcv side state */
#define SS_RCVATMARK 0x00001000 /* at mark on input */
#define SS_OOBPEND 0x00002000 /* OOB pending or present - poll */
#define SS_HAVEOOBDATA 0x00004000 /* OOB data present */
#define SS_HADOOBDATA 0x00008000 /* OOB data consumed */
+#define SS_CLOSING 0x00010000 /* in process of closing */
-#define SS_FADDR_NOXLATE 0x00020000 /* No xlation of faddr for AF_UNIX */
-
-#define SS_HASDATA 0x00040000 /* NCAfs: data available */
-#define SS_DONEREAD 0x00080000 /* NCAfs: all data read */
-#define SS_MOREDATA 0x00100000 /* NCAfs: NCA has more data */
+/* unused 0x00020000 */ /* was SS_FADDR_NOXLATE */
+/* unused 0x00040000 */ /* was SS_HASDATA */
+/* unused 0x00080000 */ /* was SS_DONEREAD */
+/* unused 0x00100000 */ /* was SS_MOREDATA */
+/* unused 0x00200000 */ /* was SS_DIRECT */
-#define SS_DIRECT 0x00200000 /* transport is directly below */
#define SS_SODIRECT 0x00400000 /* transport supports sodirect */
-#define SS_LADDR_VALID 0x01000000 /* so_laddr valid for user */
-#define SS_FADDR_VALID 0x02000000 /* so_faddr valid for user */
+/* unused 0x01000000 */ /* was SS_LADDR_VALID */
+/* unused 0x02000000 */ /* was SS_FADDR_VALID */
+
+#define SS_SENTLASTREADSIG 0x10000000 /* last rx signal has been sent */
+#define SS_SENTLASTWRITESIG 0x20000000 /* last tx signal has been sent */
+
+#define SS_FALLBACK_PENDING 0x40000000
+#define SS_FALLBACK_COMP 0x80000000
+
/* Set of states when the socket can't be rebound */
#define SS_CANTREBIND (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING|\
SS_CANTSENDMORE|SS_CANTRCVMORE|SS_ACCEPTCONN)
/*
+ * Sockets that can fall back to TPI must ensure that fall back is not
+ * initiated while a thread is using a socket.
+ */
+#define SO_BLOCK_FALLBACK(so, fn) { \
+ ASSERT(MUTEX_NOT_HELD(&(so)->so_lock)); \
+ rw_enter(&(so)->so_fallback_rwlock, RW_READER); \
+ if ((so)->so_state & SS_FALLBACK_COMP) { \
+ rw_exit(&(so)->so_fallback_rwlock); \
+ return (fn); \
+ } \
+}
+
+#define SO_UNBLOCK_FALLBACK(so) { \
+ rw_exit(&(so)->so_fallback_rwlock); \
+}
+
+/* Poll events */
+#define SO_POLLEV_IN 0x1 /* POLLIN wakeup needed */
+#define SO_POLLEV_ALWAYS 0x2 /* wakeups */
+
+/*
* Characteristics of sockets. Not changed after the socket is created.
*/
#define SM_PRIV 0x001 /* privileged for broadcast, raw... */
@@ -399,6 +339,10 @@ struct sonode {
#define SM_ACCEPTOR_ID 0x100 /* so_acceptor_id is valid */
+#define SM_KERNEL 0x200 /* kernel socket */
+
+#define SM_ACCEPTSUPP 0x400 /* can handle accept() */
+
/*
* Socket versions. Used by the socket library when calling _so_socket().
*/
@@ -409,21 +353,177 @@ struct sonode {
#define SOV_XPG4_2 4 /* Xnet socket */
#if defined(_KERNEL) || defined(_KMEMUSER)
+
+/*
+ * sonode create and destroy functions.
+ */
+typedef struct sonode *(*so_create_func_t)(struct sockparams *,
+ int, int, int, int, int, int *, cred_t *);
+typedef void (*so_destroy_func_t)(struct sonode *);
+
+/* STREAM device information */
+typedef struct sdev_info {
+ char *sd_devpath;
+ int sd_devpathlen; /* Is 0 if sd_devpath is a static string */
+ vnode_t *sd_vnode;
+} sdev_info_t;
+
+#define SOCKMOD_VERSION 1
+/* name of the TPI pseudo socket module */
+#define SOTPI_SMOD_NAME "socktpi"
+
+typedef struct __smod_priv_s {
+ so_create_func_t smodp_sock_create_func;
+ so_destroy_func_t smodp_sock_destroy_func;
+ so_proto_fallback_func_t smodp_proto_fallback_func;
+} __smod_priv_t;
+
/*
- * Used for mapping family/type/protocol to vnode.
- * Defined here so that crash can use it.
+ * Socket module register information
+ */
+typedef struct smod_reg_s {
+ int smod_version;
+ char *smod_name;
+ size_t smod_uc_version;
+ size_t smod_dc_version;
+ so_proto_create_func_t smod_proto_create_func;
+
+ /* __smod_priv must be NULL */
+ __smod_priv_t *__smod_priv;
+} smod_reg_t;
+
+/*
+ * Socket module information
+ */
+typedef struct smod_info {
+ int smod_version;
+ char *smod_name;
+ uint_t smod_refcnt; /* # of entries */
+ size_t smod_uc_version; /* upcall version */
+ size_t smod_dc_version; /* down call version */
+ so_proto_create_func_t smod_proto_create_func;
+ so_proto_fallback_func_t smod_proto_fallback_func;
+ so_create_func_t smod_sock_create_func;
+ so_destroy_func_t smod_sock_destroy_func;
+ list_node_t smod_node;
+} smod_info_t;
+
+/*
+ * sockparams
+ *
+ * Used for mapping family/type/protocol to module
*/
struct sockparams {
- int sp_domain;
- int sp_type;
- int sp_protocol;
- char *sp_devpath;
- int sp_devpathlen; /* Is 0 if sp_devpath is a static string */
- vnode_t *sp_vnode;
- struct sockparams *sp_next;
+ /*
+ * The family, type, protocol, sdev_info and smod_info are
+ * set when the entry is created, and they will never change
+ * thereafter.
+ */
+ int sp_family;
+ int sp_type;
+ int sp_protocol;
+
+ sdev_info_t sp_sdev_info; /* STREAM device */
+ char *sp_smod_name; /* socket module name */
+ smod_info_t *sp_smod_info; /* socket module */
+
+ kmutex_t sp_lock; /* lock for refcnt */
+ uint64_t sp_refcnt; /* entry reference count */
+
+ /*
+ * The entries below are only modified while holding
+ * splist_lock as a writer.
+ */
+ int sp_flags; /* see below */
+ list_node_t sp_node;
};
-extern struct sockparams *sphead;
+
+/*
+ * sockparams flags
+ */
+#define SOCKPARAMS_EPHEMERAL 0x1 /* temp. entry, not on global list */
+
+extern void sockparams_init(void);
+extern struct sockparams *sockparams_hold_ephemeral_bydev(int, int, int,
+ const char *, int, int *);
+extern struct sockparams *sockparams_hold_ephemeral_bymod(int, int, int,
+ const char *, int, int *);
+extern void sockparams_ephemeral_drop_last_ref(struct sockparams *);
+
+extern void smod_init(void);
+extern void smod_add(smod_info_t *);
+extern int smod_register(const smod_reg_t *);
+extern int smod_unregister(const char *);
+extern smod_info_t *smod_lookup_byname(const char *);
+
+#define SOCKPARAMS_HAS_DEVICE(sp) \
+ ((sp)->sp_sdev_info.sd_devpath != NULL)
+
+/* Increase the smod_info_t reference count */
+#define SMOD_INC_REF(smodp) { \
+ ASSERT((smodp) != NULL); \
+ DTRACE_PROBE1(smodinfo__inc__ref, struct smod_info *, (smodp)); \
+ atomic_inc_uint(&(smodp)->smod_refcnt); \
+}
+
+/*
+ * Decrease the socket module entry reference count.
+ * When no one is mapping to the entry, we try to unload the module from the
+ * kernel. If the module can't unload, just leave the module entry with
+ * a zero refcnt.
+ */
+#define SMOD_DEC_REF(sp, smodp) { \
+ ASSERT((smodp) != NULL); \
+ ASSERT((smodp)->smod_refcnt != 0); \
+ atomic_dec_uint(&(smodp)->smod_refcnt); \
+ /* \
+ * No need to atomically check the return value because the \
+ * socket module framework will verify that no one is using \
+ * the module before unloading. Worst thing that can happen \
+ * here is multiple calls to mod_remove_by_name(), which is OK. \
+ */ \
+ if ((smodp)->smod_refcnt == 0) \
+ (void) mod_remove_by_name((sp)->sp_smod_name); \
+}
+
+/* Increase the reference count */
+#define SOCKPARAMS_INC_REF(sp) { \
+ ASSERT((sp) != NULL); \
+ DTRACE_PROBE1(sockparams__inc__ref, struct sockparams *, (sp)); \
+ mutex_enter(&(sp)->sp_lock); \
+ (sp)->sp_refcnt++; \
+ ASSERT((sp)->sp_refcnt != 0); \
+ mutex_exit(&(sp)->sp_lock); \
+}
+
+/*
+ * Decrease the reference count.
+ *
+ * If the sockparams is ephemeral, then the thread dropping the last ref
+ * count will destroy the entry.
+ */
+#define SOCKPARAMS_DEC_REF(sp) { \
+ ASSERT((sp) != NULL); \
+ DTRACE_PROBE1(sockparams__dec__ref, struct sockparams *, (sp)); \
+ mutex_enter(&(sp)->sp_lock); \
+ ASSERT((sp)->sp_refcnt > 0); \
+ if ((sp)->sp_refcnt == 1) { \
+ if ((sp)->sp_flags & SOCKPARAMS_EPHEMERAL) { \
+ mutex_exit(&(sp)->sp_lock); \
+ sockparams_ephemeral_drop_last_ref((sp)); \
+ } else { \
+ (sp)->sp_refcnt--; \
+ if ((sp)->sp_smod_info != NULL) \
+ SMOD_DEC_REF(sp, (sp)->sp_smod_info); \
+ (sp)->sp_smod_info = NULL; \
+ mutex_exit(&(sp)->sp_lock); \
+ } \
+ } else { \
+ (sp)->sp_refcnt--; \
+ mutex_exit(&(sp)->sp_lock); \
+ } \
+}
/*
* Used to traverse the list of AF_UNIX sockets to construct the kstat
@@ -490,49 +590,71 @@ struct sendfile_queue {
/* Socket network operations switch */
struct sonodeops {
- int (*sop_accept)(struct sonode *, int, struct sonode **);
- int (*sop_bind)(struct sonode *, struct sockaddr *, socklen_t,
+ int (*sop_init)(struct sonode *, struct sonode *, cred_t *,
int);
- int (*sop_listen)(struct sonode *, int);
+ int (*sop_accept)(struct sonode *, int, cred_t *, struct sonode **);
+ int (*sop_bind)(struct sonode *, struct sockaddr *, socklen_t,
+ int, cred_t *);
+ int (*sop_listen)(struct sonode *, int, cred_t *);
int (*sop_connect)(struct sonode *, const struct sockaddr *,
- socklen_t, int, int);
+ socklen_t, int, int, cred_t *);
int (*sop_recvmsg)(struct sonode *, struct msghdr *,
- struct uio *);
+ struct uio *, cred_t *);
int (*sop_sendmsg)(struct sonode *, struct msghdr *,
- struct uio *);
- int (*sop_getpeername)(struct sonode *);
- int (*sop_getsockname)(struct sonode *);
- int (*sop_shutdown)(struct sonode *, int);
+ struct uio *, cred_t *);
+ int (*sop_sendmblk)(struct sonode *, struct msghdr *, int,
+ cred_t *, mblk_t **);
+ int (*sop_getpeername)(struct sonode *, struct sockaddr *,
+ socklen_t *, boolean_t, cred_t *);
+ int (*sop_getsockname)(struct sonode *, struct sockaddr *,
+ socklen_t *, cred_t *);
+ int (*sop_shutdown)(struct sonode *, int, cred_t *);
int (*sop_getsockopt)(struct sonode *, int, int, void *,
- socklen_t *, int);
+ socklen_t *, int, cred_t *);
int (*sop_setsockopt)(struct sonode *, int, int, const void *,
- socklen_t);
+ socklen_t, cred_t *);
+ int (*sop_ioctl)(struct sonode *, int, intptr_t, int,
+ cred_t *, int32_t *);
+ int (*sop_poll)(struct sonode *, short, int, short *,
+ struct pollhead **);
+ int (*sop_close)(struct sonode *, int, cred_t *);
};
-#define SOP_ACCEPT(so, fflag, nsop) \
- ((so)->so_ops->sop_accept((so), (fflag), (nsop)))
-#define SOP_BIND(so, name, namelen, flags) \
- ((so)->so_ops->sop_bind((so), (name), (namelen), (flags)))
-#define SOP_LISTEN(so, backlog) \
- ((so)->so_ops->sop_listen((so), (backlog)))
-#define SOP_CONNECT(so, name, namelen, fflag, flags) \
- ((so)->so_ops->sop_connect((so), (name), (namelen), (fflag), (flags)))
-#define SOP_RECVMSG(so, msg, uiop) \
- ((so)->so_ops->sop_recvmsg((so), (msg), (uiop)))
-#define SOP_SENDMSG(so, msg, uiop) \
- ((so)->so_ops->sop_sendmsg((so), (msg), (uiop)))
-#define SOP_GETPEERNAME(so) \
- ((so)->so_ops->sop_getpeername((so)))
-#define SOP_GETSOCKNAME(so) \
- ((so)->so_ops->sop_getsockname((so)))
-#define SOP_SHUTDOWN(so, how) \
- ((so)->so_ops->sop_shutdown((so), (how)))
-#define SOP_GETSOCKOPT(so, level, optionname, optval, optlenp, flags) \
+#define SOP_INIT(so, flag, cr, flags) \
+ ((so)->so_ops->sop_init((so), (flag), (cr), (flags)))
+#define SOP_ACCEPT(so, fflag, cr, nsop) \
+ ((so)->so_ops->sop_accept((so), (fflag), (cr), (nsop)))
+#define SOP_BIND(so, name, namelen, flags, cr) \
+ ((so)->so_ops->sop_bind((so), (name), (namelen), (flags), (cr)))
+#define SOP_LISTEN(so, backlog, cr) \
+ ((so)->so_ops->sop_listen((so), (backlog), (cr)))
+#define SOP_CONNECT(so, name, namelen, fflag, flags, cr) \
+ ((so)->so_ops->sop_connect((so), (name), (namelen), (fflag), (flags), \
+ (cr)))
+#define SOP_RECVMSG(so, msg, uiop, cr) \
+ ((so)->so_ops->sop_recvmsg((so), (msg), (uiop), (cr)))
+#define SOP_SENDMSG(so, msg, uiop, cr) \
+ ((so)->so_ops->sop_sendmsg((so), (msg), (uiop), (cr)))
+#define SOP_SENDMBLK(so, msg, size, cr, mpp) \
+ ((so)->so_ops->sop_sendmblk((so), (msg), (size), (cr), (mpp)))
+#define SOP_GETPEERNAME(so, addr, addrlen, accept, cr) \
+ ((so)->so_ops->sop_getpeername((so), (addr), (addrlen), (accept), (cr)))
+#define SOP_GETSOCKNAME(so, addr, addrlen, cr) \
+ ((so)->so_ops->sop_getsockname((so), (addr), (addrlen), (cr)))
+#define SOP_SHUTDOWN(so, how, cr) \
+ ((so)->so_ops->sop_shutdown((so), (how), (cr)))
+#define SOP_GETSOCKOPT(so, level, optionname, optval, optlenp, flags, cr) \
((so)->so_ops->sop_getsockopt((so), (level), (optionname), \
- (optval), (optlenp), (flags)))
-#define SOP_SETSOCKOPT(so, level, optionname, optval, optlen) \
+ (optval), (optlenp), (flags), (cr)))
+#define SOP_SETSOCKOPT(so, level, optionname, optval, optlen, cr) \
((so)->so_ops->sop_setsockopt((so), (level), (optionname), \
- (optval), (optlen)))
+ (optval), (optlen), (cr)))
+#define SOP_IOCTL(so, cmd, arg, mode, cr, rvalp) \
+ ((so)->so_ops->sop_ioctl((so), (cmd), (arg), (mode), (cr), (rvalp)))
+#define SOP_POLL(so, events, anyyet, reventsp, phpp) \
+ ((so)->so_ops->sop_poll((so), (events), (anyyet), (reventsp), (phpp)))
+#define SOP_CLOSE(so, flag, cr) \
+ ((so)->so_ops->sop_close((so), (flag), (cr)))
#endif /* defined(_KERNEL) || defined(_KMEMUSER) */
@@ -544,6 +666,8 @@ struct sonodeops {
#define ROUNDUP_cmsglen(len) \
(((len) + _CMSG_HDR_ALIGNMENT - 1) & ~(_CMSG_HDR_ALIGNMENT - 1))
+#define IS_NON_STREAM_SOCK(vp) \
+ ((vp)->v_type == VSOCK && (vp)->v_stream == NULL)
/*
* Macros that operate on struct cmsghdr.
* Used in parsing msg_control.
@@ -686,10 +810,8 @@ extern int sockprinterr;
#endif /* defined(DEBUG) */
extern struct vfsops sock_vfsops;
-extern struct vnodeops *socktpi_vnodeops;
-extern const struct fs_operation_def socktpi_vnodeops_template[];
-
-extern sonodeops_t sotpi_sonodeops;
+extern struct vnodeops *socket_vnodeops;
+extern const struct fs_operation_def socket_vnodeops_template[];
extern dev_t sockdev;
@@ -700,20 +822,10 @@ extern int sock_getmsg(vnode_t *, struct strbuf *, struct strbuf *,
uchar_t *, int *, int, rval_t *);
extern int sock_putmsg(vnode_t *, struct strbuf *, struct strbuf *,
uchar_t, int, int);
-struct sonode *sotpi_create(vnode_t *, int, int, int, int, struct sonode *,
- int *);
-extern int socktpi_open(struct vnode **, int, struct cred *,
- caller_context_t *);
-extern int so_sock2stream(struct sonode *);
-extern void so_stream2sock(struct sonode *);
+extern int sogetvp(char *, vnode_t **, int);
extern int sockinit(int, char *);
-extern struct vnode
- *makesockvp(struct vnode *, int, int, int);
-extern void sockfree(struct sonode *);
-extern void so_update_attrs(struct sonode *, int);
-extern int soconfig(int, int, int, char *, int);
-extern struct vnode
- *solookup(int, int, int, char *, int *);
+extern int soconfig(int, int, int, char *, int, char *);
+extern int solookup(int, int, int, struct sockparams **);
extern void so_lock_single(struct sonode *);
extern void so_unlock_single(struct sonode *, int);
extern int so_lock_read(struct sonode *, int);
@@ -723,10 +835,6 @@ extern void *sogetoff(mblk_t *, t_uscalar_t, t_uscalar_t, uint_t);
extern void so_getopt_srcaddr(void *, t_uscalar_t,
void **, t_uscalar_t *);
extern int so_getopt_unix_close(void *, t_uscalar_t);
-extern int so_addr_verify(struct sonode *, const struct sockaddr *,
- socklen_t);
-extern int so_ux_addr_xlate(struct sonode *, struct sockaddr *,
- socklen_t, int, void **, socklen_t *);
extern void fdbuf_free(struct fdbuf *);
extern mblk_t *fdbuf_allocmsg(int, struct fdbuf *);
extern int fdbuf_create(void *, int, struct fdbuf **);
@@ -744,55 +852,13 @@ extern void soisdisconnected(struct sonode *, int);
extern void socantsendmore(struct sonode *);
extern void socantrcvmore(struct sonode *);
extern void soseterror(struct sonode *, int);
-extern int sogeterr(struct sonode *);
-extern int sogetrderr(vnode_t *, int, int *);
-extern int sogetwrerr(vnode_t *, int, int *);
-extern void so_unix_close(struct sonode *);
-extern mblk_t *soallocproto(size_t, int);
-extern mblk_t *soallocproto1(const void *, ssize_t, ssize_t, int);
-extern void soappendmsg(mblk_t *, const void *, ssize_t);
-extern mblk_t *soallocproto2(const void *, ssize_t, const void *, ssize_t,
- ssize_t, int);
-extern mblk_t *soallocproto3(const void *, ssize_t, const void *, ssize_t,
- const void *, ssize_t, ssize_t, int);
-extern int sowaitprim(struct sonode *, t_scalar_t, t_scalar_t,
- t_uscalar_t, mblk_t **, clock_t);
-extern int sowaitokack(struct sonode *, t_scalar_t);
-extern int sowaitack(struct sonode *, mblk_t **, clock_t);
-extern void soqueueack(struct sonode *, mblk_t *);
-extern int sowaitconnind(struct sonode *, int, mblk_t **);
-extern void soqueueconnind(struct sonode *, mblk_t *);
-extern int soflushconnind(struct sonode *, t_scalar_t);
-extern void so_drain_discon_ind(struct sonode *);
-extern void so_flush_discon_ind(struct sonode *);
+extern int sogeterr(struct sonode *, boolean_t);
extern int sowaitconnected(struct sonode *, int, int);
-extern int sostream_direct(struct sonode *, struct uio *,
- mblk_t *, cred_t *);
-extern int sosend_dgram(struct sonode *, struct sockaddr *,
- socklen_t, struct uio *, int);
-extern int sosend_svc(struct sonode *, struct uio *, t_scalar_t, int, int);
-extern void so_installhooks(struct sonode *);
-extern int so_strinit(struct sonode *, struct sonode *);
-extern int sotpi_recvmsg(struct sonode *, struct nmsghdr *,
- struct uio *);
-extern int sotpi_getpeername(struct sonode *);
-extern int sotpi_getsockopt(struct sonode *, int, int, void *,
- socklen_t *, int);
-extern int sotpi_setsockopt(struct sonode *, int, int, const void *,
- socklen_t);
-extern int socktpi_ioctl(struct vnode *, int, intptr_t, int,
- struct cred *, int *, caller_context_t *);
-extern int sodisconnect(struct sonode *, t_scalar_t, int);
extern ssize_t soreadfile(file_t *, uchar_t *, u_offset_t, int *, size_t);
-extern int so_set_asyncsigs(vnode_t *, pid_t, int, int, cred_t *);
-extern int so_set_events(struct sonode *, vnode_t *, cred_t *);
-extern int so_flip_async(struct sonode *, vnode_t *, int, cred_t *);
-extern int so_set_siggrp(struct sonode *, vnode_t *, pid_t, int, cred_t *);
extern void *sock_kstat_init(zoneid_t);
extern void sock_kstat_fini(zoneid_t, void *);
extern struct sonode *getsonode(int, int *, file_t **);
-
/*
* Function wrappers (mostly around the sonode switch) for
* backward compatibility.
@@ -805,44 +871,18 @@ extern int soconnect(struct sonode *, const struct sockaddr *, socklen_t,
int, int);
extern int sorecvmsg(struct sonode *, struct nmsghdr *, struct uio *);
extern int sosendmsg(struct sonode *, struct nmsghdr *, struct uio *);
-extern int sogetpeername(struct sonode *);
-extern int sogetsockname(struct sonode *);
extern int soshutdown(struct sonode *, int);
extern int sogetsockopt(struct sonode *, int, int, void *, socklen_t *,
int);
extern int sosetsockopt(struct sonode *, int, int, const void *,
t_uscalar_t);
-extern struct sonode *socreate(vnode_t *, int, int, int, int,
- struct sonode *, int *);
+extern struct sonode *socreate(struct sockparams *, int, int, int, int,
+ int *);
extern int so_copyin(const void *, void *, size_t, int);
extern int so_copyout(const void *, void *, size_t, int);
-extern int socktpi_access(struct vnode *, int, int, struct cred *,
- caller_context_t *);
-extern int socktpi_fid(struct vnode *, struct fid *, caller_context_t *);
-extern int socktpi_fsync(struct vnode *, int, struct cred *,
- caller_context_t *);
-extern int socktpi_getattr(struct vnode *, struct vattr *, int,
- struct cred *, caller_context_t *);
-extern int socktpi_seek(struct vnode *, offset_t, offset_t *,
- caller_context_t *);
-extern int socktpi_setattr(struct vnode *, struct vattr *, int,
- struct cred *, caller_context_t *);
-extern int socktpi_setfl(vnode_t *, int, int, cred_t *,
- caller_context_t *);
-
-/* SCTP sockfs */
-extern struct sonode *sosctp_create(vnode_t *, int, int, int, int,
- struct sonode *, int *);
-extern int sosctp_init(void);
-
-/* SDP sockfs */
-extern struct sonode *sosdp_create(vnode_t *, int, int, int, int,
- struct sonode *, int *);
-extern int sosdp_init(void);
-
#endif
/*
@@ -865,9 +905,11 @@ struct sockinfo {
uint16_t si_faddr_family;
char si_laddr_sun_path[MAXPATHLEN + 1]; /* NULL terminated */
char si_faddr_sun_path[MAXPATHLEN + 1];
+ boolean_t si_faddr_noxlate;
zoneid_t si_szoneid;
};
+#define SOCKMOD_PATH "socketmod" /* dir where sockmods are stored */
#ifdef __cplusplus
}
diff --git a/usr/src/uts/common/sys/sockio.h b/usr/src/uts/common/sys/sockio.h
index 012e7f3061..9e107ff3ef 100644
--- a/usr/src/uts/common/sys/sockio.h
+++ b/usr/src/uts/common/sys/sockio.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -39,8 +39,6 @@
#ifndef _SYS_SOCKIO_H
#define _SYS_SOCKIO_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* General socket ioctl definitions.
*/
@@ -316,7 +314,9 @@ extern "C" {
#define SIOCSIPMPFAILBACK _IOW('i', 182, int) /* enable/disable */
/* FAILBACK */
-#define SIOCSENABLESDP _IOWR('i', 183, int) /* Enable SDP */
+#define SIOCSENABLESDP _IOWR('i', 183, int) /* Enable SDP */
+
+#define SIOCSQPTR _IOWR('i', 184, int) /* set q_ptr of stream */
#ifdef __cplusplus
}
diff --git a/usr/src/uts/common/sys/sodirect.h b/usr/src/uts/common/sys/sodirect.h
index c8acfcea44..f87d010f56 100644
--- a/usr/src/uts/common/sys/sodirect.h
+++ b/usr/src/uts/common/sys/sodirect.h
@@ -52,12 +52,15 @@
extern "C" {
#endif
+typedef int (*sod_enq_func)();
+typedef void (*sod_wakeup_func)();
+
typedef struct sodirect_s {
uint32_t sod_state; /* State bits */
uint32_t sod_want; /* Pending read byte count or 0 */
queue_t *sod_q; /* Socket Q */
- int (*sod_enqueue)(); /* Call to enqueue an mblk_t */
- void (*sod_wakeup)(); /* Call to awkake a read()er, if any */
+ sod_enq_func sod_enqueue; /* Call to enqueue an mblk_t */
+ sod_wakeup_func sod_wakeup; /* Call to awaken a read()er, if any */
mblk_t *sod_uioafh; /* To be freed list head, or NULL */
mblk_t *sod_uioaft; /* To be freed list tail */
kmutex_t *sod_lockp; /* Pointer to the lock needed */
@@ -107,10 +110,36 @@ typedef struct sodirect_s {
#define SOD_QFULL(p) ((p)->sod_q->q_flag & QFULL)
#define SOD_QCNT(p) ((p)->sod_q->q_count)
-#define SOD_DISABLE(p) (p)->sod_state &= ~SOD_ENABLED
+#define SOD_DISABLE(p) { \
+ if ((p) != NULL) \
+ (p)->sod_state &= ~SOD_ENABLED; \
+}
#define SOD_QTOSODP(q) (q)->q_stream->sd_sodirect
+#define SOD_SOTOSODP(so) ((sonode_t *)so)->so_direct
+
+#define SOD_UIOAFINI(sodp) { \
+ if ((sodp) && (sodp)->sod_uioa.uioa_state & UIOA_ENABLED) { \
+ (sodp)->sod_uioa.uioa_state &= UIOA_CLR; \
+ (sodp)->sod_uioa.uioa_state |= UIOA_FINI; \
+ } \
+}
+
+struct sonode;
+struct sodirect_s;
+
+extern uio_t *sod_rcv_init(struct sonode *, int, struct uio **);
+extern int sod_rcv_done(struct sonode *, struct uio *, struct uio *);
+
+extern mblk_t *sod_uioa_mblk_init(struct sodirect_s *, mblk_t *, size_t);
+extern void sod_uioa_so_init(struct sonode *, struct sodirect_s *,
+ struct uio *);
+extern ssize_t sod_uioa_mblk(struct sonode *, mblk_t *);
+extern void sod_uioa_mblk_done(struct sodirect_s *, mblk_t *);
+extern void sod_init();
+extern void sod_sock_init(struct sonode *, struct stdata *, sod_enq_func,
+ sod_wakeup_func, kmutex_t *);
#ifdef __cplusplus
}
diff --git a/usr/src/uts/common/sys/squeue.h b/usr/src/uts/common/sys/squeue.h
index ec09b3a88b..e14ded203a 100644
--- a/usr/src/uts/common/sys/squeue.h
+++ b/usr/src/uts/common/sys/squeue.h
@@ -85,6 +85,9 @@ extern void squeue_enter(squeue_t *, mblk_t *, mblk_t *,
uint32_t, int, uint8_t);
extern uintptr_t *squeue_getprivate(squeue_t *, sqprivate_t);
+extern int squeue_synch_enter(squeue_t *, void *, uint8_t);
+extern void squeue_synch_exit(squeue_t *, void *);
+
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/uts/common/sys/squeue_impl.h b/usr/src/uts/common/sys/squeue_impl.h
index 501377e53f..bd934cc0b3 100644
--- a/usr/src/uts/common/sys/squeue_impl.h
+++ b/usr/src/uts/common/sys/squeue_impl.h
@@ -102,6 +102,7 @@ struct squeue_s {
clock_t sq_curr_time; /* Current tick (lbolt) */
kcondvar_t sq_worker_cv; /* cond var. worker thread blocks on */
kcondvar_t sq_poll_cv; /* cond variable poll_thr waits on */
+ kcondvar_t sq_synch_cv; /* cond var. synch thread waits on */
kcondvar_t sq_ctrlop_done_cv; /* cond variable for ctrl ops */
clock_t sq_wait; /* lbolts to wait after a fill() */
timeout_id_t sq_tid; /* timer id of pending timeout() */
@@ -163,6 +164,7 @@ struct squeue_s {
#define SQS_POLL_RESTART_DONE 0x01000000
#define SQS_POLL_THR_QUIESCE 0x02000000
+#define SQS_PAUSE 0x04000000 /* The squeue has been paused */
#define SQS_WORKER_THR_CONTROL \
(SQS_POLL_QUIESCE | SQS_POLL_RESTART | SQS_POLL_CLEANUP)
diff --git a/usr/src/uts/common/sys/stream.h b/usr/src/uts/common/sys/stream.h
index 41097cab7f..8d1ac458df 100644
--- a/usr/src/uts/common/sys/stream.h
+++ b/usr/src/uts/common/sys/stream.h
@@ -425,6 +425,7 @@ typedef struct bcache {
#define MSGMARKNEXT 0x10 /* Private: first byte of next msg marked */
#define MSGNOTMARKNEXT 0x20 /* Private: ... not marked */
#define MSGHASREF 0x40 /* Private: message has reference to owner */
+#define MSGWAITSYNC 0x80 /* Private: waiting for sync squeue enter */
/*
* Streams message types.
diff --git a/usr/src/uts/common/sys/strsubr.h b/usr/src/uts/common/sys/strsubr.h
index 04c778feaa..33ec38cac5 100644
--- a/usr/src/uts/common/sys/strsubr.h
+++ b/usr/src/uts/common/sys/strsubr.h
@@ -1126,7 +1126,6 @@ extern void strclean(struct vnode *);
extern void str_cn_clean(); /* XXX hook for consoles signal cleanup */
extern int strwrite(struct vnode *, struct uio *, cred_t *);
extern int strwrite_common(struct vnode *, struct uio *, cred_t *, int);
-extern int kstrwritemp(struct vnode *, mblk_t *, ushort_t);
extern int strread(struct vnode *, struct uio *, cred_t *);
extern int strioctl(struct vnode *, int, intptr_t, int, int, cred_t *, int *);
extern int strrput(queue_t *, mblk_t *);
@@ -1151,6 +1150,7 @@ extern int strcopyout(void *, void *, size_t, int);
extern void strsignal(struct stdata *, int, int32_t);
extern clock_t str_cv_wait(kcondvar_t *, kmutex_t *, clock_t, int);
extern void disable_svc(queue_t *);
+extern void enable_svc(queue_t *);
extern void remove_runlist(queue_t *);
extern void wait_svc(queue_t *);
extern void backenable(queue_t *, uchar_t);
@@ -1212,6 +1212,7 @@ extern mblk_t *allocb_cred_wait(size_t, uint_t, int *, cred_t *);
extern mblk_t *allocb_tmpl(size_t, const mblk_t *);
extern mblk_t *allocb_tryhard(size_t);
extern void mblk_setcred(mblk_t *, cred_t *);
+extern void msg_setcredpid(mblk_t *, cred_t *, pid_t);
extern void strpollwakeup(vnode_t *, short);
extern int putnextctl_wait(queue_t *, int);
diff --git a/usr/src/uts/common/syscall/sendfile.c b/usr/src/uts/common/syscall/sendfile.c
index 16ada25629..13b480a304 100644
--- a/usr/src/uts/common/syscall/sendfile.c
+++ b/usr/src/uts/common/syscall/sendfile.c
@@ -53,6 +53,8 @@
#include <sys/socket.h>
#include <sys/socketvar.h>
+#include <fs/sockfs/sockcommon.h>
+#include <fs/sockfs/socktpi.h>
#include <netinet/in.h>
#include <sys/sendfile.h>
@@ -71,103 +73,11 @@ extern int nl7c_sendfilev(struct sonode *, u_offset_t *, struct sendfilevec *,
int, ssize_t *);
extern int snf_segmap(file_t *, vnode_t *, u_offset_t, u_offset_t, ssize_t *,
boolean_t);
+extern sotpi_info_t *sotpi_sototpi(struct sonode *);
#define readflg (V_WRITELOCK_FALSE)
#define rwflag (V_WRITELOCK_TRUE)
-/*
- * kstrwritemp() has very similar semantics as that of strwrite().
- * The main difference is it obtains mblks from the caller and also
- * does not do any copy as done in strwrite() from user buffers to
- * kernel buffers.
- *
- * Currently, this routine is used by sendfile to send data allocated
- * within the kernel without any copying. This interface does not use the
- * synchronous stream interface as synch. stream interface implies
- * copying.
- */
-int
-kstrwritemp(struct vnode *vp, mblk_t *mp, ushort_t fmode)
-{
- struct stdata *stp;
- struct queue *wqp;
- mblk_t *newmp;
- char waitflag;
- int tempmode;
- int error = 0;
- int done = 0;
- struct sonode *so;
- boolean_t direct;
-
- ASSERT(vp->v_stream);
- stp = vp->v_stream;
-
- so = VTOSO(vp);
- direct = (so->so_state & SS_DIRECT);
-
- /*
- * This is the sockfs direct fast path. canputnext() need
- * not be accurate so we don't grab the sd_lock here. If
- * we get flow-controlled, we grab sd_lock just before the
- * do..while loop below to emulate what strwrite() does.
- */
- wqp = stp->sd_wrq;
- if (canputnext(wqp) && direct &&
- !(stp->sd_flag & (STWRERR|STRHUP|STPLEX))) {
- return (sostream_direct(so, NULL, mp, CRED()));
- } else if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
- /* Fast check of flags before acquiring the lock */
- mutex_enter(&stp->sd_lock);
- error = strgeterr(stp, STWRERR|STRHUP|STPLEX, 0);
- mutex_exit(&stp->sd_lock);
- if (error != 0) {
- if (!(stp->sd_flag & STPLEX) &&
- (stp->sd_wput_opt & SW_SIGPIPE)) {
- tsignal(curthread, SIGPIPE);
- error = EPIPE;
- }
- return (error);
- }
- }
-
- waitflag = WRITEWAIT;
- if (stp->sd_flag & OLDNDELAY)
- tempmode = fmode & ~FNDELAY;
- else
- tempmode = fmode;
-
- mutex_enter(&stp->sd_lock);
- do {
- if (canputnext(wqp)) {
- mutex_exit(&stp->sd_lock);
- if (stp->sd_wputdatafunc != NULL) {
- newmp = (stp->sd_wputdatafunc)(vp, mp, NULL,
- NULL, NULL, NULL);
- if (newmp == NULL) {
- /* The caller will free mp */
- return (ECOMM);
- }
- mp = newmp;
- }
- putnext(wqp, mp);
- return (0);
- }
- error = strwaitq(stp, waitflag, (ssize_t)0, tempmode, -1,
- &done);
- } while (error == 0 && !done);
-
- mutex_exit(&stp->sd_lock);
- /*
- * EAGAIN tells the application to try again. ENOMEM
- * is returned only if the memory allocation size
- * exceeds the physical limits of the system. ENOMEM
- * can't be true here.
- */
- if (error == ENOMEM)
- error = EAGAIN;
- return (error);
-}
-
#define SEND_MAX_CHUNK 16
#if defined(_SYSCALL32_IMPL) || defined(_ILP32)
@@ -510,6 +420,7 @@ sendvec_small_chunk(file_t *fp, u_offset_t *fileoff, struct sendfilevec *sfv,
size_t size = total_size;
size_t extra;
int tail_len;
+ struct nmsghdr msg;
fflag = fp->f_flag;
vp = fp->f_vnode;
@@ -521,8 +432,17 @@ sendvec_small_chunk(file_t *fp, u_offset_t *fileoff, struct sendfilevec *sfv,
if (total_size == 0)
return (0);
- wroff = (int)vp->v_stream->sd_wroff;
- tail_len = (int)vp->v_stream->sd_tail;
+ if (vp->v_stream != NULL) {
+ wroff = (int)vp->v_stream->sd_wroff;
+ tail_len = (int)vp->v_stream->sd_tail;
+ } else {
+ struct sonode *so;
+
+ so = VTOSO(vp);
+ wroff = so->so_proto_props.sopp_wroff;
+ tail_len = so->so_proto_props.sopp_tail;
+ }
+
extra = wroff + tail_len;
buf_left = MIN(total_size, maxblk);
@@ -530,6 +450,7 @@ sendvec_small_chunk(file_t *fp, u_offset_t *fileoff, struct sendfilevec *sfv,
if (head == NULL)
return (ENOMEM);
head->b_wptr = head->b_rptr = head->b_rptr + wroff;
+ bzero(&msg, sizeof (msg));
auio.uio_extflg = UIO_COPY_DEFAULT;
for (i = 0; i < copy_cnt; i++) {
@@ -738,9 +659,10 @@ sendvec_small_chunk(file_t *fp, u_offset_t *fileoff, struct sendfilevec *sfv,
}
ASSERT(total_size == 0);
- error = kstrwritemp(vp, head, fflag);
+ error = socket_sendmblk(VTOSO(vp), &msg, fflag, CRED(), &head);
if (error != 0) {
- freemsg(head);
+ if (head != NULL)
+ freemsg(head);
return (error);
}
ttolwp(curthread)->lwp_ru.ioch += (ulong_t)size;
@@ -776,19 +698,28 @@ sendvec_chunk(file_t *fp, u_offset_t *fileoff, struct sendfilevec *sfv,
int maxblk, wroff, tail_len;
struct sonode *so;
stdata_t *stp;
+ struct nmsghdr msg;
fflag = fp->f_flag;
vp = fp->f_vnode;
if (vp->v_type == VSOCK) {
so = VTOSO(vp);
- stp = vp->v_stream;
- wroff = (int)stp->sd_wroff;
- tail_len = (int)stp->sd_tail;
- maxblk = (int)stp->sd_maxblk;
+ if (vp->v_stream != NULL) {
+ stp = vp->v_stream;
+ wroff = (int)stp->sd_wroff;
+ tail_len = (int)stp->sd_tail;
+ maxblk = (int)stp->sd_maxblk;
+ } else {
+ stp = NULL;
+ wroff = so->so_proto_props.sopp_wroff;
+ tail_len = so->so_proto_props.sopp_tail;
+ maxblk = so->so_proto_props.sopp_maxblk;
+ }
extra = wroff + tail_len;
}
+ bzero(&msg, sizeof (msg));
auio.uio_extflg = UIO_COPY_DEFAULT;
for (i = 0; i < copy_cnt; i++) {
if (ISSIG(curthread, JUSTLOOKING))
@@ -841,7 +772,8 @@ sendvec_chunk(file_t *fp, u_offset_t *fileoff, struct sendfilevec *sfv,
size_t iov_len;
iov_len = sfv_len;
- if (so->so_kssl_ctx != NULL)
+ if (!SOCK_IS_NONSTR(so) &&
+ SOTOTPI(so)->sti_kssl_ctx != NULL)
iov_len = MIN(iov_len, maxblk);
aiov.iov_len = iov_len;
@@ -868,9 +800,12 @@ sendvec_chunk(file_t *fp, u_offset_t *fileoff, struct sendfilevec *sfv,
return (error);
}
dmp->b_wptr += iov_len;
- error = kstrwritemp(vp, dmp, fflag);
+ error = socket_sendmblk(VTOSO(vp),
+ &msg, fflag, CRED(), &dmp);
+
if (error != 0) {
- freeb(dmp);
+ if (dmp != NULL)
+ freeb(dmp);
return (error);
}
ttolwp(curthread)->lwp_ru.ioch +=
@@ -880,6 +815,9 @@ sendvec_chunk(file_t *fp, u_offset_t *fileoff, struct sendfilevec *sfv,
sfv_off += iov_len;
}
} else {
+ ttolwp(curthread)->lwp_ru.ioch +=
+ (ulong_t)sfv_len;
+ *count += sfv_len;
aiov.iov_len = sfv_len;
aiov.iov_base = (caddr_t)(uintptr_t)sfv_off;
@@ -971,25 +909,30 @@ sendvec_chunk(file_t *fp, u_offset_t *fileoff, struct sendfilevec *sfv,
return (ENOMEM);
}
} else {
+ uint_t copyflag;
+
+ copyflag = stp != NULL ? stp->sd_copyflag :
+ so->so_proto_props.sopp_zcopyflag;
/*
* For sockets acting as an SSL proxy, we
* need to adjust the size to the maximum
* SSL record size set in the stream head.
*/
- if (so->so_kssl_ctx != NULL)
+ if (!SOCK_IS_NONSTR(so) &&
+ _SOTOTPI(so)->sti_kssl_ctx != NULL)
size = MIN(size, maxblk);
if (vn_has_flocks(readvp) ||
readvp->v_flag & VNOMAP ||
- stp->sd_copyflag & STZCVMUNSAFE) {
+ copyflag & STZCVMUNSAFE) {
segmapit = 0;
- } else if (stp->sd_copyflag & STZCVMSAFE) {
+ } else if (copyflag & STZCVMSAFE) {
segmapit = 1;
} else {
int on = 1;
- if (SOP_SETSOCKOPT(VTOSO(vp),
+ if (socket_setsockopt(VTOSO(vp),
SOL_SOCKET, SO_SND_COPYAVOID,
- &on, sizeof (on)) == 0)
+ &on, sizeof (on), CRED()) == 0)
segmapit = 1;
}
}
@@ -1085,9 +1028,12 @@ sendvec_chunk(file_t *fp, u_offset_t *fileoff, struct sendfilevec *sfv,
if (vp->v_type == VSOCK) {
dmp->b_wptr = dmp->b_rptr + cnt;
- error = kstrwritemp(vp, dmp, fflag);
+ error = socket_sendmblk(VTOSO(vp),
+ &msg, fflag, CRED(), &dmp);
+
if (error != 0) {
- freeb(dmp);
+ if (dmp != NULL)
+ freeb(dmp);
VOP_RWUNLOCK(readvp, readflg,
NULL);
releasef(sfv->sfv_fd);
@@ -1186,45 +1132,11 @@ sendfilev(int opcode, int fildes, const struct sendfilevec *vec, int sfvcnt,
switch (vp->v_type) {
case VSOCK:
so = VTOSO(vp);
- /* sendfile not supported for SCTP */
- if (so->so_protocol == IPPROTO_SCTP) {
- error = EPROTONOSUPPORT;
- goto err;
- }
is_sock = B_TRUE;
- switch (so->so_family) {
- case AF_INET:
- case AF_INET6:
- /*
- * Make similar checks done in SOP_WRITE().
- */
- if (so->so_state & SS_CANTSENDMORE) {
- tsignal(curthread, SIGPIPE);
- error = EPIPE;
- goto err;
- }
- if (so->so_type != SOCK_STREAM) {
- error = EOPNOTSUPP;
- goto err;
- }
-
- if ((so->so_state & (SS_ISCONNECTED|SS_ISBOUND)) !=
- (SS_ISCONNECTED|SS_ISBOUND)) {
- error = ENOTCONN;
- goto err;
- }
-
- if ((so->so_state & SS_DIRECT) &&
- (so->so_priv != NULL) &&
- (so->so_kssl_ctx == NULL)) {
- maxblk = ((tcp_t *)so->so_priv)->tcp_mss;
- } else {
- maxblk = (int)vp->v_stream->sd_maxblk;
- }
- break;
- default:
- error = EAFNOSUPPORT;
- goto err;
+ if (SOCK_IS_NONSTR(so)) {
+ maxblk = so->so_proto_props.sopp_maxblk;
+ } else {
+ maxblk = (int)vp->v_stream->sd_maxblk;
}
break;
case VREG:
@@ -1361,21 +1273,18 @@ sendfilev(int opcode, int fildes, const struct sendfilevec *vec, int sfvcnt,
* senfilev() function to consume the sfv[].
*/
if (is_sock) {
- switch (so->so_family) {
- case AF_INET:
- case AF_INET6:
- if (so->so_nl7c_flags != 0)
- error = nl7c_sendfilev(so, &fileoff,
- sfv, copy_cnt, &count);
- else if ((total_size <= (4 * maxblk)) &&
- error == 0)
- error = sendvec_small_chunk(fp,
- &fileoff, sfv, copy_cnt,
- total_size, maxblk, &count);
- else
- error = sendvec_chunk(fp, &fileoff,
- sfv, copy_cnt, &count);
- break;
+ if (!SOCK_IS_NONSTR(so) &&
+ _SOTOTPI(so)->sti_nl7c_flags != 0) {
+ error = nl7c_sendfilev(so, &fileoff,
+ sfv, copy_cnt, &count);
+ } else if ((total_size <= (4 * maxblk)) &&
+ error == 0) {
+ error = sendvec_small_chunk(fp,
+ &fileoff, sfv, copy_cnt,
+ total_size, maxblk, &count);
+ } else {
+ error = sendvec_chunk(fp, &fileoff,
+ sfv, copy_cnt, &count);
}
} else {
ASSERT(vp->v_type == VREG);
diff --git a/usr/src/uts/intel/Makefile.intel.shared b/usr/src/uts/intel/Makefile.intel.shared
index 0eba71bc6f..62e23247bf 100644
--- a/usr/src/uts/intel/Makefile.intel.shared
+++ b/usr/src/uts/intel/Makefile.intel.shared
@@ -565,6 +565,7 @@ MISC_KMODS += kcf
MISC_KMODS += kgssapi
MISC_KMODS += kmech_dummy
MISC_KMODS += kmech_krb5
+MISC_KMODS += ksocket
MISC_KMODS += mac
MISC_KMODS += mixer
MISC_KMODS += net80211
@@ -685,6 +686,12 @@ MAC_KMODS += mac_ib
DEVNAME_KMODS += sdev_nsconfig_mod
#
+# socketmod (kernel/socketmod)
+#
+SOCKET_KMODS += socksctp
+SOCKET_KMODS += socksdp
+
+#
# kiconv modules (/kernel/kiconv):
#
KICONV_KMODS += kiconv_emea kiconv_ja kiconv_ko kiconv_sc kiconv_tc
diff --git a/usr/src/uts/intel/ia32/ml/modstubs.s b/usr/src/uts/intel/ia32/ml/modstubs.s
index e29afc6c29..0569b9e394 100644
--- a/usr/src/uts/intel/ia32/ml/modstubs.s
+++ b/usr/src/uts/intel/ia32/ml/modstubs.s
@@ -497,7 +497,10 @@ fcnname/**/_info: \
NO_UNLOAD_STUB(sockfs, snf_segmap, nomod_einval);
NO_UNLOAD_STUB(sockfs, sock_getfasync, nomod_zero);
NO_UNLOAD_STUB(sockfs, nl7c_sendfilev, nomod_zero);
- NO_UNLOAD_STUB(sockfs, sostream_direct, nomod_zero);
+ NO_UNLOAD_STUB(sockfs, sotpi_sototpi, nomod_zero);
+ NO_UNLOAD_STUB(sockfs, socket_sendmblk, nomod_zero);
+ NO_UNLOAD_STUB(sockfs, socket_setsockopt, nomod_zero);
+ NO_UNLOAD_STUB(sockfs, sod_uioa_mblk_done, nomod_zero);
END_MODULE(sockfs);
#endif
@@ -1278,30 +1281,6 @@ fcnname/**/_info: \
#endif
/*
- * Stubs for SDP-IB driver.
- */
-#ifndef SDPIB_MODULE
- MODULE(sdpib,drv);
- STUB(sdpib, sdp_create, nomod_zero);
- STUB(sdpib, sdp_bind, nomod_einval);
- STUB(sdpib, sdp_listen, nomod_einval);
- STUB(sdpib, sdp_connect, nomod_einval);
- STUB(sdpib, sdp_recv, nomod_einval);
- STUB(sdpib, sdp_send, nomod_einval);
- STUB(sdpib, sdp_getpeername, nomod_einval);
- STUB(sdpib, sdp_getsockname, nomod_einval);
- STUB(sdpib, sdp_disconnect, nomod_einval);
- STUB(sdpib, sdp_shutdown, nomod_einval);
- STUB(sdpib, sdp_get_opt, nomod_einval);
- STUB(sdpib, sdp_set_opt, nomod_einval);
- STUB(sdpib, sdp_close, nomod_void);
- STUB(sdpib, sdp_polldata, nomod_zero);
- STUB(sdpib, sdp_ioctl, nomod_einval);
- END_MODULE(sdpib);
-#endif
-
-
-/*
* Stubs for kssl, the kernel SSL proxy
*/
#ifndef KSSL_MODULE
@@ -1348,6 +1327,35 @@ fcnname/**/_info: \
END_MODULE(iommulib);
#endif
+/*
+ * Stubs for kernel socket, for iscsi
+ */
+#ifndef KSOCKET_MODULE
+ MODULE(ksocket, misc);
+ NO_UNLOAD_STUB(ksocket, ksocket_setsockopt, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_getsockopt, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_getpeername, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_getsockname, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_socket, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_bind, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_listen, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_accept, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_connect, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_recv, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_recvfrom, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_recvmsg, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_send, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_sendto, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_sendmsg, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_ioctl, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_setcallbacks, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_hold, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_rele, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_shutdown, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_close, nomod_minus_one);
+ END_MODULE(ksocket);
+#endif
+
/ this is just a marker for the area of text that contains stubs
ENTRY_NP(stubs_end)
diff --git a/usr/src/uts/intel/icmp/Makefile b/usr/src/uts/intel/icmp/Makefile
index 25a104ffbb..259530f9dc 100644
--- a/usr/src/uts/intel/icmp/Makefile
+++ b/usr/src/uts/intel/icmp/Makefile
@@ -21,11 +21,9 @@
#
# uts/intel/icmp/Makefile
#
-# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-# ident "%Z%%M% %I% %E% SMI"
-#
# This makefile drives the production of the icmp IP driver
#
# intel implementation architecture dependent
@@ -43,7 +41,7 @@ MODULE = icmp
OBJECTS = $(ICMP_OBJS:%=$(OBJS_DIR)/%)
LINTS = $(ICMP_OBJS:%.o=$(LINTS_DIR)/%.ln)
ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE)
-ROOTLINK = $(ROOT_STRMOD_DIR)/$(MODULE)
+ROOTLINK = $(ROOT_STRMOD_DIR)/$(MODULE) $(ROOT_SOCK_DIR)/$(MODULE)
CONF_SRCDIR = $(UTSBASE)/common/inet/ip
#
@@ -66,9 +64,9 @@ LINT_TARGET = $(MODULE).lint
INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOTLINK) $(ROOT_CONFFILE)
#
-# depends on ip
+# depends on ip and sockfs
#
-LDFLAGS += -dy -Ndrv/ip
+LDFLAGS += -dy -Ndrv/ip -Nfs/sockfs
#
# For now, disable these lint checks; maintainers should endeavor
@@ -100,7 +98,7 @@ clean.lint: $(CLEAN_LINT_DEPS)
install: $(INSTALL_DEPS) $(SISCHECK_DEPS)
-$(ROOTLINK): $(ROOT_STRMOD_DIR) $(ROOTMODULE)
+$(ROOTLINK): $(ROOT_STRMOD_DIR) $(ROOT_SOCK_DIR) $(ROOTMODULE)
-$(RM) $@; ln $(ROOTMODULE) $@
#
diff --git a/usr/src/uts/intel/icmp/icmp.global-objs.debug64 b/usr/src/uts/intel/icmp/icmp.global-objs.debug64
index ba041c7e17..eeeeedc77e 100644
--- a/usr/src/uts/intel/icmp/icmp.global-objs.debug64
+++ b/usr/src/uts/intel/icmp/icmp.global-objs.debug64
@@ -19,10 +19,9 @@
# CDDL HEADER END
#
#
-# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-# ident "%Z%%M% %I% %E% SMI"
cb_inet_devops
fsw
@@ -30,5 +29,8 @@ inet_dev_info
inet_devops
modldrv
modlinkage
+modlsockmod
modlstrmod
netdev_privs
+smodpriv
+smodreg
diff --git a/usr/src/uts/intel/idm/Makefile b/usr/src/uts/intel/idm/Makefile
index 463a8be02a..870fc039ed 100644
--- a/usr/src/uts/intel/idm/Makefile
+++ b/usr/src/uts/intel/idm/Makefile
@@ -60,7 +60,7 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
#
DEBUG_FLGS =
DEBUG_DEFS += $(DEBUG_FLGS)
-LDFLAGS += -dy -Nfs/sockfs
+LDFLAGS += -dy -Nfs/sockfs -Nmisc/ksocket
#
# Default build targets.
diff --git a/usr/src/uts/intel/ip/ip.global-objs.debug64 b/usr/src/uts/intel/ip/ip.global-objs.debug64
index f4bcb8ab0c..2e501f8abc 100644
--- a/usr/src/uts/intel/ip/ip.global-objs.debug64
+++ b/usr/src/uts/intel/ip/ip.global-objs.debug64
@@ -64,6 +64,7 @@ gcgrp4_hash
gcgrp6_hash
gcgrp_hash_size
gcgrp_lock
+icmp_fallback_sock_winit
icmp_frag_size_table
icmp_g_t_info_ack
icmp_ipha
@@ -104,6 +105,10 @@ ip_cgtp_filter_rev
ip_conn_cache
ip_debug
ip_g_all_ones
+ip_helper_stream_cache
+ip_helper_stream_info
+ip_helper_stream_rinit
+ip_helper_stream_winit
ip_ioctl_ftbl
ip_ire_cleanup_cnt
ip_ire_cpu_ratio
@@ -140,6 +145,7 @@ ip_squeue_worker_wait
ip_thread_data
ip_thread_list
ip_thread_rwlock
+ip_use_helper_cache
ip_wput_frag_mdt_min
ipcl_bind_fanout_size
ipcl_conn_hash_maxsize
@@ -251,6 +257,10 @@ sendq_loop_cnt
sin6_null
sin_null
skip_sctp_cksum
+sock_tcp_downcalls
+sock_rts_downcalls
+sock_rawip_downcalls
+sock_udp_downcalls
sqset_global_list
sqset_global_size
sqset_lock
@@ -264,6 +274,7 @@ tcp_acceptor_winit
tcp_conn_cache
tcp_conn_hash_size
tcp_drop_ack_unsent_cnt
+tcp_fallback_sock_winit
tcp_free_list_max_cnt
tcp_fusion_rcv_unread_min
tcp_g_kstat
@@ -303,10 +314,12 @@ tcp_winit
tcp_outbound_squeue_switch
tcpinfov4
tcpinfov6
+tli_errs
tsol_strict_error
tun_spd_hashsize
udp_bind_fanout_size
udp_conn_cache
+udp_fallback_sock_winit
udp_g_t_info_ack_ipv4
udp_g_t_info_ack_ipv6
udp_lrinit
diff --git a/usr/src/uts/intel/ip/ip.global-objs.obj64 b/usr/src/uts/intel/ip/ip.global-objs.obj64
index 3866432363..b773f8a5e0 100644
--- a/usr/src/uts/intel/ip/ip.global-objs.obj64
+++ b/usr/src/uts/intel/ip/ip.global-objs.obj64
@@ -64,6 +64,7 @@ gcgrp4_hash
gcgrp6_hash
gcgrp_hash_size
gcgrp_lock
+icmp_fallback_sock_winit
icmp_frag_size_table
icmp_g_t_info_ack
icmp_ipha
@@ -104,6 +105,10 @@ ip_cgtp_filter_rev
ip_conn_cache
ip_debug
ip_g_all_ones
+ip_helper_stream_cache
+ip_helper_stream_info
+ip_helper_stream_rinit
+ip_helper_stream_winit
ip_ioctl_ftbl
ip_ire_cleanup_cnt
ip_ire_cpu_ratio
@@ -140,6 +145,7 @@ ip_squeue_worker_wait
ip_thread_data
ip_thread_list
ip_thread_rwlock
+ip_use_helper_cache
ip_wput_frag_mdt_min
ipcl_bind_fanout_size
ipcl_conn_hash_maxsize
@@ -243,6 +249,10 @@ sctprinit
sctpwinit
sin6_null
sin_null
+sock_tcp_downcalls
+sock_rts_downcalls
+sock_rawip_downcalls
+sock_udp_downcalls
sqset_global_list
sqset_global_size
sqset_lock
@@ -256,6 +266,7 @@ tcp_acceptor_winit
tcp_conn_cache
tcp_conn_hash_size
tcp_drop_ack_unsent_cnt
+tcp_fallback_sock_winit
tcp_free_list_max_cnt
tcp_fusion_rcv_unread_min
tcp_g_kstat
@@ -295,10 +306,12 @@ tcp_winit
tcp_outbound_squeue_switch
tcpinfov4
tcpinfov6
+tli_errs
tsol_strict_error
tun_spd_hashsize
udp_bind_fanout_size
udp_conn_cache
+udp_fallback_sock_winit
udp_g_t_info_ack_ipv4
udp_g_t_info_ack_ipv6
udp_lrinit
diff --git a/usr/src/uts/intel/iscsi/Makefile b/usr/src/uts/intel/iscsi/Makefile
index 480f9caffa..efff98b964 100644
--- a/usr/src/uts/intel/iscsi/Makefile
+++ b/usr/src/uts/intel/iscsi/Makefile
@@ -61,7 +61,7 @@ INC_PATH += -I$(UTSBASE)/common/io/scsi/adapters/iscsi
#
# Note dependancy on misc/scsi.
#
-LDFLAGS += -dy -N"misc/scsi" -N"fs/sockfs" -N"sys/doorfs" -Nmisc/md5
+LDFLAGS += -dy -N"misc/scsi" -N"fs/sockfs" -N"sys/doorfs" -Nmisc/md5 -Nmisc/ksocket
LINTFLAGS += -a -erroff=E_BAD_PTR_CAST_ALIGN -erroff=E_PTRDIFF_OVERFLOW
LINTFLAGS64 += -a -erroff=E_BAD_PTR_CAST_ALIGN -erroff=E_PTRDIFF_OVERFLOW
diff --git a/usr/src/uts/intel/iscsit/Makefile b/usr/src/uts/intel/iscsit/Makefile
index 1df1235747..7ecd8be223 100644
--- a/usr/src/uts/intel/iscsit/Makefile
+++ b/usr/src/uts/intel/iscsit/Makefile
@@ -59,7 +59,7 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
# Overrides and depends_on
#
MODSTUBS_DIR = $(OBJS_DIR)
-LDFLAGS += -dy -Ndrv/stmf -Nmisc/idm -Nfs/sockfs -Nmisc/md5
+LDFLAGS += -dy -Ndrv/stmf -Nmisc/idm -Nfs/sockfs -Nmisc/md5 -Nmisc/ksocket
INC_PATH += -I$(UTSBASE)/common/io/comstar/port/iscsit
diff --git a/usr/src/uts/intel/ksocket/Makefile b/usr/src/uts/intel/ksocket/Makefile
new file mode 100644
index 0000000000..288c777b46
--- /dev/null
+++ b/usr/src/uts/intel/ksocket/Makefile
@@ -0,0 +1,84 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#
+# This makefile drives the production of the kernel socket module
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = ksocket
+OBJECTS = $(KSOCKET_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(KSOCKET_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_MISC_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+#
+# Overrides.
+#
+CFLAGS += $(CCVERBOSE)
+LDFLAGS += -dy -Nfs/sockfs
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/uts/intel/rts/Makefile b/usr/src/uts/intel/rts/Makefile
index 2247001290..8e8ec349a5 100644
--- a/usr/src/uts/intel/rts/Makefile
+++ b/usr/src/uts/intel/rts/Makefile
@@ -21,11 +21,9 @@
#
# uts/intel/rts/Makefile
#
-# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-# ident "%Z%%M% %I% %E% SMI"
-#
# This makefile drives the production of the rts IP driver
#
# intel implementation architecture dependent
@@ -43,6 +41,7 @@ MODULE = rts
OBJECTS = $(RTS_OBJS:%=$(OBJS_DIR)/%)
LINTS = $(RTS_OBJS:%.o=$(LINTS_DIR)/%.ln)
ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE)
+ROOTLINK = $(ROOT_SOCK_DIR)/$(MODULE)
CONF_SRCDIR = $(UTSBASE)/common/inet/ip
#
@@ -65,9 +64,9 @@ LINT_TARGET = $(MODULE).lint
INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOTLINK) $(ROOT_CONFFILE)
#
-# depends on ip
+# depends on ip and sockfs
#
-LDFLAGS += -dy -Ndrv/ip
+LDFLAGS += -dy -Ndrv/ip -Nfs/sockfs
#
# For now, disable these lint checks; maintainers should endeavor
@@ -99,7 +98,7 @@ clean.lint: $(CLEAN_LINT_DEPS)
install: $(INSTALL_DEPS) $(SISCHECK_DEPS)
-$(ROOTLINK): $(ROOT_STRMOD_DIR) $(ROOTMODULE)
+$(ROOTLINK): $(ROOT_STRMOD_DIR) $(ROOT_SOCK_DIR) $(ROOTMODULE)
-$(RM) $@; ln $(ROOTMODULE) $@
#
diff --git a/usr/src/uts/intel/rts/rts.global-objs.debug64 b/usr/src/uts/intel/rts/rts.global-objs.debug64
index 4c699f6410..75b422acf6 100644
--- a/usr/src/uts/intel/rts/rts.global-objs.debug64
+++ b/usr/src/uts/intel/rts/rts.global-objs.debug64
@@ -19,14 +19,15 @@
# CDDL HEADER END
#
#
-# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-# ident "%Z%%M% %I% %E% SMI"
cb_inet_devops
inet_dev_info
inet_devops
modldrv
modlinkage
+modlsockmod
netdev_privs
+smodreg
diff --git a/usr/src/uts/intel/smbsrv/Makefile b/usr/src/uts/intel/smbsrv/Makefile
index f8482ba8ce..77ef7351ba 100644
--- a/usr/src/uts/intel/smbsrv/Makefile
+++ b/usr/src/uts/intel/smbsrv/Makefile
@@ -19,11 +19,9 @@
# CDDL HEADER END
#
#
-# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-#ident "%Z%%M% %I% %E% SMI"
-#
# This makefile drives the production of the cifs server file system
# kernel module.
#
@@ -53,7 +51,8 @@ include $(UTSBASE)/intel/Makefile.intel
# Module dependencies
#
#
-LDFLAGS += -dy -Nfs/sockfs -Ndrv/ip -Nstrmod/rpcmod -Nsys/doorfs -Nmisc/kcf
+LDFLAGS += -dy -Nfs/sockfs -Nmisc/ksocket -Ndrv/ip -Nstrmod/rpcmod -Nsys/doorfs
+LDFLAGS += -Nmisc/kcf
#
# Define targets
diff --git a/usr/src/uts/intel/socksctp/Makefile b/usr/src/uts/intel/socksctp/Makefile
new file mode 100644
index 0000000000..fa316464ad
--- /dev/null
+++ b/usr/src/uts/intel/socksctp/Makefile
@@ -0,0 +1,95 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# This makefile drives the production of the socksctp socket
+# kernel module.
+#
+# intel architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = socksctp
+OBJECTS = $(SCTP_SOCK_MOD_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(SCTP_SOCK_MOD_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_SOCK_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+#
+# lint pass one enforcement and OS version; depends on sockfs and ip
+#
+CFLAGS += $(CCVERBOSE)
+
+LDFLAGS += -dy -Nfs/sockfs -Ndrv/ip
+
+#
+# For now, disable these lint checks; maintainers should endeavor
+# to investigate and remove these for maximum lint coverage.
+# Please do not carry these forward to new Makefiles.
+#
+LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN
+LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/uts/intel/socksdp/Makefile b/usr/src/uts/intel/socksdp/Makefile
new file mode 100644
index 0000000000..966b436fce
--- /dev/null
+++ b/usr/src/uts/intel/socksdp/Makefile
@@ -0,0 +1,87 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# This makefile drives the production of the socksdp socket
+# kernel module.
+#
+# intel architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = socksdp
+OBJECTS = $(SDP_SOCK_MOD_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(SDP_SOCK_MOD_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_SOCK_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+#
+# lint pass one enforcement and OS version; depends on sockfs, ip and sdpib
+#
+CFLAGS += $(CCVERBOSE)
+
+LDFLAGS += -dy -Nfs/sockfs -Ndrv/ip -Ndrv/sdpib
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/uts/intel/tcp/Makefile b/usr/src/uts/intel/tcp/Makefile
index 5bd267f765..d083460646 100644
--- a/usr/src/uts/intel/tcp/Makefile
+++ b/usr/src/uts/intel/tcp/Makefile
@@ -2,9 +2,8 @@
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
-# Common Development and Distribution License, Version 1.0 only
-# (the "License"). You may not use this file except in compliance
-# with the License.
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
@@ -22,10 +21,9 @@
#
# uts/intel/tcp/Makefile
#
-# Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-#pragma ident "%Z%%M% %I% %E% SMI"
#
# This makefile drives the production of the tcp driver kernel module.
#
@@ -44,7 +42,7 @@ MODULE = tcp
OBJECTS = $(TCP_OBJS:%=$(OBJS_DIR)/%)
LINTS = $(TCP_OBJS:%.o=$(LINTS_DIR)/%.ln)
ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE)
-ROOTLINK = $(ROOT_STRMOD_DIR)/$(MODULE)
+ROOTLINK = $(ROOT_STRMOD_DIR)/$(MODULE) $(ROOT_SOCK_DIR)/$(MODULE)
CONF_SRCDIR = $(UTSBASE)/common/inet/tcp
#
@@ -75,9 +73,9 @@ CINLINES = -xinline=tcp_set_ws_value,tcp_fill_header
CFLAGS += $(CINLINES)
#
-# depends on ip and md5
+# depends on ip, md5 and sockfs
#
-LDFLAGS += -dy -Ndrv/ip -Ncrypto/md5
+LDFLAGS += -dy -Ndrv/ip -Ncrypto/md5 -Nfs/sockfs
#
# Default build targets.
@@ -100,7 +98,7 @@ clean.lint: $(CLEAN_LINT_DEPS)
install: $(INSTALL_DEPS)
-$(ROOTLINK): $(ROOT_STRMOD_DIR) $(ROOTMODULE)
+$(ROOTLINK): $(ROOT_STRMOD_DIR) $(ROOT_SOCK_DIR) $(ROOTMODULE)
-$(RM) $@; ln $(ROOTMODULE) $@
#
diff --git a/usr/src/uts/intel/udp/Makefile b/usr/src/uts/intel/udp/Makefile
index dad550d3cf..c6238ebd8c 100644
--- a/usr/src/uts/intel/udp/Makefile
+++ b/usr/src/uts/intel/udp/Makefile
@@ -2,9 +2,8 @@
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
-# Common Development and Distribution License, Version 1.0 only
-# (the "License"). You may not use this file except in compliance
-# with the License.
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
@@ -22,11 +21,9 @@
#
# uts/intel/udp/Makefile
#
-# Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-#pragma ident "%Z%%M% %I% %E% SMI"
-#
# This makefile drives the production of the udp driver kernel module.
#
# intel implementation architecture dependent
@@ -44,7 +41,7 @@ MODULE = udp
OBJECTS = $(UDP_OBJS:%=$(OBJS_DIR)/%)
LINTS = $(UDP_OBJS:%.o=$(LINTS_DIR)/%.ln)
ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE)
-ROOTLINK = $(ROOT_STRMOD_DIR)/$(MODULE)
+ROOTLINK = $(ROOT_STRMOD_DIR)/$(MODULE) $(ROOT_SOCK_DIR)/$(MODULE)
CONF_SRCDIR = $(UTSBASE)/common/inet/udp
#
@@ -67,9 +64,9 @@ LINT_TARGET = $(MODULE).lint
INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOTLINK) $(ROOT_CONFFILE)
#
-# depends on ip
+# depends on ip and sockfs
#
-LDFLAGS += -dy -Ndrv/ip
+LDFLAGS += -dy -Ndrv/ip -Nfs/sockfs
#
# Default build targets.
@@ -92,7 +89,7 @@ clean.lint: $(CLEAN_LINT_DEPS)
install: $(INSTALL_DEPS)
-$(ROOTLINK): $(ROOT_STRMOD_DIR) $(ROOTMODULE)
+$(ROOTLINK): $(ROOT_STRMOD_DIR) $(ROOT_SOCK_DIR) $(ROOTMODULE)
-$(RM) $@; ln $(ROOTMODULE) $@
#
diff --git a/usr/src/uts/sparc/Makefile.sparc.shared b/usr/src/uts/sparc/Makefile.sparc.shared
index 80a188f75a..061befa7e3 100644
--- a/usr/src/uts/sparc/Makefile.sparc.shared
+++ b/usr/src/uts/sparc/Makefile.sparc.shared
@@ -385,6 +385,7 @@ MISC_KMODS += s1394
MISC_KMODS += hpcsvc pcihp pciehpc pcishpc
MISC_KMODS += rsmops
MISC_KMODS += kcf
+MISC_KMODS += ksocket
MISC_KMODS += ibcm
MISC_KMODS += ibdm
MISC_KMODS += ibmf
@@ -486,6 +487,12 @@ MAC_KMODS += mac_ib
DEVNAME_KMODS += sdev_nsconfig_mod
#
+# socketmod (kernel/socketmod)
+#
+SOCKET_KMODS += socksctp
+SOCKET_KMODS += socksdp
+
+#
# kiconv modules (/kernel/kiconv):
#
KICONV_KMODS += kiconv_emea kiconv_ja kiconv_ko kiconv_sc kiconv_tc
diff --git a/usr/src/uts/sparc/icmp/Makefile b/usr/src/uts/sparc/icmp/Makefile
index 5fd067b116..55c11a1ea0 100644
--- a/usr/src/uts/sparc/icmp/Makefile
+++ b/usr/src/uts/sparc/icmp/Makefile
@@ -20,11 +20,9 @@
#
#
# uts/sparc/icmp/Makefile
-# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-#ident "%Z%%M% %I% %E% SMI"
-#
# This makefile drives the production of the icmp IP driver
#
# sparc architecture dependent
@@ -42,7 +40,7 @@ MODULE = icmp
OBJECTS = $(ICMP_OBJS:%=$(OBJS_DIR)/%)
LINTS = $(ICMP_OBJS:%.o=$(LINTS_DIR)/%.ln)
ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE)
-ROOTLINK = $(ROOT_STRMOD_DIR)/$(MODULE)
+ROOTLINK = $(ROOT_STRMOD_DIR)/$(MODULE) $(ROOT_SOCK_DIR)/$(MODULE)
CONF_SRCDIR = $(UTSBASE)/common/inet/ip
#
@@ -70,9 +68,9 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOTLINK) $(ROOT_CONFFILE)
CFLAGS += $(CCVERBOSE)
#
-# depends on ip
+# depends on ip and sockfs
#
-LDFLAGS += -dy -Ndrv/ip
+LDFLAGS += -dy -Ndrv/ip -Nfs/sockfs
#
# For now, disable these lint checks; maintainers should endeavor
@@ -104,7 +102,7 @@ clean.lint: $(CLEAN_LINT_DEPS)
install: $(INSTALL_DEPS) $(SISCHECK_DEPS)
-$(ROOTLINK): $(ROOT_STRMOD_DIR) $(ROOTMODULE)
+$(ROOTLINK): $(ROOT_STRMOD_DIR) $(ROOT_SOCK_DIR) $(ROOTMODULE)
-$(RM) $@; ln $(ROOTMODULE) $@
#
diff --git a/usr/src/uts/sparc/icmp/icmp.global-objs.debug64 b/usr/src/uts/sparc/icmp/icmp.global-objs.debug64
index ba041c7e17..eeeeedc77e 100644
--- a/usr/src/uts/sparc/icmp/icmp.global-objs.debug64
+++ b/usr/src/uts/sparc/icmp/icmp.global-objs.debug64
@@ -19,10 +19,9 @@
# CDDL HEADER END
#
#
-# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-# ident "%Z%%M% %I% %E% SMI"
cb_inet_devops
fsw
@@ -30,5 +29,8 @@ inet_dev_info
inet_devops
modldrv
modlinkage
+modlsockmod
modlstrmod
netdev_privs
+smodpriv
+smodreg
diff --git a/usr/src/uts/sparc/idm/Makefile b/usr/src/uts/sparc/idm/Makefile
index 6b03fb56df..27535cf198 100644
--- a/usr/src/uts/sparc/idm/Makefile
+++ b/usr/src/uts/sparc/idm/Makefile
@@ -58,7 +58,7 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
#
DEBUG_FLGS =
DEBUG_DEFS += $(DEBUG_FLGS)
-LDFLAGS += -dy -Nfs/sockfs
+LDFLAGS += -dy -Nfs/sockfs -Nmisc/ksocket
#
# Default build targets.
diff --git a/usr/src/uts/sparc/ip/ip.global-objs.debug64 b/usr/src/uts/sparc/ip/ip.global-objs.debug64
index f4bcb8ab0c..fabffbc5f5 100644
--- a/usr/src/uts/sparc/ip/ip.global-objs.debug64
+++ b/usr/src/uts/sparc/ip/ip.global-objs.debug64
@@ -64,6 +64,7 @@ gcgrp4_hash
gcgrp6_hash
gcgrp_hash_size
gcgrp_lock
+icmp_fallback_sock_winit
icmp_frag_size_table
icmp_g_t_info_ack
icmp_ipha
@@ -104,6 +105,10 @@ ip_cgtp_filter_rev
ip_conn_cache
ip_debug
ip_g_all_ones
+ip_helper_stream_cache
+ip_helper_stream_info
+ip_helper_stream_rinit
+ip_helper_stream_winit
ip_ioctl_ftbl
ip_ire_cleanup_cnt
ip_ire_cpu_ratio
@@ -140,6 +145,7 @@ ip_squeue_worker_wait
ip_thread_data
ip_thread_list
ip_thread_rwlock
+ip_use_helper_cache
ip_wput_frag_mdt_min
ipcl_bind_fanout_size
ipcl_conn_hash_maxsize
@@ -251,6 +257,10 @@ sendq_loop_cnt
sin6_null
sin_null
skip_sctp_cksum
+sock_rawip_downcalls
+sock_rts_downcalls
+sock_tcp_downcalls
+sock_udp_downcalls
sqset_global_list
sqset_global_size
sqset_lock
@@ -264,6 +274,7 @@ tcp_acceptor_winit
tcp_conn_cache
tcp_conn_hash_size
tcp_drop_ack_unsent_cnt
+tcp_fallback_sock_winit
tcp_free_list_max_cnt
tcp_fusion_rcv_unread_min
tcp_g_kstat
@@ -303,10 +314,12 @@ tcp_winit
tcp_outbound_squeue_switch
tcpinfov4
tcpinfov6
+tli_errs
tsol_strict_error
tun_spd_hashsize
udp_bind_fanout_size
udp_conn_cache
+udp_fallback_sock_winit
udp_g_t_info_ack_ipv4
udp_g_t_info_ack_ipv6
udp_lrinit
diff --git a/usr/src/uts/sparc/ip/ip.global-objs.obj64 b/usr/src/uts/sparc/ip/ip.global-objs.obj64
index 3866432363..c7fb907f8c 100644
--- a/usr/src/uts/sparc/ip/ip.global-objs.obj64
+++ b/usr/src/uts/sparc/ip/ip.global-objs.obj64
@@ -64,6 +64,7 @@ gcgrp4_hash
gcgrp6_hash
gcgrp_hash_size
gcgrp_lock
+icmp_fallback_sock_winit
icmp_frag_size_table
icmp_g_t_info_ack
icmp_ipha
@@ -104,6 +105,10 @@ ip_cgtp_filter_rev
ip_conn_cache
ip_debug
ip_g_all_ones
+ip_helper_stream_cache
+ip_helper_stream_info
+ip_helper_stream_rinit
+ip_helper_stream_winit
ip_ioctl_ftbl
ip_ire_cleanup_cnt
ip_ire_cpu_ratio
@@ -140,6 +145,7 @@ ip_squeue_worker_wait
ip_thread_data
ip_thread_list
ip_thread_rwlock
+ip_use_helper_cache
ip_wput_frag_mdt_min
ipcl_bind_fanout_size
ipcl_conn_hash_maxsize
@@ -243,6 +249,10 @@ sctprinit
sctpwinit
sin6_null
sin_null
+sock_rawip_downcalls
+sock_rts_downcalls
+sock_tcp_downcalls
+sock_udp_downcalls
sqset_global_list
sqset_global_size
sqset_lock
@@ -256,6 +266,7 @@ tcp_acceptor_winit
tcp_conn_cache
tcp_conn_hash_size
tcp_drop_ack_unsent_cnt
+tcp_fallback_sock_winit
tcp_free_list_max_cnt
tcp_fusion_rcv_unread_min
tcp_g_kstat
@@ -295,10 +306,12 @@ tcp_winit
tcp_outbound_squeue_switch
tcpinfov4
tcpinfov6
+tli_errs
tsol_strict_error
tun_spd_hashsize
udp_bind_fanout_size
udp_conn_cache
+udp_fallback_sock_winit
udp_g_t_info_ack_ipv4
udp_g_t_info_ack_ipv6
udp_lrinit
diff --git a/usr/src/uts/sparc/iscsi/Makefile b/usr/src/uts/sparc/iscsi/Makefile
index 0e35ba9d0d..437d9b5838 100644
--- a/usr/src/uts/sparc/iscsi/Makefile
+++ b/usr/src/uts/sparc/iscsi/Makefile
@@ -61,7 +61,7 @@ INC_PATH += -I$(UTSBASE)/common/io/scsi/adapters/iscsi
#
# Note dependancy on misc/scsi.
#
-LDFLAGS += -dy -N"misc/scsi" -N"fs/sockfs" -N"sys/doorfs" -N"misc/md5"
+LDFLAGS += -dy -N"misc/scsi" -N"fs/sockfs" -N"sys/doorfs" -N"misc/md5" -Nmisc/ksocket
LINTFLAGS += -a -erroff=E_BAD_PTR_CAST_ALIGN -erroff=E_PTRDIFF_OVERFLOW
LINTFLAGS64 += -a -erroff=E_BAD_PTR_CAST_ALIGN -erroff=E_PTRDIFF_OVERFLOW
diff --git a/usr/src/uts/sparc/iscsit/Makefile b/usr/src/uts/sparc/iscsit/Makefile
index 1df1235747..7ecd8be223 100644
--- a/usr/src/uts/sparc/iscsit/Makefile
+++ b/usr/src/uts/sparc/iscsit/Makefile
@@ -59,7 +59,7 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
# Overrides and depends_on
#
MODSTUBS_DIR = $(OBJS_DIR)
-LDFLAGS += -dy -Ndrv/stmf -Nmisc/idm -Nfs/sockfs -Nmisc/md5
+LDFLAGS += -dy -Ndrv/stmf -Nmisc/idm -Nfs/sockfs -Nmisc/md5 -Nmisc/ksocket
INC_PATH += -I$(UTSBASE)/common/io/comstar/port/iscsit
diff --git a/usr/src/uts/sparc/ksocket/Makefile b/usr/src/uts/sparc/ksocket/Makefile
new file mode 100644
index 0000000000..287a7cfda6
--- /dev/null
+++ b/usr/src/uts/sparc/ksocket/Makefile
@@ -0,0 +1,84 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+
+#
+# This makefile drives the production of the kernel socket module
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = ksocket
+OBJECTS = $(KSOCKET_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(KSOCKET_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_MISC_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/sparc/Makefile.sparc
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+#
+# Overrides; the module depends on sockfs.
+#
+CFLAGS += $(CCVERBOSE)
+LDFLAGS += -dy -Nfs/sockfs
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets (sparc, to match Makefile.sparc above).
+#
+include $(UTSBASE)/sparc/Makefile.targ
diff --git a/usr/src/uts/sparc/ml/modstubs.s b/usr/src/uts/sparc/ml/modstubs.s
index e315c9857c..e3379799a7 100644
--- a/usr/src/uts/sparc/ml/modstubs.s
+++ b/usr/src/uts/sparc/ml/modstubs.s
@@ -385,7 +385,10 @@ stubs_base:
NO_UNLOAD_STUB(sockfs, snf_segmap, nomod_einval);
NO_UNLOAD_STUB(sockfs, sock_getfasync, nomod_zero);
NO_UNLOAD_STUB(sockfs, nl7c_sendfilev, nomod_zero);
- NO_UNLOAD_STUB(sockfs, sostream_direct, nomod_zero);
+ NO_UNLOAD_STUB(sockfs, sotpi_sototpi, nomod_zero);
+ NO_UNLOAD_STUB(sockfs, socket_sendmblk, nomod_zero);
+ NO_UNLOAD_STUB(sockfs, socket_setsockopt, nomod_zero);
+ NO_UNLOAD_STUB(sockfs, sod_uioa_mblk_done, nomod_zero);
END_MODULE(sockfs);
#endif
@@ -1232,27 +1235,6 @@ stubs_base:
END_MODULE(softmac);
#endif
-#ifndef SDPIB_MODULE
- MODULE(sdpib,drv);
- STUB(sdpib, sdp_create, nomod_zero);
- STUB(sdpib, sdp_bind, nomod_einval);
- STUB(sdpib, sdp_listen, nomod_einval);
- STUB(sdpib, sdp_connect, nomod_einval);
- STUB(sdpib, sdp_recv, nomod_einval);
- STUB(sdpib, sdp_send, nomod_einval);
- STUB(sdpib, sdp_getpeername, nomod_einval);
- STUB(sdpib, sdp_getsockname, nomod_einval);
- STUB(sdpib, sdp_disconnect, nomod_einval);
- STUB(sdpib, sdp_shutdown, nomod_einval);
- STUB(sdpib, sdp_get_opt, nomod_einval);
- STUB(sdpib, sdp_set_opt, nomod_einval);
- STUB(sdpib, sdp_close, nomod_void);
- STUB(sdpib, sdp_polldata, nomod_zero);
- STUB(sdpib, sdp_ioctl, nomod_einval);
- END_MODULE(sdpib);
-#endif
-
-
/*
* Stubs for kssl, the kernel SSL proxy
*/
@@ -1294,6 +1276,35 @@ stubs_base:
END_MODULE(ipnet);
#endif
+/*
+ * Stubs for kernel socket, for iscsi
+ */
+#ifndef KSOCKET_MODULE
+ MODULE(ksocket, misc);
+ NO_UNLOAD_STUB(ksocket, ksocket_setsockopt, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_getsockopt, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_getpeername, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_getsockname, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_socket, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_bind, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_listen, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_accept, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_connect, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_recv, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_recvfrom, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_recvmsg, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_send, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_sendto, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_sendmsg, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_ioctl, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_setcallbacks, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_hold, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_rele, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_shutdown, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_close, nomod_minus_one);
+ END_MODULE(ksocket);
+#endif
+
! this is just a marker for the area of text that contains stubs
.seg ".text"
.global stubs_end
diff --git a/usr/src/uts/sparc/rts/Makefile b/usr/src/uts/sparc/rts/Makefile
index ff635303bc..4078c24237 100644
--- a/usr/src/uts/sparc/rts/Makefile
+++ b/usr/src/uts/sparc/rts/Makefile
@@ -20,11 +20,9 @@
#
#
# uts/sparc/rts/Makefile
-# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-#ident "%Z%%M% %I% %E% SMI"
-#
# This makefile drives the production of the rts IP driver
#
# sparc architecture dependent
@@ -42,6 +40,7 @@ MODULE = rts
OBJECTS = $(RTS_OBJS:%=$(OBJS_DIR)/%)
LINTS = $(RTS_OBJS:%.o=$(LINTS_DIR)/%.ln)
ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE)
+ROOTLINK = $(ROOT_SOCK_DIR)/$(MODULE)
CONF_SRCDIR = $(UTSBASE)/common/inet/ip
#
@@ -69,9 +68,9 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOTLINK) $(ROOT_CONFFILE)
CFLAGS += $(CCVERBOSE)
#
-# depends on tun
+# depends on ip and sockfs
#
-LDFLAGS += -dy -Ndrv/ip
+LDFLAGS += -dy -Ndrv/ip -Nfs/sockfs
#
# For now, disable these lint checks; maintainers should endeavor
@@ -103,7 +102,7 @@ clean.lint: $(CLEAN_LINT_DEPS)
install: $(INSTALL_DEPS) $(SISCHECK_DEPS)
-$(ROOTLINK): $(ROOT_STRMOD_DIR) $(ROOTMODULE)
+$(ROOTLINK): $(ROOT_STRMOD_DIR) $(ROOT_SOCK_DIR) $(ROOTMODULE)
-$(RM) $@; ln $(ROOTMODULE) $@
#
diff --git a/usr/src/uts/sparc/rts/rts.global-objs.debug64 b/usr/src/uts/sparc/rts/rts.global-objs.debug64
index 4c699f6410..75b422acf6 100644
--- a/usr/src/uts/sparc/rts/rts.global-objs.debug64
+++ b/usr/src/uts/sparc/rts/rts.global-objs.debug64
@@ -19,14 +19,15 @@
# CDDL HEADER END
#
#
-# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-# ident "%Z%%M% %I% %E% SMI"
cb_inet_devops
inet_dev_info
inet_devops
modldrv
modlinkage
+modlsockmod
netdev_privs
+smodreg
diff --git a/usr/src/uts/sparc/smbsrv/Makefile b/usr/src/uts/sparc/smbsrv/Makefile
index 71c4cc5398..023d1c1cd5 100644
--- a/usr/src/uts/sparc/smbsrv/Makefile
+++ b/usr/src/uts/sparc/smbsrv/Makefile
@@ -19,11 +19,8 @@
# CDDL HEADER END
#
#
-#
-# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
-#
-#ident "%Z%%M% %I% %E% SMI"
#
# This makefile drives the production of the cifs server file system
@@ -52,7 +49,8 @@ include $(UTSBASE)/sparc/Makefile.sparc
#
# Module dependencies
#
-LDFLAGS += -dy -Nfs/sockfs -Ndrv/ip -Nstrmod/rpcmod -Nsys/doorfs -Nmisc/kcf
+LDFLAGS += -dy -Nfs/sockfs -Nmisc/ksocket -Ndrv/ip -Nstrmod/rpcmod -Nsys/doorfs
+LDFLAGS += -Nmisc/kcf
#
# Define targets
diff --git a/usr/src/uts/sparc/socksctp/Makefile b/usr/src/uts/sparc/socksctp/Makefile
new file mode 100644
index 0000000000..5acab4cfb1
--- /dev/null
+++ b/usr/src/uts/sparc/socksctp/Makefile
@@ -0,0 +1,96 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+
+#
+# This makefile drives the production of the socksctp socket
+# kernel module.
+#
+# sparc architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = socksctp
+OBJECTS = $(SCTP_SOCK_MOD_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(SCTP_SOCK_MOD_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_SOCK_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/sparc/Makefile.sparc
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+#
+# lint pass one enforcement and OS version; depends on sockfs and ip
+#
+CFLAGS += $(CCVERBOSE)
+
+LDFLAGS += -dy -Nfs/sockfs -Ndrv/ip
+
+#
+# For now, disable these lint checks; maintainers should endeavor
+# to investigate and remove these for maximum lint coverage.
+# Please do not carry these forward to new Makefiles.
+#
+LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN
+LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/sparc/Makefile.targ
diff --git a/usr/src/uts/sparc/socksdp/Makefile b/usr/src/uts/sparc/socksdp/Makefile
new file mode 100644
index 0000000000..6970c44faf
--- /dev/null
+++ b/usr/src/uts/sparc/socksdp/Makefile
@@ -0,0 +1,88 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+
+#
+# This makefile drives the production of the socksdp socket
+# kernel module.
+#
+# sparc architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = socksdp
+OBJECTS = $(SDP_SOCK_MOD_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(SDP_SOCK_MOD_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_SOCK_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/sparc/Makefile.sparc
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+#
+# lint pass one enforcement and OS version; depends on sockfs, ip and sdpib
+#
+CFLAGS += $(CCVERBOSE)
+
+LDFLAGS += -dy -Nfs/sockfs -Ndrv/ip -Ndrv/sdpib
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/sparc/Makefile.targ
diff --git a/usr/src/uts/sparc/tcp/Makefile b/usr/src/uts/sparc/tcp/Makefile
index 192fda758f..7276ecfaeb 100644
--- a/usr/src/uts/sparc/tcp/Makefile
+++ b/usr/src/uts/sparc/tcp/Makefile
@@ -2,9 +2,8 @@
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
-# Common Development and Distribution License, Version 1.0 only
-# (the "License"). You may not use this file except in compliance
-# with the License.
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
@@ -21,11 +20,9 @@
#
#
# uts/sparc/tcp/Makefile
-# Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-#ident "%Z%%M% %I% %E% SMI"
-#
# This makefile drives the production of the tcp driver kernel module.
#
# sparc architecture dependent
@@ -43,7 +40,7 @@ MODULE = tcp
OBJECTS = $(TCP_OBJS:%=$(OBJS_DIR)/%)
LINTS = $(TCP_OBJS:%.o=$(LINTS_DIR)/%.ln)
ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE)
-ROOTLINK = $(ROOT_STRMOD_DIR)/$(MODULE)
+ROOTLINK = $(ROOT_STRMOD_DIR)/$(MODULE) $(ROOT_SOCK_DIR)/$(MODULE)
CONF_SRCDIR = $(UTSBASE)/common/inet/tcp
#
@@ -77,9 +74,9 @@ CFLAGS += $(CCVERBOSE)
CFLAGS += -xinline=tcp_set_ws_value,tcp_fill_header
#
-# depends on ip and md5
+# depends on ip, md5 and sockfs
#
-LDFLAGS += -dy -Ndrv/ip -Ncrypto/md5
+LDFLAGS += -dy -Ndrv/ip -Ncrypto/md5 -Nfs/sockfs
#
# Default build targets.
@@ -102,7 +99,7 @@ clean.lint: $(CLEAN_LINT_DEPS)
install: $(INSTALL_DEPS)
-$(ROOTLINK): $(ROOT_STRMOD_DIR) $(ROOTMODULE)
+$(ROOTLINK): $(ROOT_STRMOD_DIR) $(ROOT_SOCK_DIR) $(ROOTMODULE)
-$(RM) $@; ln $(ROOTMODULE) $@
#
diff --git a/usr/src/uts/sparc/udp/Makefile b/usr/src/uts/sparc/udp/Makefile
index c0deb87087..07a4435112 100644
--- a/usr/src/uts/sparc/udp/Makefile
+++ b/usr/src/uts/sparc/udp/Makefile
@@ -2,9 +2,8 @@
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
-# Common Development and Distribution License, Version 1.0 only
-# (the "License"). You may not use this file except in compliance
-# with the License.
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
@@ -21,11 +20,9 @@
#
#
# uts/sparc/udp/Makefile
-# Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-#ident "%Z%%M% %I% %E% SMI"
-#
# This makefile drives the production of the udp driver kernel module.
#
# sparc architecture dependent
@@ -43,7 +40,7 @@ MODULE = udp
OBJECTS = $(UDP_OBJS:%=$(OBJS_DIR)/%)
LINTS = $(UDP_OBJS:%.o=$(LINTS_DIR)/%.ln)
ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE)
-ROOTLINK = $(ROOT_STRMOD_DIR)/$(MODULE)
+ROOTLINK = $(ROOT_STRMOD_DIR)/$(MODULE) $(ROOT_SOCK_DIR)/$(MODULE)
CONF_SRCDIR = $(UTSBASE)/common/inet/udp
#
@@ -71,9 +68,9 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOTLINK) $(ROOT_CONFFILE)
CFLAGS += $(CCVERBOSE)
#
-# depends on ip
+# depends on ip and sockfs
#
-LDFLAGS += -dy -Ndrv/ip
+LDFLAGS += -dy -Ndrv/ip -Nfs/sockfs
#
# Default build targets.
@@ -96,7 +93,7 @@ clean.lint: $(CLEAN_LINT_DEPS)
install: $(INSTALL_DEPS)
-$(ROOTLINK): $(ROOT_STRMOD_DIR) $(ROOTMODULE)
+$(ROOTLINK): $(ROOT_STRMOD_DIR) $(ROOT_SOCK_DIR) $(ROOTMODULE)
-$(RM) $@; ln $(ROOTMODULE) $@
#